Commit fcd4f74

yongfengdu authored and jfding committed
Add llm-uservice
Added llm-uservice

Signed-off-by: Dolpher Du <Dolpher.Du@intel.com>
1 parent f5770a2 commit fcd4f74

26 files changed (+918, -4 lines changed)

Makefile

+4 -4

@@ -3,10 +3,10 @@ CHARTS_DIR := ./helm-charts
 .PHONY: test

 test:
-	@for chart in $$(find $(CHARTS_DIR) -mindepth 1 -maxdepth 1 -type d); do \
-		echo "Testing chart: $$chart"; \
-		helm lint $$chart; \
+	@for chart in $$(find $(CHARTS_DIR) -mindepth 1 -maxdepth 2 -type f -name "Chart.yaml"); do \
+		echo "Testing chart: $$(dirname $$chart)"; \
+		helm lint $$(dirname $$chart); \
 	done

 pre-commit:
-	@pre-commit run --all-files
+	@pre-commit run --all-files
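For reference, a hand-expanded sketch of what the updated test target now runs: it locates every Chart.yaml up to two levels below ./helm-charts and lints its directory. The path shown is illustrative, not taken from the commit:

```console
$ find ./helm-charts -mindepth 1 -maxdepth 2 -type f -name "Chart.yaml"
$ helm lint ./helm-charts/tgi    # i.e. helm lint $(dirname <each Chart.yaml found>)
```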
-3.42 KB
Binary file not shown.
.helmignore (tgi chart): +23

# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
Chart.yaml (tgi chart): +10

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v2
name: tgi
description: The Helm chart for HuggingFace Text Generation Inference Server
type: application
version: 0.1.0
# The HF TGI version
appVersion: "1.4"
README.md (tgi chart): +32

# tgi

Helm chart for deploying the Hugging Face Text Generation Inference service.

## Installing the Chart

To install the chart, run the following:

```console
$ export MODELDIR=/mnt
$ export MODELNAME="bigscience/bloom-560m"
$ helm install tgi tgi --set hftgi.volume=${MODELDIR} --set hftgi.modelId=${MODELNAME}
```

By default, the tgi service downloads the "bigscience/bloom-560m" model, which is about 1.1GB.

If you have already cached the model locally, you can pass it to the container as in this example:

MODELDIR=/home/ubuntu/hfmodels

MODELNAME="/data/models--bigscience--bloom-560m"

## Values

| Key           | Type   | Default                                           | Description                                                                                                                              |
| ------------- | ------ | ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| hftgi.modelId | string | `"bigscience/bloom-560m"`                         | Model id from https://huggingface.co/, or a predownloaded model directory                                                                  |
| hftgi.port    | string | `"80"`                                            | Hugging Face Text Generation Inference service port                                                                                       |
| hftgi.volume  | string | `"/mnt"`                                          | Cached models directory; tgi will not download the model if it is already cached here. The "volume" is mounted into the container as /data |
| hftgi.image   | string | `"ghcr.io/huggingface/text-generation-inference"` |                                                                                                                                            |
| hftgi.tag     | string | `"1.4"`                                           |                                                                                                                                            |
| service.port  | string | `"80"`                                            | The service port                                                                                                                           |
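Once the tgi pod is ready, a quick way to sanity-check the server is TGI's generate endpoint. A minimal sketch, assuming a release named tgi with the default ClusterIP service on port 80:

```console
$ kubectl port-forward svc/tgi 8080:80
$ curl http://localhost:8080/generate \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "What is deep learning?", "parameters": {"max_new_tokens": 32}}'
```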
templates/NOTES.txt (tgi chart): +16

1. Get the application URL by running these commands:
{{- if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "tgi.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
        You can watch the status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "tgi.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "tgi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
templates/_helpers.tpl (tgi chart): +62

{{/*
Expand the name of the chart.
*/}}
{{- define "tgi.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "tgi.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "tgi.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "tgi.labels" -}}
helm.sh/chart: {{ include "tgi.chart" . }}
{{ include "tgi.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "tgi.selectorLabels" -}}
app.kubernetes.io/name: {{ include "tgi.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "tgi.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "tgi.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
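The tgi.fullname helper reuses the release name when it already contains the chart name, and otherwise prefixes it with the release name. A sketch of how that resolves, assuming the chart directory is ./tgi:

```console
$ helm template tgi ./tgi --show-only templates/service.yaml | grep 'name:'
$ # release "tgi" contains the chart name, so fullname resolves to "tgi"
$ helm template demo ./tgi --show-only templates/service.yaml | grep 'name:'
$ # release "demo" does not, so fullname resolves to "demo-tgi"
```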
templates/deployment.yaml (tgi chart): +86

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "tgi.fullname" . }}
  labels:
    {{- include "tgi.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "tgi.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "tgi.selectorLabels" . | nindent 8 }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Chart.Name }}
          env:
            - name: MODEL_ID
              value: {{ .Values.hftgi.modelId }}
            - name: PORT
              value: {{ .Values.hftgi.port | quote }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          volumeMounts:
            - mountPath: /data
              name: model-volume
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
          # livenessProbe:
          #   httpGet:
          #     path: /
          #     port: http
          startupProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 120
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 20
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          # command:
          #   - "/usr/bin/bash"
      volumes:
        - name: model-volume
          hostPath:
            path: {{ .Values.hftgi.volume }}
            type: Directory
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
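Because the model volume is a hostPath with type: Directory, the directory set in hftgi.volume must already exist on the node that schedules the pod. A few commands for checking the rollout, assuming a release named tgi:

```console
$ kubectl get pods -l app.kubernetes.io/name=tgi,app.kubernetes.io/instance=tgi
$ kubectl logs deploy/tgi -f          # watch the model download and server startup
$ kubectl get deploy tgi -o jsonpath='{.status.readyReplicas}'
```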
templates/service.yaml (tgi chart): +18

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
  name: {{ include "tgi.fullname" . }}
  labels:
    {{- include "tgi.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: {{ .Values.hftgi.port }}
      protocol: TCP
      name: tgi
  selector:
    {{- include "tgi.selectorLabels" . | nindent 4 }}
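The service forwards service.port (80) to the container's hftgi.port. To confirm it has endpoints behind it, again assuming a release named tgi:

```console
$ kubectl get svc tgi
$ kubectl get endpoints tgi          # should list the pod IP once the readiness probe passes
```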
values.yaml (tgi chart): +59

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for tgi.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

hftgi:
  modelId: bigscience/bloom-560m
  # modelId: /data/OpenCodeInterpreter-DS-6.7B
  port: 80
  volume: /mnt

image:
  repository: ghcr.io/huggingface/text-generation-inference
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "1.4"

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

podAnnotations: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  type: ClusterIP
  port: 80

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

nodeSelector: {}

tolerations: []

affinity: {}
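Instead of repeated --set flags, the same defaults can be overridden with a values file. A sketch with hypothetical resource limits, assuming the chart directory is ./tgi:

```console
$ cat > my-values.yaml <<'EOF'
hftgi:
  modelId: bigscience/bloom-560m
  volume: /mnt
resources:
  limits:
    cpu: "4"
    memory: 16Gi
EOF
$ helm install tgi ./tgi -f my-values.yaml
```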
.helmignore (llm-uservice chart): +23

# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
Chart.yaml (llm-uservice chart): +10

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v2
name: llm-uservice
description: The Helm chart for deploying llm as microservice
type: application
version: 0.1.0
# The llm microservice server version
appVersion: "1.0"
README.md (llm-uservice chart): +28

# llm-uservice

Helm chart for deploying the llm microservice.

llm-uservice depends on tgi; refer to the tgi chart for more configuration details.

## Installing the Chart

To install the chart, run the following:

```console
$ export HFTOKEN="insert-your-huggingface-token-here"
$ export MODELDIR="/mnt"
$ export MODELNAME="m-a-p/OpenCodeInterpreter-DS-6.7B"
$ helm install codegen codegen --set hfToken=${HFTOKEN} --set tgi.hftgi.volume=${MODELDIR} --set tgi.hftgi.modelId=${MODELNAME}
```

## Values

| Key               | Type   | Default                               | Description                                                                                                                              |
| ----------------- | ------ | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
| hfToken           | string | `""`                                  | Your own Hugging Face API token                                                                                                              |
| image.repository  | string | `"intel/gen-ai-examples"`             |                                                                                                                                              |
| image.tag         | string | `"copilot"`                           |                                                                                                                                              |
| service.port      | string | `"9000"`                              |                                                                                                                                              |
| tgi.hftgi.modelId | string | `"m-a-p/OpenCodeInterpreter-DS-6.7B"` | Model id from https://huggingface.co/, or a predownloaded model directory                                                                    |
| tgi.hftgi.port    | string | `"80"`                                | Hugging Face Text Generation Inference service port                                                                                         |
| tgi.hftgi.volume  | string | `"/mnt"`                              | Cached models directory; tgi will not download the model if it is already cached here. The "volume" is mounted into the container as /data  |
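After installing with the command above, the release can be checked with standard kubectl queries. The service name below follows the usual <release>-<chart> fullname pattern and is an assumption, not taken from the commit:

```console
$ kubectl get pods -l app.kubernetes.io/instance=codegen
$ kubectl get svc -l app.kubernetes.io/instance=codegen
$ kubectl port-forward svc/codegen-llm-uservice 9000:9000   # assumed service name
```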
