Skip to content

Commit 868103b

Browse files
authored
Add Nvidia GPU support for ChatQnA (#225)
1. Add Helm-charts support
2. Add manifests support

Signed-off-by: PeterYang12 <yuhan.yang@intel.com>
1 parent 70205e5 commit 868103b

File tree

5 files changed

+233
-3
lines changed

5 files changed

+233
-3
lines changed

helm-charts/chatqna/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ export MODELNAME="Intel/neural-chat-7b-v3-3"
 helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
 # To use Gaudi device
 #helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
+# To use Nvidia GPU
+#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
 ```

### IMPORTANT NOTE

helm-charts/chatqna/nv-values.yaml

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@ (new file, 52 added lines)
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for chatqna.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

image:
  repository: opea/chatqna:latest
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  # tag: "1.0"

port: 8888
service:
  type: ClusterIP
  port: 8888

securityContext:
  readOnlyRootFilesystem: true
  allowPrivilegeEscalation: false
  runAsNonRoot: true
  runAsUser: 1000
  capabilities:
    drop:
    - ALL
  seccompProfile:
    type: RuntimeDefault

# To override values in subchart tgi
tgi:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    tag: "2.0"
  resources:
    limits:
      nvidia.com/gpu: 1

global:
  http_proxy:
  https_proxy:
  no_proxy:
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  LANGCHAIN_TRACING_V2: false
  LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
  # set modelUseHostPath to host directory if you want to use hostPath volume for model storage
  # comment out modelUseHostPath if you want to download the model from huggingface
  modelUseHostPath: /mnt/opea-models

helm-charts/common/tgi/nv-values.yaml

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@ (new file, 60 added lines)
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Default values for tgi.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

port: 2080

image:
  repository: ghcr.io/huggingface/text-generation-inference
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "2.0"

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

podAnnotations: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext:
  readOnlyRootFilesystem: true
  allowPrivilegeEscalation: false
  runAsNonRoot: true
  runAsUser: 1000
  capabilities:
    drop:
    - ALL
  seccompProfile:
    type: RuntimeDefault

service:
  type: ClusterIP

resources:
  limits:
    nvidia.com/gpu: 1

nodeSelector: {}

tolerations: []

affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

global:
  http_proxy: ""
  https_proxy: ""
  no_proxy: ""
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  # set modelUseHostPath to host directory if you want to use hostPath volume for model storage
  # comment out modelUseHostPath if you want to download the model from huggingface
  modelUseHostPath: /mnt/opea-models

manifests/common/tgi_nv.yaml

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@ (new file, 114 added lines)
---
# Source: tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: tgi-config
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
data:
  MODEL_ID: "Intel/neural-chat-7b-v3-3"
  PORT: "2080"
  HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
  HF_TOKEN: "insert-your-huggingface-token-here"
  MAX_INPUT_TOKENS: "1024"
  MAX_TOTAL_TOKENS: "4096"
  http_proxy: ""
  https_proxy: ""
  no_proxy: ""
  HABANA_LOGS: "/tmp/habana_logs"
  NUMBA_CACHE_DIR: "/tmp"
  TRANSFORMERS_CACHE: "/tmp/transformers_cache"
  HF_HOME: "/tmp/.cache/huggingface"
---
# Source: tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
  name: tgi
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 2080
      protocol: TCP
      name: tgi
  selector:
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
---
# Source: tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: tgi
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: tgi
      app.kubernetes.io/instance: tgi
  template:
    metadata:
      labels:
        app.kubernetes.io/name: tgi
        app.kubernetes.io/instance: tgi
    spec:
      securityContext:
        {}
      containers:
        - name: tgi
          envFrom:
            - configMapRef:
                name: tgi-config
            - configMapRef:
                name: extra-env-config
                optional: true
          securityContext:
            {}
          image: "ghcr.io/huggingface/text-generation-inference:2.0"
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /tmp
              name: tmp
          ports:
            - name: http
              containerPort: 2080
              protocol: TCP
          resources:
            limits:
              nvidia.com/gpu: 1
      volumes:
        - name: model-volume
          hostPath:
            path: /mnt/opea-models
            type: Directory
        - name: tmp
          emptyDir: {}

manifests/update_manifests.sh

+5-3
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@ function generate_yaml {
   outputdir=$2

   helm template $chart ../helm-charts/common/$chart --skip-tests --values ../helm-charts/common/$chart/values.yaml --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/$chart.yaml
-  if [ -f ../helm-charts/common/$chart/gaudi-values.yaml ]; then
-    helm template $chart ../helm-charts/common/$chart --skip-tests --values ../helm-charts/common/$chart/gaudi-values.yaml --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/${chart}_gaudi.yaml
-  fi
+
+  for f in `ls ../helm-charts/common/$chart/*-values.yaml 2>/dev/null `; do
+    ext=$(basename $f | cut -d'-' -f1)
+    helm template $chart ../helm-charts/common/$chart --skip-tests --values ${f} --set global.extraEnvConfig=extra-env-config,noProbe=true > ${outputdir}/${chart}_${ext}.yaml
+  done
 }

 mkdir -p $OUTPUTDIR

0 commit comments

Comments
 (0)