Skip to content

Commit 73b5b65

Browse files
committed
helm: Enhance dataprep to support local embedding and getfile
- By setting TEI_EMBEDDING_ENDPOINT to empty and EMBED_MODEL to non-empty, data-prep can support local embedding. - Support getfile and delete URL Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
1 parent 847878a commit 73b5b65

File tree

8 files changed

+111
-25
lines changed

8 files changed

+111
-25
lines changed

helm-charts/common/data-prep/README.md

+32-3
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,43 @@ Helm chart for deploying data-prep microservice.
44

55
data-prep uses the redis and tei services; please specify their endpoints.
66

7-
## Installing the Chart
7+
## (Option1): Installing the chart separately:
88

9-
To install the chart, run the following:
9+
First, you need to install the tei and redis-vector-db charts. Please refer to [tei](../tei) and [redis-vector-db](../redis-vector-db) for more information.
10+
11+
After you've deployed the tei and redis-vector-db charts successfully, please run `kubectl get svc` to get the tei service endpoint and the redis URL respectively, e.g. `http://tei` and `redis://redis-vector-db:6379`.
12+
13+
To install data-prep chart, run the following:
1014

1115
```console
16+
cd GenAIInfra/helm-charts/common/data-prep
1217
export REDIS_URL="redis://redis-vector-db:6379"
1318
export TEI_EMBEDDING_ENDPOINT="http://tei"
14-
helm install dataprep data-prep --set REDIS_URL=${REDIS_URL} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT}
19+
helm dependency update
20+
helm install data-prep . --set REDIS_URL=${REDIS_URL} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT}
21+
```
22+
23+
## (Option2): Installing the chart with dependencies automatically:
24+
25+
```console
26+
cd GenAIInfra/helm-charts/common/data-prep
27+
helm dependency update
28+
helm install data-prep . --set autodependency.enabled=true
29+
```
30+
31+
## Verify
32+
33+
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
34+
35+
Then run the command `kubectl port-forward svc/data-prep 6007:6007` to expose the data-prep service for access.
36+
37+
Open another terminal and run the following command to verify that the service is working:
38+
39+
```console
40+
curl http://localhost:6007/v1/dataprep \
41+
-X POST \
42+
-H "Content-Type: multipart/form-data" \
43+
-F "files=@./README.md"
1544
```
1645

1746
## Values

helm-charts/common/data-prep/templates/NOTES.txt

-16
This file was deleted.

helm-charts/common/data-prep/templates/configmap.yaml

+3-2
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,16 @@ metadata:
1010
data:
1111
{{- if .Values.TEI_EMBEDDING_ENDPOINT }}
1212
TEI_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote}}
13-
{{- else }}
13+
{{- else if not .Values.EMBED_MODEL }}
1414
TEI_ENDPOINT: "http://{{ .Release.Name }}-tei"
1515
{{- end }}
16+
EMBED_MODEL: {{ .Values.EMBED_MODEL | quote }}
1617
{{- if .Values.REDIS_URL }}
1718
REDIS_URL: {{ .Values.REDIS_URL | quote}}
1819
{{- else }}
1920
REDIS_URL: "redis://{{ .Release.Name }}-redis-vector-db:6379"
2021
{{- end }}
21-
INDEX_NAME: "rag-redis"
22+
INDEX_NAME: {{ .Values.INDEX_NAME | quote }}
2223
HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
2324
HF_HOME: "/tmp/.cache/huggingface"
2425
http_proxy: {{ .Values.global.http_proxy | quote }}

helm-charts/common/data-prep/templates/deployment.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ spec:
4545
- name: data-prep
4646
containerPort: 6007
4747
protocol: TCP
48+
# The following need to be modified after GenAIComps bug #282 is resolved.
49+
# https://github.com/opea-project/GenAIComps/issues/282
50+
- containerPort: 6008
51+
protocol: TCP
52+
- containerPort: 6009
53+
protocol: TCP
4854
volumeMounts:
4955
- mountPath: /tmp
5056
name: tmp

helm-charts/common/data-prep/templates/service.yaml

+5-3
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ metadata:
1010
spec:
1111
type: {{ .Values.service.type }}
1212
ports:
13-
- port: {{ .Values.service.port }}
14-
targetPort: 6007
13+
{{- range .Values.service.ports }}
14+
- port: {{ .port }}
15+
targetPort: {{ .targetPort }}
1516
protocol: TCP
16-
name: data-prep
17+
name: {{ .name }}
18+
{{- end }}
1719
selector:
1820
{{- include "data-prep.selectorLabels" . | nindent 4 }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
apiVersion: v1
5+
kind: Pod
6+
metadata:
7+
name: {{ include "data-prep.fullname" . }}-testpod
8+
labels:
9+
{{- include "data-prep.labels" . | nindent 4 }}
10+
annotations:
11+
"helm.sh/hook": test
12+
#"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
13+
spec:
14+
containers:
15+
- name: curl
16+
#image: alpine/curl
17+
image: python:3.10.14
18+
command: ['bash', '-c']
19+
args:
20+
- |
21+
echo "test file" > /tmp/file1.txt;
22+
{{- with index .Values.service.ports 0 }}
23+
export port={{.port}};
24+
{{- end }}
25+
max_retry=20;
26+
for ((i=1; i<=max_retry; i++)); do
27+
curl http://{{ include "data-prep.fullname" . }}:$port/v1/dataprep -sS --fail-with-body \
28+
-X POST \
29+
-H "Content-Type: multipart/form-data" \
30+
-F "files=@/tmp/file1.txt" && break;
31+
sleep 10;
32+
done;
33+
if [ $i -gt $max_retry ]; then echo "dataprep test failed."; exit 1; fi
34+
restartPolicy: Never

helm-charts/common/data-prep/values.yaml

+16-1
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,19 @@ securityContext:
3838

3939
service:
4040
type: ClusterIP
41+
# The following need to be modified after GenAIComps bug #282 is resolved.
42+
# https://github.com/opea-project/GenAIComps/issues/282
43+
ports:
4144
# The default port for data prep service is 6007
42-
port: 6007
45+
- port: 6007
46+
targetPort: 6007
47+
name: data-prep
48+
- port: 6008
49+
targetPort: 6008
50+
name: data-prep-get
51+
- port: 6009
52+
targetPort: 6009
53+
name: data-prep-delete
4354

4455
resources: {}
4556
# We usually recommend not to specify default resources and to leave this as a conscious
@@ -83,8 +94,12 @@ affinity: {}
8394
# text embedding inference service URL, e.g. http://<service-name>:<port>
8495
TEI_EMBEDDING_ENDPOINT: ""
8596

97+
# local embedder's model
98+
EMBED_MODEL: ""
99+
86100
# redis DB service URL, e.g. redis://<service-name>:<port>
87101
REDIS_URL: ""
102+
INDEX_NAME: "rag-redis"
88103

89104
global:
90105
http_proxy: ""

manifests/common/data-prep.yaml

+15
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ metadata:
1515
app.kubernetes.io/managed-by: Helm
1616
data:
1717
TEI_ENDPOINT: "http://data-prep-tei"
18+
EMBED_MODEL: ""
1819
REDIS_URL: "redis://data-prep-redis-vector-db:6379"
1920
INDEX_NAME: "rag-redis"
2021
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
@@ -47,6 +48,14 @@ spec:
4748
targetPort: 6007
4849
protocol: TCP
4950
name: data-prep
51+
- port: 6008
52+
targetPort: 6008
53+
protocol: TCP
54+
name: data-prep-get
55+
- port: 6009
56+
targetPort: 6009
57+
protocol: TCP
58+
name: data-prep-delete
5059
selector:
5160
app.kubernetes.io/name: data-prep
5261
app.kubernetes.io/instance: data-prep
@@ -103,6 +112,12 @@ spec:
103112
- name: data-prep
104113
containerPort: 6007
105114
protocol: TCP
115+
# The following need to be modified after GenAIComps bug #282 is resolved.
116+
# https://github.com/opea-project/GenAIComps/issues/282
117+
- containerPort: 6008
118+
protocol: TCP
119+
- containerPort: 6009
120+
protocol: TCP
106121
volumeMounts:
107122
- mountPath: /tmp
108123
name: tmp

0 commit comments

Comments
 (0)