Skip to content

Commit 3849317

Browse files
yongfengdudaisy-ycguo
authored andcommitted
Add data prep component
Signed-off-by: Dolpher Du <Dolpher.Du@intel.com>
1 parent 27a0627 commit 3849317

File tree

9 files changed

+319
-0
lines changed

9 files changed

+319
-0
lines changed

helm-charts/chatqna/Chart.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,8 @@ dependencies:
3030
- name: retriever-usvc
3131
version: "0.1.0"
3232
repository: "file://../common/retriever-usvc"
33+
- name: data-prep
34+
version: "0.1.0"
35+
repository: "file://../common/data-prep"
3336
version: 0.1.0
3437
appVersion: "1.0.0"
+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/
+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
apiVersion: v2
5+
name: data-prep
6+
description: The Helm chart for deploying data prep as a microservice
7+
type: application
8+
version: 0.1.0
9+
# The data prep microservice server version
10+
appVersion: "1.0.0"
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# data-prep
2+
3+
Helm chart for deploying the data-prep microservice.
4+
5+
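data-prep depends on the Redis vector database and the TEI embedding service. Specify their endpoints via `REDIS_URL` and `TEI_EMBEDDING_ENDPOINT` when installing the chart.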
6+
7+
## Installing the Chart
8+
9+
To install the chart, run the following:
10+
11+
```console
12+
export REDIS_URL="redis://redis-vector-db:6379"
13+
export TEI_EMBEDDING_ENDPOINT="http://tei"
14+
helm install dataprep data-prep --set REDIS_URL=${REDIS_URL} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT}
15+
```
16+
17+
## Values
18+
19+
| Key | Type | Default | Description |
20+
| ---------------------- | ------ | ------------------------------ | ----------- |
21+
| image.repository | string | `"opea/dataprep-redis:latest"` | |
22+
| service.port | int | `6007` | |
23+
| REDIS_URL | string | `""` | |
24+
| TEI_EMBEDDING_ENDPOINT | string | `""` | |
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
1. Get the application URL by running these commands:
2+
{{- if contains "NodePort" .Values.service.type }}
3+
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "data-prep.fullname" . }})
4+
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
5+
echo http://$NODE_IP:$NODE_PORT
6+
{{- else if contains "LoadBalancer" .Values.service.type }}
7+
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
8+
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "data-prep.fullname" . }}'
9+
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "data-prep.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
10+
echo http://$SERVICE_IP:{{ .Values.service.port }}
11+
{{- else if contains "ClusterIP" .Values.service.type }}
12+
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "data-prep.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
13+
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
14+
echo "Visit http://127.0.0.1:8080 to use your application"
15+
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
16+
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{{/*
2+
Expand the name of the chart.
3+
*/}}
4+
{{- define "data-prep.name" -}}
5+
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6+
{{- end }}
7+
8+
{{/*
9+
Create a default fully qualified app name.
10+
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11+
If release name contains chart name it will be used as a full name.
12+
*/}}
13+
{{- define "data-prep.fullname" -}}
14+
{{- if .Values.fullnameOverride }}
15+
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16+
{{- else }}
17+
{{- $name := default .Chart.Name .Values.nameOverride }}
18+
{{- if contains $name .Release.Name }}
19+
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
20+
{{- else }}
21+
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22+
{{- end }}
23+
{{- end }}
24+
{{- end }}
25+
26+
{{/*
27+
Create chart name and version as used by the chart label.
28+
*/}}
29+
{{- define "data-prep.chart" -}}
30+
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31+
{{- end }}
32+
33+
{{/*
34+
Common labels
35+
*/}}
36+
{{- define "data-prep.labels" -}}
37+
helm.sh/chart: {{ include "data-prep.chart" . }}
38+
{{ include "data-prep.selectorLabels" . }}
39+
{{- if .Chart.AppVersion }}
40+
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41+
{{- end }}
42+
app.kubernetes.io/managed-by: {{ .Release.Service }}
43+
{{- end }}
44+
45+
{{/*
46+
Selector labels
47+
*/}}
48+
{{- define "data-prep.selectorLabels" -}}
49+
app.kubernetes.io/name: {{ include "data-prep.name" . }}
50+
app.kubernetes.io/instance: {{ .Release.Name }}
51+
{{- end }}
52+
53+
{{/*
54+
Create the name of the service account to use
55+
*/}}
56+
{{- define "data-prep.serviceAccountName" -}}
57+
{{- if .Values.serviceAccount.create }}
58+
{{- default (include "data-prep.fullname" .) .Values.serviceAccount.name }}
59+
{{- else }}
60+
{{- default "default" .Values.serviceAccount.name }}
61+
{{- end }}
62+
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
apiVersion: apps/v1
5+
kind: Deployment
6+
metadata:
7+
name: {{ include "data-prep.fullname" . }}
8+
labels:
9+
{{- include "data-prep.labels" . | nindent 4 }}
10+
spec:
11+
replicas: {{ .Values.replicaCount }}
12+
selector:
13+
matchLabels:
14+
{{- include "data-prep.selectorLabels" . | nindent 6 }}
15+
template:
16+
metadata:
17+
{{- with .Values.podAnnotations }}
18+
annotations:
19+
{{- toYaml . | nindent 8 }}
20+
{{- end }}
21+
labels:
22+
{{- include "data-prep.selectorLabels" . | nindent 8 }}
23+
spec:
24+
{{- with .Values.imagePullSecrets }}
25+
imagePullSecrets:
26+
{{- toYaml . | nindent 8 }}
27+
{{- end }}
28+
securityContext:
29+
{{- toYaml .Values.podSecurityContext | nindent 8 }}
30+
containers:
31+
- name: {{ .Release.Name }}
32+
env:
33+
- name: TEI_ENDPOINT
34+
{{- if .Values.TEI_EMBEDDING_ENDPOINT }}
35+
value: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote}}
36+
{{- else }}
37+
value: "http://{{ .Release.Name }}-tei"
38+
{{- end }}
39+
- name: REDIS_URL
40+
{{- if .Values.REDIS_URL }}
41+
value: {{ .Values.REDIS_URL | quote}}
42+
{{- else }}
43+
value: "redis://{{ .Release.Name }}-redis-vector-db:6379"
44+
{{- end }}
45+
- name: INDEX_NAME
46+
value: "rag-redis"
47+
- name: HUGGINGFACEHUB_API_TOKEN
48+
value: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
49+
- name: HF_HOME
50+
value: "/tmp/.cache/huggingface"
51+
- name: http_proxy
52+
# Quoted so the rendered env value is always a YAML string.
value: {{ .Values.global.http_proxy | quote }}
53+
- name: https_proxy
54+
# Quoted so the rendered env value is always a YAML string.
value: {{ .Values.global.https_proxy | quote }}
55+
- name: no_proxy
56+
# Quoted so the rendered env value is always a YAML string.
value: {{ .Values.global.no_proxy | quote }}
57+
- name: LANGCHAIN_TRACING_V2
58+
value: {{ .Values.global.LANGCHAIN_TRACING_V2 | quote }}
59+
- name: LANGCHAIN_API_KEY
60+
# Quoted, like HUGGINGFACEHUB_API_TOKEN, so the key is always rendered as a YAML string.
value: {{ .Values.global.LANGCHAIN_API_KEY | quote }}
61+
- name: LANGCHAIN_PROJECT
62+
value: "opea-dataprep-service"
63+
securityContext:
64+
{{- toYaml .Values.securityContext | nindent 12 }}
65+
image: "{{ .Values.image.repository }}"
66+
imagePullPolicy: {{ .Values.image.pullPolicy }}
67+
ports:
68+
- name: data-prep
69+
containerPort: 6007
70+
protocol: TCP
71+
volumeMounts:
72+
- mountPath: /tmp
73+
name: tmp
74+
# livenessProbe:
75+
# httpGet:
76+
# path: /
77+
# port: 7000
78+
# readinessProbe:
79+
# httpGet:
80+
# path: /
81+
# port: 7000
82+
resources:
83+
{{- toYaml .Values.resources | nindent 12 }}
84+
volumes:
85+
- name: tmp
86+
emptyDir: {}
87+
{{- with .Values.nodeSelector }}
88+
nodeSelector:
89+
{{- toYaml . | nindent 8 }}
90+
{{- end }}
91+
{{- with .Values.affinity }}
92+
affinity:
93+
{{- toYaml . | nindent 8 }}
94+
{{- end }}
95+
{{- with .Values.tolerations }}
96+
tolerations:
97+
{{- toYaml . | nindent 8 }}
98+
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
apiVersion: v1
5+
kind: Service
6+
metadata:
7+
name: {{ include "data-prep.fullname" . }}
8+
labels:
9+
{{- include "data-prep.labels" . | nindent 4 }}
10+
spec:
11+
type: {{ .Values.service.type }}
12+
ports:
13+
- port: {{ .Values.service.port }}
14+
targetPort: 6007
15+
protocol: TCP
16+
name: data-prep
17+
selector:
18+
{{- include "data-prep.selectorLabels" . | nindent 4 }}
+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Default values for data-prep.
5+
# This is a YAML-formatted file.
6+
# Declare variables to be passed into your templates.
7+
8+
replicaCount: 1
9+
10+
image:
11+
repository: opea/dataprep-redis:latest
12+
pullPolicy: IfNotPresent
13+
# NOTE: the image tag is part of "repository" above; the deployment template does not read a separate "tag" value.
14+
# tag: "1.0"
15+
16+
imagePullSecrets: []
17+
nameOverride: ""
18+
fullnameOverride: ""
19+
20+
podAnnotations: {}
21+
22+
podSecurityContext: {}
23+
# fsGroup: 2000
24+
25+
securityContext:
26+
readOnlyRootFilesystem: false
27+
allowPrivilegeEscalation: false
28+
runAsNonRoot: true
29+
runAsUser: 1000
30+
capabilities:
31+
drop:
32+
- ALL
33+
seccompProfile:
34+
type: RuntimeDefault
35+
36+
service:
37+
type: ClusterIP
38+
# The default port for data prep service is 6007
39+
port: 6007
40+
41+
resources: {}
42+
# We usually recommend not to specify default resources and to leave this as a conscious
43+
# choice for the user. This also increases chances charts run on environments with little
44+
# resources, such as Minikube. If you do want to specify resources, uncomment the following
45+
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
46+
# limits:
47+
# cpu: 100m
48+
# memory: 128Mi
49+
# requests:
50+
# cpu: 100m
51+
# memory: 128Mi
52+
53+
nodeSelector: {}
54+
55+
tolerations: []
56+
57+
affinity: {}
58+
59+
global:
60+
http_proxy:
61+
https_proxy:
62+
no_proxy:
63+
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
64+
LANGCHAIN_TRACING_V2: false
65+
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"

0 commit comments

Comments
 (0)