From 8fe45cb7e3367b24f79154773aed9e48af613f95 Mon Sep 17 00:00:00 2001 From: Dolpher Du Date: Fri, 8 Nov 2024 15:47:22 +0000 Subject: [PATCH] Helm Chart for AgentQnA Signed-off-by: Dolpher Du --- helm-charts/agentqna/.helmignore | 23 ++++ helm-charts/agentqna/Chart.yaml | 43 +++++++ helm-charts/agentqna/README.md | 26 ++++ helm-charts/agentqna/gaudi-values.yaml | 37 ++++++ helm-charts/agentqna/templates/_helpers.tpl | 62 ++++++++++ helm-charts/agentqna/templates/crag.yaml | 94 +++++++++++++++ .../agentqna/templates/docretriever.yaml | 112 ++++++++++++++++++ .../agentqna/templates/tests/test-pod.yaml | 30 +++++ helm-charts/agentqna/values.yaml | 110 +++++++++++++++++ helm-charts/common/agent/.helmignore | 23 ++++ helm-charts/common/agent/Chart.yaml | 19 +++ helm-charts/common/agent/README.md | 48 ++++++++ helm-charts/common/agent/ci-gaudi-values.yaml | 1 + helm-charts/common/agent/gaudi-values.yaml | 38 ++++++ .../common/agent/templates/_helpers.tpl | 62 ++++++++++ .../common/agent/templates/configmap.yaml | 66 +++++++++++ .../common/agent/templates/deployment.yaml | 98 +++++++++++++++ .../common/agent/templates/service.yaml | 18 +++ .../agent/templates/servicemonitor.yaml | 18 +++ .../agent/templates/tests/test-pod.yaml | 30 +++++ helm-charts/common/agent/values.yaml | 112 ++++++++++++++++++ .../common/tgi/templates/configmap.yaml | 3 + 22 files changed, 1073 insertions(+) create mode 100644 helm-charts/agentqna/.helmignore create mode 100644 helm-charts/agentqna/Chart.yaml create mode 100644 helm-charts/agentqna/README.md create mode 100644 helm-charts/agentqna/gaudi-values.yaml create mode 100644 helm-charts/agentqna/templates/_helpers.tpl create mode 100644 helm-charts/agentqna/templates/crag.yaml create mode 100644 helm-charts/agentqna/templates/docretriever.yaml create mode 100644 helm-charts/agentqna/templates/tests/test-pod.yaml create mode 100644 helm-charts/agentqna/values.yaml create mode 100644 helm-charts/common/agent/.helmignore create mode 100644 
helm-charts/common/agent/Chart.yaml create mode 100644 helm-charts/common/agent/README.md create mode 120000 helm-charts/common/agent/ci-gaudi-values.yaml create mode 100644 helm-charts/common/agent/gaudi-values.yaml create mode 100644 helm-charts/common/agent/templates/_helpers.tpl create mode 100644 helm-charts/common/agent/templates/configmap.yaml create mode 100644 helm-charts/common/agent/templates/deployment.yaml create mode 100644 helm-charts/common/agent/templates/service.yaml create mode 100644 helm-charts/common/agent/templates/servicemonitor.yaml create mode 100644 helm-charts/common/agent/templates/tests/test-pod.yaml create mode 100644 helm-charts/common/agent/values.yaml diff --git a/helm-charts/agentqna/.helmignore b/helm-charts/agentqna/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/agentqna/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/agentqna/Chart.yaml b/helm-charts/agentqna/Chart.yaml new file mode 100644 index 000000000..cb640fb2c --- /dev/null +++ b/helm-charts/agentqna/Chart.yaml @@ -0,0 +1,43 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: agentqna +description: The Helm chart to deploy AgentQnA +type: application +dependencies: + - name: agent + version: 1.0.0 + alias: worker + repository: "file://../common/agent" + - name: agent + version: 1.0.0 + alias: supervisor + repository: "file://../common/agent" + - name: tgi + version: 1.0.0 + repository: "file://../common/tgi" + condition: tgi.enabled + - name: tei + version: 1.0.0 + repository: "file://../common/tei" + - name: embedding-usvc + version: 1.0.0 + repository: "file://../common/embedding-usvc" + - name: teirerank + version: 1.0.0 + repository: "file://../common/teirerank" + - name: reranking-usvc + version: 1.0.0 + repository: "file://../common/reranking-usvc" + - name: redis-vector-db + version: 1.0.0 + repository: "file://../common/redis-vector-db" + - name: retriever-usvc + version: 1.0.0 + repository: "file://../common/retriever-usvc" + - name: data-prep + version: 1.0.0 + repository: "file://../common/data-prep" +version: 1.0.0 +appVersion: "v1.0" diff --git a/helm-charts/agentqna/README.md b/helm-charts/agentqna/README.md new file mode 100644 index 000000000..bb8006914 --- /dev/null +++ b/helm-charts/agentqna/README.md @@ -0,0 +1,26 @@ +# AgentQnA + +Helm chart for deploying AgentQnA service. 
+ +## Deploy + +helm repo add opea https://opea-project.github.io/GenAIInfra + +helm install agentqna opea/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} --set tgi.enabled=True + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +### Verify the workload through curl command + +Run the command `kubectl port-forward svc/agentqna-supervisor 9090:9090` to expose the service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:9090/v1/chat/completions \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"query": "Most recent album by Michael Jackson"}' +``` diff --git a/helm-charts/agentqna/gaudi-values.yaml b/helm-charts/agentqna/gaudi-values.yaml new file mode 100644 index 000000000..fa7277eda --- /dev/null +++ b/helm-charts/agentqna/gaudi-values.yaml @@ -0,0 +1,37 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +tgi: + accelDevice: "gaudi" + image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.0.5" + resources: + limits: + habana.ai/gaudi: 4 + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + CUDA_GRAPHS: "" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" + ENABLE_HPU_GRAPH: "true" + LIMIT_HPU_GRAPH: "true" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "true" + extraCmdArgs: ["--sharded","true","--num-shard","4"] + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/agentqna/templates/_helpers.tpl 
b/helm-charts/agentqna/templates/_helpers.tpl new file mode 100644 index 000000000..aee028e15 --- /dev/null +++ b/helm-charts/agentqna/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "agentqna.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "agentqna.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "agentqna.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "agentqna.labels" -}} +helm.sh/chart: {{ include "agentqna.chart" . }} +{{ include "agentqna.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "agentqna.selectorLabels" -}} +app.kubernetes.io/name: {{ include "agentqna.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "agentqna.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "agentqna.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/agentqna/templates/crag.yaml b/helm-charts/agentqna/templates/crag.yaml new file mode 100644 index 000000000..d806caced --- /dev/null +++ b/helm-charts/agentqna/templates/crag.yaml @@ -0,0 +1,94 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-crag + labels: + {{- include "agentqna.labels" . | nindent 4 }} + app: {{ .Release.Name }}-crag +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "agentqna.selectorLabels" . | nindent 6 }} + app: {{ .Release.Name }}-crag + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "agentqna.selectorLabels" . | nindent 8 }} + app: {{ .Release.Name }}-crag + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + env: + {{- if .Values.LOGFLAG }} + - name: LOGFLAG + value: {{ .Values.LOGFLAG | quote }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.crag.image.repository }}:{{ .Values.crag.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.crag.image.pullPolicy }} + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: crag + containerPort: 8000 + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "agentqna.selectorLabels" . | nindent 14 }} + app: {{ .Release.Name }}-crag + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-crag + labels: + {{- include "agentqna.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8000 + protocol: TCP + name: crag + selector: + {{- include "agentqna.selectorLabels" . | nindent 4 }} + app: {{ .Release.Name }}-crag diff --git a/helm-charts/agentqna/templates/docretriever.yaml b/helm-charts/agentqna/templates/docretriever.yaml new file mode 100644 index 000000000..fdf74c691 --- /dev/null +++ b/helm-charts/agentqna/templates/docretriever.yaml @@ -0,0 +1,112 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-docretriever + labels: + {{- include "agentqna.labels" . | nindent 4 }} + app: {{ .Release.Name }}-docretriever +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "agentqna.selectorLabels" . | nindent 6 }} + app: {{ .Release.Name }}-docretriever + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "agentqna.selectorLabels" . | nindent 8 }} + app: {{ .Release.Name }}-docretriever + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + env: + #- name: MEGA_SERVICE_PORT + # value: "8889" + - name: RERANK_SERVICE_HOST_IP + value: {{ .Release.Name }}-reranking-usvc + - name: EMBEDDING_SERVICE_HOST_IP + value: {{ .Release.Name }}-embedding-usvc + - name: RETRIEVER_SERVICE_HOST_IP + value: {{ .Release.Name }}-retriever-usvc + - name: RETRIEVER_SERVICE_PORT + value: "7000" + - name: RERANK_SERVER_HOST_IP + value: {{ .Release.Name }}-teirerank + - name: RERANK_SERVER_PORT + value: "80" + - name: EMBEDDING_SERVER_HOST_IP + value: {{ .Release.Name }}-tei + - name: EMBEDDING_SERVER_PORT + value: "80" + {{- if .Values.LOGFLAG }} + - name: LOGFLAG + value: {{ .Values.LOGFLAG | quote }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.docretriever.image.repository }}:{{ .Values.docretriever.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.docretriever.image.pullPolicy }} + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: docretriever + containerPort: 8889 + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "agentqna.selectorLabels" . | nindent 14 }} + app: {{ .Release.Name }}-docretriever + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-docretriever + labels: + {{- include "agentqna.labels" . 
| nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 8889 + targetPort: 8889 + protocol: TCP + name: docretriever + selector: + {{- include "agentqna.selectorLabels" . | nindent 4 }} + app: {{ .Release.Name }}-docretriever diff --git a/helm-charts/agentqna/templates/tests/test-pod.yaml b/helm-charts/agentqna/templates/tests/test-pod.yaml new file mode 100644 index 000000000..48f6c09aa --- /dev/null +++ b/helm-charts/agentqna/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "agentqna.fullname" . }}-testpod" + labels: + {{- include "agentqna.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "agentqna.fullname" (index .Subcharts "supervisor") }}:{{ .Values.supervisor.service.port }}/v1/chat/completions -sS --fail-with-body \ + -X POST \ + -d '{"query": "Tell me three songs of Michael Jackson."}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/agentqna/values.yaml b/helm-charts/agentqna/values.yaml new file mode 100644 index 000000000..a9f8b9d83 --- /dev/null +++ b/helm-charts/agentqna/values.yaml @@ -0,0 +1,110 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for agentqna. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +replicaCount: 1 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# This is just to avoid Helm errors when HPA is NOT used +# (use hpa-values.yaml files to actually enable HPA). +horizontalPodAutoscaler: + enabled: false + +docretriever: + image: + repository: opea/doc-index-retriever + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +worker: + toolPath: "/mnt/tools" + service: + port: 9095 + image: + repository: opea/agent-langchain + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + strategy: rag_agent_llama + recursion_limit: "12" + llm_engine: tgi + llm_endpoint_url: "" + model: "meta-llama/Meta-Llama-3.1-70B-Instruct" + temperature: "0.01" + max_new_tokens: "4096" + streaming: "false" + tools: "/home/user/tools/worker_agent_tools.yaml" + require_human_feedback: "false" + RETRIEVAL_TOOL_URL: "" + +supervisor: + toolPath: "/mnt/tools" + service: + port: 9090 + image: + repository: opea/agent-langchain + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + strategy: react_llama + recursion_limit: 10 + llm_engine: tgi + llm_endpoint_url: "" + model: "meta-llama/Meta-Llama-3.1-70B-Instruct" + temperature: "0.01" + max_new_tokens: "4096" + streaming: "false" + tools: /home/user/tools/supervisor_agent_tools.yaml + require_human_feedback: false + CRAG_SERVER: "" + WORKER_AGENT_URL: "" + +crag: + image: + repository: aicrowd/kdd-cup-24-crag-mock-api + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "v0" + +# Override values in specific subcharts +tgi: + enabled: false + LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct" + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + # set modelUseHostPath or modelUsePVC to use model cache. + modelUseHostPath: "" + # modelUseHostPath: /mnt/opea-models + # modelUsePVC: model-volume + + # Install Prometheus serviceMonitors for service components + monitoring: false + + # Prometheus Helm install release name needed for serviceMonitors + prometheusRelease: prometheus-stack diff --git a/helm-charts/common/agent/.helmignore b/helm-charts/common/agent/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/common/agent/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/common/agent/Chart.yaml b/helm-charts/common/agent/Chart.yaml new file mode 100644 index 000000000..8596a32cd --- /dev/null +++ b/helm-charts/common/agent/Chart.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: agent +description: The Helm chart for deploying agent microservice +type: application +version: 1.0.0 +# The llm microservice server version +appVersion: "v1.0" +dependencies: + - name: tgi + version: 1.0.0 + repository: file://../tgi + condition: tgi.enabled + - name: vllm + version: 1.0.0 + repository: file://../vllm + condition: vllm.enabled diff --git a/helm-charts/common/agent/README.md b/helm-charts/common/agent/README.md new file mode 100644 index 000000000..7ff860bef --- /dev/null +++ b/helm-charts/common/agent/README.md @@ -0,0 +1,48 @@ +# agent + +Helm chart for deploying Agent microservice. + +agent depends on LLM service, you should set llm_endpoint_url as LLM endpoint. + +## Deploy + +helm repo add opea https://opea-project.github.io/GenAIInfra + +### Use external LLM endpoint + +helm install agent opea/agent --set llm_endpoint_url=${YOUR_LLM_ENDPOINT} + +### Deploy with tgi + +helm install agent opea/agent --set tgi.enabled=True + +### Deploy with vllm + +helm install agent opea/agent --set vllm.enabled=True + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/agent 9090:9090` to expose the agent service for access. 
+ +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:9090/v1/chat/completions \ + -X POST \ + -H 'Content-Type: application/json' \ + -d '{"query":"What is OPEA?"}' +``` + +## Options + +For global options, see Global Options. + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ------------------------ | ------------------------------- | +| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/agent-langchain"` | | +| service.port | string | `"9090"` | | +| llm_endpoint_url | string | `""` | LLM endpoint | +| global.monitoring | bool | false | Service usage metrics | diff --git a/helm-charts/common/agent/ci-gaudi-values.yaml b/helm-charts/common/agent/ci-gaudi-values.yaml new file mode 120000 index 000000000..7243d31b2 --- /dev/null +++ b/helm-charts/common/agent/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/common/agent/gaudi-values.yaml b/helm-charts/common/agent/gaudi-values.yaml new file mode 100644 index 000000000..9ac961152 --- /dev/null +++ b/helm-charts/common/agent/gaudi-values.yaml @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +tgi: + enabled: true + accelDevice: "gaudi" + image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.0.5" + resources: + limits: + habana.ai/gaudi: 4 + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + CUDA_GRAPHS: "" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" + ENABLE_HPU_GRAPH: "true" + LIMIT_HPU_GRAPH: "true" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "true" + extraCmdArgs: ["--sharded","true","--num-shard","4"] + livenessProbe: + initialDelaySeconds: 5 + 
periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/common/agent/templates/_helpers.tpl b/helm-charts/common/agent/templates/_helpers.tpl new file mode 100644 index 000000000..1227dfd81 --- /dev/null +++ b/helm-charts/common/agent/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "agent.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "agent.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "agent.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "agent.labels" -}} +helm.sh/chart: {{ include "agent.chart" . }} +{{ include "agent.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "agent.selectorLabels" -}} +app.kubernetes.io/name: {{ include "agent.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "agent.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "agent.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/common/agent/templates/configmap.yaml b/helm-charts/common/agent/templates/configmap.yaml new file mode 100644 index 000000000..62970d7d2 --- /dev/null +++ b/helm-charts/common/agent/templates/configmap.yaml @@ -0,0 +1,66 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "agent.fullname" . }}-config + labels: + {{- include "agent.labels" . | nindent 4 }} +data: + {{- if .Values.tools }} + tools: {{ .Values.tools | quote }} + {{- end }} + {{- if .Values.llm_endpoint_url }} + llm_endpoint_url: {{ .Values.llm_endpoint_url | quote }} + {{- else }} + llm_endpoint_url: "http://{{ .Release.Name }}-tgi" + {{- end }} + # {{- if .Values.port }} + # port: {{ .Values.port | quote }} + # {{- end }} + {{- if .Values.model }} + model: {{ .Values.model | quote }} + {{- end }} + {{- if .Values.streaming }} + streaming: {{ .Values.streaming | quote }} + {{- end }} + {{- if .Values.temperature }} + temperature: {{ .Values.temperature | quote }} + {{- end }} + {{- if .Values.RETRIEVAL_TOOL_URL }} + RETRIEVAL_TOOL_URL: {{ .Values.RETRIEVAL_TOOL_URL | quote }} + {{- else }} + RETRIEVAL_TOOL_URL: "http://{{ .Release.Name }}-docretriever:8889/v1/retrievaltool" + {{- end }} + {{- if .Values.CRAG_SERVER }} + CRAG_SERVER: {{ .Values.CRAG_SERVER | quote }} + {{- else }} + CRAG_SERVER: "http://{{ .Release.Name }}-crag:8080" + {{- end }} + {{- if .Values.WORKER_AGENT_URL }} + WORKER_AGENT_URL: {{ .Values.WORKER_AGENT_URL | quote }} + {{- else }} + WORKER_AGENT_URL: "http://{{ .Release.Name 
}}-worker:9095/v1/chat/completions" + {{- end }} + require_human_feedback: {{ .Values.require_human_feedback | quote }} + recursion_limit: {{ .Values.recursion_limit | quote }} + llm_engine: {{ .Values.llm_engine | quote }} + strategy: {{ .Values.strategy | quote }} + max_new_tokens: {{ .Values.max_new_tokens | quote }} + {{- if .Values.OPENAI_API_KEY }} + OPENAI_API_KEY: {{ .Values.OPENAI_API_KEY | quote }} + {{- end }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote }} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.TGI_LLM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/common/agent/templates/deployment.yaml b/helm-charts/common/agent/templates/deployment.yaml new file mode 100644 index 000000000..864a75267 --- /dev/null +++ b/helm-charts/common/agent/templates/deployment.yaml @@ -0,0 +1,98 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "agent.fullname" . }} + labels: + {{- include "agent.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "agent.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "agent.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "agent.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: agent + containerPort: 9090 + protocol: TCP + volumeMounts: + {{- if .Values.toolPath }} + - mountPath: /home/user/tools + name: tool + {{- end }} + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + {{- if .Values.toolPath }} + - name: tool + hostPath: + path: {{ .Values.toolPath }} + type: Directory + {{- end }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "agent.selectorLabels" . 
| nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/common/agent/templates/service.yaml b/helm-charts/common/agent/templates/service.yaml
new file mode 100644
index 000000000..00d34de8d
--- /dev/null
+++ b/helm-charts/common/agent/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service  # exposes the pods matched by the agent.selectorLabels helper
+metadata:
+  name: {{ include "agent.fullname" . }}
+  labels:
+    {{- include "agent.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: 9090  # fixed container port, declared as "agent" in deployment.yaml
+      protocol: TCP
+      name: agent  # port name referenced by servicemonitor.yaml
+  selector:
+    {{- include "agent.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/common/agent/templates/servicemonitor.yaml b/helm-charts/common/agent/templates/servicemonitor.yaml
new file mode 100644
index 000000000..0eaae8763
--- /dev/null
+++ b/helm-charts/common/agent/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor  # rendered only when global.monitoring is set
+metadata:
+  name: {{ include "agent.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}  # Prometheus Helm release name
+spec:
+  selector:
+    matchLabels:
+      {{- include "agent.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: agent  # Service port name defined in service.yaml
+    interval: 5s  # scrape interval
+{{- end }}
diff --git a/helm-charts/common/agent/templates/tests/test-pod.yaml b/helm-charts/common/agent/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..61515c7a3
--- /dev/null
+++ b/helm-charts/common/agent/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "agent.fullname" . }}-testpod"
+  labels:
+    {{- include "agent.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # marks this pod as a Helm test hook (run by "helm test")
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14  # must provide bash and curl, both used by the script below
+      command: ['bash', '-c']
+      args:  # POSTs one query to the agent Service; retries only on curl exit code 7, up to max_retry times
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "agent.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \
+              -X POST \
+              -H 'Content-Type: application/json' \
+              -d '{"query":"What is OPEA?"}' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never  # the pod is not restarted; retries happen inside the script
diff --git a/helm-charts/common/agent/values.yaml b/helm-charts/common/agent/values.yaml
new file mode 100644
index 000000000..4037f9df3
--- /dev/null
+++ b/helm-charts/common/agent/values.yaml
@@ -0,0 +1,112 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for agent.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tgi:
+  enabled: false
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+  MAX_INPUT_LENGTH: "4096"
+  MAX_TOTAL_TOKENS: "8192"
+
+vllm:
+  enabled: false
+  LLM_MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3"  # also hard-coded as --model in extraCmdArgs below
+  extraCmdArgs: ["/bin/bash", "-c", "python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model mistralai/Mistral-7B-Instruct-v0.3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 4096 --max-seq-len-to-capture 8192 --enable-auto-tool-choice --tool-call-parser mistral"]
+
+replicaCount: 1
+llm_endpoint_url: ""
+model: "meta-llama/Meta-Llama-3.1-70B-Instruct"  # same model ID as tgi.LLM_MODEL_ID above
+max_new_tokens: "4096"
+llm_engine: "tgi"
+strategy: "react_langchain"
+recursion_limit: "15"
+require_human_feedback: "false"
+
+# Set LOGFLAG to any non-empty string (for example "True") to enable logging,
+# or set it to "" to disable logging.
+LOGFLAG: "True"
+
+image:
+  repository: opea/agent-langchain
+  pullPolicy: IfNotPresent
+  # Overrides the image tag; when unset, the chart appVersion is used.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for agent service is 9090
+  port: 9090
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:  # rendered verbatim into the container spec by deployment.yaml
+  httpGet:
+    path: /v1/health_check
+    port: agent  # named container port (9090) declared in deployment.yaml
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24  # 24 x periodSeconds (5s) = 120s of consecutive failures
+readinessProbe:
+  httpGet:
+    path: /v1/health_check
+    port: agent
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: /v1/health_check
+    port: agent
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120  # 120 x periodSeconds (5s) = up to 600s allowed for startup
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/common/tgi/templates/configmap.yaml b/helm-charts/common/tgi/templates/configmap.yaml
index d7e35b31d..254cf5b0e 100644
--- a/helm-charts/common/tgi/templates/configmap.yaml
+++ b/helm-charts/common/tgi/templates/configmap.yaml
@@ -39,6 +39,9 @@ data:
   {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
   OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }}
   {{- end }}
+  {{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }}
+  PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }}
+  {{- end }}
   {{- if .Values.ENABLE_HPU_GRAPH }}
   ENABLE_HPU_GRAPH: {{ .Values.ENABLE_HPU_GRAPH | quote }}
   {{- end }}