Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sql-agent to AgentQnA #715

Merged
merged 3 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion helm-charts/agentqna/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ type: application
dependencies:
- name: agent
version: 0-latest
alias: worker
alias: ragagent
repository: "file://../common/agent"
- name: agent
version: 0-latest
alias: sqlagent
repository: "file://../common/agent"
- name: agent
version: 0-latest
Expand All @@ -18,6 +22,10 @@ dependencies:
version: 0-latest
repository: "file://../common/tgi"
condition: tgi.enabled
- name: vllm
version: 0-latest
repository: "file://../common/vllm"
condition: vllm.enabled
- name: tei
version: 0-latest
repository: "file://../common/tei"
Expand Down
26 changes: 9 additions & 17 deletions helm-charts/agentqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,6 @@

tgi:
enabled: true
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.6"
resources:
limits:
habana.ai/gaudi: 4
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
extraCmdArgs: ["--sharded","true","--num-shard","4"]
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand All @@ -36,3 +19,12 @@ tgi:
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
supervisor:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
ragagent:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
sqlagent:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
42 changes: 41 additions & 1 deletion helm-charts/agentqna/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,54 @@ spec:
command: ['bash', '-c']
args:
- |
# Ingest data
cd /mnt/tools
pip install requests tqdm
./ingest_data.sh
# Test ragagent
max_retry=10;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "ragagent") }}:{{ .Values.ragagent.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"messages": "Tell me about Michael Jackson song Thriller"}' \
-H 'Content-Type: application/json' && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
# Test sqlagent
max_retry=10;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "sqlagent") }}:{{ .Values.sqlagent.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"messages": "How many employees are there in the company?"}' \
-H 'Content-Type: application/json' && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
# Test supervisor
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "supervisor") }}:{{ .Values.supervisor.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"query": "Tell me three songs of Michael Jackson."}' \
-d '{"messages": "How many albums does Iron Maiden have?"}' \
-H 'Content-Type: application/json' && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
volumeMounts:
- mountPath: /mnt/tools
name: test
volumes:
- name: test
hostPath:
path: /mnt/tools
type: Directory
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}

restartPolicy: Never
60 changes: 54 additions & 6 deletions helm-charts/agentqna/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,30 @@ docretriever:
# Overrides the image tag whose default is the chart appVersion.
tag: "latest"

worker:
sqlagent:
DBPath: "/mnt/tools"
db_name: "Chinook"
db_path: "sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
service:
port: 9096
strategy: sql_agent_llama
use_hints: "false"
recursion_limit: "6"
llm_engine: vllm
llm_endpoint_url: ""
model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
temperature: "0.01"
max_new_tokens: "4096"
stream: "false"
require_human_feedback: "false"

ragagent:
toolPath: "/mnt/tools"
service:
port: 9095
strategy: rag_agent_llama
recursion_limit: "12"
llm_engine: tgi
recursion_limit: "6"
llm_engine: vllm
llm_endpoint_url: ""
model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
temperature: "0.01"
Expand All @@ -79,7 +96,7 @@ supervisor:
port: 9090
strategy: react_llama
recursion_limit: 10
llm_engine: tgi
llm_engine: vllm
llm_endpoint_url: ""
model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
temperature: "0.01"
Expand All @@ -89,6 +106,7 @@ supervisor:
require_human_feedback: false
CRAG_SERVER: ""
WORKER_AGENT_URL: ""
SQL_AGENT_URL: ""

crag:
image:
Expand All @@ -101,9 +119,39 @@ crag:
# Override values in specific subcharts
tgi:
enabled: false
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.6"
resources:
limits:
habana.ai/gaudi: 4
LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
MAX_INPUT_LENGTH: 4096
MAX_TOTAL_TOKENS: 8192
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
extraCmdArgs: ["--sharded", "true", "--num-shard", "4"]

vllm:
enabled: false
accelDevice: "gaudi"
image:
repository: opea/vllm-gaudi
tag: "latest"
resources:
limits:
habana.ai/gaudi: 4
LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
VLLM_SKIP_WARMUP: true
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384"]

global:
http_proxy: ""
Expand Down
16 changes: 16 additions & 0 deletions helm-charts/agentqna/vllm-gaudi-values.yaml
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File names for vLLM/TGI usage are IMHO rather inconsistent with TGI+Gaudi files being named gaudi-values.yaml, and vLLM+Gaudi ones as:

helm-charts$ find -name '*vllm*.yaml'
./docsum/gaudi-vllm-values.yaml
./chatqna/gaudi-vllm-values.yaml
./agentqna/vllm-gaudi-values.yaml
./common/llm-uservice/vllm-docsum-gaudi-values.yaml
./common/llm-uservice/vllm-gaudi-values.yaml

Especially now that vLLM is becoming default in OPEA, not TGI...

I think accelerated TGI and vLLM files should be explicitly called either:

gaudi-tgi-values.yaml
gaudi-vllm-values.yaml

Or:

tgi-gaudi-values.yaml
vllm-gaudi-values.yaml

What do you think?

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values

# Turn on the vLLM serving subchart (gated by `condition: vllm.enabled`
# in Chart.yaml) and select the Gaudi-enabled image; remaining vllm
# settings (tag, resources, model id) come from the chart's values.yaml.
vllm:
  enabled: true
  image:
    repository: opea/vllm-gaudi
# Point all three agents at the in-cluster vLLM service. These values are
# Helm template strings — the agent subchart renders them with `tpl`, so
# {{ .Release.Name }} expands to the release name at install time.
supervisor:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
ragagent:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
sqlagent:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
16 changes: 14 additions & 2 deletions helm-charts/common/agent/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@ metadata:
labels:
{{- include "agent.labels" . | nindent 4 }}
data:
{{- if .Values.db_name }}
db_name: {{ .Values.db_name | quote }}
{{- end }}
{{- if .Values.db_path }}
db_path: {{ .Values.db_path | quote }}
{{- end }}
{{- if .Values.tools }}
tools: {{ .Values.tools | quote }}
{{- end }}
{{- if .Values.llm_endpoint_url }}
llm_endpoint_url: {{ .Values.llm_endpoint_url | quote }}
llm_endpoint_url: {{ tpl .Values.llm_endpoint_url . | quote }}
{{- else }}
llm_endpoint_url: "http://{{ .Release.Name }}-tgi"
{{- end }}
Expand Down Expand Up @@ -41,12 +47,18 @@ data:
{{- if .Values.WORKER_AGENT_URL }}
WORKER_AGENT_URL: {{ .Values.WORKER_AGENT_URL | quote }}
{{- else }}
WORKER_AGENT_URL: "http://{{ .Release.Name }}-worker:9095/v1/chat/completions"
WORKER_AGENT_URL: "http://{{ .Release.Name }}-rag-agent:9095/v1/chat/completions"
{{- end }}
{{- if .Values.SQL_AGENT_URL }}
SQL_AGENT_URL: {{ .Values.SQL_AGENT_URL | quote }}
{{- else }}
SQL_AGENT_URL: "http://{{ .Release.Name }}-sql-agent:9096/v1/chat/completions"
{{- end }}
require_human_feedback: {{ .Values.require_human_feedback | quote }}
recursion_limit: {{ .Values.recursion_limit | quote }}
llm_engine: {{ .Values.llm_engine | quote }}
strategy: {{ .Values.strategy | quote }}
use_hints: {{ .Values.use_hints | quote }}
max_new_tokens: {{ .Values.max_new_tokens | quote }}
{{- if .Values.OPENAI_API_KEY }}
OPENAI_API_KEY: {{ .Values.OPENAI_API_KEY | quote }}
Expand Down
10 changes: 10 additions & 0 deletions helm-charts/common/agent/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ spec:
containerPort: 9090
protocol: TCP
volumeMounts:
{{- if .Values.DBPath }}
- mountPath: /home/user/chinook-db
name: dbpath
{{- end }}
{{- if .Values.toolPath }}
- mountPath: /home/user/tools
name: tool
Expand All @@ -70,6 +74,12 @@ spec:
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumes:
{{- if .Values.DBPath }}
- name: dbpath
hostPath:
path: {{ .Values.DBPath }}
type: Directory
{{- end }}
{{- if .Values.toolPath }}
- name: tool
hostPath:
Expand Down
4 changes: 4 additions & 0 deletions helm-charts/common/agent/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ spec:
command: ['bash', '-c']
args:
- |
if [ {{ include "agent.fullname" . }} != "agent" ]; then
echo "Skip test."
exit 0
fi
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agent.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/common/vllm/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ data:
{{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}
{{- end }}
{{- if .Values.VLLM_SKIP_WARMUP }}
VLLM_SKIP_WARMUP: {{ .Values.VLLM_SKIP_WARMUP | quote }}
{{- end }}
{{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }}
PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }}
{{- end }}
Expand Down
Loading