Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sql-agent to AgentQnA #715

Merged
merged 3 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion helm-charts/agentqna/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ type: application
dependencies:
- name: agent
version: 0-latest
alias: worker
alias: ragagent
repository: "file://../common/agent"
- name: agent
version: 0-latest
alias: sqlagent
repository: "file://../common/agent"
- name: agent
version: 0-latest
Expand All @@ -18,6 +22,10 @@ dependencies:
version: 0-latest
repository: "file://../common/tgi"
condition: tgi.enabled
- name: vllm
version: 0-latest
repository: "file://../common/vllm"
condition: vllm.enabled
- name: tei
version: 0-latest
repository: "file://../common/tei"
Expand Down
26 changes: 9 additions & 17 deletions helm-charts/agentqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,6 @@

tgi:
enabled: true
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.6"
resources:
limits:
habana.ai/gaudi: 4
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
extraCmdArgs: ["--sharded","true","--num-shard","4"]
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand All @@ -36,3 +19,12 @@ tgi:
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
supervisor:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
ragagent:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
sqlagent:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
42 changes: 41 additions & 1 deletion helm-charts/agentqna/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,54 @@ spec:
command: ['bash', '-c']
args:
- |
# Ingest data
cd /mnt/tools
pip install requests tqdm
./ingest_data.sh
# Test ragagent
max_retry=10;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "ragagent") }}:{{ .Values.ragagent.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"messages": "Tell me about Michael Jackson song Thriller"}' \
-H 'Content-Type: application/json' && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
# Test sqlagent
max_retry=10;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "sqlagent") }}:{{ .Values.sqlagent.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"messages": "How many employees are there in the company?"}' \
-H 'Content-Type: application/json' && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
# Test supervisor
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "supervisor") }}:{{ .Values.supervisor.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"query": "Tell me three songs of Michael Jackson."}' \
-d '{"messages": "How many albums does Iron Maiden have?"}' \
-H 'Content-Type: application/json' && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
volumeMounts:
- mountPath: /mnt/tools
name: test
volumes:
- name: test
hostPath:
path: /mnt/tools
type: Directory
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}

restartPolicy: Never
60 changes: 54 additions & 6 deletions helm-charts/agentqna/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,30 @@ docretriever:
# Overrides the image tag whose default is the chart appVersion.
tag: "latest"

worker:
sqlagent:
DBPath: "/mnt/tools"
db_name: "Chinook"
db_path: "sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
service:
port: 9096
strategy: sql_agent_llama
use_hints: "false"
recursion_limit: "6"
llm_engine: vllm
llm_endpoint_url: ""
model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
temperature: "0.01"
max_new_tokens: "4096"
stream: "false"
require_human_feedback: "false"

ragagent:
toolPath: "/mnt/tools"
service:
port: 9095
strategy: rag_agent_llama
recursion_limit: "12"
llm_engine: tgi
recursion_limit: "6"
llm_engine: vllm
llm_endpoint_url: ""
model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
temperature: "0.01"
Expand All @@ -79,7 +96,7 @@ supervisor:
port: 9090
strategy: react_llama
recursion_limit: 10
llm_engine: tgi
llm_engine: vllm
llm_endpoint_url: ""
model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
temperature: "0.01"
Expand All @@ -89,6 +106,7 @@ supervisor:
require_human_feedback: false
CRAG_SERVER: ""
WORKER_AGENT_URL: ""
SQL_AGENT_URL: ""

crag:
image:
Expand All @@ -101,9 +119,39 @@ crag:
# Override values in specific subcharts
tgi:
enabled: false
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.6"
resources:
limits:
habana.ai/gaudi: 4
LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
MAX_INPUT_LENGTH: 4096
MAX_TOTAL_TOKENS: 8192
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
extraCmdArgs: ["--sharded", "true", "--num-shard", "4"]

vllm:
enabled: false
accelDevice: "gaudi"
image:
repository: opea/vllm-gaudi
tag: "latest"
resources:
limits:
habana.ai/gaudi: 4
LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
VLLM_SKIP_WARMUP: true
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384"]

global:
http_proxy: ""
Expand Down
16 changes: 16 additions & 0 deletions helm-charts/agentqna/vllm-gaudi-values.yaml
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File names for vLLM/TGI usage are IMHO rather inconsistent with TGI+Gaudi files being named gaudi-values.yaml, and vLLM+Gaudi ones as:

helm-charts$ find -name '*vllm*.yaml'
./docsum/gaudi-vllm-values.yaml
./chatqna/gaudi-vllm-values.yaml
./agentqna/vllm-gaudi-values.yaml
./common/llm-uservice/vllm-docsum-gaudi-values.yaml
./common/llm-uservice/vllm-gaudi-values.yaml

Especially now that vLLM is becoming default in OPEA, not TGI...

I think accelerated TGI and vLLM files should be explicitly called either:

gaudi-tgi-values.yaml
gaudi-vllm-values.yaml

Or:

tgi-gaudi-values.yaml
vllm-gaudi-values.yaml

What do you think?

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values

# Turn on the vLLM serving subchart (gated by `condition: vllm.enabled`
# in Chart.yaml) and select the Gaudi-enabled image; remaining vllm
# settings (tag, resources, model id) come from the chart's values.yaml.
vllm:
  enabled: true
  image:
    repository: opea/vllm-gaudi
# Point all three agents at the in-cluster vLLM service. These values are
# Helm template strings — the agent subchart renders them with `tpl`, so
# {{ .Release.Name }} expands to the release name at install time.
supervisor:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
ragagent:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
sqlagent:
  llm_endpoint_url: http://{{ .Release.Name }}-vllm
16 changes: 14 additions & 2 deletions helm-charts/common/agent/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@ metadata:
labels:
{{- include "agent.labels" . | nindent 4 }}
data:
{{- if .Values.db_name }}
db_name: {{ .Values.db_name | quote }}
{{- end }}
{{- if .Values.db_path }}
db_path: {{ .Values.db_path | quote }}
{{- end }}
{{- if .Values.tools }}
tools: {{ .Values.tools | quote }}
{{- end }}
{{- if .Values.llm_endpoint_url }}
llm_endpoint_url: {{ .Values.llm_endpoint_url | quote }}
llm_endpoint_url: {{ tpl .Values.llm_endpoint_url . | quote }}
{{- else }}
llm_endpoint_url: "http://{{ .Release.Name }}-tgi"
{{- end }}
Expand Down Expand Up @@ -41,12 +47,18 @@ data:
{{- if .Values.WORKER_AGENT_URL }}
WORKER_AGENT_URL: {{ .Values.WORKER_AGENT_URL | quote }}
{{- else }}
WORKER_AGENT_URL: "http://{{ .Release.Name }}-worker:9095/v1/chat/completions"
WORKER_AGENT_URL: "http://{{ .Release.Name }}-rag-agent:9095/v1/chat/completions"
{{- end }}
{{- if .Values.SQL_AGENT_URL }}
SQL_AGENT_URL: {{ .Values.SQL_AGENT_URL | quote }}
{{- else }}
SQL_AGENT_URL: "http://{{ .Release.Name }}-sql-agent:9096/v1/chat/completions"
{{- end }}
require_human_feedback: {{ .Values.require_human_feedback | quote }}
recursion_limit: {{ .Values.recursion_limit | quote }}
llm_engine: {{ .Values.llm_engine | quote }}
strategy: {{ .Values.strategy | quote }}
use_hints: {{ .Values.use_hints | quote }}
max_new_tokens: {{ .Values.max_new_tokens | quote }}
{{- if .Values.OPENAI_API_KEY }}
OPENAI_API_KEY: {{ .Values.OPENAI_API_KEY | quote }}
Expand Down
10 changes: 10 additions & 0 deletions helm-charts/common/agent/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ spec:
containerPort: 9090
protocol: TCP
volumeMounts:
{{- if .Values.DBPath }}
- mountPath: /home/user/chinook-db
name: dbpath
{{- end }}
{{- if .Values.toolPath }}
- mountPath: /home/user/tools
name: tool
Expand All @@ -70,6 +74,12 @@ spec:
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumes:
{{- if .Values.DBPath }}
- name: dbpath
hostPath:
path: {{ .Values.DBPath }}
type: Directory
{{- end }}
{{- if .Values.toolPath }}
- name: tool
hostPath:
Expand Down
4 changes: 4 additions & 0 deletions helm-charts/common/agent/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ spec:
command: ['bash', '-c']
args:
- |
if [ {{ include "agent.fullname" . }} != "agent" ]; then
echo "Skip test."
exit 0
fi
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agent.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/common/vllm/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ data:
{{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}
{{- end }}
{{- if .Values.VLLM_SKIP_WARMUP }}
VLLM_SKIP_WARMUP: {{ .Values.VLLM_SKIP_WARMUP | quote }}
{{- end }}
{{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }}
PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }}
{{- end }}
Expand Down
Loading