Skip to content

Commit

Permalink
Add test for sqlagent and ragagent
Browse files (browse the repository at this point in the history)
Signed-off-by: Dolpher Du <dolpher.du@intel.com>
  • Loading branch information
yongfengdu committed Jan 17, 2025
1 parent 83eb2ab commit 98d9925
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 35 deletions.
4 changes: 2 additions & 2 deletions helm-charts/agentqna/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ type: application
dependencies:
- name: agent
version: 0-latest
alias: rag-agent
alias: ragagent
repository: "file://../common/agent"
- name: agent
version: 0-latest
alias: sql-agent
alias: sqlagent
repository: "file://../common/agent"
- name: agent
version: 0-latest
Expand Down
26 changes: 9 additions & 17 deletions helm-charts/agentqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,6 @@

tgi:
enabled: true
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.6"
resources:
limits:
habana.ai/gaudi: 4
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
extraCmdArgs: ["--sharded","true","--num-shard","4"]
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand All @@ -36,3 +19,12 @@ tgi:
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
supervisor:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
ragagent:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
sqlagent:
llm_endpoint_url: http://{{ .Release.Name }}-tgi
llm_engine: tgi
42 changes: 41 additions & 1 deletion helm-charts/agentqna/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,54 @@ spec:
command: ['bash', '-c']
args:
- |
# Ingest data
# Runs ingest_data.sh from /mnt/tools after installing the requests and tqdm
# Python packages (presumably needed by the ingest script - TODO confirm).
# No exit status is checked here, so the tests below still run if this fails.
cd /mnt/tools
pip install requests tqdm
./ingest_data.sh
# Test ragagent
# POST a sample question to the ragagent chat-completions endpoint, making at
# most max_retry attempts; the first successful curl leaves the loop.
max_retry=10;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "ragagent") }}:{{ .Values.ragagent.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"messages": "Tell me about Michael Jackson song Thriller"}' \
-H 'Content-Type: application/json' && break;
# $? must be captured right after the curl list, before any other command runs.
curlcode=$?
# Only curl exit code 7 (could not connect) is retried, after a 10 s pause;
# any other failure aborts the test immediately.
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
# i only exceeds max_retry when no attempt reached `break`.
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
# Test sqlagent
# POST a sample question to the sqlagent chat-completions endpoint, making at
# most max_retry attempts; the first successful curl leaves the loop.
max_retry=10;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "sqlagent") }}:{{ .Values.sqlagent.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"messages": "How many employees are there in the company?"}' \
-H 'Content-Type: application/json' && break;
# $? must be captured right after the curl list, before any other command runs.
curlcode=$?
# Only curl exit code 7 (could not connect) is retried, after a 10 s pause;
# any other failure aborts the test immediately.
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
# i only exceeds max_retry when no attempt reached `break`.
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
# Test supervisor
# POST one question to the supervisor chat-completions endpoint, making at
# most max_retry attempts; the first successful curl leaves the loop.
# Exactly one -d payload is passed: curl joins repeated -d options with '&',
# which would not form a valid JSON request body.
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agentqna.fullname" (index .Subcharts "supervisor") }}:{{ .Values.supervisor.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
-d '{"messages": "How many albums does Iron Maiden have?"}' \
-H 'Content-Type: application/json' && break;
# $? must be captured right after the curl list, before any other command runs.
curlcode=$?
# Only curl exit code 7 (could not connect) is retried, after a 10 s pause;
# any other failure aborts the test immediately.
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
# i only exceeds max_retry when no attempt reached `break`.
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
volumeMounts:
- mountPath: /mnt/tools
name: test
volumes:
- name: test
hostPath:
path: /mnt/tools
type: Directory
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}

restartPolicy: Never
31 changes: 23 additions & 8 deletions helm-charts/agentqna/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ docretriever:
# Overrides the image tag whose default is the chart appVersion.
tag: "latest"

sql-agent:
sqlagent:
DBPath: "/mnt/tools"
db_name: "Chinook"
db_path: "sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
Expand All @@ -74,7 +74,7 @@ sql-agent:
stream: "false"
require_human_feedback: "false"

rag-agent:
ragagent:
toolPath: "/mnt/tools"
service:
port: 9095
Expand Down Expand Up @@ -119,24 +119,39 @@ crag:
# Override values in specific subcharts
tgi:
enabled: false
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.6"
resources:
limits:
habana.ai/gaudi: 4
LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
MAX_INPUT_LENGTH: 4096
MAX_TOTAL_TOKENS: 8192
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
extraCmdArgs: ["--sharded", "true", "--num-shard", "4"]

vllm:
enabled: false
accelDevice: "gaudi"
image:
repository: opea/vllm-gaudi
tag: "latest"
resources:
limits:
habana.ai/gaudi: 4
LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
VLLM_SKIP_WARMUP: true
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384"]
resources:
limits:
habana.ai/gaudi: 4


global:
http_proxy: ""
Expand Down
11 changes: 5 additions & 6 deletions helm-charts/agentqna/vllm-gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@ vllm:
enabled: true
image:
repository: opea/vllm-gaudi
# tag: "good"
supervisor:
llm_endpoint_url: http://agentqna-vllm
rag-agent:
llm_endpoint_url: http://agentqna-vllm
sql-agent:
llm_endpoint_url: http://agentqna-vllm
llm_endpoint_url: http://{{ .Release.Name }}-vllm
ragagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
sqlagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
2 changes: 1 addition & 1 deletion helm-charts/common/agent/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ data:
tools: {{ .Values.tools | quote }}
{{- end }}
{{- if .Values.llm_endpoint_url }}
llm_endpoint_url: {{ .Values.llm_endpoint_url | quote }}
llm_endpoint_url: {{ tpl .Values.llm_endpoint_url . | quote }}
{{- else }}
llm_endpoint_url: "http://{{ .Release.Name }}-tgi"
{{- end }}
Expand Down
4 changes: 4 additions & 0 deletions helm-charts/common/agent/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ spec:
command: ['bash', '-c']
args:
- |
# Run the connectivity check below only when the rendered full name is exactly
# "agent"; under any other name the test is skipped and reported as passed.
# The rendered name is quoted so `[` always receives it as a single operand.
if [ "{{ include "agent.fullname" . }}" != "agent" ]; then
echo "Skip test."
exit 0
fi
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "agent.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \
Expand Down

0 comments on commit 98d9925

Please sign in to comment.