Skip to content

Commit

Permalink
Refactor e2e chart: codegen codetrans faqgen visualqna
Browse files Browse the repository at this point in the history
Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
  • Loading branch information
lianhao committed Jan 17, 2025
1 parent d370598 commit 378b984
Show file tree
Hide file tree
Showing 9 changed files with 46 additions and 3 deletions.
5 changes: 5 additions & 0 deletions helm-charts/codegen/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ tgi:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/codegen/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ affinity: {}
tgi:
LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

llm-uservice:
LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

nginx:
service:
type: NodePort
Expand Down
5 changes: 5 additions & 0 deletions helm-charts/codetrans/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ tgi:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/codetrans/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ affinity: {}
tgi:
LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3

llm-uservice:
LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3

nginx:
service:
type: NodePort
Expand Down
6 changes: 6 additions & 0 deletions helm-charts/common/tgi/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,9 @@ data:
{{- if .Values.FLASH_ATTENTION_RECOMPUTE }}
FLASH_ATTENTION_RECOMPUTE: {{ .Values.FLASH_ATTENTION_RECOMPUTE | quote }}
{{- end }}
{{- if .Values.PREFILL_BATCH_BUCKET_SIZE }}
PREFILL_BATCH_BUCKET_SIZE: {{ .Values.PREFILL_BATCH_BUCKET_SIZE | quote }}
{{- end }}
{{- if .Values.BATCH_BUCKET_SIZE }}
BATCH_BUCKET_SIZE: {{ .Values.BATCH_BUCKET_SIZE | quote }}
{{- end }}
16 changes: 14 additions & 2 deletions helm-charts/faqgen/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,21 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
- MAX_INPUT_LENGTH: "4096"
- MAX_TOTAL_TOKENS: "8192"
+ MAX_INPUT_LENGTH: "1024"
+ MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: "0"
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
PREFILL_BATCH_BUCKET_SIZE: 1
BATCH_BUCKET_SIZE: 8
extraCmdArgs:
- "--max-batch-total-tokens"
- "65536"
- "--max-batch-prefill-tokens"
- "4096"
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand Down
3 changes: 2 additions & 1 deletion helm-charts/faqgen/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ affinity: {}
# To override values in subchart llm-uservice
llm-uservice:
image:
- repository: opea/llm-faqgen-tgi
+ repository: opea/llm-faqgen
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct

# To override values in subchart tgi
tgi:
Expand Down
5 changes: 5 additions & 0 deletions helm-charts/visualqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ tgi:
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/visualqna/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ tgi:
MAX_TOTAL_TOKENS: "8192"
LLM_MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf

lvm-uservice:
LVM_BACKEND: "TGI"

nginx:
service:
type: NodePort
Expand Down

0 comments on commit 378b984

Please sign in to comment.