diff --git a/helm-charts/audioqna/Chart.yaml b/helm-charts/audioqna/Chart.yaml
index 2c16ebcaa..05bd148ce 100644
--- a/helm-charts/audioqna/Chart.yaml
+++ b/helm-charts/audioqna/Chart.yaml
@@ -6,24 +6,15 @@ name: audioqna
 description: The Helm chart to deploy AudioQnA
 type: application
 dependencies:
-  - name: asr
-    version: 0-latest
-    repository: "file://../common/asr"
   - name: whisper
     version: 0-latest
     repository: "file://../common/whisper"
-  - name: tts
-    version: 0-latest
-    repository: "file://../common/tts"
   - name: speecht5
     version: 0-latest
     repository: "file://../common/speecht5"
   - name: tgi
     version: 0-latest
     repository: "file://../common/tgi"
-  - name: llm-uservice
-    version: 0-latest
-    repository: "file://../common/llm-uservice"
   - name: ui
     version: 0-latest
     repository: "file://../common/ui"
diff --git a/helm-charts/audioqna/gaudi-values.yaml b/helm-charts/audioqna/gaudi-values.yaml
index c6a18dfa1..7630657ac 100644
--- a/helm-charts/audioqna/gaudi-values.yaml
+++ b/helm-charts/audioqna/gaudi-values.yaml
@@ -12,6 +12,14 @@ tgi:
   MAX_INPUT_LENGTH: "1024"
   MAX_TOTAL_TOKENS: "2048"
   CUDA_GRAPHS: ""
+  # Quoted like the settings above so they stay strings; bare 0/1/true are
+  # typed as int/bool by YAML, and a bare 0 is falsy to an if-guarded template.
+  HF_HUB_DISABLE_PROGRESS_BARS: "1"
+  HF_HUB_ENABLE_HF_TRANSFER: "0"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
   livenessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5
diff --git a/helm-charts/audioqna/templates/deployment.yaml b/helm-charts/audioqna/templates/deployment.yaml
index 4282fc304..cbac0c336 100644
--- a/helm-charts/audioqna/templates/deployment.yaml
+++ b/helm-charts/audioqna/templates/deployment.yaml
@@ -34,18 +34,20 @@ spec:
       containers:
         - name: {{ .Release.Name }}
           env:
-            - name: LLM_SERVICE_HOST_IP
-              value: {{ include "llm-uservice.fullname" (index .Subcharts "llm-uservice") }}
-            - name: LLM_SERVICE_PORT
-              value: {{ index .Values "llm-uservice" "service" "port" | quote }}
-            - name: ASR_SERVICE_HOST_IP
-              value: {{ include "asr.fullname" (index .Subcharts "asr") }}
-            - name: ASR_SERVICE_PORT
-              value: {{ index .Values "asr" "service" "port" | quote }}
-            - name: TTS_SERVICE_HOST_IP
-              value: {{ include "tts.fullname" (index .Subcharts "tts") }}
-            - name: TTS_SERVICE_PORT
-              value: {{ index .Values "tts" "service" "port" | quote }}
+            - name: LLM_SERVER_HOST_IP
+              value: {{ include "tgi.fullname" (index .Subcharts "tgi") | quote }}
+            # NOTE(review): this port is hard-coded, unlike the whisper and
+            # speecht5 ports below, which read service.port; confirm for tgi.
+            - name: LLM_SERVER_PORT
+              value: "80"
+            - name: WHISPER_SERVER_HOST_IP
+              value: {{ include "whisper.fullname" (index .Subcharts "whisper") | quote }}
+            - name: WHISPER_SERVER_PORT
+              value: {{ index .Values "whisper" "service" "port" | quote }}
+            - name: SPEECHT5_SERVER_HOST_IP
+              value: {{ include "speecht5.fullname" (index .Subcharts "speecht5") | quote }}
+            - name: SPEECHT5_SERVER_PORT
+              value: {{ index .Values "speecht5" "service" "port" | quote }}
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"