diff --git a/microservices-connector/config/samples/ChatQnA/chatQnA_switch_nv_OneCard.yaml b/microservices-connector/config/samples/ChatQnA/chatQnA_switch_nv_OneCard.yaml
new file mode 100644
index 000000000..69bb24417
--- /dev/null
+++ b/microservices-connector/config/samples/ChatQnA/chatQnA_switch_nv_OneCard.yaml
@@ -0,0 +1,126 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: nvidia
+  name: switch
+  namespace: switch
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    # Pipeline entry: embed (via node1 switch) -> rerank -> LLM (via node2 switch).
+    root:
+      routerType: Sequence
+      steps:
+        - name: Embedding
+          nodeName: node1
+        - name: Reranking
+          data: $response
+          internalService:
+            serviceName: reranking-svc
+            config:
+              endpoint: /v1/reranking
+              TEI_RERANKING_ENDPOINT: tei-reranking-svc
+        - name: TeiReranking
+          internalService:
+            serviceName: tei-reranking-svc
+            config:
+              endpoint: /rerank
+            isDownstreamService: true
+        - name: Llm
+          data: $response
+          nodeName: node2
+    # node1: route by embedding-model-id between a large and a small embedding model.
+    node1:
+      routerType: Switch
+      steps:
+        - name: Embedding
+          condition: embedding-model-id==large
+          internalService:
+            serviceName: embedding-svc-large
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+        - name: Embedding
+          condition: embedding-model-id==small
+          internalService:
+            serviceName: embedding-svc-small
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+        - name: TeiEmbedding
+          internalService:
+            serviceName: tei-embedding-svc-bge15
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: TeiEmbedding
+          internalService:
+            serviceName: tei-embedding-svc-bge-small
+            config:
+              # was BAAI/bge-base-en-v1.5 — identical to the "large" branch, which made the switch a no-op
+              MODEL_ID: BAAI/bge-small-en-v1.5
+            isDownstreamService: true
+        - name: Retriever
+          condition: embedding-model-id==large
+          data: $response
+          internalService:
+            serviceName: retriever-svc-large
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-large
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+        - name: Retriever
+          condition: embedding-model-id==small
+          data: $response
+          internalService:
+            serviceName: retriever-svc-small
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-small
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-large
+            isDownstreamService: true
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-small
+            isDownstreamService: true
+    # node2: route by model-id between two TGI backends sharing one NVIDIA card.
+    node2:
+      routerType: Switch
+      steps:
+        - name: Llm
+          condition: model-id==intel
+          internalService:
+            serviceName: llm-svc-intel
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-service-intel
+        - name: Llm
+          condition: model-id==llama
+          internalService:
+            serviceName: llm-svc-llama
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-service-llama
+        - name: TgiNvidia
+          internalService:
+            serviceName: tgi-service-intel
+            config:
+              endpoint: /generate
+              MODEL_ID: Intel/neural-chat-7b-v3-3
+              CUDA_MEMORY_FRACTION: "0.5"
+            isDownstreamService: true
+        - name: TgiNvidia
+          internalService:
+            serviceName: tgi-service-llama
+            config:
+              endpoint: /generate
+              # NOTE(review): "llama" branch serves bloom-560m — confirm this is the intended model
+              MODEL_ID: bigscience/bloom-560m
+              CUDA_MEMORY_FRACTION: "0.5"
+            isDownstreamService: true