---
# Source: tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: tgi-config
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
data:
  MODEL_ID: "Intel/neural-chat-7b-v3-3"
  PORT: "2080"
  HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
  HF_TOKEN: "insert-your-huggingface-token-here"
  MAX_INPUT_TOKENS: "1024"
  MAX_TOTAL_TOKENS: "4096"
  http_proxy: ""
  https_proxy: ""
  no_proxy: ""
  HABANA_LOGS: "/tmp/habana_logs"
  NUMBA_CACHE_DIR: "/tmp"
  TRANSFORMERS_CACHE: "/tmp/transformers_cache"
  HF_HOME: "/tmp/.cache/huggingface"
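# Note (editorial comment, not generated by the chart): both HUGGING_FACE_HUB_TOKEN
# and HF_TOKEN above are placeholders and must be replaced with a valid Hugging Face
# access token before the model can be pulled. PORT ("2080") must match the Service
# targetPort and the Deployment containerPort defined below.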
---
# Source: tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
  name: tgi
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 2080
      protocol: TCP
      name: tgi
  selector:
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
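# Illustrative usage (editorial comment, not generated by the chart): within the
# cluster, the Service exposes the TGI container on port 80 and forwards to container
# port 2080. From another pod in the same namespace, a request could look like:
#   curl http://tgi/generate -X POST \
#     -H 'Content-Type: application/json' \
#     -d '{"inputs": "What is deep learning?", "parameters": {"max_new_tokens": 64}}'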
---
# Source: tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: tgi
  labels:
    helm.sh/chart: tgi-0.8.0
    app.kubernetes.io/name: tgi
    app.kubernetes.io/instance: tgi
    app.kubernetes.io/version: "2.1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: tgi
      app.kubernetes.io/instance: tgi
  template:
    metadata:
      labels:
        app.kubernetes.io/name: tgi
        app.kubernetes.io/instance: tgi
    spec:
      securityContext:
        {}
      containers:
        - name: tgi
          envFrom:
            - configMapRef:
                name: tgi-config
            - configMapRef:
                name: extra-env-config
                optional: true
          securityContext:
            {}
          image: "ghcr.io/huggingface/text-generation-inference:2.0"
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /tmp
              name: tmp
          ports:
            - name: http
              containerPort: 2080
              protocol: TCP
          resources:
            limits:
              nvidia.com/gpu: 1
      volumes:
        - name: model-volume
          hostPath:
            path: /mnt/opea-models
            type: Directory
        - name: tmp
          emptyDir: {}
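# Notes (editorial comments, not generated by the chart):
# - The hostPath volume uses type: Directory, so /mnt/opea-models must already exist
#   on the node or the pod will fail to start.
# - The container's envFrom references an optional ConfigMap named "extra-env-config"
#   that is not defined in this manifest. If additional TGI settings are needed, such
#   a ConfigMap could be created separately; a minimal, hypothetical sketch:
#
#   apiVersion: v1
#   kind: ConfigMap
#   metadata:
#     name: extra-env-config
#   data:
#     MAX_BATCH_PREFILL_TOKENS: "4096"  # example setting; verify the variable is
#                                       # supported by your TGI version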