-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompose.airllm.yml
31 lines (31 loc) · 881 Bytes
/
compose.airllm.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
services:
  # AirLLM inference service — serves a model over HTTP on container port 5000.
  airllm:
    build:
      context: ./airllm
      dockerfile: ./Dockerfile
    container_name: ${HARBOR_CONTAINER_PREFIX}.airllm
    env_file:
      - ./.env
      - ./airllm/override.env
    environment:
      - HF_TOKEN=${HARBOR_HF_TOKEN}
      - MODEL=${HARBOR_AIRLLM_MODEL}
      - MAX_LENGTH=${HARBOR_AIRLLM_CTX_LEN}
      - COMPRESSION=${HARBOR_AIRLLM_COMPRESSION}
    ports:
      # Quoted: unquoted "host:container" mappings can hit YAML's
      # sexagesimal/number parsing trap for low port numbers.
      - "${HARBOR_AIRLLM_HOST_PORT}:5000"
    networks:
      - harbor-network
    volumes:
      # Shared HF cache avoids re-downloading model weights across services.
      - ${HARBOR_HF_CACHE}:/root/.cache/huggingface
      - ./airllm/server.py:/app/server.py
    # The GPU reservation is kept inline here (not split into an ".x." file),
    # as AirLLM requires a GPU to function; this fulfills the requirement.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]