-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdocker-compose.yaml
53 lines (52 loc) · 1.12 KB
/
docker-compose.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# User-defined networks; both use Compose defaults (no options set).
networks:
  # Joined by nginx, frontend and backend.
  shared_net: {}
  # Network for backend and llm_server communication.
  backend_llm_net: {}
services:
  # nginx web server published on host port 80. Its behaviour is defined by
  # ./nginx.conf, which lives next to this file.
  nginx:
    image: nginx:stable-alpine
    ports:
      - "80:80"
    networks:
      - shared_net
    volumes:
      # Read-only: the container only needs to read its configuration.
      - "./nginx.conf:/etc/nginx/nginx.conf:ro"
    # depends_on only controls start order. nginx is not attached to
    # backend_llm_net, so it has no compose-network path to llm_server.
    depends_on:
      - backend
      - frontend
      - llm_server

  # Image built from the ./frontend build context; published on host
  # port 3000.
  frontend:
    build:
      context: frontend
    ports:
      - "3000:3000"
    networks:
      - shared_net
    depends_on:
      - llm_server
      - backend

  # Image built from the ./backend build context; published on host
  # port 5192. It is the only service attached to both networks, which makes
  # it the only one that can reach llm_server over a compose network.
  backend:
    build:
      context: backend
    ports:
      - "5192:5192"
    networks:
      - shared_net
      # Network for backend <-> llm_server communication.
      - backend_llm_net
    depends_on:
      - llm_server

  # vLLM OpenAI-compatible server, serving the model directory mounted at
  # /mnt/model.
  llm_server:
    image: vllm/vllm-openai:latest
    # List form so each argument reaches the image entrypoint verbatim,
    # without relying on Compose splitting a single string.
    command:
      - "--model"
      - "/mnt/model"
      - "--dtype=half"
      - "--max_model_len"
      - "8192"
    ports:
      # Bound to the host loopback only. Publishing on every interface would
      # make the server reachable from outside the host and bypass the
      # backend_llm_net isolation below. Other containers still reach it as
      # llm_server:8000 on backend_llm_net.
      - "127.0.0.1:8000:8000"
    volumes:
      # Read-only: the model directory is mounted as input for --model.
      - "./models/Meta-Llama-3.1-8B-Instruct:/mnt/model/:ro"
    networks:
      # Among the services in this file, only backend shares this network.
      - backend_llm_net
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]