# .env.example
HF_CACHE_PATH=~/.cache/huggingface
HF_TOKEN= # required if you want to access a gated model
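# You can create an access token at https://huggingface.co/settings/tokens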
ATOMA_NODE_CONFIG_PATH=./config.toml
# ----------------------------------------------------------------------------------
# atoma node configuration
# Postgres Configuration
POSTGRES_DB=atoma
POSTGRES_USER=atoma
POSTGRES_PASSWORD=
POSTGRES_PORT=5432
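# For reference, these values typically compose into a standard Postgres
# connection string of the form below (illustrative only, not read directly):
# postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@localhost:${POSTGRES_PORT}/${POSTGRES_DB}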
# Sui Configuration
SUI_CONFIG_PATH=~/.sui/sui_config
# Atoma Node Service Configuration
ATOMA_SERVICE_PORT=3000
# Currently available docker compose profiles:
#
# 1. chat_completions_vllm
# 2. chat_completions_mistralrs_cpu
# 3. chat_completions_vllm_cpu - requires a CPU with AVX2 support
# 4. chat_completions_vllm_rocm - requires an AMD GPU with drivers installed
# 5. chat_completions_mistralrs_rocm - requires an AMD GPU with drivers installed
# 6. embeddings_tei - runs the text embeddings inference server in docker compose
# 7. image_generations_mistralrs - runs the image generations server in docker compose
#
# Setting the COMPOSE_PROFILES environment variable will start all services listed in the value, e.g.
# COMPOSE_PROFILES=chat_completions_vllm will start the chat completions server and the postgres database
#
# Please set this according to the inference services you want to run and the database you want to use (either PostgreSQL or SQLite)
COMPOSE_PROFILES=chat_completions_vllm
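# COMPOSE_PROFILES accepts a comma-separated list, so multiple services can be
# started together; for example (an illustrative combination):
# COMPOSE_PROFILES=chat_completions_vllm,embeddings_tei
# With the profiles set in this file, `docker compose up -d` starts the stack.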
# Tracing level
TRACE_LEVEL=info
# Prometheus Configuration
PROMETHEUS_PORT=9090
# Grafana Configuration
GRAFANA_PORT=30001
# ----------------------------------------------------------------------------------
# chat completions server
CHAT_COMPLETIONS_SERVER_PORT=50000
CHAT_COMPLETIONS_MODEL=meta-llama/Llama-3.1-70B-Instruct
CHAT_COMPLETIONS_MAX_MODEL_LEN=4096 # context length
# vllm backend
# Learn more about vLLM engine arguments here: https://docs.vllm.ai/en/latest/usage/engine_args.html
VLLM_ENGINE_ARGS=--model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN}
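# Further vLLM flags can be appended as needed; for example, on a multi-GPU
# node you might enable tensor parallelism (illustrative values):
# VLLM_ENGINE_ARGS=--model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN} --tensor-parallel-size 2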
# ----------------------------------------------------------------------------------
# embeddings server
EMBEDDINGS_SERVER_PORT=50001
EMBEDDINGS_MODEL=intfloat/multilingual-e5-large-instruct
# tei backend
# Choose one of these based on your GPU architecture:
# CPU: TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
# Volta: UNSUPPORTED
# Turing (T4, RTX 2000 series, ...): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:turing-1.5
# Ampere 80 (A100, A30): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:1.5
# Ampere 86 (A10, A40, ...): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:86-1.5
# Ada Lovelace (RTX 4000 series, ...): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:89-1.5
# Hopper (H100): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:hopper-1.5
TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:1.5
# ----------------------------------------------------------------------------------
# image generation server
IMAGE_GENERATIONS_SERVER_PORT=50002
IMAGE_GENERATIONS_MODEL=black-forest-labs/FLUX.1-schnell
IMAGE_GENERATIONS_ARCHITECTURE=flux
# mistralrs backend
# Choose one of these based on your GPU architecture:
# CPU: MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cpu-0.3.1
# Volta: UNSUPPORTED
# Turing (T4, RTX 2000 series, ...): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-75-0.3.1
# Ampere 80 (A100, A30): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-80-0.3.1
# Ampere 86 (A10, A40, ...): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-86-0.3.1
# Ada Lovelace (RTX 4000 series, ...): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-89-0.3.1
# Hopper (H100): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-90-0.3.1
MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-80-0.3.1
# ----------------------------------------------------------------------------------
# TDX configuration
# Set ENABLE_TDX=true to enable TDX for confidential compute; otherwise it is disabled
ENABLE_TDX=false