# .env.example
HF_CACHE_PATH=~/.cache/huggingface
HF_TOKEN= # required if you want to access a gated model
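# You can create an access token at https://huggingface.co/settings/tokens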
ATOMA_NODE_CONFIG_PATH=./config.toml
# ----------------------------------------------------------------------------------
# atoma node configuration
# Postgres Configuration
POSTGRES_DB=atoma
POSTGRES_USER=atoma
POSTGRES_PASSWORD=
POSTGRES_PORT=5432
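# For reference, these values typically compose into a standard Postgres
# connection string of the form below (illustrative only, not read directly):
# postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@localhost:${POSTGRES_PORT}/${POSTGRES_DB}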
# Sui Configuration
SUI_CONFIG_PATH=~/.sui/sui_config
# Atoma Node Service Configuration
ATOMA_SERVICE_PORT=3000
# Currently available docker compose profiles:
#
# 1. chat_completions_vllm
# 2. chat_completions_mistralrs_cpu
# 3. chat_completions_vllm_cpu - requires a CPU with AVX2 support
# 4. chat_completions_vllm_rocm - requires an AMD GPU with drivers installed
# 5. chat_completions_mistralrs_rocm - requires an AMD GPU with drivers installed
# 6. embeddings_tei - runs the text embeddings inference server in docker compose
# 7. image_generations_mistralrs - runs the image generations server in docker compose
#
# Setting the COMPOSE_PROFILES environment variable will start all services listed in the value, e.g.
# COMPOSE_PROFILES=chat_completions_vllm will start the chat completions server and the postgres database
#
# Please set this according to the inference services you want to run and the database you want to use (either PostgreSQL or SQLite)
COMPOSE_PROFILES=chat_completions_vllm
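# COMPOSE_PROFILES accepts a comma-separated list, so multiple services can be
# started together; for example (an illustrative combination):
# COMPOSE_PROFILES=chat_completions_vllm,embeddings_tei
# With the profiles set in this file, `docker compose up -d` starts the stack.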
# Tracing level
TRACE_LEVEL=info
# Prometheus Configuration
PROMETHEUS_PORT=9090
# Grafana Configuration
GRAFANA_PORT=30001
# ----------------------------------------------------------------------------------
# chat completions server
CHAT_COMPLETIONS_SERVER_PORT=50000
CHAT_COMPLETIONS_MODEL=meta-llama/Llama-3.1-70B-Instruct
CHAT_COMPLETIONS_MAX_MODEL_LEN=4096 # context length
# vllm backend
# Learn more about vLLM engine arguments here: https://docs.vllm.ai/en/latest/usage/engine_args.html
VLLM_ENGINE_ARGS=--model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN}
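# Further vLLM flags can be appended as needed; for example, on a multi-GPU
# node you might enable tensor parallelism (illustrative values):
# VLLM_ENGINE_ARGS=--model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN} --tensor-parallel-size 2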
# ----------------------------------------------------------------------------------
# embeddings server
EMBEDDINGS_SERVER_PORT=50001
EMBEDDINGS_MODEL=intfloat/multilingual-e5-large-instruct
# tei backend
# Choose one of these based on your GPU architecture:
# CPU: TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
# Volta: UNSUPPORTED
# Turing (T4, RTX 2000 series, ...): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:turing-1.5
# Ampere 80 (A100, A30): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:1.5
# Ampere 86 (A10, A40, ...): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:86-1.5
# Ada Lovelace (RTX 4000 series, ...): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:89-1.5
# Hopper (H100): TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:hopper-1.5
TEI_IMAGE=ghcr.io/huggingface/text-embeddings-inference:1.5
# ----------------------------------------------------------------------------------
# image generation server
IMAGE_GENERATIONS_SERVER_PORT=50002
IMAGE_GENERATIONS_MODEL=black-forest-labs/FLUX.1-schnell
IMAGE_GENERATIONS_ARCHITECTURE=flux
# mistralrs backend
# Choose one of these based on your GPU architecture:
# CPU: MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cpu-0.3.1
# Volta: UNSUPPORTED
# Turing (T4, RTX 2000 series, ...): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-75-0.3.1
# Ampere 80 (A100, A30): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-80-0.3.1
# Ampere 86 (A10, A40, ...): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-86-0.3.1
# Ada Lovelace (RTX 4000 series, ...): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-89-0.3.1
# Hopper (H100): MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-90-0.3.1
MISTRALRS_IMAGE=ghcr.io/ericlbuehler/mistral.rs:cuda-80-0.3.1
# ----------------------------------------------------------------------------------
# TDX configuration
# Set ENABLE_TDX=true to enable TDX for confidential compute; otherwise it is disabled
ENABLE_TDX=false