Skip to content

Commit

Permalink
-Fixed espeak engagement on gpu
Browse files Browse the repository at this point in the history
-Add default voice code setting and update language code resolution logic
  • Loading branch information
remsky committed Feb 11, 2025
1 parent 9b76ce2 commit 24b31cc
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 16 deletions.
1 change: 1 addition & 0 deletions api/src/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class Settings(BaseSettings):
output_dir: str = "output"
output_dir_size_limit_mb: float = 500.0 # Maximum size of output directory in MB
default_voice: str = "af_heart"
default_voice_code: str | None = None # Optional language code used in place of the first letter of the voice name; a lang_code passed in the API call still takes precedence
use_gpu: bool = True # Whether to use GPU acceleration if available
allow_local_voice_saving: bool = (
False # Whether to allow saving combined voices locally
Expand Down
14 changes: 10 additions & 4 deletions api/src/inference/kokoro_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,14 @@ async def generate_from_tokens(
await paths.save_voice_tensor(voice_tensor, temp_path)
voice_path = temp_path

# Use provided lang_code or get from voice name
pipeline_lang_code = lang_code if lang_code else voice_name[0].lower()
# Use provided lang_code, settings voice code override, or first letter of voice name
if lang_code: # api is given priority
pipeline_lang_code = lang_code
elif settings.default_voice_code: # settings is next priority
pipeline_lang_code = settings.default_voice_code
else: # voice name is default/fallback
pipeline_lang_code = voice_name[0].lower()

pipeline = self._get_pipeline(pipeline_lang_code)

logger.debug(
Expand Down Expand Up @@ -232,8 +238,8 @@ async def generate(
await paths.save_voice_tensor(voice_tensor, temp_path)
voice_path = temp_path

# Use provided lang_code or get from voice name
pipeline_lang_code = lang_code if lang_code else voice_name[0].lower()
# Use provided lang_code, settings voice code override, or first letter of voice name
pipeline_lang_code = lang_code if lang_code else (settings.default_voice_code if settings.default_voice_code else voice_name[0].lower())
pipeline = self._get_pipeline(pipeline_lang_code)

logger.debug(
Expand Down
27 changes: 15 additions & 12 deletions docker/gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,47 +12,50 @@ RUN apt-get update && apt-get install -y \
libsndfile1 \
curl \
ffmpeg \
g++ \
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
&& mkdir -p /usr/share/espeak-ng-data \
&& ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/

# Install UV using the installer script
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
mv /root/.local/bin/uv /usr/local/bin/ && \
mv /root/.local/bin/uvx /usr/local/bin/ && \
useradd -m -u 1000 appuser && \
mkdir -p /app/api/src/models/v1_0 && \
chown -R appuser:appuser /app
mv /root/.local/bin/uvx /usr/local/bin/

# Create non-root user and set up directories and permissions
RUN useradd -m -u 1000 appuser && \
mkdir -p /app/api/src/models/v1_0 && \
chown -R appuser:appuser /app

USER appuser
WORKDIR /app

# Copy dependency files
COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml

ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \
PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
ESPEAK_DATA_PATH=/usr/share/espeak-ng-data

# Install dependencies with GPU extras (using cache mounts)
RUN --mount=type=cache,target=/root/.cache/uv \
uv venv && \
uv sync --extra gpu

# Copy project files including models and sync again
# Copy project files including models
COPY --chown=appuser:appuser api ./api
COPY --chown=appuser:appuser web ./web
COPY --chown=appuser:appuser docker/scripts/ ./
RUN chmod +x ./entrypoint.sh
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --extra gpu


# Set the runtime environment variables in one go (espeak paths and DOWNLOAD_MODEL are set separately)
ENV PYTHONUNBUFFERED=1 \
PYTHONPATH=/app:/app/api \
PATH="/app/.venv/bin:$PATH" \
UV_LINK_MODE=copy \
USE_GPU=true \
PHONEMIZER_ESPEAK_PATH=/usr/bin \
PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
ESPEAK_DATA_PATH=/usr/share/espeak-ng-data

USE_GPU=true

ENV DOWNLOAD_MODEL=true
# Download model if enabled
RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
Expand Down

0 comments on commit 24b31cc

Please sign in to comment.