From 24b31ccbb5b5812fe1bb4110e62392d9d4b9f64f Mon Sep 17 00:00:00 2001 From: remsky Date: Tue, 11 Feb 2025 04:49:48 -0700 Subject: [PATCH] -Fixed espeak engagement on gpu -Add default voice code setting and update language code resolution logic --- api/src/core/config.py | 1 + api/src/inference/kokoro_v1.py | 14 ++++++++++---- docker/gpu/Dockerfile | 27 +++++++++++++++------------ 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/api/src/core/config.py b/api/src/core/config.py index d2e369b..26b4178 100644 --- a/api/src/core/config.py +++ b/api/src/core/config.py @@ -13,6 +13,7 @@ class Settings(BaseSettings): output_dir: str = "output" output_dir_size_limit_mb: float = 500.0 # Maximum size of output directory in MB default_voice: str = "af_heart" + default_voice_code: str | None = None # If set, overrides the first letter of voice name, though api call param still takes precedence use_gpu: bool = True # Whether to use GPU acceleration if available allow_local_voice_saving: bool = ( False # Whether to allow saving combined voices locally diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py index 9f5e206..28ad272 100644 --- a/api/src/inference/kokoro_v1.py +++ b/api/src/inference/kokoro_v1.py @@ -139,8 +139,14 @@ async def generate_from_tokens( await paths.save_voice_tensor(voice_tensor, temp_path) voice_path = temp_path - # Use provided lang_code or get from voice name - pipeline_lang_code = lang_code if lang_code else voice_name[0].lower() + # Use provided lang_code, settings voice code override, or first letter of voice name + if lang_code: # api is given priority + pipeline_lang_code = lang_code + elif settings.default_voice_code: # settings is next priority + pipeline_lang_code = settings.default_voice_code + else: # voice name is default/fallback + pipeline_lang_code = voice_name[0].lower() + pipeline = self._get_pipeline(pipeline_lang_code) logger.debug( @@ -232,8 +238,8 @@ async def generate( await paths.save_voice_tensor(voice_tensor, temp_path) voice_path = temp_path - # Use provided lang_code or get from voice name - pipeline_lang_code = lang_code if lang_code else voice_name[0].lower() + # Use provided lang_code, settings voice code override, or first letter of voice name + pipeline_lang_code = lang_code if lang_code else (settings.default_voice_code if settings.default_voice_code else voice_name[0].lower()) pipeline = self._get_pipeline(pipeline_lang_code) logger.debug( diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index ce0f646..ea9b339 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \ libsndfile1 \ curl \ ffmpeg \ + g++ \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ && mkdir -p /usr/share/espeak-ng-data \ && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ @@ -19,40 +20,42 @@ RUN apt-get update && apt-get install -y \ # Install UV using the installer script RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ mv /root/.local/bin/uv /usr/local/bin/ && \ - mv /root/.local/bin/uvx /usr/local/bin/ && \ - useradd -m -u 1000 appuser && \ - mkdir -p /app/api/src/models/v1_0 && \ - chown -R appuser:appuser /app + mv /root/.local/bin/uvx /usr/local/bin/ +# Create non-root user and set up directories and permissions +RUN useradd -m -u 1000 appuser && \ + mkdir -p /app/api/src/models/v1_0 && \ + chown -R appuser:appuser /app + USER appuser WORKDIR /app # Copy dependency files COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml +ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \ + PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \ + ESPEAK_DATA_PATH=/usr/share/espeak-ng-data + # Install dependencies with GPU extras (using cache mounts) RUN --mount=type=cache,target=/root/.cache/uv \ uv venv && \ uv sync --extra gpu -# Copy project files including models and sync again +# Copy project files including models COPY --chown=appuser:appuser api ./api COPY --chown=appuser:appuser web ./web COPY --chown=appuser:appuser docker/scripts/ ./ RUN chmod +x ./entrypoint.sh -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --extra gpu + # Set all environment variables in one go ENV PYTHONUNBUFFERED=1 \ PYTHONPATH=/app:/app/api \ PATH="/app/.venv/bin:$PATH" \ UV_LINK_MODE=copy \ - USE_GPU=true \ - PHONEMIZER_ESPEAK_PATH=/usr/bin \ - PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \ - ESPEAK_DATA_PATH=/usr/share/espeak-ng-data - + USE_GPU=true + ENV DOWNLOAD_MODEL=true # Download model if enabled RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \