From c21c9abe8b90338358a179f49cfda6624bf60cf7 Mon Sep 17 00:00:00 2001 From: Winston H <56998716+winstxnhdw@users.noreply.github.com> Date: Fri, 12 Jul 2024 09:10:50 +0100 Subject: [PATCH] perf/docs: skip language detection step --- README.md | 2 +- capgen/transcriber/transcriber.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5974714..54caaa1 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![Open in Spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-md-dark.svg)](https://huggingface.co/spaces/winstxnhdw/CapGen) [![Open a Pull Request](https://huggingface.co/datasets/huggingface/badges/raw/main/open-a-pr-md-dark.svg)](https://github.com/winstxnhdw/CapGen/compare) -A fast cross-platform CPU-first video/audio transcriber for generating caption files with [Whisper](https://openai.com/research/whisper) and [CTranslate2](https://github.com/OpenNMT/CTranslate2), hosted on Hugging Face Spaces. A `pip` installable offline CLI tool with CUDA support is provided. By default, Voice Activity Detection (VAD) preprocessing is always enabled. +A fast cross-platform CPU-first video/audio English-only transcriber for generating caption files with [Whisper](https://openai.com/research/whisper) and [CTranslate2](https://github.com/OpenNMT/CTranslate2), hosted on Hugging Face Spaces. A `pip` installable offline CLI tool with CUDA support is provided. By default, Voice Activity Detection (VAD) preprocessing is always enabled. ## Requirements diff --git a/capgen/transcriber/transcriber.py b/capgen/transcriber/transcriber.py index 6e0b4cd..520d5bb 100644 --- a/capgen/transcriber/transcriber.py +++ b/capgen/transcriber/transcriber.py @@ -70,6 +70,7 @@ def transcribe(self, file: str | BinaryIO, caption_format: str) -> str | None: """ segments, _ = self.model.transcribe( file, + language='en', beam_size=1, vad_filter=True, vad_parameters={'min_silence_duration_ms': 500},