Skip to content

Commit

Permalink
feat: use pysubs2 to gen ass subtitle (#4)
Browse files Browse the repository at this point in the history
* remove pkg pysrt
  • Loading branch information
Tohrusky authored Aug 19, 2024
1 parent c58f1be commit b48dec8
Show file tree
Hide file tree
Showing 11 changed files with 186 additions and 190 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
- name: Test
run: |
pip install numpy==1.26.4
pip install pre-commit pytest mypy ruff types-requests pytest-cov coverage pydantic openai openai-whisper requests beautifulsoup4 tenacity pysrt
pip install pre-commit pytest mypy ruff types-requests pytest-cov coverage pydantic openai openai-whisper requests beautifulsoup4 tenacity pysubs2
make lint
make test
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,5 +161,7 @@ cython_debug/
.idea/
/.ruff_cache/

/assets/*.srt
/assets/*.mkv
/assets/*.mp3
/assets/*.srt
/assets/*.ass
29 changes: 16 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pip install openai-whisper

### Command Line Usage

`yuisub` can be used from the command line to generate bilingual SRT files. Here's how to use it:
`yuisub` can be used from the command line to generate bilingual ASS files. Here's how to use it:

```bash
yuisub -h # Displays help message
Expand All @@ -38,29 +38,32 @@ yuisub -h # Displays help message
### Example

```python3
from yuisub import bilingual, from_file
from yuisub import translate, bilingual, load
from yuisub.a2t import WhisperModel

# srt from audio
# sub from audio
model = WhisperModel(name="medium", device="cuda")
segs = model.transcribe(audio="path/to/audio.mp3")
srt = model.gen_srt(segs)
sub = model.transcribe(audio="path/to/audio.mp3")

# srt from file
# srt = from_file("path/to/input.srt")
# sub from file
# sub = load("path/to/input.srt")

# Generate bilingual SRT
srt_zh, srt_bilingual = bilingual(
srt=srt,
# generate bilingual subtitle
sub_zh = translate(
sub=sub,
model="gpt_model_name",
api_key="your_openai_api_key",
base_url="api_url",
bangumi_url="https://bangumi.tv/subject/424883/"
)
sub_bilingual = bilingual(
sub_origin=sub,
sub_zh=sub_zh
)

# Save the SRT files
srt_zh.save("path/to/output.zh.srt")
srt_bilingual.save("path/to/output.bilingual.srt")
# save the ASS files
sub_zh.save("path/to/output.zh.ass")
sub_bilingual.save("path/to/output.bilingual.ass")
```

### License
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ license = "GPL-3.0-only"
name = "yuisub"
readme = "README.md"
repository = "https://github.com/TensoRaws/yuisub"
version = "0.0.3"
version = "0.0.4"

# Requirements
[tool.poetry.dependencies]
beautifulsoup4 = "*"
openai = "*"
pydantic = "*"
pysrt = "*"
pysubs2 = "*"
python = "^3.9"
requests = "*"
tenacity = "*"
Expand Down
36 changes: 0 additions & 36 deletions tests/test_srt.py

This file was deleted.

36 changes: 36 additions & 0 deletions tests/test_sub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os

import pytest

from tests import util
from yuisub.a2t import WhisperModel
from yuisub.sub import bilingual, load, translate


def test_sub() -> None:
    """Load the subtitle at ``util.TEST_ENG_SRT`` and save it as ``assets/test.en.ass``."""
    english_sub = load(util.TEST_ENG_SRT)
    target = util.projectPATH / "assets" / "test.en.ass"
    english_sub.save(target)


def test_audio() -> None:
    """Transcribe ``util.TEST_AUDIO`` with a WhisperModel and save it as ``assets/test.audio.ass``."""
    whisper_model = WhisperModel(name=util.MODEL_NAME, device=util.DEVICE)
    audio_path = str(util.TEST_AUDIO)

    transcript = whisper_model.transcribe(audio=audio_path)
    transcript.save(util.projectPATH / "assets" / "test.audio.ass")


@pytest.mark.skipif(os.environ.get("GITHUB_ACTIONS") == "true", reason="Skipping test when running on CI")
def test_bilingual() -> None:
    """Translate the test subtitle, merge it with the original, and save both results.

    Skipped when the ``GITHUB_ACTIONS`` environment variable is ``"true"``.
    Writes ``assets/test.zh.ass`` and ``assets/test.bilingual.ass`` under the
    project path.
    """
    origin = load(util.TEST_ENG_SRT)

    # Connection settings for the translation call, taken from the test utilities.
    translate_kwargs = {
        "model": util.OPENAI_MODEL,
        "api_key": util.OPENAI_API_KEY,
        "base_url": util.OPENAI_BASE_URL,
        "bangumi_url": util.BANGUMI_URL,
    }
    translated = translate(sub=origin, **translate_kwargs)
    merged = bilingual(sub_origin=origin, sub_zh=translated)

    assets_dir = util.projectPATH / "assets"
    translated.save(assets_dir / "test.zh.ass")
    merged.save(assets_dir / "test.bilingual.ass")
2 changes: 1 addition & 1 deletion yuisub/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from yuisub.bangumi import bangumi # noqa: F401
from yuisub.llm import Translator # noqa: F401
from yuisub.prompt import ORIGIN, ZH # noqa: F401
from yuisub.srt import bilingual, from_file # noqa: F401
from yuisub.sub import bilingual, load, translate # noqa: F401
32 changes: 16 additions & 16 deletions yuisub/__main__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import argparse
import sys

from yuisub.srt import bilingual, from_file
from yuisub.sub import bilingual, load, translate

# ffmpeg -i test.mkv -c:a mp3 -map 0:a:0 test.mp3
# ffmpeg -i test.mkv -map 0:s:0 eng.srt

parser = argparse.ArgumentParser()
parser.description = "Generate bilingual SRT files from audio or SRT input."
parser.description = "Generate Bilingual Subtitle from audio or subtitle file"
# input
parser.add_argument("-a", "--AUDIO", type=str, help="Path to the audio file", required=False)
parser.add_argument("-s", "--SRT", type=str, help="Path to the input SRT file", required=False)
# srt output
parser.add_argument("-oz", "--OUTPUT_ZH", type=str, help="Path to save the Chinese SRT file", required=False)
parser.add_argument("-ob", "--OUTPUT_BILINGUAL", type=str, help="Path to save the bilingual SRT file", required=False)
parser.add_argument("-s", "--SUB", type=str, help="Path to the input Subtitle file", required=False)
# subtitle output
parser.add_argument("-oz", "--OUTPUT_ZH", type=str, help="Path to save the Chinese ASS file", required=False)
parser.add_argument("-ob", "--OUTPUT_BILINGUAL", type=str, help="Path to save the bilingual ASS file", required=False)
# openai gpt
parser.add_argument("-om", "--OPENAI_MODEL", type=str, help="Openai model name", required=True)
parser.add_argument("-api", "--OPENAI_API_KEY", type=str, help="Openai API key", required=True)
Expand All @@ -28,8 +28,8 @@


def main() -> None:
if args.AUDIO and args.SRT:
raise ValueError("Please provide only one input file, either audio or SRT.")
if args.AUDIO and args.SUB:
raise ValueError("Please provide only one input file, either audio or subtitle file")

if not args.OUTPUT_ZH and not args.OUTPUT_BILINGUAL:
raise ValueError("Please provide output paths for the subtitles.")
Expand All @@ -53,26 +53,26 @@ def main() -> None:

model = WhisperModel(name=_MODEL, device=_DEVICE)

segs = model.transcribe(audio=args.AUDIO)

srt = model.gen_srt(segs=segs)
sub = model.transcribe(audio=args.AUDIO)

else:
srt = from_file(args.SRT)
sub = load(args.SUB)

srt_zh, srt_bilingual = bilingual(
srt=srt,
sub_zh = translate(
sub=sub,
model=args.OPENAI_MODEL,
api_key=args.OPENAI_API_KEY,
base_url=args.OPENAI_BASE_URL,
bangumi_url=args.BANGUMI_URL,
)

sub_bilingual = bilingual(sub_origin=sub, sub_zh=sub_zh)

if args.OUTPUT_ZH:
srt_zh.save(args.OUTPUT_ZH)
sub_zh.save(args.OUTPUT_ZH)

if args.OUTPUT_BILINGUAL:
srt_bilingual.save(args.OUTPUT_BILINGUAL)
sub_bilingual.save(args.OUTPUT_BILINGUAL)


if __name__ == "__main__":
Expand Down
45 changes: 5 additions & 40 deletions yuisub/a2t.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,10 @@
from typing import List, Optional, Tuple, Union
from typing import Optional, Tuple, Union

import numpy as np
import pysrt
import pysubs2
import torch
import whisper
from pydantic import BaseModel
from pysrt import SubRipFile


class Segment(BaseModel):
    """One entry of the ``segments`` list in a Whisper ``transcribe`` result.

    Instances are built with ``Segment(**seg)``, so the field names mirror the
    keys of each segment dict.
    """

    # Identification and timing.
    id: int  # gen_srt uses ``id + 1`` as the cue number
    seek: int
    start: float  # handed to format_time, which treats it as seconds
    end: float  # handed to format_time, which treats it as seconds

    # Recognised content.
    text: str  # gen_srt strips leading whitespace before writing it
    tokens: List[int]

    # Decoding statistics (presumably reported by Whisper as-is; not read by gen_srt).
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float
from pysubs2 import SSAFile


class WhisperModel:
Expand All @@ -40,7 +26,7 @@ def transcribe(
word_timestamps: bool = False,
prepend_punctuations: str = "\"'“¿([{-",
append_punctuations: str = "\"'.。,,!!??::”)]}、",
) -> List[Segment]:
) -> SSAFile:
result = self.model.transcribe(
audio=audio,
verbose=verbose,
Expand All @@ -54,25 +40,4 @@ def transcribe(
prepend_punctuations=prepend_punctuations,
append_punctuations=append_punctuations,
)
segments: List[Segment] = [Segment(**seg) for seg in result["segments"]]
return segments

@staticmethod
def gen_srt(segs: List[Segment]) -> SubRipFile:
    """Build a ``SubRipFile`` from transcribed segments.

    Each segment becomes one SRT cue: the cue number (``id + 1``), a
    ``start --> end`` timing line rendered by ``format_time``, and the segment
    text with leading whitespace removed, followed by a blank line.

    :param segs: segments to convert, in output order
    :return: the subtitles parsed by ``pysrt.from_string`` from the generated text
    """
    cues = [
        f"{seg.id + 1}\n{format_time(seg.start)} --> {format_time(seg.end)}\n{seg.text.lstrip()}\n\n"
        for seg in segs
    ]
    return pysrt.from_string("".join(cues))


def format_time(seconds: float) -> str:
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond first and then split with
    integer arithmetic. Truncating the fractional part instead would let float
    representation error drop a millisecond (``1.001`` would be rendered as
    ``00:00:01,000``), and rounding up front also carries correctly into the
    seconds/minutes/hours fields.

    :param seconds: time offset in seconds
    :return: the timestamp with zero-padded hour, minute, second and millisecond fields
    """
    total_ms = int(round(seconds * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
return pysubs2.load_from_whisper(result)
80 changes: 0 additions & 80 deletions yuisub/srt.py

This file was deleted.

Loading

0 comments on commit b48dec8

Please sign in to comment.