Skip to content

Commit

Permalink
Merge pull request #4 from overcrash66/develop
Browse files Browse the repository at this point in the history
add TTS tool
  • Loading branch information
overcrash66 authored Dec 7, 2024
2 parents 406a3e2 + 60c5a9b commit 57a6c0b
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 0 deletions.
Binary file modified Screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Screenshot3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
- PyTranscriber (shortcut)
- Exit

## Demo:

[![Open Translator Intro](https://img.youtube.com/vi/GIhtXs8T8FA/0.jpg)](https://www.youtube.com/watch?v=GIhtXs8T8FA)

[![Open Translator GUI demo](https://img.youtube.com/vi/_5SoStnGqIw/0.jpg)](https://www.youtube.com/watch?v=_5SoStnGqIw)

## Requirements

Make sure you have the following dependencies installed:
Expand Down Expand Up @@ -101,6 +107,8 @@ python WebUI.py

![Web](Screenshot.png)

![TTS](Screenshot3.png)

## Audio translation examples:
- [ar-test.mp3](./testResults/ar-test.mp3)
- [cn-test.mp3](./testResults/cn-test.mp3)
Expand Down
112 changes: 112 additions & 0 deletions textToSpeech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import gradio as gr
from TTS.api import TTS
from datetime import datetime
import time

class TTSVoiceCloningTool:
    """Wrapper around the ``xtts_v2`` multilingual TTS model.

    The model is loaded at the start of each operation and released again
    when the operation finishes, whether it succeeded or raised, so
    ``self.tts`` is ``None`` whenever no call is in progress.
    """

    def __init__(self):
        # Currently loaded TTS model, or None while no model is loaded.
        self.tts = None

    def load_tts_model(self):
        """Load the TTS model."""
        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")

    def unload_tts_model(self):
        """Unload the TTS model to free memory."""
        # Rebinding drops this object's reference to the model.
        self.tts = None

    def get_supported_languages(self):
        """Return a list of supported languages.

        The model is loaded for the query and always unloaded afterwards,
        even if the query raises.
        """
        self.load_tts_model()
        try:
            return self.tts.list_languages()
        finally:
            self.unload_tts_model()

    def generate_audio(self, text, output_path, target_language, input_path, speed):
        """Generate audio using TTS.

        Args:
            text: Text to synthesize.
            output_path: Path of the audio file to write.
            target_language: Language code passed to the model.
            input_path: Reference speaker audio used for voice cloning.
            speed: Speed factor passed to the model.

        Returns:
            ``output_path``, once the file has been written.

        Raises:
            Exception: Whatever the underlying model raises; the model is
                unloaded before the exception propagates.
        """
        print("Generating audio...")
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()

        self.load_tts_model()
        try:
            # Generate audio with adjustable speed
            self.tts.tts_to_file(
                text=text,
                speaker_wav=input_path,
                language=target_language,
                file_path=output_path,
                speed=speed,  # Pass the speed parameter
            )
            execution_time = (time.perf_counter() - start_time) / 60
            print(f"Audio generated in {execution_time:.2f} minutes")
        finally:
            # Release the model even when synthesis fails, so a failed
            # request does not keep it in memory.
            self.unload_tts_model()

        return output_path

def tts_interface(text, reference_audio, language, speed):
    """Wrapper for the TTS tool to integrate with Gradio.

    Args:
        text: Text to convert to speech.
        reference_audio: Path of the reference audio used for voice cloning.
        language: Target language code forwarded to the TTS tool.
        speed: Output speed factor forwarded to the TTS tool.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: If the text or reference audio is missing, or if audio
            generation fails. Raising (rather than returning an error
            string) matters because the return value is wired to an audio
            output, where a message string would be treated as a file path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")
    if not reference_audio:
        raise gr.Error("Please provide a reference audio file for voice cloning.")

    # Generate a timestamped filename
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # NOTE(review): the ".mp3" suffix is kept as-is; confirm the TTS backend
    # really writes MP3 data rather than WAV under this name.
    output_path = f"Tts_{timestamp}.mp3"

    tts_tool = TTSVoiceCloningTool()
    try:
        return tts_tool.generate_audio(text, output_path, language, reference_audio, speed)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user and keep
        # the original exception chained for the server log.
        raise gr.Error(str(e)) from e

# Gradio GUI
def main(server_name="127.0.0.2", server_port=7862):
    """Build the TTS voice-cloning Gradio UI and launch it.

    Args:
        server_name: Address the Gradio server binds to. Defaults to
            ``"127.0.0.2"``, the value previously hard-coded here.
        server_port: Port the Gradio server listens on. Defaults to 7862.
    """
    # Manually specify supported languages
    supported_languages = ["en", "es", "fr", "de", "it", "pt", "nl", "ru", "zh", "ja"]  # Add or remove languages as necessary

    with gr.Blocks() as demo:
        # The Markdown text is kept flush-left so no source indentation
        # ends up inside the string.
        gr.Markdown("""
# 🎙️ TTS Voice Cloning Tool
Convert your text into speech using voice cloning! Provide a reference audio to mimic the voice, select the target language, and adjust the output speed.
""")

        with gr.Row():
            text_input = gr.Textbox(
                label="Enter Text to Convert to Speech",
                placeholder="Type or paste the text you want to convert to speech here...",
                lines=8,
                max_lines=20,
            )

        with gr.Row():
            reference_audio_input = gr.Audio(label="Reference Audio File", type="filepath")
            language_input = gr.Dropdown(
                label="Target Language",
                choices=supported_languages,
                value="en",  # Default to English
            )
            speed_input = gr.Slider(
                label="Output Audio Speed",
                minimum=0.5,
                maximum=2.0,
                value=1.0,  # Default speed
                step=0.1,
            )

        output_audio = gr.Audio(label="Generated Audio", type="filepath")

        submit_button = gr.Button(value="🎧 Generate Audio", variant="primary")

        # Wire the button to the TTS callback; input order must match the
        # callback's positional parameters.
        submit_button.click(
            tts_interface,
            inputs=[text_input, reference_audio_input, language_input, speed_input],
            outputs=[output_audio],
        )

    demo.launch(server_name=server_name, server_port=server_port)

# Start the Gradio UI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit 57a6c0b

Please sign in to comment.