Subtitle to Audio, generate audio or speech from any subtitle file
Project description
Subtitle to Audio
Subtitle to audio, generate audio from any subtitle file using Coqui-ai TTS and synchronize the audio timing according to subtitle time.
Dependencies
ffmpeg, pydub, librosa, coqui-ai TTS, ffmpeg-python
Installation
pip install git+https://github.com/bnsantoso/sub-to-audio
pip install subtoaudio
ffmpeg on linux
apt-get install ffmpeg
Example usage
Basic use is very similiar to Coqui-ai TTS, you can check their documentation and the <lang-iso_code>.
!Note: Use non-overlapping subtitles with an optimal Character per Second / CPS for best result
from subtoaudio import SubToAudio
#Using the Fairseq English speaker model as the default
#The code will output 'yoursubtitle.wav' in the current directory.
sub = SubToAudio(gpu=True)
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle)
#you can choose 1100 different language using fairseq model
sub = SubToAudio(language='<lang-iso_code>')
subtitle = sub.subtitle("yoursubtitle.ass")
sub.convert_to_audio(sub_data=subtitle)
#specify model name
sub = SubToAudio(model_name="tts_models/multilingual/multi-dataset/your_tts")
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle, output_path="subtitle.wav")
#specify model and config path
sub = SubToAudio(model_path="path/to/your/model.pth" config_path="config/path.json")
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle)
#By default, it is using "speaker=tts.speakers[0]/None,
#language=tts.languages[0]/None, speaker_wav=None
sub = SubToAudio(model_name="tts_models/multilingual/multi-dataset/your_tts")
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle, language="en", speaker="speakername", speaker_wav="your/path/speaker.wav", output_path="subtitle.wav")
#Save temporary audio to current folder
sub = SubToAudio(model_name="tts_models/multilingual/multi-dataset/your_tts")
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle, output_path="subtitle.wav", save_temp=True)
Tempo Mode
Use the tempo_mode
parameter to speed up the audio. There are three tempo modes:
tempo_mode="all"
: This accelerates all audio. Usetempo_speed=float
to specify the speed.tempo_mode="overflow"
: This accelerates the audio to match the total subtitle duration plus the blank duration before the next subtitle appears.'tempo_limit'
will limit the speed increase during overflow.tempo_mode="precise"
: This accelerates the audio to match the duration the subtitle appears."
from subtoaudio import SubToAudio
#Speed up tempo or speech rate
sub = SubToAudio(gpu=True)
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle, tempo_mode="all", tempo_speed=1.3)
#Change the tempo or speech rate of all audio files , default is 1.2
sub = SubToAudio(gpu=True)
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle, tempo_mode="all", tempo_speed=1.3)
#Change tempo or speech rate to audio that doesn't match the subtitle duration
sub = SubToAudio(gpu=True)
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle, tempo_mode="overflow")
#Limit tempo speed on the overflow mode
sub.convert_to_audio(sub_data=subtitle, tempo_mode="overflow", tempo_limit=1.2)
#Match audio length to subtitle duration
sub = SubToAudio(gpu=True)
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=subtitle, tempo_mode="precise")
Shift Mode
shift_mode
parameter will shift audio that doesnt match subtitle duration.
shift_mode="right"
: Shift audio time to the right and prevent audio overlaping.shift_mode="left"
: Shift audio to the left and prevent audio overlap, but be cautious of limited space on the left side, as some audio may disappear.shift_mode="interpose"
: Shift audio to mid position and prevent right and left of audio overlaping. (Note: This mode can be clunky, so use it cautiously.)shift_mode="left-overlap"
: Shift audio time to the left, allowing overlap.shift_mode="interpose-overlap"
: Shift audio to mid position, allowing overlap.shift_limit=int or "str"
: limit audio shift, use integer for millisecond or string like2.5s
for second
from subtoaudio import SubToAudio
#shift mode with limit of 2 second to the right.
sub = SubToAudio(languages="vie", gpu=True)
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=sub, tempo_mode="overflow", shift_mode="right", limit_shift="2s")
#shift audio to left position or, time before next subtitle appear
sub = SubToAudio(languages="fra" gpu=True)
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=sub, shift_mode="left-overlap")
#shift to left, and limit shift only 1 sec.
sub = SubToAudio(gpu=False)
subtitle = sub.subtitle("yoursubtitle.srt")
sub.convert_to_audio(sub_data=sub, shift_mode="left", shift_limit=1000) #1000 = 1s
Citation
Eren, G., & The Coqui TTS Team. (2021). Coqui TTS (Version 1.4) [Computer software]. https://doi.org/10.5281/zenodo.6334862
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Hashes for subtoaudio-0.1.4-py3-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | e5327e47ef6204f90581b1cbb1d6905849bb7cea61291e186cd8e48005cbe66d |
|
MD5 | 2f50470a550e95954c52f04473e8789f |
|
BLAKE2b-256 | ff9589aaf50b38643857087455b83501d52c288706e10fd7672e0a2bcaa6af41 |