metadata
license: cc-by-4.0
language:
- ru
base_model:
- SWivid/F5-TTS
datasets:
- mozilla-foundation/common_voice_17_0
- bond005/sberdevices_golos_10h_crowd
- bond005/sberdevices_golos_100h_farfield
- bond005/sova_rudevices
- Aniemore/resd_annotated
- espnet/yodas
How to use (example for a Jupyter/Colab notebook; lines starting with `!` are shell commands):
!pip install git+https://github.com/SWivid/F5-TTS.git
model_id = "TVI/f5-tts-ru-accent"
download_dir = "ckpts" # You can replace
!huggingface-cli download {model_id} --local-dir {download_dir}
model = "F5TTS_v1_Base"
ckpt_file = "/content/ckpts/model_last.safetensors"
vocab_file = "/content/ckpts/vocab.txt"
ref_audio = "PATH_TO_REF"
ref_text = f"REF TRANSCRIBE or EMPTY"
gen_text = "YOUR TEXT"
wav_output_dir = "OUTPUT_FOLDER"
wav_output_filename = "OUTPUT_FILE.wav"
speed = 1.0
command = (
f'f5-tts_infer-cli --model {model} --ckpt_file "{ckpt_file}" '
f'--vocab_file "{vocab_file}" -r "{ref_audio}" -s "{ref_text}" '
f'-t "{gen_text}" -o "{wav_output_dir}" -w "{wav_output_filename}" '
f'--speed {speed} --remove_silence'
)
print(command)
!{command}
Original repo: https://github.com/SWivid/F5-TTS