|
|
from dia.model import Dia |
|
|
|
|
|
|
|
|
model = Dia.from_pretrained("nari-labs/Dia-1.6B-0626", compute_dtype="float16") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
clone_from_text = "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face." |
|
|
clone_from_audio = "simple.mp3" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_to_generate = "[S1] Hello, how are you? [S2] I'm good, thank you. [S1] What's your name? [S2] My name is Dia. [S1] Nice to meet you. [S2] Nice to meet you too." |
|
|
|
|
|
|
|
|
output = model.generate( |
|
|
clone_from_text + text_to_generate, |
|
|
audio_prompt=clone_from_audio, |
|
|
use_torch_compile=False, |
|
|
verbose=True, |
|
|
cfg_scale=4.0, |
|
|
temperature=1.8, |
|
|
top_p=0.90, |
|
|
cfg_filter_top_k=50, |
|
|
) |
|
|
|
|
|
model.save_audio("voice_clone.mp3", output) |
|
|
|