"""Batch voice-cloning example for the Dia text-to-dialogue model.

Loads a pretrained Dia checkpoint, pairs each of ``NUM_CLONES`` prompt
recordings (``simple_0.mp3`` ... ``simple_9.mp3``) with a text prompt,
generates one audio clip per pair in a single batched call, and writes the
results to ``voice_clone_<i>.mp3``.

NOTE(review): the prompt recordings must already exist in the working
directory; presumably they are produced by a sibling example script --
confirm against the project README.
"""

from dia.model import Dia

# Number of (audio prompt, text) pairs in the batch. Kept in one place so the
# prompt-audio list and the text list below always have the same length.
NUM_CLONES = 10

model = Dia.from_pretrained("nari-labs/Dia-1.6B-0626", compute_dtype="float16")

# Text associated with the prompt audio.
# NOTE(review): presumably this must be the transcript of the recordings in
# `clone_from_audios` -- confirm against the Dia documentation.
clone_from_text = "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face."

# Text to synthesize. In this example it is the same script as the prompt text.
text_to_generate = "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face."

# One prompt recording per batch entry.
clone_from_audios = [f"simple_{i}.mp3" for i in range(NUM_CLONES)]

# Each batch entry is the prompt text immediately followed by the text to
# generate (no separator is inserted between the two).
texts = [clone_from_text + text_to_generate for _ in range(NUM_CLONES)]

# NOTE(review): presumably the returned audio covers only `text_to_generate`,
# not the prompt text -- confirm against the Dia documentation.
output = model.generate(
    texts,
    audio_prompt=clone_from_audios,
    use_torch_compile=True,
    verbose=True,
    max_tokens=2000,
)

# Write each generated clip to its own file, indexed by batch position.
for i, o in enumerate(output):
    model.save_audio(f"voice_clone_{i}.mp3", o)