Spaces:

Gregniuki
/

f5-tts_Polish_English_German

Running on Zero

Gregniuki committed on Nov 29, 2024

Commit

5419dac

verified ·

1 Parent(s): c0ab306

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -381,12 +381,12 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
             non_silent_wave += non_silent_seg
         aseg = non_silent_wave
-        aseg = remove_silence_edges(aseg) + AudioSegment.silent(duration=50)
         audio_duration = len(aseg)
-        if audio_duration > 7500:
             gr.Warning("Audio is over 8s, clipping to only first 8s.")
-            aseg = aseg[:7500]
         aseg.export(f.name, format="wav")
         ref_audio = f.name
@@ -415,7 +415,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
     audio, sr = torchaudio.load(ref_audio)
     # Use the new chunk_text function to split gen_text
-    max_chars = int((len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (14 - audio.shape[-1] / sr)))
     print(f"text: {gen_text} ")
     gen_text_batches = chunk_text(gen_text, max_chars=max_chars)
     print('ref_text', ref_text)

             non_silent_wave += non_silent_seg
         aseg = non_silent_wave
+      #  aseg = remove_silence_edges(aseg) + AudioSegment.silent(duration=50)
         audio_duration = len(aseg)
+        if audio_duration > 8000:
             gr.Warning("Audio is over 8s, clipping to only first 8s.")
+            aseg = aseg[:8000]
         aseg.export(f.name, format="wav")
         ref_audio = f.name
     audio, sr = torchaudio.load(ref_audio)
     # Use the new chunk_text function to split gen_text
+    max_chars = int((len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (18 - audio.shape[-1] / sr)))
     print(f"text: {gen_text} ")
     gen_text_batches = chunk_text(gen_text, max_chars=max_chars)
     print('ref_text', ref_text)