import spaces from pathlib import Path import os import sys # Add current directory to Python path for HuggingFace Spaces sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from chatterbox.tts import ChatterboxTTS import torchaudio import torch import random import numpy as np import gradio as gr import tempfile import chatterbox_dhivehi import warnings warnings.filterwarnings("ignore") chatterbox_dhivehi.extend_dhivehi() # Global variables MODEL = None _target = Path.home() / ".chatterbox-tts-dhivehi" def download_model(): """Download model files from HuggingFace if not already present""" try: from huggingface_hub import snapshot_download print("=" * 60) print("Checking model files...") print(f"Target directory: {_target}") if not (_target.exists() and any(_target.rglob("*"))): print("Model files not found. Starting download...") print("This may take a few minutes on first run.") print("=" * 60) snapshot_download( repo_id="alakxender/chatterbox-tts-dhivehi", local_dir=str(_target), local_dir_use_symlinks=False, resume_download=True, force_download=True, allow_patterns=["*.safetensors", "*.json", "*.pt"] ) print("=" * 60) print("Model files downloaded successfully!") print("=" * 60) else: print("Model files already present.") print("=" * 60) except Exception as e: print("=" * 60) print(f"Warning: Could not download model files: {e}") print("=" * 60) def load_model(checkpoint="kn_cbox", device="cuda"): """Load the TTS model""" global MODEL try: checkpoint_path = f"{_target}/{checkpoint}" print(f"Loading model with checkpoint: {checkpoint_path}") print(f"Target device: {device}") MODEL = ChatterboxTTS.from_dhivehi( ckpt_dir=Path(checkpoint_path), device=device ) print(f"Model loaded successfully on {device}!") except Exception as e: print(f"Error loading model: {e}") raise e def set_seed(seed: int): """Set random seed for reproducibility""" torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) random.seed(seed) np.random.seed(seed) # Internal implementation without decorator def _generate_speech_impl(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42): """Internal implementation of generate speech""" global MODEL # Clean the input text text = clean_text(text) if not text: return None, "Please enter some text to generate speech." if MODEL is None: return None, "Model not loaded. Please check your model paths." try: # Set seed for reproducibility set_seed(seed) # Handle reference audio - validate it exists audio_prompt_path = None if reference_audio and isinstance(reference_audio, str) and reference_audio.strip(): # Check if file actually exists if os.path.exists(reference_audio): audio_prompt_path = reference_audio print(f"Using reference audio: {audio_prompt_path}") else: print(f"Reference audio path not found, ignoring: {reference_audio}") if not audio_prompt_path: print("Generating without reference audio") print(f"Generating audio for: {text[:50]}...") # Generate audio - handle optional reference audio if audio_prompt_path: audio = MODEL.generate( text=text, audio_prompt_path=audio_prompt_path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfg_weight, ) else: # Try without reference audio try: audio = MODEL.generate( text=text, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfg_weight, ) except TypeError: # If the model requires audio_prompt_path, try with empty string audio = MODEL.generate( text=text, audio_prompt_path="", exaggeration=exaggeration, temperature=temperature, cfg_weight=cfg_weight, ) # Save to temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file: output_path = tmp_file.name torchaudio.save(output_path, audio, 24000) return output_path, f"Successfully generated speech! Audio length: {audio.shape[1]/24000:.2f} seconds" except Exception as e: error_msg = f"Error generating speech: {str(e)}" print(error_msg) return None, error_msg # GPU version with decorator @spaces.GPU def _generate_speech_gpu(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42): """GPU version of generate speech""" return _generate_speech_impl(text, reference_audio, exaggeration, temperature, cfg_weight, seed) # CPU version without decorator def _generate_speech_cpu(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42): """CPU version of generate speech""" return _generate_speech_impl(text, reference_audio, exaggeration, temperature, cfg_weight, seed) # Router function def generate_speech(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42, use_gpu=True): """Generate speech from text using voice cloning""" if use_gpu: return _generate_speech_gpu(text, reference_audio, exaggeration, temperature, cfg_weight, seed) else: return _generate_speech_cpu(text, reference_audio, exaggeration, temperature, cfg_weight, seed) def clean_text(text): """Clean text by removing newlines at start/end, double spaces, and extra whitespace""" import re # Remove newlines at start and end text = text.strip('\n\r') # Replace multiple spaces with single space text = re.sub(r'\s+', ' ', text) # Strip leading and trailing spaces text = text.strip() return text def split_sentences(text): """Split text into sentences based on periods, ensuring each sentence is at least 150 characters""" # Clean the input text first text = clean_text(text) # First, split by periods normally initial_sentences = [] current_sentence = "" for char in text: current_sentence += char if char == '.': # Add sentence if it's not empty after stripping spaces from both sides stripped_sentence = current_sentence.strip() if stripped_sentence: initial_sentences.append(stripped_sentence) current_sentence = "" # Add remaining text if any (without period), stripped of spaces from both sides stripped_remaining = current_sentence.strip() if stripped_remaining: initial_sentences.append(stripped_remaining) # If we only have one sentence, return it if len(initial_sentences) <= 1: return initial_sentences # Now combine sentences until each is at least 150 characters final_sentences = [] combined_sentence = "" for sentence in initial_sentences: if combined_sentence: combined_sentence += " " + sentence else: combined_sentence = sentence # If combined sentence is >= 150 chars, add it to final list if len(combined_sentence) >= 150: final_sentences.append(combined_sentence.strip()) combined_sentence = "" # Add any remaining combined sentence (even if < 150 chars) if combined_sentence.strip(): final_sentences.append(combined_sentence.strip()) return final_sentences # Internal implementation without decorator def _generate_speech_multi_sentence_impl(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42, use_gpu=True): """Internal implementation of multi-sentence speech generation""" global MODEL # Clean the input text text = clean_text(text) if not text: yield None, "Please enter some text to generate speech." return if MODEL is None: yield None, "Model not loaded. Please check your model paths." return # Split text into sentences sentences = split_sentences(text) # If only one sentence or no periods, use regular method if len(sentences) <= 1: yield None, "Generating single sentence..." result_audio, result_status = generate_speech(text, reference_audio, exaggeration, temperature, cfg_weight, seed, use_gpu) yield result_audio, result_status return try: # Set seed for reproducibility set_seed(seed) # Handle reference audio - validate it exists audio_prompt_path = None if reference_audio and isinstance(reference_audio, str) and reference_audio.strip(): # Check if file actually exists if os.path.exists(reference_audio): audio_prompt_path = reference_audio print(f"Using reference audio: {audio_prompt_path}") else: print(f"Reference audio path not found, ignoring: {reference_audio}") if not audio_prompt_path: print("Generating without reference audio") yield None, f"Starting generation for {len(sentences)} sentences..." print(f"Processing {len(sentences)} sentences...") all_audio_segments = [] total_duration = 0 for i, sentence in enumerate(sentences): # Calculate progress percentage progress_percent = int((i / len(sentences)) * 90) # Reserve last 10% for combining yield None, f"Generating sentence {i+1}/{len(sentences)} ({progress_percent}%): {sentence[:50]}..." print(f"Generating audio for sentence {i+1}/{len(sentences)}: {sentence[:50]}...") # Generate audio for this sentence try: if audio_prompt_path: audio = MODEL.generate( text=sentence, audio_prompt_path=audio_prompt_path, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfg_weight, ) else: # Try without reference audio try: audio = MODEL.generate( text=sentence, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfg_weight, ) except TypeError: # If the model requires audio_prompt_path, try with empty string audio = MODEL.generate( text=sentence, audio_prompt_path="", exaggeration=exaggeration, temperature=temperature, cfg_weight=cfg_weight, ) except Exception as model_error: # If the model fails due to missing reference audio, try with default behavior if "reference_voice.wav not found" in str(model_error) or "No reference audio provided" in str(model_error): print("Attempting generation without reference audio...") # Try different approaches for models that don't support None reference audio try: # Some models might accept an empty string audio = MODEL.generate( text=sentence, audio_prompt_path="", exaggeration=exaggeration, temperature=temperature, cfg_weight=cfg_weight, ) except: # If that fails, try without the audio_prompt_path parameter entirely audio = MODEL.generate( text=sentence, exaggeration=exaggeration, temperature=temperature, cfg_weight=cfg_weight, ) else: raise model_error all_audio_segments.append(audio) total_duration += audio.shape[1] / 24000 # Concatenate all audio segments yield None, "Combining audio segments (95%)..." print("Combining audio segments...") combined_audio = torch.cat(all_audio_segments, dim=1) # Save to temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file: output_path = tmp_file.name torchaudio.save(output_path, combined_audio, 24000) print("Multi-sentence processing complete!") yield output_path, f"Successfully generated speech from {len(sentences)} sentences! Total audio length: {total_duration:.2f} seconds" except Exception as e: error_msg = f"Error generating multi-sentence speech: {str(e)}" print(error_msg) yield None, error_msg # GPU version with decorator @spaces.GPU def _generate_speech_multi_sentence_gpu(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42): """GPU version of multi-sentence speech generation""" for result in _generate_speech_multi_sentence_impl(text, reference_audio, exaggeration, temperature, cfg_weight, seed, use_gpu=True): yield result # CPU version without decorator def _generate_speech_multi_sentence_cpu(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42): """CPU version of multi-sentence speech generation""" for result in _generate_speech_multi_sentence_impl(text, reference_audio, exaggeration, temperature, cfg_weight, seed, use_gpu=False): yield result # Router function def generate_speech_multi_sentence(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42, use_gpu=True): """Generate speech from text with multi-sentence support and progress tracking""" if use_gpu: for result in _generate_speech_multi_sentence_gpu(text, reference_audio, exaggeration, temperature, cfg_weight, seed): yield result else: for result in _generate_speech_multi_sentence_cpu(text, reference_audio, exaggeration, temperature, cfg_weight, seed): yield result def create_interface(): """Create the Gradio interface""" # Sample texts in Dhivehi sample_texts = [ "ކާޑު ނުލައި ފައިސާ ދެއްކޭ ނެޝަނަލް ކިއުއާރް ކޯޑް އެމްއެމްއޭ އިން ތައާރަފްކުރަނީ", """ފުޓްބޯޅަ ސްކޫލްގެ ބިމާއި ގުދަންބަރި ބިމުގައި އިމާރާތް ކުރުމުގެ މަސައްކަތް ހުއްޓާލަން އަންގައިފި... Construction work on football school land and warehouse land has been ordered to stop""", "ސިވިލް ސާވިސްގެ ހިދުމަތުގެ މުއްދަތު ގުނުމުގައި ކުންފުނިތަކާއި އިދާރާތަކަށް ހިދުމަތްކުރި މުއްދަތު ހިމަނަނީ", """އެ ރަށުގެ ބިން ހިއްކުމާއި ބަނދަރުގެ ނެރު ބަދަލުކުރުމާއި ގޮނޑުދޮށް ހިމާޔަތް ކުރުމުގެ މަސައްކަތް އެމްޓީސީސީއާ މިނިސްޓްރީން ހަވާލުކުރީ މިދިޔަ މަހު ރައީސް އެ ރަށަށް ކުރެއްވި ދަތުރުފުޅުގައި. The ministry handed over the land reclamation, replacement of the port canal and beach protection to MTCC during the President's visit to the village last month""" ] with gr.Blocks(title="ChatterboxTTS - Dhivehi Text-to-Speech", css=""" .textbox1 textarea { font-size: 18px !important; font-family: 'MV_Faseyha', 'Faruma', 'A_Faruma' !important; line-height: 1.8 !important; direction: rtl !important; text-align: right !important; } """) as app: gr.Markdown("# 🎤 ChatterboxTTS - Dhivehi Text-to-Speech with Voice Cloning") gr.Markdown("Generate natural-sounding Dhivehi speech with voice cloning capabilities.") # Row 1: Text input and Reference audio with gr.Row(): text_input = gr.Textbox( label="Text to Convert", placeholder="Enter Dhivehi text here...", lines=6, value="""އައްޑޫގައި ވަކިވަކި ކައުންސިލްތައް ހަދަން ފެނޭތޯ ބަލަން ނަގާ ތާރީހީ، ފެންނަ ނުފެންނަ ވޯޓާ ގުޅޭ ބަހުސެއް މާދަމާ ރޭ "މިހާރު" އިން ބާއްވަން ނިންމައިފި. އައްޑޫ ސިޓީ ކައުންސިލުގެ ދަށުން އިދާރީ ގޮތުން ހުޅުދު އާއި މީދޫ އަދި ފޭދޫ ވަކިކޮށް. އެ ތިން ރަށުގައި ވަކިވަކި ކައުންސިލުތައް ހެދުމަށް ފެނޭތޯ ބެލުމަށް ތިން ރަށުގެ ރައްޔިތުންގެ މެދުގައި ފެންނަ ނުފެންނަ ވޯޓެއް ނަގަނީ އަންނަ ހޮނިހިރު ދުވަހު. ރައްޔިތުންގެ ހިޔާލު ހޯދުމުގެ އާންމު ވޯޓު ނެގުމުގެ ގާނޫނުގެ ދަށުން ނަގާ ފުރަތަމަ ވޯޓާ ގުޅޭގޮތުން "މިހާރު" އިން ބަހުސެއް ބާއްވަން ނިންމާފައިވާއިރު. އެ ބަހުސްގައި ބައިވެރިވެވަޑައިގަންނަވާނީ އައްޑޫގެ އެކި ދާއިރާތަކުގައި ތަޖުރިބާކާރު ބޭފުޅުން. """, rtl=True, elem_classes=["textbox1"] ) reference_audio = gr.Audio( label="Reference Voice Audio (optional - for voice cloning)", type="filepath", sources=["upload", "microphone"], value="m2.wav" ) # Row 2: Example buttons gr.Markdown("**Quick Examples:**") with gr.Row(): sample_btn1 = gr.Button("Sample 1", size="sm") sample_btn2 = gr.Button("Sample 2", size="sm") sample_btn3 = gr.Button("Sample 3", size="sm") sample_btn4 = gr.Button("Sample 4", size="sm") # Row 2b: Reference Audio buttons gr.Markdown("**Reference Audio:**") with gr.Row(): ref_btn1 = gr.Button("Female 1 (f1.wav)", size="sm") ref_btn2 = gr.Button("Female 2 (f2.wav)", size="sm") ref_btn3 = gr.Button("Male 1 (m1.wav)", size="sm") ref_btn4 = gr.Button("Male 2 (m2.wav)", size="sm") # Row 3: Advanced settings with gr.Accordion("Advanced Settings", open=False): with gr.Row(): exaggeration = gr.Slider( minimum=0.0, maximum=5.0, value=0.5, step=0.1, label="Exaggeration", info="Controls expressiveness" ) temperature = gr.Slider( minimum=0.01, maximum=1.0, value=0.8, step=0.01, label="Temperature", info="Controls randomness" ) cfg_weight = gr.Slider( minimum=0.0, maximum=5.0, value=0.5, step=0.1, label="CFG Weight", info="Classifier-free guidance weight" ) seed = gr.Slider( minimum=0, maximum=9999, value=42, step=1, label="Seed", info="For reproducible results" ) with gr.Row(): model_select = gr.Dropdown( #choices=["kn_cbox", "f01_cbox"], #f01 upload correct chkpnt choices=["kn_cbox"], value="kn_cbox", label="Model", info="Select TTS model" ) device_select = gr.Dropdown( choices=["GPU", "CPU"], value="GPU", label="Device", info="Select computation device" ) reload_btn = gr.Button("🔄 Reload Model", size="sm") reload_status = gr.Textbox(label="Model Status", value="✅ Model 'kn_cbox' loaded on GPU", interactive=False) gr.Markdown("**Note:** This fine-tune is minimal, so some words may drop or sentences might not complete perfectly. You can experiment with the Advanced Settings to find what works best for your reference audio and to reduce any output issues. This Space uses ZeroGPU for processing, so if your text is long, the GPU might be released before completion, which could cause a timeout. For longer inputs, switch to CPU mode from the Advanced Settings and wait for it to finish. It will run a bit slower, but it should still complete reliably.") # Row 4: Generate button generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg") # Row 5: Output section with gr.Row(): with gr.Column(): output_audio = gr.Audio(label="Generated Speech", type="filepath") status_message = gr.Textbox(label="Status", interactive=False) # Event handlers # Default values for advanced settings DEFAULT_EXAGGERATION = 0.5 DEFAULT_TEMPERATURE = 0.8 DEFAULT_CFG_WEIGHT = 0.5 DEFAULT_SEED = 42 def set_sample_text(sample_idx): return sample_texts[sample_idx], DEFAULT_EXAGGERATION, DEFAULT_TEMPERATURE, DEFAULT_CFG_WEIGHT, DEFAULT_SEED def set_reference_audio(audio_file): return audio_file, DEFAULT_EXAGGERATION, DEFAULT_TEMPERATURE, DEFAULT_CFG_WEIGHT, DEFAULT_SEED def reload_model_handler(model_name, device_name): """Reload model with selected checkpoint and device""" try: device = "cuda" if device_name == "GPU" else "cpu" load_model(checkpoint=model_name, device=device) return f"✅ Model '{model_name}' loaded successfully on {device_name}!" except Exception as e: return f"❌ Error loading model: {str(e)}" sample_btn1.click(lambda: set_sample_text(0), outputs=[text_input, exaggeration, temperature, cfg_weight, seed]) sample_btn2.click(lambda: set_sample_text(1), outputs=[text_input, exaggeration, temperature, cfg_weight, seed]) sample_btn3.click(lambda: set_sample_text(2), outputs=[text_input, exaggeration, temperature, cfg_weight, seed]) sample_btn4.click(lambda: set_sample_text(3), outputs=[text_input, exaggeration, temperature, cfg_weight, seed]) ref_btn1.click(lambda: set_reference_audio("f1.wav"), outputs=[reference_audio, exaggeration, temperature, cfg_weight, seed]) ref_btn2.click(lambda: set_reference_audio("f2.wav"), outputs=[reference_audio, exaggeration, temperature, cfg_weight, seed]) ref_btn3.click(lambda: set_reference_audio("m1.wav"), outputs=[reference_audio, exaggeration, temperature, cfg_weight, seed]) ref_btn4.click(lambda: set_reference_audio("m2.wav"), outputs=[reference_audio, exaggeration, temperature, cfg_weight, seed]) reload_btn.click( fn=reload_model_handler, inputs=[model_select, device_select], outputs=[reload_status] ) def generate_with_progress(text, reference_audio, exaggeration, temperature, cfg_weight, seed, device_name): """Generate speech with streaming progress updates""" use_gpu = (device_name == "GPU") # Use the streaming generator for result_audio, result_status in generate_speech_multi_sentence( text, reference_audio, exaggeration, temperature, cfg_weight, seed, use_gpu ): yield result_audio, result_status generate_btn.click( fn=generate_with_progress, inputs=[text_input, reference_audio, exaggeration, temperature, cfg_weight, seed, device_select], outputs=[output_audio, status_message] ) # Parameter Examples Section gr.Markdown("### Examples") gr.Markdown("Click any example below to load pre-configured settings:") gr.Examples( examples=[ # [text, reference_audio, exaggeration, temperature, cfg_weight, seed, device] ["""އެއް ދުވަހަކު ސަލާންޖަހާ މީހަކު ޖުހާގެ ގޭގެ ދޮރުމައްޗަށް އަރާ ސަލާން ގޮވާލައިފިއެވެ. ސަލާންޖަހާ މީހާ އައިސް ސަލާން ގޮވާލި އިރު ޖުހާ އުޅެނީ ގޭގެ މަތީ ބުރީގައެވެ. “ކާކު؟ ކީއްކުރަން؟” ޖުހާ ގޭތެރެއިން ގޮވާލައިފިއެވެ. “އައިސްފާނަންތަ ތިރިއަށް؟” ސަލާންޖަހާ މީހާ ބުންޏެވެ. އޭނާ އެހެން ބުނުމުން ޖުހާ ތިރިއަށް ގޮސް ސަލާން ޖަހާ މީހާ އާ ބައްދަލު ކޮށްފިއެވެ. “ކިހިނެއްވީ؟” ސަލާންޖަހާ މީހާ ކުރެން ޖުހާ އަހައިފިއެވެ. “އަހަންނަކީ ވަރަށް ބޮޑު ނިކަމެއްޗެއް، ސަދަގާތެއްގެ ގޮތުން އަހަންނަށް އެހީއެއް ދޭތޯ!” ސަލާންޖަހާ މީހާ ބުންޏެވެ. “އާދޭ އެތެރެއަށް.” ޖުހާ ބުންޏެވެ. ޖުހާ އެހެން ބުނުމުން ސަލާންޖަހާ މީހާ ގޭތެރެއަށް ވަދެއްޖެއެވެ. ގޭތެރެއަށް ވަނުމުން މަށާއެކީ އަންނާށޭ ކިޔާ ޖުހާ ގޭގެ ސިޑިން މައްޗަށް އަރަން ފަށައިފިއެވެ. ސަލާން ޖަހާ މީހާ ވެސް ޖުހާގެ ފަހަތުން މައްޗަށް ދެއެވެ. މި ހެން ގޮސް އެމީހާ ގޮވައިގެން ގޮސް ގޭގެ ފުރާޅު މައްޗަށް އަރައިފިއެވެ. ފުރާޅު މައްޗަށް އެރުމާއެކު ޖުހާ ބުނެފިއެވެ. “މަގޭ އަތަކު ދޭނެ އެއްޗެއް ނެތް.” ސަލާންޖާހާމީހާ މިހާ ހިސާބަށް މައްޗަށް އެރުވުމަށް ފަހު ދޭނެ އެއްޗެއް ނެތޭ ޖުހާ ބުނުމުން އޭނާ ވަރަށް ހިތްހަމަ ނުޖެހިއްޖެއެވެ. “ކީއްވެ ތިހެން ތިހެދީ؟ އަހަރެން ދޮރުމަތީގައި ހުއްޓާވެސް ތިޔަހެން ބުނެލެވުނީހެއްނު! ކީއްކުރަން މިހާ ހިސާބަށް އަރުވާފައި ތިހެން ތިބުނީ؟” “އެހެން ވިއްޔާ ކީއްވެ’ އަހަރެން ތިރިއަށް ނުބާލާ، ތިޔަ ހޯދަން އުޅުނު އެހީއެއްގެ ވާހަކަ ނޭހީ؟ އެހެން ނަމަ މަށަށްވެސް އެއްޗެއް ނެތޭ ބުނެ ތިރިޔަށް ނުފައިބާ ފަރުޖެއްސުނީހެއްނު” ޖުހާ ސަލާންޖަހާ މީހާ އަށް ޖަވާބު ދިނެވެ.""", "m2.wav", 0.5, 0.8, 0.5, 42, "GPU"], ["""ގެދޮރުވެރިޔާ މަޝްރޫއުގެ ދަށުން ހުޅުމާލޭގައި ފަހި ދިރިއުޅުން ކޯޕަރޭޝަން އިން އިމާރާތްކޮށްފައިވާ ފްލެޓްތައް ހަވާލުކުރަން ފެށުމާ އެކު ބޮޑު އަގުގައި އެތަންތަން ކުއްޔަށް ދޭން އިޝްތިހާރު ޖަހަން ފަށައިފި""", "f1.wav", 0.5, 0.8, 0.5, 42, "GPU"], ["""ކަސްޓަމްސްގެ އިސް އޮފިޝަލަކު "މިހާރު" އަށް މިއަދު ވިދާޅުވި ގޮތުގައި، ކަސްޓަމްސް އިން ހިފެހެއްޓުމަށް ފަހު، އެ ދެ ކޮންޓެއިނަރު ހުޅުމާލެ ބަނދަރުގައި ބެހެއްޓީ އެ ބަނދަރު ބަލަހައްޓާ އެމްޕީއެލްގެ ހަވާލުގަ. އެމްޕީއެލްއާ އެ ހަވާލުކުރީ އޮންނަ އުސޫލުގެ ތެރެއިން، ލިޔެކިޔުންތައް ފުރިހަމަކޮށްފައި ކަމަށާއި އެ ސިނގިރެޓްތައް ނައްތާނުލައި ހުރީ ތަހުގީގު ނުނިމޭތީ ކަމަށް އޮފިޝަލް ވިދާޅުވި. A senior customs official told Miharu today that the two containers were seized by customs and placed in the custody of MPL. The cigarettes were handed over to MPL after completing the documents and the investigation was not completed the official said.""", "f2.wav", 0.2, 0.35, 0.4, 42, "GPU"], ["""ޤައުމަށްޓަކާ ދީނަށްޓަކާ ކެރިގެން ޖިހާދު ކުރާނަމޭ. ފައުޅާއި ސިއްރާ އެއްގޮތަށް ހުރިހާ ކަމެއް ގެންދާނަމޭ، އަހުރެންގެ މޭ ޤައުމަށްޓަކައި އައްޑަނައަކަށް ދިއްކޮށްލުމީ. ފަހުނޭވަޔަށް ދަންދެން މޮޓޯ ކަމުގަައި ޚިޔާރުކުރާނަމޭ، އަންނާނެ ތީރެއް އުންޑައެއް ފެނިގެން އަމާޒުވެގެން މެއަށް. ގަންނާ ކުރެއްވި ބިރުން ފިލަން ދާމީހަކަށް މަ ނުވާނަމޭ، އެޅިފައިވި މަސްއޫލިއްޔަތެއް އުފުލަންދިމާވީމާ ދެނެއް. ފެޅިގެން ދެފަޅިއަށް ދިޔަޔަކަސް އެއަކުން މަށެއް ނުރެކޭނަމޭ، ކަމަކަށް ގޮވާލީމާ މިޤައުމުގެ ޢިއްޒަތާ އަބުރަށްޓަކައި. އަމަށުން ހުރީވިއްޔާ އެތާ އެކަކަށް މަވެސް ހުންނާނަމޭ، މިނިވަންކަމާ އެކުވެރިކަމާ ހަމަހަމަ ކަމަށް ތަރުހީބުދީ. ހިނިތުންވެ ތިބެ ދީނީ އުޚުއްވަތް ފެތުރުމަށް މަ ގޮވާނަމޭ، އަޚުނާއި އުޚުތުންނަށް އެދޭނީ ލާބަޔާ މަންފާތަކޭ. ބަޚުތާއިމެދު ނުރުހުންވެގެން ޝަކުވާތަކެއް ނުކުރާނަމޭ، އަނެކުންގެ ކުށްތައް ހޯދުމީ ނަފުސުގެ މަތިން ނައްތައި ހަނދާން.""", "m3.mp3", 0.5, 0.8, 0.5, 42, "GPU"], ["""ކަސްޓަމްސްގެ އިސް އޮފިޝަލަކު "މިހާރު" އަށް މިއަދު ވިދާޅުވި ގޮތުގައި، ކަސްޓަމްސް އިން ހިފެހެއްޓުމަށް ފަހު، އެ ދެ ކޮންޓެއިނަރު ހުޅުމާލެ ބަނދަރުގައި ބެހެއްޓީ އެ ބަނދަރު ބަލަހައްޓާ އެމްޕީއެލްގެ ހަވާލުގަ. އެމްޕީއެލްއާ އެ ހަވާލުކުރީ އޮންނަ އުސޫލުގެ ތެރެއިން، ލިޔެކިޔުންތައް ފުރިހަމަކޮށްފައި ކަމަށާއި އެ ސިނގިރެޓްތައް ނައްތާނުލައި ހުރީ ތަހުގީގު ނުނިމޭތީ ކަމަށް އޮފިޝަލް ވިދާޅުވި. A senior customs official told Mihaaru today that the two containers were seized by customs and placed in the custody of MPL. The cigarettes were handed over to MPL after completing the documents and the investigation was not completed the official said.""", "m1.wav", 0.2, 0.35, 0.4, 42, "GPU"], ["""ގެދޮރުވެރިޔާ މަޝްރޫއުގެ ދަށުން ހުޅުމާލޭގައި ފަހި ދިރިއުޅުން ކޯޕަރޭޝަން އިން އިމާރާތްކޮށްފައިވާ ފްލެޓްތައް ހަވާލުކުރަން ފެށުމާ އެކު ބޮޑު އަގުގައި އެތަންތަން ކުއްޔަށް ދޭން އިޝްތިހާރު ޖަހަން ފަށައިފި""", "m2.wav", 0.5, 0.8, 0.5, 42, "GPU"], ], inputs=[text_input, reference_audio, exaggeration, temperature, cfg_weight, seed, device_select], outputs=[output_audio, status_message], fn=generate_with_progress, label="Preset Configurations", examples_per_page=8, cache_examples="lazy" ) # Instructions with gr.Accordion("Tips", open=False): gr.Markdown(""" ### General Use (TTS and Voice Agents): - The default settings (exaggeration=0.5, cfg=0.5) work well for most prompts. - If the reference speaker has a fast speaking style, lowering cfg to around 0.3 can improve pacing. ### Expressive or Dramatic Speech: - Try lower cfg values (e.g. ~0.3) and increase exaggeration to around 0.7 or higher. - Higher exaggeration tends to speed up speech; reducing cfg helps compensate with slower, more deliberate pacing. ### Language Transfer Notes: - Ensure that the reference clip matches the specified language tag. Otherwise, language transfer outputs may inherit the accent of the reference clip's language. - To mitigate this, set the CFG weight to 0. ### Additional Tips: - For best voice cloning results, use clear audio with minimal background noise - The reference audio should be 3-10 seconds long - Use the same seed value for reproducible results """) return app if __name__ == "__main__": # Step 1: Download model files first print("\nStarting ChatterboxTTS Dhivehi Application") print("=" * 60) download_model() # Step 2: Load the default model with GPU print("\nLoading default model...") print("=" * 60) try: load_model(checkpoint="kn_cbox", device="cuda") print("Default model loaded successfully!") except Exception as e: print(f"Warning: Could not load default model: {e}") print("You can manually load the model using the 'Reload Model' button in the interface.") print("=" * 60) # Step 3: Create and launch the interface print("\nCreating Gradio interface...") app = create_interface() # Step 4: Launch with public sharing and authentication if needed print("Launching application...") print("=" * 60) app.launch()