import spaces
from pathlib import Path
import os
import sys

# Add current directory to Python path for HuggingFace Spaces
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from chatterbox.tts import ChatterboxTTS
import torchaudio
import torch
import random
import numpy as np
import gradio as gr
import tempfile
import chatterbox_dhivehi
import warnings

warnings.filterwarnings("ignore")

chatterbox_dhivehi.extend_dhivehi()

# Global variables
MODEL = None
_target = Path.home() / ".chatterbox-tts-dhivehi"

def download_model():
    """Download model files from HuggingFace if not already present"""
    try:
        from huggingface_hub import snapshot_download
        
        print("=" * 60)
        print("Checking model files...")
        print(f"Target directory: {_target}")
        
        if not (_target.exists() and any(_target.rglob("*"))):
            print("Model files not found. Starting download...")
            print("This may take a few minutes on first run.")
            print("=" * 60)
            
            snapshot_download(
                repo_id="alakxender/chatterbox-tts-dhivehi",
                local_dir=str(_target),
                local_dir_use_symlinks=False,
                resume_download=True,
                force_download=True,
                allow_patterns=["*.safetensors", "*.json", "*.pt"]
            )
            
            print("=" * 60)
            print("Model files downloaded successfully!")
            print("=" * 60)
        else:
            print("Model files already present.")
            print("=" * 60)
            
    except Exception as e:
        print("=" * 60)
        print(f"Warning: Could not download model files: {e}")
        print("=" * 60)

def load_model(checkpoint="kn_cbox", device="cuda"):
    """Load the TTS model"""
    global MODEL
    try:
        checkpoint_path = f"{_target}/{checkpoint}"
        print(f"Loading model with checkpoint: {checkpoint_path}")
        print(f"Target device: {device}")
        MODEL = ChatterboxTTS.from_dhivehi(
            ckpt_dir=Path(checkpoint_path),
            device=device
        )
        print(f"Model loaded successfully on {device}!")
    except Exception as e:
        print(f"Error loading model: {e}")
        raise e

def set_seed(seed: int):
    """Set random seed for reproducibility"""
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    random.seed(seed)
    np.random.seed(seed)

# Internal implementation without decorator
def _generate_speech_impl(text, 
                         reference_audio, 
                         exaggeration=0.5, 
                         temperature=0.1, 
                         cfg_weight=0.5, 
                         seed=42):
    """Internal implementation of generate speech"""
    global MODEL
    
    # Clean the input text
    text = clean_text(text)
    
    if not text:
        return None, "Please enter some text to generate speech."
    
    if MODEL is None:
        return None, "Model not loaded. Please check your model paths."
    
    try:
        # Set seed for reproducibility
        set_seed(seed)
        
        # Handle reference audio - validate it exists
        audio_prompt_path = None
        if reference_audio and isinstance(reference_audio, str) and reference_audio.strip():
            # Check if file actually exists
            if os.path.exists(reference_audio):
                audio_prompt_path = reference_audio
                print(f"Using reference audio: {audio_prompt_path}")
            else:
                print(f"Reference audio path not found, ignoring: {reference_audio}")
        
        if not audio_prompt_path:
            print("Generating without reference audio")
        
        print(f"Generating audio for: {text[:50]}...")
        
        # Generate audio - handle optional reference audio
        if audio_prompt_path:
            audio = MODEL.generate(
                text=text,
                audio_prompt_path=audio_prompt_path,
                exaggeration=exaggeration,
                temperature=temperature,
                cfg_weight=cfg_weight,
            )
        else:
            # Try without reference audio
            try:
                audio = MODEL.generate(
                    text=text,
                    exaggeration=exaggeration,
                    temperature=temperature,
                    cfg_weight=cfg_weight,
                )
            except TypeError:
                # If the model requires audio_prompt_path, try with empty string
                audio = MODEL.generate(
                    text=text,
                    audio_prompt_path="",
                    exaggeration=exaggeration,
                    temperature=temperature,
                    cfg_weight=cfg_weight,
                )
        
        # Save to temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            output_path = tmp_file.name
        
        torchaudio.save(output_path, audio, 24000)
        
        return output_path, f"Successfully generated speech! Audio length: {audio.shape[1]/24000:.2f} seconds"
        
    except Exception as e:
        error_msg = f"Error generating speech: {str(e)}"
        print(error_msg)
        return None, error_msg

# GPU version with decorator
@spaces.GPU
def _generate_speech_gpu(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42):
    """GPU version of generate speech"""
    return _generate_speech_impl(text, reference_audio, exaggeration, temperature, cfg_weight, seed)

# CPU version without decorator
def _generate_speech_cpu(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42):
    """CPU version of generate speech"""
    return _generate_speech_impl(text, reference_audio, exaggeration, temperature, cfg_weight, seed)

# Router function
def generate_speech(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42, use_gpu=True):
    """Generate speech from text using voice cloning"""
    if use_gpu:
        return _generate_speech_gpu(text, reference_audio, exaggeration, temperature, cfg_weight, seed)
    else:
        return _generate_speech_cpu(text, reference_audio, exaggeration, temperature, cfg_weight, seed)

def clean_text(text):
    """Clean text by removing newlines at start/end, double spaces, and extra whitespace"""
    import re
    
    # Remove newlines at start and end
    text = text.strip('\n\r')
    
    # Replace multiple spaces with single space
    text = re.sub(r'\s+', ' ', text)
    
    # Strip leading and trailing spaces
    text = text.strip()
    
    return text

def split_sentences(text):
    """Split text into sentences based on periods, ensuring each sentence is at least 150 characters"""
    # Clean the input text first
    text = clean_text(text)
    
    # First, split by periods normally
    initial_sentences = []
    current_sentence = ""
    
    for char in text:
        current_sentence += char
        if char == '.':
            # Add sentence if it's not empty after stripping spaces from both sides
            stripped_sentence = current_sentence.strip()
            if stripped_sentence:
                initial_sentences.append(stripped_sentence)
            current_sentence = ""
    
    # Add remaining text if any (without period), stripped of spaces from both sides
    stripped_remaining = current_sentence.strip()
    if stripped_remaining:
        initial_sentences.append(stripped_remaining)
    
    # If we only have one sentence, return it
    if len(initial_sentences) <= 1:
        return initial_sentences
    
    # Now combine sentences until each is at least 150 characters
    final_sentences = []
    combined_sentence = ""
    
    for sentence in initial_sentences:
        if combined_sentence:
            combined_sentence += " " + sentence
        else:
            combined_sentence = sentence
        
        # If combined sentence is >= 150 chars, add it to final list
        if len(combined_sentence) >= 150:
            final_sentences.append(combined_sentence.strip())
            combined_sentence = ""
    
    # Add any remaining combined sentence (even if < 150 chars)
    if combined_sentence.strip():
        final_sentences.append(combined_sentence.strip())
    
    return final_sentences

# Internal implementation without decorator
def _generate_speech_multi_sentence_impl(text, 
                                        reference_audio, 
                                        exaggeration=0.5, 
                                        temperature=0.1, 
                                        cfg_weight=0.5, 
                                        seed=42,
                                        use_gpu=True):
    """Internal implementation of multi-sentence speech generation"""
    global MODEL
    
    # Clean the input text
    text = clean_text(text)
    
    if not text:
        yield None, "Please enter some text to generate speech."
        return
    
    if MODEL is None:
        yield None, "Model not loaded. Please check your model paths."
        return
    
    # Split text into sentences
    sentences = split_sentences(text)
    
    # If only one sentence or no periods, use regular method
    if len(sentences) <= 1:
        yield None, "Generating single sentence..."
        result_audio, result_status = generate_speech(text, reference_audio, exaggeration, temperature, cfg_weight, seed, use_gpu)
        yield result_audio, result_status
        return
    
    try:
        # Set seed for reproducibility
        set_seed(seed)
        
        # Handle reference audio - validate it exists
        audio_prompt_path = None
        if reference_audio and isinstance(reference_audio, str) and reference_audio.strip():
            # Check if file actually exists
            if os.path.exists(reference_audio):
                audio_prompt_path = reference_audio
                print(f"Using reference audio: {audio_prompt_path}")
            else:
                print(f"Reference audio path not found, ignoring: {reference_audio}")
        
        if not audio_prompt_path:
            print("Generating without reference audio")
        
        yield None, f"Starting generation for {len(sentences)} sentences..."
        print(f"Processing {len(sentences)} sentences...")
        
        all_audio_segments = []
        total_duration = 0
        
        for i, sentence in enumerate(sentences):
            # Calculate progress percentage
            progress_percent = int((i / len(sentences)) * 90)  # Reserve last 10% for combining
            yield None, f"Generating sentence {i+1}/{len(sentences)} ({progress_percent}%): {sentence[:50]}..."
            
            print(f"Generating audio for sentence {i+1}/{len(sentences)}: {sentence[:50]}...")
            
            # Generate audio for this sentence
            try:
                if audio_prompt_path:
                    audio = MODEL.generate(
                        text=sentence,
                        audio_prompt_path=audio_prompt_path,
                        exaggeration=exaggeration,
                        temperature=temperature,
                        cfg_weight=cfg_weight,
                    )
                else:
                    # Try without reference audio
                    try:
                        audio = MODEL.generate(
                            text=sentence,
                            exaggeration=exaggeration,
                            temperature=temperature,
                            cfg_weight=cfg_weight,
                        )
                    except TypeError:
                        # If the model requires audio_prompt_path, try with empty string
                        audio = MODEL.generate(
                            text=sentence,
                            audio_prompt_path="",
                            exaggeration=exaggeration,
                            temperature=temperature,
                            cfg_weight=cfg_weight,
                        )
            except Exception as model_error:
                # If the model fails due to missing reference audio, try with default behavior
                if "reference_voice.wav not found" in str(model_error) or "No reference audio provided" in str(model_error):
                    print("Attempting generation without reference audio...")
                    # Try different approaches for models that don't support None reference audio
                    try:
                        # Some models might accept an empty string
                        audio = MODEL.generate(
                            text=sentence,
                            audio_prompt_path="",
                            exaggeration=exaggeration,
                            temperature=temperature,
                            cfg_weight=cfg_weight,
                        )
                    except:
                        # If that fails, try without the audio_prompt_path parameter entirely
                        audio = MODEL.generate(
                            text=sentence,
                            exaggeration=exaggeration,
                            temperature=temperature,
                            cfg_weight=cfg_weight,
                        )
                else:
                    raise model_error
            
            all_audio_segments.append(audio)
            total_duration += audio.shape[1] / 24000
        
        # Concatenate all audio segments
        yield None, "Combining audio segments (95%)..."
        print("Combining audio segments...")
        combined_audio = torch.cat(all_audio_segments, dim=1)
        
        # Save to temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            output_path = tmp_file.name
        
        torchaudio.save(output_path, combined_audio, 24000)
        print("Multi-sentence processing complete!")
        
        yield output_path, f"Successfully generated speech from {len(sentences)} sentences! Total audio length: {total_duration:.2f} seconds"
        
    except Exception as e:
        error_msg = f"Error generating multi-sentence speech: {str(e)}"
        print(error_msg)
        yield None, error_msg

# GPU version with decorator
@spaces.GPU
def _generate_speech_multi_sentence_gpu(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42):
    """GPU version of multi-sentence speech generation"""
    for result in _generate_speech_multi_sentence_impl(text, reference_audio, exaggeration, temperature, cfg_weight, seed, use_gpu=True):
        yield result

# CPU version without decorator
def _generate_speech_multi_sentence_cpu(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42):
    """CPU version of multi-sentence speech generation"""
    for result in _generate_speech_multi_sentence_impl(text, reference_audio, exaggeration, temperature, cfg_weight, seed, use_gpu=False):
        yield result

# Router function
def generate_speech_multi_sentence(text, reference_audio, exaggeration=0.5, temperature=0.1, cfg_weight=0.5, seed=42, use_gpu=True):
    """Generate speech from text with multi-sentence support and progress tracking"""
    if use_gpu:
        for result in _generate_speech_multi_sentence_gpu(text, reference_audio, exaggeration, temperature, cfg_weight, seed):
            yield result
    else:
        for result in _generate_speech_multi_sentence_cpu(text, reference_audio, exaggeration, temperature, cfg_weight, seed):
            yield result

def create_interface():
    """Create the Gradio interface"""
    
    # Sample texts in Dhivehi
    sample_texts = [
        "ކާޑު ނުލައި ފައިސާ ދެއްކޭ ނެޝަނަލް ކިއުއާރް ކޯޑް އެމްއެމްއޭ އިން ތައާރަފްކުރަނީ",
        """ފުޓްބޯޅަ ސްކޫލްގެ ބިމާއި ގުދަންބަރި ބިމުގައި އިމާރާތް ކުރުމުގެ މަސައްކަތް ހުއްޓާލަން އަންގައިފި...
Construction work on football school land and warehouse land has been ordered to stop""",
        "ސިވިލް ސާވިސްގެ ހިދުމަތުގެ މުއްދަތު ގުނުމުގައި ކުންފުނިތަކާއި އިދާރާތަކަށް ހިދުމަތްކުރި މުއްދަތު ހިމަނަނީ",
        """އެ ރަށުގެ ބިން ހިއްކުމާއި ބަނދަރުގެ ނެރު ބަދަލުކުރުމާއި ގޮނޑުދޮށް ހިމާޔަތް ކުރުމުގެ މަސައްކަތް އެމްޓީސީސީއާ މިނިސްޓްރީން ހަވާލުކުރީ މިދިޔަ މަހު ރައީސް އެ ރަށަށް ކުރެއްވި ދަތުރުފުޅުގައި.
The ministry handed over the land reclamation, replacement of the port canal and beach protection to MTCC during the President's visit to the village last month"""
    ]
    
    with gr.Blocks(title="ChatterboxTTS - Dhivehi Text-to-Speech", css="""
    .textbox1 textarea {
        font-size: 18px !important;
        font-family: 'MV_Faseyha', 'Faruma', 'A_Faruma' !important;
        line-height: 1.8 !important;
        direction: rtl !important;
        text-align: right !important;
    }
    """) as app:
        gr.Markdown("# 🎤 ChatterboxTTS - Dhivehi Text-to-Speech with Voice Cloning")
        gr.Markdown("Generate natural-sounding Dhivehi speech with voice cloning capabilities.")
        
        # Row 1: Text input and Reference audio
        with gr.Row():
            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter Dhivehi text here...",
                lines=6,
                value="""އައްޑޫގައި ވަކިވަކި ކައުންސިލްތައް ހަދަން ފެނޭތޯ ބަލަން ނަގާ ތާރީހީ، ފެންނަ ނުފެންނަ ވޯޓާ ގުޅޭ ބަހުސެއް މާދަމާ ރޭ "މިހާރު" އިން ބާއްވަން ނިންމައިފި.

އައްޑޫ ސިޓީ ކައުންސިލުގެ ދަށުން އިދާރީ ގޮތުން ހުޅުދު އާއި މީދޫ އަދި ފޭދޫ ވަކިކޮށް. އެ ތިން ރަށުގައި ވަކިވަކި ކައުންސިލުތައް ހެދުމަށް ފެނޭތޯ ބެލުމަށް ތިން ރަށުގެ ރައްޔިތުންގެ މެދުގައި ފެންނަ ނުފެންނަ ވޯޓެއް ނަގަނީ އަންނަ ހޮނިހިރު ދުވަހު.

ރައްޔިތުންގެ ހިޔާލު ހޯދުމުގެ އާންމު ވޯޓު ނެގުމުގެ ގާނޫނުގެ ދަށުން ނަގާ ފުރަތަމަ ވޯޓާ ގުޅޭގޮތުން "މިހާރު" އިން ބަހުސެއް ބާއްވަން ނިންމާފައިވާއިރު. އެ ބަހުސްގައި ބައިވެރިވެވަޑައިގަންނަވާނީ އައްޑޫގެ އެކި ދާއިރާތަކުގައި ތަޖުރިބާކާރު ބޭފުޅުން.
""",
                rtl=True,
                elem_classes=["textbox1"]
            )
            reference_audio = gr.Audio(
                label="Reference Voice Audio (optional - for voice cloning)",
                type="filepath",
                sources=["upload", "microphone"],
                value="m2.wav"
            )
        
        # Row 2: Example buttons
        gr.Markdown("**Quick Examples:**")
        with gr.Row():
            sample_btn1 = gr.Button("Sample 1", size="sm")
            sample_btn2 = gr.Button("Sample 2", size="sm")
            sample_btn3 = gr.Button("Sample 3", size="sm")
            sample_btn4 = gr.Button("Sample 4", size="sm")
        
        # Row 2b: Reference Audio buttons
        gr.Markdown("**Reference Audio:**")
        with gr.Row():
            ref_btn1 = gr.Button("Female 1 (f1.wav)", size="sm")
            ref_btn2 = gr.Button("Female 2 (f2.wav)", size="sm")
            ref_btn3 = gr.Button("Male 1 (m1.wav)", size="sm")
            ref_btn4 = gr.Button("Male 2 (m2.wav)", size="sm")

        # Row 3: Advanced settings
        with gr.Accordion("Advanced Settings", open=False):
            with gr.Row():
                exaggeration = gr.Slider(
                    minimum=0.0,
                    maximum=5.0,
                    value=0.5,
                    step=0.1,
                    label="Exaggeration",
                    info="Controls expressiveness"
                )
                temperature = gr.Slider(
                    minimum=0.01,
                    maximum=1.0,
                    value=0.8,
                    step=0.01,
                    label="Temperature",
                    info="Controls randomness"
                )
                cfg_weight = gr.Slider(
                    minimum=0.0,
                    maximum=5.0,
                    value=0.5,
                    step=0.1,
                    label="CFG Weight",
                    info="Classifier-free guidance weight"
                )
                seed = gr.Slider(
                    minimum=0,
                    maximum=9999,
                    value=42,
                    step=1,
                    label="Seed",
                    info="For reproducible results"
                )
            with gr.Row():
                model_select = gr.Dropdown(
                    #choices=["kn_cbox", "f01_cbox"], #f01 upload correct chkpnt
                    choices=["kn_cbox"],
                    value="kn_cbox",
                    label="Model",
                    info="Select TTS model"
                )
                device_select = gr.Dropdown(
                    choices=["GPU", "CPU"],
                    value="GPU",
                    label="Device",
                    info="Select computation device"
                )
                reload_btn = gr.Button("🔄 Reload Model", size="sm")
            reload_status = gr.Textbox(label="Model Status", value="✅ Model 'kn_cbox' loaded on GPU", interactive=False)
        
        gr.Markdown("**Note:** This fine-tune is minimal, so some words may drop or sentences might not complete perfectly. You can experiment with the Advanced Settings to find what works best for your reference audio and to reduce any output issues. This Space uses ZeroGPU for processing, so if your text is long, the GPU might be released before completion, which could cause a timeout. For longer inputs, switch to CPU mode from the Advanced Settings and wait for it to finish. It will run a bit slower, but it should still complete reliably.")
        
        # Row 4: Generate button
        generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
        
        
        # Row 5: Output section
        with gr.Row():
            with gr.Column():
                output_audio = gr.Audio(label="Generated Speech", type="filepath")
                status_message = gr.Textbox(label="Status", interactive=False)
        
        # Event handlers
        # Default values for advanced settings
        DEFAULT_EXAGGERATION = 0.5
        DEFAULT_TEMPERATURE = 0.8
        DEFAULT_CFG_WEIGHT = 0.5
        DEFAULT_SEED = 42
        
        def set_sample_text(sample_idx):
            return sample_texts[sample_idx], DEFAULT_EXAGGERATION, DEFAULT_TEMPERATURE, DEFAULT_CFG_WEIGHT, DEFAULT_SEED
        
        def set_reference_audio(audio_file):
            return audio_file, DEFAULT_EXAGGERATION, DEFAULT_TEMPERATURE, DEFAULT_CFG_WEIGHT, DEFAULT_SEED
        
        def reload_model_handler(model_name, device_name):
            """Reload model with selected checkpoint and device"""
            try:
                device = "cuda" if device_name == "GPU" else "cpu"
                load_model(checkpoint=model_name, device=device)
                return f"✅ Model '{model_name}' loaded successfully on {device_name}!"
            except Exception as e:
                return f"❌ Error loading model: {str(e)}"
        
        sample_btn1.click(lambda: set_sample_text(0), outputs=[text_input, exaggeration, temperature, cfg_weight, seed])
        sample_btn2.click(lambda: set_sample_text(1), outputs=[text_input, exaggeration, temperature, cfg_weight, seed])
        sample_btn3.click(lambda: set_sample_text(2), outputs=[text_input, exaggeration, temperature, cfg_weight, seed])
        sample_btn4.click(lambda: set_sample_text(3), outputs=[text_input, exaggeration, temperature, cfg_weight, seed])
        
        ref_btn1.click(lambda: set_reference_audio("f1.wav"), outputs=[reference_audio, exaggeration, temperature, cfg_weight, seed])
        ref_btn2.click(lambda: set_reference_audio("f2.wav"), outputs=[reference_audio, exaggeration, temperature, cfg_weight, seed])
        ref_btn3.click(lambda: set_reference_audio("m1.wav"), outputs=[reference_audio, exaggeration, temperature, cfg_weight, seed])
        ref_btn4.click(lambda: set_reference_audio("m2.wav"), outputs=[reference_audio, exaggeration, temperature, cfg_weight, seed])
        
        reload_btn.click(
            fn=reload_model_handler,
            inputs=[model_select, device_select],
            outputs=[reload_status]
        )
        
        def generate_with_progress(text, reference_audio, exaggeration, temperature, cfg_weight, seed, device_name):
            """Generate speech with streaming progress updates"""
            use_gpu = (device_name == "GPU")
            # Use the streaming generator
            for result_audio, result_status in generate_speech_multi_sentence(
                text, reference_audio, exaggeration, temperature, cfg_weight, seed, use_gpu
            ):
                yield result_audio, result_status
        
        generate_btn.click(
            fn=generate_with_progress,
            inputs=[text_input, reference_audio, exaggeration, temperature, cfg_weight, seed, device_select],
            outputs=[output_audio, status_message]
        )

        # Parameter Examples Section
        gr.Markdown("### Examples")
        gr.Markdown("Click any example below to load pre-configured settings:")
        
        gr.Examples(
            examples=[
                # [text, reference_audio, exaggeration, temperature, cfg_weight, seed, device]
                ["""އެއް ދުވަހަކު ސަލާންޖަހާ މީހަކު ޖުހާގެ ގޭގެ ދޮރުމައްޗަށް އަރާ ސަލާން ގޮވާލައިފިއެވެ.

ސަލާންޖަހާ މީހާ އައިސް ސަލާން ގޮވާލި އިރު ޖުހާ އުޅެނީ ގޭގެ މަތީ ބުރީގައެވެ.

“ކާކު؟ ކީއްކުރަން؟” ޖުހާ ގޭތެރެއިން ގޮވާލައިފިއެވެ.

“އައިސްފާނަންތަ ތިރިއަށް؟” ސަލާންޖަހާ މީހާ ބުންޏެވެ.

އޭނާ އެހެން ބުނުމުން ޖުހާ ތިރިއަށް ގޮސް ސަލާން ޖަހާ މީހާ އާ ބައްދަލު ކޮށްފިއެވެ.

“ކިހިނެއްވީ؟” ސަލާންޖަހާ މީހާ ކުރެން ޖުހާ އަހައިފިއެވެ.

“އަހަންނަކީ ވަރަށް ބޮޑު ނިކަމެއްޗެއް، ސަދަގާތެއްގެ ގޮތުން އަހަންނަށް އެހީއެއް ދޭތޯ!” ސަލާންޖަހާ މީހާ ބުންޏެވެ.

“އާދޭ އެތެރެއަށް.” ޖުހާ ބުންޏެވެ.

ޖުހާ އެހެން ބުނުމުން ސަލާންޖަހާ މީހާ ގޭތެރެއަށް ވަދެއްޖެއެވެ. ގޭތެރެއަށް ވަނުމުން މަށާއެކީ އަންނާށޭ ކިޔާ ޖުހާ ގޭގެ ސިޑިން މައްޗަށް އަރަން ފަށައިފިއެވެ. ސަލާން ޖަހާ މީހާ ވެސް ޖުހާގެ ފަހަތުން މައްޗަށް ދެއެވެ. މި ހެން ގޮސް އެމީހާ ގޮވައިގެން ގޮސް ގޭގެ ފުރާޅު މައްޗަށް އަރައިފިއެވެ.

ފުރާޅު މައްޗަށް އެރުމާއެކު ޖުހާ ބުނެފިއެވެ. “މަގޭ އަތަކު ދޭނެ އެއްޗެއް ނެތް.”

ސަލާންޖާހާމީހާ މިހާ ހިސާބަށް މައްޗަށް އެރުވުމަށް ފަހު ދޭނެ އެއްޗެއް ނެތޭ ޖުހާ ބުނުމުން އޭނާ ވަރަށް ހިތްހަމަ ނުޖެހިއްޖެއެވެ. “ކީއްވެ ތިހެން ތިހެދީ؟ އަހަރެން ދޮރުމަތީގައި ހުއްޓާވެސް ތިޔަހެން ބުނެލެވުނީހެއްނު! ކީއްކުރަން މިހާ ހިސާބަށް އަރުވާފައި ތިހެން ތިބުނީ؟”

“އެހެން ވިއްޔާ ކީއްވެ’ އަހަރެން ތިރިއަށް ނުބާލާ، ތިޔަ ހޯދަން އުޅުނު އެހީއެއްގެ ވާހަކަ ނޭހީ؟ އެހެން ނަމަ މަށަށްވެސް އެއްޗެއް ނެތޭ ބުނެ ތިރިޔަށް ނުފައިބާ ފަރުޖެއްސުނީހެއްނު” ޖުހާ ސަލާންޖަހާ މީހާ އަށް ޖަވާބު ދިނެވެ.""", "m2.wav", 0.5, 0.8, 0.5, 42, "GPU"],
                ["""ގެދޮރުވެރިޔާ މަޝްރޫއުގެ ދަށުން ހުޅުމާލޭގައި ފަހި ދިރިއުޅުން ކޯޕަރޭޝަން އިން އިމާރާތްކޮށްފައިވާ ފްލެޓްތައް ހަވާލުކުރަން ފެށުމާ އެކު ބޮޑު އަގުގައި އެތަންތަން ކުއްޔަށް ދޭން އިޝްތިހާރު ޖަހަން ފަށައިފި""", "f1.wav", 0.5, 0.8, 0.5, 42, "GPU"],
                ["""ކަސްޓަމްސްގެ އިސް އޮފިޝަލަކު "މިހާރު" އަށް މިއަދު ވިދާޅުވި ގޮތުގައި، ކަސްޓަމްސް އިން ހިފެހެއްޓުމަށް ފަހު، އެ ދެ ކޮންޓެއިނަރު ހުޅުމާލެ ބަނދަރުގައި ބެހެއްޓީ އެ ބަނދަރު ބަލަހައްޓާ އެމްޕީއެލްގެ ހަވާލުގަ. އެމްޕީއެލްއާ އެ ހަވާލުކުރީ އޮންނަ އުސޫލުގެ ތެރެއިން، ލިޔެކިޔުންތައް ފުރިހަމަކޮށްފައި ކަމަށާއި އެ ސިނގިރެޓްތައް ނައްތާނުލައި ހުރީ ތަހުގީގު ނުނިމޭތީ ކަމަށް އޮފިޝަލް ވިދާޅުވި.
A senior customs official told Miharu today that the two containers were seized by customs and placed in the custody of MPL. The cigarettes were handed over to MPL after completing the documents and the investigation was not completed the official said.""", "f2.wav", 0.2, 0.35, 0.4, 42, "GPU"],
                ["""ޤައުމަށްޓަކާ ދީނަށްޓަކާ ކެރިގެން ޖިހާދު ކުރާނަމޭ.
ފައުޅާއި ސިއްރާ އެއްގޮތަށް ހުރިހާ ކަމެއް ގެންދާނަމޭ،
އަހުރެންގެ މޭ ޤައުމަށްޓަކައި އައްޑަނައަކަށް ދިއްކޮށްލުމީ.
ފަހުނޭވަޔަށް ދަންދެން މޮޓޯ ކަމުގަައި ޚިޔާރުކުރާނަމޭ،
އަންނާނެ ތީރެއް އުންޑައެއް ފެނިގެން އަމާޒުވެގެން މެއަށް.
ގަންނާ ކުރެއްވި ބިރުން ފިލަން ދާމީހަކަށް މަ ނުވާނަމޭ،
އެޅިފައިވި މަސްއޫލިއްޔަތެއް އުފުލަންދިމާވީމާ ދެނެއް.
ފެޅިގެން ދެފަޅިއަށް ދިޔަޔަކަސް އެއަކުން މަށެއް ނުރެކޭނަމޭ،
ކަމަކަށް ގޮވާލީމާ މިޤައުމުގެ ޢިއްޒަތާ އަބުރަށްޓަކައި.
އަމަށުން ހުރީވިއްޔާ އެތާ އެކަކަށް މަވެސް ހުންނާނަމޭ،
މިނިވަންކަމާ އެކުވެރިކަމާ ހަމަހަމަ ކަމަށް ތަރުހީބުދީ.
ހިނިތުންވެ ތިބެ ދީނީ އުޚުއްވަތް ފެތުރުމަށް މަ ގޮވާނަމޭ،
އަޚުނާއި އުޚުތުންނަށް އެދޭނީ ލާބަޔާ މަންފާތަކޭ.
ބަޚުތާއިމެދު ނުރުހުންވެގެން ޝަކުވާތަކެއް ނުކުރާނަމޭ،
އަނެކުންގެ ކުށްތައް ހޯދުމީ ނަފުސުގެ މަތިން ނައްތައި ހަނދާން.""", "m3.mp3", 0.5, 0.8, 0.5, 42, "GPU"],
                ["""ކަސްޓަމްސްގެ އިސް އޮފިޝަލަކު "މިހާރު" އަށް މިއަދު ވިދާޅުވި ގޮތުގައި، ކަސްޓަމްސް އިން ހިފެހެއްޓުމަށް ފަހު، އެ ދެ ކޮންޓެއިނަރު ހުޅުމާލެ ބަނދަރުގައި ބެހެއްޓީ އެ ބަނދަރު ބަލަހައްޓާ އެމްޕީއެލްގެ ހަވާލުގަ. އެމްޕީއެލްއާ އެ ހަވާލުކުރީ އޮންނަ އުސޫލުގެ ތެރެއިން، ލިޔެކިޔުންތައް ފުރިހަމަކޮށްފައި ކަމަށާއި އެ ސިނގިރެޓްތައް ނައްތާނުލައި ހުރީ ތަހުގީގު ނުނިމޭތީ ކަމަށް އޮފިޝަލް ވިދާޅުވި.
A senior customs official told Mihaaru today that the two containers were seized by customs and placed in the custody of MPL. The cigarettes were handed over to MPL after completing the documents and the investigation was not completed the official said.""", "m1.wav",  0.2, 0.35, 0.4, 42, "GPU"],
                ["""ގެދޮރުވެރިޔާ މަޝްރޫއުގެ ދަށުން ހުޅުމާލޭގައި ފަހި ދިރިއުޅުން ކޯޕަރޭޝަން އިން އިމާރާތްކޮށްފައިވާ ފްލެޓްތައް ހަވާލުކުރަން ފެށުމާ އެކު ބޮޑު އަގުގައި އެތަންތަން ކުއްޔަށް ދޭން އިޝްތިހާރު ޖަހަން ފަށައިފި""", "m2.wav", 0.5, 0.8, 0.5, 42, "GPU"],
            ],
            inputs=[text_input, reference_audio, exaggeration, temperature, cfg_weight, seed, device_select],
            outputs=[output_audio, status_message],
            fn=generate_with_progress,
            label="Preset Configurations",
            examples_per_page=8,
            cache_examples="lazy"
        )
        
        # Instructions
        with gr.Accordion("Tips", open=False):
            gr.Markdown("""
            ### General Use (TTS and Voice Agents):
            - The default settings (exaggeration=0.5, cfg=0.5) work well for most prompts.
            - If the reference speaker has a fast speaking style, lowering cfg to around 0.3 can improve pacing.
            
            ### Expressive or Dramatic Speech:
            - Try lower cfg values (e.g. ~0.3) and increase exaggeration to around 0.7 or higher.
            - Higher exaggeration tends to speed up speech; reducing cfg helps compensate with slower, more deliberate pacing.
            
            ### Language Transfer Notes:
            - Ensure that the reference clip matches the specified language tag. Otherwise, language transfer outputs may inherit the accent of the reference clip's language.
            - To mitigate this, set the CFG weight to 0.
            
            ### Additional Tips:
            - For best voice cloning results, use clear audio with minimal background noise
            - The reference audio should be 3-10 seconds long
            - Use the same seed value for reproducible results
            """)
    
    return app

if __name__ == "__main__":
    # Step 1: Download model files first
    print("\nStarting ChatterboxTTS Dhivehi Application")
    print("=" * 60)
    download_model()
    
    # Step 2: Load the default model with GPU
    print("\nLoading default model...")
    print("=" * 60)
    try:
        load_model(checkpoint="kn_cbox", device="cuda")
        print("Default model loaded successfully!")
    except Exception as e:
        print(f"Warning: Could not load default model: {e}")
        print("You can manually load the model using the 'Reload Model' button in the interface.")
    print("=" * 60)
    
    # Step 3: Create and launch the interface
    print("\nCreating Gradio interface...")
    app = create_interface()
    
    # Step 4: Launch with public sharing and authentication if needed
    print("Launching application...")
    print("=" * 60)
    app.launch()