# app_wan.py
import os
import gradio as gr
import numpy as np

# === Constants ===
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 16
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81

MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)

# === Import the generation services (managers) ===
from aduc_framework.managers.wan_manager import WanManager
from aduc_framework.managers.wan_manager_s2v import WanManagerS2V

print("Initializing managers...")
wan_manager = WanManager()
wan_manager_s2v = WanManagerS2V()
print("Managers initialized.")

# === UI wrapper around the services ===
def ui_generate_video(
    start_image_pil, start_frame_text,
    handle_image_pil, handle_frame_text, handle_peso,
    end_image_pil, end_frame_text, end_peso,
    prompt, negative_prompt,
    duration_seconds, steps,
    guidance_scale, guidance_scale_2,  # guidance_scale_2 is used only in I2V mode
    seed, randomize_seed,
    audio_path,
    progress=gr.Progress(track_tqdm=True),
):
    """Route the request to the S2V manager when an audio file is provided,
    otherwise to the I2V interpolation manager."""
    # <<< ROUTING LOGIC >>>
    if audio_path and os.path.exists(audio_path):
        print("Audio file provided. Redirecting to Speech-to-Video (S2V) manager.")
        video_path, current_seed = wan_manager_s2v.generate_video(
            start_image=start_image_pil,
            audio_path=audio_path,
            prompt=prompt,
            negative_prompt=negative_prompt,
            steps=int(steps),
            guidance_scale=float(guidance_scale),  # S2V uses a single guidance scale
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )
    else:
        print("No audio file provided. Using Image-to-Video (I2V) interpolation manager.")

        # Safe casts for the free-text frame-index and weight fields.
        def to_int_safe(v, default=0):
            try:
                return int(v)
            except (TypeError, ValueError):
                return default

        def to_float_safe(v, default=1.0):
            try:
                return float(v)
            except (TypeError, ValueError):
                return default

        # Build the list of conditioning images for I2V:
        # each item is [PIL image, frame index, weight].
        start_item = [start_image_pil, to_int_safe(start_frame_text, 0), 1.0]
        items = [start_item]
        if handle_image_pil is not None:
            items.append([handle_image_pil, to_int_safe(handle_frame_text, 17), to_float_safe(handle_peso, 1.0)])
        items.append([end_image_pil, to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1), to_float_safe(end_peso, 1.0)])

        video_path, current_seed = wan_manager.generate_video_from_conditions(
            images_condition_items=items,
            prompt=prompt,
            negative_prompt=negative_prompt,
            duration_seconds=float(duration_seconds),
            steps=int(steps),
            guidance_scale=float(guidance_scale),
            guidance_scale_2=float(guidance_scale_2),
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )

    return video_path, current_seed

# === Gradio interface ===
# ... (the rest of the UI is unchanged, since the inputs are already wired up)
css = '''
.fillable{max-width: 1100px !important}
.dark .progress-text {color: white}
#general_items{margin-top: 2em}
'''

with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
    gr.Markdown("# Wan 2.2 Aduca-SDR")
    gr.Markdown("Provide an audio file to use **Speech-to-Video** mode. Leave it empty to use **Image-to-Video** (interpolation) mode.")
    with gr.Row(elem_id="general_items"):
        with gr.Column(scale=2):
            with gr.Group():
                with gr.Row():
                    with gr.Column():
                        start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                        start_frame_tb = gr.Textbox(label="Start Frame Index", value="0", interactive=False)
                    with gr.Column():
                        handle_image = gr.Image(type="pil", label="Handle Image (I2V only)", sources=["upload", "clipboard"])
                        handle_frame_tb = gr.Textbox(label="Handle Frame Index", value="17")
                        handle_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Handle Weight")
                    with gr.Column():
                        end_image = gr.Image(type="pil", label="End Frame (I2V only)", sources=["upload", "clipboard"])
                        end_frame_tb = gr.Textbox(label="End Frame Index", value=str(MAX_FRAMES_MODEL - 1), interactive=False)
                        end_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="End Weight")
            prompt = gr.Textbox(
                label="Prompt",
                info="Describe the scene or action, e.g. 'a beautiful woman singing a song'."
            )
            audio_input = gr.Audio(type="filepath", label="Audio (Optional, for S2V mode)")
            with gr.Accordion("Advanced Settings", open=False):
                duration_seconds_input = gr.Slider(
                    minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.2,
                    label="Video Duration (I2V only)",
                    info=f"Will be snapped to the 4n+1 format. Min: {MIN_FRAMES_MODEL} frames, max: {MAX_FRAMES_MODEL} frames."
                )
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=wan_manager.default_negative_prompt,  # the same default works for both modes
                    lines=3
                )
                steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=20, label="Inference Steps")
                guidance_scale_input = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.5, value=4.5,
                    label="Guidance Scale"
                )
                guidance_scale_2_input = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.5, value=1.0,
                    label="Guidance Scale (Low Noise, I2V only)"
                )
                with gr.Row():
                    seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                    randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
            generate_button = gr.Button("Generate Video", variant="primary")
        with gr.Column(scale=1):
            output_video = gr.Video(label="Generated Video", autoplay=True)

    # Input order must match the signature of ui_generate_video.
    ui_inputs = [
        start_image, start_frame_tb,
        handle_image, handle_frame_tb, handle_peso_sl,
        end_image, end_frame_tb, end_peso_sl,
        prompt, negative_prompt_input,
        duration_seconds_input, steps_slider,
        guidance_scale_input, guidance_scale_2_input,
        seed_input, randomize_seed_checkbox,
        audio_input,
    ]
    ui_outputs = [output_video, seed_input]
    generate_button.click(fn=ui_generate_video, inputs=ui_inputs, outputs=ui_outputs)

if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
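
# === Hedged sketch: duration -> frame count (assumption) ===
# The duration slider's help text says the frame count is snapped to the "4n+1"
# format the model expects. That rounding happens inside WanManager and is not
# shown in this file, so the helper below is only a plausible sketch of the
# mapping, clamped to the model limits; the function name is hypothetical.
def snap_duration_to_frames(duration_seconds: float) -> int:
    """Sketch: map seconds to the nearest 4n+1 frame count within model limits."""
    raw = round(duration_seconds * FIXED_FPS)
    clamped = max(MIN_FRAMES_MODEL, min(raw, MAX_FRAMES_MODEL))
    return 4 * round((clamped - 1) / 4) + 1

# Example: snap_duration_to_frames(3.2) -> 49 (3.2 s * 16 fps = 51.2 -> 51 -> 49).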