# app_wan.py
import os
import gradio as gr
import numpy as np

# === Constants ===
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 16
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81

MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)

# === Import the generation services (managers) ===
from aduc_framework.managers.wan_manager import WanManager
from aduc_framework.managers.wan_manager_s2v import WanManagerS2V

print("Initializing managers...")
wan_manager = WanManager()
wan_manager_s2v = WanManagerS2V()
print("Managers initialized.")

# === UI wrapper around the services ===
def ui_generate_video(
    start_image_pil, start_frame_text,
    handle_image_pil, handle_frame_text, handle_peso,
    end_image_pil, end_frame_text, end_peso,
    prompt, negative_prompt,
    duration_seconds, steps,
    guidance_scale, guidance_scale_2,  # guidance_scale_2 is used only in I2V mode
    seed, randomize_seed,
    audio_path,
    progress=gr.Progress(track_tqdm=True),
):
    """Route the request to the S2V manager when an audio file is provided,
    otherwise to the I2V interpolation manager."""
    # <<< ROUTING LOGIC >>>
    if audio_path and os.path.exists(audio_path):
        print("Audio file provided. Redirecting to Speech-to-Video (S2V) manager.")
        video_path, current_seed = wan_manager_s2v.generate_video(
            start_image=start_image_pil,
            audio_path=audio_path,
            prompt=prompt,
            negative_prompt=negative_prompt,
            steps=int(steps),
            guidance_scale=float(guidance_scale),  # S2V uses a single guidance scale
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )
    else:
        print("No audio file provided. Using Image-to-Video (I2V) interpolation manager.")

        # Safe casts for the free-text frame-index and weight fields.
        def to_int_safe(v, default=0):
            try:
                return int(v)
            except (TypeError, ValueError):
                return default

        def to_float_safe(v, default=1.0):
            try:
                return float(v)
            except (TypeError, ValueError):
                return default

        # Build the list of conditioning images for I2V:
        # each item is [PIL image, frame index, weight].
        start_item = [start_image_pil, to_int_safe(start_frame_text, 0), 1.0]
        items = [start_item]
        if handle_image_pil is not None:
            items.append([handle_image_pil, to_int_safe(handle_frame_text, 17), to_float_safe(handle_peso, 1.0)])
        items.append([end_image_pil, to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1), to_float_safe(end_peso, 1.0)])

        video_path, current_seed = wan_manager.generate_video_from_conditions(
            images_condition_items=items,
            prompt=prompt,
            negative_prompt=negative_prompt,
            duration_seconds=float(duration_seconds),
            steps=int(steps),
            guidance_scale=float(guidance_scale),
            guidance_scale_2=float(guidance_scale_2),
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )

    return video_path, current_seed

# === Gradio interface ===
# ... (the rest of the UI is unchanged, since the inputs are already wired up)
css = '''
.fillable{max-width: 1100px !important}
.dark .progress-text {color: white}
#general_items{margin-top: 2em}
'''

with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
    gr.Markdown("# Wan 2.2 Aduca-SDR")
    gr.Markdown("Provide an audio file to use **Speech-to-Video** mode. Leave it empty to use **Image-to-Video** (interpolation) mode.")
    with gr.Row(elem_id="general_items"):
        with gr.Column(scale=2):
            with gr.Group():
                with gr.Row():
                    with gr.Column():
                        start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                        start_frame_tb = gr.Textbox(label="Start Frame Index", value="0", interactive=False)
                    with gr.Column():
                        handle_image = gr.Image(type="pil", label="Handle Image (I2V only)", sources=["upload", "clipboard"])
                        handle_frame_tb = gr.Textbox(label="Handle Frame Index", value="17")
                        handle_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Handle Weight")
                    with gr.Column():
                        end_image = gr.Image(type="pil", label="End Frame (I2V only)", sources=["upload", "clipboard"])
                        end_frame_tb = gr.Textbox(label="End Frame Index", value=str(MAX_FRAMES_MODEL - 1), interactive=False)
                        end_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="End Weight")
            prompt = gr.Textbox(
                label="Prompt",
                info="Describe the scene or action, e.g. 'a beautiful woman singing a song'."
            )
            audio_input = gr.Audio(type="filepath", label="Audio (Optional, for S2V mode)")
            with gr.Accordion("Advanced Settings", open=False):
                duration_seconds_input = gr.Slider(
                    minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.2,
                    label="Video Duration (I2V only)",
                    info=f"Will be snapped to the 4n+1 format. Min: {MIN_FRAMES_MODEL} frames, max: {MAX_FRAMES_MODEL} frames."
                )
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=wan_manager.default_negative_prompt,  # the same default works for both modes
                    lines=3
                )
                steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=20, label="Inference Steps")
                guidance_scale_input = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.5, value=4.5,
                    label="Guidance Scale"
                )
                guidance_scale_2_input = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.5, value=1.0,
                    label="Guidance Scale (Low Noise, I2V only)"
                )
                with gr.Row():
                    seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                    randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
            generate_button = gr.Button("Generate Video", variant="primary")
        with gr.Column(scale=1):
            output_video = gr.Video(label="Generated Video", autoplay=True)

    # Input order must match the signature of ui_generate_video.
    ui_inputs = [
        start_image, start_frame_tb,
        handle_image, handle_frame_tb, handle_peso_sl,
        end_image, end_frame_tb, end_peso_sl,
        prompt, negative_prompt_input,
        duration_seconds_input, steps_slider,
        guidance_scale_input, guidance_scale_2_input,
        seed_input, randomize_seed_checkbox,
        audio_input,
    ]
    ui_outputs = [output_video, seed_input]
    generate_button.click(fn=ui_generate_video, inputs=ui_inputs, outputs=ui_outputs)

if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
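
# === Hedged sketch: duration -> frame count (assumption) ===
# The duration slider's help text says the frame count is snapped to the "4n+1"
# format the model expects. That rounding happens inside WanManager and is not
# shown in this file, so the helper below is only a plausible sketch of the
# mapping, clamped to the model limits; the function name is hypothetical.
def snap_duration_to_frames(duration_seconds: float) -> int:
    """Sketch: map seconds to the nearest 4n+1 frame count within model limits."""
    raw = round(duration_seconds * FIXED_FPS)
    clamped = max(MIN_FRAMES_MODEL, min(raw, MAX_FRAMES_MODEL))
    return 4 * round((clamped - 1) / 4) + 1

# Example: snap_duration_to_frames(3.2) -> 49 (3.2 s * 16 fps = 51.2 -> 51 -> 49).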