Spaces:
Paused
Paused
| # app_wan.py | |
| import os | |
| import gradio as gr | |
| import tempfile | |
| import numpy as np | |
| from PIL import Image | |
# === Constants ===
# Upper bound for the seed slider: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Frame rate used to convert frame counts into seconds.
FIXED_FPS = 16

# Frame-count bounds advertised to the user for the model.
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81

# Duration bounds in seconds, derived from the frame bounds and rounded to
# one decimal place.
MIN_DURATION, MAX_DURATION = (
    round(frame_count / FIXED_FPS, 1)
    for frame_count in (MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
)
| # === Importa os serviços de geração (managers) === | |
| from aduc_framework.managers.wan_manager import WanManager | |
| from aduc_framework.managers.wan_manager_s2v import WanManagerS2V | |
# Build both generation managers once, at import time; the UI callback below
# reuses these module-level instances for every request.
print("Initializing managers...")
wan_manager = WanManager()  # image-to-video (I2V) manager
wan_manager_s2v = WanManagerS2V()  # speech-to-video (S2V) manager
print("Managers initialized.")
| # === Wrapper da UI para o Serviço === | |
def _to_int_safe(value, default=0):
    """Return ``int(value)``, or ``default`` when the value cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures fall back to the default; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        return default


def _to_float_safe(value, default=1.0):
    """Return ``float(value)``, or ``default`` when the value cannot be converted."""
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return default


def ui_generate_video(
    start_image_pil,
    start_frame_text,
    handle_image_pil,
    handle_frame_text,
    handle_peso,
    end_image_pil,
    end_frame_text,
    end_peso,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,  # used only by the I2V path
    seed,
    randomize_seed,
    audio_path,
    progress=gr.Progress(track_tqdm=True),
):
    """Route a UI request to the S2V or I2V manager and return its result.

    When ``audio_path`` names an existing file, the request is sent to
    ``wan_manager_s2v.generate_video``. Otherwise it is sent to
    ``wan_manager.generate_video_from_conditions`` together with a list of
    ``[image, frame_index, weight]`` conditioning items.

    Args:
        start_image_pil: Start-frame image; used by both paths.
        start_frame_text: Start frame index as text (falls back to 0).
        handle_image_pil: Optional intermediate image; its item is added
            only when this is not ``None`` (I2V only).
        handle_frame_text: Handle frame index as text (falls back to 17).
        handle_peso: Handle weight (falls back to 1.0).
        end_image_pil: End-frame image (I2V only).
        end_frame_text: End frame index as text (falls back to
            ``MAX_FRAMES_MODEL - 1``).
        end_peso: End weight (falls back to 1.0).
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        duration_seconds: Requested duration, forwarded as ``float`` (I2V only).
        steps: Inference steps, forwarded as ``int``.
        guidance_scale: Guidance scale, forwarded as ``float``.
        guidance_scale_2: Second guidance scale, forwarded as ``float``
            (I2V only).
        seed: Seed, forwarded as ``int``.
        randomize_seed: Forwarded as ``bool``.
        audio_path: Path to an audio file; selects the S2V path when the
            file exists.
        progress: Gradio progress tracker declared with ``track_tqdm=True``;
            it is not referenced directly in the body.

    Returns:
        The ``(video_path, current_seed)`` pair returned by the selected
        manager.
    """
    # --- Routing: an existing audio file selects Speech-to-Video ---
    if audio_path and os.path.exists(audio_path):
        print("Audio file provided. Redirecting to Speech-to-Video (S2V) manager.")
        video_path, current_seed = wan_manager_s2v.generate_video(
            start_image=start_image_pil,
            audio_path=audio_path,
            prompt=prompt,
            negative_prompt=negative_prompt,
            steps=int(steps),
            guidance_scale=float(guidance_scale),  # S2V takes a single guidance scale
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )
        return video_path, current_seed

    print("No audio file provided. Using Image-to-Video (I2V) interpolation manager.")

    # Build the conditioning list for I2V. Each item is
    # [image, frame index, weight]; the start frame always has weight 1.0.
    items = [[start_image_pil, _to_int_safe(start_frame_text, 0), 1.0]]
    if handle_image_pil is not None:
        items.append(
            [
                handle_image_pil,
                _to_int_safe(handle_frame_text, 17),
                _to_float_safe(handle_peso, 1.0),
            ]
        )
    # NOTE(review): the end item is appended even when end_image_pil is None,
    # unlike the handle item — confirm the manager accepts a missing end image.
    items.append(
        [
            end_image_pil,
            _to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1),
            _to_float_safe(end_peso, 1.0),
        ]
    )

    video_path, current_seed = wan_manager.generate_video_from_conditions(
        images_condition_items=items,
        prompt=prompt,
        negative_prompt=negative_prompt,
        duration_seconds=float(duration_seconds),
        steps=int(steps),
        guidance_scale=float(guidance_scale),
        guidance_scale_2=float(guidance_scale_2),
        seed=int(seed),
        randomize_seed=bool(randomize_seed),
    )
    return video_path, current_seed
# === Gradio interface ===
# NOTE(review): the nesting of the layout containers below was reconstructed
# from statement order — confirm it matches the intended layout.
css = """
.fillable{max-width: 1100px !important}
.dark .progress-text {color: white}
#general_items{margin-top: 2em}
"""

with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
    gr.Markdown("# Wan 2.2 Aduca-SDR")
    gr.Markdown(
        "Forneça um arquivo de áudio para usar o modo **Speech-to-Video**. "
        "Deixe em branco para usar o modo **Image-to-Video** (interpolação)."
    )

    with gr.Row(elem_id="general_items"):
        # Left column: conditioning images, prompt, audio and settings.
        with gr.Column(scale=2):
            with gr.Group():
                with gr.Row():
                    # Start frame: index is fixed, so the textbox is read-only.
                    with gr.Column():
                        start_image = gr.Image(
                            type="pil",
                            label="Start Frame",
                            sources=["upload", "clipboard"],
                        )
                        start_frame_tb = gr.Textbox(
                            label="Start Frame Index",
                            value="0",
                            interactive=False,
                        )
                    # Optional handle image with editable index and weight.
                    with gr.Column():
                        handle_image = gr.Image(
                            type="pil",
                            label="Handle Image (I2V only)",
                            sources=["upload", "clipboard"],
                        )
                        handle_frame_tb = gr.Textbox(
                            label="Handle Frame Index",
                            value="17",
                        )
                        handle_peso_sl = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            step=0.01,
                            value=1.0,
                            label="Handle Weight",
                        )
                    # End frame: index is fixed to the last model frame.
                    with gr.Column():
                        end_image = gr.Image(
                            type="pil",
                            label="End Frame (I2V only)",
                            sources=["upload", "clipboard"],
                        )
                        end_frame_tb = gr.Textbox(
                            label="End Frame Index",
                            value=str(MAX_FRAMES_MODEL - 1),
                            interactive=False,
                        )
                        end_peso_sl = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            step=0.01,
                            value=1.0,
                            label="End Weight",
                        )

                prompt = gr.Textbox(
                    label="Prompt",
                    info="Descreva a cena ou a ação. Ex: 'a beautiful woman singing a song'.",
                )
                # Supplying audio switches the callback to the S2V path.
                audio_input = gr.Audio(
                    type="filepath",
                    label="Audio (Optional, for S2V mode)",
                )

                with gr.Accordion("Advanced Settings", open=False):
                    duration_seconds_input = gr.Slider(
                        minimum=MIN_DURATION,
                        maximum=MAX_DURATION,
                        step=0.1,
                        value=3.2,
                        label="Video Duration (I2V only)",
                        info=f"Será ajustado para o formato 4n+1. Mín: {MIN_FRAMES_MODEL} frames, Máx: {MAX_FRAMES_MODEL} frames.",
                    )
                    # The I2V manager's default negative prompt is shown for
                    # both modes.
                    negative_prompt_input = gr.Textbox(
                        label="Negative Prompt",
                        value=wan_manager.default_negative_prompt,
                        lines=3,
                    )
                    steps_slider = gr.Slider(
                        minimum=1,
                        maximum=40,
                        step=1,
                        value=20,
                        label="Inference Steps",
                    )
                    guidance_scale_input = gr.Slider(
                        minimum=0.0,
                        maximum=10.0,
                        step=0.5,
                        value=4.5,
                        label="Guidance Scale",
                    )
                    guidance_scale_2_input = gr.Slider(
                        minimum=0.0,
                        maximum=10.0,
                        step=0.5,
                        value=1.0,
                        label="Guidance Scale (Low Noise, I2V only)",
                    )
                    with gr.Row():
                        seed_input = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=42,
                            interactive=True,
                        )
                        randomize_seed_checkbox = gr.Checkbox(
                            label="Randomize Seed",
                            value=True,
                        )

                generate_button = gr.Button("Generate Video", variant="primary")

        # Right column: the generated result.
        with gr.Column(scale=1):
            output_video = gr.Video(label="Generated Video", autoplay=True)

    # Order must match the positional parameters of ui_generate_video.
    ui_inputs = [
        start_image,
        start_frame_tb,
        handle_image,
        handle_frame_tb,
        handle_peso_sl,
        end_image,
        end_frame_tb,
        end_peso_sl,
        prompt,
        negative_prompt_input,
        duration_seconds_input,
        steps_slider,
        guidance_scale_input,
        guidance_scale_2_input,
        seed_input,
        randomize_seed_checkbox,
        audio_input,
    ]
    # The callback returns (video_path, seed); the seed is written back to
    # the seed slider.
    ui_outputs = [output_video, seed_input]

    generate_button.click(
        fn=ui_generate_video,
        inputs=ui_inputs,
        outputs=ui_outputs,
    )
if __name__ == "__main__":
    # Serve on all network interfaces at port 7860, with Gradio's
    # show_error option enabled.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )