# Aduc-sdr-2_5 / app_wan.py
# (Hugging Face Spaces page residue, commented out so the file parses:
#  x2XcarleX2x's picture / Update app_wan.py / 8902d93 verified)
# app_wan.py
import os
import gradio as gr
import tempfile
import numpy as np
from PIL import Image
# === Constants ===
MAX_SEED = np.iinfo(np.int32).max  # largest value accepted by the seed slider
FIXED_FPS = 16  # fixed frames-per-second used to convert duration <-> frame count
MIN_FRAMES_MODEL = 8  # model lower bound on generated frame count
MAX_FRAMES_MODEL = 81  # model upper bound on generated frame count
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)  # shortest selectable duration, seconds
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)  # longest selectable duration, seconds
# === Import the generation services (managers) ===
from aduc_framework.managers.wan_manager import WanManager
from aduc_framework.managers.wan_manager_s2v import WanManagerS2V
print("Initializing managers...")
wan_manager = WanManager()  # Image-to-Video (interpolation) service
wan_manager_s2v = WanManagerS2V()  # Speech-to-Video service
print("Managers initialized.")
# === UI wrapper around the services ===
def ui_generate_video(
    start_image_pil,
    start_frame_text,
    handle_image_pil,
    handle_frame_text,
    handle_peso,
    end_image_pil,
    end_frame_text,
    end_peso,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,  # Used only in I2V mode
    seed,
    randomize_seed,
    audio_path,
    progress=gr.Progress(track_tqdm=True),
):
    """Route a UI generation request to the appropriate manager.

    If ``audio_path`` points to an existing file, the request is sent to the
    Speech-to-Video (S2V) manager; otherwise the Image-to-Video (I2V)
    interpolation manager is used with a list of condition images.

    Returns:
        tuple: ``(video_path, current_seed)`` as produced by the chosen manager.
    """

    def to_int_safe(v, default=0):
        # Narrow exception handling: a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit.
        try:
            return int(v)
        except (TypeError, ValueError):
            return default

    def to_float_safe(v, default=1.0):
        try:
            return float(v)
        except (TypeError, ValueError):
            return default

    # <<< ROUTING LOGIC >>>
    if audio_path and os.path.exists(audio_path):
        print("Audio file provided. Redirecting to Speech-to-Video (S2V) manager.")
        video_path, current_seed = wan_manager_s2v.generate_video(
            start_image=start_image_pil,
            audio_path=audio_path,
            prompt=prompt,
            negative_prompt=negative_prompt,
            steps=int(steps),
            guidance_scale=float(guidance_scale),  # S2V uses a single guidance_scale
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )
    else:
        print("No audio file provided. Using Image-to-Video (I2V) interpolation manager.")
        # Build the condition-image list for I2V: [image, frame_index, weight].
        start_item = [start_image_pil, to_int_safe(start_frame_text, 0), 1.0]
        items = [start_item]
        if handle_image_pil is not None:
            items.append([handle_image_pil, to_int_safe(handle_frame_text, 17), to_float_safe(handle_peso, 1.0)])
        # NOTE(review): the end item is appended even when end_image_pil is
        # None — presumably the manager tolerates a missing end frame; confirm.
        items.append([end_image_pil, to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1), to_float_safe(end_peso, 1.0)])
        video_path, current_seed = wan_manager.generate_video_from_conditions(
            images_condition_items=items,
            prompt=prompt,
            negative_prompt=negative_prompt,
            duration_seconds=float(duration_seconds),
            steps=int(steps),
            guidance_scale=float(guidance_scale),
            guidance_scale_2=float(guidance_scale_2),
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )
    return video_path, current_seed
# === Gradio Interface ===
# (the remainder of the UI is unchanged; the inputs are already wired)
css = '''
.fillable{max-width: 1100px !important}
.dark .progress-text {color: white}
#general_items{margin-top: 2em}
'''
with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
    gr.Markdown("# Wan 2.2 Aduca-SDR")
    # User-facing copy is intentionally in Portuguese.
    gr.Markdown("Forneça um arquivo de áudio para usar o modo **Speech-to-Video**. Deixe em branco para usar o modo **Image-to-Video** (interpolação).")
    with gr.Row(elem_id="general_items"):
        with gr.Column(scale=2):
            with gr.Group():
                with gr.Row():
                    with gr.Column():
                        start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                        # Start frame index is pinned to 0 (non-interactive).
                        start_frame_tb = gr.Textbox(label="Start Frame Index", value="0", interactive=False)
                    with gr.Column():
                        handle_image = gr.Image(type="pil", label="Handle Image (I2V only)", sources=["upload", "clipboard"])
                        handle_frame_tb = gr.Textbox(label="Handle Frame Index", value="17")
                        handle_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Handle Weight")
                    with gr.Column():
                        end_image = gr.Image(type="pil", label="End Frame (I2V only)", sources=["upload", "clipboard"])
                        # End frame index is pinned to the last model frame (non-interactive).
                        end_frame_tb = gr.Textbox(label="End Frame Index", value=str(MAX_FRAMES_MODEL - 1), interactive=False)
                        end_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="End Weight")
            prompt = gr.Textbox(
                label="Prompt",
                info="Descreva a cena ou a ação. Ex: 'a beautiful woman singing a song'."
            )
            # Presence of an audio file makes ui_generate_video take the S2V path.
            audio_input = gr.Audio(type="filepath", label="Audio (Optional, for S2V mode)")
            with gr.Accordion("Advanced Settings", open=False):
                duration_seconds_input = gr.Slider(
                    minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.2,
                    label="Video Duration (I2V only)",
                    info=f"Será ajustado para o formato 4n+1. Mín: {MIN_FRAMES_MODEL} frames, Máx: {MAX_FRAMES_MODEL} frames."
                )
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=wan_manager.default_negative_prompt,  # can use the same default for both modes
                    lines=3
                )
                steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=20, label="Inference Steps")
                guidance_scale_input = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.5, value=4.5, label="Guidance Scale"
                )
                guidance_scale_2_input = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale (Low Noise, I2V only)"
                )
                with gr.Row():
                    seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                    randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
            generate_button = gr.Button("Generate Video", variant="primary")
        with gr.Column(scale=1):
            output_video = gr.Video(label="Generated Video", autoplay=True)
    # Order must match the parameter order of ui_generate_video exactly.
    ui_inputs = [
        start_image, start_frame_tb,
        handle_image, handle_frame_tb, handle_peso_sl,
        end_image, end_frame_tb, end_peso_sl,
        prompt, negative_prompt_input, duration_seconds_input,
        steps_slider, guidance_scale_input, guidance_scale_2_input,
        seed_input, randomize_seed_checkbox,
        audio_input,
    ]
    # seed_input is also an output so the effective (possibly randomized) seed is shown back.
    ui_outputs = [output_video, seed_input]
    generate_button.click(fn=ui_generate_video, inputs=ui_inputs, outputs=ui_outputs)
if __name__ == "__main__":
    # 0.0.0.0 exposes the app on all interfaces (expected for a hosted Space).
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)