# Aduc-sdr-2_5 / app_wan.py
# (Hugging Face Spaces page residue, commented out so the file parses:
#  x2XcarleX2x's picture / Update app_wan.py / 8902d93 verified)
# app_wan.py
import os
import gradio as gr
import tempfile
import numpy as np
from PIL import Image
# === Constants ===
MAX_SEED = np.iinfo(np.int32).max  # largest value accepted by the seed slider
FIXED_FPS = 16  # fixed frames-per-second used to convert duration <-> frame count
MIN_FRAMES_MODEL = 8  # model lower bound on generated frame count
MAX_FRAMES_MODEL = 81  # model upper bound on generated frame count
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)  # shortest selectable duration, seconds
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)  # longest selectable duration, seconds
# === Import the generation services (managers) ===
from aduc_framework.managers.wan_manager import WanManager
from aduc_framework.managers.wan_manager_s2v import WanManagerS2V
print("Initializing managers...")
wan_manager = WanManager()  # Image-to-Video (interpolation) service
wan_manager_s2v = WanManagerS2V()  # Speech-to-Video service
print("Managers initialized.")
# === UI wrapper around the services ===
def ui_generate_video(
    start_image_pil,
    start_frame_text,
    handle_image_pil,
    handle_frame_text,
    handle_peso,
    end_image_pil,
    end_frame_text,
    end_peso,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,  # Used only in I2V mode
    seed,
    randomize_seed,
    audio_path,
    progress=gr.Progress(track_tqdm=True),
):
    """Route a UI generation request to the appropriate manager.

    If ``audio_path`` points to an existing file, the request is sent to the
    Speech-to-Video (S2V) manager; otherwise the Image-to-Video (I2V)
    interpolation manager is used with a list of condition images.

    Returns:
        tuple: ``(video_path, current_seed)`` as produced by the chosen manager.
    """

    def to_int_safe(v, default=0):
        # Narrow exception handling: a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit.
        try:
            return int(v)
        except (TypeError, ValueError):
            return default

    def to_float_safe(v, default=1.0):
        try:
            return float(v)
        except (TypeError, ValueError):
            return default

    # <<< ROUTING LOGIC >>>
    if audio_path and os.path.exists(audio_path):
        print("Audio file provided. Redirecting to Speech-to-Video (S2V) manager.")
        video_path, current_seed = wan_manager_s2v.generate_video(
            start_image=start_image_pil,
            audio_path=audio_path,
            prompt=prompt,
            negative_prompt=negative_prompt,
            steps=int(steps),
            guidance_scale=float(guidance_scale),  # S2V uses a single guidance_scale
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )
    else:
        print("No audio file provided. Using Image-to-Video (I2V) interpolation manager.")
        # Build the condition-image list for I2V: [image, frame_index, weight].
        start_item = [start_image_pil, to_int_safe(start_frame_text, 0), 1.0]
        items = [start_item]
        if handle_image_pil is not None:
            items.append([handle_image_pil, to_int_safe(handle_frame_text, 17), to_float_safe(handle_peso, 1.0)])
        # NOTE(review): the end item is appended even when end_image_pil is
        # None — presumably the manager tolerates a missing end frame; confirm.
        items.append([end_image_pil, to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1), to_float_safe(end_peso, 1.0)])
        video_path, current_seed = wan_manager.generate_video_from_conditions(
            images_condition_items=items,
            prompt=prompt,
            negative_prompt=negative_prompt,
            duration_seconds=float(duration_seconds),
            steps=int(steps),
            guidance_scale=float(guidance_scale),
            guidance_scale_2=float(guidance_scale_2),
            seed=int(seed),
            randomize_seed=bool(randomize_seed),
        )
    return video_path, current_seed
# === Gradio Interface ===
# (the remainder of the UI is unchanged; the inputs are already wired)
css = '''
.fillable{max-width: 1100px !important}
.dark .progress-text {color: white}
#general_items{margin-top: 2em}
'''
with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
    gr.Markdown("# Wan 2.2 Aduca-SDR")
    # User-facing copy is intentionally in Portuguese.
    gr.Markdown("Forneça um arquivo de áudio para usar o modo **Speech-to-Video**. Deixe em branco para usar o modo **Image-to-Video** (interpolação).")
    with gr.Row(elem_id="general_items"):
        with gr.Column(scale=2):
            with gr.Group():
                with gr.Row():
                    with gr.Column():
                        start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                        # Start frame index is pinned to 0 (non-interactive).
                        start_frame_tb = gr.Textbox(label="Start Frame Index", value="0", interactive=False)
                    with gr.Column():
                        handle_image = gr.Image(type="pil", label="Handle Image (I2V only)", sources=["upload", "clipboard"])
                        handle_frame_tb = gr.Textbox(label="Handle Frame Index", value="17")
                        handle_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Handle Weight")
                    with gr.Column():
                        end_image = gr.Image(type="pil", label="End Frame (I2V only)", sources=["upload", "clipboard"])
                        # End frame index is pinned to the last model frame (non-interactive).
                        end_frame_tb = gr.Textbox(label="End Frame Index", value=str(MAX_FRAMES_MODEL - 1), interactive=False)
                        end_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="End Weight")
            prompt = gr.Textbox(
                label="Prompt",
                info="Descreva a cena ou a ação. Ex: 'a beautiful woman singing a song'."
            )
            # Presence of an audio file makes ui_generate_video take the S2V path.
            audio_input = gr.Audio(type="filepath", label="Audio (Optional, for S2V mode)")
            with gr.Accordion("Advanced Settings", open=False):
                duration_seconds_input = gr.Slider(
                    minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.2,
                    label="Video Duration (I2V only)",
                    info=f"Será ajustado para o formato 4n+1. Mín: {MIN_FRAMES_MODEL} frames, Máx: {MAX_FRAMES_MODEL} frames."
                )
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=wan_manager.default_negative_prompt,  # can use the same default for both modes
                    lines=3
                )
                steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=20, label="Inference Steps")
                guidance_scale_input = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.5, value=4.5, label="Guidance Scale"
                )
                guidance_scale_2_input = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale (Low Noise, I2V only)"
                )
                with gr.Row():
                    seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                    randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
            generate_button = gr.Button("Generate Video", variant="primary")
        with gr.Column(scale=1):
            output_video = gr.Video(label="Generated Video", autoplay=True)
    # Order must match the parameter order of ui_generate_video exactly.
    ui_inputs = [
        start_image, start_frame_tb,
        handle_image, handle_frame_tb, handle_peso_sl,
        end_image, end_frame_tb, end_peso_sl,
        prompt, negative_prompt_input, duration_seconds_input,
        steps_slider, guidance_scale_input, guidance_scale_2_input,
        seed_input, randomize_seed_checkbox,
        audio_input,
    ]
    # seed_input is also an output so the effective (possibly randomized) seed is shown back.
    ui_outputs = [output_video, seed_input]
    generate_button.click(fn=ui_generate_video, inputs=ui_inputs, outputs=ui_outputs)
if __name__ == "__main__":
    # 0.0.0.0 exposes the app on all interfaces (expected for a hosted Space).
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)