Update app.py
Browse files
app.py
CHANGED
|
@@ -482,32 +482,9 @@ character_name_to_yaml = {
|
|
| 482 |
"101099-00_18_09-00_18_19.mp4": "./datasets/data_json/show_oliver_test/Stupid_Watergate_-_Last_Week_Tonight_with_John_Oliver_HBO-FVFdsl29s_Q.mkv.json",
|
| 483 |
}
|
| 484 |
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
"./emage/smplx_models/",
|
| 489 |
-
model_type='smplx',
|
| 490 |
-
gender='NEUTRAL_2020',
|
| 491 |
-
use_face_contour=False,
|
| 492 |
-
num_betas=300,
|
| 493 |
-
num_expression_coeffs=100,
|
| 494 |
-
ext='npz',
|
| 495 |
-
use_pca=False,
|
| 496 |
-
)
|
| 497 |
-
model = init_class(cfg.model.name_pyfile, cfg.model.class_name, cfg)
|
| 498 |
-
for param in model.parameters():
|
| 499 |
-
param.requires_grad = False
|
| 500 |
-
model.smplx_model = smplx_model
|
| 501 |
-
model.get_motion_reps = get_motion_reps_tensor
|
| 502 |
-
|
| 503 |
-
checkpoint_path = "./datasets/cached_ckpts/ckpt.pth"
|
| 504 |
-
checkpoint = torch.load(checkpoint_path)
|
| 505 |
-
state_dict = checkpoint['model_state_dict']
|
| 506 |
-
# new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
|
| 507 |
-
model.load_state_dict(state_dict, strict=False)
|
| 508 |
-
|
| 509 |
-
@spaces.GPU(duration=299)
|
| 510 |
-
def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None, smplx_model=smplx_model, model=model, cfg=cfg):
|
| 511 |
cfg.seed = seed
|
| 512 |
seed_everything(cfg.seed)
|
| 513 |
experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
|
|
@@ -542,13 +519,35 @@ def tango(audio_path, character_name, seed, create_graph=False, video_folder_pat
|
|
| 542 |
os.system(f"python ./create_graph.py --json_save_path {json_save_path} --graph_save_path {graph_save_path}")
|
| 543 |
cfg.data.test_meta_paths = json_save_path
|
| 544 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
local_rank = 0
|
| 546 |
torch.cuda.set_device(local_rank)
|
| 547 |
device = torch.device("cuda", local_rank)
|
| 548 |
-
|
| 549 |
smplx_model = smplx_model.to(device).eval()
|
| 550 |
model = model.to(device)
|
| 551 |
model.smplx_model = model.smplx_model.to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
|
| 553 |
test_path = os.path.join(experiment_ckpt_dir, f"test_{0}")
|
| 554 |
os.makedirs(test_path, exist_ok=True)
|
|
@@ -572,7 +571,11 @@ examples_video = [
|
|
| 572 |
]
|
| 573 |
|
| 574 |
combined_examples = [
|
| 575 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 576 |
]
|
| 577 |
|
| 578 |
def make_demo():
|
|
@@ -594,31 +597,39 @@ def make_demo():
|
|
| 594 |
<a style='font-size:18px;color: #000000' href=''>[Github Repo]</a>\
|
| 595 |
<a style='font-size:18px;color: #000000' href=''> [ArXiv] </a>\
|
| 596 |
<a style='font-size:18px;color: #000000' href='https://pantomatrix.github.io/TANGO/'> [Project Page] </a> </div>
|
|
|
|
|
|
|
| 597 |
"""
|
| 598 |
)
|
| 599 |
|
| 600 |
-
gr.Markdown("""
|
| 601 |
-
<h4 style="text-align: left;">
|
| 602 |
-
This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
|
| 603 |
|
| 604 |
-
Details of the low-quality mode:
|
| 605 |
-
1. Lower resolution.
|
| 606 |
-
2. More discontinuous frames (causing noticeable "frame jumps").
|
| 607 |
-
3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
|
| 608 |
-
4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
|
| 609 |
-
5. You can provide a custom background video for your character, but it is limited to 20 seconds.
|
| 610 |
|
| 611 |
-
Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
|
| 612 |
-
</h4>
|
| 613 |
-
""")
|
| 614 |
|
| 615 |
# Create a gallery with 5 videos
|
| 616 |
with gr.Row():
|
| 617 |
-
video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo
|
| 618 |
-
video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo
|
| 619 |
-
video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo
|
| 620 |
-
video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo
|
| 621 |
-
video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
|
| 623 |
|
| 624 |
with gr.Row():
|
|
@@ -635,12 +646,31 @@ def make_demo():
|
|
| 635 |
loop=False,
|
| 636 |
show_share_button=True)
|
| 637 |
with gr.Column(scale=1):
|
| 638 |
-
file_output_1 = gr.File(label="Download Motion and Visualize in Blender")
|
| 639 |
-
file_output_2 = gr.File(label="Download Motion and Visualize in Blender")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
|
| 641 |
with gr.Row():
|
| 642 |
with gr.Column(scale=1):
|
| 643 |
audio_input = gr.Audio(label="Upload your audio")
|
|
|
|
| 644 |
with gr.Column(scale=2):
|
| 645 |
gr.Examples(
|
| 646 |
examples=examples_audio,
|
|
@@ -659,9 +689,7 @@ def make_demo():
|
|
| 659 |
label="Character Examples",
|
| 660 |
cache_examples=False
|
| 661 |
)
|
| 662 |
-
|
| 663 |
-
seed_input = gr.Number(label="Seed", value=2024, interactive=True)
|
| 664 |
-
|
| 665 |
# Fourth row: Generate video button
|
| 666 |
with gr.Row():
|
| 667 |
run_button = gr.Button("Generate Video")
|
|
|
|
| 482 |
"101099-00_18_09-00_18_19.mp4": "./datasets/data_json/show_oliver_test/Stupid_Watergate_-_Last_Week_Tonight_with_John_Oliver_HBO-FVFdsl29s_Q.mkv.json",
|
| 483 |
}
|
| 484 |
|
| 485 |
+
@spaces.GPU(duration=240)
|
| 486 |
+
def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
|
| 487 |
+
cfg = prepare_all("./configs/gradio.yaml")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
cfg.seed = seed
|
| 489 |
seed_everything(cfg.seed)
|
| 490 |
experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
|
|
|
|
| 519 |
os.system(f"python ./create_graph.py --json_save_path {json_save_path} --graph_save_path {graph_save_path}")
|
| 520 |
cfg.data.test_meta_paths = json_save_path
|
| 521 |
|
| 522 |
+
smplx_model = smplx.create(
|
| 523 |
+
"./emage/smplx_models/",
|
| 524 |
+
model_type='smplx',
|
| 525 |
+
gender='NEUTRAL_2020',
|
| 526 |
+
use_face_contour=False,
|
| 527 |
+
num_betas=300,
|
| 528 |
+
num_expression_coeffs=100,
|
| 529 |
+
ext='npz',
|
| 530 |
+
use_pca=False,
|
| 531 |
+
)
|
| 532 |
+
model = init_class(cfg.model.name_pyfile, cfg.model.class_name, cfg)
|
| 533 |
+
for param in model.parameters():
|
| 534 |
+
param.requires_grad = False
|
| 535 |
+
model.smplx_model = smplx_model
|
| 536 |
+
model.get_motion_reps = get_motion_reps_tensor
|
| 537 |
+
|
| 538 |
local_rank = 0
|
| 539 |
torch.cuda.set_device(local_rank)
|
| 540 |
device = torch.device("cuda", local_rank)
|
| 541 |
+
|
| 542 |
smplx_model = smplx_model.to(device).eval()
|
| 543 |
model = model.to(device)
|
| 544 |
model.smplx_model = model.smplx_model.to(device)
|
| 545 |
+
|
| 546 |
+
checkpoint_path = "./datasets/cached_ckpts/ckpt.pth"
|
| 547 |
+
checkpoint = torch.load(checkpoint_path)
|
| 548 |
+
state_dict = checkpoint['model_state_dict']
|
| 549 |
+
new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
|
| 550 |
+
model.load_state_dict(new_state_dict, strict=False)
|
| 551 |
|
| 552 |
test_path = os.path.join(experiment_ckpt_dir, f"test_{0}")
|
| 553 |
os.makedirs(test_path, exist_ok=True)
|
|
|
|
| 571 |
]
|
| 572 |
|
| 573 |
combined_examples = [
|
| 574 |
+
["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker9_o7Ik1OB4TaE_00-00-38.15_00-00-42.33.mp4", 2024],
|
| 575 |
+
["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker7_iuYlGRnC7J8_00-00-0.00_00-00-3.25.mp4", 2024],
|
| 576 |
+
["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
|
| 577 |
+
["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/1wrQ6Msp7wM_00-00-39.69_00-00-45.68.mp4", 2024],
|
| 578 |
+
["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/speaker8_jjRWaMCWs44_00-00-30.16_00-00-33.32.mp4", 2024],
|
| 579 |
]
|
| 580 |
|
| 581 |
def make_demo():
|
|
|
|
| 597 |
<a style='font-size:18px;color: #000000' href=''>[Github Repo]</a>\
|
| 598 |
<a style='font-size:18px;color: #000000' href=''> [ArXiv] </a>\
|
| 599 |
<a style='font-size:18px;color: #000000' href='https://pantomatrix.github.io/TANGO/'> [Project Page] </a> </div>
|
| 600 |
+
</h2> \
|
| 601 |
+
<a style='font-size:18px;color: #000000'>This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some high-quality mode results are shown below. </a> </div>
|
| 602 |
"""
|
| 603 |
)
|
| 604 |
|
| 605 |
+
# gr.Markdown("""
|
| 606 |
+
# <h4 style="text-align: left;">
|
| 607 |
+
# This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
|
| 608 |
|
| 609 |
+
# Details of the low-quality mode:
|
| 610 |
+
# 1. Lower resolution.
|
| 611 |
+
# 2. More discontinuous frames (causing noticeable "frame jumps").
|
| 612 |
+
# 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
|
| 613 |
+
# 4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
|
| 614 |
+
# 5. You can provide a custom background video for your character, but it is limited to 20 seconds.
|
| 615 |
|
| 616 |
+
# Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
|
| 617 |
+
# </h4>
|
| 618 |
+
# """)
|
| 619 |
|
| 620 |
# Create a gallery with 5 videos
|
| 621 |
with gr.Row():
|
| 622 |
+
video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0")
|
| 623 |
+
video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1")
|
| 624 |
+
video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2")
|
| 625 |
+
video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3")
|
| 626 |
+
video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4")
|
| 627 |
+
with gr.Row():
|
| 628 |
+
video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5")
|
| 629 |
+
video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6")
|
| 630 |
+
video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
|
| 631 |
+
video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
|
| 632 |
+
video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
|
| 633 |
|
| 634 |
|
| 635 |
with gr.Row():
|
|
|
|
| 646 |
loop=False,
|
| 647 |
show_share_button=True)
|
| 648 |
with gr.Column(scale=1):
|
| 649 |
+
file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
| 650 |
+
file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
| 651 |
+
gr.Markdown("""
|
| 652 |
+
<h4 style="text-align: left;">
|
| 653 |
+
<a style='font-size:18px;color: #000000'> Details of the low-quality mode: </a>
|
| 654 |
+
<br>
|
| 655 |
+
<a style='font-size:18px;color: #000000'> 1. Lower resolution.</a>
|
| 656 |
+
<br>
|
| 657 |
+
<a style='font-size:18px;color: #000000'> 2. More discontinuous graph nodes (causing noticeable "frame jumps"). </a>
|
| 658 |
+
<br>
|
| 659 |
+
<a style='font-size:18px;color: #000000'> 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing. </a>
|
| 660 |
+
<br>
|
| 661 |
+
<a style='font-size:18px;color: #000000'> 4. only use first 8 seconds of your input audio.</a>
|
| 662 |
+
<br>
|
| 663 |
+
<a style='font-size:18px;color: #000000'> 5. custom character for a video up to 10 seconds. </a>
|
| 664 |
+
<br>
|
| 665 |
+
<br>
|
| 666 |
+
<a style='font-size:18px;color: #000000'> Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.</a>
|
| 667 |
+
</h4>
|
| 668 |
+
""")
|
| 669 |
|
| 670 |
with gr.Row():
|
| 671 |
with gr.Column(scale=1):
|
| 672 |
audio_input = gr.Audio(label="Upload your audio")
|
| 673 |
+
seed_input = gr.Number(label="Seed", value=2024, interactive=True)
|
| 674 |
with gr.Column(scale=2):
|
| 675 |
gr.Examples(
|
| 676 |
examples=examples_audio,
|
|
|
|
| 689 |
label="Character Examples",
|
| 690 |
cache_examples=False
|
| 691 |
)
|
| 692 |
+
|
|
|
|
|
|
|
| 693 |
# Fourth row: Generate video button
|
| 694 |
with gr.Row():
|
| 695 |
run_button = gr.Button("Generate Video")
|