Update app.py
Browse files
app.py
CHANGED
|
@@ -580,42 +580,36 @@ combined_examples = [
|
|
| 580 |
|
| 581 |
def make_demo():
|
| 582 |
with gr.Blocks(analytics_enabled=False) as Interface:
|
| 583 |
-
# First row: Audio upload and Audio examples with adjusted ratio
|
| 584 |
gr.Markdown(
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 603 |
)
|
| 604 |
-
|
| 605 |
-
# gr.Markdown("""
|
| 606 |
-
# <h4 style="text-align: left;">
|
| 607 |
-
# This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
|
| 608 |
-
|
| 609 |
-
# Details of the low-quality mode:
|
| 610 |
-
# 1. Lower resolution.
|
| 611 |
-
# 2. More discontinuous frames (causing noticeable "frame jumps").
|
| 612 |
-
# 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
|
| 613 |
-
# 4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
|
| 614 |
-
# 5. You can provide a custom background video for your character, but it is limited to 20 seconds.
|
| 615 |
-
|
| 616 |
-
# Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
|
| 617 |
-
# </h4>
|
| 618 |
-
# """)
|
| 619 |
|
| 620 |
# Create a gallery with 5 videos
|
| 621 |
with gr.Row():
|
|
@@ -630,7 +624,15 @@ def make_demo():
|
|
| 630 |
video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
|
| 631 |
video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
|
| 632 |
video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
|
| 633 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 634 |
|
| 635 |
with gr.Row():
|
| 636 |
with gr.Column(scale=4):
|
|
@@ -650,20 +652,20 @@ def make_demo():
|
|
| 650 |
file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
| 651 |
gr.Markdown("""
|
| 652 |
<h4 style="text-align: left;">
|
| 653 |
-
|
| 654 |
<br>
|
| 655 |
-
|
| 656 |
<br>
|
| 657 |
-
|
| 658 |
<br>
|
| 659 |
-
|
| 660 |
<br>
|
| 661 |
-
|
| 662 |
<br>
|
| 663 |
-
|
| 664 |
<br>
|
| 665 |
<br>
|
| 666 |
-
|
| 667 |
</h4>
|
| 668 |
""")
|
| 669 |
|
|
@@ -701,17 +703,17 @@ def make_demo():
|
|
| 701 |
outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
|
| 702 |
)
|
| 703 |
|
| 704 |
-
with gr.Row():
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
|
| 716 |
return Interface
|
| 717 |
|
|
|
|
| 580 |
|
| 581 |
def make_demo():
|
| 582 |
with gr.Blocks(analytics_enabled=False) as Interface:
|
|
|
|
| 583 |
gr.Markdown(
|
| 584 |
+
"""
|
| 585 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 586 |
+
<div>
|
| 587 |
+
<h1>TANGO</h1>
|
| 588 |
+
<span>Generating full-body talking videos from audio and reference video</span>
|
| 589 |
+
<h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
|
| 590 |
+
<a href='https://h-liu1997.github.io/'>Haiyang Liu</a>, \
|
| 591 |
+
<a href='https://yangxingchao.github.io/'>Xingchao Yang</a>, \
|
| 592 |
+
<a href=''>Tomoya Akiyama</a>, \
|
| 593 |
+
<a href='https://sky24h.github.io/'>Yuantian Huang</a>, \
|
| 594 |
+
<a href=''>Qiaoge Li</a>, \
|
| 595 |
+
<a href='https://www.tut.ac.jp/english/university/faculty/cs/164.html'>Shigeru Kuriyama</a>, \
|
| 596 |
+
<a href='https://taketomitakafumi.sakura.ne.jp/web/en/'>Takafumi Taketomi</a>\
|
| 597 |
+
</h2>
|
| 598 |
+
<br>
|
| 599 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 600 |
+
<a href="https://arxiv.org/abs/2410.04221"><img src="https://img.shields.io/badge/arXiv-2410.04221-blue"></a>
|
| 601 |
+
|
| 602 |
+
<a href="https://pantomatrix.github.io/TANGO/"><img src="https://img.shields.io/badge/Project_Page-TANGO-orange" alt="Project Page"></a>
|
| 603 |
+
|
| 604 |
+
<a href="https://github.com/CyberAgentAILab/TANGO"><img src="https://img.shields.io/badge/Github-Code-green"></a>
|
| 605 |
+
|
| 606 |
+
<a href="https://github.com/CyberAgentAILab/TANGO"><img src="https://img.shields.io/github/stars/CyberAgentAILab/TANGO
|
| 607 |
+
"></a>
|
| 608 |
+
</div>
|
| 609 |
+
</div>
|
| 610 |
+
</div>
|
| 611 |
+
"""
|
| 612 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
|
| 614 |
# Create a gallery with 5 videos
|
| 615 |
with gr.Row():
|
|
|
|
| 624 |
video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
|
| 625 |
video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
|
| 626 |
video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
|
| 627 |
+
|
| 628 |
+
with gr.Row():
|
| 629 |
+
gr.Markdown(
|
| 630 |
+
"""
|
| 631 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 632 |
+
This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
|
| 633 |
+
</div>
|
| 634 |
+
"""
|
| 635 |
+
)
|
| 636 |
|
| 637 |
with gr.Row():
|
| 638 |
with gr.Column(scale=4):
|
|
|
|
| 652 |
file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
| 653 |
gr.Markdown("""
|
| 654 |
<h4 style="text-align: left;">
|
| 655 |
+
Details of the low-quality mode:
|
| 656 |
<br>
|
| 657 |
+
1. Lower resolution.
|
| 658 |
<br>
|
| 659 |
+
2. More discontinuous graph nodes (causing noticeable "frame jumps").
|
| 660 |
<br>
|
| 661 |
+
3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
|
| 662 |
<br>
|
| 663 |
+
4. Only the first 8 seconds of your input audio are used.
|
| 664 |
<br>
|
| 665 |
+
5. You can provide a custom character video, but it is limited to 10 seconds.
|
| 666 |
<br>
|
| 667 |
<br>
|
| 668 |
+
Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
|
| 669 |
</h4>
|
| 670 |
""")
|
| 671 |
|
|
|
|
| 703 |
outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
|
| 704 |
)
|
| 705 |
|
| 706 |
+
# with gr.Row():
|
| 707 |
+
# with gr.Column(scale=4):
|
| 708 |
+
# print(combined_examples)
|
| 709 |
+
# gr.Examples(
|
| 710 |
+
# examples=combined_examples,
|
| 711 |
+
# inputs=[audio_input, video_input, seed_input], # Both audio and video as inputs
|
| 712 |
+
# outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
|
| 713 |
+
# fn=tango, # Function that processes both audio and video inputs
|
| 714 |
+
# label="Select Combined Audio and Video Examples (Cached)",
|
| 715 |
+
# cache_examples=True
|
| 716 |
+
# )
|
| 717 |
|
| 718 |
return Interface
|
| 719 |
|