Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,6 +22,7 @@ from decord import VideoReader
|
|
| 22 |
from PIL import Image
|
| 23 |
import copy
|
| 24 |
import cv2
|
|
|
|
| 25 |
|
| 26 |
import importlib
|
| 27 |
import torch
|
|
@@ -349,7 +350,7 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
|
|
| 349 |
res_motion = []
|
| 350 |
counter = 0
|
| 351 |
for path, is_continue in zip(path_list, is_continue_list):
|
| 352 |
-
if
|
| 353 |
# time is limited if we create graph on hugging face, lets skip blending.
|
| 354 |
res_motion_current = path_visualization(
|
| 355 |
graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=audio_path, return_motion=True, verbose_continue=True
|
|
@@ -481,7 +482,7 @@ def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length
|
|
| 481 |
new_width = int(original_width * (max_length / original_height))
|
| 482 |
|
| 483 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 484 |
-
out = cv2.VideoWriter(output_path, fourcc, fps, (new_width, new_height))
|
| 485 |
|
| 486 |
frames_to_save = fps * 20
|
| 487 |
frame_count = 0
|
|
@@ -498,6 +499,14 @@ def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length
|
|
| 498 |
|
| 499 |
cap.release()
|
| 500 |
out.release()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
|
| 502 |
|
| 503 |
character_name_to_yaml = {
|
|
@@ -510,6 +519,7 @@ character_name_to_yaml = {
|
|
| 510 |
|
| 511 |
@spaces.GPU(duration=200)
|
| 512 |
def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
|
|
|
|
| 513 |
cfg = prepare_all("./configs/gradio.yaml")
|
| 514 |
cfg.seed = seed
|
| 515 |
seed_everything(cfg.seed)
|
|
@@ -601,8 +611,8 @@ examples_video = [
|
|
| 601 |
]
|
| 602 |
|
| 603 |
combined_examples = [
|
| 604 |
-
["./datasets/cached_audio/
|
| 605 |
-
["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
|
| 606 |
]
|
| 607 |
|
| 608 |
|
|
@@ -641,23 +651,29 @@ def make_demo():
|
|
| 641 |
|
| 642 |
# Create a gallery with 5 videos
|
| 643 |
with gr.Row():
|
| 644 |
-
video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0")
|
| 645 |
-
video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1")
|
| 646 |
-
video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2")
|
| 647 |
-
video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3")
|
| 648 |
-
video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4")
|
| 649 |
with gr.Row():
|
| 650 |
-
video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5")
|
| 651 |
-
video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6")
|
| 652 |
-
video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
|
| 653 |
-
video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
|
| 654 |
-
video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
|
| 655 |
|
| 656 |
with gr.Row():
|
| 657 |
gr.Markdown(
|
| 658 |
"""
|
| 659 |
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 660 |
-
This is an open-source project supported by Hugging Face's free
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
</div>
|
| 662 |
"""
|
| 663 |
)
|
|
@@ -668,13 +684,15 @@ def make_demo():
|
|
| 668 |
interactive=False,
|
| 669 |
autoplay=False,
|
| 670 |
loop=False,
|
| 671 |
-
show_share_button=True
|
|
|
|
| 672 |
with gr.Column(scale=4):
|
| 673 |
video_output_2 = gr.Video(label="Generated video - 2",
|
| 674 |
interactive=False,
|
| 675 |
autoplay=False,
|
| 676 |
loop=False,
|
| 677 |
-
show_share_button=True
|
|
|
|
| 678 |
with gr.Column(scale=1):
|
| 679 |
file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
| 680 |
file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
|
@@ -682,8 +700,6 @@ def make_demo():
|
|
| 682 |
<div style="display: flex; justify-content: center; align-items: center; text-align: left;">
|
| 683 |
Details of the low-quality mode:
|
| 684 |
<br>
|
| 685 |
-
0. for free users, hugging face zero-gpu has quota, if you see "over quota", please try it later, e.g., after 30 mins. for saving your quota, this project is estimated to run around 120~160s. by the following trade-off.
|
| 686 |
-
<br>
|
| 687 |
1. lower resolution, video resized as long-side 512 and keep aspect ratio.
|
| 688 |
<br>
|
| 689 |
2. subgraph instead of full-graph, causing noticeable "frame jumps".
|
|
@@ -733,17 +749,16 @@ def make_demo():
|
|
| 733 |
outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
|
| 734 |
)
|
| 735 |
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
# )
|
| 747 |
|
| 748 |
return Interface
|
| 749 |
|
|
@@ -752,4 +767,4 @@ if __name__ == "__main__":
|
|
| 752 |
os.environ["MASTER_PORT"]='8675'
|
| 753 |
|
| 754 |
demo = make_demo()
|
| 755 |
-
demo.launch(share=True)
|
|
|
|
| 22 |
from PIL import Image
|
| 23 |
import copy
|
| 24 |
import cv2
|
| 25 |
+
import subprocess
|
| 26 |
|
| 27 |
import importlib
|
| 28 |
import torch
|
|
|
|
| 350 |
res_motion = []
|
| 351 |
counter = 0
|
| 352 |
for path, is_continue in zip(path_list, is_continue_list):
|
| 353 |
+
if False:
|
| 354 |
# time is limited if we create graph on hugging face, lets skip blending.
|
| 355 |
res_motion_current = path_visualization(
|
| 356 |
graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=audio_path, return_motion=True, verbose_continue=True
|
|
|
|
| 482 |
new_width = int(original_width * (max_length / original_height))
|
| 483 |
|
| 484 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 485 |
+
out = cv2.VideoWriter(output_path.replace(".mp4", "_fps.mp4"), fourcc, fps, (new_width, new_height))
|
| 486 |
|
| 487 |
frames_to_save = fps * 20
|
| 488 |
frame_count = 0
|
|
|
|
| 499 |
|
| 500 |
cap.release()
|
| 501 |
out.release()
|
| 502 |
+
command = [
|
| 503 |
+
'ffmpeg',
|
| 504 |
+
'-i', output_path.replace(".mp4", "_fps.mp4"),
|
| 505 |
+
'-vf', 'minterpolate=fps=30:mi_mode=mci:mc_mode=aobmc:vsbmc=1',
|
| 506 |
+
output_path
|
| 507 |
+
]
|
| 508 |
+
subprocess.run(command)
|
| 509 |
+
os.remove(output_path.replace(".mp4", "_fps.mp4"))
|
| 510 |
|
| 511 |
|
| 512 |
character_name_to_yaml = {
|
|
|
|
| 519 |
|
| 520 |
@spaces.GPU(duration=200)
|
| 521 |
def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
|
| 522 |
+
os.system("rm -r ./outputs/")
|
| 523 |
cfg = prepare_all("./configs/gradio.yaml")
|
| 524 |
cfg.seed = seed
|
| 525 |
seed_everything(cfg.seed)
|
|
|
|
| 611 |
]
|
| 612 |
|
| 613 |
combined_examples = [
|
| 614 |
+
["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/female_test_V1.mp4", 2024],
|
| 615 |
+
# ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
|
| 616 |
]
|
| 617 |
|
| 618 |
|
|
|
|
| 651 |
|
| 652 |
# Create a gallery with 5 videos
|
| 653 |
with gr.Row():
|
| 654 |
+
video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0", watermark="./datasets/watermark.png")
|
| 655 |
+
video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1", watermark="./datasets/watermark.png")
|
| 656 |
+
video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2", watermark="./datasets/watermark.png")
|
| 657 |
+
video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3", watermark="./datasets/watermark.png")
|
| 658 |
+
video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4", watermark="./datasets/watermark.png")
|
| 659 |
with gr.Row():
|
| 660 |
+
video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5", watermark="./datasets/watermark.png")
|
| 661 |
+
video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6", watermark="./datasets/watermark.png")
|
| 662 |
+
video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7", watermark="./datasets/watermark.png")
|
| 663 |
+
video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8", watermark="./datasets/watermark.png")
|
| 664 |
+
video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9", watermark="./datasets/watermark.png")
|
| 665 |
|
| 666 |
with gr.Row():
|
| 667 |
gr.Markdown(
|
| 668 |
"""
|
| 669 |
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 670 |
+
This is an open-source project supported by Hugging Face's free L40S GPU. Runtime is limited, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
|
| 671 |
+
<br>
|
| 672 |
+
News:
|
| 673 |
+
<br>
|
| 674 |
+
[10/15]: Add watermark, fix bugs on custom character by downgrades to py3.9
|
| 675 |
+
<br>
|
| 676 |
+
[10/14]: Hugging face supports free L40S GPU for this project now!
|
| 677 |
</div>
|
| 678 |
"""
|
| 679 |
)
|
|
|
|
| 684 |
interactive=False,
|
| 685 |
autoplay=False,
|
| 686 |
loop=False,
|
| 687 |
+
show_share_button=True,
|
| 688 |
+
watermark="./datasets/watermark.png")
|
| 689 |
with gr.Column(scale=4):
|
| 690 |
video_output_2 = gr.Video(label="Generated video - 2",
|
| 691 |
interactive=False,
|
| 692 |
autoplay=False,
|
| 693 |
loop=False,
|
| 694 |
+
show_share_button=True,
|
| 695 |
+
watermark="./datasets/watermark.png")
|
| 696 |
with gr.Column(scale=1):
|
| 697 |
file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
| 698 |
file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
|
|
|
| 700 |
<div style="display: flex; justify-content: center; align-items: center; text-align: left;">
|
| 701 |
Details of the low-quality mode:
|
| 702 |
<br>
|
|
|
|
|
|
|
| 703 |
1. lower resolution, video resized as long-side 512 and keep aspect ratio.
|
| 704 |
<br>
|
| 705 |
2. subgraph instead of full-graph, causing noticeable "frame jumps".
|
|
|
|
| 749 |
outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
|
| 750 |
)
|
| 751 |
|
| 752 |
+
with gr.Row():
|
| 753 |
+
with gr.Column(scale=4):
|
| 754 |
+
gr.Examples(
|
| 755 |
+
examples=combined_examples,
|
| 756 |
+
inputs=[audio_input, video_input, seed_input], # Both audio and video as inputs
|
| 757 |
+
outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
|
| 758 |
+
fn=tango, # Function that processes both audio and video inputs
|
| 759 |
+
label="Select Combined Audio and Video Examples (Cached)",
|
| 760 |
+
cache_examples=True
|
| 761 |
+
)
|
|
|
|
| 762 |
|
| 763 |
return Interface
|
| 764 |
|
|
|
|
| 767 |
os.environ["MASTER_PORT"]='8675'
|
| 768 |
|
| 769 |
demo = make_demo()
|
| 770 |
+
demo.launch(share=True)
|