TANGO

Runtime error

App Files Files Community

H-Liu1997 committed on Oct 15, 2024

Commit

f9d911c

verified ·

1 Parent(s): a49013b

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -31

app.py CHANGED Viewed

@@ -22,6 +22,7 @@ from decord import VideoReader
 from PIL import Image
 import copy
 import cv2
 import importlib
 import torch
@@ -349,7 +350,7 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
     res_motion = []
     counter = 0
     for path, is_continue in zip(path_list, is_continue_list):
-        if create_graph:
             # Time is limited when we create the graph on Hugging Face, so let's skip blending.
             res_motion_current = path_visualization(
               graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=audio_path, return_motion=True, verbose_continue=True
@@ -481,7 +482,7 @@ def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length
         new_width = int(original_width * (max_length / original_height))
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_path, fourcc, fps, (new_width, new_height))
     frames_to_save = fps * 20
     frame_count = 0
@@ -498,6 +499,14 @@ def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length
     cap.release()
     out.release()
 character_name_to_yaml = {
@@ -510,6 +519,7 @@ character_name_to_yaml = {
 @spaces.GPU(duration=200)
 def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
     cfg = prepare_all("./configs/gradio.yaml")
     cfg.seed = seed
     seed_everything(cfg.seed)
@@ -601,8 +611,8 @@ examples_video = [
 ]
 combined_examples = [
-    ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
-    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
 ]
@@ -641,23 +651,29 @@ def make_demo():
         # Create a gallery with 5 videos
         with gr.Row():
-            video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0")
-            video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1")
-            video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2")
-            video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3")
-            video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4")
         with gr.Row():
-            video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5")
-            video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6")
-            video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
-            video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
-            video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
         with gr.Row():
             gr.Markdown(
               """
               <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
-              This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
               </div>
               """
             )
@@ -668,13 +684,15 @@ def make_demo():
                             interactive=False,
                             autoplay=False,
                             loop=False,
-                            show_share_button=True)
             with gr.Column(scale=4):
                 video_output_2 = gr.Video(label="Generated video - 2",
                             interactive=False,
                             autoplay=False,
                             loop=False,
-                            show_share_button=True)
             with gr.Column(scale=1):
                 file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
                 file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
@@ -682,8 +700,6 @@ def make_demo():
                 <div style="display: flex; justify-content: center; align-items: center; text-align: left;">
                 Details of the low-quality mode:
                 <br>
-                0. for free users, hugging face zero-gpu has quota, if you see "over quota", please try it later, e.g., after 30 mins. for saving your quota, this project is estimated to run around 120~160s. by the following trade-off.
-                <br>
                 1. lower resolution, video resized as long-side 512 and keep aspect ratio.
                 <br>
                 2. subgraph instead of full-graph, causing noticeable "frame jumps".
@@ -733,17 +749,16 @@ def make_demo():
             outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
         )
-        # with gr.Row():
-        #     with gr.Column(scale=4):
-        #         print(combined_examples)
-        #         gr.Examples(
-        #             examples=combined_examples,
-        #             inputs=[audio_input, video_input, seed_input],  # Both audio and video as inputs
-        #             outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
-        #             fn=tango,  # Function that processes both audio and video inputs
-        #             label="Select Combined Audio and Video Examples (Cached)",
-        #             cache_examples=True
-        #         )
     return Interface
@@ -752,4 +767,4 @@ if __name__ == "__main__":
     os.environ["MASTER_PORT"]='8675'
     demo = make_demo()
-    demo.launch(share=True)

 from PIL import Image
 import copy
 import cv2
+import subprocess
 import importlib
 import torch
     res_motion = []
     counter = 0
     for path, is_continue in zip(path_list, is_continue_list):
+        if False:
             # Time is limited when we create the graph on Hugging Face, so let's skip blending.
             res_motion_current = path_visualization(
               graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=audio_path, return_motion=True, verbose_continue=True
         new_width = int(original_width * (max_length / original_height))
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter(output_path.replace(".mp4", "_fps.mp4"), fourcc, fps, (new_width, new_height))
     frames_to_save = fps * 20
     frame_count = 0
     cap.release()
     out.release()
+    command = [
+        'ffmpeg',
+        '-i', output_path.replace(".mp4", "_fps.mp4"),
+        '-vf', 'minterpolate=fps=30:mi_mode=mci:mc_mode=aobmc:vsbmc=1',
+        output_path
+    ]
+    subprocess.run(command)
+    os.remove(output_path.replace(".mp4", "_fps.mp4"))
 character_name_to_yaml = {
 @spaces.GPU(duration=200)
 def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
+    os.system("rm -r ./outputs/")
     cfg = prepare_all("./configs/gradio.yaml")
     cfg.seed = seed
     seed_everything(cfg.seed)
 ]
 combined_examples = [
+    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/female_test_V1.mp4", 2024],
+    # ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
 ]
         # Create a gallery with 5 videos
         with gr.Row():
+            video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0", watermark="./datasets/watermark.png")
+            video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1", watermark="./datasets/watermark.png")
+            video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2", watermark="./datasets/watermark.png")
+            video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3", watermark="./datasets/watermark.png")
+            video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4", watermark="./datasets/watermark.png")
         with gr.Row():
+            video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5", watermark="./datasets/watermark.png")
+            video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6", watermark="./datasets/watermark.png")
+            video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7", watermark="./datasets/watermark.png")
+            video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8", watermark="./datasets/watermark.png")
+            video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9", watermark="./datasets/watermark.png")
         with gr.Row():
             gr.Markdown(
               """
               <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+              This is an open-source project supported by Hugging Face's free L40S GPU. Runtime is limited, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
+              <br>
+              News:
+              <br>
+              [10/15]: Add watermark, fix bugs on custom character by downgrades to py3.9
+              <br>
+              [10/14]: Hugging face supports free L40S GPU for this project now!
               </div>
               """
             )
                             interactive=False,
                             autoplay=False,
                             loop=False,
+                            show_share_button=True,
+                            watermark="./datasets/watermark.png")
             with gr.Column(scale=4):
                 video_output_2 = gr.Video(label="Generated video - 2",
                             interactive=False,
                             autoplay=False,
                             loop=False,
+                            show_share_button=True,
+                            watermark="./datasets/watermark.png")
             with gr.Column(scale=1):
                 file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
                 file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
                 <div style="display: flex; justify-content: center; align-items: center; text-align: left;">
                 Details of the low-quality mode:
                 <br>
                 1. lower resolution, video resized as long-side 512 and keep aspect ratio.
                 <br>
                 2. subgraph instead of full-graph, causing noticeable "frame jumps".
             outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
         )
+        with gr.Row():
+            with gr.Column(scale=4):
+                gr.Examples(
+                    examples=combined_examples,
+                    inputs=[audio_input, video_input, seed_input],  # Both audio and video as inputs
+                    outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
+                    fn=tango,  # Function that processes both audio and video inputs
+                    label="Select Combined Audio and Video Examples (Cached)",
+                    cache_examples=True
+                )
     return Interface
     os.environ["MASTER_PORT"]='8675'
     demo = make_demo()
+    demo.launch(share=True)