fixbugs

Changed files:
- SMPLer-X/app.py (+2 -1)
- app.py (+75 -46)
- create_graph.py (+47 -6)
- requirements.txt (+3 -3)

Note: some deleted ("-") lines were truncated or lost when this page was captured; they are marked "[truncated]" or "[N lines lost]" below.
SMPLer-X/app.py (CHANGED)
--- a/SMPLer-X/app.py
+++ b/SMPLer-X/app.py
@@ -17,7 +17,8 @@ try:
 except:
     os.system('pip install ./main/transformer_utils')
 # hf_hub_download(repo_id="caizhongang/SMPLer-X", filename="smpler_x_h32.pth.tar", local_dir="/home/user/app/pretrained_models")
-
+# /home/user/.pyenv/versions/3.9.19/lib/python3.9/site-packages/torchgeometry/core/conversions.py
+# os.system('cp -rf ./assets/conversions.py /content/myenv/lib/python3.10/site-packages/torchgeometry/core/conversions.py')
 
 def extract_frame_number(file_name):
     match = re.search(r'(\d{5})', file_name)
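Note: the two new comments record a manual patch step: a fixed conversions.py is copied over the installed torchgeometry copy, with the destination hard-coded per environment (pyenv 3.9 vs. Colab 3.10). A sketch of a path-independent variant, assuming the bundled ./assets/conversions.py is the intended replacement:

```python
import os
import shutil
import torchgeometry

# Resolve the installed torchgeometry package instead of hard-coding a
# site-packages path that differs between pyenv, venv, and Colab setups.
target = os.path.join(os.path.dirname(torchgeometry.__file__),
                      "core", "conversions.py")
shutil.copyfile("./assets/conversions.py", target)
```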
app.py (CHANGED)
--- a/app.py
+++ b/app.py
@@ -21,6 +21,7 @@ from datetime import datetime
 from decord import VideoReader
 from PIL import Image
 import copy
+import cv2
 
 import importlib
 import torch
@@ -178,6 +179,7 @@ def search_path_dp(graph, audio_low_np, audio_high_np, loop_penalty=0.1, top_k=1
 
 
 def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio_path, **kwargs):
+    create_graph = kwargs["create_graph"]
     torch.set_grad_enabled(False)
     pool_path = candidate_json_path.replace("data_json", "cached_graph").replace(".json", ".pkl")
     graph = igraph.Graph.Read_Pickle(fname=pool_path)
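Note: kwargs["create_graph"] raises KeyError for any caller that omits the flag; the commit updates the single call site in tango() to match. A defensive variant (hypothetical, not part of this patch) would default it:

```python
# Hypothetical hardening: default to False so call sites that never pass
# the flag keep the old behaviour instead of raising KeyError.
create_graph = kwargs.get("create_graph", False)
```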
@@ -347,25 +349,25 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
     res_motion = []
     counter = 0
     for path, is_continue in zip(path_list, is_continue_list):
-        [19 lines lost]
+        if create_graph:
+            # time is limited if we create graph on hugging face, lets skip blending.
+            res_motion_current = path_visualization(
+                graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=audio_path, return_motion=True, verbose_continue=True
+            )
+            video_temp_path = os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4")
+        else:
+            res_motion_current = path_visualization_v2(
+                graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=None, return_motion=True, verbose_continue=True
+            )
+            video_temp_path = os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4")
+        video_reader = VideoReader(video_temp_path)
+        video_np = []
+        for i in range(len(video_reader)):
+            if i == 0: continue
+            video_frame = video_reader[i].asnumpy()
+            video_np.append(Image.fromarray(video_frame))
+        adjusted_video_pil = adjust_statistics_to_match_reference([video_np])
+        save_videos_from_pil(adjusted_video_pil[0], os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), fps=graph.vs[0]['fps'], bitrate=2000000)
 
     audio_temp_path = audio_path
     lipsync_output_path = os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4")
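Note: adjust_statistics_to_match_reference is defined elsewhere in the repository and its body is not shown in this diff. As an illustration only, color-statistics matching of this kind is typically a per-channel mean/std shift toward a reference; the helper below is a hypothetical sketch of that technique, not the repo's implementation:

```python
import numpy as np
from PIL import Image

def match_mean_std(frames, ref_mean, ref_std):
    """Shift each frame's per-channel mean/std toward a reference so
    retrieved clips match in color. Hypothetical illustration; ref_mean
    and ref_std are assumed to be per-channel arrays of shape (3,)."""
    out = []
    for im in frames:
        a = np.asarray(im).astype(np.float32)
        mean = a.mean(axis=(0, 1))
        std = a.std(axis=(0, 1)) + 1e-6
        a = (a - mean) / std * ref_std + ref_mean
        out.append(Image.fromarray(np.clip(a, 0, 255).astype(np.uint8)))
    return out
```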
@@ -377,6 +379,17 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
 
     start_node = path[1].index
     end_node = start_node + 100
+
+    if create_graph:
+        # time is limited if create graph, let us skip the second video
+        result = [
+            os.path.join(save_dir, f"audio_{idx}_retri_0.mp4"),
+            os.path.join(save_dir, f"audio_{idx}_retri_0.mp4"),
+            os.path.join(save_dir, f"audio_{idx}_retri_0.npz"),
+            os.path.join(save_dir, f"audio_{idx}_retri_0.npz")
+        ]
+        return result
+
     print(f"delete gt-nodes {start_node}, {end_node}")
     nodes_to_delete = list(range(start_node, end_node))
     graph.delete_vertices(nodes_to_delete)
@@ -385,9 +398,9 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
     res_motion = []
     counter = 1
     for path, is_continue in zip(path_list, is_continue_list):
-        res_motion_current = [truncated]
-        [2 lines lost]
+        res_motion_current = path_visualization_v2(
+            graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=None, return_motion=True, verbose_continue=True
+        )
         video_temp_path = os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4")
 
         video_reader = VideoReader(video_temp_path)
@@ -397,7 +410,7 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
            video_frame = video_reader[i].asnumpy()
            video_np.append(Image.fromarray(video_frame))
        adjusted_video_pil = adjust_statistics_to_match_reference([video_np])
-       save_videos_from_pil(adjusted_video_pil[0], os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), fps= [truncated]
+       save_videos_from_pil(adjusted_video_pil[0], os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), fps=graph.vs[0]['fps'], bitrate=2000000)
 
 
     audio_temp_path = audio_path
@@ -446,28 +459,41 @@ def prepare_all(yaml_name):
     return config
 
 
-def save_first_10_seconds(video_path, output_path="./save_video.mp4"):
-    [1 line lost]
+def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length=512):
+    if os.path.exists(output_path):
+        os.remove(output_path)
+
     cap = cv2.VideoCapture(video_path)
 
     if not cap.isOpened():
         return
 
     fps = int(cap.get(cv2.CAP_PROP_FPS))
-    [2 lines lost]
+    original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    # Calculate the aspect ratio and resize dimensions
+    if original_width >= original_height:
+        new_width = max_length
+        new_height = int(original_height * (max_length / original_width))
+    else:
+        new_height = max_length
+        new_width = int(original_width * (max_length / original_height))
 
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_path, fourcc, fps, ( [truncated]
+    out = cv2.VideoWriter(output_path, fourcc, fps, (new_width, new_height))
 
-    frames_to_save = fps * [truncated]
+    frames_to_save = fps * 20
    frame_count = 0
 
     while cap.isOpened() and frame_count < frames_to_save:
         ret, frame = cap.read()
         if not ret:
             break
-        [1 line lost]
+        # Resize the frame while keeping the aspect ratio
+        resized_frame = cv2.resize(frame, (new_width, new_height))
+        # resized_frame = frame
+        out.write(resized_frame)
         frame_count += 1
 
     cap.release()
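Note: despite its name, the updated helper now keeps the first 20 seconds (frames_to_save = fps * 20) and downscales the long side to max_length, which matches items 1 and 4 of the low-quality-mode notes further down. A hypothetical call, with an assumed input path:

```python
# Trim a custom character video to its first 20 s and resize its long
# side to 512 px before pose estimation (input path is hypothetical).
save_first_10_seconds("./datasets/cached_video/my_character.mp4",
                      output_path="./save_video.mp4",
                      max_length=512)
```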
@@ -515,9 +541,13 @@ def tango(audio_path, character_name, seed, create_graph=False, video_folder_pat
     data_save_path = "./outputs/tmpdata/"
     json_save_path = "./outputs/save_video.json"
     graph_save_path = "./outputs/save_video.pkl"
-    os.system(f"cd ./SMPLer-X/ && python app.py --video_folder_path {video_folder_path} --data_save_path {data_save_path} --json_save_path {json_save_path} && cd ..")
+    os.system(f"cd ./SMPLer-X/ && python app.py --video_folder_path .{video_folder_path} --data_save_path .{data_save_path} --json_save_path .{json_save_path} && cd ..")
+    print(f"cd ./SMPLer-X/ && python app.py --video_folder_path .{video_folder_path} --data_save_path .{data_save_path} --json_save_path .{json_save_path} && cd ..")
     os.system(f"python ./create_graph.py --json_save_path {json_save_path} --graph_save_path {graph_save_path}")
     cfg.data.test_meta_paths = json_save_path
+    gc.collect()
+    torch.cuda.empty_cache()
+
 
     smplx_model = smplx.create(
         "./emage/smplx_models/",
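Note: the extra leading dot is the actual bug fix here. Paths such as ./outputs/tmpdata/ are given relative to the repository root, but the command runs after cd ./SMPLer-X/, so from the child script's point of view they must become ../outputs/tmpdata/. A sketch of an equivalent call that makes the working directory explicit (same assumption about root-relative paths):

```python
import subprocess

# Run the SMPLer-X script with its working directory set explicitly,
# passing the same root-relative paths rewritten with the extra dot.
subprocess.run(
    ["python", "app.py",
     "--video_folder_path", f".{video_folder_path}",
     "--data_save_path", f".{data_save_path}",
     "--json_save_path", f".{json_save_path}"],
    cwd="./SMPLer-X",
    check=True,
)
```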
@@ -551,7 +581,7 @@ def tango(audio_path, character_name, seed, create_graph=False, video_folder_pat
 
     test_path = os.path.join(experiment_ckpt_dir, f"test_{0}")
     os.makedirs(test_path, exist_ok=True)
-    result = test_fn(model, device, 0, cfg.data.test_meta_paths, test_path, cfg, audio_path)
+    result = test_fn(model, device, 0, cfg.data.test_meta_paths, test_path, cfg, audio_path, create_graph=create_graph)
     gc.collect()
     torch.cuda.empty_cache()
     return result
@@ -571,13 +601,11 @@ examples_video = [
 ]
 
 combined_examples = [
-    ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker9_o7Ik1OB4TaE_00-00-38.15_00-00-42.33.mp4", 2024],
-    ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker7_iuYlGRnC7J8_00-00-0.00_00-00-3.25.mp4", 2024],
     ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
-    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/ [truncated]
-    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/speaker8_jjRWaMCWs44_00-00-30.16_00-00-33.32.mp4", 2024],
+    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
 ]
 
+
 def make_demo():
     with gr.Blocks(analytics_enabled=False) as Interface:
         gr.Markdown(
@@ -651,22 +679,24 @@ def make_demo():
         file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
         file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
         gr.Markdown("""
-        < [truncated]
+        <div style="display: flex; justify-content: center; align-items: center; text-align: left;">
         Details of the low-quality mode:
         <br>
-        [1 line lost]
+        0. for free users, hugging face zero-gpu has quota, if you see "over quota", please try it later, e.g., after 30 mins. for saving your quota, this project is estimated to run around 120~160s. by the following trade-off.
+        <br>
+        1. lower resolution, video resized as long-side 512 and keep aspect ratio.
         <br>
-        2. [truncated]
+        2. subgraph instead of full-graph, causing noticeable "frame jumps".
         <br>
-        3. [truncated]
+        3. only use the first 8s of your input audio.
         <br>
-        4. only use first [truncated]
+        4. only use the first 20s of your input video for custom character. if you custom character, it will only generate one video result without "smoothing" for saving time.
         <br>
-        5. [truncated]
+        5. use open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
         <br>
         <br>
         Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
-        </ [truncated]
+        </div>
         """)
 
         with gr.Row():
@@ -720,7 +750,6 @@ def make_demo():
 if __name__ == "__main__":
     os.environ["MASTER_ADDR"]='127.0.0.1'
     os.environ["MASTER_PORT"]='8675'
-    [2 lines lost]
+
     demo = make_demo()
-    demo.launch(share=True)
+    demo.launch(share=True)
create_graph.py (CHANGED)
--- a/create_graph.py
+++ b/create_graph.py
@@ -18,7 +18,7 @@ import librosa
 import igraph
 import json
 import utils.rotation_conversions as rc
-from moviepy.editor import VideoClip, AudioFileClip
+from moviepy.editor import VideoClip, AudioFileClip, VideoFileClip
 from tqdm import tqdm
 import imageio
 import tempfile
@@ -263,27 +263,68 @@ def random_walk(graph, walk_length, start_node=None):
     is_continue.append(is_cont)
     return walk, is_continue
 
-[1 line lost]
+import subprocess
 def path_visualization(graph, path, is_continue, save_path, verbose_continue=False, audio_path=None, return_motion=False):
     all_frames = [node['video'] for node in path]
     average_dis_continue = 1 - sum(is_continue) / len(is_continue)
     if verbose_continue:
         print("average_dis_continue:", average_dis_continue)
-    [1 line lost]
+
+    fps = graph.vs[0]['fps']
+    duration = len(all_frames) / fps
+
     def make_frame(t):
-        idx = min(int(t * [truncated]
+        idx = min(int(t * fps), len(all_frames) - 1)
         return all_frames[idx]
+
+    video_only_path = 'video_only.mp4'  # Temporary file
     video_clip = VideoClip(make_frame, duration=duration)
+    video_clip.write_videofile(
+        video_only_path,
+        codec='libx264',
+        fps=fps,
+        audio=False
+    )
+
+    # Optionally, ensure audio and video durations match
     if audio_path is not None:
         audio_clip = AudioFileClip(audio_path)
-        [2 lines lost]
+        video_duration = video_clip.duration
+        audio_duration = audio_clip.duration
+
+        if audio_duration > video_duration:
+            # Trim the audio
+            trimmed_audio_path = 'trimmed_audio.aac'
+            audio_clip = audio_clip.subclip(0, video_duration)
+            audio_clip.write_audiofile(trimmed_audio_path)
+            audio_input = trimmed_audio_path
+        else:
+            audio_input = audio_path
+
+        # Use FFmpeg to combine video and audio
+        ffmpeg_command = [
+            'ffmpeg', '-y',
+            '-i', video_only_path,
+            '-i', audio_input,
+            '-c:v', 'copy',
+            '-c:a', 'aac',
+            '-strict', 'experimental',
+            save_path
+        ]
+        subprocess.check_call(ffmpeg_command)
 
+        # Clean up temporary files if necessary
+        os.remove(video_only_path)
+        if audio_input != audio_path:
+            os.remove(audio_input)
+
     if return_motion:
         all_motion = [node['axis_angle'] for node in path]
         all_motion = np.stack(all_motion, 0)
         return all_motion
 
+
+
 def generate_transition_video(frame_start_path, frame_end_path, output_video_path):
     import subprocess
     import os
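Note: the rewritten path_visualization renders a silent H.264 file with moviepy, then muxes the (possibly trimmed) audio with FFmpeg using stream copy, so the video is not re-encoded a second time. An alternative to trimming via subclip() is FFmpeg's -shortest flag, which ends the output at the shorter input stream; a sketch under that assumption:

```python
import subprocess

def mux_with_shortest(video_only_path, audio_path, save_path):
    """Alternative to the manual subclip() trim: let FFmpeg's -shortest
    flag cut the output at the shorter of the two streams. Assumes the
    ffmpeg binary is available on PATH."""
    subprocess.check_call([
        "ffmpeg", "-y",
        "-i", video_only_path,
        "-i", audio_path,
        "-c:v", "copy",   # keep the libx264 stream as written; no re-encode
        "-c:a", "aac",
        "-shortest",
        save_path,
    ])
```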
requirements.txt (CHANGED)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://download.openmmlab.com/mmcv/dist/cu118/torch2.1.0/index.html
 
 torch==2.1.0
-[1 line lost]
+numpy==1.23.5
 scikit-image==0.21.0
 scikit-learn==1.3.2
 scipy==1.11.4
@@ -14,7 +14,7 @@ opencv-python==4.8.1.78
 tensorboardx
 filterpy
 cython
-chumpy
+chumpy==0.70.0
 Pillow==9.5.0
 trimesh
 pyrender
@@ -32,7 +32,7 @@ timm
 pyglet
 mmcv==2.1.0
 mmdet==3.2.0
-mmpose
+mmpose==0.28.0
 eval_type_backport
 
 wget
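Note: the commit does not state why these pins were chosen. A plausible reading: chumpy 0.70.0 relies on numpy aliases (np.bool, np.object, and friends) that numpy removed in 1.24, so it effectively requires numpy 1.23.x, and pinning mmpose keeps pip from resolving an incompatible release. A quick sanity check for the installed stack, under that assumption:

```python
# Verify the numpy/chumpy pairing these pins appear to target.
import numpy as np
assert np.__version__.startswith("1.23"), np.__version__
import chumpy  # raises AttributeError under numpy >= 1.24
print("numpy", np.__version__, "and chumpy import cleanly")
```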