	update
- app.py +1 -1
- assets/instruction.md +3 -3
- gradio_tabs/animation.py +16 -36
- gradio_tabs/img_edit.py +2 -14
- gradio_tabs/vid_edit.py +8 -13
    	
app.py
CHANGED

@@ -17,7 +17,7 @@ ckpt_path = hf_hub_download(repo_id="YaohuiW/LIA-X", filename="lia-x.pt")
 gen.load_state_dict(torch.load(ckpt_path, weights_only=True))
 gen.eval()
 
-chunk_size=
+chunk_size=30
 
 def load_file(path):
 
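The `chunk_size` constant defined here is consumed by the chunked decoding loops in the tabs below, which compute `chunks = t // chunk_size` and iterate over `range(chunks + 1)` so the final, possibly partial, chunk is covered. A minimal sketch of that partitioning (`chunk_bounds` is a hypothetical helper, not part of the commit):

def chunk_bounds(t, chunk_size=30):
    # Mirrors the chunks = t // chunk_size and range(chunks + 1) pattern
    # used in gradio_tabs; yields (start, end) frame indices per chunk.
    chunks = t // chunk_size
    for i in range(chunks + 1):
        start, end = i * chunk_size, min((i + 1) * chunk_size, t)
        if start < end:  # when t % chunk_size == 0 the last slice is empty
            yield start, end

# e.g. list(chunk_bounds(70)) == [(0, 30), (30, 60), (60, 70)]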
    	
assets/instruction.md
CHANGED

@@ -3,18 +3,18 @@
 * **Image Animation**
 
     - Upload `Source Image` and `Driving Video`
-    - Using sliders in the `Control Panel` to edit image
+    - Using `sliders` in the `Control Panel` to edit image
 	- Use `Animate` button to obtain `Animated Video`
 
 * **Image Editing**
 
     - Upload `Source Image`
-	- Using sliders in the `Control Panel` to edit image
+	- Using `sliders` in the `Control Panel` to edit image
 
 * **Video Editing**
 
     - Upload `Video`
-	- Using sliders in the `Control Panel` to edit image
+	- Using `sliders` in the `Control Panel` to edit image
     - Use `Generate` button to obtain `Edited Video`
 
 **NOTE: we recommend to crop both input images and videos using provided [tools](https://github.com/wyhsirius/LIA-X/tree/main) for better results**
    	
gradio_tabs/animation.py
CHANGED

@@ -90,10 +90,6 @@ def vid_preprocessing(vid_path, size):
 	vid = vid_dict[0].permute(0, 3, 1, 2) # tchw
 	fps = vid_dict[2]['video_fps']
 	vid_norm = (vid / 255.0 - 0.5) * 2.0  # [-1, 1]
-
-	#vid_norm = torch.cat([
-	#	resize(vid_norm[i:i+1, :, :, :], size).unsqueeze(1) for i in range(vid.size(0))
-	#], dim=1)
 	vid_norm = resize(vid_norm, size) # tchw
 
 	return vid_norm, fps

@@ -135,9 +131,7 @@ def vid_postprocessing(video, w, h, fps):
 
 	t,c,_,_ = video.size()
 	vid = resize_back(video, w, h)
-
-	vid = vid.clamp(-1, 1)
-	vid = (vid - vid.min()) / (vid.max() - vid.min())
+	vid = vid_denorm(vid)
 
 	vid = rearrange(vid, "t c h w -> t h w c")	# T H W C
 	vid_np = (vid.cpu().numpy() * 255).astype('uint8')

@@ -215,30 +209,27 @@ def animation(gen, chunk_size, device):
 		vid_target_tensor, fps = vid_preprocessing(video, 512)
 		image_tensor = image_tensor.to(device)
 		video_target_tensor = vid_target_tensor.to(device) #tchw
-
-		#animated_video = gen.animate_batch(image_tensor, video_target_tensor, labels_v, selected_s, chunk_size)
-		#edited_image = animated_video[:,:,0,:,:]
 
 		img_start = video_target_tensor[0:1,:,:,:]
-		#vid_target_tensor_batch = rearrange(video_target_tensor, 'b t c h w -> (b t) c h w')
 
 		res = []
-		t = video_target_tensor.size(0)
+		t, c, h, w = video_target_tensor.size()
+
 		chunks = t // chunk_size
+		if t%chunk_size == 0:
+			vid_target_tensor_batch = torch.zeros(chunk_size * chunks, c, h, w).to(device)
+		else:
+			vid_target_tensor_batch = torch.zeros(chunk_size * (chunks + 1), c, h, w).to(device)
+		vid_target_tensor_batch[:t] = video_target_tensor
+
 		z_s2r, alpha_r2s, feat_rgb = compiled_enc_img(image_tensor, selected_s)
-		#z_s2r, alpha_r2s, feat_rgb = gen.enc_img(image_tensor, labels_v, selected_s)
 		for i in range(chunks+1):
-
-
-
-
-
-
-				img_animated = compiled_dec_vid(z_s2r, alpha_r2s, feat_rgb, img_start, img_target)
-				#img_animated_batch = gen.dec_vid(z_s2r, alpha_r2s, feat_rgb, img_start, img_target_batch)
-
-			res.append(img_animated)
-		animated_video = torch.cat(res, dim=0) # TCHW
+
+			img_target_batch = vid_target_tensor_batch[i * chunk_size:(i + 1) * chunk_size, :, :, :]
+			img_animated_batch = compiled_dec_vid(z_s2r, alpha_r2s, feat_rgb, img_start, img_target_batch)
+
+			res.append(img_animated_batch)
+		animated_video = torch.cat(res, dim=0)[:t] # TCHW
 		edited_image = animated_video[0:1,:,:,:]
 
 		# postprocessing

@@ -308,7 +299,7 @@ def animation(gen, chunk_size, device):
 						#video_output.render()
 						video_output = gr.Video(label="Output Video", elem_id="output_vid", width=512)#.render()
 
-				with gr.Accordion("Control Panel
+				with gr.Accordion("Control Panel - Using Sliders to Edit Image", open=True):
 					with gr.Tab("Head"):
 						with gr.Row():
 							for k in labels_k[:3]:

@@ -344,23 +335,12 @@ def animation(gen, chunk_size, device):
 				fn=edit_media,
 				inputs=[image_input] + inputs_s,
 				outputs=[image_output],
-
 				show_progress='hidden',
-
 				trigger_mode='always_last',
-
 				# currently we have a latency around 450ms
 				stream_every=0.5
 			)
 
-
-		#edit_btn.click(
-		#	fn=edit_media,
-		#	inputs=[image_input] + inputs_s,
-		#	outputs=[image_output],
-		#	show_progress=True
-		#)
-
 		animate_btn.click(
 			fn=animate_media,
 			inputs=[image_input, video_input] + inputs_s,
    	
gradio_tabs/img_edit.py
CHANGED

@@ -95,14 +95,10 @@ def img_denorm(img):
 def img_postprocessing(img, w, h):
 
 	img = resize_back(img, w, h)
-	#image = image.permute(0, 2, 3, 1)
 	img = img_denorm(img)
 	img = img.squeeze(0).permute(1, 2, 0).contiguous()	# contiguous() for fast transfer
 	img_output = (img.cpu().numpy() * 255).astype(np.uint8)
 
-	#with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
-	#	imageio.imwrite(temp_file.name, img_output, quality=8)
-	#	return temp_file.name
 	return img_output
 
 

@@ -196,7 +192,7 @@ def img_edit(gen, device):
 						image_output = gr.Image(label="Output Image", type='numpy', interactive=False, width=512)
 
 
-				with gr.Accordion("Control Panel
+				with gr.Accordion("Control Panel - Using Sliders to Edit Image", open=True):
 					with gr.Tab("Head"):
 						with gr.Row():
 							for k in labels_k[:3]:

@@ -239,15 +235,7 @@ def img_edit(gen, device):
 
 			# currently we have a latency around 450ms
 			stream_every=0.5
-		)
-
-
-		#edit_btn.click(
-		#	fn=edit_img,
-		#	inputs=[image_input] + inputs_s,
-		#	outputs=[image_output],
-		#	show_progress=True
-		#)
+		)
 
 		clear_btn.click(
 			fn=clear_media,
    	
gradio_tabs/vid_edit.py
CHANGED

@@ -231,21 +231,23 @@ def vid_edit(gen, chunk_size, device):
 		res = []
 		t = video_target_tensor.size(1)
 		chunks = t // chunk_size
+
+
 		z_s2r, alpha_r2s, feat_rgb = compiled_enc_img(img_start, selected_s)
 		for i in range(chunks + 1):
 			if i == chunks:
-				img_target_batch =
-				img_animated_batch = compiled_dec_vid(z_s2r, alpha_r2s, feat_rgb, img_start,
+				img_target_batch = video_target_tensor[i * chunk_size:, :, :, :]
+				img_animated_batch = compiled_dec_vid(z_s2r, alpha_r2s, feat_rgb, img_start, img_target_batch)
 			else:
-				img_target_batch =
-				img_animated_batch = compiled_dec_vid(z_s2r, alpha_r2s, feat_rgb, img_start,
+				img_target_batch = video_target_tensor[i * chunk_size:(i + 1) * chunk_size, :, :, :]
+				img_animated_batch = compiled_dec_vid(z_s2r, alpha_r2s, feat_rgb, img_start, img_target_batch)
 
 			res.append(img_animated_batch)
 		edited_video_tensor = torch.cat(res, dim=0)  # TCHW
 		edited_image_tensor = edited_video_tensor[0:1,:,:,:]
 
 		# de-norm
-		animated_video, animated_all_video = vid_all_save(
+		animated_video, animated_all_video = vid_all_save(video_target_tensor, edited_video_tensor, w, h, fps)
 		edited_image = img_postprocessing(edited_image_tensor, w, h)
 
 		return edited_image, animated_video, animated_all_video

@@ -293,7 +295,7 @@ def vid_edit(gen, chunk_size, device):
 						video_all_output = gr.Video(label="Videos", elem_id="output_vid_all")
 
 			with gr.Column(scale=1):
-				with gr.Accordion("Control Panel
+				with gr.Accordion("Control Panel - Using Sliders to Edit Image", open=True):
 					with gr.Tab("Head"):
 						with gr.Row():
 							for k in labels_k[:3]:

@@ -342,13 +344,6 @@ def vid_edit(gen, chunk_size, device):
 				stream_every=0.5
 			)
 
-		#edit_btn.click(
-		#	fn=edit_img,
-		#	inputs=[video_input] + inputs_s,
-		#	outputs=[image_output],
-		#	show_progress=True
-		#)
-
 		animate_btn.click(
 			fn=edit_vid,
 			inputs=[video_input] + inputs_s,  # [image_input, video_input] + inputs_s,
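Unlike animation.py's zero-padding, vid_edit keeps explicit slicing for the tail chunk (`video_target_tensor[i * chunk_size:, :, :, :]`), so the last batch may simply be shorter than `chunk_size`. A condensed sketch of this variable-length strategy, where `decode_fn` is a hypothetical stand-in for `compiled_dec_vid` with its other arguments already bound:

import torch

def decode_in_chunks(decode_fn, frames: torch.Tensor, chunk_size: int) -> torch.Tensor:
    # frames: TCHW; slicing past the end is safe in PyTorch, so the final
    # chunk just comes out shorter than chunk_size.
    res = []
    for start in range(0, frames.size(0), chunk_size):
        res.append(decode_fn(frames[start:start + chunk_size]))
    return torch.cat(res, dim=0)  # TCHW

The trade-off versus padding is that chunk shapes vary, which a compiled decoder may recompile for; in exchange, no padded frames are decoded and then thrown away.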