garyuzair committed on
Commit
a87d440
·
verified ·
1 Parent(s): adccf8a

Upload 7 files

Files changed (7)
  1. animator.py +176 -17
  2. app.py +200 -4
  3. image_generator.py +50 -2
  4. prompt_generator.py +110 -19
  5. requirements.txt +15 -12
  6. transcriber.py +125 -89
  7. video_creator.py +239 -83
animator.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  import os
3
  import numpy as np
4
- from PIL import Image
5
  import time
6
  from concurrent.futures import ThreadPoolExecutor
7
  from functools import partial
@@ -9,11 +9,66 @@ from functools import partial
9
  class Animator:
10
  def __init__(self):
11
  self.frame_cache = {}
12
 
13
  def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
14
- """Add a simple zoom animation to an image"""
15
  # Check cache first
16
- cache_key = f"zoom_{image_path}_{num_frames}_{zoom_factor}"
17
  if cache_key in self.frame_cache:
18
  return self.frame_cache[cache_key]
19
 
@@ -35,6 +90,9 @@ class Animator:
35
  top = (img.height - scaled_img.height) // 2
36
  new_img.paste(scaled_img, (left, top))
37
 
 
 
 
38
  # Save the frame
39
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{len(frames)}.png"
40
  new_img.save(frame_path)
@@ -45,9 +103,9 @@ class Animator:
45
  return frames
46
 
47
  def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
48
- """Add a simple panning animation to an image"""
49
  # Check cache first
50
- cache_key = f"pan_{image_path}_{num_frames}_{direction}"
51
  if cache_key in self.frame_cache:
52
  return self.frame_cache[cache_key]
53
 
@@ -60,22 +118,32 @@ class Animator:
60
  # Create a sequence of panned images
61
  frames = []
62
63
  # Calculate pan parameters
64
  if direction == "right":
65
- x_shifts = np.linspace(0, img.width * 0.1, num_frames)
66
  y_shifts = np.zeros(num_frames)
67
  elif direction == "left":
68
- x_shifts = np.linspace(0, -img.width * 0.1, num_frames)
69
  y_shifts = np.zeros(num_frames)
70
  elif direction == "down":
71
  x_shifts = np.zeros(num_frames)
72
- y_shifts = np.linspace(0, img.height * 0.1, num_frames)
73
  elif direction == "up":
74
  x_shifts = np.zeros(num_frames)
75
- y_shifts = np.linspace(0, -img.height * 0.1, num_frames)
76
  else:
77
  # Default to right
78
- x_shifts = np.linspace(0, img.width * 0.1, num_frames)
79
  y_shifts = np.zeros(num_frames)
80
 
81
  for i in range(num_frames):
@@ -85,6 +153,9 @@ class Animator:
85
  # Paste the original image with shift
86
  new_img.paste(img, (int(x_shifts[i]), int(y_shifts[i])))
87
 
 
 
 
88
  # Save the frame
89
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
90
  new_img.save(frame_path)
@@ -95,9 +166,9 @@ class Animator:
95
  return frames
96
 
97
  def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
98
- """Add a fade in/out animation to an image"""
99
  # Check cache first
100
- cache_key = f"fade_{image_path}_{num_frames}_{fade_type}"
101
  if cache_key in self.frame_cache:
102
  return self.frame_cache[cache_key]
103
 
@@ -122,10 +193,86 @@ class Animator:
122
  # Create a new image with adjusted brightness
123
  enhancer = Image.new("RGBA", img.size, (0, 0, 0, 0))
124
  new_img = Image.blend(enhancer, img.convert("RGBA"), alpha)
125
 
126
  # Save the frame
127
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
128
- new_img.convert("RGB").save(frame_path)
129
  frames.append(frame_path)
130
 
131
  # Cache the result
@@ -133,18 +280,30 @@ class Animator:
133
  return frames
134
 
135
  def animate_single_image(self, img_path, animation_type="random", output_dir="temp"):
136
- """Animate a single image"""
137
  # Choose animation type
138
- animation_types = ["zoom", "pan_right", "pan_left", "fade_in"]
 
 
 
 
 
 
 
 
139
 
140
  if animation_type == "random":
141
  # Use hash of image path to deterministically select animation type
142
- chosen_type = animation_types[hash(img_path) % len(animation_types)]
 
 
143
  else:
144
  chosen_type = animation_type
145
 
146
  # Apply the chosen animation
147
- if chosen_type.startswith("pan"):
 
 
148
  direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
149
  frames = self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
150
  elif chosen_type.startswith("fade"):
 
1
  import streamlit as st
2
  import os
3
  import numpy as np
4
+ from PIL import Image, ImageEnhance, ImageFilter, ImageDraw
5
  import time
6
  from concurrent.futures import ThreadPoolExecutor
7
  from functools import partial
 
9
  class Animator:
10
  def __init__(self):
11
  self.frame_cache = {}
12
+ self.aspect_ratio = "1:1" # Default aspect ratio
13
+
14
+ def set_aspect_ratio(self, aspect_ratio):
15
+ """Set the aspect ratio for animations"""
16
+ self.aspect_ratio = aspect_ratio
17
+
18
+ def apply_cinematic_effects(self, image):
19
+ """Apply cinematic effects to enhance the frame quality"""
20
+ try:
21
+ # Convert to PIL Image if it's a path
22
+ if isinstance(image, str):
23
+ img = Image.open(image)
24
+ else:
25
+ img = image
26
+
27
+ # Enhance contrast slightly
28
+ enhancer = ImageEnhance.Contrast(img)
29
+ img = enhancer.enhance(1.2)
30
+
31
+ # Enhance color saturation slightly
32
+ enhancer = ImageEnhance.Color(img)
33
+ img = enhancer.enhance(1.1)
34
+
35
+ # Add subtle vignette effect
36
+ # Create a radial gradient mask
37
+ mask = Image.new('L', img.size, 255)
38
+ draw = ImageDraw.Draw(mask)
39
+
40
+ width, height = img.size
41
+ center_x, center_y = width // 2, height // 2
42
+ max_radius = min(width, height) // 2
43
+
44
+ for y in range(height):
45
+ for x in range(width):
46
+ # Calculate distance from center
47
+ distance = np.sqrt((x - center_x)**2 + (y - center_y)**2)
48
+ # Create vignette effect (darker at edges)
49
+ intensity = int(255 * (1 - 0.3 * (distance / max_radius)**2))
50
+ mask.putpixel((x, y), intensity)
51
+
52
+ # Apply the mask
53
+ img = Image.composite(img, Image.new('RGB', img.size, (0, 0, 0)), mask)
54
+
55
+ # Add subtle film grain
56
+ grain = Image.effect_noise((img.width, img.height), 10)
57
+ grain = grain.convert('L')
58
+ grain = grain.filter(ImageFilter.GaussianBlur(radius=1))
59
+ img = Image.blend(img, Image.composite(img, Image.new('RGB', img.size, (128, 128, 128)), grain), 0.05)
60
+
61
+ return img
62
+ except Exception as e:
63
+ # If effects fail, return original image
64
+ if isinstance(image, str):
65
+ return Image.open(image)
66
+ return image
67
 
68
  def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
69
+ """Add a simple zoom animation to an image with cinematic effects"""
70
  # Check cache first
71
+ cache_key = f"zoom_{image_path}_{num_frames}_{zoom_factor}_{self.aspect_ratio}"
72
  if cache_key in self.frame_cache:
73
  return self.frame_cache[cache_key]
74
 
 
90
  top = (img.height - scaled_img.height) // 2
91
  new_img.paste(scaled_img, (left, top))
92
 
93
+ # Apply cinematic effects
94
+ new_img = self.apply_cinematic_effects(new_img)
95
+
96
  # Save the frame
97
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{len(frames)}.png"
98
  new_img.save(frame_path)
 
103
  return frames
104
 
105
  def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
106
+ """Add a simple panning animation to an image with cinematic effects"""
107
  # Check cache first
108
+ cache_key = f"pan_{image_path}_{num_frames}_{direction}_{self.aspect_ratio}"
109
  if cache_key in self.frame_cache:
110
  return self.frame_cache[cache_key]
111
 
 
118
  # Create a sequence of panned images
119
  frames = []
120
 
121
+ # Calculate pan parameters based on aspect ratio
122
+ # For portrait (9:16), horizontal panning should be more subtle
123
+ # For landscape (16:9), vertical panning should be more subtle
124
+ pan_factor = 0.1 # Default pan factor
125
+
126
+ if self.aspect_ratio == "9:16" and (direction == "left" or direction == "right"):
127
+ pan_factor = 0.05 # Reduce horizontal pan for portrait
128
+ elif self.aspect_ratio == "16:9" and (direction == "up" or direction == "down"):
129
+ pan_factor = 0.05 # Reduce vertical pan for landscape
130
+
131
  # Calculate pan parameters
132
  if direction == "right":
133
+ x_shifts = np.linspace(0, img.width * pan_factor, num_frames)
134
  y_shifts = np.zeros(num_frames)
135
  elif direction == "left":
136
+ x_shifts = np.linspace(0, -img.width * pan_factor, num_frames)
137
  y_shifts = np.zeros(num_frames)
138
  elif direction == "down":
139
  x_shifts = np.zeros(num_frames)
140
+ y_shifts = np.linspace(0, img.height * pan_factor, num_frames)
141
  elif direction == "up":
142
  x_shifts = np.zeros(num_frames)
143
+ y_shifts = np.linspace(0, -img.height * pan_factor, num_frames)
144
  else:
145
  # Default to right
146
+ x_shifts = np.linspace(0, img.width * pan_factor, num_frames)
147
  y_shifts = np.zeros(num_frames)
148
 
149
  for i in range(num_frames):
 
153
  # Paste the original image with shift
154
  new_img.paste(img, (int(x_shifts[i]), int(y_shifts[i])))
155
 
156
+ # Apply cinematic effects
157
+ new_img = self.apply_cinematic_effects(new_img)
158
+
159
  # Save the frame
160
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
161
  new_img.save(frame_path)
 
166
  return frames
167
 
168
  def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
169
+ """Add a fade in/out animation to an image with cinematic effects"""
170
  # Check cache first
171
+ cache_key = f"fade_{image_path}_{num_frames}_{fade_type}_{self.aspect_ratio}"
172
  if cache_key in self.frame_cache:
173
  return self.frame_cache[cache_key]
174
 
 
193
  # Create a new image with adjusted brightness
194
  enhancer = Image.new("RGBA", img.size, (0, 0, 0, 0))
195
  new_img = Image.blend(enhancer, img.convert("RGBA"), alpha)
196
+ new_img = new_img.convert("RGB")
197
+
198
+ # Apply cinematic effects
199
+ new_img = self.apply_cinematic_effects(new_img)
200
+
201
+ # Save the frame
202
+ frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
203
+ new_img.save(frame_path)
204
+ frames.append(frame_path)
205
+
206
+ # Cache the result
207
+ self.frame_cache[cache_key] = frames
208
+ return frames
209
+
210
+ def add_ken_burns_effect(self, image_path, num_frames=10, output_dir="temp"):
211
+ """Add a Ken Burns effect (combination of pan and zoom) with cinematic effects"""
212
+ # Check cache first
213
+ cache_key = f"kenburns_{image_path}_{num_frames}_{self.aspect_ratio}"
214
+ if cache_key in self.frame_cache:
215
+ return self.frame_cache[cache_key]
216
+
217
+ # Ensure output directory exists
218
+ os.makedirs(output_dir, exist_ok=True)
219
+
220
+ # Load the image
221
+ img = Image.open(image_path)
222
+
223
+ # Create a sequence of images with Ken Burns effect
224
+ frames = []
225
+
226
+ # Determine direction based on aspect ratio and image content
227
+ import random
228
+ if self.aspect_ratio == "16:9":
229
+ # For landscape, prefer horizontal movement
230
+ direction = random.choice(["right", "left"])
231
+ elif self.aspect_ratio == "9:16":
232
+ # For portrait, prefer vertical movement
233
+ direction = random.choice(["up", "down"])
234
+ else:
235
+ # For square, random direction
236
+ direction = random.choice(["right", "left", "up", "down"])
237
+
238
+ # Calculate pan parameters
239
+ if direction == "right":
240
+ x_shifts = np.linspace(0, img.width * 0.05, num_frames)
241
+ y_shifts = np.zeros(num_frames)
242
+ elif direction == "left":
243
+ x_shifts = np.linspace(0, -img.width * 0.05, num_frames)
244
+ y_shifts = np.zeros(num_frames)
245
+ elif direction == "down":
246
+ x_shifts = np.zeros(num_frames)
247
+ y_shifts = np.linspace(0, img.height * 0.05, num_frames)
248
+ elif direction == "up":
249
+ x_shifts = np.zeros(num_frames)
250
+ y_shifts = np.linspace(0, -img.height * 0.05, num_frames)
251
+
252
+ # Calculate zoom factors
253
+ zoom_factors = np.linspace(1.0, 1.05, num_frames)
254
+
255
+ for i in range(num_frames):
256
+ # Apply zoom
257
+ size = (int(img.width * zoom_factors[i]), int(img.height * zoom_factors[i]))
258
+ zoomed_img = img.resize(size, Image.LANCZOS)
259
+
260
+ # Create a new image with the same size as original
261
+ new_img = Image.new("RGB", (img.width, img.height))
262
+
263
+ # Calculate position with both zoom and pan
264
+ left = (img.width - zoomed_img.width) // 2 + int(x_shifts[i])
265
+ top = (img.height - zoomed_img.height) // 2 + int(y_shifts[i])
266
+
267
+ # Paste the zoomed image with shift
268
+ new_img.paste(zoomed_img, (left, top))
269
+
270
+ # Apply cinematic effects
271
+ new_img = self.apply_cinematic_effects(new_img)
272
 
273
  # Save the frame
274
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
275
+ new_img.save(frame_path)
276
  frames.append(frame_path)
277
 
278
  # Cache the result
 
280
  return frames
281
 
282
  def animate_single_image(self, img_path, animation_type="random", output_dir="temp"):
283
+ """Animate a single image with cinematic effects"""
284
  # Choose animation type
285
+ animation_types = ["zoom", "pan_right", "pan_left", "fade_in", "ken_burns"]
286
+
287
+ # For different aspect ratios, prioritize certain animations
288
+ if self.aspect_ratio == "16:9":
289
+ # For landscape, prioritize horizontal panning
290
+ animation_types = ["zoom", "pan_left", "pan_right", "ken_burns", "fade_in"]
291
+ elif self.aspect_ratio == "9:16":
292
+ # For portrait, prioritize vertical panning
293
+ animation_types = ["zoom", "ken_burns", "fade_in", "pan_up", "pan_down"]
294
 
295
  if animation_type == "random":
296
  # Use hash of image path to deterministically select animation type
297
+ import random
298
+ random.seed(hash(img_path))
299
+ chosen_type = random.choice(animation_types)
300
  else:
301
  chosen_type = animation_type
302
 
303
  # Apply the chosen animation
304
+ if chosen_type == "ken_burns":
305
+ frames = self.add_ken_burns_effect(img_path, output_dir=output_dir)
306
+ elif chosen_type.startswith("pan"):
307
  direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
308
  frames = self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
309
  elif chosen_type.startswith("fade"):
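For reference, a minimal sketch of driving the Animator API added in this commit from a plain script; the input image path and output directory are assumptions for illustration, not part of the commit:

from animator import Animator

animator = Animator()
animator.set_aspect_ratio("9:16")  # new setter; biases pan direction and Ken Burns movement
frames = animator.animate_single_image("scene.png", animation_type="ken_burns", output_dir="temp")
print(f"wrote {len(frames)} frames, first: {frames[0]}")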
app.py CHANGED
@@ -6,6 +6,7 @@ import concurrent.futures
6
  from functools import partial
7
  import torch
8
  import hashlib
 
9
 
10
  from transcriber import AudioTranscriber
11
  from prompt_generator import PromptGenerator
@@ -116,10 +117,10 @@ def process_audio_segment(segment, transcriber):
116
  st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
117
  return ""
118
 
119
- def generate_prompt_for_segment(transcription, prompt_generator):
120
  """Generate a prompt for a single transcription in parallel"""
121
  try:
122
- return prompt_generator.generate_optimized_prompt(transcription)
123
  except Exception as e:
124
  st.warning(f"Error generating prompt: {str(e)}. Using fallback prompt.")
125
  return f"{transcription}, visual scene, detailed, vibrant, cinematic"
@@ -204,7 +205,7 @@ def main():
204
  help="How many scenes to create in your video")
205
  animation_type = st.selectbox(
206
  "Animation style",
207
- ["random", "zoom", "pan_right", "pan_left", "fade_in"],
208
  help="Choose how images will animate in your video"
209
  )
210
 
@@ -378,4 +379,199 @@ def main():
378
  trans = transcriber.transcribe_segment(segment)
379
  transcriptions.append(trans)
380
  except Exception as e:
381
- st.warning("Error transcribing"
6
  from functools import partial
7
  import torch
8
  import hashlib
9
+ from PIL import Image, ImageDraw
10
 
11
  from transcriber import AudioTranscriber
12
  from prompt_generator import PromptGenerator
 
117
  st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
118
  return ""
119
 
120
+ def generate_prompt_for_segment(transcription, prompt_generator, aspect_ratio="16:9"):
121
  """Generate a prompt for a single transcription in parallel"""
122
  try:
123
+ return prompt_generator.generate_optimized_prompt(transcription, aspect_ratio)
124
  except Exception as e:
125
  st.warning(f"Error generating prompt: {str(e)}. Using fallback prompt.")
126
  return f"{transcription}, visual scene, detailed, vibrant, cinematic"
 
205
  help="How many scenes to create in your video")
206
  animation_type = st.selectbox(
207
  "Animation style",
208
+ ["random", "zoom", "pan_right", "pan_left", "fade_in", "ken_burns"],
209
  help="Choose how images will animate in your video"
210
  )
211
 
 
379
  trans = transcriber.transcribe_segment(segment)
380
  transcriptions.append(trans)
381
  except Exception as e:
382
+ st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
383
+ transcriptions.append("")
384
+
385
+ # Display transcriptions with better styling
386
+ progress_bar.progress(30)
387
+ st.markdown("### 📝 Transcriptions")
388
+ for i, (trans, (start, end)) in enumerate(zip(transcriptions, timestamps)):
389
+ st.markdown(f"""
390
+ <div style="background-color: #f0f2f6; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
391
+ <strong>Segment {i+1} ({start:.1f}s - {end:.1f}s):</strong> {trans}
392
+ </div>
393
+ """, unsafe_allow_html=True)
394
+
395
+ # Step 3: Generate prompts in parallel
396
+ status_text.text("Generating prompts from transcriptions...")
397
+ status_message.markdown("✍️ **Creating image descriptions...**")
398
+ if parallel_processing:
399
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
400
+ # Create a partial function with the prompt generator and aspect ratio
401
+ prompt_func = partial(generate_prompt_for_segment,
402
+ prompt_generator=prompt_generator,
403
+ aspect_ratio=selected_aspect_ratio)
404
+ # Generate prompts in parallel
405
+ prompts = list(executor.map(prompt_func, transcriptions))
406
+ else:
407
+ prompts = []
408
+ for trans in transcriptions:
409
+ try:
410
+ prompt = prompt_generator.generate_optimized_prompt(trans, selected_aspect_ratio)
411
+ prompts.append(prompt)
412
+ except Exception as e:
413
+ st.warning(f"Error generating prompt: {str(e)}. Using fallback prompt.")
414
+ prompts.append(f"{trans}, visual scene, detailed, vibrant, cinematic")
415
+
416
+ # Display prompts with better styling
417
+ progress_bar.progress(40)
418
+ st.markdown("### 🖋️ Generated Prompts")
419
+ for i, prompt in enumerate(prompts):
420
+ st.markdown(f"""
421
+ <div style="background-color: #e8f4f8; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
422
+ <strong>Prompt {i+1}:</strong> {prompt}
423
+ </div>
424
+ """, unsafe_allow_html=True)
425
+
426
+ # Step 4: Generate images in parallel
427
+ status_text.text("Generating images from prompts...")
428
+ status_message.markdown("🎨 **Creating images...**")
429
+ if parallel_processing:
430
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
431
+ # Create a partial function with the image generator
432
+ image_func = partial(generate_image_for_prompt, image_generator=image_generator)
433
+ # Generate images in parallel
434
+ images = list(executor.map(image_func, prompts))
435
+ else:
436
+ images = []
437
+ for i, prompt in enumerate(prompts):
438
+ status_text.text(f"Generating image {i+1}/{len(prompts)}...")
439
+ try:
440
+ img_path = image_generator.generate_image(prompt)
441
+ images.append(img_path)
442
+ except Exception as e:
443
+ st.warning(f"Error generating image: {str(e)}. Using fallback image.")
444
+ # Create a fallback image
445
+ from PIL import Image, ImageDraw
446
+ img = Image.new('RGB', image_generator.target_size, color=(240, 240, 240))
447
+ draw = ImageDraw.Draw(img)
448
+ draw.text((10, 10), prompt[:50], fill=(0, 0, 0))
449
+ path = f"temp/fallback_{int(time.time() * 1000)}.png"
450
+ img.save(path)
451
+ images.append(path)
452
+
453
+ # Display images with better styling
454
+ progress_bar.progress(60)
455
+ st.markdown("### 🖼️ Generated Images")
456
+ image_cols = st.columns(min(len(images), 3))
457
+ for i, img_path in enumerate(images):
458
+ with image_cols[i % len(image_cols)]:
459
+ st.image(img_path, caption=f"Image {i+1}", use_column_width=True)
460
+
461
+ # Step 5: Add animations in parallel
462
+ status_text.text("Adding animations to images...")
463
+ status_message.markdown("✨ **Adding animations...**")
464
+ if parallel_processing:
465
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
466
+ # Create a partial function with the animator and animation type
467
+ animate_func = partial(animate_image, animator=animator, animation_type=animation_type)
468
+ # Animate images in parallel
469
+ animated_frames = list(executor.map(animate_func, images))
470
+ else:
471
+ animated_frames = []
472
+ for i, img_path in enumerate(images):
473
+ status_text.text(f"Animating image {i+1}/{len(images)}...")
474
+ try:
475
+ frames = animator.animate_single_image(img_path, animation_type)
476
+ animated_frames.append(frames)
477
+ except Exception as e:
478
+ st.warning(f"Error animating image: {str(e)}. Using static frames.")
479
+ # Create a sequence of identical frames as fallback
480
+ frames = []
481
+ for _ in range(10):
482
+ frames.append(img_path)
483
+ animated_frames.append(frames)
484
+
485
+ progress_bar.progress(80)
486
+
487
+ # Step 6: Create video
488
+ status_text.text("Creating final video...")
489
+ status_message.markdown("🎬 **Assembling video...**")
490
+ output_video = video_creator.create_video_from_frames(
491
+ animated_frames,
492
+ audio_file,
493
+ segments=transcriptions,
494
+ timestamps=timestamps,
495
+ parallel=parallel_processing,
496
+ max_workers=max_workers
497
+ )
498
+
499
+ # Check if output is an error file
500
+ if output_video.endswith('.txt'):
501
+ with open(output_video, 'r') as f:
502
+ error_message = f.read()
503
+ st.error(f"Error creating video: {error_message}")
504
+ st.stop()
505
+
506
+ # Optimize video if needed
507
+ if video_quality != "High":
508
+ status_text.text("Optimizing video for web...")
509
+ status_message.markdown("⚙️ **Optimizing video...**")
510
+ output_video = video_creator.optimize_video(
511
+ output_video,
512
+ bitrate=bitrate,
513
+ threads=max_workers
514
+ )
515
+
516
+ # Cache the result if caching is enabled
517
+ if use_caching:
518
+ import shutil
519
+ cached_path = result_cache.get_cache_path(cache_key, ".mp4")
520
+ shutil.copy(output_video, cached_path)
521
+
522
+ progress_bar.progress(100)
523
+ status_text.text("Video creation complete!")
524
+ status_message.markdown("✅ **Done!**")
525
+
526
+ # Step 7: Display and provide download link with better styling
527
+ st.markdown("### 🎥 Your Video")
528
+ st.video(output_video)
529
+
530
+ st.markdown("### 📥 Download")
531
+ with open(output_video, "rb") as file:
532
+ st.download_button(
533
+ label="📥 Download Video",
534
+ data=file,
535
+ file_name=f"audio_to_video_{selected_aspect_ratio.replace(':', '_')}.mp4",
536
+ mime="video/mp4",
537
+ use_container_width=True
538
+ )
539
+
540
+ # Performance metrics
541
+ st.markdown("### ⏱️ Performance Metrics")
542
+ st.info(f"""
543
+ - Video Format: {aspect_ratio}
544
+ - Parallel Processing: {'Enabled' if parallel_processing else 'Disabled'}
545
+ - Workers: {max_workers}
546
+ - Image Size: {actual_image_size[0]}x{actual_image_size[1]}
547
+ - Inference Steps: {inference_steps}
548
+ - Video Quality: {video_quality}
549
+ """)
550
+
551
+ # Clean up temporary files
552
+ status_text.text("Cleaning up temporary files...")
553
+ for path in images + [p for frames in animated_frames for p in frames]:
554
+ if os.path.exists(path):
555
+ try:
556
+ os.remove(path)
557
+ except:
558
+ pass
559
+
560
+ status_text.text("All done! Your video is ready for download.")
561
+
562
+ except Exception as e:
563
+ st.error(f"An error occurred: {str(e)}")
564
+ st.exception(e)
565
+
566
+ # Provide troubleshooting tips
567
+ st.markdown("### 🔧 Troubleshooting Tips")
568
+ st.info("""
569
+ - Try reducing the number of segments
570
+ - Use a smaller image size
571
+ - Reduce inference steps
572
+ - Make sure your audio file is in a supported format
573
+ - Clear the cache and try again
574
+ """)
575
+
576
+ if __name__ == "__main__":
577
+ main()
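app.py repeats one fan-out pattern at each stage: bind the shared arguments with functools.partial, then map the per-segment inputs across a ThreadPoolExecutor. A self-contained sketch of that pattern (the worker and inputs below are stand-ins, not the app's real functions):

from concurrent.futures import ThreadPoolExecutor
from functools import partial

def describe(text, style):
    # stand-in for generate_prompt_for_segment / generate_image_for_prompt
    return f"{text}, {style}"

texts = ["a harbour at dawn", "an old library", "a rain-soaked street"]
worker = partial(describe, style="cinematic")  # fix the shared keyword argument
with ThreadPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(worker, texts))  # results keep the input order
print(results)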
image_generator.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
2
  import torch
3
  import os
4
  import numpy as np
5
- from PIL import Image
6
  import time
7
  from concurrent.futures import ThreadPoolExecutor
8
  from functools import partial
@@ -114,6 +114,48 @@ class ImageGenerator:
114
  # Default to original size
115
  return base_size
116
 
117
  def generate_image(self, prompt, output_dir="temp"):
118
  """Generate a single image from a prompt"""
119
  # Ensure output directory exists
@@ -134,9 +176,12 @@ class ImageGenerator:
134
  # Resize to target size for consistency and performance
135
  if image.size != self.target_size:
136
  image = image.resize(self.target_size, Image.LANCZOS)
 
 
 
137
  else:
138
  # Fallback: Create a colored gradient image with text
139
- from PIL import Image, ImageDraw, ImageFont, ImageFilter
140
 
141
  # Create a base image with gradient background
142
  image = Image.new('RGB', self.target_size, color=(240, 240, 240))
@@ -242,6 +287,9 @@ class ImageGenerator:
242
  # Resize to target size
243
  img = img.resize(target_size, Image.LANCZOS)
244
 
 
 
 
245
  # Save optimized image
246
  img.save(image_path)
247
 
 
2
  import torch
3
  import os
4
  import numpy as np
5
+ from PIL import Image, ImageEnhance, ImageFilter, ImageDraw
6
  import time
7
  from concurrent.futures import ThreadPoolExecutor
8
  from functools import partial
 
114
  # Default to original size
115
  return base_size
116
 
117
+ def apply_cinematic_effects(self, image):
118
+ """Apply cinematic effects to enhance the image quality"""
119
+ try:
120
+ # Enhance contrast slightly
121
+ enhancer = ImageEnhance.Contrast(image)
122
+ image = enhancer.enhance(1.2)
123
+
124
+ # Enhance color saturation slightly
125
+ enhancer = ImageEnhance.Color(image)
126
+ image = enhancer.enhance(1.1)
127
+
128
+ # Add subtle vignette effect
129
+ # Create a radial gradient mask
130
+ mask = Image.new('L', image.size, 255)
131
+ draw = ImageDraw.Draw(mask)
132
+
133
+ width, height = image.size
134
+ center_x, center_y = width // 2, height // 2
135
+ max_radius = min(width, height) // 2
136
+
137
+ for y in range(height):
138
+ for x in range(width):
139
+ # Calculate distance from center
140
+ distance = np.sqrt((x - center_x)**2 + (y - center_y)**2)
141
+ # Create vignette effect (darker at edges)
142
+ intensity = int(255 * (1 - 0.3 * (distance / max_radius)**2))
143
+ mask.putpixel((x, y), intensity)
144
+
145
+ # Apply the mask
146
+ image = Image.composite(image, Image.new('RGB', image.size, (0, 0, 0)), mask)
147
+
148
+ # Add subtle film grain
149
+ grain = Image.effect_noise((image.width, image.height), 10)
150
+ grain = grain.convert('L')
151
+ grain = grain.filter(ImageFilter.GaussianBlur(radius=1))
152
+ image = Image.blend(image, Image.composite(image, Image.new('RGB', image.size, (128, 128, 128)), grain), 0.05)
153
+
154
+ return image
155
+ except Exception as e:
156
+ # If effects fail, return original image
157
+ return image
158
+
159
  def generate_image(self, prompt, output_dir="temp"):
160
  """Generate a single image from a prompt"""
161
  # Ensure output directory exists
 
176
  # Resize to target size for consistency and performance
177
  if image.size != self.target_size:
178
  image = image.resize(self.target_size, Image.LANCZOS)
179
+
180
+ # Apply cinematic effects
181
+ image = self.apply_cinematic_effects(image)
182
  else:
183
  # Fallback: Create a colored gradient image with text
184
+ from PIL import Image, ImageDraw, ImageFilter
185
 
186
  # Create a base image with gradient background
187
  image = Image.new('RGB', self.target_size, color=(240, 240, 240))
 
287
  # Resize to target size
288
  img = img.resize(target_size, Image.LANCZOS)
289
 
290
+ # Apply cinematic effects
291
+ img = self.apply_cinematic_effects(img)
292
+
293
  # Save optimized image
294
  img.save(image_path)
295
 
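One performance note: apply_cinematic_effects builds its vignette mask with a per-pixel putpixel loop, which is slow in pure Python at typical image sizes. A vectorized sketch of the same radial falloff with NumPy, offered as an alternative rather than what the commit ships:

import numpy as np
from PIL import Image

def vignette_mask(width, height, strength=0.3):
    # same falloff as the loop: 255 * (1 - strength * (distance / max_radius) ** 2)
    ys, xs = np.ogrid[:height, :width]
    cx, cy = width // 2, height // 2
    max_radius = min(width, height) / 2
    dist = np.sqrt((xs - cx) ** 2 + (ys - cy) ** 2)
    intensity = 255 * (1 - strength * (dist / max_radius) ** 2)
    return Image.fromarray(np.clip(intensity, 0, 255).astype(np.uint8))

# usage, mirroring the commit's compositing step:
# img = Image.composite(img, Image.new("RGB", img.size, (0, 0, 0)), vignette_mask(*img.size))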
prompt_generator.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  import torch
3
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
 
5
  class PromptGenerator:
6
  def __init__(self):
@@ -36,11 +36,11 @@ class PromptGenerator:
36
 
37
  return self.model, self.tokenizer
38
 
39
- def generate_optimized_prompt(self, transcription):
40
- """Generate an optimized prompt from a single transcription"""
41
  # Check cache first
42
  import hashlib
43
- cache_key = hashlib.md5(transcription.encode()).hexdigest()
44
 
45
  if cache_key in self.prompt_cache:
46
  return self.prompt_cache[cache_key]
@@ -49,13 +49,91 @@ class PromptGenerator:
49
  if not transcription.strip():
50
  return ""
51
 
 
52
  try:
53
  # Try to use the model if available
54
  model, tokenizer = self.load_model()
55
 
56
  if model is not None and tokenizer is not None:
57
  # Create a prompt template focused on visual elements
58
- template = f"Describe a visual scene for: '{transcription}'"
59
 
60
  # Tokenize
61
  inputs = tokenizer(template, return_tensors="pt")
@@ -74,24 +152,32 @@ class PromptGenerator:
74
  generated_text = generated_text.replace(template, "").strip()
75
 
76
  # Create an optimized prompt with style keywords
77
- prompt = f"{transcription} {generated_text}, detailed, vibrant, cinematic"
78
  else:
79
- # Fallback method using keywords
80
- # Extract key nouns and adjectives from transcription
81
- words = transcription.split()
82
- # Add visual keywords
83
- prompt = f"{transcription}, visual scene, detailed, vibrant, cinematic"
84
  except Exception as e:
85
  st.warning(f"Error generating prompt: {str(e)}. Using fallback method.")
86
  # Fallback to a simple prompt
87
- prompt = f"{transcription}, visual scene, detailed, vibrant, cinematic"
 
 
 
 
 
 
88
 
89
  # Cache the result
90
- self.prompt_cache[cache_key] = prompt
91
 
92
- return prompt
93
 
94
- def generate_prompts(self, text, num_segments=5):
 
 
 
 
 
95
  """Generate image prompts from the transcription"""
96
  # Split text into segments
97
  words = text.split()
@@ -109,22 +195,27 @@ class PromptGenerator:
109
  prompts = []
110
  for segment in segments:
111
  # Create an enhanced prompt
112
- enhanced_prompt = self.generate_optimized_prompt(segment)
113
  prompts.append(enhanced_prompt)
114
 
115
  return prompts, segments
116
 
117
- def generate_optimized_prompts(self, transcriptions, parallel=False, max_workers=4):
118
  """Generate optimized prompts from transcribed segments with parallel processing"""
119
  import concurrent.futures
120
 
121
  if parallel and len(transcriptions) > 1:
122
  # Process in parallel
123
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
124
- prompts = list(executor.map(self.generate_optimized_prompt, transcriptions))
 
 
 
 
 
125
  else:
126
  # Process sequentially
127
- prompts = [self.generate_optimized_prompt(trans) for trans in transcriptions]
128
 
129
  return prompts
130
 
 
1
  import streamlit as st
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
  class PromptGenerator:
6
  def __init__(self):
 
36
 
37
  return self.model, self.tokenizer
38
 
39
+ def generate_hyper_realistic_prompt(self, transcription, aspect_ratio="16:9"):
40
+ """Generate a hyper-realistic prompt from a transcription with cinematic quality"""
41
  # Check cache first
42
  import hashlib
43
+ cache_key = hashlib.md5((transcription + aspect_ratio).encode()).hexdigest()
44
 
45
  if cache_key in self.prompt_cache:
46
  return self.prompt_cache[cache_key]
 
49
  if not transcription.strip():
50
  return ""
51
 
52
+ # Base prompt components
53
+ base_prompt = transcription.strip()
54
+
55
+ # Hyper-realism keywords
56
+ realism_keywords = [
57
+ "hyper realistic",
58
+ "photo realistic",
59
+ "ultra detailed",
60
+ "hyper detailed textures",
61
+ "8K resolution"
62
+ ]
63
+
64
+ # Lighting based on content analysis
65
+ lighting_options = {
66
+ "warm": ["golden hour glow", "warm sunlight", "sunset lighting", "soft warm glow"],
67
+ "dramatic": ["moody overcast", "dramatic lighting", "high contrast", "film noir shadows"],
68
+ "historical": ["candle light", "gas lamps", "torch glow", "lantern light", "flickering light"],
69
+ "modern": ["harsh industrial lighting", "fluorescent lighting", "neon glow", "studio lighting"]
70
+ }
71
+
72
+ # Camera effects
73
+ camera_effects = [
74
+ "shallow depth of field",
75
+ "film grain",
76
+ "cinematic composition"
77
+ ]
78
+
79
+ # Environmental details
80
+ environmental_details = [
81
+ "atmospheric",
82
+ "detailed environment",
83
+ "realistic textures",
84
+ "natural lighting"
85
+ ]
86
+
87
+ # Material details
88
+ material_details = [
89
+ "detailed materials",
90
+ "realistic textures",
91
+ "natural wear and tear"
92
+ ]
93
+
94
+ # Analyze transcription to determine appropriate lighting and mood
95
+ lower_trans = transcription.lower()
96
+
97
+ # Select lighting based on content
98
+ selected_lighting = []
99
+ if any(word in lower_trans for word in ["sunset", "warm", "evening", "afternoon", "golden"]):
100
+ selected_lighting = lighting_options["warm"]
101
+ elif any(word in lower_trans for word in ["dramatic", "dark", "night", "shadow", "mystery", "tension"]):
102
+ selected_lighting = lighting_options["dramatic"]
103
+ elif any(word in lower_trans for word in ["history", "ancient", "medieval", "old", "traditional", "past"]):
104
+ selected_lighting = lighting_options["historical"]
105
+ else:
106
+ selected_lighting = lighting_options["modern"]
107
+
108
+ # Select a random lighting keyword from the chosen category
109
+ import random
110
+ lighting_keyword = random.choice(selected_lighting)
111
+
112
+ # Select a random camera effect
113
+ camera_effect = random.choice(camera_effects)
114
+
115
+ # Select environmental details based on aspect ratio
116
+ if aspect_ratio == "16:9":
117
+ # For landscape, emphasize wide environmental shots
118
+ environmental_keyword = "wide angle " + random.choice(environmental_details)
119
+ elif aspect_ratio == "9:16":
120
+ # For portrait, emphasize vertical composition
121
+ environmental_keyword = "vertical composition " + random.choice(environmental_details)
122
+ else:
123
+ # For square, balanced composition
124
+ environmental_keyword = "balanced composition " + random.choice(environmental_details)
125
+
126
+ # Material detail
127
+ material_keyword = random.choice(material_details)
128
+
129
+ # Construct the enhanced prompt
130
  try:
131
  # Try to use the model if available
132
  model, tokenizer = self.load_model()
133
 
134
  if model is not None and tokenizer is not None:
135
  # Create a prompt template focused on visual elements
136
+ template = f"Create a hyper-realistic visual scene for: '{base_prompt}'"
137
 
138
  # Tokenize
139
  inputs = tokenizer(template, return_tensors="pt")
 
152
  generated_text = generated_text.replace(template, "").strip()
153
 
154
  # Create an optimized prompt with style keywords
155
+ scene_description = f"{base_prompt} {generated_text}"
156
  else:
157
+ # Fallback method using the base prompt
158
+ scene_description = base_prompt
 
 
 
159
  except Exception as e:
160
  st.warning(f"Error generating prompt: {str(e)}. Using fallback method.")
161
  # Fallback to a simple prompt
162
+ scene_description = base_prompt
163
+
164
+ # Combine all elements into a hyper-realistic prompt
165
+ realism_part = ", ".join(random.sample(realism_keywords, 3)) # Select 3 random realism keywords
166
+
167
+ # Final prompt construction with all elements
168
+ enhanced_prompt = f"{scene_description}, {realism_part}, {lighting_keyword}, {camera_effect}, {environmental_keyword}, {material_keyword}"
169
 
170
  # Cache the result
171
+ self.prompt_cache[cache_key] = enhanced_prompt
172
 
173
+ return enhanced_prompt
174
 
175
+ def generate_optimized_prompt(self, transcription, aspect_ratio="16:9"):
176
+ """Generate an optimized prompt from a single transcription"""
177
+ # This is now a wrapper for the hyper-realistic prompt generator
178
+ return self.generate_hyper_realistic_prompt(transcription, aspect_ratio)
179
+
180
+ def generate_prompts(self, text, num_segments=5, aspect_ratio="16:9"):
181
  """Generate image prompts from the transcription"""
182
  # Split text into segments
183
  words = text.split()
 
195
  prompts = []
196
  for segment in segments:
197
  # Create an enhanced prompt
198
+ enhanced_prompt = self.generate_hyper_realistic_prompt(segment, aspect_ratio)
199
  prompts.append(enhanced_prompt)
200
 
201
  return prompts, segments
202
 
203
+ def generate_optimized_prompts(self, transcriptions, parallel=False, max_workers=4, aspect_ratio="16:9"):
204
  """Generate optimized prompts from transcribed segments with parallel processing"""
205
  import concurrent.futures
206
 
207
  if parallel and len(transcriptions) > 1:
208
  # Process in parallel
209
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
210
+ # Create a function that includes aspect ratio
211
+ def generate_with_aspect(trans):
212
+ return self.generate_hyper_realistic_prompt(trans, aspect_ratio)
213
+
214
+ # Map with the new function
215
+ prompts = list(executor.map(generate_with_aspect, transcriptions))
216
  else:
217
  # Process sequentially
218
+ prompts = [self.generate_hyper_realistic_prompt(trans, aspect_ratio) for trans in transcriptions]
219
 
220
  return prompts
221
 
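A quick usage sketch of the reworked prompt API; the transcription text is made up and the printed output will vary because the lighting, camera, and realism keywords are sampled at random:

from prompt_generator import PromptGenerator

generator = PromptGenerator()
prompt = generator.generate_optimized_prompt(
    "fishermen repairing their nets at sunset",  # example transcription
    aspect_ratio="16:9",                         # also accepted: "9:16", "1:1"
)
print(prompt)
# e.g. "..., hyper realistic, ultra detailed, 8K resolution, golden hour glow,
#       shallow depth of field, wide angle atmospheric, detailed materials"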
requirements.txt CHANGED
@@ -1,13 +1,16 @@
1
- streamlit
2
- transformers
3
- torch --extra-index-url https://download.pytorch.org/whl/cpu
4
- torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
5
- diffusers
6
- accelerate
7
  moviepy==1.0.3
8
- librosa
9
- soundfile
10
- numpy
11
- pillow
12
- scipy
13
- matplotlib
 
 
 
 
1
+ streamlit==1.25.0
2
+ transformers==4.30.2
3
+ torch==2.0.1
4
+ torchaudio==2.0.2
5
+ diffusers==0.19.3
6
+ accelerate==0.21.0
7
  moviepy==1.0.3
8
+ pillow==9.5.0
9
+ numpy==1.24.3
10
+ scipy==1.10.1
11
+ matplotlib==3.7.2
12
+ librosa==0.10.0.post2
13
+ soundfile==0.12.1
14
+ huggingface-hub==0.16.4
15
+ ftfy==6.1.1
16
+ regex==2023.6.3
transcriber.py CHANGED
@@ -1,109 +1,65 @@
1
  import streamlit as st
2
- import torch
3
- from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
4
- import librosa
5
  import numpy as np
 
 
6
  import tempfile
7
  import os
8
  from concurrent.futures import ThreadPoolExecutor
 
9
 
10
  class AudioTranscriber:
11
  def __init__(self):
12
  self.model = None
13
  self.processor = None
14
- self.pipe = None
15
  self.transcription_cache = {}
16
 
17
  def load_model(self):
18
  """Load a lightweight transcription model"""
19
- if self.pipe is None:
20
- with st.spinner("Loading transcription model... This may take a moment."):
21
- # Use the small Whisper model to save resources
22
- model_id = "openai/whisper-small"
23
-
24
- # Use CPU for inference to save memory
25
- device = "cpu"
26
- torch_dtype = torch.float32
27
-
28
- # Load model with memory optimization settings
29
- self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
30
- model_id,
31
- torch_dtype=torch_dtype,
32
- low_cpu_mem_usage=True,
33
- use_safetensors=True
34
- )
35
- self.processor = AutoProcessor.from_pretrained(model_id)
36
-
37
- # Create pipeline for efficient processing
38
- self.pipe = pipeline(
39
- "automatic-speech-recognition",
40
- model=self.model,
41
- tokenizer=self.processor.tokenizer,
42
- feature_extractor=self.processor.feature_extractor,
43
- max_new_tokens=128,
44
- chunk_length_s=30,
45
- batch_size=16,
46
- device=device,
47
- )
48
- return self.pipe
49
-
50
- def transcribe(self, audio_file):
51
- """Transcribe the audio file using the loaded model"""
52
- # Generate a cache key based on the audio file
53
- import hashlib
54
- cache_key = hashlib.md5(audio_file.getvalue()).hexdigest()
55
-
56
- # Check if result is in cache
57
- if cache_key in self.transcription_cache:
58
- return self.transcription_cache[cache_key]
59
 
60
- # Load the model if not already loaded
61
- pipe = self.load_model()
62
-
63
- # Save the uploaded file to a temporary location
64
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
65
- tmp_file.write(audio_file.getvalue())
66
- tmp_path = tmp_file.name
67
-
68
- try:
69
- # Load audio using librosa for processing
70
- y, sr = librosa.load(tmp_path, sr=16000)
71
-
72
- # Process in smaller chunks for memory efficiency
73
- result = pipe(y)
74
- transcription = result["text"]
75
-
76
- # Cache the result
77
- self.transcription_cache[cache_key] = transcription
78
-
79
- return transcription
80
- finally:
81
- # Clean up temporary file
82
- if os.path.exists(tmp_path):
83
- os.unlink(tmp_path)
84
 
85
- def segment_audio(self, audio_file, num_segments=5):
86
  """Segment the audio file into chunks for processing"""
87
- # Save the uploaded file to a temporary location
88
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
89
  tmp_file.write(audio_file.getvalue())
90
- tmp_path = tmp_file.name
91
 
92
  try:
93
- # Load audio using librosa
94
- y, sr = librosa.load(tmp_path, sr=16000)
95
 
96
  # Get total duration
97
  duration = librosa.get_duration(y=y, sr=sr)
98
 
 
 
 
 
 
99
  # Calculate segment duration
100
- segment_duration = duration / num_segments
101
 
102
  # Create segments
103
  segments = []
104
  timestamps = []
105
 
106
- for i in range(num_segments):
107
  start_time = i * segment_duration
108
  end_time = min((i + 1) * segment_duration, duration)
109
 
@@ -117,32 +73,112 @@ class AudioTranscriber:
117
  timestamps.append((start_time, end_time))
118
 
119
  return segments, timestamps
 
 
120
  finally:
121
  # Clean up temporary file
122
- if os.path.exists(tmp_path):
123
- os.unlink(tmp_path)
 
 
 
124
 
125
- def transcribe_segment(self, segment):
126
  """Transcribe a single audio segment"""
127
- pipe = self.load_model()
128
- result = pipe(segment)
129
- return result["text"]
130
-
131
- def transcribe_segments(self, segments, parallel=False, max_workers=4):
132
- """Transcribe individual audio segments with optional parallel processing"""
133
- pipe = self.load_model()
134
 
 
 
135
  if parallel and len(segments) > 1:
136
  # Process in parallel using ThreadPoolExecutor
137
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
138
- # Process segments in parallel
139
- transcriptions = list(executor.map(self.transcribe_segment, segments))
 
 
 
140
  else:
141
  # Process sequentially
142
  transcriptions = []
143
  for segment in segments:
144
- result = pipe(segment)
145
- transcriptions.append(result["text"])
146
 
147
  return transcriptions
148
 
 
1
  import streamlit as st
 
 
 
2
  import numpy as np
3
+ import soundfile as sf
4
+ import librosa
5
  import tempfile
6
  import os
7
  from concurrent.futures import ThreadPoolExecutor
8
+ from functools import partial
9
 
10
  class AudioTranscriber:
11
  def __init__(self):
12
  self.model = None
13
  self.processor = None
 
14
  self.transcription_cache = {}
15
 
16
  def load_model(self):
17
  """Load a lightweight transcription model"""
18
+ if self.model is None:
19
+ with st.spinner("Loading transcription model..."):
20
+ try:
21
+ from transformers import pipeline
22
+
23
+ # Use a small model for transcription to save memory
24
+ self.model = pipeline(
25
+ "automatic-speech-recognition",
26
+ model="openai/whisper-small",
27
+ chunk_length_s=30,
28
+ device="cpu"
29
+ )
30
+ except Exception as e:
31
+ st.warning(f"Error loading transcription model: {str(e)}. Using fallback method.")
32
+ self.model = None
 
 
33
 
34
+ return self.model
 
 
35
 
36
+ def segment_audio(self, audio_file, num_segments=5, min_segment_duration=3.0):
37
  """Segment the audio file into chunks for processing"""
38
+ # Save the uploaded audio to a temporary file
39
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
40
  tmp_file.write(audio_file.getvalue())
41
+ audio_path = tmp_file.name
42
 
43
  try:
44
+ # Load the audio file
45
+ y, sr = librosa.load(audio_path, sr=None)
46
 
47
  # Get total duration
48
  duration = librosa.get_duration(y=y, sr=sr)
49
 
50
+ # Ensure we don't create segments that are too short
51
+ actual_segments = min(num_segments, int(duration / min_segment_duration))
52
+ if actual_segments < 1:
53
+ actual_segments = 1
54
+
55
  # Calculate segment duration
56
+ segment_duration = duration / actual_segments
57
 
58
  # Create segments
59
  segments = []
60
  timestamps = []
61
 
62
+ for i in range(actual_segments):
63
  start_time = i * segment_duration
64
  end_time = min((i + 1) * segment_duration, duration)
65
 
 
73
  timestamps.append((start_time, end_time))
74
 
75
  return segments, timestamps
76
+
77
+ except Exception as e:
78
+ st.warning(f"Error segmenting audio: {str(e)}. Using simplified segmentation.")
79
+
80
+ # Fallback: Create equal segments
81
+ try:
82
+ y, sr = sf.read(audio_path)
83
+ duration = len(y) / sr
84
+
85
+ # Ensure we don't create segments that are too short
86
+ actual_segments = min(num_segments, int(duration / min_segment_duration))
87
+ if actual_segments < 1:
88
+ actual_segments = 1
89
+
90
+ # Calculate segment duration
91
+ segment_duration = duration / actual_segments
92
+
93
+ # Create segments
94
+ segments = []
95
+ timestamps = []
96
+
97
+ for i in range(actual_segments):
98
+ start_time = i * segment_duration
99
+ end_time = min((i + 1) * segment_duration, duration)
100
+
101
+ # Convert time to samples
102
+ start_sample = int(start_time * sr)
103
+ end_sample = int(end_time * sr)
104
+
105
+ # Extract segment
106
+ segment = y[start_sample:end_sample]
107
+ segments.append(segment)
108
+ timestamps.append((start_time, end_time))
109
+
110
+ return segments, timestamps
111
+
112
+ except Exception as inner_e:
113
+ st.error(f"Critical error in audio segmentation: {str(inner_e)}")
114
+ # Last resort: Create dummy segments
115
+ segments = [np.zeros(16000) for _ in range(num_segments)] # 1-second silent segments
116
+ timestamps = [(i, i+1) for i in range(num_segments)]
117
+ return segments, timestamps
118
  finally:
119
  # Clean up temporary file
120
+ if os.path.exists(audio_path):
121
+ try:
122
+ os.unlink(audio_path)
123
+ except:
124
+ pass
125
 
126
+ def transcribe_segment(self, segment, sr=16000):
127
  """Transcribe a single audio segment"""
128
+ # Generate a cache key based on the audio data
129
+ import hashlib
130
+ cache_key = hashlib.md5(segment.tobytes()).hexdigest()
131
+
132
+ # Check if result is in cache
133
+ if cache_key in self.transcription_cache:
134
+ return self.transcription_cache[cache_key]
135
 
136
+ try:
137
+ # Load the model if not already loaded
138
+ model = self.load_model()
139
+
140
+ if model is not None:
141
+ # Save segment to a temporary file
142
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
143
+ sf.write(tmp_file.name, segment, sr)
144
+ segment_path = tmp_file.name
145
+
146
+ # Transcribe using the model
147
+ result = model(segment_path)
148
+ transcription = result["text"]
149
+
150
+ # Clean up temporary file
151
+ if os.path.exists(segment_path):
152
+ os.unlink(segment_path)
153
+ else:
154
+ # Fallback: Return empty string or placeholder
155
+ transcription = "Audio content"
156
+ except Exception as e:
157
+ st.warning(f"Error transcribing segment: {str(e)}. Using fallback method.")
158
+ # Fallback: Return empty string or placeholder
159
+ transcription = "Audio content"
160
+
161
+ # Cache the result
162
+ self.transcription_cache[cache_key] = transcription
163
+
164
+ return transcription
165
+
166
+ def transcribe_segments(self, segments, sr=16000, parallel=False, max_workers=4):
167
+ """Transcribe multiple audio segments with parallel processing"""
168
  if parallel and len(segments) > 1:
169
  # Process in parallel using ThreadPoolExecutor
170
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
171
+ # Create a partial function with fixed sample rate
172
+ transcribe_func = partial(self.transcribe_segment, sr=sr)
173
+
174
+ # Map and collect results
175
+ transcriptions = list(executor.map(transcribe_func, segments))
176
  else:
177
  # Process sequentially
178
  transcriptions = []
179
  for segment in segments:
180
+ transcription = self.transcribe_segment(segment, sr)
181
+ transcriptions.append(transcription)
182
 
183
  return transcriptions
184
 
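A minimal sketch of the reworked transcriber used outside Streamlit; the WAV path is an assumption, and io.BytesIO stands in for the uploaded-file object whose .getvalue() segment_audio expects. Note that segment_audio loads audio at the file's native sample rate, so the sr passed to transcribe_segments should match that rate:

import io
from transcriber import AudioTranscriber

transcriber = AudioTranscriber()
with open("speech.wav", "rb") as f:        # assumed 16 kHz mono WAV
    audio_file = io.BytesIO(f.read())      # exposes .getvalue(), like st.file_uploader's return

segments, timestamps = transcriber.segment_audio(audio_file, num_segments=5)
texts = transcriber.transcribe_segments(segments, sr=16000, parallel=True, max_workers=2)
for (start, end), text in zip(timestamps, texts):
    print(f"{start:5.1f}s - {end:5.1f}s  {text}")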
video_creator.py CHANGED
@@ -11,41 +11,124 @@ class VideoCreator:
11
  # Ensure output directory exists
12
  os.makedirs("outputs", exist_ok=True)
13
  self.video_cache = {}
 
14
 
15
- def create_segment_clip(self, frames, segment_duration, segment_text=None):
16
- """Create a video clip from frames with optional text overlay"""
17
- # Calculate frame duration based on segment duration
18
- frame_duration = segment_duration / len(frames)
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- # Create a clip from the frames
21
- segment_clip = ImageSequenceClip(frames, durations=[frame_duration] * len(frames))
22
 
23
- # Add text overlay if segment text is provided
24
- if segment_text:
 
 
 
25
  try:
26
- txt_clip = TextClip(
27
- segment_text,
28
- fontsize=24,
29
- color='white',
30
- bg_color='rgba(0,0,0,0.5)',
31
- size=(segment_clip.w, None),
32
- method='caption'
33
- ).set_duration(segment_clip.duration)
34
-
35
- txt_clip = txt_clip.set_position(('center', 'bottom'))
36
- segment_clip = CompositeVideoClip([segment_clip, txt_clip])
37
- except Exception as e:
38
- # If TextClip fails, continue without text overlay
39
- st.warning(f"Could not add text overlay: {e}")
40
-
41
- return segment_clip
 
 
42
 
43
  def create_video_from_frames(self, animated_frames, audio_file, segments=None, timestamps=None,
44
  output_dir="outputs", parallel=False, max_workers=4):
45
  """Create a video from animated frames synchronized with audio using parallel processing"""
46
  # Generate a cache key based on inputs
47
  import hashlib
48
- cache_key = f"{hashlib.md5(audio_file.getvalue()).hexdigest()}_{len(animated_frames)}"
49
 
50
  # Check if result is in cache
51
  if cache_key in self.video_cache:
@@ -72,83 +155,156 @@ class VideoCreator:
72
  # Create video clips for each animated segment
73
  video_clips = []
74
 
75
- if parallel and len(animated_frames) > 1:
76
- # Process segments in parallel
77
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
78
- # Prepare arguments for parallel processing
79
- args = []
 
 
 
 
 
 
 
 
 
 
80
  for i, frames in enumerate(animated_frames):
81
  segment_duration = segment_durations[min(i, len(segment_durations)-1)]
82
  segment_text = segments[i] if segments and i < len(segments) else None
83
- args.append((frames, segment_duration, segment_text))
84
-
85
- # Process in parallel
86
- video_clips = list(executor.map(lambda x: self.create_segment_clip(*x), args))
87
- else:
88
- # Process segments sequentially
89
- for i, frames in enumerate(animated_frames):
 
 
90
  segment_duration = segment_durations[min(i, len(segment_durations)-1)]
91
- segment_text = segments[i] if segments and i < len(segments) else None
92
-
93
- segment_clip = self.create_segment_clip(frames, segment_duration, segment_text)
94
- video_clips.append(segment_clip)
95
 
96
  # Concatenate all clips
97
- final_clip = concatenate_videoclips(video_clips)
98
-
99
- # Set the audio
100
- final_clip = final_clip.set_audio(audio_clip)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  # Write the result to a file
103
- output_path = f"{output_dir}/output_video_{int(time.time())}.mp4"
104
-
105
- # Use lower resolution and bitrate for faster processing
106
- final_clip.write_videofile(
107
- output_path,
108
- fps=24,
109
- codec='libx264',
110
- audio_codec='aac',
111
- preset='ultrafast', # Faster encoding
112
- threads=max_workers, # Use multiple threads for encoding
113
- bitrate='1000k' # Lower bitrate
114
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  # Cache the result
117
  self.video_cache[cache_key] = output_path
118
 
119
  return output_path
120
 
 
 
 
 
 
 
 
121
  finally:
122
  # Clean up temporary file
123
  if os.path.exists(audio_path):
124
- os.unlink(audio_path)
 
 
 
125
 
126
- def optimize_video(self, video_path, target_size=(640, 480), bitrate='1000k', threads=2):
127
  """Optimize video size and quality for web delivery"""
128
- from moviepy.editor import VideoFileClip
129
-
130
- # Load the video
131
- clip = VideoFileClip(video_path)
132
-
133
- # Resize to target size
134
- clip_resized = clip.resize(target_size)
135
-
136
- # Save optimized video
137
- optimized_path = video_path.replace('.mp4', f'_optimized_{int(time.time())}.mp4')
138
- clip_resized.write_videofile(
139
- optimized_path,
140
- codec='libx264',
141
- audio_codec='aac',
142
- preset='ultrafast',
143
- threads=threads,
144
- bitrate=bitrate
145
- )
146
-
147
- # Close clips to free memory
148
- clip.close()
149
- clip_resized.close()
150
-
151
- return optimized_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  def clear_cache(self):
154
  """Clear the video cache"""
 
11
  # Ensure output directory exists
12
  os.makedirs("outputs", exist_ok=True)
13
  self.video_cache = {}
14
+ self.aspect_ratio = "1:1" # Default aspect ratio
15
 
16
+ def set_aspect_ratio(self, aspect_ratio):
17
+ """Set the aspect ratio for video creation"""
18
+ self.aspect_ratio = aspect_ratio
19
+
20
+ def get_video_dimensions(self, base_size=None):
21
+ """Get video dimensions based on aspect ratio"""
22
+ if base_size is None:
23
+ # Default base sizes for different aspect ratios
24
+ if self.aspect_ratio == "1:1":
25
+ return (640, 640) # Square
26
+ elif self.aspect_ratio == "16:9":
27
+ return (854, 480) # Landscape HD
28
+ elif self.aspect_ratio == "9:16":
29
+ return (480, 854) # Portrait (mobile)
30
+ else:
31
+ return (640, 640) # Default square
32
 
33
+ # Calculate dimensions based on base size and aspect ratio
34
+ base_pixels = base_size[0] * base_size[1]
35
 
36
+ if self.aspect_ratio == "1:1":
37
+ # Square format
38
+ side = int(np.sqrt(base_pixels))
39
+ # Ensure even dimensions for video compatibility
40
+ side = side if side % 2 == 0 else side + 1
41
+ return (side, side)
42
+ elif self.aspect_ratio == "16:9":
43
+ # Landscape format
44
+ width = int(np.sqrt(base_pixels * 16 / 9))
45
+ height = int(width * 9 / 16)
46
+ # Ensure even dimensions for video compatibility
47
+ width = width if width % 2 == 0 else width + 1
48
+ height = height if height % 2 == 0 else height + 1
49
+ return (width, height)
50
+ elif self.aspect_ratio == "9:16":
51
+ # Portrait format
52
+ height = int(np.sqrt(base_pixels * 16 / 9))
53
+ width = int(height * 9 / 16)
54
+ # Ensure even dimensions for video compatibility
55
+ width = width if width % 2 == 0 else width + 1
56
+ height = height if height % 2 == 0 else height + 1
57
+ return (width, height)
58
+ else:
59
+ # Default to original size
60
+ return base_size
61
+
62
+ def create_segment_clip(self, frames, segment_duration, segment_text=None):
63
+ """Create a video clip from frames with optional text overlay"""
64
+ try:
65
+ # Calculate frame duration based on segment duration
66
+ frame_duration = segment_duration / len(frames)
67
+
68
+ # Create a clip from the frames
69
+ segment_clip = ImageSequenceClip(frames, durations=[frame_duration] * len(frames))
70
+
71
+ # Add text overlay if segment text is provided
72
+ if segment_text:
73
+ try:
74
+ # Adjust text size and position based on aspect ratio
75
+ fontsize = 24
76
+ position = ('center', 'bottom')
77
+
78
+ if self.aspect_ratio == "9:16":
79
+ # For portrait, make text smaller and position it lower
80
+ fontsize = 20
81
+ position = ('center', 0.9) # 90% from top
82
+ elif self.aspect_ratio == "16:9":
83
+ # For landscape, position text at bottom
84
+ position = ('center', 0.95) # 95% from top
85
+
86
+ txt_clip = TextClip(
87
+ segment_text,
88
+ fontsize=fontsize,
89
+ color='white',
90
+ bg_color='rgba(0,0,0,0.5)',
91
+ size=(segment_clip.w, None),
92
+ method='caption'
93
+ ).set_duration(segment_clip.duration)
94
+
95
+ txt_clip = txt_clip.set_position(position, relative=True) # treat 0.9/0.95 as fractions of the frame, not pixels
96
+ segment_clip = CompositeVideoClip([segment_clip, txt_clip])
97
+ except Exception as e:
98
+ # If TextClip fails, continue without text overlay
99
+ st.warning(f"Could not add text overlay: {str(e)}")
100
+
101
+ return segment_clip
102
+ except Exception as e:
103
+ st.warning(f"Error creating segment clip: {str(e)}. Using fallback method.")
104
+
105
+ # Fallback: Create a simple clip with the first frame
106
  try:
107
+ # Use just the first frame if there's an issue with the sequence
108
+ first_frame = frames[0] if frames else None
109
+ if first_frame and os.path.exists(first_frame):
110
+ segment_clip = ImageSequenceClip([first_frame], durations=[segment_duration])
111
+ return segment_clip
112
+ else:
113
+ # Create a blank clip if no frames are available
114
+ from PIL import Image
115
+ blank_img = Image.new('RGB', self.get_video_dimensions(), color=(0, 0, 0))
116
+ blank_path = tempfile.NamedTemporaryFile(suffix='.png', delete=False).name # mktemp is deprecated and race-prone
117
+ blank_img.save(blank_path)
118
+ segment_clip = ImageSequenceClip([blank_path], durations=[segment_duration])
119
+ return segment_clip
120
+ except Exception as inner_e:
121
+ st.error(f"Critical error in fallback clip creation: {str(inner_e)}")
122
+ # Last resort: Create an extremely simple clip
123
+ from moviepy.editor import ColorClip
124
+ return ColorClip(self.get_video_dimensions(), color=(0, 0, 0), duration=segment_duration)
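The caption overlay above reduced to a standalone sketch, assuming moviepy 1.x and an ImageMagick install for TextClip; fractional positions such as 0.9 only act as fractions of the frame when relative=True is passed:

from moviepy.editor import ColorClip, TextClip, CompositeVideoClip

canvas = ColorClip((480, 854), color=(0, 0, 0), duration=2)  # 9:16 portrait frame
caption = (TextClip("Sample caption", fontsize=20, color='white',
                    bg_color='rgba(0,0,0,0.5)', size=(canvas.w, None), method='caption')
           .set_duration(canvas.duration)
           .set_position(('center', 0.9), relative=True))  # 90% down the frame
CompositeVideoClip([canvas, caption]).write_videofile("caption_demo.mp4", fps=24)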
125
 
126
  def create_video_from_frames(self, animated_frames, audio_file, segments=None, timestamps=None,
127
  output_dir="outputs", parallel=False, max_workers=4):
128
  """Create a video from animated frames synchronized with audio using parallel processing"""
129
  # Generate a cache key based on inputs
130
  import hashlib
131
+ cache_key = f"{hashlib.md5(audio_file.getvalue()).hexdigest()}_{len(animated_frames)}_{self.aspect_ratio}"
132
 
133
  # Check if result is in cache
134
  if cache_key in self.video_cache:
 
155
  # Create video clips for each animated segment
156
  video_clips = []
157
 
158
+ try:
159
+ if parallel and len(animated_frames) > 1:
160
+ # Process segments in parallel
161
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
162
+ # Prepare arguments for parallel processing
163
+ args = []
164
+ for i, frames in enumerate(animated_frames):
165
+ segment_duration = segment_durations[min(i, len(segment_durations)-1)]
166
+ segment_text = segments[i] if segments and i < len(segments) else None
167
+ args.append((frames, segment_duration, segment_text))
168
+
169
+ # Process in parallel
170
+ video_clips = list(executor.map(lambda x: self.create_segment_clip(*x), args))
171
+ else:
172
+ # Process segments sequentially
173
  for i, frames in enumerate(animated_frames):
174
  segment_duration = segment_durations[min(i, len(segment_durations)-1)]
175
  segment_text = segments[i] if segments and i < len(segments) else None
176
+
177
+ segment_clip = self.create_segment_clip(frames, segment_duration, segment_text)
178
+ video_clips.append(segment_clip)
179
+ except Exception as e:
180
+ st.warning(f"Error processing video segments: {str(e)}. Using fallback method.")
181
+
182
+ # Fallback: Create a simple clip for each segment
183
+ video_clips = []
184
+ for i, _ in enumerate(animated_frames):
185
  segment_duration = segment_durations[min(i, len(segment_durations)-1)]
186
+ from moviepy.editor import ColorClip
187
+ clip = ColorClip(self.get_video_dimensions(), color=(0, 0, 0), duration=segment_duration)
188
+ video_clips.append(clip)
 
189
 
190
  # Concatenate all clips
191
+ try:
192
+ final_clip = concatenate_videoclips(video_clips)
193
+
194
+ # Set the audio
195
+ final_clip = final_clip.set_audio(audio_clip)
196
+
197
+ # Get target dimensions based on aspect ratio
198
+ target_dimensions = self.get_video_dimensions()
199
+
200
+ # Resize the final clip to match the target dimensions
201
+ final_clip = final_clip.resize(target_dimensions)
202
+ except Exception as e:
203
+ st.warning(f"Error creating final video: {str(e)}. Using fallback method.")
204
+
205
+ # Fallback: Create a simple video with the audio
206
+ from moviepy.editor import ColorClip
207
+ final_clip = ColorClip(self.get_video_dimensions(), color=(0, 0, 0), duration=total_duration)
208
+ final_clip = final_clip.set_audio(audio_clip)
209
 
210
  # Write the result to a file
211
+ output_path = f"{output_dir}/output_video_{self.aspect_ratio.replace(':', '_')}_{int(time.time())}.mp4"
212
+
213
+ try:
214
+ # Use lower resolution and bitrate for faster processing
215
+ final_clip.write_videofile(
216
+ output_path,
217
+ fps=24,
218
+ codec='libx264',
219
+ audio_codec='aac',
220
+ preset='ultrafast', # Faster encoding
221
+ threads=max_workers, # Use multiple threads for encoding
222
+ bitrate='1000k' # Lower bitrate
223
+ )
224
+ except Exception as e:
225
+ st.warning(f"Error writing video file: {str(e)}. Trying with simpler settings.")
226
+
227
+ # Try with even simpler settings
228
+ try:
229
+ final_clip.write_videofile(
230
+ output_path,
231
+ fps=15, # Lower fps
232
+ codec='libx264',
233
+ audio_codec='aac',
234
+ preset='ultrafast',
235
+ threads=2, # Fewer threads
236
+ bitrate='800k' # Lower bitrate
237
+ )
238
+ except Exception as inner_e:
239
+ st.error(f"Critical error writing video: {str(inner_e)}")
240
+ # Create a text file explaining the error
241
+ error_path = f"{output_dir}/error_video_{int(time.time())}.txt"
242
+ with open(error_path, 'w') as f:
243
+ f.write(f"Error creating video: {str(e)}\nSecondary error: {str(inner_e)}")
244
+ return error_path
245
 
246
  # Cache the result
247
  self.video_cache[cache_key] = output_path
248
 
249
  return output_path
250
 
251
+ except Exception as e:
252
+ st.error(f"Critical error in video creation: {str(e)}")
253
+ # Create a text file explaining the error
254
+ error_path = f"{output_dir}/error_video_{int(time.time())}.txt"
255
+ with open(error_path, 'w') as f:
256
+ f.write(f"Error creating video: {str(e)}")
257
+ return error_path
258
  finally:
259
  # Clean up temporary file
260
  if os.path.exists(audio_path):
261
+ try:
262
+ os.unlink(audio_path)
263
+ except OSError: # ignore cleanup failures
264
+ pass
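The two-stage export used above (fast settings first, degraded settings on failure), sketched as an isolated helper; write_with_fallback is an illustrative name, not part of this module:

from moviepy.editor import ColorClip

def write_with_fallback(clip, path, threads=4):
    try:
        clip.write_videofile(path, fps=24, codec='libx264', audio_codec='aac',
                             preset='ultrafast', threads=threads, bitrate='1000k')
    except Exception:
        # Degrade gracefully: lower frame rate, fewer threads, lower bitrate.
        clip.write_videofile(path, fps=15, codec='libx264', audio_codec='aac',
                             preset='ultrafast', threads=2, bitrate='800k')

write_with_fallback(ColorClip((640, 640), color=(0, 0, 0), duration=1), "demo_export.mp4")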
265
 
266
+ def optimize_video(self, video_path, target_size=None, bitrate='1000k', threads=2):
267
  """Optimize video size and quality for web delivery"""
268
+ if not os.path.exists(video_path) or video_path.endswith('.txt'):
269
+ return video_path # Return as is if it's an error file or doesn't exist
270
+
271
+ try:
272
+ from moviepy.editor import VideoFileClip
273
+
274
+ # Load the video
275
+ clip = VideoFileClip(video_path)
276
+
277
+ # If target_size is not provided, use aspect ratio-based dimensions
278
+ if target_size is None:
279
+ target_size = self.get_video_dimensions()
280
+
281
+ # Resize to target size
282
+ clip_resized = clip.resize(target_size)
283
+
284
+ # Save optimized video
285
+ optimized_path = video_path.replace('.mp4', f'_optimized_{int(time.time())}.mp4')
286
+
287
+ try:
288
+ clip_resized.write_videofile(
289
+ optimized_path,
290
+ codec='libx264',
291
+ audio_codec='aac',
292
+ preset='ultrafast',
293
+ threads=threads,
294
+ bitrate=bitrate
295
+ )
296
+ except Exception as e:
297
+ st.warning(f"Error optimizing video: {str(e)}. Using original video.")
298
+ optimized_path = video_path
299
+
300
+ # Close clips to free memory
301
+ clip.close()
302
+ clip_resized.close()
303
+
304
+ return optimized_path
305
+ except Exception as e:
306
+ st.warning(f"Error in video optimization: {str(e)}. Using original video.")
307
+ return video_path
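A hypothetical call sequence for the new methods, assuming the class in this file is importable as VideoCreator and that the frame paths exist; it only illustrates the intended order of operations:

import io
from video_creator import VideoCreator  # assumed class name

vc = VideoCreator()
vc.set_aspect_ratio("9:16")  # portrait output for mobile

# create_video_from_frames expects an object with .getvalue(), e.g. a BytesIO
# or a Streamlit UploadedFile.
with open("narration.mp3", "rb") as f:
    audio = io.BytesIO(f.read())

raw_path = vc.create_video_from_frames(
    animated_frames=[["temp/seg0_0.png", "temp/seg0_1.png"]],  # illustrative paths
    audio_file=audio,
    segments=["First caption"],
)
final_path = vc.optimize_video(raw_path, bitrate='800k')
print(final_path)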
308
 
309
  def clear_cache(self):
310
  """Clear the video cache"""