garyuzair committed on
Commit
a87d440
·
verified ·
1 Parent(s): adccf8a

Upload 7 files

Files changed (7)
  1. animator.py +176 -17
  2. app.py +200 -4
  3. image_generator.py +50 -2
  4. prompt_generator.py +110 -19
  5. requirements.txt +15 -12
  6. transcriber.py +125 -89
  7. video_creator.py +239 -83
animator.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  import os
3
  import numpy as np
4
- from PIL import Image
5
  import time
6
  from concurrent.futures import ThreadPoolExecutor
7
  from functools import partial
@@ -9,11 +9,66 @@ from functools import partial
9
  class Animator:
10
  def __init__(self):
11
  self.frame_cache = {}
12
 
13
  def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
14
- """Add a simple zoom animation to an image"""
15
  # Check cache first
16
- cache_key = f"zoom_{image_path}_{num_frames}_{zoom_factor}"
17
  if cache_key in self.frame_cache:
18
  return self.frame_cache[cache_key]
19
 
@@ -35,6 +90,9 @@ class Animator:
35
  top = (img.height - scaled_img.height) // 2
36
  new_img.paste(scaled_img, (left, top))
37
 
 
 
 
38
  # Save the frame
39
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{len(frames)}.png"
40
  new_img.save(frame_path)
@@ -45,9 +103,9 @@ class Animator:
45
  return frames
46
 
47
  def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
48
- """Add a simple panning animation to an image"""
49
  # Check cache first
50
- cache_key = f"pan_{image_path}_{num_frames}_{direction}"
51
  if cache_key in self.frame_cache:
52
  return self.frame_cache[cache_key]
53
 
@@ -60,22 +118,32 @@ class Animator:
60
  # Create a sequence of panned images
61
  frames = []
62
63
  # Calculate pan parameters
64
  if direction == "right":
65
- x_shifts = np.linspace(0, img.width * 0.1, num_frames)
66
  y_shifts = np.zeros(num_frames)
67
  elif direction == "left":
68
- x_shifts = np.linspace(0, -img.width * 0.1, num_frames)
69
  y_shifts = np.zeros(num_frames)
70
  elif direction == "down":
71
  x_shifts = np.zeros(num_frames)
72
- y_shifts = np.linspace(0, img.height * 0.1, num_frames)
73
  elif direction == "up":
74
  x_shifts = np.zeros(num_frames)
75
- y_shifts = np.linspace(0, -img.height * 0.1, num_frames)
76
  else:
77
  # Default to right
78
- x_shifts = np.linspace(0, img.width * 0.1, num_frames)
79
  y_shifts = np.zeros(num_frames)
80
 
81
  for i in range(num_frames):
@@ -85,6 +153,9 @@ class Animator:
85
  # Paste the original image with shift
86
  new_img.paste(img, (int(x_shifts[i]), int(y_shifts[i])))
87
 
 
 
 
88
  # Save the frame
89
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
90
  new_img.save(frame_path)
@@ -95,9 +166,9 @@ class Animator:
95
  return frames
96
 
97
  def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
98
- """Add a fade in/out animation to an image"""
99
  # Check cache first
100
- cache_key = f"fade_{image_path}_{num_frames}_{fade_type}"
101
  if cache_key in self.frame_cache:
102
  return self.frame_cache[cache_key]
103
 
@@ -122,10 +193,86 @@ class Animator:
122
  # Create a new image with adjusted brightness
123
  enhancer = Image.new("RGBA", img.size, (0, 0, 0, 0))
124
  new_img = Image.blend(enhancer, img.convert("RGBA"), alpha)
125
 
126
  # Save the frame
127
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
128
- new_img.convert("RGB").save(frame_path)
129
  frames.append(frame_path)
130
 
131
  # Cache the result
@@ -133,18 +280,30 @@ class Animator:
133
  return frames
134
 
135
  def animate_single_image(self, img_path, animation_type="random", output_dir="temp"):
136
- """Animate a single image"""
137
  # Choose animation type
138
- animation_types = ["zoom", "pan_right", "pan_left", "fade_in"]
 
 
 
 
 
 
 
 
139
 
140
  if animation_type == "random":
141
  # Use hash of image path to deterministically select animation type
142
- chosen_type = animation_types[hash(img_path) % len(animation_types)]
 
 
143
  else:
144
  chosen_type = animation_type
145
 
146
  # Apply the chosen animation
147
- if chosen_type.startswith("pan"):
 
 
148
  direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
149
  frames = self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
150
  elif chosen_type.startswith("fade"):
 
1
  import streamlit as st
2
  import os
3
  import numpy as np
4
+ from PIL import Image, ImageEnhance, ImageFilter, ImageDraw
5
  import time
6
  from concurrent.futures import ThreadPoolExecutor
7
  from functools import partial
 
9
  class Animator:
10
  def __init__(self):
11
  self.frame_cache = {}
12
+ self.aspect_ratio = "1:1" # Default aspect ratio
13
+
14
+ def set_aspect_ratio(self, aspect_ratio):
15
+ """Set the aspect ratio for animations"""
16
+ self.aspect_ratio = aspect_ratio
17
+
18
+ def apply_cinematic_effects(self, image):
19
+ """Apply cinematic effects to enhance the frame quality"""
20
+ try:
21
+ # Convert to PIL Image if it's a path
22
+ if isinstance(image, str):
23
+ img = Image.open(image)
24
+ else:
25
+ img = image
26
+
27
+ # Enhance contrast slightly
28
+ enhancer = ImageEnhance.Contrast(img)
29
+ img = enhancer.enhance(1.2)
30
+
31
+ # Enhance color saturation slightly
32
+ enhancer = ImageEnhance.Color(img)
33
+ img = enhancer.enhance(1.1)
34
+
35
+ # Add subtle vignette effect
36
+ # Create a radial gradient mask
37
+ mask = Image.new('L', img.size, 255)
38
+ draw = ImageDraw.Draw(mask)
39
+
40
+ width, height = img.size
41
+ center_x, center_y = width // 2, height // 2
42
+ max_radius = min(width, height) // 2
43
+
44
+ for y in range(height):
45
+ for x in range(width):
46
+ # Calculate distance from center
47
+ distance = np.sqrt((x - center_x)**2 + (y - center_y)**2)
48
+ # Create vignette effect (darker at edges)
49
+ intensity = int(255 * (1 - 0.3 * (distance / max_radius)**2))
50
+ mask.putpixel((x, y), intensity)
51
+
52
+ # Apply the mask
53
+ img = Image.composite(img, Image.new('RGB', img.size, (0, 0, 0)), mask)
54
+
55
+ # Add subtle film grain
56
+ grain = Image.effect_noise((img.width, img.height), 10)
57
+ grain = grain.convert('L')
58
+ grain = grain.filter(ImageFilter.GaussianBlur(radius=1))
59
+ img = Image.blend(img, Image.composite(img, Image.new('RGB', img.size, (128, 128, 128)), grain), 0.05)
60
+
61
+ return img
62
+ except Exception as e:
63
+ # If effects fail, return original image
64
+ if isinstance(image, str):
65
+ return Image.open(image)
66
+ return image
67
 
68
  def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
69
+ """Add a simple zoom animation to an image with cinematic effects"""
70
  # Check cache first
71
+ cache_key = f"zoom_{image_path}_{num_frames}_{zoom_factor}_{self.aspect_ratio}"
72
  if cache_key in self.frame_cache:
73
  return self.frame_cache[cache_key]
74
 
 
90
  top = (img.height - scaled_img.height) // 2
91
  new_img.paste(scaled_img, (left, top))
92
 
93
+ # Apply cinematic effects
94
+ new_img = self.apply_cinematic_effects(new_img)
95
+
96
  # Save the frame
97
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{len(frames)}.png"
98
  new_img.save(frame_path)
 
103
  return frames
104
 
105
  def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
106
+ """Add a simple panning animation to an image with cinematic effects"""
107
  # Check cache first
108
+ cache_key = f"pan_{image_path}_{num_frames}_{direction}_{self.aspect_ratio}"
109
  if cache_key in self.frame_cache:
110
  return self.frame_cache[cache_key]
111
 
 
118
  # Create a sequence of panned images
119
  frames = []
120
 
121
+ # Calculate pan parameters based on aspect ratio
122
+ # For portrait (9:16), horizontal panning should be more subtle
123
+ # For landscape (16:9), vertical panning should be more subtle
124
+ pan_factor = 0.1 # Default pan factor
125
+
126
+ if self.aspect_ratio == "9:16" and (direction == "left" or direction == "right"):
127
+ pan_factor = 0.05 # Reduce horizontal pan for portrait
128
+ elif self.aspect_ratio == "16:9" and (direction == "up" or direction == "down"):
129
+ pan_factor = 0.05 # Reduce vertical pan for landscape
130
+
131
  # Calculate pan parameters
132
  if direction == "right":
133
+ x_shifts = np.linspace(0, img.width * pan_factor, num_frames)
134
  y_shifts = np.zeros(num_frames)
135
  elif direction == "left":
136
+ x_shifts = np.linspace(0, -img.width * pan_factor, num_frames)
137
  y_shifts = np.zeros(num_frames)
138
  elif direction == "down":
139
  x_shifts = np.zeros(num_frames)
140
+ y_shifts = np.linspace(0, img.height * pan_factor, num_frames)
141
  elif direction == "up":
142
  x_shifts = np.zeros(num_frames)
143
+ y_shifts = np.linspace(0, -img.height * pan_factor, num_frames)
144
  else:
145
  # Default to right
146
+ x_shifts = np.linspace(0, img.width * pan_factor, num_frames)
147
  y_shifts = np.zeros(num_frames)
148
 
149
  for i in range(num_frames):
 
153
  # Paste the original image with shift
154
  new_img.paste(img, (int(x_shifts[i]), int(y_shifts[i])))
155
 
156
+ # Apply cinematic effects
157
+ new_img = self.apply_cinematic_effects(new_img)
158
+
159
  # Save the frame
160
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
161
  new_img.save(frame_path)
 
166
  return frames
167
 
168
  def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
169
+ """Add a fade in/out animation to an image with cinematic effects"""
170
  # Check cache first
171
+ cache_key = f"fade_{image_path}_{num_frames}_{fade_type}_{self.aspect_ratio}"
172
  if cache_key in self.frame_cache:
173
  return self.frame_cache[cache_key]
174
 
 
193
  # Create a new image with adjusted brightness
194
  enhancer = Image.new("RGBA", img.size, (0, 0, 0, 0))
195
  new_img = Image.blend(enhancer, img.convert("RGBA"), alpha)
196
+ new_img = new_img.convert("RGB")
197
+
198
+ # Apply cinematic effects
199
+ new_img = self.apply_cinematic_effects(new_img)
200
+
201
+ # Save the frame
202
+ frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
203
+ new_img.save(frame_path)
204
+ frames.append(frame_path)
205
+
206
+ # Cache the result
207
+ self.frame_cache[cache_key] = frames
208
+ return frames
209
+
210
+ def add_ken_burns_effect(self, image_path, num_frames=10, output_dir="temp"):
211
+ """Add a Ken Burns effect (combination of pan and zoom) with cinematic effects"""
212
+ # Check cache first
213
+ cache_key = f"kenburns_{image_path}_{num_frames}_{self.aspect_ratio}"
214
+ if cache_key in self.frame_cache:
215
+ return self.frame_cache[cache_key]
216
+
217
+ # Ensure output directory exists
218
+ os.makedirs(output_dir, exist_ok=True)
219
+
220
+ # Load the image
221
+ img = Image.open(image_path)
222
+
223
+ # Create a sequence of images with Ken Burns effect
224
+ frames = []
225
+
226
+ # Determine direction based on aspect ratio and image content
227
+ import random
228
+ if self.aspect_ratio == "16:9":
229
+ # For landscape, prefer horizontal movement
230
+ direction = random.choice(["right", "left"])
231
+ elif self.aspect_ratio == "9:16":
232
+ # For portrait, prefer vertical movement
233
+ direction = random.choice(["up", "down"])
234
+ else:
235
+ # For square, random direction
236
+ direction = random.choice(["right", "left", "up", "down"])
237
+
238
+ # Calculate pan parameters
239
+ if direction == "right":
240
+ x_shifts = np.linspace(0, img.width * 0.05, num_frames)
241
+ y_shifts = np.zeros(num_frames)
242
+ elif direction == "left":
243
+ x_shifts = np.linspace(0, -img.width * 0.05, num_frames)
244
+ y_shifts = np.zeros(num_frames)
245
+ elif direction == "down":
246
+ x_shifts = np.zeros(num_frames)
247
+ y_shifts = np.linspace(0, img.height * 0.05, num_frames)
248
+ elif direction == "up":
249
+ x_shifts = np.zeros(num_frames)
250
+ y_shifts = np.linspace(0, -img.height * 0.05, num_frames)
251
+
252
+ # Calculate zoom factors
253
+ zoom_factors = np.linspace(1.0, 1.05, num_frames)
254
+
255
+ for i in range(num_frames):
256
+ # Apply zoom
257
+ size = (int(img.width * zoom_factors[i]), int(img.height * zoom_factors[i]))
258
+ zoomed_img = img.resize(size, Image.LANCZOS)
259
+
260
+ # Create a new image with the same size as original
261
+ new_img = Image.new("RGB", (img.width, img.height))
262
+
263
+ # Calculate position with both zoom and pan
264
+ left = (img.width - zoomed_img.width) // 2 + int(x_shifts[i])
265
+ top = (img.height - zoomed_img.height) // 2 + int(y_shifts[i])
266
+
267
+ # Paste the zoomed image with shift
268
+ new_img.paste(zoomed_img, (left, top))
269
+
270
+ # Apply cinematic effects
271
+ new_img = self.apply_cinematic_effects(new_img)
272
 
273
  # Save the frame
274
  frame_path = f"{output_dir}/frame_{os.path.basename(image_path)}_{i}.png"
275
+ new_img.save(frame_path)
276
  frames.append(frame_path)
277
 
278
  # Cache the result
 
280
  return frames
281
 
282
  def animate_single_image(self, img_path, animation_type="random", output_dir="temp"):
283
+ """Animate a single image with cinematic effects"""
284
  # Choose animation type
285
+ animation_types = ["zoom", "pan_right", "pan_left", "fade_in", "ken_burns"]
286
+
287
+ # For different aspect ratios, prioritize certain animations
288
+ if self.aspect_ratio == "16:9":
289
+ # For landscape, prioritize horizontal panning
290
+ animation_types = ["zoom", "pan_left", "pan_right", "ken_burns", "fade_in"]
291
+ elif self.aspect_ratio == "9:16":
292
+ # For portrait, prioritize vertical panning
293
+ animation_types = ["zoom", "ken_burns", "fade_in", "pan_up", "pan_down"]
294
 
295
  if animation_type == "random":
296
  # Use hash of image path to deterministically select animation type
297
+ import random
298
+ random.seed(hash(img_path))
299
+ chosen_type = random.choice(animation_types)
300
  else:
301
  chosen_type = animation_type
302
 
303
  # Apply the chosen animation
304
+ if chosen_type == "ken_burns":
305
+ frames = self.add_ken_burns_effect(img_path, output_dir=output_dir)
306
+ elif chosen_type.startswith("pan"):
307
  direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
308
  frames = self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
309
  elif chosen_type.startswith("fade"):
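For reference, a minimal sketch of driving the Animator API added in this commit from a plain script; the input image path and output directory are assumptions for illustration, not part of the commit:

from animator import Animator

animator = Animator()
animator.set_aspect_ratio("9:16")  # new setter; biases pan direction and Ken Burns movement
frames = animator.animate_single_image("scene.png", animation_type="ken_burns", output_dir="temp")
print(f"wrote {len(frames)} frames, first: {frames[0]}")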
app.py CHANGED
@@ -6,6 +6,7 @@ import concurrent.futures
6
  from functools import partial
7
  import torch
8
  import hashlib
 
9
 
10
  from transcriber import AudioTranscriber
11
  from prompt_generator import PromptGenerator
@@ -116,10 +117,10 @@ def process_audio_segment(segment, transcriber):
116
  st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
117
  return ""
118
 
119
- def generate_prompt_for_segment(transcription, prompt_generator):
120
  """Generate a prompt for a single transcription in parallel"""
121
  try:
122
- return prompt_generator.generate_optimized_prompt(transcription)
123
  except Exception as e:
124
  st.warning(f"Error generating prompt: {str(e)}. Using fallback prompt.")
125
  return f"{transcription}, visual scene, detailed, vibrant, cinematic"
@@ -204,7 +205,7 @@ def main():
204
  help="How many scenes to create in your video")
205
  animation_type = st.selectbox(
206
  "Animation style",
207
- ["random", "zoom", "pan_right", "pan_left", "fade_in"],
208
  help="Choose how images will animate in your video"
209
  )
210
 
@@ -378,4 +379,199 @@ def main():
378
  trans = transcriber.transcribe_segment(segment)
379
  transcriptions.append(trans)
380
  except Exception as e:
381
- st.warning("Error transcribing"
6
  from functools import partial
7
  import torch
8
  import hashlib
9
+ from PIL import Image, ImageDraw
10
 
11
  from transcriber import AudioTranscriber
12
  from prompt_generator import PromptGenerator
 
117
  st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
118
  return ""
119
 
120
+ def generate_prompt_for_segment(transcription, prompt_generator, aspect_ratio="16:9"):
121
  """Generate a prompt for a single transcription in parallel"""
122
  try:
123
+ return prompt_generator.generate_optimized_prompt(transcription, aspect_ratio)
124
  except Exception as e:
125
  st.warning(f"Error generating prompt: {str(e)}. Using fallback prompt.")
126
  return f"{transcription}, visual scene, detailed, vibrant, cinematic"
 
205
  help="How many scenes to create in your video")
206
  animation_type = st.selectbox(
207
  "Animation style",
208
+ ["random", "zoom", "pan_right", "pan_left", "fade_in", "ken_burns"],
209
  help="Choose how images will animate in your video"
210
  )
211
 
 
379
  trans = transcriber.transcribe_segment(segment)
380
  transcriptions.append(trans)
381
  except Exception as e:
382
+ st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
383
+ transcriptions.append("")
384
+
385
+ # Display transcriptions with better styling
386
+ progress_bar.progress(30)
387
+ st.markdown("### 📝 Transcriptions")
388
+ for i, (trans, (start, end)) in enumerate(zip(transcriptions, timestamps)):
389
+ st.markdown(f"""
390
+ <div style="background-color: #f0f2f6; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
391
+ <strong>Segment {i+1} ({start:.1f}s - {end:.1f}s):</strong> {trans}
392
+ </div>
393
+ """, unsafe_allow_html=True)
394
+
395
+ # Step 3: Generate prompts in parallel
396
+ status_text.text("Generating prompts from transcriptions...")
397
+ status_message.markdown("✍️ **Creating image descriptions...**")
398
+ if parallel_processing:
399
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
400
+ # Create a partial function with the prompt generator and aspect ratio
401
+ prompt_func = partial(generate_prompt_for_segment,
402
+ prompt_generator=prompt_generator,
403
+ aspect_ratio=selected_aspect_ratio)
404
+ # Generate prompts in parallel
405
+ prompts = list(executor.map(prompt_func, transcriptions))
406
+ else:
407
+ prompts = []
408
+ for trans in transcriptions:
409
+ try:
410
+ prompt = prompt_generator.generate_optimized_prompt(trans, selected_aspect_ratio)
411
+ prompts.append(prompt)
412
+ except Exception as e:
413
+ st.warning(f"Error generating prompt: {str(e)}. Using fallback prompt.")
414
+ prompts.append(f"{trans}, visual scene, detailed, vibrant, cinematic")
415
+
416
+ # Display prompts with better styling
417
+ progress_bar.progress(40)
418
+ st.markdown("### 🖋️ Generated Prompts")
419
+ for i, prompt in enumerate(prompts):
420
+ st.markdown(f"""
421
+ <div style="background-color: #e8f4f8; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
422
+ <strong>Prompt {i+1}:</strong> {prompt}
423
+ </div>
424
+ """, unsafe_allow_html=True)
425
+
426
+ # Step 4: Generate images in parallel
427
+ status_text.text("Generating images from prompts...")
428
+ status_message.markdown("🎨 **Creating images...**")
429
+ if parallel_processing:
430
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
431
+ # Create a partial function with the image generator
432
+ image_func = partial(generate_image_for_prompt, image_generator=image_generator)
433
+ # Generate images in parallel
434
+ images = list(executor.map(image_func, prompts))
435
+ else:
436
+ images = []
437
+ for i, prompt in enumerate(prompts):
438
+ status_text.text(f"Generating image {i+1}/{len(prompts)}...")
439
+ try:
440
+ img_path = image_generator.generate_image(prompt)
441
+ images.append(img_path)
442
+ except Exception as e:
443
+ st.warning(f"Error generating image: {str(e)}. Using fallback image.")
444
+ # Create a fallback image
445
+ from PIL import Image, ImageDraw
446
+ img = Image.new('RGB', image_generator.target_size, color=(240, 240, 240))
447
+ draw = ImageDraw.Draw(img)
448
+ draw.text((10, 10), prompt[:50], fill=(0, 0, 0))
449
+ path = f"temp/fallback_{int(time.time() * 1000)}.png"
450
+ img.save(path)
451
+ images.append(path)
452
+
453
+ # Display images with better styling
454
+ progress_bar.progress(60)
455
+ st.markdown("### 🖼️ Generated Images")
456
+ image_cols = st.columns(min(len(images), 3))
457
+ for i, img_path in enumerate(images):
458
+ with image_cols[i % len(image_cols)]:
459
+ st.image(img_path, caption=f"Image {i+1}", use_column_width=True)
460
+
461
+ # Step 5: Add animations in parallel
462
+ status_text.text("Adding animations to images...")
463
+ status_message.markdown("✨ **Adding animations...**")
464
+ if parallel_processing:
465
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
466
+ # Create a partial function with the animator and animation type
467
+ animate_func = partial(animate_image, animator=animator, animation_type=animation_type)
468
+ # Animate images in parallel
469
+ animated_frames = list(executor.map(animate_func, images))
470
+ else:
471
+ animated_frames = []
472
+ for i, img_path in enumerate(images):
473
+ status_text.text(f"Animating image {i+1}/{len(images)}...")
474
+ try:
475
+ frames = animator.animate_single_image(img_path, animation_type)
476
+ animated_frames.append(frames)
477
+ except Exception as e:
478
+ st.warning(f"Error animating image: {str(e)}. Using static frames.")
479
+ # Create a sequence of identical frames as fallback
480
+ frames = []
481
+ for _ in range(10):
482
+ frames.append(img_path)
483
+ animated_frames.append(frames)
484
+
485
+ progress_bar.progress(80)
486
+
487
+ # Step 6: Create video
488
+ status_text.text("Creating final video...")
489
+ status_message.markdown("🎬 **Assembling video...**")
490
+ output_video = video_creator.create_video_from_frames(
491
+ animated_frames,
492
+ audio_file,
493
+ segments=transcriptions,
494
+ timestamps=timestamps,
495
+ parallel=parallel_processing,
496
+ max_workers=max_workers
497
+ )
498
+
499
+ # Check if output is an error file
500
+ if output_video.endswith('.txt'):
501
+ with open(output_video, 'r') as f:
502
+ error_message = f.read()
503
+ st.error(f"Error creating video: {error_message}")
504
+ st.stop()
505
+
506
+ # Optimize video if needed
507
+ if video_quality != "High":
508
+ status_text.text("Optimizing video for web...")
509
+ status_message.markdown("⚙️ **Optimizing video...**")
510
+ output_video = video_creator.optimize_video(
511
+ output_video,
512
+ bitrate=bitrate,
513
+ threads=max_workers
514
+ )
515
+
516
+ # Cache the result if caching is enabled
517
+ if use_caching:
518
+ import shutil
519
+ cached_path = result_cache.get_cache_path(cache_key, ".mp4")
520
+ shutil.copy(output_video, cached_path)
521
+
522
+ progress_bar.progress(100)
523
+ status_text.text("Video creation complete!")
524
+ status_message.markdown("✅ **Done!**")
525
+
526
+ # Step 7: Display and provide download link with better styling
527
+ st.markdown("### 🎥 Your Video")
528
+ st.video(output_video)
529
+
530
+ st.markdown("### 📥 Download")
531
+ with open(output_video, "rb") as file:
532
+ st.download_button(
533
+ label="📥 Download Video",
534
+ data=file,
535
+ file_name=f"audio_to_video_{selected_aspect_ratio.replace(':', '_')}.mp4",
536
+ mime="video/mp4",
537
+ use_container_width=True
538
+ )
539
+
540
+ # Performance metrics
541
+ st.markdown("### ⏱️ Performance Metrics")
542
+ st.info(f"""
543
+ - Video Format: {aspect_ratio}
544
+ - Parallel Processing: {'Enabled' if parallel_processing else 'Disabled'}
545
+ - Workers: {max_workers}
546
+ - Image Size: {actual_image_size[0]}x{actual_image_size[1]}
547
+ - Inference Steps: {inference_steps}
548
+ - Video Quality: {video_quality}
549
+ """)
550
+
551
+ # Clean up temporary files
552
+ status_text.text("Cleaning up temporary files...")
553
+ for path in images + [p for frames in animated_frames for p in frames]:
554
+ if os.path.exists(path):
555
+ try:
556
+ os.remove(path)
557
+ except:
558
+ pass
559
+
560
+ status_text.text("All done! Your video is ready for download.")
561
+
562
+ except Exception as e:
563
+ st.error(f"An error occurred: {str(e)}")
564
+ st.exception(e)
565
+
566
+ # Provide troubleshooting tips
567
+ st.markdown("### 🔧 Troubleshooting Tips")
568
+ st.info("""
569
+ - Try reducing the number of segments
570
+ - Use a smaller image size
571
+ - Reduce inference steps
572
+ - Make sure your audio file is in a supported format
573
+ - Clear the cache and try again
574
+ """)
575
+
576
+ if __name__ == "__main__":
577
+ main()
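app.py repeats one fan-out pattern at each stage: bind the shared arguments with functools.partial, then map the per-segment inputs across a ThreadPoolExecutor. A self-contained sketch of that pattern (the worker and inputs below are stand-ins, not the app's real functions):

from concurrent.futures import ThreadPoolExecutor
from functools import partial

def describe(text, style):
    # stand-in for generate_prompt_for_segment / generate_image_for_prompt
    return f"{text}, {style}"

texts = ["a harbour at dawn", "an old library", "a rain-soaked street"]
worker = partial(describe, style="cinematic")  # fix the shared keyword argument
with ThreadPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(worker, texts))  # results keep the input order
print(results)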
image_generator.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
2
  import torch
3
  import os
4
  import numpy as np
5
- from PIL import Image
6
  import time
7
  from concurrent.futures import ThreadPoolExecutor
8
  from functools import partial
@@ -114,6 +114,48 @@ class ImageGenerator:
114
  # Default to original size
115
  return base_size
116
 
117
  def generate_image(self, prompt, output_dir="temp"):
118
  """Generate a single image from a prompt"""
119
  # Ensure output directory exists
@@ -134,9 +176,12 @@ class ImageGenerator:
134
  # Resize to target size for consistency and performance
135
  if image.size != self.target_size:
136
  image = image.resize(self.target_size, Image.LANCZOS)
 
 
 
137
  else:
138
  # Fallback: Create a colored gradient image with text
139
- from PIL import Image, ImageDraw, ImageFont, ImageFilter
140
 
141
  # Create a base image with gradient background
142
  image = Image.new('RGB', self.target_size, color=(240, 240, 240))
@@ -242,6 +287,9 @@ class ImageGenerator:
242
  # Resize to target size
243
  img = img.resize(target_size, Image.LANCZOS)
244
 
 
 
 
245
  # Save optimized image
246
  img.save(image_path)
247
 
 
2
  import torch
3
  import os
4
  import numpy as np
5
+ from PIL import Image, ImageEnhance, ImageFilter, ImageDraw
6
  import time
7
  from concurrent.futures import ThreadPoolExecutor
8
  from functools import partial
 
114
  # Default to original size
115
  return base_size
116
 
117
+ def apply_cinematic_effects(self, image):
118
+ """Apply cinematic effects to enhance the image quality"""
119
+ try:
120
+ # Enhance contrast slightly
121
+ enhancer = ImageEnhance.Contrast(image)
122
+ image = enhancer.enhance(1.2)
123
+
124
+ # Enhance color saturation slightly
125
+ enhancer = ImageEnhance.Color(image)
126
+ image = enhancer.enhance(1.1)
127
+
128
+ # Add subtle vignette effect
129
+ # Create a radial gradient mask
130
+ mask = Image.new('L', image.size, 255)
131
+ draw = ImageDraw.Draw(mask)
132
+
133
+ width, height = image.size
134
+ center_x, center_y = width // 2, height // 2
135
+ max_radius = min(width, height) // 2
136
+
137
+ for y in range(height):
138
+ for x in range(width):
139
+ # Calculate distance from center
140
+ distance = np.sqrt((x - center_x)**2 + (y - center_y)**2)
141
+ # Create vignette effect (darker at edges)
142
+ intensity = int(255 * (1 - 0.3 * (distance / max_radius)**2))
143
+ mask.putpixel((x, y), intensity)
144
+
145
+ # Apply the mask
146
+ image = Image.composite(image, Image.new('RGB', image.size, (0, 0, 0)), mask)
147
+
148
+ # Add subtle film grain
149
+ grain = Image.effect_noise((image.width, image.height), 10)
150
+ grain = grain.convert('L')
151
+ grain = grain.filter(ImageFilter.GaussianBlur(radius=1))
152
+ image = Image.blend(image, Image.composite(image, Image.new('RGB', image.size, (128, 128, 128)), grain), 0.05)
153
+
154
+ return image
155
+ except Exception as e:
156
+ # If effects fail, return original image
157
+ return image
158
+
159
  def generate_image(self, prompt, output_dir="temp"):
160
  """Generate a single image from a prompt"""
161
  # Ensure output directory exists
 
176
  # Resize to target size for consistency and performance
177
  if image.size != self.target_size:
178
  image = image.resize(self.target_size, Image.LANCZOS)
179
+
180
+ # Apply cinematic effects
181
+ image = self.apply_cinematic_effects(image)
182
  else:
183
  # Fallback: Create a colored gradient image with text
184
+ from PIL import Image, ImageDraw, ImageFilter
185
 
186
  # Create a base image with gradient background
187
  image = Image.new('RGB', self.target_size, color=(240, 240, 240))
 
287
  # Resize to target size
288
  img = img.resize(target_size, Image.LANCZOS)
289
 
290
+ # Apply cinematic effects
291
+ img = self.apply_cinematic_effects(img)
292
+
293
  # Save optimized image
294
  img.save(image_path)
295
 
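One performance note: apply_cinematic_effects builds its vignette mask with a per-pixel putpixel loop, which is slow in pure Python at typical image sizes. A vectorized sketch of the same radial falloff with NumPy, offered as an alternative rather than what the commit ships:

import numpy as np
from PIL import Image

def vignette_mask(width, height, strength=0.3):
    # same falloff as the loop: 255 * (1 - strength * (distance / max_radius) ** 2)
    ys, xs = np.ogrid[:height, :width]
    cx, cy = width // 2, height // 2
    max_radius = min(width, height) / 2
    dist = np.sqrt((xs - cx) ** 2 + (ys - cy) ** 2)
    intensity = 255 * (1 - strength * (dist / max_radius) ** 2)
    return Image.fromarray(np.clip(intensity, 0, 255).astype(np.uint8))

# usage, mirroring the commit's compositing step:
# img = Image.composite(img, Image.new("RGB", img.size, (0, 0, 0)), vignette_mask(*img.size))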
prompt_generator.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  import torch
3
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
 
5
  class PromptGenerator:
6
  def __init__(self):
@@ -36,11 +36,11 @@ class PromptGenerator:
36
 
37
  return self.model, self.tokenizer
38
 
39
- def generate_optimized_prompt(self, transcription):
40
- """Generate an optimized prompt from a single transcription"""
41
  # Check cache first
42
  import hashlib
43
- cache_key = hashlib.md5(transcription.encode()).hexdigest()
44
 
45
  if cache_key in self.prompt_cache:
46
  return self.prompt_cache[cache_key]
@@ -49,13 +49,91 @@ class PromptGenerator:
49
  if not transcription.strip():
50
  return ""
51
 
 
52
  try:
53
  # Try to use the model if available
54
  model, tokenizer = self.load_model()
55
 
56
  if model is not None and tokenizer is not None:
57
  # Create a prompt template focused on visual elements
58
- template = f"Describe a visual scene for: '{transcription}'"
59
 
60
  # Tokenize
61
  inputs = tokenizer(template, return_tensors="pt")
@@ -74,24 +152,32 @@ class PromptGenerator:
74
  generated_text = generated_text.replace(template, "").strip()
75
 
76
  # Create an optimized prompt with style keywords
77
- prompt = f"{transcription} {generated_text}, detailed, vibrant, cinematic"
78
  else:
79
- # Fallback method using keywords
80
- # Extract key nouns and adjectives from transcription
81
- words = transcription.split()
82
- # Add visual keywords
83
- prompt = f"{transcription}, visual scene, detailed, vibrant, cinematic"
84
  except Exception as e:
85
  st.warning(f"Error generating prompt: {str(e)}. Using fallback method.")
86
  # Fallback to a simple prompt
87
- prompt = f"{transcription}, visual scene, detailed, vibrant, cinematic"
 
 
 
 
 
 
88
 
89
  # Cache the result
90
- self.prompt_cache[cache_key] = prompt
91
 
92
- return prompt
93
 
94
- def generate_prompts(self, text, num_segments=5):
 
 
 
 
 
95
  """Generate image prompts from the transcription"""
96
  # Split text into segments
97
  words = text.split()
@@ -109,22 +195,27 @@ class PromptGenerator:
109
  prompts = []
110
  for segment in segments:
111
  # Create an enhanced prompt
112
- enhanced_prompt = self.generate_optimized_prompt(segment)
113
  prompts.append(enhanced_prompt)
114
 
115
  return prompts, segments
116
 
117
- def generate_optimized_prompts(self, transcriptions, parallel=False, max_workers=4):
118
  """Generate optimized prompts from transcribed segments with parallel processing"""
119
  import concurrent.futures
120
 
121
  if parallel and len(transcriptions) > 1:
122
  # Process in parallel
123
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
124
- prompts = list(executor.map(self.generate_optimized_prompt, transcriptions))
 
 
 
 
 
125
  else:
126
  # Process sequentially
127
- prompts = [self.generate_optimized_prompt(trans) for trans in transcriptions]
128
 
129
  return prompts
130
 
 
1
  import streamlit as st
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
  class PromptGenerator:
6
  def __init__(self):
 
36
 
37
  return self.model, self.tokenizer
38
 
39
+ def generate_hyper_realistic_prompt(self, transcription, aspect_ratio="16:9"):
40
+ """Generate a hyper-realistic prompt from a transcription with cinematic quality"""
41
  # Check cache first
42
  import hashlib
43
+ cache_key = hashlib.md5((transcription + aspect_ratio).encode()).hexdigest()
44
 
45
  if cache_key in self.prompt_cache:
46
  return self.prompt_cache[cache_key]
 
49
  if not transcription.strip():
50
  return ""
51
 
52
+ # Base prompt components
53
+ base_prompt = transcription.strip()
54
+
55
+ # Hyper-realism keywords
56
+ realism_keywords = [
57
+ "hyper realistic",
58
+ "photo realistic",
59
+ "ultra detailed",
60
+ "hyper detailed textures",
61
+ "8K resolution"
62
+ ]
63
+
64
+ # Lighting based on content analysis
65
+ lighting_options = {
66
+ "warm": ["golden hour glow", "warm sunlight", "sunset lighting", "soft warm glow"],
67
+ "dramatic": ["moody overcast", "dramatic lighting", "high contrast", "film noir shadows"],
68
+ "historical": ["candle light", "gas lamps", "torch glow", "lantern light", "flickering light"],
69
+ "modern": ["harsh industrial lighting", "fluorescent lighting", "neon glow", "studio lighting"]
70
+ }
71
+
72
+ # Camera effects
73
+ camera_effects = [
74
+ "shallow depth of field",
75
+ "film grain",
76
+ "cinematic composition"
77
+ ]
78
+
79
+ # Environmental details
80
+ environmental_details = [
81
+ "atmospheric",
82
+ "detailed environment",
83
+ "realistic textures",
84
+ "natural lighting"
85
+ ]
86
+
87
+ # Material details
88
+ material_details = [
89
+ "detailed materials",
90
+ "realistic textures",
91
+ "natural wear and tear"
92
+ ]
93
+
94
+ # Analyze transcription to determine appropriate lighting and mood
95
+ lower_trans = transcription.lower()
96
+
97
+ # Select lighting based on content
98
+ selected_lighting = []
99
+ if any(word in lower_trans for word in ["sunset", "warm", "evening", "afternoon", "golden"]):
100
+ selected_lighting = lighting_options["warm"]
101
+ elif any(word in lower_trans for word in ["dramatic", "dark", "night", "shadow", "mystery", "tension"]):
102
+ selected_lighting = lighting_options["dramatic"]
103
+ elif any(word in lower_trans for word in ["history", "ancient", "medieval", "old", "traditional", "past"]):
104
+ selected_lighting = lighting_options["historical"]
105
+ else:
106
+ selected_lighting = lighting_options["modern"]
107
+
108
+ # Select a random lighting keyword from the chosen category
109
+ import random
110
+ lighting_keyword = random.choice(selected_lighting)
111
+
112
+ # Select a random camera effect
113
+ camera_effect = random.choice(camera_effects)
114
+
115
+ # Select environmental details based on aspect ratio
116
+ if aspect_ratio == "16:9":
117
+ # For landscape, emphasize wide environmental shots
118
+ environmental_keyword = "wide angle " + random.choice(environmental_details)
119
+ elif aspect_ratio == "9:16":
120
+ # For portrait, emphasize vertical composition
121
+ environmental_keyword = "vertical composition " + random.choice(environmental_details)
122
+ else:
123
+ # For square, balanced composition
124
+ environmental_keyword = "balanced composition " + random.choice(environmental_details)
125
+
126
+ # Material detail
127
+ material_keyword = random.choice(material_details)
128
+
129
+ # Construct the enhanced prompt
130
  try:
131
  # Try to use the model if available
132
  model, tokenizer = self.load_model()
133
 
134
  if model is not None and tokenizer is not None:
135
  # Create a prompt template focused on visual elements
136
+ template = f"Create a hyper-realistic visual scene for: '{base_prompt}'"
137
 
138
  # Tokenize
139
  inputs = tokenizer(template, return_tensors="pt")
 
152
  generated_text = generated_text.replace(template, "").strip()
153
 
154
  # Create an optimized prompt with style keywords
155
+ scene_description = f"{base_prompt} {generated_text}"
156
  else:
157
+ # Fallback method using the base prompt
158
+ scene_description = base_prompt
 
 
 
159
  except Exception as e:
160
  st.warning(f"Error generating prompt: {str(e)}. Using fallback method.")
161
  # Fallback to a simple prompt
162
+ scene_description = base_prompt
163
+
164
+ # Combine all elements into a hyper-realistic prompt
165
+ realism_part = ", ".join(random.sample(realism_keywords, 3)) # Select 3 random realism keywords
166
+
167
+ # Final prompt construction with all elements
168
+ enhanced_prompt = f"{scene_description}, {realism_part}, {lighting_keyword}, {camera_effect}, {environmental_keyword}, {material_keyword}"
169
 
170
  # Cache the result
171
+ self.prompt_cache[cache_key] = enhanced_prompt
172
 
173
+ return enhanced_prompt
174
 
175
+ def generate_optimized_prompt(self, transcription, aspect_ratio="16:9"):
176
+ """Generate an optimized prompt from a single transcription"""
177
+ # This is now a wrapper for the hyper-realistic prompt generator
178
+ return self.generate_hyper_realistic_prompt(transcription, aspect_ratio)
179
+
180
+ def generate_prompts(self, text, num_segments=5, aspect_ratio="16:9"):
181
  """Generate image prompts from the transcription"""
182
  # Split text into segments
183
  words = text.split()
 
195
  prompts = []
196
  for segment in segments:
197
  # Create an enhanced prompt
198
+ enhanced_prompt = self.generate_hyper_realistic_prompt(segment, aspect_ratio)
199
  prompts.append(enhanced_prompt)
200
 
201
  return prompts, segments
202
 
203
+ def generate_optimized_prompts(self, transcriptions, parallel=False, max_workers=4, aspect_ratio="16:9"):
204
  """Generate optimized prompts from transcribed segments with parallel processing"""
205
  import concurrent.futures
206
 
207
  if parallel and len(transcriptions) > 1:
208
  # Process in parallel
209
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
210
+ # Create a function that includes aspect ratio
211
+ def generate_with_aspect(trans):
212
+ return self.generate_hyper_realistic_prompt(trans, aspect_ratio)
213
+
214
+ # Map with the new function
215
+ prompts = list(executor.map(generate_with_aspect, transcriptions))
216
  else:
217
  # Process sequentially
218
+ prompts = [self.generate_hyper_realistic_prompt(trans, aspect_ratio) for trans in transcriptions]
219
 
220
  return prompts
221
 
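A quick usage sketch of the reworked prompt API; the transcription text is made up and the printed output will vary because the lighting, camera, and realism keywords are sampled at random:

from prompt_generator import PromptGenerator

generator = PromptGenerator()
prompt = generator.generate_optimized_prompt(
    "fishermen repairing their nets at sunset",  # example transcription
    aspect_ratio="16:9",                         # also accepted: "9:16", "1:1"
)
print(prompt)
# e.g. "..., hyper realistic, ultra detailed, 8K resolution, golden hour glow,
#       shallow depth of field, wide angle atmospheric, detailed materials"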
requirements.txt CHANGED
@@ -1,13 +1,16 @@
1
- streamlit
2
- transformers
3
- torch --extra-index-url https://download.pytorch.org/whl/cpu
4
- torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
5
- diffusers
6
- accelerate
7
  moviepy==1.0.3
8
- librosa
9
- soundfile
10
- numpy
11
- pillow
12
- scipy
13
- matplotlib
 
 
 
 
1
+ streamlit==1.25.0
2
+ transformers==4.30.2
3
+ torch==2.0.1
4
+ torchaudio==2.0.2
5
+ diffusers==0.19.3
6
+ accelerate==0.21.0
7
  moviepy==1.0.3
8
+ pillow==9.5.0
9
+ numpy==1.24.3
10
+ scipy==1.10.1
11
+ matplotlib==3.7.2
12
+ librosa==0.10.0.post2
13
+ soundfile==0.12.1
14
+ huggingface-hub==0.16.4
15
+ ftfy==6.1.1
16
+ regex==2023.6.3
transcriber.py CHANGED
@@ -1,109 +1,65 @@
1
  import streamlit as st
2
- import torch
3
- from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
4
- import librosa
5
  import numpy as np
 
 
6
  import tempfile
7
  import os
8
  from concurrent.futures import ThreadPoolExecutor
 
9
 
10
  class AudioTranscriber:
11
  def __init__(self):
12
  self.model = None
13
  self.processor = None
14
- self.pipe = None
15
  self.transcription_cache = {}
16
 
17
  def load_model(self):
18
  """Load a lightweight transcription model"""
19
- if self.pipe is None:
20
- with st.spinner("Loading transcription model... This may take a moment."):
21
- # Use the small Whisper model to save resources
22
- model_id = "openai/whisper-small"
23
-
24
- # Use CPU for inference to save memory
25
- device = "cpu"
26
- torch_dtype = torch.float32
27
-
28
- # Load model with memory optimization settings
29
- self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
30
- model_id,
31
- torch_dtype=torch_dtype,
32
- low_cpu_mem_usage=True,
33
- use_safetensors=True
34
- )
35
- self.processor = AutoProcessor.from_pretrained(model_id)
36
-
37
- # Create pipeline for efficient processing
38
- self.pipe = pipeline(
39
- "automatic-speech-recognition",
40
- model=self.model,
41
- tokenizer=self.processor.tokenizer,
42
- feature_extractor=self.processor.feature_extractor,
43
- max_new_tokens=128,
44
- chunk_length_s=30,
45
- batch_size=16,
46
- device=device,
47
- )
48
- return self.pipe
49
-
50
- def transcribe(self, audio_file):
51
- """Transcribe the audio file using the loaded model"""
52
- # Generate a cache key based on the audio file
53
- import hashlib
54
- cache_key = hashlib.md5(audio_file.getvalue()).hexdigest()
55
-
56
- # Check if result is in cache
57
- if cache_key in self.transcription_cache:
58
- return self.transcription_cache[cache_key]
59
 
60
- # Load the model if not already loaded
61
- pipe = self.load_model()
62
-
63
- # Save the uploaded file to a temporary location
64
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
65
- tmp_file.write(audio_file.getvalue())
66
- tmp_path = tmp_file.name
67
-
68
- try:
69
- # Load audio using librosa for processing
70
- y, sr = librosa.load(tmp_path, sr=16000)
71
-
72
- # Process in smaller chunks for memory efficiency
73
- result = pipe(y)
74
- transcription = result["text"]
75
-
76
- # Cache the result
77
- self.transcription_cache[cache_key] = transcription
78
-
79
- return transcription
80
- finally:
81
- # Clean up temporary file
82
- if os.path.exists(tmp_path):
83
- os.unlink(tmp_path)
84
 
85
- def segment_audio(self, audio_file, num_segments=5):
86
  """Segment the audio file into chunks for processing"""
87
- # Save the uploaded file to a temporary location
88
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
89
  tmp_file.write(audio_file.getvalue())
90
- tmp_path = tmp_file.name
91
 
92
  try:
93
- # Load audio using librosa
94
- y, sr = librosa.load(tmp_path, sr=16000)
95
 
96
  # Get total duration
97
  duration = librosa.get_duration(y=y, sr=sr)
98
 
 
 
 
 
 
99
  # Calculate segment duration
100
- segment_duration = duration / num_segments
101
 
102
  # Create segments
103
  segments = []
104
  timestamps = []
105
 
106
- for i in range(num_segments):
107
  start_time = i * segment_duration
108
  end_time = min((i + 1) * segment_duration, duration)
109
 
@@ -117,32 +73,112 @@ class AudioTranscriber:
117
  timestamps.append((start_time, end_time))
118
 
119
  return segments, timestamps
 
 
120
  finally:
121
  # Clean up temporary file
122
- if os.path.exists(tmp_path):
123
- os.unlink(tmp_path)
 
 
 
124
 
125
- def transcribe_segment(self, segment):
126
  """Transcribe a single audio segment"""
127
- pipe = self.load_model()
128
- result = pipe(segment)
129
- return result["text"]
130
-
131
- def transcribe_segments(self, segments, parallel=False, max_workers=4):
132
- """Transcribe individual audio segments with optional parallel processing"""
133
- pipe = self.load_model()
134
 
 
 
135
  if parallel and len(segments) > 1:
136
  # Process in parallel using ThreadPoolExecutor
137
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
138
- # Process segments in parallel
139
- transcriptions = list(executor.map(self.transcribe_segment, segments))
 
 
 
140
  else:
141
  # Process sequentially
142
  transcriptions = []
143
  for segment in segments:
144
- result = pipe(segment)
145
- transcriptions.append(result["text"])
146
 
147
  return transcriptions
148
 
 
1
  import streamlit as st
 
 
 
2
  import numpy as np
3
+ import soundfile as sf
4
+ import librosa
5
  import tempfile
6
  import os
7
  from concurrent.futures import ThreadPoolExecutor
8
+ from functools import partial
9
 
10
  class AudioTranscriber:
11
  def __init__(self):
12
  self.model = None
13
  self.processor = None
 
14
  self.transcription_cache = {}
15
 
16
  def load_model(self):
17
  """Load a lightweight transcription model"""
18
+ if self.model is None:
19
+ with st.spinner("Loading transcription model..."):
20
+ try:
21
+ from transformers import pipeline
22
+
23
+ # Use a small model for transcription to save memory
24
+ self.model = pipeline(
25
+ "automatic-speech-recognition",
26
+ model="openai/whisper-small",
27
+ chunk_length_s=30,
28
+ device="cpu"
29
+ )
30
+ except Exception as e:
31
+ st.warning(f"Error loading transcription model: {str(e)}. Using fallback method.")
32
+ self.model = None
 
 
33
 
34
+ return self.model
 
 
35
 
36
+ def segment_audio(self, audio_file, num_segments=5, min_segment_duration=3.0):
37
  """Segment the audio file into chunks for processing"""
38
+ # Save the uploaded audio to a temporary file
39
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
40
  tmp_file.write(audio_file.getvalue())
41
+ audio_path = tmp_file.name
42
 
43
  try:
44
+ # Load the audio file
45
+ y, sr = librosa.load(audio_path, sr=None)
46
 
47
  # Get total duration
48
  duration = librosa.get_duration(y=y, sr=sr)
49
 
50
+ # Ensure we don't create segments that are too short
51
+ actual_segments = min(num_segments, int(duration / min_segment_duration))
52
+ if actual_segments < 1:
53
+ actual_segments = 1
54
+
55
  # Calculate segment duration
56
+ segment_duration = duration / actual_segments
57
 
58
  # Create segments
59
  segments = []
60
  timestamps = []
61
 
62
+ for i in range(actual_segments):
63
  start_time = i * segment_duration
64
  end_time = min((i + 1) * segment_duration, duration)
65
 
 
73
  timestamps.append((start_time, end_time))
74
 
75
  return segments, timestamps
76
+
77
+ except Exception as e:
78
+ st.warning(f"Error segmenting audio: {str(e)}. Using simplified segmentation.")
79
+
80
+ # Fallback: Create equal segments
81
+ try:
82
+ y, sr = sf.read(audio_path)
83
+ duration = len(y) / sr
84
+
85
+ # Ensure we don't create segments that are too short
86
+ actual_segments = min(num_segments, int(duration / min_segment_duration))
87
+ if actual_segments < 1:
88
+ actual_segments = 1
89
+
90
+ # Calculate segment duration
91
+ segment_duration = duration / actual_segments
92
+
93
+ # Create segments
94
+ segments = []
95
+ timestamps = []
96
+
97
+ for i in range(actual_segments):
98
+ start_time = i * segment_duration
99
+ end_time = min((i + 1) * segment_duration, duration)
100
+
101
+ # Convert time to samples
102
+ start_sample = int(start_time * sr)
103
+ end_sample = int(end_time * sr)
104
+
105
+ # Extract segment
106
+ segment = y[start_sample:end_sample]
107
+ segments.append(segment)
108
+ timestamps.append((start_time, end_time))
109
+
110
+ return segments, timestamps
111
+
112
+ except Exception as inner_e:
113
+ st.error(f"Critical error in audio segmentation: {str(inner_e)}")
114
+ # Last resort: Create dummy segments
115
+ segments = [np.zeros(16000) for _ in range(num_segments)] # 1-second silent segments
116
+ timestamps = [(i, i+1) for i in range(num_segments)]
117
+ return segments, timestamps
118
  finally:
119
  # Clean up temporary file
120
+ if os.path.exists(audio_path):
121
+ try:
122
+ os.unlink(audio_path)
123
+ except:
124
+ pass
125
 
126
+ def transcribe_segment(self, segment, sr=16000):
127
  """Transcribe a single audio segment"""
128
+ # Generate a cache key based on the audio data
129
+ import hashlib
130
+ cache_key = hashlib.md5(segment.tobytes()).hexdigest()
131
+
132
+ # Check if result is in cache
133
+ if cache_key in self.transcription_cache:
134
+ return self.transcription_cache[cache_key]
135
 
136
+ try:
137
+ # Load the model if not already loaded
138
+ model = self.load_model()
139
+
140
+ if model is not None:
141
+ # Save segment to a temporary file
142
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
143
+ sf.write(tmp_file.name, segment, sr)
144
+ segment_path = tmp_file.name
145
+
146
+ # Transcribe using the model
147
+ result = model(segment_path)
148
+ transcription = result["text"]
149
+
150
+ # Clean up temporary file
151
+ if os.path.exists(segment_path):
152
+ os.unlink(segment_path)
153
+ else:
154
+ # Fallback: Return empty string or placeholder
155
+ transcription = "Audio content"
156
+ except Exception as e:
157
+ st.warning(f"Error transcribing segment: {str(e)}. Using fallback method.")
158
+ # Fallback: Return empty string or placeholder
159
+ transcription = "Audio content"
160
+
161
+ # Cache the result
162
+ self.transcription_cache[cache_key] = transcription
163
+
164
+ return transcription
165
+
166
+ def transcribe_segments(self, segments, sr=16000, parallel=False, max_workers=4):
167
+ """Transcribe multiple audio segments with parallel processing"""
168
  if parallel and len(segments) > 1:
169
  # Process in parallel using ThreadPoolExecutor
170
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
171
+ # Create a partial function with fixed sample rate
172
+ transcribe_func = partial(self.transcribe_segment, sr=sr)
173
+
174
+ # Map and collect results
175
+ transcriptions = list(executor.map(transcribe_func, segments))
176
  else:
177
  # Process sequentially
178
  transcriptions = []
179
  for segment in segments:
180
+ transcription = self.transcribe_segment(segment, sr)
181
+ transcriptions.append(transcription)
182
 
183
  return transcriptions
184
 
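A minimal sketch of the reworked transcriber used outside Streamlit; the WAV path is an assumption, and io.BytesIO stands in for the uploaded-file object whose .getvalue() segment_audio expects. Note that segment_audio loads audio at the file's native sample rate, so the sr passed to transcribe_segments should match that rate:

import io
from transcriber import AudioTranscriber

transcriber = AudioTranscriber()
with open("speech.wav", "rb") as f:        # assumed 16 kHz mono WAV
    audio_file = io.BytesIO(f.read())      # exposes .getvalue(), like st.file_uploader's return

segments, timestamps = transcriber.segment_audio(audio_file, num_segments=5)
texts = transcriber.transcribe_segments(segments, sr=16000, parallel=True, max_workers=2)
for (start, end), text in zip(timestamps, texts):
    print(f"{start:5.1f}s - {end:5.1f}s  {text}")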
video_creator.py CHANGED
@@ -11,41 +11,124 @@ class VideoCreator:
11
  # Ensure output directory exists
12
  os.makedirs("outputs", exist_ok=True)
13
  self.video_cache = {}
 
14
 
15
- def create_segment_clip(self, frames, segment_duration, segment_text=None):
16
- """Create a video clip from frames with optional text overlay"""
17
- # Calculate frame duration based on segment duration
18
- frame_duration = segment_duration / len(frames)
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- # Create a clip from the frames
21
- segment_clip = ImageSequenceClip(frames, durations=[frame_duration] * len(frames))
22
 
23
- # Add text overlay if segment text is provided
24
- if segment_text:
 
 
 
25
  try:
26
- txt_clip = TextClip(
27
- segment_text,
28
- fontsize=24,
29
- color='white',
30
- bg_color='rgba(0,0,0,0.5)',
31
- size=(segment_clip.w, None),
32
- method='caption'
33
- ).set_duration(segment_clip.duration)
34
-
35
- txt_clip = txt_clip.set_position(('center', 'bottom'))
36
- segment_clip = CompositeVideoClip([segment_clip, txt_clip])
37
- except Exception as e:
38
- # If TextClip fails, continue without text overlay
39
- st.warning(f"Could not add text overlay: {e}")
40
-
41
- return segment_clip
 
 
42
 
43
  def create_video_from_frames(self, animated_frames, audio_file, segments=None, timestamps=None,
44
  output_dir="outputs", parallel=False, max_workers=4):
45
  """Create a video from animated frames synchronized with audio using parallel processing"""
46
  # Generate a cache key based on inputs
47
  import hashlib
48
- cache_key = f"{hashlib.md5(audio_file.getvalue()).hexdigest()}_{len(animated_frames)}"
49
 
50
  # Check if result is in cache
51
  if cache_key in self.video_cache:
@@ -72,83 +155,156 @@ class VideoCreator:
72
  # Create video clips for each animated segment
73
  video_clips = []
74
 
75
- if parallel and len(animated_frames) > 1:
76
- # Process segments in parallel
77
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
78
- # Prepare arguments for parallel processing
79
- args = []
 
 
 
 
 
 
 
 
 
 
80
  for i, frames in enumerate(animated_frames):
81
  segment_duration = segment_durations[min(i, len(segment_durations)-1)]
82
  segment_text = segments[i] if segments and i < len(segments) else None
83
- args.append((frames, segment_duration, segment_text))
84
-
85
- # Process in parallel
86
- video_clips = list(executor.map(lambda x: self.create_segment_clip(*x), args))
87
- else:
88
- # Process segments sequentially
89
- for i, frames in enumerate(animated_frames):
 
 
90
  segment_duration = segment_durations[min(i, len(segment_durations)-1)]
91
- segment_text = segments[i] if segments and i < len(segments) else None
92
-
93
- segment_clip = self.create_segment_clip(frames, segment_duration, segment_text)
94
- video_clips.append(segment_clip)
95
 
96
  # Concatenate all clips
97
- final_clip = concatenate_videoclips(video_clips)
98
-
99
- # Set the audio
100
- final_clip = final_clip.set_audio(audio_clip)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  # Write the result to a file
103
- output_path = f"{output_dir}/output_video_{int(time.time())}.mp4"
104
-
105
- # Use lower resolution and bitrate for faster processing
106
- final_clip.write_videofile(
107
- output_path,
108
- fps=24,
109
- codec='libx264',
110
- audio_codec='aac',
111
- preset='ultrafast', # Faster encoding
112
- threads=max_workers, # Use multiple threads for encoding
113
- bitrate='1000k' # Lower bitrate
114
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  # Cache the result
117
  self.video_cache[cache_key] = output_path
118
 
119
  return output_path
120
 
 
 
 
 
 
 
 
121
  finally:
122
  # Clean up temporary file
123
  if os.path.exists(audio_path):
124
- os.unlink(audio_path)
 
 
 
125
 
126
- def optimize_video(self, video_path, target_size=(640, 480), bitrate='1000k', threads=2):
127
  """Optimize video size and quality for web delivery"""
128
- from moviepy.editor import VideoFileClip
129
-
130
- # Load the video
131
- clip = VideoFileClip(video_path)
132
-
133
- # Resize to target size
134
- clip_resized = clip.resize(target_size)
135
-
136
- # Save optimized video
137
- optimized_path = video_path.replace('.mp4', f'_optimized_{int(time.time())}.mp4')
138
- clip_resized.write_videofile(
139
- optimized_path,
140
- codec='libx264',
141
- audio_codec='aac',
142
- preset='ultrafast',
143
- threads=threads,
144
- bitrate=bitrate
145
- )
146
-
147
- # Close clips to free memory
148
- clip.close()
149
- clip_resized.close()
150
-
151
- return optimized_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  def clear_cache(self):
154
  """Clear the video cache"""
 
11
  # Ensure output directory exists
12
  os.makedirs("outputs", exist_ok=True)
13
  self.video_cache = {}
14
+ self.aspect_ratio = "1:1" # Default aspect ratio
15
 
16
+ def set_aspect_ratio(self, aspect_ratio):
17
+ """Set the aspect ratio for video creation"""
18
+ self.aspect_ratio = aspect_ratio
19
+
20
+ def get_video_dimensions(self, base_size=None):
21
+ """Get video dimensions based on aspect ratio"""
22
+ if base_size is None:
23
+ # Default base sizes for different aspect ratios
24
+ if self.aspect_ratio == "1:1":
25
+ return (640, 640) # Square
26
+ elif self.aspect_ratio == "16:9":
27
+ return (854, 480) # Landscape HD
28
+ elif self.aspect_ratio == "9:16":
29
+ return (480, 854) # Portrait (mobile)
30
+ else:
31
+ return (640, 640) # Default square
32
 
33
+ # Calculate dimensions based on base size and aspect ratio
34
+ base_pixels = base_size[0] * base_size[1]
35
 
36
+ if self.aspect_ratio == "1:1":
37
+ # Square format
38
+ side = int(np.sqrt(base_pixels))
39
+ # Ensure even dimensions for video compatibility
40
+ side = side if side % 2 == 0 else side + 1
41
+ return (side, side)
42
+ elif self.aspect_ratio == "16:9":
43
+ # Landscape format
44
+ width = int(np.sqrt(base_pixels * 16 / 9))
45
+ height = int(width * 9 / 16)
46
+ # Ensure even dimensions for video compatibility
47
+ width = width if width % 2 == 0 else width + 1
48
+ height = height if height % 2 == 0 else height + 1
49
+ return (width, height)
50
+ elif self.aspect_ratio == "9:16":
51
+ # Portrait format
52
+ height = int(np.sqrt(base_pixels * 16 / 9))
53
+ width = int(height * 9 / 16)
54
+ # Ensure even dimensions for video compatibility
55
+ width = width if width % 2 == 0 else width + 1
56
+ height = height if height % 2 == 0 else height + 1
57
+ return (width, height)
58
+ else:
59
+ # Default to original size
60
+ return base_size
61
+
62
+ def create_segment_clip(self, frames, segment_duration, segment_text=None):
63
+ """Create a video clip from frames with optional text overlay"""
64
+ try:
65
+ # Calculate frame duration based on segment duration
66
+ frame_duration = segment_duration / len(frames)
67
+
68
+ # Create a clip from the frames
69
+ segment_clip = ImageSequenceClip(frames, durations=[frame_duration] * len(frames))
70
+
71
+ # Add text overlay if segment text is provided
72
+ if segment_text:
73
+ try:
74
+ # Adjust text size and position based on aspect ratio
75
+ fontsize = 24
76
+ position = ('center', 'bottom')
77
+
78
+ if self.aspect_ratio == "9:16":
79
+ # For portrait, make text smaller and position it lower
80
+ fontsize = 20
81
+ position = ('center', 0.9) # 90% from top
82
+ elif self.aspect_ratio == "16:9":
83
+ # For landscape, position text at bottom
84
+ position = ('center', 0.95) # 95% from top
85
+
86
+ txt_clip = TextClip(
87
+ segment_text,
88
+ fontsize=fontsize,
89
+ color='white',
90
+ bg_color='rgba(0,0,0,0.5)',
91
+ size=(segment_clip.w, None),
92
+ method='caption'
93
+ ).set_duration(segment_clip.duration)
94
+
95
+ txt_clip = txt_clip.set_position(position, relative=True) # treat 0.9/0.95 as fractions of the frame, not pixels
96
+ segment_clip = CompositeVideoClip([segment_clip, txt_clip])
97
+ except Exception as e:
98
+ # If TextClip fails, continue without text overlay
99
+ st.warning(f"Could not add text overlay: {str(e)}")
100
+
101
+ return segment_clip
102
+ except Exception as e:
103
+ st.warning(f"Error creating segment clip: {str(e)}. Using fallback method.")
104
+
105
+ # Fallback: Create a simple clip with the first frame
106
  try:
107
+ # Use just the first frame if there's an issue with the sequence
108
+ first_frame = frames[0] if frames else None
109
+ if first_frame and os.path.exists(first_frame):
110
+ segment_clip = ImageSequenceClip([first_frame], durations=[segment_duration])
111
+ return segment_clip
112
+ else:
113
+ # Create a blank clip if no frames are available
114
+ from PIL import Image
115
+ blank_img = Image.new('RGB', self.get_video_dimensions(), color=(0, 0, 0))
116
+ blank_path = tempfile.NamedTemporaryFile(suffix='.png', delete=False).name # mktemp is deprecated and race-prone
117
+ blank_img.save(blank_path)
118
+ segment_clip = ImageSequenceClip([blank_path], durations=[segment_duration])
119
+ return segment_clip
120
+ except Exception as inner_e:
121
+ st.error(f"Critical error in fallback clip creation: {str(inner_e)}")
122
+ # Last resort: Create an extremely simple clip
123
+ from moviepy.editor import ColorClip
124
+ return ColorClip(self.get_video_dimensions(), color=(0, 0, 0), duration=segment_duration)
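The caption overlay above reduced to a standalone sketch, assuming moviepy 1.x and an ImageMagick install for TextClip; fractional positions such as 0.9 only act as fractions of the frame when relative=True is passed:

from moviepy.editor import ColorClip, TextClip, CompositeVideoClip

canvas = ColorClip((480, 854), color=(0, 0, 0), duration=2)  # 9:16 portrait frame
caption = (TextClip("Sample caption", fontsize=20, color='white',
                    bg_color='rgba(0,0,0,0.5)', size=(canvas.w, None), method='caption')
           .set_duration(canvas.duration)
           .set_position(('center', 0.9), relative=True))  # 90% down the frame
CompositeVideoClip([canvas, caption]).write_videofile("caption_demo.mp4", fps=24)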
125
 
126
  def create_video_from_frames(self, animated_frames, audio_file, segments=None, timestamps=None,
127
  output_dir="outputs", parallel=False, max_workers=4):
128
  """Create a video from animated frames synchronized with audio using parallel processing"""
129
  # Generate a cache key based on inputs
130
  import hashlib
131
+ cache_key = f"{hashlib.md5(audio_file.getvalue()).hexdigest()}_{len(animated_frames)}_{self.aspect_ratio}"
132
 
133
  # Check if result is in cache
134
  if cache_key in self.video_cache:
 
155
  # Create video clips for each animated segment
156
  video_clips = []
157
 
158
+ try:
159
+ if parallel and len(animated_frames) > 1:
160
+ # Process segments in parallel
161
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
162
+ # Prepare arguments for parallel processing
163
+ args = []
164
+ for i, frames in enumerate(animated_frames):
165
+ segment_duration = segment_durations[min(i, len(segment_durations)-1)]
166
+ segment_text = segments[i] if segments and i < len(segments) else None
167
+ args.append((frames, segment_duration, segment_text))
168
+
169
+ # Process in parallel
170
+ video_clips = list(executor.map(lambda x: self.create_segment_clip(*x), args))
171
+ else:
172
+ # Process segments sequentially
173
  for i, frames in enumerate(animated_frames):
174
  segment_duration = segment_durations[min(i, len(segment_durations)-1)]
175
  segment_text = segments[i] if segments and i < len(segments) else None
176
+
177
+ segment_clip = self.create_segment_clip(frames, segment_duration, segment_text)
178
+ video_clips.append(segment_clip)
179
+ except Exception as e:
180
+ st.warning(f"Error processing video segments: {str(e)}. Using fallback method.")
181
+
182
+ # Fallback: Create a simple clip for each segment
183
+ video_clips = []
184
+ for i, _ in enumerate(animated_frames):
185
  segment_duration = segment_durations[min(i, len(segment_durations)-1)]
186
+ from moviepy.editor import ColorClip
187
+ clip = ColorClip(self.get_video_dimensions(), color=(0, 0, 0), duration=segment_duration)
188
+ video_clips.append(clip)
 
189
 
190
  # Concatenate all clips
191
+ try:
192
+ final_clip = concatenate_videoclips(video_clips)
193
+
194
+ # Set the audio
195
+ final_clip = final_clip.set_audio(audio_clip)
196
+
197
+ # Get target dimensions based on aspect ratio
198
+ target_dimensions = self.get_video_dimensions()
199
+
200
+ # Resize the final clip to match the target dimensions
201
+ final_clip = final_clip.resize(target_dimensions)
202
+ except Exception as e:
203
+ st.warning(f"Error creating final video: {str(e)}. Using fallback method.")
204
+
205
+ # Fallback: Create a simple video with the audio
206
+ from moviepy.editor import ColorClip
207
+ final_clip = ColorClip(self.get_video_dimensions(), color=(0, 0, 0), duration=total_duration)
208
+ final_clip = final_clip.set_audio(audio_clip)
209
 
210
  # Write the result to a file
211
+ output_path = f"{output_dir}/output_video_{self.aspect_ratio.replace(':', '_')}_{int(time.time())}.mp4"
212
+
213
+ try:
214
+ # Use lower resolution and bitrate for faster processing
215
+ final_clip.write_videofile(
216
+ output_path,
217
+ fps=24,
218
+ codec='libx264',
219
+ audio_codec='aac',
220
+ preset='ultrafast', # Faster encoding
221
+ threads=max_workers, # Use multiple threads for encoding
222
+ bitrate='1000k' # Lower bitrate
223
+ )
224
+ except Exception as e:
225
+ st.warning(f"Error writing video file: {str(e)}. Trying with simpler settings.")
226
+
227
+ # Try with even simpler settings
228
+ try:
229
+ final_clip.write_videofile(
230
+ output_path,
231
+ fps=15, # Lower fps
232
+ codec='libx264',
233
+ audio_codec='aac',
234
+ preset='ultrafast',
235
+ threads=2, # Fewer threads
236
+ bitrate='800k' # Lower bitrate
237
+ )
238
+ except Exception as inner_e:
239
+ st.error(f"Critical error writing video: {str(inner_e)}")
240
+ # Create a text file explaining the error
241
+ error_path = f"{output_dir}/error_video_{int(time.time())}.txt"
242
+ with open(error_path, 'w') as f:
243
+ f.write(f"Error creating video: {str(e)}\nSecondary error: {str(inner_e)}")
244
+ return error_path
245
 
246
  # Cache the result
247
  self.video_cache[cache_key] = output_path
248
 
249
  return output_path
250
 
251
+ except Exception as e:
252
+ st.error(f"Critical error in video creation: {str(e)}")
253
+ # Create a text file explaining the error
254
+ error_path = f"{output_dir}/error_video_{int(time.time())}.txt"
255
+ with open(error_path, 'w') as f:
256
+ f.write(f"Error creating video: {str(e)}")
257
+ return error_path
258
  finally:
259
  # Clean up temporary file
260
  if os.path.exists(audio_path):
261
+ try:
262
+ os.unlink(audio_path)
263
+ except OSError: # ignore cleanup failures
264
+ pass
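The two-stage export used above (fast settings first, degraded settings on failure), sketched as an isolated helper; write_with_fallback is an illustrative name, not part of this module:

from moviepy.editor import ColorClip

def write_with_fallback(clip, path, threads=4):
    try:
        clip.write_videofile(path, fps=24, codec='libx264', audio_codec='aac',
                             preset='ultrafast', threads=threads, bitrate='1000k')
    except Exception:
        # Degrade gracefully: lower frame rate, fewer threads, lower bitrate.
        clip.write_videofile(path, fps=15, codec='libx264', audio_codec='aac',
                             preset='ultrafast', threads=2, bitrate='800k')

write_with_fallback(ColorClip((640, 640), color=(0, 0, 0), duration=1), "demo_export.mp4")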
265
 
266
+ def optimize_video(self, video_path, target_size=None, bitrate='1000k', threads=2):
267
  """Optimize video size and quality for web delivery"""
268
+ if not os.path.exists(video_path) or video_path.endswith('.txt'):
269
+ return video_path # Return as is if it's an error file or doesn't exist
270
+
271
+ try:
272
+ from moviepy.editor import VideoFileClip
273
+
274
+ # Load the video
275
+ clip = VideoFileClip(video_path)
276
+
277
+ # If target_size is not provided, use aspect ratio-based dimensions
278
+ if target_size is None:
279
+ target_size = self.get_video_dimensions()
280
+
281
+ # Resize to target size
282
+ clip_resized = clip.resize(target_size)
283
+
284
+ # Save optimized video
285
+ optimized_path = video_path.replace('.mp4', f'_optimized_{int(time.time())}.mp4')
286
+
287
+ try:
288
+ clip_resized.write_videofile(
289
+ optimized_path,
290
+ codec='libx264',
291
+ audio_codec='aac',
292
+ preset='ultrafast',
293
+ threads=threads,
294
+ bitrate=bitrate
295
+ )
296
+ except Exception as e:
297
+ st.warning(f"Error optimizing video: {str(e)}. Using original video.")
298
+ optimized_path = video_path
299
+
300
+ # Close clips to free memory
301
+ clip.close()
302
+ clip_resized.close()
303
+
304
+ return optimized_path
305
+ except Exception as e:
306
+ st.warning(f"Error in video optimization: {str(e)}. Using original video.")
307
+ return video_path
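A hypothetical call sequence for the new methods, assuming the class in this file is importable as VideoCreator and that the frame paths exist; it only illustrates the intended order of operations:

import io
from video_creator import VideoCreator  # assumed class name

vc = VideoCreator()
vc.set_aspect_ratio("9:16")  # portrait output for mobile

# create_video_from_frames expects an object with .getvalue(), e.g. a BytesIO
# or a Streamlit UploadedFile.
with open("narration.mp3", "rb") as f:
    audio = io.BytesIO(f.read())

raw_path = vc.create_video_from_frames(
    animated_frames=[["temp/seg0_0.png", "temp/seg0_1.png"]],  # illustrative paths
    audio_file=audio,
    segments=["First caption"],
)
final_path = vc.optimize_video(raw_path, bitrate='800k')
print(final_path)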
308
 
309
  def clear_cache(self):
310
  """Clear the video cache"""