# Hugging Face Space: surfiniaburger/aura-mind-glow (status: Sleeping)
# File size: 3,985 Bytes
# Revision: 032080e


import time
import os
from google import genai
from google.genai import types
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEndpoint
from PIL import Image
from utils import retry_with_exponential_backoff

def create_story_prompt_from_pdf(pdf_path: str, user_prompt: str, llm: HuggingFaceEndpoint) -> str:
    """
    Build a creative video-generation prompt from a PDF and a desired tone.

    The text of the first three PDF pages is joined, truncated to 1,500
    characters, and sent to ``llm`` together with instructions and the
    user's desired tone.

    Args:
        pdf_path: Path of the PDF file to read.
        user_prompt: The tone the user wants the generated story to have.
        llm: Text-generation LLM; it is called once via ``invoke`` with a
            single string prompt.

    Returns:
        The text returned by ``llm``. If any step fails, no exception is
        propagated; a string starting with "Error processing PDF: " is
        returned instead.
    """
    try:
        # 1. Load and read the PDF
        loader = PyPDFLoader(pdf_path)
        # load() yields one Document per page; load_and_split() would return
        # text-splitter chunks, which do not correspond to pages.
        pages = loader.load()
        # Limit to first 3 pages for brevity and to manage token count
        pdf_content = " ".join(page.page_content for page in pages[:3])

        # 2. Use an LLM to generate a creative prompt
        system_prompt = """You are a creative assistant for a farmer. Your task is to read the summary of a document and a user's desired tone, and then write a short, visually descriptive prompt for a video generation model (like Google Veo). The prompt should tell a story about a farmer dealing with this paperwork, capturing the user's desired tone. Describe the scene, camera shots, and the farmer's actions.

        Example:
        - Document Summary: "Invoice for tractor parts, total $2,500. Delivery next week."
        - User Tone: "A feeling of progress and investment in the future."
        - Generated Prompt: "Close up on a farmer's weathered hands circling a date on a calendar in a rustic office. The camera pulls back to reveal invoices on the desk. The farmer looks out the window at the fields, a determined smile on their face. Golden morning light fills the room. Cinematic, hopeful, 4k."
        """

        human_prompt = f"""
        Document Summary: "{pdf_content[:1500]}"
        User Tone: "{user_prompt}"

        Generate a creative video prompt based on the summary and tone.
        """

        # The llm object from HuggingFaceEndpoint expects a single string
        # prompt. The `config` argument of invoke() is LangChain run
        # configuration (callbacks, tags, ...), not model input, so the
        # instructions must be part of the prompt text to reach the model.
        full_prompt = f"{system_prompt}\n{human_prompt}"
        creative_prompt = llm.invoke(full_prompt)

        print(f"Generated creative prompt: {creative_prompt}")
        return creative_prompt

    except Exception as e:
        # Best-effort boundary: callers receive an error string, not an exception.
        print(f"Error creating story from PDF: {e}")
        return f"Error processing PDF: {e}"

@retry_with_exponential_backoff
def generate_video_from_prompt(prompt: str, image_path: str = None) -> str:
    """
    Generates a video using the Veo API from a text prompt and an optional starting image.

    Args:
        prompt: Text description of the video to generate.
        image_path: Optional path of an image file to use as the starting
            image. When falsy, the video is generated from the prompt only.

    Returns:
        The path of the saved video file ("generated_story.mp4"). If any
        step fails, no exception is propagated; a string starting with
        "Error during video generation: " is returned instead.
    """
    # NOTE(review): every exception raised below is caught and converted into
    # an error string, so the retry decorator never sees an exception from
    # this function. Whether it can still retry depends on its implementation
    # in utils - confirm.
    try:
        client = genai.Client()

        request = {"model": "veo-3.0-generate-preview", "prompt": prompt}
        if image_path:
            print(f"Generating video with initial image: {image_path}")
            # generate_videos takes a google.genai types.Image (bytes plus
            # MIME type), not a PIL image; from_file reads the file for us.
            request["image"] = types.Image.from_file(location=image_path)
        else:
            print("Generating video from text prompt only.")

        operation = client.models.generate_videos(**request)

        print("Video generation started. Polling for completion...")
        while not operation.done:
            print("Waiting for video generation to complete...")
            time.sleep(10)
            operation = client.operations.get(operation)

        # A finished operation may carry an error or an empty result;
        # report that clearly instead of failing on a None attribute access.
        operation_error = getattr(operation, "error", None)
        if operation_error:
            raise RuntimeError(f"Video generation failed: {operation_error}")

        response = operation.response
        videos = response.generated_videos if response else None
        if not videos:
            raise RuntimeError("Video generation completed but returned no videos.")
        generated_video = videos[0]

        video_file_name = "generated_story.mp4"
        # Download the video content first so that save() has data to write.
        client.files.download(file=generated_video.video)
        generated_video.video.save(video_file_name)

        print(f"Generated video saved to {video_file_name}")
        return video_file_name

    except Exception as e:
        # Best-effort boundary: callers receive an error string, not an exception.
        print(f"Error generating video: {e}")
        return f"Error during video generation: {e}"