Final_Assignment_Template

Sleeping

App Files Files Community

Abbasid commited on Jul 31

Commit

bf3da7a

verified ·

1 Parent(s): 6a09b39

Update agent.py

Browse files

Files changed (1) hide show

agent.py +254 -253

agent.py CHANGED Viewed

@@ -2,7 +2,7 @@
 agent.py
 This file defines the core logic for a sophisticated AI agent using LangGraph.
-This version includes proper multimodal support for images, YouTube videos, and audio files.
 """
 # ----------------------------------------------------------
@@ -14,18 +14,12 @@ import pickle
 import re
 import subprocess
 import textwrap
-import base64
 import functools
-from io import BytesIO
 from pathlib import Path
-import tempfile
-import yt_dlp
-from pydub import AudioSegment
-import speech_recognition as sr
 import requests
 from cachetools import TTLCache
-from PIL import Image
 from langchain.schema import Document
 from langchain.tools.retriever import create_retriever_tool
@@ -34,9 +28,8 @@ from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
 from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 from langchain_core.tools import Tool, tool
-from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_groq import ChatGroq
-from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint, ChatHuggingFace
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import ToolNode, tools_condition
@@ -47,230 +40,205 @@ load_dotenv()
 JSONL_PATH, FAISS_CACHE, EMBED_MODEL = Path("metadata.jsonl"), Path("faiss_index.pkl"), "sentence-transformers/all-mpnet-base-v2"
 RETRIEVER_K, CACHE_TTL = 5, 600
 API_CACHE = TTLCache(maxsize=256, ttl=CACHE_TTL)
 def cached_get(key: str, fetch_fn):
-    if key in API_CACHE: return API_CACHE[key]
     val = fetch_fn()
     API_CACHE[key] = val
     return val
 # ----------------------------------------------------------
-# Section 2: Standalone Tool Functions
 # ----------------------------------------------------------
 @tool
 def python_repl(code: str) -> str:
     """Executes a string of Python code and returns the stdout/stderr."""
     code = textwrap.dedent(code).strip()
     try:
-        result = subprocess.run(["python", "-c", code], capture_output=True, text=True, timeout=10, check=False)
-        if result.returncode == 0: return f"Execution successful.\nSTDOUT:\n```\n{result.stdout}\n```"
-        else: return f"Execution failed.\nSTDOUT:\n```\n{result.stdout}\n```\nSTDERR:\n```\n{result.stderr}\n```"
-    except subprocess.TimeoutExpired: return "Execution timed out (>10s)."
-def describe_image_func(image_source: str, vision_llm_instance) -> str:
-    """Describes an image from a local file path or a URL using a provided vision LLM."""
     try:
-        print(f"Processing image: {image_source}")
-        # Download and process image
-        if image_source.startswith("http"):
-            response = requests.get(image_source, timeout=10)
-            response.raise_for_status()
-            img = Image.open(BytesIO(response.content))
-        else:
-            img = Image.open(image_source)
-        # Convert to base64
-        buffered = BytesIO()
-        img.convert("RGB").save(buffered, format="JPEG")
-        b64_string = base64.b64encode(buffered.getvalue()).decode()
-        # Create multimodal message
-        msg = HumanMessage(content=[
-            {"type": "text", "text": "Describe this image in detail. Include all objects, people, text, colors, setting, and any other relevant information you can see."},
-            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64_string}"}}
-        ])
-        result = vision_llm_instance.invoke([msg])
-        return f"Image description: {result.content}"
-    except Exception as e:
-        print(f"Error in describe_image_func: {e}")
-        return f"Error processing image: {e}"
-@tool
-def process_youtube_video(url: str) -> str:
-    """Downloads and processes a YouTube video, extracting audio and converting to text."""
     try:
-        print(f"Processing YouTube video: {url}")
-        # Create temporary directory
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Download audio from YouTube video
-            ydl_opts = {
-                'format': 'bestaudio/best',
-                'outtmpl': f'{temp_dir}/%(title)s.%(ext)s',
-                'postprocessors': [{
-                    'key': 'FFmpegExtractAudio',
-                    'preferredcodec': 'wav',
-                }],
-            }
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                info = ydl.extract_info(url, download=True)
-                title = info.get('title', 'Unknown')
-            # Find the downloaded audio file
-            audio_files = list(Path(temp_dir).glob("*.wav"))
-            if not audio_files:
-                return "Error: Could not download audio from YouTube video"
-            audio_file = audio_files[0]
-            # Convert audio to text using speech recognition
-            r = sr.Recognizer()
-            # Load audio file
-            audio = AudioSegment.from_wav(str(audio_file))
-            # Convert to mono and set sample rate
-            audio = audio.set_channels(1)
-            audio = audio.set_frame_rate(16000)
-            # Convert to smaller chunks for processing (30 seconds each)
-            chunk_length_ms = 30000
-            chunks = [audio[i:i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
-            transcript_parts = []
-            for i, chunk in enumerate(chunks[:10]):  # Limit to first 5 minutes
-                chunk_file = Path(temp_dir) / f"chunk_{i}.wav"
-                chunk.export(chunk_file, format="wav")
-                try:
-                    with sr.AudioFile(str(chunk_file)) as source:
-                        audio_data = r.record(source)
-                        text = r.recognize_google(audio_data)
-                        transcript_parts.append(text)
-                except sr.UnknownValueError:
-                    transcript_parts.append("[Unintelligible audio]")
-                except sr.RequestError as e:
-                    transcript_parts.append(f"[Speech recognition error: {e}]")
-            transcript = " ".join(transcript_parts)
-            return f"YouTube Video: {title}\n\nTranscript (first 5 minutes):\n{transcript}"
     except Exception as e:
-        print(f"Error processing YouTube video: {e}")
-        return f"Error processing YouTube video: {e}"
-@tool
-def process_audio_file(file_url: str) -> str:
-    """Downloads and processes an audio file (MP3, WAV, etc.) and converts to text."""
     try:
-        print(f"Processing audio file: {file_url}")
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Download audio file
-            response = requests.get(file_url, timeout=30)
-            response.raise_for_status()
-            # Determine file extension from URL or content type
-            if file_url.lower().endswith('.mp3'):
-                ext = 'mp3'
-            elif file_url.lower().endswith('.wav'):
-                ext = 'wav'
-            else:
-                content_type = response.headers.get('content-type', '')
-                if 'mp3' in content_type:
-                    ext = 'mp3'
-                elif 'wav' in content_type:
-                    ext = 'wav'
-                else:
-                    ext = 'mp3'  # Default assumption
-            audio_file = Path(temp_dir) / f"audio.{ext}"
-            with open(audio_file, 'wb') as f:
-                f.write(response.content)
-            # Convert to WAV if necessary
-            if ext != 'wav':
-                audio = AudioSegment.from_file(str(audio_file))
-                wav_file = Path(temp_dir) / "audio.wav"
-                audio.export(wav_file, format="wav")
-                audio_file = wav_file
-            # Convert audio to text
-            r = sr.Recognizer()
-            # Load and process audio
-            audio = AudioSegment.from_wav(str(audio_file))
-            audio = audio.set_channels(1).set_frame_rate(16000)
-            # Process in chunks
-            chunk_length_ms = 30000
-            chunks = [audio[i:i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
-            transcript_parts = []
-            for i, chunk in enumerate(chunks[:20]):  # Limit to first 10 minutes
-                chunk_file = Path(temp_dir) / f"chunk_{i}.wav"
-                chunk.export(chunk_file, format="wav")
-                try:
-                    with sr.AudioFile(str(chunk_file)) as source:
-                        audio_data = r.record(source)
-                        text = r.recognize_google(audio_data)
-                        transcript_parts.append(text)
-                except sr.UnknownValueError:
-                    transcript_parts.append("[Unintelligible audio]")
-                except sr.RequestError as e:
-                    transcript_parts.append(f"[Speech recognition error: {e}]")
-            transcript = " ".join(transcript_parts)
-            return f"Audio file transcript:\n{transcript}"
     except Exception as e:
-        print(f"Error processing audio file: {e}")
-        return f"Error processing audio file: {e}"
-def web_search_func(query: str, cache_func) -> str:
-    """Performs a web search using Tavily and returns a compilation of results."""
-    key = f"web:{query}"
-    results = cache_func(key, lambda: TavilySearchResults(max_results=5).invoke(query))
-    return "\n\n---\n\n".join([f"Source: {res['url']}\nContent: {res['content']}" for res in results])
-def wiki_search_func(query: str, cache_func) -> str:
-    """Searches Wikipedia and returns the top 2 results."""
-    key = f"wiki:{query}"
-    docs = cache_func(key, lambda: WikipediaLoader(query=query, load_max_docs=2, doc_content_chars_max=2000).load())
-    return "\n\n---\n\n".join([f"Source: {d.metadata['source']}\n\n{d.page_content}" for d in docs])
-def arxiv_search_func(query: str, cache_func) -> str:
-    """Searches Arxiv for scientific papers and returns the top 2 results."""
-    key = f"arxiv:{query}"
-    docs = cache_func(key, lambda: ArxivLoader(query=query, load_max_docs=2).load())
-    return "\n\n---\n\n".join([f"Source: {d.metadata['source']}\nPublished: {d.metadata['Published']}\nTitle: {d.metadata['Title']}\n\nSummary:\n{d.page_content}" for d in docs])
 # ----------------------------------------------------------
-# Section 3: DYNAMIC SYSTEM PROMPT
 # ----------------------------------------------------------
-SYSTEM_PROMPT_TEMPLATE = (
-    """You are an expert-level multimodal research assistant. Your goal is to answer the user's question accurately using all available tools.
-**CRITICAL INSTRUCTIONS:**
-1.  **USE YOUR TOOLS:** You have been given a set of tools to find information. You MUST use them when the answer is not immediately known to you. Do not make up answers.
-2.  **MULTIMODAL PROCESSING:** When you encounter URLs or attachments:
-    - For image URLs (jpg, png, gif, etc.): Use the `describe_image` tool
-    - For YouTube URLs: Use the `process_youtube_video` tool
-    - For audio files (mp3, wav, etc.): Use the `process_audio_file` tool
-    - For other content: Use appropriate search tools
-3.  **AVAILABLE TOOLS:** Here is the exact list of tools you have access to:
-    {tools}
-4.  **REASONING:** Think step-by-step. First, analyze the user's question and any attachments. Second, decide which tools are appropriate. Third, call the tools with correct parameters. Finally, synthesize the results.
-5.  **URL DETECTION:** Look for URLs in the user's message, especially in brackets like [Attachment URL: ...]. Process these appropriately.
-6.  **FINAL ANSWER FORMAT:** Your final response MUST strictly follow this format:
-    `FINAL ANSWER: [Your comprehensive answer incorporating all tool results]`
 """
-)
 # ----------------------------------------------------------
 # Section 4: Factory Function for Agent Executor
@@ -281,80 +249,113 @@ def create_agent_executor(provider: str = "groq"):
     """
     print(f"Initializing agent with provider: {provider}")
-    # Step 1: Build LLMs - Use Google for vision capabilities
-    if provider == "google":
-        main_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0)
-        vision_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0)
-    elif provider == "groq":
-        main_llm = ChatGroq(model_name="llama-3.2-90b-vision-preview", temperature=0)
-        # Use Google for vision since Groq's vision support may be limited
-        vision_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0)
-    elif provider == "huggingface":
-        main_llm = ChatHuggingFace(llm=HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", temperature=0.1))
-        vision_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0)
-    else:
-        raise ValueError("Invalid provider selected")
-    # Step 2: Build Retriever
     embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
     if FAISS_CACHE.exists():
-        with open(FAISS_CACHE, "rb") as f: vector_store = pickle.load(f)
     else:
         if JSONL_PATH.exists():
-            docs = [Document(page_content=f"Question: {rec['Question']}\n\nFinal answer: {rec['Final answer']}", metadata={"source": rec["task_id"]}) for rec in (json.loads(line) for line in open(JSONL_PATH, "rt", encoding="utf-8"))]
             vector_store = FAISS.from_documents(docs, embeddings)
-            with open(FAISS_CACHE, "wb") as f: pickle.dump(vector_store, f)
         else:
-            # Create empty vector store if no metadata file exists
             docs = [Document(page_content="Sample document", metadata={"source": "sample"})]
             vector_store = FAISS.from_documents(docs, embeddings)
     retriever = vector_store.as_retriever(search_kwargs={"k": RETRIEVER_K})
-    # Step 3: Create the final list of tools
     tools_list = [
         python_repl,
-        Tool(name="describe_image", func=functools.partial(describe_image_func, vision_llm_instance=vision_llm), description="Describes an image from a local file path or a URL. Use this for any image files or image URLs."),
-        process_youtube_video,
-        process_audio_file,
-        Tool(name="web_search", func=functools.partial(web_search_func, cache_func=cached_get), description="Performs a web search using Tavily."),
-        Tool(name="wiki_search", func=functools.partial(wiki_search_func, cache_func=cached_get), description="Searches Wikipedia."),
-        Tool(name="arxiv_search", func=functools.partial(arxiv_search_func, cache_func=cached_get), description="Searches Arxiv for scientific papers."),
-        create_retriever_tool(retriever=retriever, name="retrieve_examples", description="Retrieve solved questions similar to the user's query."),
     ]
-    # Step 4: Format the tool list into a string for the prompt
-    tool_definitions = "\n".join([f"- `{tool.name}`: {tool.description}" for tool in tools_list])
-    final_system_prompt = SYSTEM_PROMPT_TEMPLATE.format(tools=tool_definitions)
-    llm_with_tools = main_llm.bind_tools(tools_list)
-    # Step 5: Define Graph Nodes
-    def retriever_node(state: MessagesState):
-        user_query = state["messages"][-1].content
-        docs = retriever.invoke(user_query)
-        messages = [SystemMessage(content=final_system_prompt)]
-        if docs:
-            example_text = "\n\n---\n\n".join(d.page_content for d in docs)
-            messages.append(AIMessage(content=f"I have found {len(docs)} similar solved examples:\n\n{example_text}", name="ExampleRetriever"))
-        messages.extend(state["messages"])
-        return {"messages": messages}
     def assistant_node(state: MessagesState):
-        result = llm_with_tools.invoke(state["messages"])
-        return {"messages": [result]}
-    # Step 6: Build Graph
     builder = StateGraph(MessagesState)
-    builder.add_node("retriever", retriever_node)
     builder.add_node("assistant", assistant_node)
-    builder.add_node("tools", ToolNode(tools_list))
-    builder.add_edge(START, "retriever")
-    builder.add_edge("retriever", "assistant")
-    builder.add_conditional_edges("assistant", tools_condition, {"tools": "tools", "__end__": "__end__"})
     builder.add_edge("tools", "assistant")
     agent_executor = builder.compile()
-    print("Agent Executor created successfully.")
     return agent_executor

 agent.py
 This file defines the core logic for a sophisticated AI agent using LangGraph.
+This version uses Groq's vision-capable models and includes proper reasoning steps.
 """
 # ----------------------------------------------------------
 import re
 import subprocess
 import textwrap
 import functools
 from pathlib import Path
+from typing import Dict, Any
 import requests
 from cachetools import TTLCache
 from langchain.schema import Document
 from langchain.tools.retriever import create_retriever_tool
 from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
 from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 from langchain_core.tools import Tool, tool
 from langchain_groq import ChatGroq
+from langchain_huggingface import HuggingFaceEmbeddings
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import ToolNode, tools_condition
 JSONL_PATH, FAISS_CACHE, EMBED_MODEL = Path("metadata.jsonl"), Path("faiss_index.pkl"), "sentence-transformers/all-mpnet-base-v2"
 RETRIEVER_K, CACHE_TTL = 5, 600
 API_CACHE = TTLCache(maxsize=256, ttl=CACHE_TTL)
 def cached_get(key: str, fetch_fn):
+    if key in API_CACHE:
+        return API_CACHE[key]
     val = fetch_fn()
     API_CACHE[key] = val
     return val
 # ----------------------------------------------------------
+# Section 2: Tool Functions
 # ----------------------------------------------------------
 @tool
 def python_repl(code: str) -> str:
     """Executes a string of Python code and returns the stdout/stderr."""
     code = textwrap.dedent(code).strip()
     try:
+        result = subprocess.run(
+            ["python", "-c", code],
+            capture_output=True,
+            text=True,
+            timeout=10,
+            check=False
+        )
+        if result.returncode == 0:
+            return f"Execution successful.\nSTDOUT:\n```\n{result.stdout}\n```"
+        else:
+            return f"Execution failed.\nSTDOUT:\n```\n{result.stdout}\n```\nSTDERR:\n```\n{result.stderr}\n```"
+    except subprocess.TimeoutExpired:
+        return "Execution timed out (>10s)."
+def web_search_func(query: str, cache_func) -> str:
+    """Performs a web search using Tavily and returns a compilation of results."""
+    if not query or not query.strip():
+        return "Error: Empty search query"
+    key = f"web:{query}"
     try:
+        results = cache_func(key, lambda: TavilySearchResults(max_results=5).invoke(query))
+        if not results:
+            return "No search results found"
+        formatted_results = []
+        for res in results:
+            if isinstance(res, dict) and 'url' in res and 'content' in res:
+                formatted_results.append(f"Source: {res['url']}\nContent: {res['content']}")
+        return "\n\n---\n\n".join(formatted_results) if formatted_results else "No valid results found"
+    except Exception as e:
+        return f"Search error: {e}"
+def wiki_search_func(query: str, cache_func) -> str:
+    """Searches Wikipedia and returns the top 2 results."""
+    if not query or not query.strip():
+        return "Error: Empty search query"
+    key = f"wiki:{query}"
     try:
+        docs = cache_func(key, lambda: WikipediaLoader(
+            query=query,
+            load_max_docs=2,
+            doc_content_chars_max=2000
+        ).load())
+        if not docs:
+            return "No Wikipedia articles found"
+        return "\n\n---\n\n".join([
+            f"Source: {d.metadata.get('source', 'Unknown')}\n\n{d.page_content}"
+            for d in docs
+        ])
     except Exception as e:
+        return f"Wikipedia search error: {e}"
+def arxiv_search_func(query: str, cache_func) -> str:
+    """Searches Arxiv for scientific papers and returns the top 2 results."""
+    if not query or not query.strip():
+        return "Error: Empty search query"
+    key = f"arxiv:{query}"
     try:
+        docs = cache_func(key, lambda: ArxivLoader(query=query, load_max_docs=2).load())
+        if not docs:
+            return "No Arxiv papers found"
+        return "\n\n---\n\n".join([
+            f"Source: {d.metadata.get('source', 'Unknown')}\n"
+            f"Published: {d.metadata.get('Published', 'Unknown')}\n"
+            f"Title: {d.metadata.get('Title', 'Unknown')}\n\n"
+            f"Summary:\n{d.page_content}"
+            for d in docs
+        ])
     except Exception as e:
+        return f"Arxiv search error: {e}"
+@tool
+def analyze_task_and_reason(task_description: str) -> str:
+    """
+    Analyzes the task and provides reasoning about what approach to take.
+    This tool helps determine what other tools might be needed.
+    """
+    analysis = {
+        "task_type": "unknown",
+        "has_image": False,
+        "needs_search": False,
+        "needs_computation": False,
+        "approach": "Direct answer"
+    }
+    task_lower = task_description.lower()
+    # Check for image-related content
+    if any(keyword in task_lower for keyword in [
+        'image', 'picture', 'photo', 'visual', 'see in', 'shown in',
+        'attachment analysis', 'url:', 'http', '.jpg', '.png', '.gif'
+    ]):
+        analysis["has_image"] = True
+        analysis["task_type"] = "image_analysis"
+        analysis["approach"] = "Process image with vision model, then analyze content"
+    # Check for search needs
+    if any(keyword in task_lower for keyword in [
+        'current', 'recent', 'latest', 'news', 'today', 'what is',
+        'who is', 'when did', 'research', 'find information'
+    ]):
+        analysis["needs_search"] = True
+        if analysis["task_type"] == "unknown":
+            analysis["task_type"] = "information_search"
+            analysis["approach"] = "Search for current information"
+    # Check for computation needs
+    if any(keyword in task_lower for keyword in [
+        'calculate', 'compute', 'math', 'formula', 'equation',
+        'algorithm', 'code', 'program', 'python'
+    ]):
+        analysis["needs_computation"] = True
+        if analysis["task_type"] == "unknown":
+            analysis["task_type"] = "computation"
+            analysis["approach"] = "Use Python for calculations"
+    reasoning = f"""TASK ANALYSIS COMPLETE:
+Task Type: {analysis['task_type']}
+Has Image: {analysis['has_image']}
+Needs Search: {analysis['needs_search']}
+Needs Computation: {analysis['needs_computation']}
+RECOMMENDED APPROACH: {analysis['approach']}
+REASONING:
+- If this involves an image, I should process it directly with my vision capabilities
+- If this needs current information, I should use web search or Wikipedia
+- If this needs calculations, I should use the Python tool
+- I should always provide a comprehensive final answer
+NEXT STEPS: Proceed with the identified approach and use appropriate tools."""
+    return reasoning
 # ----------------------------------------------------------
+# Section 3: SYSTEM PROMPT
 # ----------------------------------------------------------
+SYSTEM_PROMPT = """You are an expert multimodal AI assistant with vision capabilities and access to various tools.
+**CORE CAPABILITIES:**
+1. **Vision Processing**: You can directly process and analyze images from URLs
+2. **Web Search**: Access current information via web search and Wikipedia
+3. **Computation**: Execute Python code for calculations and data processing
+4. **Research**: Search academic papers and retrieve similar examples
+**CRITICAL WORKFLOW:**
+1. **ANALYZE FIRST**: Always start by using the 'analyze_task_and_reason' tool to understand what you're being asked to do
+2. **PROCESS IMAGES DIRECTLY**: When you encounter image URLs, process them directly with your vision model - DO NOT use separate image tools
+3. **USE TOOLS STRATEGICALLY**: Based on your analysis, use appropriate tools (web search, Python, etc.)
+4. **VALIDATE PARAMETERS**: Always check that you're passing correct parameters to tools
+5. **SYNTHESIZE**: Combine all information into a comprehensive answer
+**IMAGE HANDLING:**
+- You have native vision capabilities - process image URLs directly
+- Look for image URLs in the task description
+- When you see an image URL, examine it carefully and describe what you see
+- Relate your visual observations to the question being asked
+**TOOL USAGE RULES:**
+- Always use 'analyze_task_and_reason' first to plan your approach
+- Use web_search for current events, factual information, or research
+- Use python_repl for calculations, data processing, or code execution
+- Use wiki_search for encyclopedic information
+- Use arxiv_search for academic/scientific papers
+- Use retrieve_examples for similar solved problems
+**OUTPUT FORMAT:**
+Always end your response with: FINAL ANSWER: [Your comprehensive answer]
+**PARAMETER VALIDATION:**
+- Check that search queries are meaningful and specific
+- Ensure Python code is safe and well-formed
+- Verify image URLs are accessible before processing
 """
 # ----------------------------------------------------------
 # Section 4: Factory Function for Agent Executor
     """
     print(f"Initializing agent with provider: {provider}")
+    # Step 1: Initialize LLM with vision capabilities
+    if provider == "groq":
+        # Use Groq's vision-capable model
+        try:
+            llm = ChatGroq(
+                model_name="llama-3.2-90b-vision-preview",  # Vision-capable model
+                temperature=0.1,
+                max_tokens=4000
+            )
+            print("Initialized Groq LLM with vision capabilities")
+        except Exception as e:
+            print(f"Error initializing Groq: {e}")
+            raise
+    else:
+        raise ValueError(f"Provider '{provider}' not supported in this version")
+    # Step 2: Build Retriever (if metadata exists)
     embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
     if FAISS_CACHE.exists():
+        with open(FAISS_CACHE, "rb") as f:
+            vector_store = pickle.load(f)
+            print("Loaded existing FAISS index")
     else:
         if JSONL_PATH.exists():
+            docs = []
+            with open(JSONL_PATH, "rt", encoding="utf-8") as f:
+                for line in f:
+                    rec = json.loads(line)
+                    docs.append(Document(
+                        page_content=f"Question: {rec['Question']}\n\nFinal answer: {rec['Final answer']}",
+                        metadata={"source": rec["task_id"]}
+                    ))
             vector_store = FAISS.from_documents(docs, embeddings)
+            with open(FAISS_CACHE, "wb") as f:
+                pickle.dump(vector_store, f)
+            print(f"Created new FAISS index with {len(docs)} documents")
         else:
+            # Create minimal vector store
             docs = [Document(page_content="Sample document", metadata={"source": "sample"})]
             vector_store = FAISS.from_documents(docs, embeddings)
+            print("Created minimal FAISS index")
     retriever = vector_store.as_retriever(search_kwargs={"k": RETRIEVER_K})
+    # Step 3: Create tools list
     tools_list = [
+        analyze_task_and_reason,
+        Tool(
+            name="web_search",
+            func=functools.partial(web_search_func, cache_func=cached_get),
+            description="Search the web for current information. Use specific, focused queries."
+        ),
+        Tool(
+            name="wiki_search",
+            func=functools.partial(wiki_search_func, cache_func=cached_get),
+            description="Search Wikipedia for encyclopedic information."
+        ),
+        Tool(
+            name="arxiv_search",
+            func=functools.partial(arxiv_search_func, cache_func=cached_get),
+            description="Search Arxiv for academic papers and research."
+        ),
         python_repl,
+        create_retriever_tool(
+            retriever=retriever,
+            name="retrieve_examples",
+            description="Retrieve similar solved examples from the knowledge base."
+        ),
     ]
+    llm_with_tools = llm.bind_tools(tools_list)
+    # Step 4: Define Graph Nodes
     def assistant_node(state: MessagesState):
+        """Main assistant node that processes user input and tool responses."""
+        messages = [SystemMessage(content=SYSTEM_PROMPT)] + state["messages"]
+        try:
+            result = llm_with_tools.invoke(messages)
+            return {"messages": [result]}
+        except Exception as e:
+            error_msg = f"LLM Error: {e}"
+            print(error_msg)
+            return {"messages": [AIMessage(content=f"I encountered an error: {error_msg}")]}
+    def tools_node_wrapper(state: MessagesState):
+        """Wrapper for tool execution with error handling."""
+        try:
+            tool_node = ToolNode(tools_list)
+            return tool_node.invoke(state)
+        except Exception as e:
+            error_msg = f"Tool execution error: {e}"
+            print(error_msg)
+            return {"messages": [AIMessage(content=error_msg)]}
+    # Step 5: Build Graph
     builder = StateGraph(MessagesState)
     builder.add_node("assistant", assistant_node)
+    builder.add_node("tools", tools_node_wrapper)
+    builder.add_edge(START, "assistant")
+    builder.add_conditional_edges(
+        "assistant",
+        tools_condition,
+        {"tools": "tools", "__end__": "__end__"}
+    )
     builder.add_edge("tools", "assistant")
     agent_executor = builder.compile()
+    print("Agent Executor created successfully with vision capabilities")
     return agent_executor