Resolved dropdown issue and MCP server
- README.md +152 -1
- app.py +202 -443
- config.py +5 -0
- core/chunker.py +1 -0
- mcp_server.py +1 -1
- requirements.txt +4 -2
- services/llm_service.py +229 -127
    	
README.md CHANGED
@@ -6,8 +6,159 @@ colorTo: green
 sdk: gradio
 sdk_version: 5.32.0
 app_file: app.py
+tags:
+  - mcp-server-track
+  - Agents-MCP-Hackathon
 pinned: false
 license: mit
 ---
 
-
+A powerful Model Context Protocol (MCP) server for intelligent content management with semantic search, summarization, and Q&A capabilities powered by **OpenAI, Mistral AI, and Anthropic Claude**.
+
+## 🎯 Features
+
+### 🔧 MCP Tools Available
+
+- **📄 Document Ingestion**: Upload and process documents (PDF, TXT, DOCX, images with OCR)
+- **🔍 Semantic Search**: Find relevant content using natural language queries
+- **📝 Summarization**: Generate summaries in different styles (concise, detailed, bullet points, executive)
+- **🏷️ Tag Generation**: Automatically generate relevant tags for content
+- **❓ Q&A System**: Ask questions about your documents using RAG (Retrieval-Augmented Generation)
+- **📊 Categorization**: Classify content into predefined or custom categories
+- **🔄 Batch Processing**: Process multiple documents at once
+- **📈 Analytics**: Get insights and statistics about your content
+
+### 🚀 Powered By
+
+- **🧠 OpenAI GPT models** for powerful text generation and understanding
+- **🔥 Mistral AI** for efficient text processing and analysis
+- **🤖 Anthropic Claude** for advanced reasoning (available as a specific choice or fallback)
+- **🔗 Sentence Transformers** for semantic embeddings
+- **📚 FAISS** for fast similarity search
+- **👁️ Tesseract OCR** for image text extraction
+- **🎨 Gradio** for the user interface and MCP server functionality
+
+**LLM Strategy**: When "auto" model selection is used, the agent picks the best available LLM for generative tasks, prioritizing OpenAI, then Mistral, and finally Anthropic. Users can also specify a particular model family (e.g., "gpt-", "mistral-", "claude-").
+
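
To make that order concrete, here is a minimal sketch of the "auto" fallback loop; the provider-client interface below is hypothetical, not the actual `llm_service.py` API:

```python
# Hypothetical sketch of "auto" provider selection; not the real llm_service.py API.
async def generate_auto(prompt: str, clients: dict) -> str:
    # Documented priority order: OpenAI first, then Mistral, then Anthropic.
    for provider in ("openai", "mistral", "anthropic"):
        client = clients.get(provider)
        if client is None:
            continue  # provider not configured (no API key)
        try:
            return await client.generate(prompt)  # hypothetical client method
        except Exception:
            continue  # fall back to the next provider
    raise RuntimeError("No LLM provider available")
```
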
+## 📋 Complete File Structure
+
+intelligent-content-organizer/
+├── app.py                        # Main Gradio app and MCP server
+├── config.py                     # Configuration management
+├── mcp_server.py                 # MCP server tools
+├── requirements.txt              # Dependencies
+├── README.md                     # Documentation
+├── .gitignore                    # Git ignore rules
+├── core/                         # Core processing logic
+│   ├── __init__.py
+│   ├── models.py                 # Data models
+│   ├── document_parser.py        # Document processing
+│   ├── text_preprocessor.py      # Text cleaning and processing
+│   └── chunker.py                # Text chunking strategies
+├── services/                     # Backend services
+│   ├── __init__.py
+│   ├── embedding_service.py      # Sentence Transformers integration
+│   ├── llm_service.py            # OpenAI + Mistral + Anthropic integration
+│   ├── ocr_service.py            # Mistral OCR integration
+│   ├── vector_store_service.py   # FAISS vector storage
+│   └── document_store_service.py # Document metadata storage
+└── mcp_tools/                    # MCP tool definitions
+    ├── __init__.py
+    ├── ingestion_tool.py         # Document ingestion tool
+    ├── search_tool.py            # Semantic search tool
+    ├── generative_tool.py        # AI generation tool
+    └── utils.py                  # Utility functions
+
+## 🎯 Key Features Implemented
+
+1. **Full MCP Server**: Complete implementation with all tools exposed
+2. **Multi-Modal Processing**: PDF, TXT, DOCX, and image processing with OCR
+3. **Advanced Search**: Semantic search with FAISS, filtering, and multi-query support
+4. **AI-Powered Features**: Summarization, tagging, categorization, and Q&A with RAG
+5. **Production Ready**: Error handling, logging, caching, rate limiting
+6. **Gradio UI**: Beautiful web interface for testing and direct use
+7. **OpenAI + Mistral + Anthropic**: Multi-LLM support with fallbacks
+
+## 🎥 Demo Video
+
+[📹 Watch the demo video](https://your-demo-video-url.com)
+
+*The demo shows the MCP server in action: document ingestion, semantic search, and Q&A using the configured LLM providers.*
+
+## 🛠️ Installation
+
+### Prerequisites
+
+- Python 3.9+
+- API keys for OpenAI and Mistral AI, and an Anthropic API key (used for Claude models and as a fallback)
+
+## 🔧 MCP Tools Reference
+
+The `model` parameter on the generative tools accepts "auto" (the default) or a specific model family such as "gpt-", "mistral-", or "claude-".
+
+- **ingest_document**
+  - Process and index a document for searching.
+  - **Parameters:**
+    - `file_path` (string): Path to the document file (e.g., an uploaded file path).
+    - `file_type` (string, optional): File type/extension (e.g., ".pdf", ".txt"). If not provided, it is inferred from `file_path`.
+  - **Returns:**
+    - `success` (boolean): Whether the operation succeeded.
+    - `document_id` (string): Unique identifier for the processed document.
+    - `chunks_created` (integer): Number of text chunks created.
+    - `message` (string): Human-readable result message.
+
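
As a usage sketch, this tool maps onto the `ingest_document_async` method that app.py defines on the module-level `mcp_server` instance; the import path and file path below are illustrative assumptions:

```python
# Sketch: call the ingestion method from app.py directly (paths are hypothetical).
import asyncio
from app import mcp_server  # module-level server instance created in app.py

async def main():
    result = await mcp_server.ingest_document_async("docs/report.pdf", ".pdf")
    if result["success"]:
        print(result["document_id"], result["chunks_created"], result["message"])

asyncio.run(main())
```
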
+- **semantic_search**
+  - Search through indexed content using natural language.
+  - **Parameters:**
+    - `query` (string): Search query.
+    - `top_k` (integer, optional): Number of results to return (default: 5).
+    - `filters` (object, optional): Search filters (e.g., {"document_id": "some_id"}).
+  - **Returns:**
+    - `success` (boolean): Whether the search succeeded.
+    - `results` (array of objects): Search results, each with content and score.
+    - `total_results` (integer): Number of results found.
+
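
A matching sketch for search, going through the `run_async` helper that app.py's own Gradio callbacks use; the filter value is a placeholder, and the keyword names follow the parameter list above:

```python
# Sketch: semantic search scoped to a single document via the filters object.
from app import mcp_server

result = mcp_server.run_async(
    mcp_server.semantic_search_async("key findings", top_k=3, filters={"document_id": "some_id"})
)
for hit in result.get("results", []):
    print(hit)  # each entry carries the chunk content and its similarity score
```
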
+- **summarize_content**
+  - Generate a summary of provided content.
+  - **Parameters:**
+    - `content` (string, optional): Text content to summarize.
+    - `document_id` (string, optional): ID of the document to summarize. (Either `content` or `document_id` must be provided.)
+    - `style` (string, optional): Summary style: "concise", "detailed", "bullet_points", "executive" (default: "concise").
+    - `model` (string, optional): Specific LLM to use (e.g., "gpt-4o-mini", "mistral-large-latest", "auto"). Default: "auto".
+  - **Returns:**
+    - `success` (boolean): Whether summarization succeeded.
+    - `summary` (string): Generated summary.
+    - `original_length` (integer): Character length of the original content.
+    - `summary_length` (integer): Character length of the summary.
+
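
In the same spirit, a short sketch for summarizing pasted text in a chosen style, mirroring the call app.py's summarize callback makes:

```python
# Sketch: summarize raw text in bullet-point style.
from app import mcp_server

result = mcp_server.run_async(
    mcp_server.summarize_content_async(content="Long report text ...", style="bullet_points")
)
print(result["summary"] if result["success"] else result["error"])
```
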
+- **generate_tags**
+  - Generate relevant tags for content.
+  - **Parameters:**
+    - `content` (string, optional): Text content to tag.
+    - `document_id` (string, optional): ID of the document to tag. (Either `content` or `document_id` must be provided.)
+    - `max_tags` (integer, optional): Maximum number of tags (default: 5).
+    - `model` (string, optional): Specific LLM to use. Default: "auto".
+  - **Returns:**
+    - `success` (boolean): Whether tag generation succeeded.
+    - `tags` (array of strings): Generated tags.
+
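
And for tagging an already-indexed document (placeholder ID); per the app.py diff below, tags generated for a `document_id` are also written back to the document's metadata:

```python
# Sketch: tag an indexed document; tags persist to its metadata on success.
from app import mcp_server

result = mcp_server.run_async(mcp_server.generate_tags_async(document_id="some_id", max_tags=5))
print(", ".join(result["tags"]) if result["success"] else result["error"])
```
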
+- **answer_question**
+  - Answer questions using RAG over your indexed content.
+  - **Parameters:**
+    - `question` (string): Question to answer.
+    - `context_filter` (object, optional): Filters for context retrieval (e.g., {"document_id": "some_id"}).
+    - `model` (string, optional): Specific LLM to use. Default: "auto".
+  - **Returns:**
+    - `success` (boolean): Whether question answering succeeded.
+    - `answer` (string): Generated answer.
+    - `sources` (array of objects): Source chunks used for context, each with document_id, chunk_id, and content.
+    - `confidence` (string, optional): Confidence level in the answer (LLM-dependent; may not always be present).
+
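
Finally, a sketch for RAG Q&A, optionally restricted to one document via `context_filter`:

```python
# Sketch: question answering over indexed content, scoped to one document (placeholder ID).
from app import mcp_server

result = mcp_server.run_async(
    mcp_server.answer_question_async("What does the report conclude?", context_filter={"document_id": "some_id"})
)
if result["success"]:
    print(result["answer"])
    for src in result["sources"]:  # the context chunks behind the answer
        print("-", src)
```
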
+## 📊 Performance
+
+- Embedding Generation: ~100-500ms per document chunk
+- Search: <50ms for most queries
+- Summarization: 1-5s depending on content length
+- Memory Usage: ~200-500MB base + ~1MB per 1,000 document chunks
+- Supported File Types: PDF, TXT, DOCX, PNG, JPG, JPEG, BMP, TIFF
    	
app.py CHANGED
@@ -33,7 +33,6 @@ class ContentOrganizerMCPServer:
     def __init__(self):
         # Initialize services
         logger.info("Initializing Content Organizer MCP Server...")
-
         self.vector_store = VectorStoreService()
         self.document_store = DocumentStoreService()
         self.embedding_service = EmbeddingService()
@@ -56,13 +55,12 @@ class ContentOrganizerMCPServer:
             llm_service=self.llm_service,
             search_tool=self.search_tool
         )
-
         # Track processing status
         self.processing_status = {}
 
         # Document cache for quick access
         self.document_cache = {}
-
         logger.info("Content Organizer MCP Server initialized successfully!")
 
     def run_async(self, coro):
@@ -72,7 +70,6 @@ class ContentOrganizerMCPServer:
         except RuntimeError:
             loop = asyncio.new_event_loop()
             asyncio.set_event_loop(loop)
-
         if loop.is_running():
             # If loop is already running, create a task
             import concurrent.futures
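
For context, the helper above bridges Gradio's synchronous callbacks and the async service layer. A simplified standalone sketch of the same pattern:

```python
# Simplified sketch of the run_async pattern: execute a coroutine from sync code,
# even when an event loop is already running in the current thread.
import asyncio
import concurrent.futures

def run_async(coro):
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    if loop.is_running():
        # A running loop cannot be blocked; run the coroutine on a worker thread's own loop.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()
    return loop.run_until_complete(coro)
```
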
@@ -87,31 +84,22 @@ class ContentOrganizerMCPServer:
         try:
             task_id = str(uuid.uuid4())
             self.processing_status[task_id] = {"status": "processing", "progress": 0}
-
             result = await self.ingestion_tool.process_document(file_path, file_type, task_id)
-
             if result.get("success"):
                 self.processing_status[task_id] = {"status": "completed", "progress": 100}
-                # Update document cache
                 doc_id = result.get("document_id")
                 if doc_id:
                     doc = await self.document_store.get_document(doc_id)
                     if doc:
                         self.document_cache[doc_id] = doc
-
                 return result
             else:
                 self.processing_status[task_id] = {"status": "failed", "error": result.get("error")}
                 return result
-
         except Exception as e:
             logger.error(f"Document ingestion failed: {str(e)}")
-            return {
-
-                "error": str(e),
-                "message": "Failed to process document"
-            }
-
     async def get_document_content_async(self, document_id: str) -> Optional[str]:
         """Get document content by ID"""
         try:
@@ -124,7 +112,6 @@ class ContentOrganizerMCPServer:
             if doc:
                 self.document_cache[document_id] = doc
                 return doc.content
-
             return None
         except Exception as e:
             logger.error(f"Error getting document content: {str(e)}")
@@ -134,149 +121,78 @@ class ContentOrganizerMCPServer:
         """MCP Tool: Perform semantic search"""
         try:
             results = await self.search_tool.search(query, top_k, filters)
-            return {
-                "success": True,
-                "query": query,
-                "results": [result.to_dict() for result in results],
-                "total_results": len(results)
-            }
         except Exception as e:
             logger.error(f"Semantic search failed: {str(e)}")
-            return {
-                "success": False,
-                "error": str(e),
-                "query": query,
-                "results": []
-            }
 
     async def summarize_content_async(self, content: str = None, document_id: str = None, style: str = "concise") -> Dict[str, Any]:
-        """MCP Tool: Summarize content or document"""
         try:
-            # If document_id provided, get content from document
             if document_id and document_id != "none":
                 content = await self.get_document_content_async(document_id)
                 if not content:
                     return {"success": False, "error": f"Document {document_id} not found"}
-
             if not content or not content.strip():
                 return {"success": False, "error": "No content provided for summarization"}
-
-            # Truncate content if too long (for API limits)
             max_content_length = 4000
             if len(content) > max_content_length:
                 content = content[:max_content_length] + "..."
-
             summary = await self.generative_tool.summarize(content, style)
-            return {
-                "success": True,
-                "summary": summary,
-                "original_length": len(content),
-                "summary_length": len(summary),
-                "style": style,
-                "document_id": document_id
-            }
         except Exception as e:
             logger.error(f"Summarization failed: {str(e)}")
-            return {
-                "success": False,
-                "error": str(e)
-            }
 
     async def generate_tags_async(self, content: str = None, document_id: str = None, max_tags: int = 5) -> Dict[str, Any]:
         """MCP Tool: Generate tags for content"""
         try:
-            # If document_id provided, get content from document
             if document_id and document_id != "none":
                 content = await self.get_document_content_async(document_id)
                 if not content:
                     return {"success": False, "error": f"Document {document_id} not found"}
-
             if not content or not content.strip():
                 return {"success": False, "error": "No content provided for tag generation"}
-
             tags = await self.generative_tool.generate_tags(content, max_tags)
-
-            # Update document tags if document_id provided
             if document_id and document_id != "none" and tags:
                 await self.document_store.update_document_metadata(document_id, {"tags": tags})
-
-            return {
-                "success": True,
-                "tags": tags,
-                "content_length": len(content),
-                "document_id": document_id
-            }
         except Exception as e:
             logger.error(f"Tag generation failed: {str(e)}")
-            return {
-                "success": False,
-                "error": str(e)
-            }
 
     async def answer_question_async(self, question: str, context_filter: Optional[Dict] = None) -> Dict[str, Any]:
-        """MCP Tool: Answer questions using RAG"""
         try:
-            # Search for relevant context
             search_results = await self.search_tool.search(question, top_k=5, filters=context_filter)
-
             if not search_results:
-                return {
-                    "success": False,
-                    "error": "No relevant context found in your documents. Please make sure you have uploaded relevant documents.",
-                    "question": question
-                }
-
-            # Generate answer using context
             answer = await self.generative_tool.answer_question(question, search_results)
-
-            return {
-                "success": True,
-                "question": question,
-                "answer": answer,
-                "sources": [result.to_dict() for result in search_results],
-                "confidence": "high" if len(search_results) >= 3 else "medium"
-            }
         except Exception as e:
             logger.error(f"Question answering failed: {str(e)}")
-            return {
-                "success": False,
-                "error": str(e),
-                "question": question
-            }
 
     def list_documents_sync(self, limit: int = 100, offset: int = 0) -> Dict[str, Any]:
-        """List stored documents"""
         try:
             documents = self.run_async(self.document_store.list_documents(limit, offset))
-            return {
-                "success": True,
-                "documents": [doc.to_dict() for doc in documents],
-                "total": len(documents)
-            }
         except Exception as e:
-            return {
-                "success": False,
-                "error": str(e)
-            }
 
-# Initialize the MCP server
 mcp_server = ContentOrganizerMCPServer()
 
-# Helper functions
 def get_document_list():
-    """Get list of documents for display"""
     try:
         result = mcp_server.list_documents_sync(limit=100)
         if result["success"]:
             if result["documents"]:
-                …
-                for i, …
-                    …
-                    …
-                    if …
-                        …
-                    …
-                return …
             else:
                 return "No documents in library yet. Upload some documents to get started!"
         else:
@@ -285,17 +201,10 @@ def get_document_list():
         return f"Error: {str(e)}"
 
 def get_document_choices():
-    """Get document choices for dropdown"""
     try:
         result = mcp_server.list_documents_sync(limit=100)
         if result["success"] and result["documents"]:
-            choices = []
-            for doc in result["documents"]:
-                # Create label with filename and shortened ID
-                choice_label = f"{doc['filename']} ({doc['id'][:8]}...)"
-                # Use full document ID as the value
-                choices.append((choice_label, doc['id']))
-
             logger.info(f"Generated {len(choices)} document choices")
             return choices
         return []
@@ -303,78 +212,82 @@ def get_document_choices():
         logger.error(f"Error getting document choices: {str(e)}")
         return []
 
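
Since the dropdown fix is the point of this commit, here is a minimal standalone Gradio sketch of the (label, value) choice pattern used above; the document data is hypothetical:

```python
# Minimal sketch of the (label, value) dropdown pattern: the UI shows the label,
# but the callback receives the full document ID (the value half of the tuple).
import gradio as gr

docs = [{"filename": "report.pdf", "id": "3f9c2a1e-1111-2222-3333-444455556666"}]  # hypothetical
choices = [(f"{d['filename']} ({d['id'][:8]}...)", d["id"]) for d in docs]

with gr.Blocks() as demo:
    dropdown = gr.Dropdown(label="Select Document", choices=choices)
    selected = gr.Textbox(label="Selected document ID")
    dropdown.change(lambda doc_id: doc_id, inputs=dropdown, outputs=selected)

demo.launch()
```

Refreshing the choices after uploads or deletes then becomes `gr.update(choices=get_document_choices())`, which is exactly what the handlers below return.
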
-
 def upload_and_process_file(file):
-    """Gradio interface for file upload"""
     if file is None:
-        …
-
     try:
-        # Get file path
         file_path = file.name if hasattr(file, 'name') else str(file)
-        file_type = Path(file_path).suffix.lower()
-
-        logger.info(f"Processing file: {file_path}")
-
-        # Process document
         result = mcp_server.run_async(mcp_server.ingest_document_async(file_path, file_type))
 
         if result["success"]:
-            # Get updated document list and choices
-            doc_list = get_document_list()
-            doc_choices = get_document_choices()
-
             return (
-                f"✅ Success: {result['message']}\nDocument ID: {result['document_id']}\nChunks created: {result['chunks_created']}",
                 result["document_id"],
-                …
-                gr.update(choices=…
-                gr.update(choices=…
-                gr.update(choices=…
-                gr.update(choices=doc_choices)
             )
         else:
             return (
-                f"❌ Error: {result.get('error', 'Unknown error')}",
-                …
-                …
-                gr.update(choices=…
-                gr.update(choices=…
-                gr.update(choices=get_document_choices()),
-                gr.update(choices=get_document_choices())
             )
     except Exception as e:
         logger.error(f"Error processing file: {str(e)}")
         return (
-            f"❌ Error: {str(e)}",
-            …
-            …
-            gr.update(choices=…
-            gr.update(choices=…
-            gr.update(choices=get_document_choices()),
-            gr.update(choices=get_document_choices())
         )
 
 def perform_search(query, top_k):
-    """Gradio interface for search"""
     if not query.strip():
         return "Please enter a search query"
-
     try:
         result = mcp_server.run_async(mcp_server.semantic_search_async(query, int(top_k)))
-
         if result["success"]:
             if result["results"]:
-                …
-                for i, …
-                    …
-                    …
-                    …
-                    if 'document_filename' in …
-                        …
-                    …
-                    …
-                return …
             else:
                 return f"No results found for: '{query}'\n\nMake sure you have uploaded relevant documents first."
         else:
@@ -384,19 +297,10 @@ def perform_search(query, top_k):
         return f"❌ Error: {str(e)}"
 
 def summarize_document(doc_choice, custom_text, style):
-    """Gradio interface for summarization"""
     try:
-        # Debug logging
         logger.info(f"Summarize called with doc_choice: {doc_choice}, type: {type(doc_choice)}")
 
-        # Get document ID from dropdown choice
-        document_id = None
-        if doc_choice and doc_choice != "none" and doc_choice != "":
-            # When Gradio dropdown returns a choice, it returns the value part of the (label, value) tuple
-            document_id = doc_choice
-            logger.info(f"Using document ID: {document_id}")
-
-        # Use custom text if provided, otherwise use document
         if custom_text and custom_text.strip():
             logger.info("Using custom text for summarization")
             result = mcp_server.run_async(mcp_server.summarize_content_async(content=custom_text, style=style))
@@ -407,14 +311,14 @@ def summarize_document(doc_choice, custom_text, style):
             return "Please select a document from the dropdown or enter text to summarize"
 
         if result["success"]:
-            …
-            …
-            …
-            …
-            …
             if result.get('document_id'):
-                …
-            return …
         else:
             return f"❌ Summarization failed: {result['error']}"
     except Exception as e:
@@ -422,19 +326,10 @@ def summarize_document(doc_choice, custom_text, style):
         return f"❌ Error: {str(e)}"
 
 def generate_tags_for_document(doc_choice, custom_text, max_tags):
-    """Gradio interface for tag generation"""
     try:
-        # Debug logging
         logger.info(f"Generate tags called with doc_choice: {doc_choice}, type: {type(doc_choice)}")
-
-
-        document_id = None
-        if doc_choice and doc_choice != "none" and doc_choice != "":
-            # When Gradio dropdown returns a choice, it returns the value part of the (label, value) tuple
-            document_id = doc_choice
-            logger.info(f"Using document ID: {document_id}")
-
-        # Use custom text if provided, otherwise use document
         if custom_text and custom_text.strip():
             logger.info("Using custom text for tag generation")
             result = mcp_server.run_async(mcp_server.generate_tags_async(content=custom_text, max_tags=int(max_tags)))
@@ -446,14 +341,14 @@ def generate_tags_for_document(doc_choice, custom_text, max_tags):
 
         if result["success"]:
             tags_str = ", ".join(result["tags"])
-            …
-            …
-            …
-            …
             if result.get('document_id'):
-                …
-                …
-            return …
         else:
             return f"❌ Tag generation failed: {result['error']}"
     except Exception as e:
@@ -461,310 +356,174 @@ def generate_tags_for_document(doc_choice, custom_text, max_tags):
         return f"❌ Error: {str(e)}"
 
 def ask_question(question):
-    """Gradio interface for Q&A"""
     if not question.strip():
         return "Please enter a question"
-
     try:
         result = mcp_server.run_async(mcp_server.answer_question_async(question))
-
         if result["success"]:
-            …
-            …
-            …
-            …
-            for i, …
-                filename = …
-                …
-                …
-                …
-            return …
         else:
             return f"❌ {result.get('error', 'Failed to answer question')}"
     except Exception as e:
         return f"❌ Error: {str(e)}"
 
 def delete_document_from_library(document_id):
-    …
     try:
-        …
-        …
-        …
-        …
         else:
-            msg …
-            …
-            …
-
-            …
-
-        return f"❌ Error: {str(e)}", get_document_list(), gr.update(choices=get_document_choices()), gr.update(choices=get_document_choices()), gr.update(choices=get_document_choices()), gr.update(choices=get_document_choices())
 
-def refresh_library():
-    """Refresh the document library display"""
-    doc_list = get_document_list()
-    doc_choices = get_document_choices()
-    return doc_list, gr.update(choices=doc_choices), gr.update(choices=doc_choices), gr.update(choices=doc_choices), gr.update(choices=doc_choices)
 
-
 def create_gradio_interface():
     with gr.Blocks(title="🧠 Intelligent Content Organizer MCP Agent", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # 🧠 Intelligent Content Organizer MCP Agent
-
         A powerful MCP (Model Context Protocol) server for intelligent content management with semantic search,
-        summarization, and Q&A capabilities …
-
         ## 🚀 Quick Start:
-        1. **…
-        2. **…
-        3. **…
-        4. **…
         """)
 
-        # State components for dropdowns
-        with gr.Row(visible=False):
-            doc_dropdown_sum = gr.Dropdown(label="Hidden", choices=get_document_choices())
-            doc_dropdown_tag = gr.Dropdown(label="Hidden", choices=get_document_choices())
-            delete_doc_dropdown = gr.Dropdown(label="Hidden", choices=get_document_choices())
-
         with gr.Tabs():
-            # 📚 Document Library Tab
             with gr.Tab("📚 Document Library"):
                 with gr.Row():
                     with gr.Column():
                         gr.Markdown("### Your Document Collection")
-                        …
-                        …
-                        …
-                            lines=20,
-                            interactive=False
-                        )
-                        refresh_btn = gr.Button("🔄 Refresh Library", variant="secondary")
-
-                        delete_doc_dropdown_visible = gr.Dropdown(
-                            label="Select Document to Delete",
-                            choices=get_document_choices(),
-                            value=None,
-                            interactive=True,
-                            allow_custom_value=False
-                        )
                         delete_btn = gr.Button("🗑️ Delete Selected Document", variant="stop")
-                        …
-
-            refresh_btn.click(
-                fn=refresh_library,
-                outputs=[document_list, delete_doc_dropdown_visible, doc_dropdown_sum, doc_dropdown_tag, delete_doc_dropdown]
-            )
-
-            delete_btn.click(
-                delete_document_from_library,
-                inputs=[delete_doc_dropdown_visible],
-                outputs=[delete_output, document_list, delete_doc_dropdown_visible, doc_dropdown_sum, doc_dropdown_tag, delete_doc_dropdown]
-            )
-
-            # 📄 Upload Documents Tab
             with gr.Tab("📄 Upload Documents"):
                 with gr.Row():
                     with gr.Column():
                         gr.Markdown("### Add Documents to Your Library")
-                        …
-                        …
-                            file_types=[".pdf", ".txt", ".docx", ".png", ".jpg", ".jpeg"],
-                            type="filepath"
-                        )
-                        upload_btn = gr.Button("🚀 Process & Add to Library", variant="primary", size="lg")
                     with gr.Column():
-                        …
-                        …
-                        …
-                            placeholder="Upload a document to see processing results..."
-                        )
-                        doc_id_output = gr.Textbox(
-                            label="Document ID",
-                            placeholder="Document ID will appear here after processing..."
-                        )
-
-            upload_btn.click(
-                upload_and_process_file,
-                inputs=[file_input],
-                outputs=[upload_output, doc_id_output, document_list, delete_doc_dropdown_visible, doc_dropdown_sum, doc_dropdown_tag, delete_doc_dropdown]
-            )
-
-            # 🔍 Search Documents Tab
             with gr.Tab("🔍 Search Documents"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         gr.Markdown("### Search Your Document Library")
-                        …
-                        …
-                        …
-                            lines=2
-                        )
-                        search_top_k = gr.Slider(
-                            label="Number of Results",
-                            minimum=1,
-                            maximum=20,
-                            value=5,
-                            step=1
-                        )
-                        search_btn = gr.Button("🔍 Search Library", variant="primary", size="lg")
                     with gr.Column(scale=2):
-                        …
-                        …
-                            lines=20,
-                            placeholder="Search results will appear here..."
         | 
| 617 | 
            -
                                    )
         | 
| 618 | 
            -
             | 
| 619 | 
            -
                            search_btn.click(
         | 
| 620 | 
            -
                                perform_search,
         | 
| 621 | 
            -
                                inputs=[search_query, search_top_k],
         | 
| 622 | 
            -
                                outputs=[search_output]
         | 
| 623 | 
            -
                            )
         | 
| 624 | 
            -
             | 
| 625 | 
            -
                        # 📝 Summarize Tab
         | 
| 626 | 
             
                        with gr.Tab("📝 Summarize"):
         | 
| 627 | 
             
                            with gr.Row():
         | 
| 628 | 
             
                                with gr.Column():
         | 
| 629 | 
             
                                    gr.Markdown("### Generate Document Summaries")
         | 
| 630 | 
            -
             | 
| 631 | 
            -
                                     | 
| 632 | 
            -
             | 
| 633 | 
            -
             | 
| 634 | 
            -
                                        value=None,
         | 
| 635 | 
            -
                                        interactive=True,
         | 
| 636 | 
            -
                                        allow_custom_value=False
         | 
| 637 | 
            -
                                    )
         | 
| 638 | 
            -
             | 
| 639 | 
            -
                                    summary_text = gr.Textbox(
         | 
| 640 | 
            -
                                        label="Or Paste Text to Summarize",
         | 
| 641 | 
            -
                                        placeholder="Paste any text here to summarize...",
         | 
| 642 | 
            -
                                        lines=8
         | 
| 643 | 
            -
                                    )
         | 
| 644 | 
            -
             | 
| 645 | 
            -
                                    summary_style = gr.Dropdown(
         | 
| 646 | 
            -
                                        label="Summary Style",
         | 
| 647 | 
            -
                                        choices=["concise", "detailed", "bullet_points", "executive"],
         | 
| 648 | 
            -
                                        value="concise",
         | 
| 649 | 
            -
                                        info="Choose how you want the summary formatted"
         | 
| 650 | 
            -
                                    )
         | 
| 651 | 
            -
                                    summarize_btn = gr.Button("📝 Generate Summary", variant="primary", size="lg")
         | 
| 652 | 
            -
             | 
| 653 | 
             
                                with gr.Column():
         | 
| 654 | 
            -
                                     | 
| 655 | 
            -
             | 
| 656 | 
            -
                                        lines=20,
         | 
| 657 | 
            -
                                        placeholder="Summary will appear here..."
         | 
| 658 | 
            -
                                    )
         | 
| 659 | 
            -
             | 
| 660 | 
            -
                            summarize_btn.click(
         | 
| 661 | 
            -
                                summarize_document,
         | 
| 662 | 
            -
                                inputs=[doc_dropdown_sum_visible, summary_text, summary_style],
         | 
| 663 | 
            -
                                outputs=[summary_output]
         | 
| 664 | 
            -
                            )
         | 
| 665 | 
            -
             | 
| 666 | 
            -
                        # 🏷️ Generate Tags Tab
         | 
| 667 | 
             
                        with gr.Tab("🏷️ Generate Tags"):
         | 
| 668 | 
             
                            with gr.Row():
         | 
| 669 | 
             
                                with gr.Column():
         | 
| 670 | 
            -
                                    gr.Markdown("###  | 
| 671 | 
            -
             | 
| 672 | 
            -
                                     | 
| 673 | 
            -
             | 
| 674 | 
            -
             | 
| 675 | 
            -
                                        value=None,
         | 
| 676 | 
            -
                                        interactive=True,
         | 
| 677 | 
            -
                                        allow_custom_value=False
         | 
| 678 | 
            -
                                    )
         | 
| 679 | 
            -
             | 
| 680 | 
            -
                                    tag_text = gr.Textbox(
         | 
| 681 | 
            -
                                        label="Or Paste Text to Generate Tags",
         | 
| 682 | 
            -
                                        placeholder="Paste any text here to generate tags...",
         | 
| 683 | 
            -
                                        lines=8
         | 
| 684 | 
            -
                                    )
         | 
| 685 | 
            -
             | 
| 686 | 
            -
                                    max_tags = gr.Slider(
         | 
| 687 | 
            -
                                        label="Number of Tags",
         | 
| 688 | 
            -
                                        minimum=3,
         | 
| 689 | 
            -
                                        maximum=15,
         | 
| 690 | 
            -
                                        value=5,
         | 
| 691 | 
            -
                                        step=1
         | 
| 692 | 
            -
                                    )
         | 
| 693 | 
            -
                                    tag_btn = gr.Button("🏷️ Generate Tags", variant="primary", size="lg")
         | 
| 694 | 
            -
             | 
| 695 | 
             
                                with gr.Column():
         | 
| 696 | 
            -
                                     | 
| 697 | 
            -
             | 
| 698 | 
            -
                                        lines=10,
         | 
| 699 | 
            -
                                        placeholder="Tags will appear here..."
         | 
| 700 | 
            -
                                    )
         | 
| 701 | 
            -
             | 
| 702 | 
            -
                            tag_btn.click(
         | 
| 703 | 
            -
                                generate_tags_for_document,
         | 
| 704 | 
            -
                                inputs=[doc_dropdown_tag_visible, tag_text, max_tags],
         | 
| 705 | 
            -
                                outputs=[tag_output]
         | 
| 706 | 
            -
                            )
         | 
| 707 | 
            -
             | 
| 708 | 
            -
                        # ❓ Ask Questions Tab
         | 
| 709 | 
             
                        with gr.Tab("❓ Ask Questions"):
         | 
| 710 | 
             
                            with gr.Row():
         | 
| 711 | 
             
                                with gr.Column():
         | 
| 712 | 
            -
                                    gr.Markdown("""
         | 
| 713 | 
            -
                                    ### Ask Questions About Your Documents
         | 
| 714 | 
            -
             | 
| 715 | 
             
                                    The AI will search through all your uploaded documents to find relevant information 
         | 
| 716 | 
            -
                                    and provide comprehensive answers with sources.
         | 
| 717 | 
            -
                                    """)
         | 
| 718 | 
            -
                                     | 
| 719 | 
            -
                                        label="Your Question",
         | 
| 720 | 
            -
                                        placeholder="Ask anything about your documents...",
         | 
| 721 | 
            -
                                        lines=3
         | 
| 722 | 
            -
                                    )
         | 
| 723 | 
            -
                                    qa_btn = gr.Button("❓ Get Answer", variant="primary", size="lg")
         | 
| 724 | 
            -
             | 
| 725 | 
             
                                with gr.Column():
         | 
| 726 | 
            -
                                     | 
| 727 | 
            -
             | 
| 728 | 
            -
             | 
| 729 | 
            -
                                        placeholder="Answer will appear here with sources..."
         | 
| 730 | 
            -
                                    )
         | 
| 731 | 
            -
             | 
| 732 | 
            -
                            qa_btn.click(
         | 
| 733 | 
            -
                                ask_question,
         | 
| 734 | 
            -
                                inputs=[qa_question],
         | 
| 735 | 
            -
                                outputs=[qa_output]
         | 
| 736 | 
            -
                            )
         | 
| 737 | 
            -
             | 
| 738 | 
            -
                    # Update hidden dropdowns when visible ones change
         | 
| 739 | 
            -
                    doc_dropdown_sum_visible.change(
         | 
| 740 | 
            -
                        lambda x: x,
         | 
| 741 | 
            -
                        inputs=[doc_dropdown_sum_visible],
         | 
| 742 | 
            -
                        outputs=[doc_dropdown_sum]
         | 
| 743 | 
            -
                    )
         | 
| 744 |  | 
| 745 | 
            -
                     | 
| 746 | 
            -
             | 
| 747 | 
            -
                        inputs=[doc_dropdown_tag_visible],
         | 
| 748 | 
            -
                        outputs=[doc_dropdown_tag]
         | 
| 749 | 
            -
                    )
         | 
| 750 |  | 
| 751 | 
            -
                     | 
| 752 | 
            -
             | 
| 753 | 
            -
                        inputs=[delete_doc_dropdown_visible],
         | 
| 754 | 
            -
                        outputs=[delete_doc_dropdown]
         | 
| 755 | 
            -
                    )
         | 
| 756 |  | 
| 757 | 
            -
                     | 
| 758 | 
            -
                     | 
| 759 | 
            -
             | 
| 760 | 
            -
             | 
| 761 | 
            -
                    )
         | 
    return interface

-# Create and launch the interface
if __name__ == "__main__":
-    interface = create_gradio_interface()
-
-    # Launch with proper configuration for Hugging Face Spaces
-    interface.launch(mcp_server=True)
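The removed entry point above already launched Gradio with `mcp_server=True`, which serves the app's exposed functions as MCP tools alongside the normal web UI. A minimal, self-contained sketch of that launch pattern, assuming Gradio is installed with MCP support (`gradio[mcp]`); the `echo` tool here is illustrative, not part of this app:

```python
import gradio as gr

def echo(text: str) -> str:
    """A trivial function exposed to the UI and, via MCP, to agent clients."""
    return text

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    inp.submit(echo, inputs=inp, outputs=out)

if __name__ == "__main__":
    # mcp_server=True serves an MCP endpoint alongside the web app
    demo.launch(mcp_server=True)
```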
...

    def __init__(self):
        # Initialize services
        logger.info("Initializing Content Organizer MCP Server...")
        self.vector_store = VectorStoreService()
        self.document_store = DocumentStoreService()
        self.embedding_service = EmbeddingService()
        ...
            llm_service=self.llm_service,
            search_tool=self.search_tool
        )
+
        # Track processing status
        self.processing_status = {}

        # Document cache for quick access
        self.document_cache = {}
        logger.info("Content Organizer MCP Server initialized successfully!")

    def run_async(self, coro):
        ...
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        if loop.is_running():
            # If loop is already running, create a task
            import concurrent.futures
        ...
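Only fragments of `run_async` survive in this hunk. A sketch of the complete sync-to-async bridge those fragments suggest, offered as a reconstruction under stated assumptions rather than the exact code (the standalone function form is illustrative; in the app it is a method):

```python
import asyncio
import concurrent.futures

def run_async(coro):
    """Run a coroutine from synchronous code, e.g. a Gradio event handler."""
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    if loop.is_running():
        # A running loop cannot be blocked on directly; run the coroutine
        # on a fresh loop in a worker thread and wait for its result.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()
    return loop.run_until_complete(coro)
```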
        ...
        try:
            task_id = str(uuid.uuid4())
            self.processing_status[task_id] = {"status": "processing", "progress": 0}
            result = await self.ingestion_tool.process_document(file_path, file_type, task_id)
            if result.get("success"):
                self.processing_status[task_id] = {"status": "completed", "progress": 100}
                doc_id = result.get("document_id")
                if doc_id:
                    doc = await self.document_store.get_document(doc_id)
                    if doc:
                        self.document_cache[doc_id] = doc
                return result
            else:
                self.processing_status[task_id] = {"status": "failed", "error": result.get("error")}
                return result
        except Exception as e:
            logger.error(f"Document ingestion failed: {str(e)}")
+           return {"success": False, "error": str(e), "message": "Failed to process document"}
+
    async def get_document_content_async(self, document_id: str) -> Optional[str]:
        """Get document content by ID"""
        try:
            ...
            if doc:
                self.document_cache[document_id] = doc
                return doc.content
            return None
        except Exception as e:
            logger.error(f"Error getting document content: {str(e)}")
            ...
        ...
        """MCP Tool: Perform semantic search"""
        try:
            results = await self.search_tool.search(query, top_k, filters)
+           return {"success": True, "query": query, "results": [result.to_dict() for result in results], "total_results": len(results)}
        except Exception as e:
            logger.error(f"Semantic search failed: {str(e)}")
+           return {"success": False, "error": str(e), "query": query, "results": []}

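The dict built here is the contract the UI relies on: each `result.to_dict()` is expected to expose at least `score`, `content`, `document_id`, and a `metadata` dict (optionally carrying `document_filename`), because `perform_search` and `ask_question` further down read exactly those keys.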
    async def summarize_content_async(self, content: str = None, document_id: str = None, style: str = "concise") -> Dict[str, Any]:
        try:
            if document_id and document_id != "none":
                content = await self.get_document_content_async(document_id)
                if not content:
                    return {"success": False, "error": f"Document {document_id} not found"}
            if not content or not content.strip():
                return {"success": False, "error": "No content provided for summarization"}
            max_content_length = 4000
            if len(content) > max_content_length:
                content = content[:max_content_length] + "..."
            summary = await self.generative_tool.summarize(content, style)
+           return {"success": True, "summary": summary, "original_length": len(content), "summary_length": len(summary), "style": style, "document_id": document_id}
        except Exception as e:
            logger.error(f"Summarization failed: {str(e)}")
+           return {"success": False, "error": str(e)}

    async def generate_tags_async(self, content: str = None, document_id: str = None, max_tags: int = 5) -> Dict[str, Any]:
        """MCP Tool: Generate tags for content"""
        try:
            if document_id and document_id != "none":
                content = await self.get_document_content_async(document_id)
                if not content:
                    return {"success": False, "error": f"Document {document_id} not found"}
            if not content or not content.strip():
                return {"success": False, "error": "No content provided for tag generation"}
            tags = await self.generative_tool.generate_tags(content, max_tags)
            if document_id and document_id != "none" and tags:
                await self.document_store.update_document_metadata(document_id, {"tags": tags})
+           return {"success": True, "tags": tags, "content_length": len(content), "document_id": document_id}
        except Exception as e:
            logger.error(f"Tag generation failed: {str(e)}")
+           return {"success": False, "error": str(e)}

    async def answer_question_async(self, question: str, context_filter: Optional[Dict] = None) -> Dict[str, Any]:
        try:
            search_results = await self.search_tool.search(question, top_k=5, filters=context_filter)
            if not search_results:
+               return {"success": False, "error": "No relevant context found in your documents. Please make sure you have uploaded relevant documents.", "question": question}
            answer = await self.generative_tool.answer_question(question, search_results)
+           return {"success": True, "question": question, "answer": answer, "sources": [result.to_dict() for result in search_results], "confidence": "high" if len(search_results) >= 3 else "medium"}
        except Exception as e:
            logger.error(f"Question answering failed: {str(e)}")
+           return {"success": False, "error": str(e), "question": question}

    def list_documents_sync(self, limit: int = 100, offset: int = 0) -> Dict[str, Any]:
        try:
            documents = self.run_async(self.document_store.list_documents(limit, offset))
+           return {"success": True, "documents": [doc.to_dict() for doc in documents], "total": len(documents)}
        except Exception as e:
+           return {"success": False, "error": str(e)}

mcp_server = ContentOrganizerMCPServer()

def get_document_list():
    try:
        result = mcp_server.list_documents_sync(limit=100)
        if result["success"]:
            if result["documents"]:
+               doc_list_str = "📚 Documents in Library:\n\n"
+               for i, doc_item in enumerate(result["documents"], 1):
+                   doc_list_str += f"{i}. {doc_item['filename']} (ID: {doc_item['id'][:8]}...)\n"
+                   doc_list_str += f"   Type: {doc_item['doc_type']}, Size: {doc_item['file_size']} bytes\n"
+                   if doc_item.get('tags'):
+                       doc_list_str += f"   Tags: {', '.join(doc_item['tags'])}\n"
+                   doc_list_str += f"   Created: {doc_item['created_at'][:10]}\n\n"
+               return doc_list_str
            else:
                return "No documents in library yet. Upload some documents to get started!"
        else:
            ...
    except Exception as e:
        return f"Error: {str(e)}"

def get_document_choices():
    try:
        result = mcp_server.list_documents_sync(limit=100)
        if result["success"] and result["documents"]:
+           choices = [(f"{doc['filename']} ({doc['id'][:8]}...)", doc['id']) for doc in result["documents"]]
            logger.info(f"Generated {len(choices)} document choices")
            return choices
        return []
    except Exception as e:
        logger.error(f"Error getting document choices: {str(e)}")
        return []

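`get_document_choices` returns `(label, value)` pairs, which Gradio dropdowns accept directly: the user sees the filename plus a shortened ID, while event handlers receive the full document ID. A small illustration of the mechanism (filenames and IDs below are made up):

```python
import gradio as gr

# Each choice is a (display label, underlying value) pair
choices = [
    ("report.pdf (1a2b3c4d...)", "1a2b3c4d-0000-0000-0000-000000000000"),
    ("notes.txt (9f8e7d6c...)", "9f8e7d6c-0000-0000-0000-000000000000"),
]

with gr.Blocks() as demo:
    dd = gr.Dropdown(label="Select Document", choices=choices, value=None)
    picked = gr.Textbox(label="Selected document ID")
    # The handler receives the choice's value (the full ID), not its label
    dd.change(lambda doc_id: doc_id, inputs=dd, outputs=picked)
```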
+def refresh_library():
+    doc_list_refreshed = get_document_list()
+    doc_choices_refreshed = get_document_choices()
+    logger.info(f"Refreshing library. Found {len(doc_choices_refreshed)} choices.")
+    return (
+        doc_list_refreshed,
+        gr.update(choices=doc_choices_refreshed),
+        gr.update(choices=doc_choices_refreshed),
+        gr.update(choices=doc_choices_refreshed)
+    )
+
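`refresh_library` returns one value per wired output component, positionally matched: the first string replaces the library textbox, and each `gr.update(choices=...)` patches a dropdown's option list in place. This is what keeps the delete, summarize, and tag dropdowns in sync from a single refresh. A stripped-down sketch of the same fan-out pattern (component names are illustrative):

```python
import gradio as gr

def refresh():
    items = [("a.txt (111...)", "111"), ("b.txt (222...)", "222")]  # illustrative
    # One return value per output, in the order given to outputs=[...]
    return f"{len(items)} documents", gr.update(choices=items), gr.update(choices=items)

with gr.Blocks() as demo:
    listing = gr.Textbox(label="Library")
    dd_delete = gr.Dropdown(label="Delete", choices=[])
    dd_summarize = gr.Dropdown(label="Summarize", choices=[])
    gr.Button("🔄 Refresh").click(refresh, outputs=[listing, dd_delete, dd_summarize])
```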
def upload_and_process_file(file):
    if file is None:
+       doc_list_initial = get_document_list()
+       doc_choices_initial = get_document_choices()
+       return (
+           "No file uploaded", "", doc_list_initial,
+           gr.update(choices=doc_choices_initial),
+           gr.update(choices=doc_choices_initial),
+           gr.update(choices=doc_choices_initial)
+       )
    try:
        file_path = file.name if hasattr(file, 'name') else str(file)
+       file_type = Path(file_path).suffix.lower().strip('.')  # Ensure the suffix is clean
+       logger.info(f"Processing file: {file_path}, type: {file_type}")
        result = mcp_server.run_async(mcp_server.ingest_document_async(file_path, file_type))

+       doc_list_updated = get_document_list()
+       doc_choices_updated = get_document_choices()
+
        if result["success"]:
            return (
+               f"✅ Success: {result['message']}\nDocument ID: {result['document_id']}\nChunks created: {result['chunks_created']}",
                result["document_id"],
+               doc_list_updated,
+               gr.update(choices=doc_choices_updated),
+               gr.update(choices=doc_choices_updated),
+               gr.update(choices=doc_choices_updated)
            )
        else:
            return (
+               f"❌ Error: {result.get('error', 'Unknown error')}", "",
+               doc_list_updated,
+               gr.update(choices=doc_choices_updated),
+               gr.update(choices=doc_choices_updated),
+               gr.update(choices=doc_choices_updated)
            )
    except Exception as e:
        logger.error(f"Error processing file: {str(e)}")
+       doc_list_error = get_document_list()
+       doc_choices_error = get_document_choices()
        return (
+           f"❌ Error: {str(e)}", "",
+           doc_list_error,
+           gr.update(choices=doc_choices_error),
+           gr.update(choices=doc_choices_error),
+           gr.update(choices=doc_choices_error)
        )

def perform_search(query, top_k):
    if not query.strip():
        return "Please enter a search query"
    try:
        result = mcp_server.run_async(mcp_server.semantic_search_async(query, int(top_k)))
        if result["success"]:
            if result["results"]:
+               output_str = f"🔍 Found {result['total_results']} results for: '{query}'\n\n"
+               for i, res_item in enumerate(result["results"], 1):
+                   output_str += f"Result {i}:\n"
+                   output_str += f"📊 Relevance Score: {res_item['score']:.3f}\n"
+                   output_str += f"📄 Content: {res_item['content'][:300]}...\n"
+                   if 'document_filename' in res_item.get('metadata', {}):
+                       output_str += f"📁 Source: {res_item['metadata']['document_filename']}\n"
+                   output_str += f"🔗 Document ID: {res_item.get('document_id', 'Unknown')}\n"
+                   output_str += "-" * 80 + "\n\n"
+               return output_str
            else:
                return f"No results found for: '{query}'\n\nMake sure you have uploaded relevant documents first."
        else:
            ...
    except Exception as e:
        return f"❌ Error: {str(e)}"

def summarize_document(doc_choice, custom_text, style):
    try:
        logger.info(f"Summarize called with doc_choice: {doc_choice}, type: {type(doc_choice)}")
+       document_id = doc_choice if doc_choice and doc_choice != "none" and doc_choice != "" else None

        if custom_text and custom_text.strip():
            logger.info("Using custom text for summarization")
            result = mcp_server.run_async(mcp_server.summarize_content_async(content=custom_text, style=style))
        ...
            return "Please select a document from the dropdown or enter text to summarize"

        if result["success"]:
+           output_str = f"📝 Summary ({style} style):\n\n{result['summary']}\n\n"
+           output_str += f"📊 Statistics:\n"
+           output_str += f"- Original length: {result['original_length']} characters\n"
+           output_str += f"- Summary length: {result['summary_length']} characters\n"
+           output_str += f"- Compression ratio: {(1 - result['summary_length']/max(1, result['original_length']))*100:.1f}%\n"  # Avoid division by zero
            if result.get('document_id'):
+               output_str += f"- Document ID: {result['document_id']}\n"
+           return output_str
        else:
            return f"❌ Summarization failed: {result['error']}"
    except Exception as e:
        ...
        return f"❌ Error: {str(e)}"

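As a quick check on the compression-ratio line: a 4,000-character original reduced to a 400-character summary reports (1 - 400/4000) * 100 = 90.0%, and the `max(1, ...)` guard keeps an empty original from dividing by zero.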
def generate_tags_for_document(doc_choice, custom_text, max_tags):
    try:
        logger.info(f"Generate tags called with doc_choice: {doc_choice}, type: {type(doc_choice)}")
+       document_id = doc_choice if doc_choice and doc_choice != "none" and doc_choice != "" else None

        if custom_text and custom_text.strip():
            logger.info("Using custom text for tag generation")
            result = mcp_server.run_async(mcp_server.generate_tags_async(content=custom_text, max_tags=int(max_tags)))
        ...

        if result["success"]:
            tags_str = ", ".join(result["tags"])
+           output_str = f"🏷️ Generated Tags:\n\n{tags_str}\n\n"
+           output_str += f"📊 Statistics:\n"
+           output_str += f"- Content length: {result['content_length']} characters\n"
+           output_str += f"- Number of tags: {len(result['tags'])}\n"
            if result.get('document_id'):
+               output_str += f"- Document ID: {result['document_id']}\n"
+               output_str += f"\n✅ Tags have been saved to the document."
+           return output_str
        else:
            return f"❌ Tag generation failed: {result['error']}"
    except Exception as e:
        ...
        return f"❌ Error: {str(e)}"

def ask_question(question):
    if not question.strip():
        return "Please enter a question"
    try:
        result = mcp_server.run_async(mcp_server.answer_question_async(question))
        if result["success"]:
+           output_str = f"❓ Question: {result['question']}\n\n"
+           output_str += f"💡 Answer:\n{result['answer']}\n\n"
+           output_str += f"🎯 Confidence: {result['confidence']}\n\n"
+           output_str += f"📚 Sources Used ({len(result['sources'])}):\n"
+           for i, source_item in enumerate(result['sources'], 1):
+               filename = source_item.get('metadata', {}).get('document_filename', 'Unknown')
+               output_str += f"\n{i}. 📄 {filename}\n"
+               output_str += f"   📝 Excerpt: {source_item['content'][:150]}...\n"
+               output_str += f"   📊 Relevance: {source_item['score']:.3f}\n"
+           return output_str
        else:
            return f"❌ {result.get('error', 'Failed to answer question')}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

def delete_document_from_library(document_id):
+   if not document_id:
+       doc_list_current = get_document_list()
+       doc_choices_current = get_document_choices()
+       return (
+           "No document selected to delete.",
+           doc_list_current,
+           gr.update(choices=doc_choices_current),
+           gr.update(choices=doc_choices_current),
+           gr.update(choices=doc_choices_current)
+       )
    try:
+       delete_doc_store_result = mcp_server.run_async(mcp_server.document_store.delete_document(document_id))
+       delete_vec_store_result = mcp_server.run_async(mcp_server.vector_store.delete_document(document_id))
+
+       msg = ""
+       if delete_doc_store_result:
+           msg += f"🗑️ Document {document_id[:8]}... deleted from document store. "
        else:
+           msg += f"❌ Failed to delete document {document_id[:8]}... from document store. "
+
+       if delete_vec_store_result:
+           msg += "Embeddings deleted from vector store."
+       else:
+           msg += "Failed to delete embeddings from vector store (or no embeddings existed)."

+       doc_list_updated = get_document_list()
+       doc_choices_updated = get_document_choices()
+       return (
+           msg,
+           doc_list_updated,
+           gr.update(choices=doc_choices_updated),
+           gr.update(choices=doc_choices_updated),
+           gr.update(choices=doc_choices_updated)
+       )
+   except Exception as e:
+       logger.error(f"Error deleting document: {str(e)}")
+       doc_list_error = get_document_list()
+       doc_choices_error = get_document_choices()
+       return (
+           f"❌ Error deleting document: {str(e)}",
+           doc_list_error,
+           gr.update(choices=doc_choices_error),
+           gr.update(choices=doc_choices_error),
+           gr.update(choices=doc_choices_error)
+       )
+
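The deletion handler deliberately removes the document from both stores: dropping only the document-store record would leave orphaned embeddings behind, and those stale chunks could still surface in semantic search and Q&A results.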
def create_gradio_interface():
    with gr.Blocks(title="🧠 Intelligent Content Organizer MCP Agent", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # 🧠 Intelligent Content Organizer MCP Agent
        A powerful MCP (Model Context Protocol) server for intelligent content management with semantic search,
+       summarization, and Q&A capabilities.
        ## 🚀 Quick Start:
+       1. **Documents in Library** → View your uploaded documents in the "📚 Document Library" tab
+       2. **Upload Documents** → Go to the "📄 Upload Documents" tab
+       3. **Search Your Content** → Use "🔍 Search Documents" to find information
+       4. **Get Summaries** → Select any document in the "📝 Summarize" tab
+       5. **Generate Tags** → Auto-generate tags for your documents in the "🏷️ Generate Tags" tab
+       6. **Ask Questions** → Get answers from your documents in the "❓ Ask Questions" tab
+       7. **Delete Documents** → Remove documents from your library in the "📚 Document Library" tab
+       8. **Refresh Library** → Click the 🔄 button to refresh the document list
        """)

        with gr.Tabs():
            with gr.Tab("📚 Document Library"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Your Document Collection")
+                       document_list_display = gr.Textbox(label="Documents in Library", value=get_document_list(), lines=20, interactive=False)
+                       refresh_btn_library = gr.Button("🔄 Refresh Library", variant="secondary")
+                       delete_doc_dropdown_visible = gr.Dropdown(label="Select Document to Delete", choices=get_document_choices(), value=None, interactive=True, allow_custom_value=False)
                        delete_btn = gr.Button("🗑️ Delete Selected Document", variant="stop")
+                       delete_output_display = gr.Textbox(label="Delete Status", visible=True)

            with gr.Tab("📄 Upload Documents"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Add Documents to Your Library")
+                       file_input_upload = gr.File(label="Select Document to Upload", file_types=[".pdf", ".txt", ".docx", ".png", ".jpg", ".jpeg"], type="filepath")
+                       upload_btn_process = gr.Button("🚀 Process & Add to Library", variant="primary", size="lg")
                    with gr.Column():
+                       upload_output_display = gr.Textbox(label="Processing Result", lines=6, placeholder="Upload a document to see processing results...")
+                       doc_id_output_display = gr.Textbox(label="Document ID", placeholder="Document ID will appear here after processing...")

            with gr.Tab("🔍 Search Documents"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Search Your Document Library")
+                       search_query_input = gr.Textbox(label="What are you looking for?", placeholder="Enter your search query...", lines=2)
+                       search_top_k_slider = gr.Slider(label="Number of Results", minimum=1, maximum=20, value=5, step=1)
+                       search_btn_action = gr.Button("🔍 Search Library", variant="primary", size="lg")
                    with gr.Column(scale=2):
+                       search_output_display = gr.Textbox(label="Search Results", lines=20, placeholder="Search results will appear here...")

            with gr.Tab("📝 Summarize"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Generate Document Summaries")
+                       doc_dropdown_sum_visible = gr.Dropdown(label="Select Document to Summarize", choices=get_document_choices(), value=None, interactive=True, allow_custom_value=False)
+                       summary_text_input = gr.Textbox(label="Or Paste Text to Summarize", placeholder="Paste any text here to summarize...", lines=8)
+                       summary_style_dropdown = gr.Dropdown(label="Summary Style", choices=["concise", "detailed", "bullet_points", "executive"], value="concise", info="Choose how you want the summary formatted")
+                       summarize_btn_action = gr.Button("📝 Generate Summary", variant="primary", size="lg")
                    with gr.Column():
+                       summary_output_display = gr.Textbox(label="Generated Summary", lines=20, placeholder="Summary will appear here...")

            with gr.Tab("🏷️ Generate Tags"):
                with gr.Row():
                    with gr.Column():
+                       gr.Markdown("### Generate Document Tags")
+                       doc_dropdown_tag_visible = gr.Dropdown(label="Select Document to Tag", choices=get_document_choices(), value=None, interactive=True, allow_custom_value=False)
+                       tag_text_input = gr.Textbox(label="Or Paste Text to Generate Tags", placeholder="Paste any text here to generate tags...", lines=8)
+                       max_tags_slider = gr.Slider(label="Number of Tags", minimum=3, maximum=15, value=5, step=1)
+                       tag_btn_action = gr.Button("🏷️ Generate Tags", variant="primary", size="lg")
                    with gr.Column():
+                       tag_output_display = gr.Textbox(label="Generated Tags", lines=10, placeholder="Tags will appear here...")

            with gr.Tab("❓ Ask Questions"):
                with gr.Row():
                    with gr.Column():
+                       gr.Markdown("""### Ask Questions About Your Documents
                        The AI will search through all your uploaded documents to find relevant information
+                       and provide comprehensive answers with sources.""")
+                       qa_question_input = gr.Textbox(label="Your Question", placeholder="Ask anything about your documents...", lines=3)
+                       qa_btn_action = gr.Button("❓ Get Answer", variant="primary", size="lg")
                    with gr.Column():
+                       qa_output_display = gr.Textbox(label="AI Answer", lines=20, placeholder="Answer will appear here with sources...")

+       all_dropdowns_to_update = [delete_doc_dropdown_visible, doc_dropdown_sum_visible, doc_dropdown_tag_visible]

+       refresh_outputs = [document_list_display] + [dd for dd in all_dropdowns_to_update]
+       refresh_btn_library.click(fn=refresh_library, outputs=refresh_outputs)

+       upload_outputs = [upload_output_display, doc_id_output_display, document_list_display] + [dd for dd in all_dropdowns_to_update]
+       upload_btn_process.click(upload_and_process_file, inputs=[file_input_upload], outputs=upload_outputs)

+       delete_outputs = [delete_output_display, document_list_display] + [dd for dd in all_dropdowns_to_update]
+       delete_btn.click(delete_document_from_library, inputs=[delete_doc_dropdown_visible], outputs=delete_outputs)

+       search_btn_action.click(perform_search, inputs=[search_query_input, search_top_k_slider], outputs=[search_output_display])
         | 
| 520 | 
            +
                    summarize_btn_action.click(summarize_document, inputs=[doc_dropdown_sum_visible, summary_text_input, summary_style_dropdown], outputs=[summary_output_display])
         | 
| 521 | 
            +
                    tag_btn_action.click(generate_tags_for_document, inputs=[doc_dropdown_tag_visible, tag_text_input, max_tags_slider], outputs=[tag_output_display])
         | 
| 522 | 
            +
                    qa_btn_action.click(ask_question, inputs=[qa_question_input], outputs=[qa_output_display])
         | 
| 523 |  | 
| 524 | 
            +
                    interface.load(fn=refresh_library, outputs=refresh_outputs)
         | 
| 525 | 
             
                    return interface           
         | 
| 526 |  | 
|  | |
| 527 | 
             
            if __name__ == "__main__":
         | 
| 528 | 
            +
                gradio_interface = create_gradio_interface()
         | 
| 529 | 
            +
                gradio_interface.launch(mcp_server=True)
         | 
|  | |
|  | 
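The dropdown fix above works by routing every event that changes the library (upload, delete, refresh, page load) through one shared refresh function, so all three document dropdowns receive fresh choices at once. A minimal sketch of that pattern, assuming a hypothetical `get_document_choices()` helper (the app's real implementation is not shown in this diff) and Gradio's `gr.update` return convention:

```python
import gradio as gr

def get_document_choices():
    # Hypothetical stand-in for the app's real helper; it would read
    # titles/ids from the document store.
    return ["doc-1: Report.pdf", "doc-2: Notes.txt"]

def refresh_library():
    # Return one value per output component, in the same order as `outputs=`.
    choices = get_document_choices()
    listing = "\n".join(choices)
    return (
        listing,                                 # document list textbox
        gr.update(choices=choices, value=None),  # delete dropdown
        gr.update(choices=choices, value=None),  # summarize dropdown
        gr.update(choices=choices, value=None),  # tag dropdown
    )

with gr.Blocks() as demo:
    document_list_display = gr.Textbox(label="Library")
    dd_delete = gr.Dropdown(label="Delete", choices=[])
    dd_sum = gr.Dropdown(label="Summarize", choices=[])
    dd_tag = gr.Dropdown(label="Tag", choices=[])
    btn = gr.Button("🔄 Refresh")
    outs = [document_list_display, dd_delete, dd_sum, dd_tag]
    btn.click(fn=refresh_library, outputs=outs)
    demo.load(fn=refresh_library, outputs=outs)  # populate on page load
```

Fanning the same `gr.update(choices=...)` out to every dropdown is what keeps the tabs from holding stale document lists.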
    	
config.py
CHANGED

```diff
@@ -1,5 +1,8 @@
 import os
 from typing import Optional
+from dotenv import load_dotenv
+
+load_dotenv()
 
 
 class Config:
@@ -7,11 +10,13 @@ class Config:
     ANTHROPIC_API_KEY: Optional[str] = os.getenv("ANTHROPIC_API_KEY")
     MISTRAL_API_KEY: Optional[str] = os.getenv("MISTRAL_API_KEY")
     HUGGINGFACE_API_KEY: Optional[str] = os.getenv("HUGGINGFACE_API_KEY", os.getenv("HF_TOKEN"))
+    OPENAI_API_KEY: Optional[str] = os.getenv("OPENAI_API_KEY")
 
     # Model Configuration
     EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
     ANTHROPIC_MODEL: str = os.getenv("ANTHROPIC_MODEL", "claude-3-haiku-20240307")  # Using faster model
     MISTRAL_MODEL: str = os.getenv("MISTRAL_MODEL", "mistral-small-latest")  # Using smaller model
+    OPENAI_MODEL: str = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
 
     # Vector Store Configuration
     VECTOR_STORE_PATH: str = os.getenv("VECTOR_STORE_PATH", "./data/vector_store")
```
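Since keys are now pulled from the environment via `load_dotenv()` at import time, a quick way to confirm which providers are actually configured before launching; this sketch assumes the module exposes a `config` instance, as `services/llm_service.py` below expects via `config.config`:

```python
# Smoke test for the new dotenv-based configuration.
import config

cfg = config.config  # module-level Config instance used by the services

for provider, key in [
    ("openai", cfg.OPENAI_API_KEY),
    ("mistral", cfg.MISTRAL_API_KEY),
    ("anthropic", cfg.ANTHROPIC_API_KEY),
]:
    print(f"{provider}: {'configured' if key else 'missing'}")
```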
    	
core/chunker.py
CHANGED

```diff
@@ -1,3 +1,4 @@
+# chunker.py
 import logging
 from typing import List, Dict, Any, Optional
 import re
```
    	
mcp_server.py
CHANGED

```diff
@@ -41,7 +41,7 @@ generative_tool_instance = GenerativeTool(
     search_tool=search_tool_instance
 )
 
-mcp = FastMCP("
+mcp = FastMCP("content")
 logger.info("FastMCP server initialized.")
 
 @mcp.tool()
```
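The renamed `FastMCP("content")` instance is what the `@mcp.tool()` decorators attach to. A minimal sketch of that registration pattern, assuming the fastmcp package's standard decorator API; the `echo` tool here is illustrative, not one of this server's real tools:

```python
from fastmcp import FastMCP

mcp = FastMCP("content")

@mcp.tool()
def echo(text: str) -> str:
    """Illustrative tool: return the input unchanged."""
    return text

if __name__ == "__main__":
    mcp.run()  # serves the MCP protocol over stdio by default
```

Keeping the server name a single plain token ("content") avoids identifier problems that longer, whitespace-containing names can cause for MCP clients.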
    	
requirements.txt
CHANGED

```diff
@@ -1,6 +1,6 @@
 gradio
 anthropic>=0.7.0
-mistralai
+mistralai
 sentence-transformers>=2.2.2
 transformers>=4.30.0
 torch>=2.0.0
@@ -20,4 +20,6 @@ asyncio-mqtt>=0.11.1
 nest-asyncio>=1.5.6
 httpx
 fastmcp
-mcp
+mcp
+openai
+python-dotenv
```
    	
services/llm_service.py
CHANGED

```diff
@@ -1,8 +1,10 @@
+from mistralai import Mistral
 import logging
 import asyncio
 from typing import List, Dict, Any, Optional
+
 import anthropic
-
+import openai
 import config
 
 logger = logging.getLogger(__name__)
@@ -11,9 +13,9 @@ class LLMService:
     def __init__(self):
         self.config = config.config
 
-        # Initialize clients
         self.anthropic_client = None
-        self.mistral_client = None
+        self.mistral_client = None  # Synchronous Mistral client
+        self.openai_async_client = None  # Asynchronous OpenAI client
 
         self._initialize_clients()
```
```diff
@@ -27,51 +29,110 @@ class LLMService:
             logger.info("Anthropic client initialized")
 
             if self.config.MISTRAL_API_KEY:
-                self.mistral_client = Mistral(
+                self.mistral_client = Mistral(  # Standard sync client
                     api_key=self.config.MISTRAL_API_KEY
                 )
                 logger.info("Mistral client initialized")
+
+            if self.config.OPENAI_API_KEY:
+                self.openai_async_client = openai.AsyncOpenAI(
+                    api_key=self.config.OPENAI_API_KEY
+                )
+                logger.info("OpenAI client initialized")
 
+            # Check if at least one client is initialized
+            if not any([self.openai_async_client, self.mistral_client, self.anthropic_client]):
+                logger.warning("No LLM clients could be initialized based on current config. Check API keys.")
+            else:
+                logger.info("LLM clients initialized successfully (at least one).")
 
         except Exception as e:
             logger.error(f"Error initializing LLM clients: {str(e)}")
             raise
```
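With three possible backends, it is worth sanity-checking which clients actually came up before wiring the service into the app. A small sketch using the attributes set in `__init__`/`_initialize_clients` above (the import path matches this repo's layout; run from the project root):

```python
import asyncio
from services.llm_service import LLMService

async def main():
    svc = LLMService()  # logs which clients initialized
    # These attributes are assigned in __init__ and _initialize_clients.
    print("openai:", svc.openai_async_client is not None)
    print("mistral:", svc.mistral_client is not None)
    print("anthropic:", svc.anthropic_client is not None)

asyncio.run(main())
```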
```diff
     async def generate_text(self, prompt: str, model: str = "auto", max_tokens: int = 1000, temperature: float = 0.7) -> str:
-        """Generate text using the specified model"""
+        """Generate text using the specified model, with new priority for 'auto'."""
         try:
+            selected_model_name_for_call: str = ""  # Actual model name passed to the specific generator
+
             if model == "auto":
+                # New Priority: 1. OpenAI, 2. Mistral, 3. Anthropic
+                if self.openai_async_client and self.config.OPENAI_MODEL:
+                    selected_model_name_for_call = self.config.OPENAI_MODEL
+                    logger.debug(f"Auto-selected OpenAI model: {selected_model_name_for_call}")
+                    return await self._generate_with_openai(prompt, selected_model_name_for_call, max_tokens, temperature)
+                elif self.mistral_client and self.config.MISTRAL_MODEL:
+                    selected_model_name_for_call = self.config.MISTRAL_MODEL
+                    logger.debug(f"Auto-selected Mistral model: {selected_model_name_for_call}")
+                    return await self._generate_with_mistral(prompt, selected_model_name_for_call, max_tokens, temperature)
+                elif self.anthropic_client and self.config.ANTHROPIC_MODEL:
+                    selected_model_name_for_call = self.config.ANTHROPIC_MODEL
+                    logger.debug(f"Auto-selected Anthropic model: {selected_model_name_for_call}")
+                    return await self._generate_with_claude(prompt, selected_model_name_for_call, max_tokens, temperature)
                 else:
+                    logger.error("No LLM clients available for 'auto' mode or default models not configured.")
+                    raise ValueError("No LLM clients available for 'auto' mode or default models not configured.")
+
+            elif model.startswith("gpt-") or model.lower().startswith("openai/"):
+                if not self.openai_async_client:
+                    raise ValueError("OpenAI client not available. Check API key or model prefix.")
+                actual_model = model.split('/')[-1] if '/' in model else model
+                return await self._generate_with_openai(prompt, actual_model, max_tokens, temperature)
+
             elif model.startswith("mistral"):
                 if not self.mistral_client:
-                    raise ValueError("Mistral client not available")
-                return await self._generate_with_mistral(prompt, max_tokens, temperature)
+                    raise ValueError("Mistral client not available. Check API key or model prefix.")
+                return await self._generate_with_mistral(prompt, model, max_tokens, temperature)
+
+            elif model.startswith("claude"):
+                if not self.anthropic_client:
+                    raise ValueError("Anthropic client not available. Check API key or model prefix.")
+                return await self._generate_with_claude(prompt, model, max_tokens, temperature)
+
             else:
-                raise ValueError(f"Unsupported model: {model}")
+                raise ValueError(f"Unsupported model: {model}. Must start with 'gpt-', 'openai/', 'claude', 'mistral', or be 'auto'.")
 
         except Exception as e:
-            logger.error(f"Error generating text: {str(e)}")
+            logger.error(f"Error generating text with model '{model}': {str(e)}")
             raise
 
+    async def _generate_with_openai(self, prompt: str, model_name: str, max_tokens: int, temperature: float) -> str:
+        """Generate text using OpenAI (Async)"""
+        if not self.openai_async_client:
+            raise RuntimeError("OpenAI async client not initialized.")
+        try:
+            logger.debug(f"Generating with OpenAI model: {model_name}, max_tokens: {max_tokens}, temp: {temperature}, prompt: '{prompt[:50]}...'")
+            response = await self.openai_async_client.chat.completions.create(
+                model=model_name,
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=max_tokens,
+                temperature=temperature
+            )
+            if response.choices and response.choices[0].message:
+                content = response.choices[0].message.content
+                if content is not None:
+                    return content.strip()
+                else:
+                    logger.warning(f"OpenAI response message content is None for model {model_name}.")
+                    return ""
+            else:
+                logger.warning(f"OpenAI response did not contain expected choices or message for model {model_name}.")
+                return ""
+        except Exception as e:
+            logger.error(f"Error with OpenAI generation (model: {model_name}): {str(e)}")
+            raise
 
-    async def _generate_with_claude(self, prompt: str, max_tokens: int, temperature: float) -> str:
-        """Generate text using Claude"""
+    async def _generate_with_claude(self, prompt: str, model_name: str, max_tokens: int, temperature: float) -> str:
+        """Generate text using Anthropic/Claude (Sync via run_in_executor)"""
+        if not self.anthropic_client:
+            raise RuntimeError("Anthropic client not initialized.")
         try:
+            logger.debug(f"Generating with Anthropic model: {model_name}, max_tokens: {max_tokens}, temp: {temperature}, prompt: '{prompt[:50]}...'")
             loop = asyncio.get_event_loop()
             response = await loop.run_in_executor(
                 None,
                 lambda: self.anthropic_client.messages.create(
-                    model=self.config.ANTHROPIC_MODEL,
+                    model=model_name,  # Use the passed model_name
                     max_tokens=max_tokens,
                     temperature=temperature,
                     messages=[
                         {"role": "user", "content": prompt}
                     ]
                 )
             )
-            return response.content[0].text
+            if response.content and response.content[0].text:
+                return response.content[0].text.strip()
+            else:
+                logger.warning(f"Anthropic response did not contain expected content for model {model_name}.")
+                return ""
         except Exception as e:
-            logger.error(f"Error with Claude generation: {str(e)}")
+            logger.error(f"Error with Anthropic (Claude) generation (model: {model_name}): {str(e)}")
             raise
 
-    async def _generate_with_mistral(self, prompt: str, max_tokens: int, temperature: float) -> str:
-        """Generate text using Mistral"""
+    async def _generate_with_mistral(self, prompt: str, model_name: str, max_tokens: int, temperature: float) -> str:
+        """Generate text using Mistral (Sync via run_in_executor)"""
+        if not self.mistral_client:
+            raise RuntimeError("Mistral client not initialized.")
         try:
+            logger.debug(f"Generating with Mistral model: {model_name}, temp: {temperature}, prompt: '{prompt[:50]}...' (max_tokens: {max_tokens} - note: not directly used by MistralClient.chat)")
             loop = asyncio.get_event_loop()
+
             response = await loop.run_in_executor(
                 None,
                 lambda: self.mistral_client.chat(
-                    model=self.config.MISTRAL_MODEL,
+                    model=model_name,  # Use the passed model_name
                     messages=[{"role": "user", "content": prompt}],
                     max_tokens=max_tokens,
                     temperature=temperature
                 )
             )
-            return response.choices[0].message.content
+            if response.choices and response.choices[0].message:
+                content = response.choices[0].message.content
+                if content is not None:
+                    return content.strip()
+                else:
+                    logger.warning(f"Mistral response message content is None for model {model_name}.")
+                    return ""
+            else:
+                logger.warning(f"Mistral response did not contain expected choices or message for model {model_name}.")
+                return ""
         except Exception as e:
-            logger.error(f"Error with Mistral generation: {str(e)}")
+            logger.error(f"Error with Mistral generation (model: {model_name}): {str(e)}")
             raise
```
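From a caller's perspective the new routing is: `"auto"` walks OpenAI, then Mistral, then Anthropic, while explicit names are dispatched by prefix. A hedged sketch of that behavior (outputs depend on which API keys are set in your environment):

```python
import asyncio
from services.llm_service import LLMService

async def demo():
    svc = LLMService()
    # "auto" picks OpenAI first if configured, else Mistral, else Claude.
    print(await svc.generate_text("Say hi in five words.", model="auto"))
    # Explicit routing by prefix; "openai/gpt-4o-mini" has the prefix stripped.
    print(await svc.generate_text("Say hi.", model="openai/gpt-4o-mini"))
    print(await svc.generate_text("Say hi.", model="claude-3-haiku-20240307"))

asyncio.run(demo())
```

Note the design split above: the OpenAI SDK is called through its native async client, while the synchronous Anthropic and Mistral calls are pushed onto a thread via `loop.run_in_executor(None, ...)` so they do not block the event loop.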
```diff
     async def summarize(self, text: str, style: str = "concise", max_length: Optional[int] = None) -> str:
-        """Generate a summary of the given text"""
         if not text.strip():
             return ""
 
-        # Create style-specific prompts
         style_prompts = {
             "concise": "Provide a concise summary of the following text, focusing on the main points:",
             "detailed": "Provide a detailed summary of the following text, including key details and supporting information:",
             "bullet_points": "Summarize the following text as a list of bullet points highlighting the main ideas:",
             "executive": "Provide an executive summary of the following text, focusing on key findings and actionable insights:"
         }
         prompt_template = style_prompts.get(style, style_prompts["concise"])
         if max_length:
-            prompt_template += f" Keep the summary under {max_length} words."
+            prompt_template += f" Keep the summary under approximately {max_length} words."
 
         prompt = f"{prompt_template}\n\nText to summarize:\n{text}\n\nSummary:"
 
         try:
+            summary_max_tokens = (max_length * 2) if max_length else 500
+            summary = await self.generate_text(prompt, model="auto", max_tokens=summary_max_tokens, temperature=0.3)
             return summary.strip()
         except Exception as e:
             logger.error(f"Error generating summary: {str(e)}")
             return "Error generating summary"
 
     async def generate_tags(self, text: str, max_tags: int = 5) -> List[str]:
-        """Generate relevant tags for the given text"""
         if not text.strip():
             return []
 
-        prompt = f"""Generate {max_tags} relevant tags for the following text.
-        Tags should be concise, descriptive keywords or phrases that capture the main topics.
-        Return only the tags, separated by commas.
+        prompt = f"""Generate up to {max_tags} relevant tags for the following text.
+        Tags should be concise, descriptive keywords or phrases (1-3 words typically) that capture the main topics or themes.
+        Return only the tags, separated by commas. Do not include any preamble or explanation.
 
         Text:
         {text}
 
         Tags:"""
 
         try:
-            response = await self.generate_text(prompt, max_tokens=100, temperature=0.5)
-            tags = [tag.strip() for tag in response.split(',')]
-            tags = [tag for tag in tags if tag and len(tag) > 1]
-            return tags[:max_tags]
+            response = await self.generate_text(prompt, model="auto", max_tokens=100, temperature=0.5)
+            tags = [tag.strip().lower() for tag in response.split(',') if tag.strip()]
+            tags = [tag for tag in tags if tag and len(tag) > 1 and len(tag) < 50]
+            return list(dict.fromkeys(tags))[:max_tags]
         except Exception as e:
             logger.error(f"Error generating tags: {str(e)}")
             return []
 
     async def categorize(self, text: str, categories: List[str]) -> str:
-        """Categorize text into one of the provided categories"""
         if not text.strip() or not categories:
             return "Uncategorized"
 
-        categories_str = ", ".join(categories)
-        Choose the most appropriate category based on the content and main theme of the text.
-        Return only the category name, nothing else.
+        categories_str = ", ".join([f"'{cat}'" for cat in categories])
+        prompt = f"""Classify the following text into ONE of these categories: {categories_str}.
+        Choose the single most appropriate category based on the content and main theme of the text.
+        Return only the category name as a string, exactly as it appears in the list provided. Do not add any other text or explanation.
 
         Text to classify:
         {text}
 
         Category:"""
 
         try:
-            response = await self.generate_text(prompt, max_tokens=50, temperature=0.1)
-            category_lower = response.strip().lower()
-            for cat in categories:
-                if cat.lower() in category_lower or category_lower in cat.lower():
-                    return cat
-            return categories[0] if categories else "Uncategorized"
+            response = await self.generate_text(prompt, model="auto", max_tokens=50, temperature=0.1)
+            category_candidate = response.strip().strip("'\"")
 
+            for cat in categories:
+                if cat.lower() == category_candidate.lower():
+                    return cat
+
+            logger.warning(f"LLM returned category '{category_candidate}' which is not in the provided list: {categories}. Falling back.")
+            return categories[0] if categories else "Uncategorized"
         except Exception as e:
             logger.error(f"Error categorizing text: {str(e)}")
             return "Uncategorized"
```
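The reworked tag post-processing normalizes case, bounds tag length, and deduplicates while preserving the model's order via `dict.fromkeys`. A standalone check of that exact parsing logic:

```python
# Standalone check of the comma-split + dedupe logic used in generate_tags.
response = "Python, python , API design, Python, a, machine-learning"

tags = [tag.strip().lower() for tag in response.split(',') if tag.strip()]
tags = [tag for tag in tags if len(tag) > 1 and len(tag) < 50]  # drops "a"
unique = list(dict.fromkeys(tags))[:5]  # order-preserving dedupe

print(unique)  # ['python', 'api design', 'machine-learning']
```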
| 258 | 
            +
                async def answer_question(self, question: str, context: str, max_context_length: int = 3000) -> str:
         | 
|  | |
| 259 | 
             
                    if not question.strip():
         | 
| 260 | 
            +
                        return "No question provided."
         | 
|  | |
| 261 | 
             
                    if not context.strip():
         | 
| 262 | 
            +
                        return "I don't have enough context to answer this question. Please provide relevant information."
         | 
| 263 |  | 
|  | |
| 264 | 
             
                    if len(context) > max_context_length:
         | 
| 265 | 
             
                        context = context[:max_context_length] + "..."
         | 
| 266 | 
            +
                        logger.warning(f"Context truncated to {max_context_length} characters for question answering.")
         | 
| 267 |  | 
| 268 | 
            +
                    prompt = f"""You are a helpful assistant. Answer the following question based ONLY on the provided context.
         | 
| 269 | 
            +
            If the context does not contain the information to answer the question, state that the context does not provide the answer.
         | 
| 270 | 
            +
            Do not make up information or use external knowledge.
         | 
| 271 |  | 
| 272 | 
            +
            Context:
         | 
| 273 | 
            +
            ---
         | 
| 274 | 
            +
            {context}
         | 
| 275 | 
            +
            ---
         | 
| 276 |  | 
| 277 | 
            +
            Question: {question}
         | 
| 278 |  | 
| 279 | 
            +
            Answer:"""
         | 
| 280 |  | 
| 281 | 
             
                    try:
         | 
| 282 | 
            +
                        answer = await self.generate_text(prompt, model="auto", max_tokens=300, temperature=0.2)
         | 
| 283 | 
             
                        return answer.strip()
         | 
| 284 | 
             
                    except Exception as e:
         | 
| 285 | 
             
                        logger.error(f"Error answering question: {str(e)}")
         | 
| 286 | 
             
                        return "I encountered an error while trying to answer your question."
         | 
| 287 |  | 
```python
    async def extract_key_information(self, text: str) -> Dict[str, Any]:
        """Extract topic, key points, entities, sentiment, and content type as a dict."""
        if not text.strip():
            return {}

        prompt = f"""Analyze the following text and extract key information.
        Provide the response as a JSON object with the following keys:
        - "main_topic": (string) The main topic or subject of the text.
        - "key_points": (array of strings) A list of 3-5 key points or takeaways.
        - "entities": (array of strings) Important people, places, organizations, or products mentioned.
        - "sentiment": (string) Overall sentiment of the text (e.g., "positive", "neutral", "negative", "mixed").
        - "content_type": (string) The perceived type of content (e.g., "article", "email", "report", "conversation", "advertisement", "other").

        If a piece of information is not found or not applicable, use null or an empty array/string as appropriate for the JSON structure.

        Text to analyze:
        ---
        {text}
        ---

        JSON Analysis:"""
```
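An invented example of a reply that satisfies this schema (all values made up for illustration), shown as the Python dict the method ultimately returns:

```python
# Invented example of a well-formed extraction result; not from the commit.
example = {
    "main_topic": "Quarterly earnings for Acme Corp",
    "key_points": [
        "Revenue grew 12% year over year",
        "Cloud segment drove most of the gain",
        "Guidance for Q4 was raised",
    ],
    "entities": ["Acme Corp"],
    "sentiment": "positive",
    "content_type": "report",
}
```

The parsing half of the method follows.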
```python
        try:
            response_str = await self.generate_text(prompt, model="auto", max_tokens=500, temperature=0.4)

            import json
            try:
                # Strip an optional ```json markdown fence before parsing.
                # (removeprefix/removesuffix remove the literal fence; the earlier
                # lstrip("```json") stripped *characters*, which could eat into the payload.)
                response_str = response_str.strip()
                if response_str.startswith("```json"):
                    response_str = response_str.removeprefix("```json").removesuffix("```").strip()

                info = json.loads(response_str)
                expected_keys = {"main_topic", "key_points", "entities", "sentiment", "content_type"}
                if not expected_keys.issubset(info.keys()):
                    logger.warning(f"Extracted information missing some expected keys. Got: {info.keys()}")
                return info
            except json.JSONDecodeError as je:
                logger.error(f"Failed to parse JSON from LLM response for key_information: {je}")
                logger.debug(f"LLM Response string was: {response_str}")
                # Fallback: recover what we can from "key: value" lines.
                info_fallback = {}
                for line in response_str.split('\n'):
                    if ':' not in line:
                        continue
                    key, value = line.split(':', 1)
                    key_clean = key.strip().lower().replace(' ', '_')
                    value_clean = value.strip()
                    if not value_clean:
                        continue
                    if key_clean in ["key_points", "entities"] and '[' in value_clean and ']' in value_clean:
                        try:
                            info_fallback[key_clean] = [item.strip().strip("'\"") for item in value_clean.strip('[]').split(',') if item.strip()]
                        except Exception:  # was a bare except; keep the raw string on failure
                            info_fallback[key_clean] = value_clean
                    else:
                        info_fallback[key_clean] = value_clean
                if info_fallback:
                    logger.info("Successfully parsed key information using fallback line-based method.")
                    return info_fallback
                return {"error": "Failed to parse LLM output", "raw_response": response_str}
        except Exception as e:
            logger.error(f"Error extracting key information: {str(e)}")
            return {"error": f"General error extracting key information: {str(e)}"}
```
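The fence-stripping and line-based fallback can be exercised in isolation; a self-contained sketch under the same assumptions (`parse_llm_json` is my name, not the commit's):

```python
import json

def parse_llm_json(response_str: str) -> dict:
    """Strip an optional markdown code fence, try strict JSON, then fall back to key: value lines."""
    body = response_str.strip()
    if body.startswith("```json"):
        body = body.removeprefix("```json").removesuffix("```").strip()
    try:
        return json.loads(body)
    except json.JSONDecodeError:
        info = {}
        for line in body.splitlines():
            if ':' in line:
                key, value = line.split(':', 1)
                if value.strip():
                    info[key.strip().lower().replace(' ', '_')] = value.strip()
        return info or {"error": "Failed to parse LLM output", "raw_response": response_str}

print(parse_llm_json('```json\n{"main_topic": "MCP servers"}\n```'))  # strict path
print(parse_llm_json('Main topic: MCP servers'))                      # fallback path
```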
```python
    async def check_availability(self) -> Dict[str, bool]:
        """Check which LLM services are available by making a tiny test call."""
        availability = {
            "openai": False,
            "mistral": False,
            "anthropic": False
        }
        test_prompt = "Hello"
        test_max_tokens = 5
        test_temp = 0.1

        logger.info("Checking LLM availability...")

        if self.openai_async_client and self.config.OPENAI_MODEL:
            try:
                logger.debug(f"Testing OpenAI availability with model {self.config.OPENAI_MODEL}...")
                test_response = await self._generate_with_openai(test_prompt, self.config.OPENAI_MODEL, test_max_tokens, test_temp)
                availability["openai"] = bool(test_response.strip())
            except Exception as e:
                logger.warning(f"OpenAI availability check failed for model {self.config.OPENAI_MODEL}: {e}")
        logger.info(f"OpenAI available: {availability['openai']}")

        if self.mistral_client and self.config.MISTRAL_MODEL:
            try:
                logger.debug(f"Testing Mistral availability with model {self.config.MISTRAL_MODEL}...")
                test_response = await self._generate_with_mistral(test_prompt, self.config.MISTRAL_MODEL, test_max_tokens, test_temp)
                availability["mistral"] = bool(test_response.strip())
            except Exception as e:
                logger.warning(f"Mistral availability check failed for model {self.config.MISTRAL_MODEL}: {e}")
        logger.info(f"Mistral available: {availability['mistral']}")

        if self.anthropic_client and self.config.ANTHROPIC_MODEL:
            try:
                logger.debug(f"Testing Anthropic availability with model {self.config.ANTHROPIC_MODEL}...")
                test_response = await self._generate_with_claude(test_prompt, self.config.ANTHROPIC_MODEL, test_max_tokens, test_temp)
                availability["anthropic"] = bool(test_response.strip())
            except Exception as e:
                logger.warning(f"Anthropic availability check failed for model {self.config.ANTHROPIC_MODEL}: {e}")
        logger.info(f"Anthropic available: {availability['anthropic']}")

        logger.info(f"Final LLM Availability: {availability}")
        return availability
```
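The three probes above run sequentially, so a hung provider delays the whole check. A hedged sketch of the same pattern run concurrently with `asyncio.gather` (the probe coroutines here are stubs standing in for the real client calls, not this file's methods):

```python
import asyncio

# Stub probes standing in for the real provider calls; each returns a short
# completion on success or raises on failure.
async def probe_openai() -> str:
    return "Hi"

async def probe_mistral() -> str:
    raise RuntimeError("401: invalid API key")

async def probe_anthropic() -> str:
    return "Hello"

async def check_all() -> dict:
    # Launch every probe at once; return_exceptions=True turns failures into
    # returned values instead of cancelling the whole gather.
    probes = {"openai": probe_openai(), "mistral": probe_mistral(), "anthropic": probe_anthropic()}
    results = await asyncio.gather(*probes.values(), return_exceptions=True)
    return {name: isinstance(r, str) and bool(r.strip()) for name, r in zip(probes, results)}

print(asyncio.run(check_all()))  # {'openai': True, 'mistral': False, 'anthropic': True}
```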

