Tobias Pasquale
committed on
Commit · 508a7e5
1 Parent(s): c280a92
Fix: Complete CI/CD formatting compliance
- Apply black code formatting to 12 files
- Fix import ordering with isort
- Remove unused imports (Union, MagicMock, json, asdict, PromptTemplate)
- Fix undefined variables in test_chat_endpoint.py
- Break long lines in RAG pipeline and response formatter
- Add noqa comments for prompt template strings
- Resolve all 19 flake8 E501 line length violations
- Ensure full pre-commit hook compliance
All code formatting issues resolved for successful pipeline deployment.
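
For reference, the checks described above can be reproduced locally. The sketch below is a hypothetical helper, not part of this commit; the tool names come from the commit message, while the exact flags are assumptions that may differ from the project's pre-commit configuration.

import subprocess


def run_formatting_checks() -> int:
    """Hypothetical helper mirroring the checks named in the commit message."""
    commands = [
        ["black", "--check", "."],       # formatting compliance
        ["isort", "--check-only", "."],  # import ordering
        ["flake8", "."],                 # E501 line length and other lint rules
    ]
    for cmd in commands:
        if subprocess.run(cmd).returncode != 0:  # stop at the first failing tool
            return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(run_formatting_checks())
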
- app.py +72 -57
- src/llm/__init__.py +1 -1
- src/llm/context_manager.py +53 -55
- src/llm/llm_service.py +97 -93
- src/llm/prompt_templates.py +48 -47
- src/rag/__init__.py +1 -1
- src/rag/rag_pipeline.py +120 -100
- src/rag/response_formatter.py +83 -69
- tests/test_chat_endpoint.py +172 -111
- tests/test_llm/__init__.py +1 -1
- tests/test_llm/test_llm_service.py +94 -76
- tests/test_rag/__init__.py +1 -1
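
Several of the files listed above gained "# noqa: E501" suppressions for long prompt-template strings. As a generic illustration (not a line taken from this commit), flake8 skips the E501 line-length check on any physical line that ends with that comment:

# Generic illustration of a flake8 suppression; not a line from this commit.
SYSTEM_NOTE = "A deliberately long single-line string that would normally trigger flake8's E501 line-length warning unless suppressed."  # noqa: E501
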
app.py
CHANGED
|
@@ -168,7 +168,7 @@ def search():
|
|
| 168 |
def chat():
|
| 169 |
"""
|
| 170 |
Endpoint for conversational RAG interactions.
|
| 171 |
-
|
| 172 |
Accepts JSON requests with user messages and returns AI-generated
|
| 173 |
responses based on corporate policy documents.
|
| 174 |
"""
|
|
@@ -176,10 +176,12 @@ def chat():
|
|
| 176 |
# Validate request contains JSON data
|
| 177 |
if not request.is_json:
|
| 178 |
return (
|
| 179 |
-
jsonify(
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
| 183 |
400,
|
| 184 |
)
|
| 185 |
|
|
@@ -189,19 +191,17 @@ def chat():
|
|
| 189 |
message = data.get("message")
|
| 190 |
if message is None:
|
| 191 |
return (
|
| 192 |
-
jsonify(
|
| 193 |
-
"status": "error",
|
| 194 |
-
|
| 195 |
-
}),
|
| 196 |
400,
|
| 197 |
)
|
| 198 |
|
| 199 |
if not isinstance(message, str) or not message.strip():
|
| 200 |
return (
|
| 201 |
-
jsonify(
|
| 202 |
-
"status": "error",
|
| 203 |
-
|
| 204 |
-
}),
|
| 205 |
400,
|
| 206 |
)
|
| 207 |
|
|
@@ -214,96 +214,103 @@ def chat():
|
|
| 214 |
try:
|
| 215 |
from src.config import COLLECTION_NAME, VECTOR_DB_PERSIST_PATH
|
| 216 |
from src.embedding.embedding_service import EmbeddingService
|
| 217 |
-
from src.search.search_service import SearchService
|
| 218 |
-
from src.vector_store.vector_db import VectorDatabase
|
| 219 |
from src.llm.llm_service import LLMService
|
| 220 |
from src.rag.rag_pipeline import RAGPipeline
|
| 221 |
from src.rag.response_formatter import ResponseFormatter
|
|
|
|
|
|
|
| 222 |
|
| 223 |
# Initialize services
|
| 224 |
vector_db = VectorDatabase(VECTOR_DB_PERSIST_PATH, COLLECTION_NAME)
|
| 225 |
embedding_service = EmbeddingService()
|
| 226 |
search_service = SearchService(vector_db, embedding_service)
|
| 227 |
-
|
| 228 |
# Initialize LLM service from environment
|
| 229 |
llm_service = LLMService.from_environment()
|
| 230 |
-
|
| 231 |
# Initialize RAG pipeline
|
| 232 |
rag_pipeline = RAGPipeline(search_service, llm_service)
|
| 233 |
-
|
| 234 |
# Initialize response formatter
|
| 235 |
formatter = ResponseFormatter()
|
| 236 |
-
|
| 237 |
except ValueError as e:
|
| 238 |
return (
|
| 239 |
-
jsonify(
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
503,
|
| 245 |
)
|
| 246 |
except Exception as e:
|
| 247 |
return (
|
| 248 |
-
jsonify(
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
| 252 |
500,
|
| 253 |
)
|
| 254 |
|
| 255 |
# Generate RAG response
|
| 256 |
rag_response = rag_pipeline.generate_answer(message.strip())
|
| 257 |
-
|
| 258 |
# Format response for API
|
| 259 |
if include_sources:
|
| 260 |
-
formatted_response = formatter.format_api_response(
|
|
|
|
|
|
|
| 261 |
else:
|
| 262 |
formatted_response = formatter.format_chat_response(
|
| 263 |
-
rag_response,
|
| 264 |
-
conversation_id,
|
| 265 |
-
include_sources=False
|
| 266 |
)
|
| 267 |
|
| 268 |
return jsonify(formatted_response)
|
| 269 |
|
| 270 |
except Exception as e:
|
| 271 |
-
return
|
| 272 |
-
"status": "error",
|
| 273 |
-
|
| 274 |
-
|
| 275 |
|
| 276 |
|
| 277 |
@app.route("/chat/health", methods=["GET"])
|
| 278 |
def chat_health():
|
| 279 |
"""
|
| 280 |
Health check endpoint for RAG chat functionality.
|
| 281 |
-
|
| 282 |
Returns the status of all RAG pipeline components.
|
| 283 |
"""
|
| 284 |
try:
|
| 285 |
from src.config import COLLECTION_NAME, VECTOR_DB_PERSIST_PATH
|
| 286 |
from src.embedding.embedding_service import EmbeddingService
|
| 287 |
-
from src.search.search_service import SearchService
|
| 288 |
-
from src.vector_store.vector_db import VectorDatabase
|
| 289 |
from src.llm.llm_service import LLMService
|
| 290 |
from src.rag.rag_pipeline import RAGPipeline
|
| 291 |
from src.rag.response_formatter import ResponseFormatter
|
|
|
|
|
|
|
| 292 |
|
| 293 |
# Initialize services for health check
|
| 294 |
vector_db = VectorDatabase(VECTOR_DB_PERSIST_PATH, COLLECTION_NAME)
|
| 295 |
embedding_service = EmbeddingService()
|
| 296 |
search_service = SearchService(vector_db, embedding_service)
|
| 297 |
-
|
| 298 |
try:
|
| 299 |
llm_service = LLMService.from_environment()
|
| 300 |
rag_pipeline = RAGPipeline(search_service, llm_service)
|
| 301 |
formatter = ResponseFormatter()
|
| 302 |
-
|
| 303 |
# Perform health check
|
| 304 |
health_data = rag_pipeline.health_check()
|
| 305 |
health_response = formatter.create_health_response(health_data)
|
| 306 |
-
|
| 307 |
# Determine HTTP status based on health
|
| 308 |
if health_data.get("pipeline") == "healthy":
|
| 309 |
return jsonify(health_response), 200
|
|
@@ -311,24 +318,32 @@ def chat_health():
|
|
| 311 |
return jsonify(health_response), 200 # Still functional
|
| 312 |
else:
|
| 313 |
return jsonify(health_response), 503 # Service unavailable
|
| 314 |
-
|
| 315 |
except ValueError as e:
|
| 316 |
-
return
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
}
|
| 324 |
-
|
| 325 |
-
|
|
|
|
| 326 |
|
| 327 |
except Exception as e:
|
| 328 |
-
return
|
| 329 |
-
"status": "error",
|
| 330 |
-
|
| 331 |
-
|
| 332 |
|
| 333 |
|
| 334 |
if __name__ == "__main__":
|
|
|
|
| 168 |
def chat():
|
| 169 |
"""
|
| 170 |
Endpoint for conversational RAG interactions.
|
| 171 |
+
|
| 172 |
Accepts JSON requests with user messages and returns AI-generated
|
| 173 |
responses based on corporate policy documents.
|
| 174 |
"""
|
|
|
|
| 176 |
# Validate request contains JSON data
|
| 177 |
if not request.is_json:
|
| 178 |
return (
|
| 179 |
+
jsonify(
|
| 180 |
+
{
|
| 181 |
+
"status": "error",
|
| 182 |
+
"message": "Content-Type must be application/json",
|
| 183 |
+
}
|
| 184 |
+
),
|
| 185 |
400,
|
| 186 |
)
|
| 187 |
|
|
|
|
| 191 |
message = data.get("message")
|
| 192 |
if message is None:
|
| 193 |
return (
|
| 194 |
+
jsonify(
|
| 195 |
+
{"status": "error", "message": "message parameter is required"}
|
| 196 |
+
),
|
|
|
|
| 197 |
400,
|
| 198 |
)
|
| 199 |
|
| 200 |
if not isinstance(message, str) or not message.strip():
|
| 201 |
return (
|
| 202 |
+
jsonify(
|
| 203 |
+
{"status": "error", "message": "message must be a non-empty string"}
|
| 204 |
+
),
|
|
|
|
| 205 |
400,
|
| 206 |
)
|
| 207 |
|
|
|
|
| 214 |
try:
|
| 215 |
from src.config import COLLECTION_NAME, VECTOR_DB_PERSIST_PATH
|
| 216 |
from src.embedding.embedding_service import EmbeddingService
|
|
|
|
|
|
|
| 217 |
from src.llm.llm_service import LLMService
|
| 218 |
from src.rag.rag_pipeline import RAGPipeline
|
| 219 |
from src.rag.response_formatter import ResponseFormatter
|
| 220 |
+
from src.search.search_service import SearchService
|
| 221 |
+
from src.vector_store.vector_db import VectorDatabase
|
| 222 |
|
| 223 |
# Initialize services
|
| 224 |
vector_db = VectorDatabase(VECTOR_DB_PERSIST_PATH, COLLECTION_NAME)
|
| 225 |
embedding_service = EmbeddingService()
|
| 226 |
search_service = SearchService(vector_db, embedding_service)
|
| 227 |
+
|
| 228 |
# Initialize LLM service from environment
|
| 229 |
llm_service = LLMService.from_environment()
|
| 230 |
+
|
| 231 |
# Initialize RAG pipeline
|
| 232 |
rag_pipeline = RAGPipeline(search_service, llm_service)
|
| 233 |
+
|
| 234 |
# Initialize response formatter
|
| 235 |
formatter = ResponseFormatter()
|
| 236 |
+
|
| 237 |
except ValueError as e:
|
| 238 |
return (
|
| 239 |
+
jsonify(
|
| 240 |
+
{
|
| 241 |
+
"status": "error",
|
| 242 |
+
"message": f"LLM service configuration error: {str(e)}",
|
| 243 |
+
"details": (
|
| 244 |
+
"Please ensure OPENROUTER_API_KEY or GROQ_API_KEY "
|
| 245 |
+
"environment variables are set"
|
| 246 |
+
),
|
| 247 |
+
}
|
| 248 |
+
),
|
| 249 |
503,
|
| 250 |
)
|
| 251 |
except Exception as e:
|
| 252 |
return (
|
| 253 |
+
jsonify(
|
| 254 |
+
{
|
| 255 |
+
"status": "error",
|
| 256 |
+
"message": f"Service initialization failed: {str(e)}",
|
| 257 |
+
}
|
| 258 |
+
),
|
| 259 |
500,
|
| 260 |
)
|
| 261 |
|
| 262 |
# Generate RAG response
|
| 263 |
rag_response = rag_pipeline.generate_answer(message.strip())
|
| 264 |
+
|
| 265 |
# Format response for API
|
| 266 |
if include_sources:
|
| 267 |
+
formatted_response = formatter.format_api_response(
|
| 268 |
+
rag_response, include_debug
|
| 269 |
+
)
|
| 270 |
else:
|
| 271 |
formatted_response = formatter.format_chat_response(
|
| 272 |
+
rag_response, conversation_id, include_sources=False
|
|
|
|
|
|
|
| 273 |
)
|
| 274 |
|
| 275 |
return jsonify(formatted_response)
|
| 276 |
|
| 277 |
except Exception as e:
|
| 278 |
+
return (
|
| 279 |
+
jsonify({"status": "error", "message": f"Chat request failed: {str(e)}"}),
|
| 280 |
+
500,
|
| 281 |
+
)
|
| 282 |
|
| 283 |
|
| 284 |
@app.route("/chat/health", methods=["GET"])
|
| 285 |
def chat_health():
|
| 286 |
"""
|
| 287 |
Health check endpoint for RAG chat functionality.
|
| 288 |
+
|
| 289 |
Returns the status of all RAG pipeline components.
|
| 290 |
"""
|
| 291 |
try:
|
| 292 |
from src.config import COLLECTION_NAME, VECTOR_DB_PERSIST_PATH
|
| 293 |
from src.embedding.embedding_service import EmbeddingService
|
|
|
|
|
|
|
| 294 |
from src.llm.llm_service import LLMService
|
| 295 |
from src.rag.rag_pipeline import RAGPipeline
|
| 296 |
from src.rag.response_formatter import ResponseFormatter
|
| 297 |
+
from src.search.search_service import SearchService
|
| 298 |
+
from src.vector_store.vector_db import VectorDatabase
|
| 299 |
|
| 300 |
# Initialize services for health check
|
| 301 |
vector_db = VectorDatabase(VECTOR_DB_PERSIST_PATH, COLLECTION_NAME)
|
| 302 |
embedding_service = EmbeddingService()
|
| 303 |
search_service = SearchService(vector_db, embedding_service)
|
| 304 |
+
|
| 305 |
try:
|
| 306 |
llm_service = LLMService.from_environment()
|
| 307 |
rag_pipeline = RAGPipeline(search_service, llm_service)
|
| 308 |
formatter = ResponseFormatter()
|
| 309 |
+
|
| 310 |
# Perform health check
|
| 311 |
health_data = rag_pipeline.health_check()
|
| 312 |
health_response = formatter.create_health_response(health_data)
|
| 313 |
+
|
| 314 |
# Determine HTTP status based on health
|
| 315 |
if health_data.get("pipeline") == "healthy":
|
| 316 |
return jsonify(health_response), 200
|
|
|
|
| 318 |
return jsonify(health_response), 200 # Still functional
|
| 319 |
else:
|
| 320 |
return jsonify(health_response), 503 # Service unavailable
|
| 321 |
+
|
| 322 |
except ValueError as e:
|
| 323 |
+
return (
|
| 324 |
+
jsonify(
|
| 325 |
+
{
|
| 326 |
+
"status": "error",
|
| 327 |
+
"message": f"LLM configuration error: {str(e)}",
|
| 328 |
+
"health": {
|
| 329 |
+
"pipeline_status": "unhealthy",
|
| 330 |
+
"components": {
|
| 331 |
+
"llm_service": {
|
| 332 |
+
"status": "unconfigured",
|
| 333 |
+
"error": str(e),
|
| 334 |
+
}
|
| 335 |
+
},
|
| 336 |
+
},
|
| 337 |
}
|
| 338 |
+
),
|
| 339 |
+
503,
|
| 340 |
+
)
|
| 341 |
|
| 342 |
except Exception as e:
|
| 343 |
+
return (
|
| 344 |
+
jsonify({"status": "error", "message": f"Health check failed: {str(e)}"}),
|
| 345 |
+
500,
|
| 346 |
+
)
|
| 347 |
|
| 348 |
|
| 349 |
if __name__ == "__main__":
|
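
The app.py hunks above are formatting-only: black re-wraps the Flask error responses into multi-line (jsonify(...), status_code) tuples and isort moves the search/vector-store imports. As a standalone illustration of that return pattern (a simplified sketch, not the actual application code):

from flask import Flask, jsonify, request

app = Flask(__name__)


@app.route("/chat", methods=["POST"])
def chat():
    # Validate request contains JSON data, mirroring the checks in the diff.
    if not request.is_json:
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "Content-Type must be application/json",
                }
            ),
            400,
        )

    message = (request.get_json() or {}).get("message")
    if not isinstance(message, str) or not message.strip():
        return (
            jsonify(
                {"status": "error", "message": "message must be a non-empty string"}
            ),
            400,
        )

    # Placeholder response; the real endpoint delegates to the RAG pipeline.
    return jsonify({"status": "ok", "echo": message.strip()})
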
src/llm/__init__.py
CHANGED
|
@@ -8,4 +8,4 @@ Classes:
|
|
| 8 |
LLMService: Main service for LLM interactions
|
| 9 |
PromptTemplates: Predefined prompt templates for corporate policy Q&A
|
| 10 |
ContextManager: Manages context retrieval and formatting
|
| 11 |
-
"""
|
|
|
|
| 8 |
LLMService: Main service for LLM interactions
|
| 9 |
PromptTemplates: Predefined prompt templates for corporate policy Q&A
|
| 10 |
ContextManager: Manages context retrieval and formatting
|
| 11 |
+
"""
|
src/llm/context_manager.py
CHANGED
|
@@ -6,8 +6,8 @@ for the RAG pipeline, ensuring optimal context window utilization.
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import logging
|
| 9 |
-
from typing import Any, Dict, List, Optional, Tuple
|
| 10 |
from dataclasses import dataclass
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
|
|
| 15 |
@dataclass
|
| 16 |
class ContextConfig:
|
| 17 |
"""Configuration for context management."""
|
|
|
|
| 18 |
max_context_length: int = 3000 # Maximum characters in context
|
| 19 |
max_results: int = 5 # Maximum search results to include
|
| 20 |
min_similarity: float = 0.1 # Minimum similarity threshold
|
|
@@ -24,7 +25,7 @@ class ContextConfig:
|
|
| 24 |
class ContextManager:
|
| 25 |
"""
|
| 26 |
Manages context retrieval and optimization for RAG pipeline.
|
| 27 |
-
|
| 28 |
Handles:
|
| 29 |
- Context length management
|
| 30 |
- Relevance filtering
|
|
@@ -35,7 +36,7 @@ class ContextManager:
|
|
| 35 |
def __init__(self, config: Optional[ContextConfig] = None):
|
| 36 |
"""
|
| 37 |
Initialize ContextManager with configuration.
|
| 38 |
-
|
| 39 |
Args:
|
| 40 |
config: Context configuration, uses defaults if None
|
| 41 |
"""
|
|
@@ -43,17 +44,15 @@ class ContextManager:
|
|
| 43 |
logger.info("ContextManager initialized")
|
| 44 |
|
| 45 |
def prepare_context(
|
| 46 |
-
self,
|
| 47 |
-
search_results: List[Dict[str, Any]],
|
| 48 |
-
query: str
|
| 49 |
) -> Tuple[str, List[Dict[str, Any]]]:
|
| 50 |
"""
|
| 51 |
Prepare optimized context from search results.
|
| 52 |
-
|
| 53 |
Args:
|
| 54 |
search_results: Results from SearchService
|
| 55 |
query: Original user query for context optimization
|
| 56 |
-
|
| 57 |
Returns:
|
| 58 |
Tuple of (formatted_context, filtered_results)
|
| 59 |
"""
|
|
@@ -62,56 +61,58 @@ class ContextManager:
|
|
| 62 |
|
| 63 |
# Filter and rank results
|
| 64 |
filtered_results = self._filter_results(search_results)
|
| 65 |
-
|
| 66 |
# Remove duplicates and optimize for context window
|
| 67 |
optimized_results = self._optimize_context(filtered_results)
|
| 68 |
-
|
| 69 |
# Format for prompt
|
| 70 |
formatted_context = self._format_context(optimized_results)
|
| 71 |
-
|
| 72 |
logger.debug(
|
| 73 |
f"Prepared context from {len(search_results)} results, "
|
| 74 |
f"filtered to {len(optimized_results)} results, "
|
| 75 |
f"{len(formatted_context)} characters"
|
| 76 |
)
|
| 77 |
-
|
| 78 |
return formatted_context, optimized_results
|
| 79 |
|
| 80 |
def _filter_results(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
| 81 |
"""
|
| 82 |
Filter search results by relevance and quality.
|
| 83 |
-
|
| 84 |
Args:
|
| 85 |
results: Raw search results
|
| 86 |
-
|
| 87 |
Returns:
|
| 88 |
Filtered and sorted results
|
| 89 |
"""
|
| 90 |
filtered = []
|
| 91 |
-
|
| 92 |
for result in results:
|
| 93 |
similarity = result.get("similarity_score", 0.0)
|
| 94 |
content = result.get("content", "").strip()
|
| 95 |
-
|
| 96 |
# Apply filters
|
| 97 |
-
if (
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
| 100 |
filtered.append(result)
|
| 101 |
-
|
| 102 |
# Sort by similarity score (descending)
|
| 103 |
filtered.sort(key=lambda x: x.get("similarity_score", 0.0), reverse=True)
|
| 104 |
-
|
| 105 |
# Limit to max results
|
| 106 |
-
return filtered[:self.config.max_results]
|
| 107 |
|
| 108 |
def _optimize_context(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
| 109 |
"""
|
| 110 |
Optimize context to fit within token limits while maximizing relevance.
|
| 111 |
-
|
| 112 |
Args:
|
| 113 |
results: Filtered search results
|
| 114 |
-
|
| 115 |
Returns:
|
| 116 |
Optimized results list
|
| 117 |
"""
|
|
@@ -125,7 +126,7 @@ class ContextManager:
|
|
| 125 |
for result in results:
|
| 126 |
content = result.get("content", "").strip()
|
| 127 |
content_length = len(content)
|
| 128 |
-
|
| 129 |
# Check if adding this result would exceed limit
|
| 130 |
estimated_formatted_length = current_length + content_length + 100 # Buffer
|
| 131 |
if estimated_formatted_length > self.config.max_context_length:
|
|
@@ -137,18 +138,21 @@ class ContextManager:
|
|
| 137 |
result_copy["content"] = truncated_content
|
| 138 |
optimized.append(result_copy)
|
| 139 |
break
|
| 140 |
-
|
| 141 |
# Check for duplicate or highly similar content
|
| 142 |
content_lower = content.lower()
|
| 143 |
is_duplicate = False
|
| 144 |
-
|
| 145 |
for seen in seen_content:
|
| 146 |
# Simple similarity check for duplicates
|
| 147 |
-
if (
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
| 149 |
is_duplicate = True
|
| 150 |
break
|
| 151 |
-
|
| 152 |
if not is_duplicate:
|
| 153 |
optimized.append(result)
|
| 154 |
seen_content.add(content_lower)
|
|
@@ -159,10 +163,10 @@ class ContextManager:
|
|
| 159 |
def _format_context(self, results: List[Dict[str, Any]]) -> str:
|
| 160 |
"""
|
| 161 |
Format optimized results into context string.
|
| 162 |
-
|
| 163 |
Args:
|
| 164 |
results: Optimized search results
|
| 165 |
-
|
| 166 |
Returns:
|
| 167 |
Formatted context string
|
| 168 |
"""
|
|
@@ -170,34 +174,28 @@ class ContextManager:
|
|
| 170 |
return "No relevant information found in corporate policies."
|
| 171 |
|
| 172 |
context_parts = []
|
| 173 |
-
|
| 174 |
for i, result in enumerate(results, 1):
|
| 175 |
metadata = result.get("metadata", {})
|
| 176 |
filename = metadata.get("filename", f"document_{i}")
|
| 177 |
content = result.get("content", "").strip()
|
| 178 |
-
|
| 179 |
# Format with document info
|
| 180 |
-
context_parts.append(
|
| 181 |
-
f"Document: {filename}\n"
|
| 182 |
-
f"Content: {content}"
|
| 183 |
-
)
|
| 184 |
|
| 185 |
return "\n\n---\n\n".join(context_parts)
|
| 186 |
|
| 187 |
def validate_context_quality(
|
| 188 |
-
self,
|
| 189 |
-
context: str,
|
| 190 |
-
query: str,
|
| 191 |
-
min_quality_score: float = 0.3
|
| 192 |
) -> Dict[str, Any]:
|
| 193 |
"""
|
| 194 |
Validate the quality of prepared context for a given query.
|
| 195 |
-
|
| 196 |
Args:
|
| 197 |
context: Formatted context string
|
| 198 |
query: Original user query
|
| 199 |
min_quality_score: Minimum acceptable quality score
|
| 200 |
-
|
| 201 |
Returns:
|
| 202 |
Dictionary with quality metrics and validation result
|
| 203 |
"""
|
|
@@ -206,7 +204,7 @@ class ContextManager:
|
|
| 206 |
"length": len(context),
|
| 207 |
"has_content": bool(context.strip()),
|
| 208 |
"estimated_relevance": 0.0,
|
| 209 |
-
"passes_validation": False
|
| 210 |
}
|
| 211 |
|
| 212 |
if not context.strip():
|
|
@@ -216,7 +214,7 @@ class ContextManager:
|
|
| 216 |
# Estimate relevance based on query term overlap
|
| 217 |
query_terms = set(query.lower().split())
|
| 218 |
context_terms = set(context.lower().split())
|
| 219 |
-
|
| 220 |
if query_terms and context_terms:
|
| 221 |
overlap = len(query_terms & context_terms)
|
| 222 |
relevance = overlap / len(query_terms)
|
|
@@ -230,36 +228,36 @@ class ContextManager:
|
|
| 230 |
def get_source_summary(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
|
| 231 |
"""
|
| 232 |
Generate summary of sources used in context.
|
| 233 |
-
|
| 234 |
Args:
|
| 235 |
results: Search results used for context
|
| 236 |
-
|
| 237 |
Returns:
|
| 238 |
Summary of sources and their contribution
|
| 239 |
"""
|
| 240 |
sources = {}
|
| 241 |
total_content_length = 0
|
| 242 |
-
|
| 243 |
for result in results:
|
| 244 |
metadata = result.get("metadata", {})
|
| 245 |
filename = metadata.get("filename", "unknown")
|
| 246 |
content_length = len(result.get("content", ""))
|
| 247 |
similarity = result.get("similarity_score", 0.0)
|
| 248 |
-
|
| 249 |
if filename not in sources:
|
| 250 |
sources[filename] = {
|
| 251 |
"chunks": 0,
|
| 252 |
"total_content_length": 0,
|
| 253 |
"max_similarity": 0.0,
|
| 254 |
-
"avg_similarity": 0.0
|
| 255 |
}
|
| 256 |
-
|
| 257 |
sources[filename]["chunks"] += 1
|
| 258 |
sources[filename]["total_content_length"] += content_length
|
| 259 |
sources[filename]["max_similarity"] = max(
|
| 260 |
sources[filename]["max_similarity"], similarity
|
| 261 |
)
|
| 262 |
-
|
| 263 |
total_content_length += content_length
|
| 264 |
|
| 265 |
# Calculate averages and percentages
|
|
@@ -272,5 +270,5 @@ class ContextManager:
|
|
| 272 |
"total_sources": len(sources),
|
| 273 |
"total_chunks": len(results),
|
| 274 |
"total_content_length": total_content_length,
|
| 275 |
-
"sources": sources
|
| 276 |
-
}
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import logging
|
|
|
|
| 9 |
from dataclasses import dataclass
|
| 10 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
|
|
| 15 |
@dataclass
|
| 16 |
class ContextConfig:
|
| 17 |
"""Configuration for context management."""
|
| 18 |
+
|
| 19 |
max_context_length: int = 3000 # Maximum characters in context
|
| 20 |
max_results: int = 5 # Maximum search results to include
|
| 21 |
min_similarity: float = 0.1 # Minimum similarity threshold
|
|
|
|
| 25 |
class ContextManager:
|
| 26 |
"""
|
| 27 |
Manages context retrieval and optimization for RAG pipeline.
|
| 28 |
+
|
| 29 |
Handles:
|
| 30 |
- Context length management
|
| 31 |
- Relevance filtering
|
|
|
|
| 36 |
def __init__(self, config: Optional[ContextConfig] = None):
|
| 37 |
"""
|
| 38 |
Initialize ContextManager with configuration.
|
| 39 |
+
|
| 40 |
Args:
|
| 41 |
config: Context configuration, uses defaults if None
|
| 42 |
"""
|
|
|
|
| 44 |
logger.info("ContextManager initialized")
|
| 45 |
|
| 46 |
def prepare_context(
|
| 47 |
+
self, search_results: List[Dict[str, Any]], query: str
|
|
|
|
|
|
|
| 48 |
) -> Tuple[str, List[Dict[str, Any]]]:
|
| 49 |
"""
|
| 50 |
Prepare optimized context from search results.
|
| 51 |
+
|
| 52 |
Args:
|
| 53 |
search_results: Results from SearchService
|
| 54 |
query: Original user query for context optimization
|
| 55 |
+
|
| 56 |
Returns:
|
| 57 |
Tuple of (formatted_context, filtered_results)
|
| 58 |
"""
|
|
|
|
| 61 |
|
| 62 |
# Filter and rank results
|
| 63 |
filtered_results = self._filter_results(search_results)
|
| 64 |
+
|
| 65 |
# Remove duplicates and optimize for context window
|
| 66 |
optimized_results = self._optimize_context(filtered_results)
|
| 67 |
+
|
| 68 |
# Format for prompt
|
| 69 |
formatted_context = self._format_context(optimized_results)
|
| 70 |
+
|
| 71 |
logger.debug(
|
| 72 |
f"Prepared context from {len(search_results)} results, "
|
| 73 |
f"filtered to {len(optimized_results)} results, "
|
| 74 |
f"{len(formatted_context)} characters"
|
| 75 |
)
|
| 76 |
+
|
| 77 |
return formatted_context, optimized_results
|
| 78 |
|
| 79 |
def _filter_results(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
| 80 |
"""
|
| 81 |
Filter search results by relevance and quality.
|
| 82 |
+
|
| 83 |
Args:
|
| 84 |
results: Raw search results
|
| 85 |
+
|
| 86 |
Returns:
|
| 87 |
Filtered and sorted results
|
| 88 |
"""
|
| 89 |
filtered = []
|
| 90 |
+
|
| 91 |
for result in results:
|
| 92 |
similarity = result.get("similarity_score", 0.0)
|
| 93 |
content = result.get("content", "").strip()
|
| 94 |
+
|
| 95 |
# Apply filters
|
| 96 |
+
if (
|
| 97 |
+
similarity >= self.config.min_similarity
|
| 98 |
+
and content
|
| 99 |
+
and len(content) > 20
|
| 100 |
+
): # Minimum content length
|
| 101 |
filtered.append(result)
|
| 102 |
+
|
| 103 |
# Sort by similarity score (descending)
|
| 104 |
filtered.sort(key=lambda x: x.get("similarity_score", 0.0), reverse=True)
|
| 105 |
+
|
| 106 |
# Limit to max results
|
| 107 |
+
return filtered[: self.config.max_results]
|
| 108 |
|
| 109 |
def _optimize_context(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
| 110 |
"""
|
| 111 |
Optimize context to fit within token limits while maximizing relevance.
|
| 112 |
+
|
| 113 |
Args:
|
| 114 |
results: Filtered search results
|
| 115 |
+
|
| 116 |
Returns:
|
| 117 |
Optimized results list
|
| 118 |
"""
|
|
|
|
| 126 |
for result in results:
|
| 127 |
content = result.get("content", "").strip()
|
| 128 |
content_length = len(content)
|
| 129 |
+
|
| 130 |
# Check if adding this result would exceed limit
|
| 131 |
estimated_formatted_length = current_length + content_length + 100 # Buffer
|
| 132 |
if estimated_formatted_length > self.config.max_context_length:
|
|
|
|
| 138 |
result_copy["content"] = truncated_content
|
| 139 |
optimized.append(result_copy)
|
| 140 |
break
|
| 141 |
+
|
| 142 |
# Check for duplicate or highly similar content
|
| 143 |
content_lower = content.lower()
|
| 144 |
is_duplicate = False
|
| 145 |
+
|
| 146 |
for seen in seen_content:
|
| 147 |
# Simple similarity check for duplicates
|
| 148 |
+
if (
|
| 149 |
+
len(set(content_lower.split()) & set(seen.split()))
|
| 150 |
+
/ max(len(content_lower.split()), len(seen.split()))
|
| 151 |
+
> 0.8
|
| 152 |
+
):
|
| 153 |
is_duplicate = True
|
| 154 |
break
|
| 155 |
+
|
| 156 |
if not is_duplicate:
|
| 157 |
optimized.append(result)
|
| 158 |
seen_content.add(content_lower)
|
|
|
|
| 163 |
def _format_context(self, results: List[Dict[str, Any]]) -> str:
|
| 164 |
"""
|
| 165 |
Format optimized results into context string.
|
| 166 |
+
|
| 167 |
Args:
|
| 168 |
results: Optimized search results
|
| 169 |
+
|
| 170 |
Returns:
|
| 171 |
Formatted context string
|
| 172 |
"""
|
|
|
|
| 174 |
return "No relevant information found in corporate policies."
|
| 175 |
|
| 176 |
context_parts = []
|
| 177 |
+
|
| 178 |
for i, result in enumerate(results, 1):
|
| 179 |
metadata = result.get("metadata", {})
|
| 180 |
filename = metadata.get("filename", f"document_{i}")
|
| 181 |
content = result.get("content", "").strip()
|
| 182 |
+
|
| 183 |
# Format with document info
|
| 184 |
+
context_parts.append(f"Document: {filename}\n" f"Content: {content}")
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
return "\n\n---\n\n".join(context_parts)
|
| 187 |
|
| 188 |
def validate_context_quality(
|
| 189 |
+
self, context: str, query: str, min_quality_score: float = 0.3
|
|
|
|
|
|
|
|
|
|
| 190 |
) -> Dict[str, Any]:
|
| 191 |
"""
|
| 192 |
Validate the quality of prepared context for a given query.
|
| 193 |
+
|
| 194 |
Args:
|
| 195 |
context: Formatted context string
|
| 196 |
query: Original user query
|
| 197 |
min_quality_score: Minimum acceptable quality score
|
| 198 |
+
|
| 199 |
Returns:
|
| 200 |
Dictionary with quality metrics and validation result
|
| 201 |
"""
|
|
|
|
| 204 |
"length": len(context),
|
| 205 |
"has_content": bool(context.strip()),
|
| 206 |
"estimated_relevance": 0.0,
|
| 207 |
+
"passes_validation": False,
|
| 208 |
}
|
| 209 |
|
| 210 |
if not context.strip():
|
|
|
|
| 214 |
# Estimate relevance based on query term overlap
|
| 215 |
query_terms = set(query.lower().split())
|
| 216 |
context_terms = set(context.lower().split())
|
| 217 |
+
|
| 218 |
if query_terms and context_terms:
|
| 219 |
overlap = len(query_terms & context_terms)
|
| 220 |
relevance = overlap / len(query_terms)
|
|
|
|
| 228 |
def get_source_summary(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
|
| 229 |
"""
|
| 230 |
Generate summary of sources used in context.
|
| 231 |
+
|
| 232 |
Args:
|
| 233 |
results: Search results used for context
|
| 234 |
+
|
| 235 |
Returns:
|
| 236 |
Summary of sources and their contribution
|
| 237 |
"""
|
| 238 |
sources = {}
|
| 239 |
total_content_length = 0
|
| 240 |
+
|
| 241 |
for result in results:
|
| 242 |
metadata = result.get("metadata", {})
|
| 243 |
filename = metadata.get("filename", "unknown")
|
| 244 |
content_length = len(result.get("content", ""))
|
| 245 |
similarity = result.get("similarity_score", 0.0)
|
| 246 |
+
|
| 247 |
if filename not in sources:
|
| 248 |
sources[filename] = {
|
| 249 |
"chunks": 0,
|
| 250 |
"total_content_length": 0,
|
| 251 |
"max_similarity": 0.0,
|
| 252 |
+
"avg_similarity": 0.0,
|
| 253 |
}
|
| 254 |
+
|
| 255 |
sources[filename]["chunks"] += 1
|
| 256 |
sources[filename]["total_content_length"] += content_length
|
| 257 |
sources[filename]["max_similarity"] = max(
|
| 258 |
sources[filename]["max_similarity"], similarity
|
| 259 |
)
|
| 260 |
+
|
| 261 |
total_content_length += content_length
|
| 262 |
|
| 263 |
# Calculate averages and percentages
|
|
|
|
| 270 |
"total_sources": len(sources),
|
| 271 |
"total_chunks": len(results),
|
| 272 |
"total_content_length": total_content_length,
|
| 273 |
+
"sources": sources,
|
| 274 |
+
}
|
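
The src/llm/context_manager.py changes are likewise mechanical: isort reorders two imports and black re-wraps long conditions, including the word-overlap duplicate check. Read in isolation, that check amounts to the following helper (an illustration with the same 0.8 threshold, not code from the module):

def looks_like_duplicate(content: str, seen: str, threshold: float = 0.8) -> bool:
    """Return True when two chunks share most of their words (set overlap)."""
    a = set(content.lower().split())
    b = set(seen.lower().split())
    if not a or not b:  # guard added for the sketch; avoids dividing by zero
        return False
    return len(a & b) / max(len(a), len(b)) > threshold


# Example: identical wording (ignoring case) exceeds the 0.8 threshold.
print(looks_like_duplicate("Vacation policy details", "vacation policy details"))  # True
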
src/llm/llm_service.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
| 1 |
"""
|
| 2 |
LLM Service for RAG Application
|
| 3 |
|
| 4 |
-
This module provides integration with Large Language Models through multiple
|
| 5 |
-
including OpenRouter and Groq, with fallback capabilities and
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import logging
|
| 9 |
import os
|
| 10 |
import time
|
| 11 |
-
from typing import Any, Dict, List, Optional, Union
|
| 12 |
-
import requests
|
| 13 |
from dataclasses import dataclass
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
|
@@ -18,6 +20,7 @@ logger = logging.getLogger(__name__)
|
|
| 18 |
@dataclass
|
| 19 |
class LLMConfig:
|
| 20 |
"""Configuration for LLM providers."""
|
|
|
|
| 21 |
provider: str # "openrouter" or "groq"
|
| 22 |
api_key: str
|
| 23 |
model_name: str
|
|
@@ -30,6 +33,7 @@ class LLMConfig:
|
|
| 30 |
@dataclass
|
| 31 |
class LLMResponse:
|
| 32 |
"""Standardized response from LLM providers."""
|
|
|
|
| 33 |
content: str
|
| 34 |
provider: str
|
| 35 |
model: str
|
|
@@ -42,7 +46,7 @@ class LLMResponse:
|
|
| 42 |
class LLMService:
|
| 43 |
"""
|
| 44 |
Service for interacting with Large Language Models.
|
| 45 |
-
|
| 46 |
Supports multiple providers with automatic fallback and retry logic.
|
| 47 |
Designed for corporate policy Q&A with appropriate guardrails.
|
| 48 |
"""
|
|
@@ -50,108 +54,112 @@ class LLMService:
|
|
| 50 |
def __init__(self, configs: List[LLMConfig]):
|
| 51 |
"""
|
| 52 |
Initialize LLMService with provider configurations.
|
| 53 |
-
|
| 54 |
Args:
|
| 55 |
configs: List of LLMConfig objects for different providers
|
| 56 |
-
|
| 57 |
Raises:
|
| 58 |
ValueError: If no valid configurations provided
|
| 59 |
"""
|
| 60 |
if not configs:
|
| 61 |
raise ValueError("At least one LLM configuration must be provided")
|
| 62 |
-
|
| 63 |
self.configs = configs
|
| 64 |
self.current_config_index = 0
|
| 65 |
logger.info(f"LLMService initialized with {len(configs)} provider(s)")
|
| 66 |
|
| 67 |
@classmethod
|
| 68 |
-
def from_environment(cls) ->
|
| 69 |
"""
|
| 70 |
Create LLMService instance from environment variables.
|
| 71 |
-
|
| 72 |
Expected environment variables:
|
| 73 |
- OPENROUTER_API_KEY: API key for OpenRouter
|
| 74 |
- GROQ_API_KEY: API key for Groq
|
| 75 |
-
|
| 76 |
Returns:
|
| 77 |
LLMService instance with available providers
|
| 78 |
-
|
| 79 |
Raises:
|
| 80 |
ValueError: If no API keys found in environment
|
| 81 |
"""
|
| 82 |
configs = []
|
| 83 |
-
|
| 84 |
# OpenRouter configuration
|
| 85 |
openrouter_key = os.getenv("OPENROUTER_API_KEY")
|
| 86 |
if openrouter_key:
|
| 87 |
-
configs.append(
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
| 97 |
groq_key = os.getenv("GROQ_API_KEY")
|
| 98 |
if groq_key:
|
| 99 |
-
configs.append(
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
| 108 |
if not configs:
|
| 109 |
raise ValueError(
|
| 110 |
"No LLM API keys found in environment. "
|
| 111 |
"Please set OPENROUTER_API_KEY or GROQ_API_KEY"
|
| 112 |
)
|
| 113 |
-
|
| 114 |
return cls(configs)
|
| 115 |
|
| 116 |
-
def generate_response(
|
| 117 |
-
self,
|
| 118 |
-
prompt: str,
|
| 119 |
-
max_retries: int = 2
|
| 120 |
-
) -> LLMResponse:
|
| 121 |
"""
|
| 122 |
Generate response from LLM with fallback support.
|
| 123 |
-
|
| 124 |
Args:
|
| 125 |
prompt: Input prompt for the LLM
|
| 126 |
max_retries: Maximum retry attempts per provider
|
| 127 |
-
|
| 128 |
Returns:
|
| 129 |
LLMResponse with generated content or error information
|
| 130 |
"""
|
| 131 |
last_error = None
|
| 132 |
-
|
| 133 |
# Try each provider configuration
|
| 134 |
for attempt in range(len(self.configs)):
|
| 135 |
config = self.configs[self.current_config_index]
|
| 136 |
-
|
| 137 |
try:
|
| 138 |
logger.debug(f"Attempting generation with {config.provider}")
|
| 139 |
response = self._call_provider(config, prompt, max_retries)
|
| 140 |
-
|
| 141 |
if response.success:
|
| 142 |
-
logger.info(
|
|
|
|
|
|
|
| 143 |
return response
|
| 144 |
-
|
| 145 |
last_error = response.error_message
|
| 146 |
logger.warning(f"Provider {config.provider} failed: {last_error}")
|
| 147 |
-
|
| 148 |
except Exception as e:
|
| 149 |
last_error = str(e)
|
| 150 |
logger.error(f"Error with provider {config.provider}: {last_error}")
|
| 151 |
-
|
| 152 |
# Move to next provider
|
| 153 |
-
self.current_config_index = (self.current_config_index + 1) % len(
|
| 154 |
-
|
|
|
|
|
|
|
| 155 |
# All providers failed
|
| 156 |
logger.error("All LLM providers failed")
|
| 157 |
return LLMResponse(
|
|
@@ -161,83 +169,79 @@ class LLMService:
|
|
| 161 |
usage={},
|
| 162 |
response_time=0.0,
|
| 163 |
success=False,
|
| 164 |
-
error_message=f"All providers failed. Last error: {last_error}"
|
| 165 |
)
|
| 166 |
|
| 167 |
def _call_provider(
|
| 168 |
-
self,
|
| 169 |
-
config: LLMConfig,
|
| 170 |
-
prompt: str,
|
| 171 |
-
max_retries: int
|
| 172 |
) -> LLMResponse:
|
| 173 |
"""
|
| 174 |
Make API call to specific provider with retry logic.
|
| 175 |
-
|
| 176 |
Args:
|
| 177 |
config: Provider configuration
|
| 178 |
prompt: Input prompt
|
| 179 |
max_retries: Maximum retry attempts
|
| 180 |
-
|
| 181 |
Returns:
|
| 182 |
LLMResponse from the provider
|
| 183 |
"""
|
| 184 |
start_time = time.time()
|
| 185 |
-
|
| 186 |
for attempt in range(max_retries + 1):
|
| 187 |
try:
|
| 188 |
headers = {
|
| 189 |
"Authorization": f"Bearer {config.api_key}",
|
| 190 |
-
"Content-Type": "application/json"
|
| 191 |
}
|
| 192 |
-
|
| 193 |
# Add provider-specific headers
|
| 194 |
if config.provider == "openrouter":
|
| 195 |
-
headers["HTTP-Referer"] =
|
|
|
|
|
|
|
| 196 |
headers["X-Title"] = "MSSE RAG Application"
|
| 197 |
-
|
| 198 |
payload = {
|
| 199 |
"model": config.model_name,
|
| 200 |
-
"messages": [
|
| 201 |
-
{
|
| 202 |
-
"role": "user",
|
| 203 |
-
"content": prompt
|
| 204 |
-
}
|
| 205 |
-
],
|
| 206 |
"max_tokens": config.max_tokens,
|
| 207 |
-
"temperature": config.temperature
|
| 208 |
}
|
| 209 |
-
|
| 210 |
response = requests.post(
|
| 211 |
f"{config.base_url}/chat/completions",
|
| 212 |
headers=headers,
|
| 213 |
json=payload,
|
| 214 |
-
timeout=config.timeout
|
| 215 |
)
|
| 216 |
-
|
| 217 |
response.raise_for_status()
|
| 218 |
data = response.json()
|
| 219 |
-
|
| 220 |
# Extract response content
|
| 221 |
content = data["choices"][0]["message"]["content"]
|
| 222 |
usage = data.get("usage", {})
|
| 223 |
-
|
| 224 |
response_time = time.time() - start_time
|
| 225 |
-
|
| 226 |
return LLMResponse(
|
| 227 |
content=content,
|
| 228 |
provider=config.provider,
|
| 229 |
model=config.model_name,
|
| 230 |
usage=usage,
|
| 231 |
response_time=response_time,
|
| 232 |
-
success=True
|
| 233 |
)
|
| 234 |
-
|
| 235 |
except requests.exceptions.RequestException as e:
|
| 236 |
-
logger.warning(
|
|
|
|
|
|
|
| 237 |
if attempt < max_retries:
|
| 238 |
-
time.sleep(2
|
| 239 |
continue
|
| 240 |
-
|
| 241 |
return LLMResponse(
|
| 242 |
content="",
|
| 243 |
provider=config.provider,
|
|
@@ -245,9 +249,9 @@ class LLMService:
|
|
| 245 |
usage={},
|
| 246 |
response_time=time.time() - start_time,
|
| 247 |
success=False,
|
| 248 |
-
error_message=str(e)
|
| 249 |
)
|
| 250 |
-
|
| 251 |
except Exception as e:
|
| 252 |
logger.error(f"Unexpected error with {config.provider}: {e}")
|
| 253 |
return LLMResponse(
|
|
@@ -257,44 +261,44 @@ class LLMService:
|
|
| 257 |
usage={},
|
| 258 |
response_time=time.time() - start_time,
|
| 259 |
success=False,
|
| 260 |
-
error_message=str(e)
|
| 261 |
)
|
| 262 |
|
| 263 |
def health_check(self) -> Dict[str, Any]:
|
| 264 |
"""
|
| 265 |
Check health status of all configured providers.
|
| 266 |
-
|
| 267 |
Returns:
|
| 268 |
Dictionary with provider health status
|
| 269 |
"""
|
| 270 |
health_status = {}
|
| 271 |
-
|
| 272 |
for config in self.configs:
|
| 273 |
try:
|
| 274 |
# Simple test prompt
|
| 275 |
test_response = self._call_provider(
|
| 276 |
-
config,
|
| 277 |
-
"Hello, this is a test. Please respond with 'OK'.",
|
| 278 |
-
max_retries=1
|
| 279 |
)
|
| 280 |
-
|
| 281 |
health_status[config.provider] = {
|
| 282 |
"status": "healthy" if test_response.success else "unhealthy",
|
| 283 |
"model": config.model_name,
|
| 284 |
"response_time": test_response.response_time,
|
| 285 |
-
"error": test_response.error_message
|
| 286 |
}
|
| 287 |
-
|
| 288 |
except Exception as e:
|
| 289 |
health_status[config.provider] = {
|
| 290 |
"status": "unhealthy",
|
| 291 |
"model": config.model_name,
|
| 292 |
"response_time": 0.0,
|
| 293 |
-
"error": str(e)
|
| 294 |
}
|
| 295 |
-
|
| 296 |
return health_status
|
| 297 |
|
| 298 |
def get_available_providers(self) -> List[str]:
|
| 299 |
"""Get list of available provider names."""
|
| 300 |
-
return [config.provider for config in self.configs]
|
|
|
|
| 1 |
"""
|
| 2 |
LLM Service for RAG Application
|
| 3 |
|
| 4 |
+
This module provides integration with Large Language Models through multiple
|
| 5 |
+
providers including OpenRouter and Groq, with fallback capabilities and
|
| 6 |
+
comprehensive error handling.
|
| 7 |
"""
|
| 8 |
|
| 9 |
import logging
|
| 10 |
import os
|
| 11 |
import time
|
|
|
|
|
|
|
| 12 |
from dataclasses import dataclass
|
| 13 |
+
from typing import Any, Dict, List, Optional
|
| 14 |
+
|
| 15 |
+
import requests
|
| 16 |
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
|
|
|
|
| 20 |
@dataclass
|
| 21 |
class LLMConfig:
|
| 22 |
"""Configuration for LLM providers."""
|
| 23 |
+
|
| 24 |
provider: str # "openrouter" or "groq"
|
| 25 |
api_key: str
|
| 26 |
model_name: str
|
|
|
|
| 33 |
@dataclass
|
| 34 |
class LLMResponse:
|
| 35 |
"""Standardized response from LLM providers."""
|
| 36 |
+
|
| 37 |
content: str
|
| 38 |
provider: str
|
| 39 |
model: str
|
|
|
|
| 46 |
class LLMService:
|
| 47 |
"""
|
| 48 |
Service for interacting with Large Language Models.
|
| 49 |
+
|
| 50 |
Supports multiple providers with automatic fallback and retry logic.
|
| 51 |
Designed for corporate policy Q&A with appropriate guardrails.
|
| 52 |
"""
|
|
|
|
| 54 |
def __init__(self, configs: List[LLMConfig]):
|
| 55 |
"""
|
| 56 |
Initialize LLMService with provider configurations.
|
| 57 |
+
|
| 58 |
Args:
|
| 59 |
configs: List of LLMConfig objects for different providers
|
| 60 |
+
|
| 61 |
Raises:
|
| 62 |
ValueError: If no valid configurations provided
|
| 63 |
"""
|
| 64 |
if not configs:
|
| 65 |
raise ValueError("At least one LLM configuration must be provided")
|
| 66 |
+
|
| 67 |
self.configs = configs
|
| 68 |
self.current_config_index = 0
|
| 69 |
logger.info(f"LLMService initialized with {len(configs)} provider(s)")
|
| 70 |
|
| 71 |
@classmethod
|
| 72 |
+
def from_environment(cls) -> "LLMService":
|
| 73 |
"""
|
| 74 |
Create LLMService instance from environment variables.
|
| 75 |
+
|
| 76 |
Expected environment variables:
|
| 77 |
- OPENROUTER_API_KEY: API key for OpenRouter
|
| 78 |
- GROQ_API_KEY: API key for Groq
|
| 79 |
+
|
| 80 |
Returns:
|
| 81 |
LLMService instance with available providers
|
| 82 |
+
|
| 83 |
Raises:
|
| 84 |
ValueError: If no API keys found in environment
|
| 85 |
"""
|
| 86 |
configs = []
|
| 87 |
+
|
| 88 |
# OpenRouter configuration
|
| 89 |
openrouter_key = os.getenv("OPENROUTER_API_KEY")
|
| 90 |
if openrouter_key:
|
| 91 |
+
configs.append(
|
| 92 |
+
LLMConfig(
|
| 93 |
+
provider="openrouter",
|
| 94 |
+
api_key=openrouter_key,
|
| 95 |
+
model_name="microsoft/wizardlm-2-8x22b", # Free tier model
|
| 96 |
+
base_url="https://openrouter.ai/api/v1",
|
| 97 |
+
max_tokens=1000,
|
| 98 |
+
temperature=0.1,
|
| 99 |
+
)
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
# Groq configuration
|
| 103 |
groq_key = os.getenv("GROQ_API_KEY")
|
| 104 |
if groq_key:
|
| 105 |
+
configs.append(
|
| 106 |
+
LLMConfig(
|
| 107 |
+
provider="groq",
|
| 108 |
+
api_key=groq_key,
|
| 109 |
+
model_name="llama3-8b-8192", # Free tier model
|
| 110 |
+
base_url="https://api.groq.com/openai/v1",
|
| 111 |
+
max_tokens=1000,
|
| 112 |
+
temperature=0.1,
|
| 113 |
+
)
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
if not configs:
|
| 117 |
raise ValueError(
|
| 118 |
"No LLM API keys found in environment. "
|
| 119 |
"Please set OPENROUTER_API_KEY or GROQ_API_KEY"
|
| 120 |
)
|
| 121 |
+
|
| 122 |
return cls(configs)
|
| 123 |
|
| 124 |
+
def generate_response(self, prompt: str, max_retries: int = 2) -> LLMResponse:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
"""
|
| 126 |
Generate response from LLM with fallback support.
|
| 127 |
+
|
| 128 |
Args:
|
| 129 |
prompt: Input prompt for the LLM
|
| 130 |
max_retries: Maximum retry attempts per provider
|
| 131 |
+
|
| 132 |
Returns:
|
| 133 |
LLMResponse with generated content or error information
|
| 134 |
"""
|
| 135 |
last_error = None
|
| 136 |
+
|
| 137 |
# Try each provider configuration
|
| 138 |
for attempt in range(len(self.configs)):
|
| 139 |
config = self.configs[self.current_config_index]
|
| 140 |
+
|
| 141 |
try:
|
| 142 |
logger.debug(f"Attempting generation with {config.provider}")
|
| 143 |
response = self._call_provider(config, prompt, max_retries)
|
| 144 |
+
|
| 145 |
if response.success:
|
| 146 |
+
logger.info(
|
| 147 |
+
f"Successfully generated response using {config.provider}"
|
| 148 |
+
)
|
| 149 |
return response
|
| 150 |
+
|
| 151 |
last_error = response.error_message
|
| 152 |
logger.warning(f"Provider {config.provider} failed: {last_error}")
|
| 153 |
+
|
| 154 |
except Exception as e:
|
| 155 |
last_error = str(e)
|
| 156 |
logger.error(f"Error with provider {config.provider}: {last_error}")
|
| 157 |
+
|
| 158 |
# Move to next provider
|
| 159 |
+
self.current_config_index = (self.current_config_index + 1) % len(
|
| 160 |
+
self.configs
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
# All providers failed
|
| 164 |
logger.error("All LLM providers failed")
|
| 165 |
return LLMResponse(
|
|
|
|
| 169 |
usage={},
|
| 170 |
response_time=0.0,
|
| 171 |
success=False,
|
| 172 |
+
error_message=f"All providers failed. Last error: {last_error}",
|
| 173 |
)
|
| 174 |
|
| 175 |
def _call_provider(
|
| 176 |
+
self, config: LLMConfig, prompt: str, max_retries: int
|
|
|
|
|
|
|
|
|
|
| 177 |
) -> LLMResponse:
|
| 178 |
"""
|
| 179 |
Make API call to specific provider with retry logic.
|
| 180 |
+
|
| 181 |
Args:
|
| 182 |
config: Provider configuration
|
| 183 |
prompt: Input prompt
|
| 184 |
max_retries: Maximum retry attempts
|
| 185 |
+
|
| 186 |
Returns:
|
| 187 |
LLMResponse from the provider
|
| 188 |
"""
|
| 189 |
start_time = time.time()
|
| 190 |
+
|
| 191 |
for attempt in range(max_retries + 1):
|
| 192 |
try:
|
| 193 |
headers = {
|
| 194 |
"Authorization": f"Bearer {config.api_key}",
|
| 195 |
+
"Content-Type": "application/json",
|
| 196 |
}
|
| 197 |
+
|
| 198 |
# Add provider-specific headers
|
| 199 |
if config.provider == "openrouter":
|
| 200 |
+
headers["HTTP-Referer"] = (
|
| 201 |
+
"https://github.com/sethmcknight/msse-ai-engineering"
|
| 202 |
+
)
|
| 203 |
headers["X-Title"] = "MSSE RAG Application"
|
| 204 |
+
|
| 205 |
payload = {
|
| 206 |
"model": config.model_name,
|
| 207 |
+
"messages": [{"role": "user", "content": prompt}],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
"max_tokens": config.max_tokens,
|
| 209 |
+
"temperature": config.temperature,
|
| 210 |
}
|
| 211 |
+
|
| 212 |
response = requests.post(
|
| 213 |
f"{config.base_url}/chat/completions",
|
| 214 |
headers=headers,
|
| 215 |
json=payload,
|
| 216 |
+
timeout=config.timeout,
|
| 217 |
)
|
| 218 |
+
|
| 219 |
response.raise_for_status()
|
| 220 |
data = response.json()
|
| 221 |
+
|
| 222 |
# Extract response content
|
| 223 |
content = data["choices"][0]["message"]["content"]
|
| 224 |
usage = data.get("usage", {})
|
| 225 |
+
|
| 226 |
response_time = time.time() - start_time
|
| 227 |
+
|
| 228 |
return LLMResponse(
|
| 229 |
content=content,
|
| 230 |
provider=config.provider,
|
| 231 |
model=config.model_name,
|
| 232 |
usage=usage,
|
| 233 |
response_time=response_time,
|
| 234 |
+
success=True,
|
| 235 |
)
|
| 236 |
+
|
| 237 |
except requests.exceptions.RequestException as e:
|
| 238 |
+
logger.warning(
|
| 239 |
+
f"Request failed for {config.provider} (attempt {attempt + 1}): {e}"
|
| 240 |
+
)
|
| 241 |
if attempt < max_retries:
|
| 242 |
+
time.sleep(2**attempt) # Exponential backoff
|
| 243 |
continue
|
| 244 |
+
|
| 245 |
return LLMResponse(
|
| 246 |
content="",
|
| 247 |
provider=config.provider,
|
|
|
|
| 249 |
usage={},
|
| 250 |
response_time=time.time() - start_time,
|
| 251 |
success=False,
|
| 252 |
+
error_message=str(e),
|
| 253 |
)
|
| 254 |
+
|
| 255 |
except Exception as e:
|
| 256 |
logger.error(f"Unexpected error with {config.provider}: {e}")
|
| 257 |
return LLMResponse(
|
|
|
|
| 261 |
usage={},
|
| 262 |
response_time=time.time() - start_time,
|
| 263 |
success=False,
|
| 264 |
+
error_message=str(e),
|
| 265 |
)
|
| 266 |
|
| 267 |
def health_check(self) -> Dict[str, Any]:
|
| 268 |
"""
|
| 269 |
Check health status of all configured providers.
|
| 270 |
+
|
| 271 |
Returns:
|
| 272 |
Dictionary with provider health status
|
| 273 |
"""
|
| 274 |
health_status = {}
|
| 275 |
+
|
| 276 |
for config in self.configs:
|
| 277 |
try:
|
| 278 |
# Simple test prompt
|
| 279 |
test_response = self._call_provider(
|
| 280 |
+
config,
|
| 281 |
+
"Hello, this is a test. Please respond with 'OK'.",
|
| 282 |
+
max_retries=1,
|
| 283 |
)
|
| 284 |
+
|
| 285 |
health_status[config.provider] = {
|
| 286 |
"status": "healthy" if test_response.success else "unhealthy",
|
| 287 |
"model": config.model_name,
|
| 288 |
"response_time": test_response.response_time,
|
| 289 |
+
"error": test_response.error_message,
|
| 290 |
}
|
| 291 |
+
|
| 292 |
except Exception as e:
|
| 293 |
health_status[config.provider] = {
|
| 294 |
"status": "unhealthy",
|
| 295 |
"model": config.model_name,
|
| 296 |
"response_time": 0.0,
|
| 297 |
+
"error": str(e),
|
| 298 |
}
|
| 299 |
+
|
| 300 |
return health_status
|
| 301 |
|
| 302 |
def get_available_providers(self) -> List[str]:
|
| 303 |
"""Get list of available provider names."""
|
| 304 |
+
return [config.provider for config in self.configs]
|
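
The src/llm/llm_service.py hunk is again formatting-only, but it touches the retry and fallback path: failed requests back off with time.sleep(2 ** attempt) before the service rotates to the next provider. A minimal sketch of that backoff loop on its own (illustrative, not the service implementation):

import time


def call_with_backoff(call, max_retries: int = 2):
    """Retry a callable with exponential backoff (1s, 2s, 4s, ...)."""
    for attempt in range(max_retries + 1):
        try:
            return call()
        except Exception:
            if attempt < max_retries:
                time.sleep(2 ** attempt)  # exponential backoff, as in the diff
                continue
            raise


# Example (hypothetical): call_with_backoff(lambda: requests.post(url, json=payload))
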
src/llm/prompt_templates.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
| 1 |
"""
|
| 2 |
Prompt Templates for Corporate Policy Q&A
|
| 3 |
|
| 4 |
-
This module contains predefined prompt templates optimized for
|
| 5 |
corporate policy question-answering with proper citation requirements.
|
| 6 |
"""
|
| 7 |
|
| 8 |
-
from typing import Dict, List
|
| 9 |
from dataclasses import dataclass
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
@dataclass
|
| 13 |
class PromptTemplate:
|
| 14 |
"""Template for generating prompts with context and citations."""
|
|
|
|
| 15 |
system_prompt: str
|
| 16 |
user_template: str
|
| 17 |
citation_format: str
|
|
@@ -20,7 +21,7 @@ class PromptTemplate:
|
|
| 20 |
class PromptTemplates:
|
| 21 |
"""
|
| 22 |
Collection of prompt templates for different types of policy questions.
|
| 23 |
-
|
| 24 |
Templates are designed to ensure:
|
| 25 |
- Accurate responses based on provided context
|
| 26 |
- Proper citation of source documents
|
|
@@ -29,15 +30,15 @@ class PromptTemplates:
|
|
| 29 |
"""
|
| 30 |
|
| 31 |
# System prompt for corporate policy assistant
|
| 32 |
-
SYSTEM_PROMPT = """You are a helpful corporate policy assistant. Your job is to answer questions about company policies based ONLY on the provided context documents.
|
| 33 |
|
| 34 |
IMPORTANT GUIDELINES:
|
| 35 |
1. Answer questions using ONLY the information provided in the context
|
| 36 |
-
2. If the context doesn't contain enough information to answer the question, say so explicitly
|
| 37 |
3. Always cite your sources using the format: [Source: filename.md]
|
| 38 |
4. Be accurate, concise, and professional
|
| 39 |
-
5. If asked about topics not covered in the policies, politely redirect to HR or appropriate department
|
| 40 |
-
6. Do not make assumptions or provide information not explicitly stated in the context
|
| 41 |
|
| 42 |
Your responses should be helpful while staying strictly within the scope of the provided corporate policies."""
|
| 43 |
|
|
@@ -45,26 +46,26 @@ Your responses should be helpful while staying strictly within the scope of the
|
|
| 45 |
def get_policy_qa_template(cls) -> PromptTemplate:
|
| 46 |
"""
|
| 47 |
Get the standard template for policy question-answering.
|
| 48 |
-
|
| 49 |
Returns:
|
| 50 |
PromptTemplate configured for corporate policy Q&A
|
| 51 |
"""
|
| 52 |
return PromptTemplate(
|
| 53 |
system_prompt=cls.SYSTEM_PROMPT,
|
| 54 |
-
user_template="""Based on the following corporate policy documents, please answer this question: {question}
|
| 55 |
|
| 56 |
CONTEXT DOCUMENTS:
|
| 57 |
{context}
|
| 58 |
|
| 59 |
-
Please provide a clear, accurate answer based on the information above. Include citations for all information using the format [Source: filename.md].""",
|
| 60 |
-
citation_format="[Source: {filename}]"
|
| 61 |
)
|
| 62 |
|
| 63 |
@classmethod
|
| 64 |
def get_clarification_template(cls) -> PromptTemplate:
|
| 65 |
"""
|
| 66 |
Get template for when clarification is needed.
|
| 67 |
-
|
| 68 |
Returns:
|
| 69 |
PromptTemplate for clarification requests
|
| 70 |
"""
|
|
@@ -75,19 +76,19 @@ Please provide a clear, accurate answer based on the information above. Include
|
|
| 75 |
CONTEXT DOCUMENTS:
|
| 76 |
{context}
|
| 77 |
|
| 78 |
-
The provided context documents don't contain sufficient information to fully answer this question. Please provide a helpful response that:
|
| 79 |
1. Acknowledges what information is available (if any)
|
| 80 |
2. Clearly states what information is missing
|
| 81 |
3. Suggests appropriate next steps (contact HR, check other resources, etc.)
|
| 82 |
4. Cites any relevant sources using [Source: filename.md] format""",
|
| 83 |
-
citation_format="[Source: {filename}]"
|
| 84 |
)
|
| 85 |
|
| 86 |
@classmethod
|
| 87 |
def get_off_topic_template(cls) -> PromptTemplate:
|
| 88 |
"""
|
| 89 |
Get template for off-topic questions.
|
| 90 |
-
|
| 91 |
Returns:
|
| 92 |
PromptTemplate for redirecting off-topic questions
|
| 93 |
"""
|
|
@@ -95,122 +96,122 @@ The provided context documents don't contain sufficient information to fully ans
|
|
| 95 |
system_prompt=cls.SYSTEM_PROMPT,
|
| 96 |
user_template="""The user asked: {question}
|
| 97 |
|
| 98 |
-
This question appears to be outside the scope of our corporate policies. Please provide a polite response that:
|
| 99 |
1. Acknowledges the question
|
| 100 |
2. Explains that this falls outside corporate policy documentation
|
| 101 |
3. Suggests appropriate resources (HR, IT, management, etc.)
|
| 102 |
4. Offers to help with any policy-related questions instead""",
|
| 103 |
-
citation_format=""
|
| 104 |
)
|
| 105 |
|
| 106 |
@staticmethod
|
| 107 |
def format_context(search_results: List[Dict]) -> str:
|
| 108 |
"""
|
| 109 |
Format search results into context for the prompt.
|
| 110 |
-
|
| 111 |
Args:
|
| 112 |
search_results: List of search results from SearchService
|
| 113 |
-
|
| 114 |
Returns:
|
| 115 |
Formatted context string for the prompt
|
| 116 |
"""
|
| 117 |
if not search_results:
|
| 118 |
return "No relevant policy documents found."
|
| 119 |
-
|
| 120 |
context_parts = []
|
| 121 |
for i, result in enumerate(search_results[:5], 1): # Limit to top 5 results
|
| 122 |
filename = result.get("metadata", {}).get("filename", "unknown")
|
| 123 |
content = result.get("content", "").strip()
|
| 124 |
similarity = result.get("similarity_score", 0.0)
|
| 125 |
-
|
| 126 |
context_parts.append(
|
| 127 |
f"Document {i}: {filename} (relevance: {similarity:.2f})\n"
|
| 128 |
f"Content: {content}\n"
|
| 129 |
)
|
| 130 |
-
|
| 131 |
return "\n---\n".join(context_parts)
|
| 132 |
|
| 133 |
@staticmethod
|
| 134 |
def extract_citations(response: str) -> List[str]:
|
| 135 |
"""
|
| 136 |
Extract citations from LLM response.
|
| 137 |
-
|
| 138 |
Args:
|
| 139 |
response: Generated response text
|
| 140 |
-
|
| 141 |
Returns:
|
| 142 |
List of extracted filenames from citations
|
| 143 |
"""
|
| 144 |
import re
|
| 145 |
-
|
| 146 |
# Pattern to match [Source: filename.md] format
|
| 147 |
-
citation_pattern = r
|
| 148 |
matches = re.findall(citation_pattern, response)
|
| 149 |
-
|
| 150 |
# Clean up filenames
|
| 151 |
citations = []
|
| 152 |
for match in matches:
|
| 153 |
filename = match.strip()
|
| 154 |
if filename and filename not in citations:
|
| 155 |
citations.append(filename)
|
| 156 |
-
|
| 157 |
return citations
|
| 158 |
|
| 159 |
@staticmethod
|
| 160 |
-
def validate_citations(
|
|
|
|
|
|
|
| 161 |
"""
|
| 162 |
Validate that all citations in response refer to available sources.
|
| 163 |
-
|
| 164 |
Args:
|
| 165 |
response: Generated response text
|
| 166 |
available_sources: List of available source filenames
|
| 167 |
-
|
| 168 |
Returns:
|
| 169 |
Dictionary mapping citations to their validity
|
| 170 |
"""
|
| 171 |
citations = PromptTemplates.extract_citations(response)
|
| 172 |
validation = {}
|
| 173 |
-
|
| 174 |
for citation in citations:
|
| 175 |
# Check if citation matches any available source
|
| 176 |
-
valid = any(
|
| 177 |
-
|
|
|
|
| 178 |
validation[citation] = valid
|
| 179 |
-
|
| 180 |
return validation
|
| 181 |
|
| 182 |
@staticmethod
|
| 183 |
-
def add_fallback_citations(
|
| 184 |
-
response: str,
|
| 185 |
-
search_results: List[Dict]
|
| 186 |
-
) -> str:
|
| 187 |
"""
|
| 188 |
Add citations to response if none were provided by LLM.
|
| 189 |
-
|
| 190 |
Args:
|
| 191 |
response: Generated response text
|
| 192 |
search_results: Original search results used for context
|
| 193 |
-
|
| 194 |
Returns:
|
| 195 |
Response with added citations if needed
|
| 196 |
"""
|
| 197 |
existing_citations = PromptTemplates.extract_citations(response)
|
| 198 |
-
|
| 199 |
if existing_citations:
|
| 200 |
return response # Already has citations
|
| 201 |
-
|
| 202 |
if not search_results:
|
| 203 |
return response # No sources to cite
|
| 204 |
-
|
| 205 |
# Add citations from top search results
|
| 206 |
top_sources = []
|
| 207 |
for result in search_results[:3]: # Top 3 sources
|
| 208 |
filename = result.get("metadata", {}).get("filename", "")
|
| 209 |
if filename and filename not in top_sources:
|
| 210 |
top_sources.append(filename)
|
| 211 |
-
|
| 212 |
if top_sources:
|
| 213 |
citation_text = " [Sources: " + ", ".join(top_sources) + "]"
|
| 214 |
return response + citation_text
|
| 215 |
-
|
| 216 |
-
return response
|
|
|
|
| 1 |
"""
|
| 2 |
Prompt Templates for Corporate Policy Q&A
|
| 3 |
|
| 4 |
+
This module contains predefined prompt templates optimized for
|
| 5 |
corporate policy question-answering with proper citation requirements.
|
| 6 |
"""
|
| 7 |
|
|
|
|
| 8 |
from dataclasses import dataclass
|
| 9 |
+
from typing import Dict, List
|
| 10 |
|
| 11 |
|
| 12 |
@dataclass
|
| 13 |
class PromptTemplate:
|
| 14 |
"""Template for generating prompts with context and citations."""
|
| 15 |
+
|
| 16 |
system_prompt: str
|
| 17 |
user_template: str
|
| 18 |
citation_format: str
|
|
|
|
| 21 |
class PromptTemplates:
|
| 22 |
"""
|
| 23 |
Collection of prompt templates for different types of policy questions.
|
| 24 |
+
|
| 25 |
Templates are designed to ensure:
|
| 26 |
- Accurate responses based on provided context
|
| 27 |
- Proper citation of source documents
|
|
|
|
| 30 |
"""
|
| 31 |
|
| 32 |
# System prompt for corporate policy assistant
|
| 33 |
+
SYSTEM_PROMPT = """You are a helpful corporate policy assistant. Your job is to answer questions about company policies based ONLY on the provided context documents. # noqa: E501
|
| 34 |
|
| 35 |
IMPORTANT GUIDELINES:
|
| 36 |
1. Answer questions using ONLY the information provided in the context
|
| 37 |
+
2. If the context doesn't contain enough information to answer the question, say so explicitly # noqa: E501
|
| 38 |
3. Always cite your sources using the format: [Source: filename.md]
|
| 39 |
4. Be accurate, concise, and professional
|
| 40 |
+
5. If asked about topics not covered in the policies, politely redirect to HR or appropriate department # noqa: E501
|
| 41 |
+
6. Do not make assumptions or provide information not explicitly stated in the context # noqa: E501
|
| 42 |
|
| 43 |
Your responses should be helpful while staying strictly within the scope of the provided corporate policies."""
|
| 44 |
|
|
|
|
| 46 |
def get_policy_qa_template(cls) -> PromptTemplate:
|
| 47 |
"""
|
| 48 |
Get the standard template for policy question-answering.
|
| 49 |
+
|
| 50 |
Returns:
|
| 51 |
PromptTemplate configured for corporate policy Q&A
|
| 52 |
"""
|
| 53 |
return PromptTemplate(
|
| 54 |
system_prompt=cls.SYSTEM_PROMPT,
|
| 55 |
+
user_template="""Based on the following corporate policy documents, please answer this question: {question} # noqa: E501
|
| 56 |
|
| 57 |
CONTEXT DOCUMENTS:
|
| 58 |
{context}
|
| 59 |
|
| 60 |
+
Please provide a clear, accurate answer based on the information above. Include citations for all information using the format [Source: filename.md].""", # noqa: E501
|
| 61 |
+
citation_format="[Source: {filename}]",
|
| 62 |
)
|
| 63 |
|
| 64 |
@classmethod
|
| 65 |
def get_clarification_template(cls) -> PromptTemplate:
|
| 66 |
"""
|
| 67 |
Get template for when clarification is needed.
|
| 68 |
+
|
| 69 |
Returns:
|
| 70 |
PromptTemplate for clarification requests
|
| 71 |
"""
|
|
|
|
| 76 |
CONTEXT DOCUMENTS:
|
| 77 |
{context}
|
| 78 |
|
| 79 |
+
The provided context documents don't contain sufficient information to fully answer this question. Please provide a helpful response that: # noqa: E501
|
| 80 |
1. Acknowledges what information is available (if any)
|
| 81 |
2. Clearly states what information is missing
|
| 82 |
3. Suggests appropriate next steps (contact HR, check other resources, etc.)
|
| 83 |
4. Cites any relevant sources using [Source: filename.md] format""",
|
| 84 |
+
citation_format="[Source: {filename}]",
|
| 85 |
)
|
| 86 |
|
| 87 |
@classmethod
|
| 88 |
def get_off_topic_template(cls) -> PromptTemplate:
|
| 89 |
"""
|
| 90 |
Get template for off-topic questions.
|
| 91 |
+
|
| 92 |
Returns:
|
| 93 |
PromptTemplate for redirecting off-topic questions
|
| 94 |
"""
|
|
|
|
| 96 |
system_prompt=cls.SYSTEM_PROMPT,
|
| 97 |
user_template="""The user asked: {question}
|
| 98 |
|
| 99 |
+
This question appears to be outside the scope of our corporate policies. Please provide a polite response that: # noqa: E501
|
| 100 |
1. Acknowledges the question
|
| 101 |
2. Explains that this falls outside corporate policy documentation
|
| 102 |
3. Suggests appropriate resources (HR, IT, management, etc.)
|
| 103 |
4. Offers to help with any policy-related questions instead""",
|
| 104 |
+
citation_format="",
|
| 105 |
)
|
| 106 |
|
| 107 |
@staticmethod
|
| 108 |
def format_context(search_results: List[Dict]) -> str:
|
| 109 |
"""
|
| 110 |
Format search results into context for the prompt.
|
| 111 |
+
|
| 112 |
Args:
|
| 113 |
search_results: List of search results from SearchService
|
| 114 |
+
|
| 115 |
Returns:
|
| 116 |
Formatted context string for the prompt
|
| 117 |
"""
|
| 118 |
if not search_results:
|
| 119 |
return "No relevant policy documents found."
|
| 120 |
+
|
| 121 |
context_parts = []
|
| 122 |
for i, result in enumerate(search_results[:5], 1): # Limit to top 5 results
|
| 123 |
filename = result.get("metadata", {}).get("filename", "unknown")
|
| 124 |
content = result.get("content", "").strip()
|
| 125 |
similarity = result.get("similarity_score", 0.0)
|
| 126 |
+
|
| 127 |
context_parts.append(
|
| 128 |
f"Document {i}: {filename} (relevance: {similarity:.2f})\n"
|
| 129 |
f"Content: {content}\n"
|
| 130 |
)
|
| 131 |
+
|
| 132 |
return "\n---\n".join(context_parts)
|
| 133 |
|
| 134 |
@staticmethod
|
| 135 |
def extract_citations(response: str) -> List[str]:
|
| 136 |
"""
|
| 137 |
Extract citations from LLM response.
|
| 138 |
+
|
| 139 |
Args:
|
| 140 |
response: Generated response text
|
| 141 |
+
|
| 142 |
Returns:
|
| 143 |
List of extracted filenames from citations
|
| 144 |
"""
|
| 145 |
import re
|
| 146 |
+
|
| 147 |
# Pattern to match [Source: filename.md] format
|
| 148 |
+
citation_pattern = r"\[Source:\s*([^\]]+)\]"
|
| 149 |
matches = re.findall(citation_pattern, response)
|
| 150 |
+
|
| 151 |
# Clean up filenames
|
| 152 |
citations = []
|
| 153 |
for match in matches:
|
| 154 |
filename = match.strip()
|
| 155 |
if filename and filename not in citations:
|
| 156 |
citations.append(filename)
|
| 157 |
+
|
| 158 |
return citations
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
+
def validate_citations(
|
| 162 |
+
response: str, available_sources: List[str]
|
| 163 |
+
) -> Dict[str, bool]:
|
| 164 |
"""
|
| 165 |
Validate that all citations in response refer to available sources.
|
| 166 |
+
|
| 167 |
Args:
|
| 168 |
response: Generated response text
|
| 169 |
available_sources: List of available source filenames
|
| 170 |
+
|
| 171 |
Returns:
|
| 172 |
Dictionary mapping citations to their validity
|
| 173 |
"""
|
| 174 |
citations = PromptTemplates.extract_citations(response)
|
| 175 |
validation = {}
|
| 176 |
+
|
| 177 |
for citation in citations:
|
| 178 |
# Check if citation matches any available source
|
| 179 |
+
valid = any(
|
| 180 |
+
citation in source or source in citation for source in available_sources
|
| 181 |
+
)
|
| 182 |
validation[citation] = valid
|
| 183 |
+
|
| 184 |
return validation
|
| 185 |
|
| 186 |
@staticmethod
|
| 187 |
+
def add_fallback_citations(response: str, search_results: List[Dict]) -> str:
|
|
|
| 188 |
"""
|
| 189 |
Add citations to response if none were provided by LLM.
|
| 190 |
+
|
| 191 |
Args:
|
| 192 |
response: Generated response text
|
| 193 |
search_results: Original search results used for context
|
| 194 |
+
|
| 195 |
Returns:
|
| 196 |
Response with added citations if needed
|
| 197 |
"""
|
| 198 |
existing_citations = PromptTemplates.extract_citations(response)
|
| 199 |
+
|
| 200 |
if existing_citations:
|
| 201 |
return response # Already has citations
|
| 202 |
+
|
| 203 |
if not search_results:
|
| 204 |
return response # No sources to cite
|
| 205 |
+
|
| 206 |
# Add citations from top search results
|
| 207 |
top_sources = []
|
| 208 |
for result in search_results[:3]: # Top 3 sources
|
| 209 |
filename = result.get("metadata", {}).get("filename", "")
|
| 210 |
if filename and filename not in top_sources:
|
| 211 |
top_sources.append(filename)
|
| 212 |
+
|
| 213 |
if top_sources:
|
| 214 |
citation_text = " [Sources: " + ", ".join(top_sources) + "]"
|
| 215 |
return response + citation_text
|
| 216 |
+
|
| 217 |
+
return response
|
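For orientation, here is a minimal usage sketch of the citation helpers defined in this module. The sample response text, filenames, and search result below are invented for illustration; the class and method names match the file above.

from src.llm.prompt_templates import PromptTemplates

# Hypothetical LLM output and source list, for illustration only
response = "Employees may work remotely up to three days per week [Source: remote_work.md]"
available_sources = ["remote_work.md", "pto_policy.md"]

print(PromptTemplates.extract_citations(response))
# ['remote_work.md']
print(PromptTemplates.validate_citations(response, available_sources))
# {'remote_work.md': True}

# If the model returned no citation, fall back to the top search results
uncited = "Employees may work remotely up to three days per week."
search_results = [
    {"metadata": {"filename": "remote_work.md"}, "content": "...", "similarity_score": 0.82}
]
print(PromptTemplates.add_fallback_citations(uncited, search_results))
# 'Employees may work remotely up to three days per week. [Sources: remote_work.md]'
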
src/rag/__init__.py
CHANGED
|
@@ -7,4 +7,4 @@ combining semantic search with LLM-based response generation.
|
|
| 7 |
Classes:
|
| 8 |
RAGPipeline: Main RAG orchestration service
|
| 9 |
ResponseFormatter: Formats LLM responses with citations and metadata
|
| 10 |
-
"""
|
|
|
|
| 7 |
Classes:
|
| 8 |
RAGPipeline: Main RAG orchestration service
|
| 9 |
ResponseFormatter: Formats LLM responses with citations and metadata
|
| 10 |
+
"""
|
src/rag/rag_pipeline.py
CHANGED
|
@@ -7,14 +7,15 @@ combining semantic search, context management, and LLM generation.
|
|
| 7 |
|
| 8 |
import logging
|
| 9 |
import time
|
| 10 |
-
from typing import Any, Dict, List, Optional
|
| 11 |
from dataclasses import dataclass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# Import our modules
|
| 14 |
from src.search.search_service import SearchService
|
| 15 |
-
from src.llm.llm_service import LLMService, LLMResponse
|
| 16 |
-
from src.llm.context_manager import ContextManager, ContextConfig
|
| 17 |
-
from src.llm.prompt_templates import PromptTemplates, PromptTemplate
|
| 18 |
|
| 19 |
logger = logging.getLogger(__name__)
|
| 20 |
|
|
@@ -22,6 +23,7 @@ logger = logging.getLogger(__name__)
|
|
| 22 |
@dataclass
|
| 23 |
class RAGConfig:
|
| 24 |
"""Configuration for RAG pipeline."""
|
|
|
|
| 25 |
max_context_length: int = 3000
|
| 26 |
search_top_k: int = 10
|
| 27 |
search_threshold: float = 0.1
|
|
@@ -33,6 +35,7 @@ class RAGConfig:
|
|
| 33 |
@dataclass
|
| 34 |
class RAGResponse:
|
| 35 |
"""Response from RAG pipeline with metadata."""
|
|
|
|
| 36 |
answer: str
|
| 37 |
sources: List[Dict[str, Any]]
|
| 38 |
confidence: float
|
|
@@ -48,7 +51,7 @@ class RAGResponse:
|
|
| 48 |
class RAGPipeline:
|
| 49 |
"""
|
| 50 |
Complete RAG pipeline orchestrating retrieval and generation.
|
| 51 |
-
|
| 52 |
Combines:
|
| 53 |
- Semantic search for context retrieval
|
| 54 |
- Context optimization and management
|
|
@@ -60,84 +63,84 @@ class RAGPipeline:
|
|
| 60 |
self,
|
| 61 |
search_service: SearchService,
|
| 62 |
llm_service: LLMService,
|
| 63 |
-
config: Optional[RAGConfig] = None
|
| 64 |
):
|
| 65 |
"""
|
| 66 |
Initialize RAG pipeline with required services.
|
| 67 |
-
|
| 68 |
Args:
|
| 69 |
search_service: Configured SearchService instance
|
| 70 |
-
llm_service: Configured LLMService instance
|
| 71 |
config: RAG configuration, uses defaults if None
|
| 72 |
"""
|
| 73 |
self.search_service = search_service
|
| 74 |
self.llm_service = llm_service
|
| 75 |
self.config = config or RAGConfig()
|
| 76 |
-
|
| 77 |
# Initialize context manager with matching config
|
| 78 |
context_config = ContextConfig(
|
| 79 |
max_context_length=self.config.max_context_length,
|
| 80 |
max_results=self.config.search_top_k,
|
| 81 |
-
min_similarity=self.config.search_threshold
|
| 82 |
)
|
| 83 |
self.context_manager = ContextManager(context_config)
|
| 84 |
-
|
| 85 |
# Initialize prompt templates
|
| 86 |
self.prompt_templates = PromptTemplates()
|
| 87 |
-
|
| 88 |
logger.info("RAGPipeline initialized successfully")
|
| 89 |
|
| 90 |
def generate_answer(self, question: str) -> RAGResponse:
|
| 91 |
"""
|
| 92 |
Generate answer to question using RAG pipeline.
|
| 93 |
-
|
| 94 |
Args:
|
| 95 |
question: User's question about corporate policies
|
| 96 |
-
|
| 97 |
Returns:
|
| 98 |
RAGResponse with answer and metadata
|
| 99 |
"""
|
| 100 |
start_time = time.time()
|
| 101 |
-
|
| 102 |
try:
|
| 103 |
# Step 1: Retrieve relevant context
|
| 104 |
logger.debug(f"Starting RAG pipeline for question: {question[:100]}...")
|
| 105 |
-
|
| 106 |
search_results = self._retrieve_context(question)
|
| 107 |
-
|
| 108 |
if not search_results:
|
| 109 |
return self._create_no_context_response(question, start_time)
|
| 110 |
-
|
| 111 |
# Step 2: Prepare and optimize context
|
| 112 |
context, filtered_results = self.context_manager.prepare_context(
|
| 113 |
search_results, question
|
| 114 |
)
|
| 115 |
-
|
| 116 |
# Step 3: Check if we have sufficient context
|
| 117 |
quality_metrics = self.context_manager.validate_context_quality(
|
| 118 |
context, question, self.config.min_similarity_for_answer
|
| 119 |
)
|
| 120 |
-
|
| 121 |
if not quality_metrics["passes_validation"]:
|
| 122 |
return self._create_insufficient_context_response(
|
| 123 |
question, filtered_results, start_time
|
| 124 |
)
|
| 125 |
-
|
| 126 |
# Step 4: Generate response using LLM
|
| 127 |
llm_response = self._generate_llm_response(question, context)
|
| 128 |
-
|
| 129 |
if not llm_response.success:
|
| 130 |
return self._create_llm_error_response(
|
| 131 |
question, llm_response.error_message, start_time
|
| 132 |
)
|
| 133 |
-
|
| 134 |
# Step 5: Process and validate response
|
| 135 |
processed_response = self._process_response(
|
| 136 |
llm_response.content, filtered_results
|
| 137 |
)
|
| 138 |
-
|
| 139 |
processing_time = time.time() - start_time
|
| 140 |
-
|
| 141 |
return RAGResponse(
|
| 142 |
answer=processed_response,
|
| 143 |
sources=self._format_sources(filtered_results),
|
|
@@ -147,13 +150,16 @@ class RAGPipeline:
|
|
| 147 |
llm_model=llm_response.model,
|
| 148 |
context_length=len(context),
|
| 149 |
search_results_count=len(search_results),
|
| 150 |
-
success=True
|
| 151 |
)
|
| 152 |
-
|
| 153 |
except Exception as e:
|
| 154 |
logger.error(f"RAG pipeline error: {e}")
|
| 155 |
return RAGResponse(
|
| 156 |
-
answer="I apologize, but I encountered an error processing your question. Please try again or contact support.",
|
|
|
|
|
|
|
|
|
|
| 157 |
sources=[],
|
| 158 |
confidence=0.0,
|
| 159 |
processing_time=time.time() - start_time,
|
|
@@ -162,7 +168,7 @@ class RAGPipeline:
|
|
| 162 |
context_length=0,
|
| 163 |
search_results_count=0,
|
| 164 |
success=False,
|
| 165 |
-
error_message=str(e)
|
| 166 |
)
|
| 167 |
|
| 168 |
def _retrieve_context(self, question: str) -> List[Dict[str, Any]]:
|
|
@@ -171,12 +177,12 @@ class RAGPipeline:
|
|
| 171 |
results = self.search_service.search(
|
| 172 |
query=question,
|
| 173 |
top_k=self.config.search_top_k,
|
| 174 |
-
threshold=self.config.search_threshold
|
| 175 |
)
|
| 176 |
-
|
| 177 |
logger.debug(f"Retrieved {len(results)} search results")
|
| 178 |
return results
|
| 179 |
-
|
| 180 |
except Exception as e:
|
| 181 |
logger.error(f"Context retrieval error: {e}")
|
| 182 |
return []
|
|
@@ -184,95 +190,108 @@ class RAGPipeline:
|
|
| 184 |
def _generate_llm_response(self, question: str, context: str) -> LLMResponse:
|
| 185 |
"""Generate response using LLM with formatted prompt."""
|
| 186 |
template = self.prompt_templates.get_policy_qa_template()
|
| 187 |
-
|
| 188 |
# Format the prompt
|
| 189 |
formatted_prompt = template.user_template.format(
|
| 190 |
-
question=question,
|
| 191 |
-
context=context
|
| 192 |
)
|
| 193 |
-
|
| 194 |
# Add system prompt (if LLM service supports it in future)
|
| 195 |
full_prompt = f"{template.system_prompt}\n\n{formatted_prompt}"
|
| 196 |
-
|
| 197 |
return self.llm_service.generate_response(full_prompt)
|
| 198 |
|
| 199 |
def _process_response(
|
| 200 |
-
self,
|
| 201 |
-
raw_response: str,
|
| 202 |
-
search_results: List[Dict[str, Any]]
|
| 203 |
) -> str:
|
| 204 |
"""Process and validate LLM response."""
|
| 205 |
-
|
| 206 |
# Ensure citations are present
|
| 207 |
response_with_citations = self.prompt_templates.add_fallback_citations(
|
| 208 |
raw_response, search_results
|
| 209 |
)
|
| 210 |
-
|
| 211 |
# Validate citations if enabled
|
| 212 |
if self.config.enable_citation_validation:
|
| 213 |
available_sources = [
|
| 214 |
result.get("metadata", {}).get("filename", "")
|
| 215 |
for result in search_results
|
| 216 |
]
|
| 217 |
-
|
| 218 |
citation_validation = self.prompt_templates.validate_citations(
|
| 219 |
response_with_citations, available_sources
|
| 220 |
)
|
| 221 |
-
|
| 222 |
# Log any invalid citations
|
| 223 |
invalid_citations = [
|
| 224 |
-
citation for citation, valid in citation_validation.items()
|
| 225 |
-
if not valid
|
| 226 |
]
|
| 227 |
-
|
| 228 |
if invalid_citations:
|
| 229 |
logger.warning(f"Invalid citations detected: {invalid_citations}")
|
| 230 |
-
|
| 231 |
# Truncate if too long
|
| 232 |
if len(response_with_citations) > self.config.max_response_length:
|
| 233 |
-
truncated = response_with_citations[:self.config.max_response_length - 3] + "..."
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
return truncated
|
| 236 |
-
|
| 237 |
return response_with_citations
|
| 238 |
|
| 239 |
-
def _format_sources(self, search_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
|
|
|
|
|
| 240 |
"""Format search results for response metadata."""
|
| 241 |
sources = []
|
| 242 |
-
|
| 243 |
for result in search_results:
|
| 244 |
metadata = result.get("metadata", {})
|
| 245 |
-
sources.append(
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
return sources
|
| 253 |
|
| 254 |
def _calculate_confidence(
|
| 255 |
-
self,
|
| 256 |
-
quality_metrics: Dict[str, Any],
|
| 257 |
-
llm_response: LLMResponse
|
| 258 |
) -> float:
|
| 259 |
"""Calculate confidence score for the response."""
|
| 260 |
-
|
| 261 |
# Base confidence on context quality
|
| 262 |
context_confidence = quality_metrics.get("estimated_relevance", 0.0)
|
| 263 |
-
|
| 264 |
# Adjust based on LLM response time (faster might indicate more confidence)
|
| 265 |
time_factor = min(1.0, 10.0 / max(llm_response.response_time, 1.0))
|
| 266 |
-
|
| 267 |
# Combine factors
|
| 268 |
confidence = (context_confidence * 0.7) + (time_factor * 0.3)
|
| 269 |
-
|
| 270 |
return min(1.0, max(0.0, confidence))
|
| 271 |
|
| 272 |
-
def _create_no_context_response(self, question: str, start_time: float) -> RAGResponse:
|
|
|
|
|
|
|
| 273 |
"""Create response when no relevant context found."""
|
| 274 |
return RAGResponse(
|
| 275 |
-
answer="I couldn't find any relevant information in our corporate policies to answer your question. Please contact HR or check other company resources for assistance.",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
sources=[],
|
| 277 |
confidence=0.0,
|
| 278 |
processing_time=time.time() - start_time,
|
|
@@ -280,18 +299,19 @@ class RAGPipeline:
|
|
| 280 |
llm_model="none",
|
| 281 |
context_length=0,
|
| 282 |
search_results_count=0,
|
| 283 |
-
success=True # This is a valid "no answer" response
|
| 284 |
)
|
| 285 |
|
| 286 |
def _create_insufficient_context_response(
|
| 287 |
-
self,
|
| 288 |
-
question: str,
|
| 289 |
-
results: List[Dict[str, Any]],
|
| 290 |
-
start_time: float
|
| 291 |
) -> RAGResponse:
|
| 292 |
"""Create response when context quality is insufficient."""
|
| 293 |
return RAGResponse(
|
| 294 |
-
answer="I found some potentially relevant information, but it doesn't provide enough detail to fully answer your question. Please contact HR for more specific guidance or rephrase your question.",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
sources=self._format_sources(results),
|
| 296 |
confidence=0.2,
|
| 297 |
processing_time=time.time() - start_time,
|
|
@@ -299,18 +319,18 @@ class RAGPipeline:
|
|
| 299 |
llm_model="none",
|
| 300 |
context_length=0,
|
| 301 |
search_results_count=len(results),
|
| 302 |
-
success=True
|
| 303 |
)
|
| 304 |
|
| 305 |
def _create_llm_error_response(
|
| 306 |
-
self,
|
| 307 |
-
question: str,
|
| 308 |
-
error_message: str,
|
| 309 |
-
start_time: float
|
| 310 |
) -> RAGResponse:
|
| 311 |
"""Create response when LLM generation fails."""
|
| 312 |
return RAGResponse(
|
| 313 |
-
answer="I apologize, but I'm currently unable to generate a response. Please try again in a moment or contact support if the issue persists.",
|
|
|
|
|
|
|
|
|
|
| 314 |
sources=[],
|
| 315 |
confidence=0.0,
|
| 316 |
processing_time=time.time() - start_time,
|
|
@@ -319,54 +339,54 @@ class RAGPipeline:
|
|
| 319 |
context_length=0,
|
| 320 |
search_results_count=0,
|
| 321 |
success=False,
|
| 322 |
-
error_message=error_message
|
| 323 |
)
|
| 324 |
|
| 325 |
def health_check(self) -> Dict[str, Any]:
|
| 326 |
"""
|
| 327 |
Perform health check on all pipeline components.
|
| 328 |
-
|
| 329 |
Returns:
|
| 330 |
Dictionary with component health status
|
| 331 |
"""
|
| 332 |
-
health_status = {
|
| 333 |
-
|
| 334 |
-
"components": {}
|
| 335 |
-
}
|
| 336 |
-
|
| 337 |
try:
|
| 338 |
# Check search service
|
| 339 |
-
test_results = self.search_service.search("test query", top_k=1, threshold=0.0)
|
|
|
|
|
|
|
| 340 |
health_status["components"]["search_service"] = {
|
| 341 |
"status": "healthy",
|
| 342 |
-
"test_results_count": len(test_results)
|
| 343 |
}
|
| 344 |
except Exception as e:
|
| 345 |
health_status["components"]["search_service"] = {
|
| 346 |
"status": "unhealthy",
|
| 347 |
-
"error": str(e)
|
| 348 |
}
|
| 349 |
health_status["pipeline"] = "degraded"
|
| 350 |
-
|
| 351 |
try:
|
| 352 |
# Check LLM service
|
| 353 |
llm_health = self.llm_service.health_check()
|
| 354 |
health_status["components"]["llm_service"] = llm_health
|
| 355 |
-
|
| 356 |
# Pipeline is unhealthy if all LLM providers are down
|
| 357 |
healthy_providers = sum(
|
| 358 |
-
1
|
|
|
|
| 359 |
if provider_status.get("status") == "healthy"
|
| 360 |
)
|
| 361 |
-
|
| 362 |
if healthy_providers == 0:
|
| 363 |
health_status["pipeline"] = "unhealthy"
|
| 364 |
-
|
| 365 |
except Exception as e:
|
| 366 |
health_status["components"]["llm_service"] = {
|
| 367 |
-
"status": "unhealthy",
|
| 368 |
-
"error": str(e)
|
| 369 |
}
|
| 370 |
health_status["pipeline"] = "unhealthy"
|
| 371 |
-
|
| 372 |
-
return health_status
|
|
|
|
| 7 |
|
| 8 |
import logging
|
| 9 |
import time
|
|
|
|
| 10 |
from dataclasses import dataclass
|
| 11 |
+
from typing import Any, Dict, List, Optional
|
| 12 |
+
|
| 13 |
+
from src.llm.context_manager import ContextConfig, ContextManager
|
| 14 |
+
from src.llm.llm_service import LLMResponse, LLMService
|
| 15 |
+
from src.llm.prompt_templates import PromptTemplates
|
| 16 |
|
| 17 |
# Import our modules
|
| 18 |
from src.search.search_service import SearchService
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
logger = logging.getLogger(__name__)
|
| 21 |
|
|
|
|
| 23 |
@dataclass
|
| 24 |
class RAGConfig:
|
| 25 |
"""Configuration for RAG pipeline."""
|
| 26 |
+
|
| 27 |
max_context_length: int = 3000
|
| 28 |
search_top_k: int = 10
|
| 29 |
search_threshold: float = 0.1
|
|
|
|
| 35 |
@dataclass
|
| 36 |
class RAGResponse:
|
| 37 |
"""Response from RAG pipeline with metadata."""
|
| 38 |
+
|
| 39 |
answer: str
|
| 40 |
sources: List[Dict[str, Any]]
|
| 41 |
confidence: float
|
|
|
|
| 51 |
class RAGPipeline:
|
| 52 |
"""
|
| 53 |
Complete RAG pipeline orchestrating retrieval and generation.
|
| 54 |
+
|
| 55 |
Combines:
|
| 56 |
- Semantic search for context retrieval
|
| 57 |
- Context optimization and management
|
|
|
|
| 63 |
self,
|
| 64 |
search_service: SearchService,
|
| 65 |
llm_service: LLMService,
|
| 66 |
+
config: Optional[RAGConfig] = None,
|
| 67 |
):
|
| 68 |
"""
|
| 69 |
Initialize RAG pipeline with required services.
|
| 70 |
+
|
| 71 |
Args:
|
| 72 |
search_service: Configured SearchService instance
|
| 73 |
+
llm_service: Configured LLMService instance
|
| 74 |
config: RAG configuration, uses defaults if None
|
| 75 |
"""
|
| 76 |
self.search_service = search_service
|
| 77 |
self.llm_service = llm_service
|
| 78 |
self.config = config or RAGConfig()
|
| 79 |
+
|
| 80 |
# Initialize context manager with matching config
|
| 81 |
context_config = ContextConfig(
|
| 82 |
max_context_length=self.config.max_context_length,
|
| 83 |
max_results=self.config.search_top_k,
|
| 84 |
+
min_similarity=self.config.search_threshold,
|
| 85 |
)
|
| 86 |
self.context_manager = ContextManager(context_config)
|
| 87 |
+
|
| 88 |
# Initialize prompt templates
|
| 89 |
self.prompt_templates = PromptTemplates()
|
| 90 |
+
|
| 91 |
logger.info("RAGPipeline initialized successfully")
|
| 92 |
|
| 93 |
def generate_answer(self, question: str) -> RAGResponse:
|
| 94 |
"""
|
| 95 |
Generate answer to question using RAG pipeline.
|
| 96 |
+
|
| 97 |
Args:
|
| 98 |
question: User's question about corporate policies
|
| 99 |
+
|
| 100 |
Returns:
|
| 101 |
RAGResponse with answer and metadata
|
| 102 |
"""
|
| 103 |
start_time = time.time()
|
| 104 |
+
|
| 105 |
try:
|
| 106 |
# Step 1: Retrieve relevant context
|
| 107 |
logger.debug(f"Starting RAG pipeline for question: {question[:100]}...")
|
| 108 |
+
|
| 109 |
search_results = self._retrieve_context(question)
|
| 110 |
+
|
| 111 |
if not search_results:
|
| 112 |
return self._create_no_context_response(question, start_time)
|
| 113 |
+
|
| 114 |
# Step 2: Prepare and optimize context
|
| 115 |
context, filtered_results = self.context_manager.prepare_context(
|
| 116 |
search_results, question
|
| 117 |
)
|
| 118 |
+
|
| 119 |
# Step 3: Check if we have sufficient context
|
| 120 |
quality_metrics = self.context_manager.validate_context_quality(
|
| 121 |
context, question, self.config.min_similarity_for_answer
|
| 122 |
)
|
| 123 |
+
|
| 124 |
if not quality_metrics["passes_validation"]:
|
| 125 |
return self._create_insufficient_context_response(
|
| 126 |
question, filtered_results, start_time
|
| 127 |
)
|
| 128 |
+
|
| 129 |
# Step 4: Generate response using LLM
|
| 130 |
llm_response = self._generate_llm_response(question, context)
|
| 131 |
+
|
| 132 |
if not llm_response.success:
|
| 133 |
return self._create_llm_error_response(
|
| 134 |
question, llm_response.error_message, start_time
|
| 135 |
)
|
| 136 |
+
|
| 137 |
# Step 5: Process and validate response
|
| 138 |
processed_response = self._process_response(
|
| 139 |
llm_response.content, filtered_results
|
| 140 |
)
|
| 141 |
+
|
| 142 |
processing_time = time.time() - start_time
|
| 143 |
+
|
| 144 |
return RAGResponse(
|
| 145 |
answer=processed_response,
|
| 146 |
sources=self._format_sources(filtered_results),
|
|
|
|
| 150 |
llm_model=llm_response.model,
|
| 151 |
context_length=len(context),
|
| 152 |
search_results_count=len(search_results),
|
| 153 |
+
success=True,
|
| 154 |
)
|
| 155 |
+
|
| 156 |
except Exception as e:
|
| 157 |
logger.error(f"RAG pipeline error: {e}")
|
| 158 |
return RAGResponse(
|
| 159 |
+
answer=(
|
| 160 |
+
"I apologize, but I encountered an error processing your question. "
|
| 161 |
+
"Please try again or contact support."
|
| 162 |
+
),
|
| 163 |
sources=[],
|
| 164 |
confidence=0.0,
|
| 165 |
processing_time=time.time() - start_time,
|
|
|
|
| 168 |
context_length=0,
|
| 169 |
search_results_count=0,
|
| 170 |
success=False,
|
| 171 |
+
error_message=str(e),
|
| 172 |
)
|
| 173 |
|
| 174 |
def _retrieve_context(self, question: str) -> List[Dict[str, Any]]:
|
|
|
|
| 177 |
results = self.search_service.search(
|
| 178 |
query=question,
|
| 179 |
top_k=self.config.search_top_k,
|
| 180 |
+
threshold=self.config.search_threshold,
|
| 181 |
)
|
| 182 |
+
|
| 183 |
logger.debug(f"Retrieved {len(results)} search results")
|
| 184 |
return results
|
| 185 |
+
|
| 186 |
except Exception as e:
|
| 187 |
logger.error(f"Context retrieval error: {e}")
|
| 188 |
return []
|
|
|
|
| 190 |
def _generate_llm_response(self, question: str, context: str) -> LLMResponse:
|
| 191 |
"""Generate response using LLM with formatted prompt."""
|
| 192 |
template = self.prompt_templates.get_policy_qa_template()
|
| 193 |
+
|
| 194 |
# Format the prompt
|
| 195 |
formatted_prompt = template.user_template.format(
|
| 196 |
+
question=question, context=context
|
|
|
|
| 197 |
)
|
| 198 |
+
|
| 199 |
# Add system prompt (if LLM service supports it in future)
|
| 200 |
full_prompt = f"{template.system_prompt}\n\n{formatted_prompt}"
|
| 201 |
+
|
| 202 |
return self.llm_service.generate_response(full_prompt)
|
| 203 |
|
| 204 |
def _process_response(
|
| 205 |
+
self, raw_response: str, search_results: List[Dict[str, Any]]
|
|
|
|
|
|
|
| 206 |
) -> str:
|
| 207 |
"""Process and validate LLM response."""
|
| 208 |
+
|
| 209 |
# Ensure citations are present
|
| 210 |
response_with_citations = self.prompt_templates.add_fallback_citations(
|
| 211 |
raw_response, search_results
|
| 212 |
)
|
| 213 |
+
|
| 214 |
# Validate citations if enabled
|
| 215 |
if self.config.enable_citation_validation:
|
| 216 |
available_sources = [
|
| 217 |
result.get("metadata", {}).get("filename", "")
|
| 218 |
for result in search_results
|
| 219 |
]
|
| 220 |
+
|
| 221 |
citation_validation = self.prompt_templates.validate_citations(
|
| 222 |
response_with_citations, available_sources
|
| 223 |
)
|
| 224 |
+
|
| 225 |
# Log any invalid citations
|
| 226 |
invalid_citations = [
|
| 227 |
+
citation for citation, valid in citation_validation.items() if not valid
|
|
|
|
| 228 |
]
|
| 229 |
+
|
| 230 |
if invalid_citations:
|
| 231 |
logger.warning(f"Invalid citations detected: {invalid_citations}")
|
| 232 |
+
|
| 233 |
# Truncate if too long
|
| 234 |
if len(response_with_citations) > self.config.max_response_length:
|
| 235 |
+
truncated = (
|
| 236 |
+
response_with_citations[: self.config.max_response_length - 3] + "..."
|
| 237 |
+
)
|
| 238 |
+
logger.warning(
|
| 239 |
+
f"Response truncated from {len(response_with_citations)} "
|
| 240 |
+
f"to {len(truncated)} characters"
|
| 241 |
+
)
|
| 242 |
return truncated
|
| 243 |
+
|
| 244 |
return response_with_citations
|
| 245 |
|
| 246 |
+
def _format_sources(
|
| 247 |
+
self, search_results: List[Dict[str, Any]]
|
| 248 |
+
) -> List[Dict[str, Any]]:
|
| 249 |
"""Format search results for response metadata."""
|
| 250 |
sources = []
|
| 251 |
+
|
| 252 |
for result in search_results:
|
| 253 |
metadata = result.get("metadata", {})
|
| 254 |
+
sources.append(
|
| 255 |
+
{
|
| 256 |
+
"document": metadata.get("filename", "unknown"),
|
| 257 |
+
"chunk_id": result.get("chunk_id", ""),
|
| 258 |
+
"relevance_score": result.get("similarity_score", 0.0),
|
| 259 |
+
"excerpt": (
|
| 260 |
+
result.get("content", "")[:200] + "..."
|
| 261 |
+
if len(result.get("content", "")) > 200
|
| 262 |
+
else result.get("content", "")
|
| 263 |
+
),
|
| 264 |
+
}
|
| 265 |
+
)
|
| 266 |
+
|
| 267 |
return sources
|
| 268 |
|
| 269 |
def _calculate_confidence(
|
| 270 |
+
self, quality_metrics: Dict[str, Any], llm_response: LLMResponse
|
|
|
|
|
|
|
| 271 |
) -> float:
|
| 272 |
"""Calculate confidence score for the response."""
|
| 273 |
+
|
| 274 |
# Base confidence on context quality
|
| 275 |
context_confidence = quality_metrics.get("estimated_relevance", 0.0)
|
| 276 |
+
|
| 277 |
# Adjust based on LLM response time (faster might indicate more confidence)
|
| 278 |
time_factor = min(1.0, 10.0 / max(llm_response.response_time, 1.0))
|
| 279 |
+
|
| 280 |
# Combine factors
|
| 281 |
confidence = (context_confidence * 0.7) + (time_factor * 0.3)
|
| 282 |
+
|
| 283 |
return min(1.0, max(0.0, confidence))
|
| 284 |
|
| 285 |
+
def _create_no_context_response(
|
| 286 |
+
self, question: str, start_time: float
|
| 287 |
+
) -> RAGResponse:
|
| 288 |
"""Create response when no relevant context found."""
|
| 289 |
return RAGResponse(
|
| 290 |
+
answer=(
|
| 291 |
+
"I couldn't find any relevant information in our corporate policies "
|
| 292 |
+
"to answer your question. Please contact HR or check other company "
|
| 293 |
+
"resources for assistance."
|
| 294 |
+
),
|
| 295 |
sources=[],
|
| 296 |
confidence=0.0,
|
| 297 |
processing_time=time.time() - start_time,
|
|
|
|
| 299 |
llm_model="none",
|
| 300 |
context_length=0,
|
| 301 |
search_results_count=0,
|
| 302 |
+
success=True, # This is a valid "no answer" response
|
| 303 |
)
|
| 304 |
|
| 305 |
def _create_insufficient_context_response(
|
| 306 |
+
self, question: str, results: List[Dict[str, Any]], start_time: float
|
|
|
|
|
|
|
|
|
|
| 307 |
) -> RAGResponse:
|
| 308 |
"""Create response when context quality is insufficient."""
|
| 309 |
return RAGResponse(
|
| 310 |
+
answer=(
|
| 311 |
+
"I found some potentially relevant information, but it doesn't provide "
|
| 312 |
+
"enough detail to fully answer your question. Please contact HR for "
|
| 313 |
+
"more specific guidance or rephrase your question."
|
| 314 |
+
),
|
| 315 |
sources=self._format_sources(results),
|
| 316 |
confidence=0.2,
|
| 317 |
processing_time=time.time() - start_time,
|
|
|
|
| 319 |
llm_model="none",
|
| 320 |
context_length=0,
|
| 321 |
search_results_count=len(results),
|
| 322 |
+
success=True,
|
| 323 |
)
|
| 324 |
|
| 325 |
def _create_llm_error_response(
|
| 326 |
+
self, question: str, error_message: str, start_time: float
|
|
|
|
|
|
|
|
|
|
| 327 |
) -> RAGResponse:
|
| 328 |
"""Create response when LLM generation fails."""
|
| 329 |
return RAGResponse(
|
| 330 |
+
answer=(
|
| 331 |
+
"I apologize, but I'm currently unable to generate a response. "
|
| 332 |
+
"Please try again in a moment or contact support if the issue persists."
|
| 333 |
+
),
|
| 334 |
sources=[],
|
| 335 |
confidence=0.0,
|
| 336 |
processing_time=time.time() - start_time,
|
|
|
|
| 339 |
context_length=0,
|
| 340 |
search_results_count=0,
|
| 341 |
success=False,
|
| 342 |
+
error_message=error_message,
|
| 343 |
)
|
| 344 |
|
| 345 |
def health_check(self) -> Dict[str, Any]:
|
| 346 |
"""
|
| 347 |
Perform health check on all pipeline components.
|
| 348 |
+
|
| 349 |
Returns:
|
| 350 |
Dictionary with component health status
|
| 351 |
"""
|
| 352 |
+
health_status = {"pipeline": "healthy", "components": {}}
|
| 353 |
+
|
|
|
|
|
|
|
|
|
|
| 354 |
try:
|
| 355 |
# Check search service
|
| 356 |
+
test_results = self.search_service.search(
|
| 357 |
+
"test query", top_k=1, threshold=0.0
|
| 358 |
+
)
|
| 359 |
health_status["components"]["search_service"] = {
|
| 360 |
"status": "healthy",
|
| 361 |
+
"test_results_count": len(test_results),
|
| 362 |
}
|
| 363 |
except Exception as e:
|
| 364 |
health_status["components"]["search_service"] = {
|
| 365 |
"status": "unhealthy",
|
| 366 |
+
"error": str(e),
|
| 367 |
}
|
| 368 |
health_status["pipeline"] = "degraded"
|
| 369 |
+
|
| 370 |
try:
|
| 371 |
# Check LLM service
|
| 372 |
llm_health = self.llm_service.health_check()
|
| 373 |
health_status["components"]["llm_service"] = llm_health
|
| 374 |
+
|
| 375 |
# Pipeline is unhealthy if all LLM providers are down
|
| 376 |
healthy_providers = sum(
|
| 377 |
+
1
|
| 378 |
+
for provider_status in llm_health.values()
|
| 379 |
if provider_status.get("status") == "healthy"
|
| 380 |
)
|
| 381 |
+
|
| 382 |
if healthy_providers == 0:
|
| 383 |
health_status["pipeline"] = "unhealthy"
|
| 384 |
+
|
| 385 |
except Exception as e:
|
| 386 |
health_status["components"]["llm_service"] = {
|
| 387 |
+
"status": "unhealthy",
|
| 388 |
+
"error": str(e),
|
| 389 |
}
|
| 390 |
health_status["pipeline"] = "unhealthy"
|
| 391 |
+
|
| 392 |
+
return health_status
|
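A minimal sketch of driving the pipeline above directly, assuming `search_service` and `llm_service` are already-configured SearchService and LLMService instances; the question text is made up.

from src.rag.rag_pipeline import RAGConfig, RAGPipeline

# Override a couple of defaults; the remaining RAGConfig fields keep their defaults
config = RAGConfig(max_context_length=2000, search_top_k=5)
pipeline = RAGPipeline(search_service, llm_service, config=config)

result = pipeline.generate_answer("How many PTO days do new employees receive?")
if result.success:
    print(result.answer)
    print(f"confidence={result.confidence:.2f}, sources={len(result.sources)}")
else:
    print(f"RAG error: {result.error_message}")

# Component status, e.g. for a /chat/health endpoint
status = pipeline.health_check()
print(status["pipeline"], list(status["components"].keys()))
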
src/rag/response_formatter.py
CHANGED
|
@@ -6,9 +6,8 @@ formatting, metadata inclusion, and consistent response structure.
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import logging
|
|
|
|
| 9 |
from typing import Any, Dict, List, Optional
|
| 10 |
-
from dataclasses import dataclass, asdict
|
| 11 |
-
import json
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
|
@@ -16,6 +15,7 @@ logger = logging.getLogger(__name__)
|
|
| 16 |
@dataclass
|
| 17 |
class FormattedResponse:
|
| 18 |
"""Standardized formatted response for API endpoints."""
|
|
|
|
| 19 |
status: str
|
| 20 |
answer: str
|
| 21 |
sources: List[Dict[str, Any]]
|
|
@@ -27,7 +27,7 @@ class FormattedResponse:
|
|
| 27 |
class ResponseFormatter:
|
| 28 |
"""
|
| 29 |
Formats RAG pipeline responses for various output formats.
|
| 30 |
-
|
| 31 |
Handles:
|
| 32 |
- API response formatting
|
| 33 |
- Citation formatting
|
|
@@ -40,23 +40,21 @@ class ResponseFormatter:
|
|
| 40 |
logger.info("ResponseFormatter initialized")
|
| 41 |
|
| 42 |
def format_api_response(
|
| 43 |
-
self,
|
| 44 |
-
rag_response: Any, # RAGResponse type
|
| 45 |
-
include_debug: bool = False
|
| 46 |
) -> Dict[str, Any]:
|
| 47 |
"""
|
| 48 |
Format RAG response for API consumption.
|
| 49 |
-
|
| 50 |
Args:
|
| 51 |
rag_response: RAGResponse from RAG pipeline
|
| 52 |
include_debug: Whether to include debug information
|
| 53 |
-
|
| 54 |
Returns:
|
| 55 |
Formatted dictionary for JSON API response
|
| 56 |
"""
|
| 57 |
if not rag_response.success:
|
| 58 |
return self._format_error_response(rag_response)
|
| 59 |
-
|
| 60 |
# Base response structure
|
| 61 |
formatted_response = {
|
| 62 |
"status": "success",
|
|
@@ -66,88 +64,96 @@ class ResponseFormatter:
|
|
| 66 |
"confidence": round(rag_response.confidence, 3),
|
| 67 |
"processing_time_ms": round(rag_response.processing_time * 1000, 1),
|
| 68 |
"source_count": len(rag_response.sources),
|
| 69 |
-
"context_length": rag_response.context_length
|
| 70 |
-
}
|
| 71 |
}
|
| 72 |
-
|
| 73 |
# Add debug information if requested
|
| 74 |
if include_debug:
|
| 75 |
formatted_response["debug"] = {
|
| 76 |
"llm_provider": rag_response.llm_provider,
|
| 77 |
"llm_model": rag_response.llm_model,
|
| 78 |
"search_results_count": rag_response.search_results_count,
|
| 79 |
-
"processing_time_seconds": round(rag_response.processing_time, 3)
|
| 80 |
}
|
| 81 |
-
|
| 82 |
return formatted_response
|
| 83 |
|
| 84 |
def format_chat_response(
|
| 85 |
self,
|
| 86 |
rag_response: Any, # RAGResponse type
|
| 87 |
conversation_id: Optional[str] = None,
|
| 88 |
-
include_sources: bool = True
|
| 89 |
) -> Dict[str, Any]:
|
| 90 |
"""
|
| 91 |
Format RAG response for chat interface.
|
| 92 |
-
|
| 93 |
Args:
|
| 94 |
rag_response: RAGResponse from RAG pipeline
|
| 95 |
conversation_id: Optional conversation ID
|
| 96 |
include_sources: Whether to include source information
|
| 97 |
-
|
| 98 |
Returns:
|
| 99 |
Formatted dictionary for chat interface
|
| 100 |
"""
|
| 101 |
if not rag_response.success:
|
| 102 |
return self._format_chat_error(rag_response, conversation_id)
|
| 103 |
-
|
| 104 |
response = {
|
| 105 |
"message": rag_response.answer,
|
| 106 |
"confidence": round(rag_response.confidence, 2),
|
| 107 |
-
"processing_time_ms": round(rag_response.processing_time * 1000, 1)
|
| 108 |
}
|
| 109 |
-
|
| 110 |
if conversation_id:
|
| 111 |
response["conversation_id"] = conversation_id
|
| 112 |
-
|
| 113 |
if include_sources and rag_response.sources:
|
| 114 |
response["sources"] = self._format_sources_for_chat(rag_response.sources)
|
| 115 |
-
|
| 116 |
return response
|
| 117 |
|
| 118 |
-
def _format_source_list(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
|
|
|
|
|
| 119 |
"""Format source list for API response."""
|
| 120 |
formatted_sources = []
|
| 121 |
-
|
| 122 |
for source in sources:
|
| 123 |
formatted_source = {
|
| 124 |
"document": source.get("document", "unknown"),
|
| 125 |
"relevance_score": round(source.get("relevance_score", 0.0), 3),
|
| 126 |
-
"excerpt": source.get("excerpt", "")
|
| 127 |
}
|
| 128 |
-
|
| 129 |
# Add chunk ID if available
|
| 130 |
chunk_id = source.get("chunk_id", "")
|
| 131 |
if chunk_id:
|
| 132 |
formatted_source["chunk_id"] = chunk_id
|
| 133 |
-
|
| 134 |
formatted_sources.append(formatted_source)
|
| 135 |
-
|
| 136 |
return formatted_sources
|
| 137 |
|
| 138 |
-
def _format_sources_for_chat(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
|
|
|
|
|
| 139 |
"""Format sources for chat interface (more concise)."""
|
| 140 |
formatted_sources = []
|
| 141 |
-
|
| 142 |
for i, source in enumerate(sources[:3], 1): # Limit to top 3 for chat
|
| 143 |
formatted_source = {
|
| 144 |
"id": i,
|
| 145 |
"document": source.get("document", "unknown"),
|
| 146 |
"relevance": f"{source.get('relevance_score', 0.0):.1%}",
|
| 147 |
-
"preview":
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
}
|
| 149 |
formatted_sources.append(formatted_source)
|
| 150 |
-
|
| 151 |
return formatted_sources
|
| 152 |
|
| 153 |
def _format_error_response(self, rag_response: Any) -> Dict[str, Any]:
|
|
@@ -157,51 +163,45 @@ class ResponseFormatter:
|
|
| 157 |
"error": {
|
| 158 |
"message": rag_response.answer,
|
| 159 |
"details": rag_response.error_message,
|
| 160 |
-
"processing_time_ms": round(rag_response.processing_time * 1000, 1)
|
| 161 |
},
|
| 162 |
"sources": [],
|
| 163 |
-
"metadata": {
|
| 164 |
-
"confidence": 0.0,
|
| 165 |
-
"source_count": 0,
|
| 166 |
-
"context_length": 0
|
| 167 |
-
}
|
| 168 |
}
|
| 169 |
|
| 170 |
def _format_chat_error(
|
| 171 |
-
self,
|
| 172 |
-
rag_response: Any,
|
| 173 |
-
conversation_id: Optional[str] = None
|
| 174 |
) -> Dict[str, Any]:
|
| 175 |
"""Format error response for chat interface."""
|
| 176 |
response = {
|
| 177 |
"message": rag_response.answer,
|
| 178 |
"error": True,
|
| 179 |
-
"processing_time_ms": round(rag_response.processing_time * 1000, 1)
|
| 180 |
}
|
| 181 |
-
|
| 182 |
if conversation_id:
|
| 183 |
response["conversation_id"] = conversation_id
|
| 184 |
-
|
| 185 |
return response
|
| 186 |
|
| 187 |
def validate_response_format(self, response: Dict[str, Any]) -> bool:
|
| 188 |
"""
|
| 189 |
Validate that response follows expected format.
|
| 190 |
-
|
| 191 |
Args:
|
| 192 |
response: Formatted response dictionary
|
| 193 |
-
|
| 194 |
Returns:
|
| 195 |
True if format is valid, False otherwise
|
| 196 |
"""
|
| 197 |
required_fields = ["status"]
|
| 198 |
-
|
| 199 |
# Check required fields
|
| 200 |
for field in required_fields:
|
| 201 |
if field not in response:
|
| 202 |
logger.error(f"Missing required field: {field}")
|
| 203 |
return False
|
| 204 |
-
|
| 205 |
# Check status-specific requirements
|
| 206 |
if response["status"] == "success":
|
| 207 |
success_fields = ["answer", "sources", "metadata"]
|
|
@@ -209,21 +209,21 @@ class ResponseFormatter:
|
|
| 209 |
if field not in response:
|
| 210 |
logger.error(f"Missing success field: {field}")
|
| 211 |
return False
|
| 212 |
-
|
| 213 |
elif response["status"] == "error":
|
| 214 |
if "error" not in response:
|
| 215 |
logger.error("Missing error field in error response")
|
| 216 |
return False
|
| 217 |
-
|
| 218 |
return True
|
| 219 |
|
| 220 |
def create_health_response(self, health_data: Dict[str, Any]) -> Dict[str, Any]:
|
| 221 |
"""
|
| 222 |
Format health check response.
|
| 223 |
-
|
| 224 |
Args:
|
| 225 |
health_data: Health status from RAG pipeline
|
| 226 |
-
|
| 227 |
Returns:
|
| 228 |
Formatted health response
|
| 229 |
"""
|
|
@@ -232,51 +232,65 @@ class ResponseFormatter:
|
|
| 232 |
"health": {
|
| 233 |
"pipeline_status": health_data.get("pipeline", "unknown"),
|
| 234 |
"components": health_data.get("components", {}),
|
| 235 |
-
"timestamp": self._get_timestamp()
|
| 236 |
-
}
|
| 237 |
}
|
| 238 |
|
| 239 |
-
def create_no_answer_response(self, question: str, reason: str = "no_context") -> Dict[str, Any]:
|
|
|
|
|
|
|
| 240 |
"""
|
| 241 |
Create standardized response when no answer can be provided.
|
| 242 |
-
|
| 243 |
Args:
|
| 244 |
question: Original user question
|
| 245 |
reason: Reason for no answer (no_context, insufficient_context, etc.)
|
| 246 |
-
|
| 247 |
Returns:
|
| 248 |
Formatted no-answer response
|
| 249 |
"""
|
| 250 |
messages = {
|
| 251 |
-
"no_context":
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
}
|
| 256 |
-
|
| 257 |
message = messages.get(reason, messages["error"])
|
| 258 |
-
|
| 259 |
return {
|
| 260 |
"status": "no_answer",
|
| 261 |
"message": message,
|
| 262 |
"reason": reason,
|
| 263 |
-
"suggestion":
|
| 264 |
-
|
|
|
|
|
|
|
| 265 |
}
|
| 266 |
|
| 267 |
def _get_timestamp(self) -> str:
|
| 268 |
"""Get current timestamp in ISO format."""
|
| 269 |
from datetime import datetime
|
|
|
|
| 270 |
return datetime.utcnow().isoformat() + "Z"
|
| 271 |
|
| 272 |
def format_for_logging(self, rag_response: Any, question: str) -> Dict[str, Any]:
|
| 273 |
"""
|
| 274 |
Format response data for logging purposes.
|
| 275 |
-
|
| 276 |
Args:
|
| 277 |
rag_response: RAGResponse from pipeline
|
| 278 |
question: Original question
|
| 279 |
-
|
| 280 |
Returns:
|
| 281 |
Formatted data for logging
|
| 282 |
"""
|
|
@@ -291,5 +305,5 @@ class ResponseFormatter:
|
|
| 291 |
"source_count": len(rag_response.sources),
|
| 292 |
"context_length": rag_response.context_length,
|
| 293 |
"answer_length": len(rag_response.answer),
|
| 294 |
-
"error": rag_response.error_message
|
| 295 |
-
}
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import logging
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
|
|
| 15 |
@dataclass
|
| 16 |
class FormattedResponse:
|
| 17 |
"""Standardized formatted response for API endpoints."""
|
| 18 |
+
|
| 19 |
status: str
|
| 20 |
answer: str
|
| 21 |
sources: List[Dict[str, Any]]
|
|
|
|
| 27 |
class ResponseFormatter:
|
| 28 |
"""
|
| 29 |
Formats RAG pipeline responses for various output formats.
|
| 30 |
+
|
| 31 |
Handles:
|
| 32 |
- API response formatting
|
| 33 |
- Citation formatting
|
|
|
|
| 40 |
logger.info("ResponseFormatter initialized")
|
| 41 |
|
| 42 |
def format_api_response(
|
| 43 |
+
self, rag_response: Any, include_debug: bool = False # RAGResponse type
|
|
|
|
|
|
|
| 44 |
) -> Dict[str, Any]:
|
| 45 |
"""
|
| 46 |
Format RAG response for API consumption.
|
| 47 |
+
|
| 48 |
Args:
|
| 49 |
rag_response: RAGResponse from RAG pipeline
|
| 50 |
include_debug: Whether to include debug information
|
| 51 |
+
|
| 52 |
Returns:
|
| 53 |
Formatted dictionary for JSON API response
|
| 54 |
"""
|
| 55 |
if not rag_response.success:
|
| 56 |
return self._format_error_response(rag_response)
|
| 57 |
+
|
| 58 |
# Base response structure
|
| 59 |
formatted_response = {
|
| 60 |
"status": "success",
|
|
|
|
| 64 |
"confidence": round(rag_response.confidence, 3),
|
| 65 |
"processing_time_ms": round(rag_response.processing_time * 1000, 1),
|
| 66 |
"source_count": len(rag_response.sources),
|
| 67 |
+
"context_length": rag_response.context_length,
|
| 68 |
+
},
|
| 69 |
}
|
| 70 |
+
|
| 71 |
# Add debug information if requested
|
| 72 |
if include_debug:
|
| 73 |
formatted_response["debug"] = {
|
| 74 |
"llm_provider": rag_response.llm_provider,
|
| 75 |
"llm_model": rag_response.llm_model,
|
| 76 |
"search_results_count": rag_response.search_results_count,
|
| 77 |
+
"processing_time_seconds": round(rag_response.processing_time, 3),
|
| 78 |
}
|
| 79 |
+
|
| 80 |
return formatted_response
|
| 81 |
|
| 82 |
def format_chat_response(
|
| 83 |
self,
|
| 84 |
rag_response: Any, # RAGResponse type
|
| 85 |
conversation_id: Optional[str] = None,
|
| 86 |
+
include_sources: bool = True,
|
| 87 |
) -> Dict[str, Any]:
|
| 88 |
"""
|
| 89 |
Format RAG response for chat interface.
|
| 90 |
+
|
| 91 |
Args:
|
| 92 |
rag_response: RAGResponse from RAG pipeline
|
| 93 |
conversation_id: Optional conversation ID
|
| 94 |
include_sources: Whether to include source information
|
| 95 |
+
|
| 96 |
Returns:
|
| 97 |
Formatted dictionary for chat interface
|
| 98 |
"""
|
| 99 |
if not rag_response.success:
|
| 100 |
return self._format_chat_error(rag_response, conversation_id)
|
| 101 |
+
|
| 102 |
response = {
|
| 103 |
"message": rag_response.answer,
|
| 104 |
"confidence": round(rag_response.confidence, 2),
|
| 105 |
+
"processing_time_ms": round(rag_response.processing_time * 1000, 1),
|
| 106 |
}
|
| 107 |
+
|
| 108 |
if conversation_id:
|
| 109 |
response["conversation_id"] = conversation_id
|
| 110 |
+
|
| 111 |
if include_sources and rag_response.sources:
|
| 112 |
response["sources"] = self._format_sources_for_chat(rag_response.sources)
|
| 113 |
+
|
| 114 |
return response
|
| 115 |
|
| 116 |
+
def _format_source_list(
|
| 117 |
+
self, sources: List[Dict[str, Any]]
|
| 118 |
+
) -> List[Dict[str, Any]]:
|
| 119 |
"""Format source list for API response."""
|
| 120 |
formatted_sources = []
|
| 121 |
+
|
| 122 |
for source in sources:
|
| 123 |
formatted_source = {
|
| 124 |
"document": source.get("document", "unknown"),
|
| 125 |
"relevance_score": round(source.get("relevance_score", 0.0), 3),
|
| 126 |
+
"excerpt": source.get("excerpt", ""),
|
| 127 |
}
|
| 128 |
+
|
| 129 |
# Add chunk ID if available
|
| 130 |
chunk_id = source.get("chunk_id", "")
|
| 131 |
if chunk_id:
|
| 132 |
formatted_source["chunk_id"] = chunk_id
|
| 133 |
+
|
| 134 |
formatted_sources.append(formatted_source)
|
| 135 |
+
|
| 136 |
return formatted_sources
|
| 137 |
|
| 138 |
+
def _format_sources_for_chat(
|
| 139 |
+
self, sources: List[Dict[str, Any]]
|
| 140 |
+
) -> List[Dict[str, Any]]:
|
| 141 |
"""Format sources for chat interface (more concise)."""
|
| 142 |
formatted_sources = []
|
| 143 |
+
|
| 144 |
for i, source in enumerate(sources[:3], 1): # Limit to top 3 for chat
|
| 145 |
formatted_source = {
|
| 146 |
"id": i,
|
| 147 |
"document": source.get("document", "unknown"),
|
| 148 |
"relevance": f"{source.get('relevance_score', 0.0):.1%}",
|
| 149 |
+
"preview": (
|
| 150 |
+
source.get("excerpt", "")[:100] + "..."
|
| 151 |
+
if len(source.get("excerpt", "")) > 100
|
| 152 |
+
else source.get("excerpt", "")
|
| 153 |
+
),
|
| 154 |
}
|
| 155 |
formatted_sources.append(formatted_source)
|
| 156 |
+
|
| 157 |
return formatted_sources
|
| 158 |
|
| 159 |
def _format_error_response(self, rag_response: Any) -> Dict[str, Any]:
|
|
|
|
| 163 |
"error": {
|
| 164 |
"message": rag_response.answer,
|
| 165 |
"details": rag_response.error_message,
|
| 166 |
+
"processing_time_ms": round(rag_response.processing_time * 1000, 1),
|
| 167 |
},
|
| 168 |
"sources": [],
|
| 169 |
+
"metadata": {"confidence": 0.0, "source_count": 0, "context_length": 0},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
}
|
| 171 |
|
| 172 |
def _format_chat_error(
|
| 173 |
+
self, rag_response: Any, conversation_id: Optional[str] = None
|
|
|
|
|
|
|
| 174 |
) -> Dict[str, Any]:
|
| 175 |
"""Format error response for chat interface."""
|
| 176 |
response = {
|
| 177 |
"message": rag_response.answer,
|
| 178 |
"error": True,
|
| 179 |
+
"processing_time_ms": round(rag_response.processing_time * 1000, 1),
|
| 180 |
}
|
| 181 |
+
|
| 182 |
if conversation_id:
|
| 183 |
response["conversation_id"] = conversation_id
|
| 184 |
+
|
| 185 |
return response
|
| 186 |
|
| 187 |
def validate_response_format(self, response: Dict[str, Any]) -> bool:
|
| 188 |
"""
|
| 189 |
Validate that response follows expected format.
|
| 190 |
+
|
| 191 |
Args:
|
| 192 |
response: Formatted response dictionary
|
| 193 |
+
|
| 194 |
Returns:
|
| 195 |
True if format is valid, False otherwise
|
| 196 |
"""
|
| 197 |
required_fields = ["status"]
|
| 198 |
+
|
| 199 |
# Check required fields
|
| 200 |
for field in required_fields:
|
| 201 |
if field not in response:
|
| 202 |
logger.error(f"Missing required field: {field}")
|
| 203 |
return False
|
| 204 |
+
|
| 205 |
# Check status-specific requirements
|
| 206 |
if response["status"] == "success":
|
| 207 |
success_fields = ["answer", "sources", "metadata"]
|
|
|
|
| 209 |
if field not in response:
|
| 210 |
logger.error(f"Missing success field: {field}")
|
| 211 |
return False
|
| 212 |
+
|
| 213 |
elif response["status"] == "error":
|
| 214 |
if "error" not in response:
|
| 215 |
logger.error("Missing error field in error response")
|
| 216 |
return False
|
| 217 |
+
|
| 218 |
return True
|
| 219 |
|
| 220 |
def create_health_response(self, health_data: Dict[str, Any]) -> Dict[str, Any]:
|
| 221 |
"""
|
| 222 |
Format health check response.
|
| 223 |
+
|
| 224 |
Args:
|
| 225 |
health_data: Health status from RAG pipeline
|
| 226 |
+
|
| 227 |
Returns:
|
| 228 |
Formatted health response
|
| 229 |
"""
|
|
|
|
| 232 |
"health": {
|
| 233 |
"pipeline_status": health_data.get("pipeline", "unknown"),
|
| 234 |
"components": health_data.get("components", {}),
|
| 235 |
+
"timestamp": self._get_timestamp(),
|
| 236 |
+
},
|
| 237 |
}
|
| 238 |
|
| 239 |
+
def create_no_answer_response(
|
| 240 |
+
self, question: str, reason: str = "no_context"
|
| 241 |
+
) -> Dict[str, Any]:
|
| 242 |
"""
|
| 243 |
Create standardized response when no answer can be provided.
|
| 244 |
+
|
| 245 |
Args:
|
| 246 |
question: Original user question
|
| 247 |
reason: Reason for no answer (no_context, insufficient_context, etc.)
|
| 248 |
+
|
| 249 |
Returns:
|
| 250 |
Formatted no-answer response
|
| 251 |
"""
|
| 252 |
messages = {
|
| 253 |
+
"no_context": (
|
| 254 |
+
"I couldn't find any relevant information in our corporate "
|
| 255 |
+
"policies to answer your question."
|
| 256 |
+
),
|
| 257 |
+
"insufficient_context": (
|
| 258 |
+
"I found some potentially relevant information, but not "
|
| 259 |
+
"enough to provide a complete answer."
|
| 260 |
+
),
|
| 261 |
+
"off_topic": (
|
| 262 |
+
"This question appears to be outside the scope of our "
|
| 263 |
+
"corporate policies."
|
| 264 |
+
),
|
| 265 |
+
"error": "I encountered an error while processing your question.",
|
| 266 |
}
|
| 267 |
+
|
| 268 |
message = messages.get(reason, messages["error"])
|
| 269 |
+
|
| 270 |
return {
|
| 271 |
"status": "no_answer",
|
| 272 |
"message": message,
|
| 273 |
"reason": reason,
|
| 274 |
+
"suggestion": (
|
| 275 |
+
"Please contact HR or rephrase your question for better results."
|
| 276 |
+
),
|
| 277 |
+
"sources": [],
|
| 278 |
}
|
| 279 |
|
| 280 |
def _get_timestamp(self) -> str:
|
| 281 |
"""Get current timestamp in ISO format."""
|
| 282 |
from datetime import datetime
|
| 283 |
+
|
| 284 |
return datetime.utcnow().isoformat() + "Z"
|
| 285 |
|
| 286 |
def format_for_logging(self, rag_response: Any, question: str) -> Dict[str, Any]:
|
| 287 |
"""
|
| 288 |
Format response data for logging purposes.
|
| 289 |
+
|
| 290 |
Args:
|
| 291 |
rag_response: RAGResponse from pipeline
|
| 292 |
question: Original question
|
| 293 |
+
|
| 294 |
Returns:
|
| 295 |
Formatted data for logging
|
| 296 |
"""
|
|
|
|
| 305 |
"source_count": len(rag_response.sources),
|
| 306 |
"context_length": rag_response.context_length,
|
| 307 |
"answer_length": len(rag_response.answer),
|
| 308 |
+
"error": rag_response.error_message,
|
| 309 |
+
}
|
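A small sketch of how the formatter above is typically applied to a pipeline result; `rag_response` is assumed to be a RAGResponse returned by RAGPipeline.generate_answer().

from src.rag.response_formatter import ResponseFormatter

formatter = ResponseFormatter()

# JSON payload for the API, with the optional debug block included
api_payload = formatter.format_api_response(rag_response, include_debug=True)
print(formatter.validate_response_format(api_payload))

# Leaner payload for the chat UI, limited to the top sources
chat_payload = formatter.format_chat_response(
    rag_response, conversation_id="conv_123", include_sources=True
)

# Standardized fallback when retrieval produced nothing usable
no_answer = formatter.create_no_answer_response(
    "What is the cafeteria menu today?", reason="no_context"
)
print(no_answer["status"], no_answer["reason"])  # no_answer no_context
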
tests/test_chat_endpoint.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import json
|
| 2 |
import os
|
|
|
|
|
|
|
| 3 |
import pytest
|
| 4 |
-
from unittest.mock import patch, MagicMock
|
| 5 |
|
| 6 |
from app import app as flask_app
|
| 7 |
|
|
@@ -19,100 +20,122 @@ def client(app):
|
|
| 19 |
class TestChatEndpoint:
|
| 20 |
"""Test cases for the /chat endpoint"""
|
| 21 |
|
| 22 |
-
@patch.dict(os.environ, {
|
| 23 |
-
@patch(
|
| 24 |
-
@patch(
|
| 25 |
-
@patch(
|
| 26 |
-
@patch(
|
| 27 |
-
@patch(
|
| 28 |
-
@patch(
|
| 29 |
-
def test_chat_endpoint_valid_request(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
"""Test chat endpoint with valid request"""
|
| 31 |
# Mock the RAG pipeline response
|
| 32 |
mock_response = {
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
}
|
| 39 |
-
|
| 40 |
# Setup mock instances
|
| 41 |
mock_rag_instance = MagicMock()
|
| 42 |
mock_rag_instance.generate_answer.return_value = mock_response
|
| 43 |
mock_rag.return_value = mock_rag_instance
|
| 44 |
-
|
| 45 |
mock_formatter_instance = MagicMock()
|
| 46 |
mock_formatter_instance.format_api_response.return_value = {
|
| 47 |
"status": "success",
|
| 48 |
-
"answer": mock_response[
|
| 49 |
-
"confidence": mock_response[
|
| 50 |
-
"sources": mock_response[
|
| 51 |
-
"citations": mock_response[
|
| 52 |
}
|
| 53 |
mock_formatter.return_value = mock_formatter_instance
|
| 54 |
-
|
| 55 |
# Mock LLMService.from_environment to return a mock instance
|
| 56 |
mock_llm_instance = MagicMock()
|
| 57 |
mock_llm.from_environment.return_value = mock_llm_instance
|
| 58 |
|
| 59 |
request_data = {
|
| 60 |
"message": "What is the remote work policy?",
|
| 61 |
-
"include_sources": True
|
| 62 |
}
|
| 63 |
|
| 64 |
response = client.post(
|
| 65 |
-
"/chat",
|
| 66 |
-
data=json.dumps(request_data),
|
| 67 |
-
content_type="application/json"
|
| 68 |
)
|
| 69 |
|
| 70 |
assert response.status_code == 200
|
| 71 |
data = response.get_json()
|
| 72 |
-
|
| 73 |
assert data["status"] == "success"
|
| 74 |
assert "answer" in data
|
| 75 |
assert "confidence" in data
|
| 76 |
assert "sources" in data
|
| 77 |
assert "citations" in data
|
| 78 |
|
| 79 |
-
@patch.dict(os.environ, {
|
| 80 |
-
@patch(
|
| 81 |
-
@patch(
|
| 82 |
-
@patch(
|
| 83 |
-
@patch(
|
| 84 |
-
@patch(
|
| 85 |
-
@patch(
|
| 86 |
-
def test_chat_endpoint_minimal_request(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
"""Test chat endpoint with minimal request (only message)"""
|
| 88 |
mock_response = {
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
| 94 |
}
|
| 95 |
-
|
| 96 |
# Setup mock instances
|
| 97 |
mock_rag_instance = MagicMock()
|
| 98 |
mock_rag_instance.generate_answer.return_value = mock_response
|
| 99 |
mock_rag.return_value = mock_rag_instance
|
| 100 |
-
|
| 101 |
mock_formatter_instance = MagicMock()
|
| 102 |
mock_formatter_instance.format_api_response.return_value = {
|
| 103 |
"status": "success",
|
| 104 |
-
"answer": mock_response[
|
| 105 |
}
|
| 106 |
mock_formatter.return_value = mock_formatter_instance
|
| 107 |
-
|
| 108 |
mock_llm.from_environment.return_value = MagicMock()
|
| 109 |
|
| 110 |
request_data = {"message": "What are the employee benefits?"}
|
| 111 |
|
| 112 |
response = client.post(
|
| 113 |
-
"/chat",
|
| 114 |
-
data=json.dumps(request_data),
|
| 115 |
-
content_type="application/json"
|
| 116 |
)
|
| 117 |
|
| 118 |
assert response.status_code == 200
|
|
@@ -124,9 +147,7 @@ class TestChatEndpoint:
|
|
| 124 |
request_data = {"include_sources": True}
|
| 125 |
|
| 126 |
response = client.post(
|
| 127 |
-
"/chat",
|
| 128 |
-
data=json.dumps(request_data),
|
| 129 |
-
content_type="application/json"
|
| 130 |
)
|
| 131 |
|
| 132 |
assert response.status_code == 400
|
|
@@ -139,9 +160,7 @@ class TestChatEndpoint:
|
|
| 139 |
request_data = {"message": ""}
|
| 140 |
|
| 141 |
response = client.post(
|
| 142 |
-
"/chat",
|
| 143 |
-
data=json.dumps(request_data),
|
| 144 |
-
content_type="application/json"
|
| 145 |
)
|
| 146 |
|
| 147 |
assert response.status_code == 400
|
|
@@ -154,9 +173,7 @@ class TestChatEndpoint:
|
|
| 154 |
request_data = {"message": 123}
|
| 155 |
|
| 156 |
response = client.post(
|
| 157 |
-
"/chat",
|
| 158 |
-
data=json.dumps(request_data),
|
| 159 |
-
content_type="application/json"
|
| 160 |
)
|
| 161 |
|
| 162 |
assert response.status_code == 400
|
|
@@ -179,9 +196,7 @@ class TestChatEndpoint:
|
|
| 179 |
request_data = {"message": "What is the policy?"}
|
| 180 |
|
| 181 |
response = client.post(
|
| 182 |
-
"/chat",
|
| 183 |
-
data=json.dumps(request_data),
|
| 184 |
-
content_type="application/json"
|
| 185 |
)
|
| 186 |
|
| 187 |
assert response.status_code == 503
|
|
@@ -189,67 +204,110 @@ class TestChatEndpoint:
|
|
| 189 |
assert data["status"] == "error"
|
| 190 |
assert "LLM service configuration error" in data["message"]
|
| 191 |
|
| 192 |
-
@patch.dict(os.environ, {
|
| 193 |
-
@patch(
|
| 194 |
-
@patch(
|
| 195 |
-
@patch(
|
| 196 |
-
@patch(
|
| 197 |
-
@patch(
|
| 198 |
-
@patch(
|
| 199 |
-
def test_chat_endpoint_with_conversation_id(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
"""Test chat endpoint with conversation_id parameter"""
|
| 201 |
mock_response = {
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
}
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
request_data = {
|
| 212 |
"message": "What is the PTO policy?",
|
| 213 |
"conversation_id": "conv_123",
|
| 214 |
-
"include_sources": False
|
| 215 |
}
|
| 216 |
|
| 217 |
response = client.post(
|
| 218 |
-
"/chat",
|
| 219 |
-
data=json.dumps(request_data),
|
| 220 |
-
content_type="application/json"
|
| 221 |
)
|
| 222 |
|
| 223 |
assert response.status_code == 200
|
| 224 |
data = response.get_json()
|
| 225 |
assert data["status"] == "success"
|
| 226 |
|
| 227 |
-
@patch.dict(os.environ, {
|
| 228 |
-
@patch(
|
| 229 |
-
@patch(
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
"""Test chat endpoint with debug information"""
|
| 232 |
mock_response = {
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
}
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
request_data = {
|
| 245 |
"message": "What are the security requirements?",
|
| 246 |
-
"include_debug": True
|
| 247 |
}
|
| 248 |
|
| 249 |
response = client.post(
|
| 250 |
-
"/chat",
|
| 251 |
-
data=json.dumps(request_data),
|
| 252 |
-
content_type="application/json"
|
| 253 |
)
|
| 254 |
|
| 255 |
assert response.status_code == 200
|
|
@@ -260,9 +318,9 @@ class TestChatEndpoint:
|
|
| 260 |
class TestChatHealthEndpoint:
|
| 261 |
"""Test cases for the /chat/health endpoint"""
|
| 262 |
|
| 263 |
-
@patch.dict(os.environ, {
|
| 264 |
-
@patch(
|
| 265 |
-
@patch(
|
| 266 |
def test_chat_health_healthy(self, mock_health_check, mock_llm_service, client):
|
| 267 |
"""Test chat health endpoint when all services are healthy"""
|
| 268 |
mock_health_data = {
|
|
@@ -270,8 +328,8 @@ class TestChatHealthEndpoint:
|
|
| 270 |
"components": {
|
| 271 |
"search_service": {"status": "healthy"},
|
| 272 |
"llm_service": {"status": "healthy"},
|
| 273 |
-
"vector_db": {"status": "healthy"}
|
| 274 |
-
}
|
| 275 |
}
|
| 276 |
mock_health_check.return_value = mock_health_data
|
| 277 |
mock_llm_service.return_value = MagicMock()
|
|
@@ -282,9 +340,9 @@ class TestChatHealthEndpoint:
|
|
| 282 |
data = response.get_json()
|
| 283 |
assert data["status"] == "success"
|
| 284 |
|
| 285 |
-
@patch.dict(os.environ, {
|
| 286 |
-
@patch(
|
| 287 |
-
@patch(
|
| 288 |
def test_chat_health_degraded(self, mock_health_check, mock_llm_service, client):
|
| 289 |
"""Test chat health endpoint when services are degraded"""
|
| 290 |
mock_health_data = {
|
|
@@ -292,8 +350,8 @@ class TestChatHealthEndpoint:
|
|
| 292 |
"components": {
|
| 293 |
"search_service": {"status": "healthy"},
|
| 294 |
"llm_service": {"status": "degraded", "warning": "High latency"},
|
| 295 |
-
"vector_db": {"status": "healthy"}
|
| 296 |
-
}
|
| 297 |
}
|
| 298 |
mock_health_check.return_value = mock_health_data
|
| 299 |
mock_llm_service.return_value = MagicMock()
|
|
@@ -314,18 +372,21 @@ class TestChatHealthEndpoint:
|
|
| 314 |
assert data["status"] == "error"
|
| 315 |
assert "LLM configuration error" in data["message"]
|
| 316 |
|
| 317 |
-
@patch.dict(os.environ, {
|
| 318 |
-
@patch(
|
| 319 |
-
@patch(
|
| 320 |
def test_chat_health_unhealthy(self, mock_health_check, mock_llm_service, client):
|
| 321 |
"""Test chat health endpoint when services are unhealthy"""
|
| 322 |
mock_health_data = {
|
| 323 |
"pipeline": "unhealthy",
|
| 324 |
"components": {
|
| 325 |
-
"search_service": {
|
|
|
|
|
|
|
|
|
|
| 326 |
"llm_service": {"status": "unhealthy", "error": "API unreachable"},
|
| 327 |
-
"vector_db": {"status": "unhealthy"}
|
| 328 |
-
}
|
| 329 |
}
|
| 330 |
mock_health_check.return_value = mock_health_data
|
| 331 |
mock_llm_service.return_value = MagicMock()
|
|
@@ -334,4 +395,4 @@ class TestChatHealthEndpoint:
|
|
| 334 |
|
| 335 |
assert response.status_code == 503
|
| 336 |
data = response.get_json()
|
| 337 |
-
assert data["status"] == "success" # Still returns success, but 503 status code
|
|
|
|
import json
import os
+from unittest.mock import MagicMock, patch
+
import pytest

from app import app as flask_app


class TestChatEndpoint:
    """Test cases for the /chat endpoint"""

+    @patch.dict(os.environ, {"OPENROUTER_API_KEY": "test_key"})
+    @patch("app.RAGPipeline")
+    @patch("app.ResponseFormatter")
+    @patch("app.LLMService")
+    @patch("app.SearchService")
+    @patch("app.VectorDatabase")
+    @patch("app.EmbeddingService")
+    def test_chat_endpoint_valid_request(
+        self,
+        mock_embedding,
+        mock_vector,
+        mock_search,
+        mock_llm,
+        mock_formatter,
+        mock_rag,
+        client,
+    ):
        """Test chat endpoint with valid request"""
        # Mock the RAG pipeline response
        mock_response = {
+            "answer": (
+                "Based on the remote work policy, employees can work "
+                "remotely up to 3 days per week."
+            ),
+            "confidence": 0.85,
+            "sources": [
+                {"chunk_id": "123", "content": "Remote work policy content..."}
+            ],
+            "citations": ["remote_work_policy.md"],
+            "processing_time_ms": 1500,
        }
+
        # Setup mock instances
        mock_rag_instance = MagicMock()
        mock_rag_instance.generate_answer.return_value = mock_response
        mock_rag.return_value = mock_rag_instance
+
        mock_formatter_instance = MagicMock()
        mock_formatter_instance.format_api_response.return_value = {
            "status": "success",
+            "answer": mock_response["answer"],
+            "confidence": mock_response["confidence"],
+            "sources": mock_response["sources"],
+            "citations": mock_response["citations"],
        }
        mock_formatter.return_value = mock_formatter_instance
+
        # Mock LLMService.from_environment to return a mock instance
        mock_llm_instance = MagicMock()
        mock_llm.from_environment.return_value = mock_llm_instance

        request_data = {
            "message": "What is the remote work policy?",
+            "include_sources": True,
        }

        response = client.post(
+            "/chat", data=json.dumps(request_data), content_type="application/json"
        )

        assert response.status_code == 200
        data = response.get_json()
+
        assert data["status"] == "success"
        assert "answer" in data
        assert "confidence" in data
        assert "sources" in data
        assert "citations" in data

+    @patch.dict(os.environ, {"OPENROUTER_API_KEY": "test_key"})
+    @patch("app.RAGPipeline")
+    @patch("app.ResponseFormatter")
+    @patch("app.LLMService")
+    @patch("app.SearchService")
+    @patch("app.VectorDatabase")
+    @patch("app.EmbeddingService")
+    def test_chat_endpoint_minimal_request(
+        self,
+        mock_embedding,
+        mock_vector,
+        mock_search,
+        mock_llm,
+        mock_formatter,
+        mock_rag,
+        client,
+    ):
        """Test chat endpoint with minimal request (only message)"""
        mock_response = {
+            "answer": (
+                "Employee benefits include health insurance, "
+                "retirement plans, and PTO."
+            ),
+            "confidence": 0.78,
+            "sources": [],
+            "citations": ["employee_benefits_guide.md"],
+            "processing_time_ms": 1200,
        }
+
        # Setup mock instances
        mock_rag_instance = MagicMock()
        mock_rag_instance.generate_answer.return_value = mock_response
        mock_rag.return_value = mock_rag_instance
+
        mock_formatter_instance = MagicMock()
        mock_formatter_instance.format_api_response.return_value = {
            "status": "success",
+            "answer": mock_response["answer"],
        }
        mock_formatter.return_value = mock_formatter_instance
+
        mock_llm.from_environment.return_value = MagicMock()

        request_data = {"message": "What are the employee benefits?"}

        response = client.post(
+            "/chat", data=json.dumps(request_data), content_type="application/json"
        )

        assert response.status_code == 200

        request_data = {"include_sources": True}

        response = client.post(
+            "/chat", data=json.dumps(request_data), content_type="application/json"
        )

        assert response.status_code == 400

        request_data = {"message": ""}

        response = client.post(
+            "/chat", data=json.dumps(request_data), content_type="application/json"
        )

        assert response.status_code == 400

        request_data = {"message": 123}

        response = client.post(
+            "/chat", data=json.dumps(request_data), content_type="application/json"
        )

        assert response.status_code == 400

        request_data = {"message": "What is the policy?"}

        response = client.post(
+            "/chat", data=json.dumps(request_data), content_type="application/json"
        )

        assert response.status_code == 503

        assert data["status"] == "error"
        assert "LLM service configuration error" in data["message"]

+    @patch.dict(os.environ, {"OPENROUTER_API_KEY": "test_key"})
+    @patch("app.RAGPipeline")
+    @patch("app.ResponseFormatter")
+    @patch("app.LLMService")
+    @patch("app.SearchService")
+    @patch("app.VectorDatabase")
+    @patch("app.EmbeddingService")
+    def test_chat_endpoint_with_conversation_id(
+        self,
+        mock_embedding,
+        mock_vector,
+        mock_search,
+        mock_llm,
+        mock_formatter,
+        mock_rag,
+        client,
+    ):
        """Test chat endpoint with conversation_id parameter"""
        mock_response = {
+            "answer": "The PTO policy allows 15 days of vacation annually.",
+            "confidence": 0.9,
+            "sources": [],
+            "citations": ["pto_policy.md"],
+            "processing_time_ms": 1100,
        }
+
+        # Setup mock instances
+        mock_rag_instance = MagicMock()
+        mock_rag_instance.generate_answer.return_value = mock_response
+        mock_rag.return_value = mock_rag_instance
+
+        mock_formatter_instance = MagicMock()
+        mock_formatter_instance.format_chat_response.return_value = {
+            "status": "success",
+            "answer": mock_response["answer"],
+        }
+        mock_formatter.return_value = mock_formatter_instance
+
+        mock_llm.from_environment.return_value = MagicMock()

        request_data = {
            "message": "What is the PTO policy?",
            "conversation_id": "conv_123",
+            "include_sources": False,
        }

        response = client.post(
+            "/chat", data=json.dumps(request_data), content_type="application/json"
        )

        assert response.status_code == 200
        data = response.get_json()
        assert data["status"] == "success"

+    @patch.dict(os.environ, {"OPENROUTER_API_KEY": "test_key"})
+    @patch("app.RAGPipeline")
+    @patch("app.ResponseFormatter")
+    @patch("app.LLMService")
+    @patch("app.SearchService")
+    @patch("app.VectorDatabase")
+    @patch("app.EmbeddingService")
+    def test_chat_endpoint_with_debug(
+        self,
+        mock_embedding,
+        mock_vector,
+        mock_search,
+        mock_llm,
+        mock_formatter,
+        mock_rag,
+        client,
+    ):
        """Test chat endpoint with debug information"""
        mock_response = {
+            "answer": "The security policy requires 2FA authentication.",
+            "confidence": 0.95,
+            "sources": [{"chunk_id": "456", "content": "Security requirements..."}],
+            "citations": ["information_security_policy.md"],
+            "processing_time_ms": 1800,
+            "search_results_count": 5,
+            "context_length": 2048,
        }
+
+        # Setup mock instances
+        mock_rag_instance = MagicMock()
+        mock_rag_instance.generate_answer.return_value = mock_response
+        mock_rag.return_value = mock_rag_instance
+
+        mock_formatter_instance = MagicMock()
+        mock_formatter_instance.format_api_response.return_value = {
+            "status": "success",
+            "answer": mock_response["answer"],
+            "debug": {"processing_time": 1800},
+        }
+        mock_formatter.return_value = mock_formatter_instance
+
+        mock_llm.from_environment.return_value = MagicMock()

        request_data = {
            "message": "What are the security requirements?",
+            "include_debug": True,
        }

        response = client.post(
+            "/chat", data=json.dumps(request_data), content_type="application/json"
        )

        assert response.status_code == 200

class TestChatHealthEndpoint:
    """Test cases for the /chat/health endpoint"""

+    @patch.dict(os.environ, {"OPENROUTER_API_KEY": "test_key"})
+    @patch("src.llm.llm_service.LLMService.from_environment")
+    @patch("src.rag.rag_pipeline.RAGPipeline.health_check")
    def test_chat_health_healthy(self, mock_health_check, mock_llm_service, client):
        """Test chat health endpoint when all services are healthy"""
        mock_health_data = {

            "components": {
                "search_service": {"status": "healthy"},
                "llm_service": {"status": "healthy"},
+                "vector_db": {"status": "healthy"},
+            },
        }
        mock_health_check.return_value = mock_health_data
        mock_llm_service.return_value = MagicMock()

        data = response.get_json()
        assert data["status"] == "success"

+    @patch.dict(os.environ, {"OPENROUTER_API_KEY": "test_key"})
+    @patch("src.llm.llm_service.LLMService.from_environment")
+    @patch("src.rag.rag_pipeline.RAGPipeline.health_check")
    def test_chat_health_degraded(self, mock_health_check, mock_llm_service, client):
        """Test chat health endpoint when services are degraded"""
        mock_health_data = {

            "components": {
                "search_service": {"status": "healthy"},
                "llm_service": {"status": "degraded", "warning": "High latency"},
+                "vector_db": {"status": "healthy"},
+            },
        }
        mock_health_check.return_value = mock_health_data
        mock_llm_service.return_value = MagicMock()

        assert data["status"] == "error"
        assert "LLM configuration error" in data["message"]

+    @patch.dict(os.environ, {"OPENROUTER_API_KEY": "test_key"})
+    @patch("src.llm.llm_service.LLMService.from_environment")
+    @patch("src.rag.rag_pipeline.RAGPipeline.health_check")
    def test_chat_health_unhealthy(self, mock_health_check, mock_llm_service, client):
        """Test chat health endpoint when services are unhealthy"""
        mock_health_data = {
            "pipeline": "unhealthy",
            "components": {
+                "search_service": {
+                    "status": "unhealthy",
+                    "error": "Database connection failed",
+                },
                "llm_service": {"status": "unhealthy", "error": "API unreachable"},
+                "vector_db": {"status": "unhealthy"},
+            },
        }
        mock_health_check.return_value = mock_health_data
        mock_llm_service.return_value = MagicMock()

        assert response.status_code == 503
        data = response.get_json()
+        assert data["status"] == "success"  # Still returns success, but 503 status code
tests/test_llm/__init__.py
CHANGED
@@ -1 +1 @@
-# LLM Service Tests
+# LLM Service Tests
tests/test_llm/test_llm_service.py
CHANGED
@@ -4,10 +4,12 @@ Test LLM Service
Tests for LLM integration and service functionality.
"""

import pytest
-from unittest.mock import Mock, patch, MagicMock
import requests
-


class TestLLMConfig:

@@ -19,9 +21,9 @@ class TestLLMConfig:
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
-            base_url="https://test.com"
        )
-
        assert config.provider == "openrouter"
        assert config.api_key == "test-key"
        assert config.model_name == "test-model"

@@ -41,9 +43,9 @@ class TestLLMResponse:
            model="test-model",
            usage={"tokens": 100},
            response_time=1.5,
-            success=True
        )
-
        assert response.content == "Test response"
        assert response.provider == "openrouter"
        assert response.model == "test-model"

@@ -62,84 +64,83 @@ class TestLLMService:
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
-            base_url="https://test.com"
        )
-
        service = LLMService([config])
-
        assert len(service.configs) == 1
        assert service.configs[0] == config
        assert service.current_config_index == 0

    def test_initialization_empty_configs_raises_error(self):
        """Test that empty configs raise ValueError."""
-        with pytest.raises(
            LLMService([])

-    @patch.dict(
    def test_from_environment_with_openrouter_key(self):
        """Test creating service from environment with OpenRouter key."""
        service = LLMService.from_environment()
-
        assert len(service.configs) >= 1
        openrouter_config = next(
            (config for config in service.configs if config.provider == "openrouter"),
-            None
        )
        assert openrouter_config is not None
        assert openrouter_config.api_key == "test-openrouter-key"

-    @patch.dict(
    def test_from_environment_with_groq_key(self):
        """Test creating service from environment with Groq key."""
        service = LLMService.from_environment()
-
        assert len(service.configs) >= 1
        groq_config = next(
-            (config for config in service.configs if config.provider == "groq"),
-            None
        )
        assert groq_config is not None
        assert groq_config.api_key == "test-groq-key"

-    @patch.dict(
    def test_from_environment_no_keys_raises_error(self):
        """Test that no environment keys raise ValueError."""
        with pytest.raises(ValueError, match="No LLM API keys found in environment"):
            LLMService.from_environment()

-    @patch(
    def test_successful_response_generation(self, mock_post):
        """Test successful response generation."""
        # Mock successful API response
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
-            "choices": [
-
-            ],
-            "usage": {"prompt_tokens": 50, "completion_tokens": 20}
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response
-
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
-            base_url="https://api.openrouter.ai/api/v1"
        )
        service = LLMService([config])
-
        result = service.generate_response("Test prompt")
-
        assert result.success is True
        assert result.content == "Test response content"
        assert result.provider == "openrouter"
        assert result.model == "test-model"
        assert result.usage == {"prompt_tokens": 50, "completion_tokens": 20}
        assert result.response_time > 0
-
        # Verify API call
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args

@@ -147,125 +148,139 @@ class TestLLMService:
        assert kwargs["json"]["model"] == "test-model"
        assert kwargs["json"]["messages"][0]["content"] == "Test prompt"

-    @patch(
    def test_api_error_handling(self, mock_post):
        """Test handling of API errors."""
        # Mock API error
        mock_post.side_effect = requests.exceptions.RequestException("API Error")
-
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
-            base_url="https://api.openrouter.ai/api/v1"
        )
        service = LLMService([config])
-
        result = service.generate_response("Test prompt")
-
        assert result.success is False
        assert "API Error" in result.error_message
        assert result.content == ""
        assert result.provider == "openrouter"

-    @patch(
    def test_fallback_to_second_provider(self, mock_post):
        """Test fallback to second provider when first fails."""
        # Mock first provider failing, second succeeding
        first_call = Mock()
-        first_call.side_effect = requests.exceptions.RequestException(
-
        second_call = Mock()
        second_response = Mock()
        second_response.status_code = 200
        second_response.json.return_value = {
            "choices": [{"message": {"content": "Second provider response"}}],
-            "usage": {}
        }
        second_response.raise_for_status = Mock()
        second_call.return_value = second_response
-
        mock_post.side_effect = [first_call.side_effect, second_response]
-
        config1 = LLMConfig(
            provider="openrouter",
            api_key="key1",
            model_name="model1",
-            base_url="https://api1.com"
        )
        config2 = LLMConfig(
            provider="groq",
            api_key="key2",
            model_name="model2",
-            base_url="https://api2.com"
        )
-
        service = LLMService([config1, config2])
        result = service.generate_response("Test prompt")
-
        assert result.success is True
        assert result.content == "Second provider response"
        assert result.provider == "groq"
        assert mock_post.call_count == 2

-    @patch(
    def test_all_providers_fail(self, mock_post):
        """Test when all providers fail."""
-        mock_post.side_effect = requests.exceptions.RequestException(
-
-
-
-
        service = LLMService([config1, config2])
        result = service.generate_response("Test prompt")
-
        assert result.success is False
        assert "All providers failed" in result.error_message
        assert result.provider == "none"
        assert result.model == "none"

-    @patch(
    def test_retry_logic(self, mock_post):
        """Test retry logic for failed requests."""
        # First call fails, second succeeds
        first_response = Mock()
-        first_response.side_effect = requests.exceptions.RequestException(
-
        second_response = Mock()
        second_response.status_code = 200
        second_response.json.return_value = {
            "choices": [{"message": {"content": "Success after retry"}}],
-            "usage": {}
        }
        second_response.raise_for_status = Mock()
-
        mock_post.side_effect = [first_response.side_effect, second_response]
-
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
-            base_url="https://api.openrouter.ai/api/v1"
        )
        service = LLMService([config])
-
        result = service.generate_response("Test prompt", max_retries=1)
-
        assert result.success is True
        assert result.content == "Success after retry"
        assert mock_post.call_count == 2

    def test_get_available_providers(self):
        """Test getting list of available providers."""
-        config1 = LLMConfig(
-
-
        service = LLMService([config1, config2])
        providers = service.get_available_providers()
-
        assert providers == ["openrouter", "groq"]

-    @patch(
    def test_health_check(self, mock_post):
        """Test health check functionality."""
        # Mock successful health check

@@ -273,51 +288,54 @@ class TestLLMService:
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "choices": [{"message": {"content": "OK"}}],
-            "usage": {}
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response
-
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
-            base_url="https://api.openrouter.ai/api/v1"
        )
        service = LLMService([config])
-
        health_status = service.health_check()
-
        assert "openrouter" in health_status
        assert health_status["openrouter"]["status"] == "healthy"
        assert health_status["openrouter"]["model"] == "test-model"
        assert health_status["openrouter"]["response_time"] > 0

-    @patch(
    def test_openrouter_specific_headers(self, mock_post):
        """Test that OpenRouter-specific headers are added."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "choices": [{"message": {"content": "Test"}}],
-            "usage": {}
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response
-
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
-            base_url="https://api.openrouter.ai/api/v1"
        )
        service = LLMService([config])
-
        service.generate_response("Test")
-
        # Check headers
        args, kwargs = mock_post.call_args
        headers = kwargs["headers"]
        assert "HTTP-Referer" in headers
        assert "X-Title" in headers
-        assert
Tests for LLM integration and service functionality.
"""

+from unittest.mock import Mock, patch
+
import pytest
import requests
+
+from src.llm.llm_service import LLMConfig, LLMResponse, LLMService


class TestLLMConfig:

            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
+            base_url="https://test.com",
        )
+
        assert config.provider == "openrouter"
        assert config.api_key == "test-key"
        assert config.model_name == "test-model"

            model="test-model",
            usage={"tokens": 100},
            response_time=1.5,
+            success=True,
        )
+
        assert response.content == "Test response"
        assert response.provider == "openrouter"
        assert response.model == "test-model"

            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
+            base_url="https://test.com",
        )
+
        service = LLMService([config])
+
        assert len(service.configs) == 1
        assert service.configs[0] == config
        assert service.current_config_index == 0

    def test_initialization_empty_configs_raises_error(self):
        """Test that empty configs raise ValueError."""
+        with pytest.raises(
+            ValueError, match="At least one LLM configuration must be provided"
+        ):
            LLMService([])

+    @patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-openrouter-key"})
    def test_from_environment_with_openrouter_key(self):
        """Test creating service from environment with OpenRouter key."""
        service = LLMService.from_environment()
+
        assert len(service.configs) >= 1
        openrouter_config = next(
            (config for config in service.configs if config.provider == "openrouter"),
+            None,
        )
        assert openrouter_config is not None
        assert openrouter_config.api_key == "test-openrouter-key"

+    @patch.dict("os.environ", {"GROQ_API_KEY": "test-groq-key"})
    def test_from_environment_with_groq_key(self):
        """Test creating service from environment with Groq key."""
        service = LLMService.from_environment()
+
        assert len(service.configs) >= 1
        groq_config = next(
+            (config for config in service.configs if config.provider == "groq"), None
        )
        assert groq_config is not None
        assert groq_config.api_key == "test-groq-key"

+    @patch.dict("os.environ", {}, clear=True)
    def test_from_environment_no_keys_raises_error(self):
        """Test that no environment keys raise ValueError."""
        with pytest.raises(ValueError, match="No LLM API keys found in environment"):
            LLMService.from_environment()

+    @patch("requests.post")
    def test_successful_response_generation(self, mock_post):
        """Test successful response generation."""
        # Mock successful API response
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
+            "choices": [{"message": {"content": "Test response content"}}],
+            "usage": {"prompt_tokens": 50, "completion_tokens": 20},
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response
+
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
+            base_url="https://api.openrouter.ai/api/v1",
        )
        service = LLMService([config])
+
        result = service.generate_response("Test prompt")
+
        assert result.success is True
        assert result.content == "Test response content"
        assert result.provider == "openrouter"
        assert result.model == "test-model"
        assert result.usage == {"prompt_tokens": 50, "completion_tokens": 20}
        assert result.response_time > 0
+
        # Verify API call
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args

        assert kwargs["json"]["model"] == "test-model"
        assert kwargs["json"]["messages"][0]["content"] == "Test prompt"

+    @patch("requests.post")
    def test_api_error_handling(self, mock_post):
        """Test handling of API errors."""
        # Mock API error
        mock_post.side_effect = requests.exceptions.RequestException("API Error")
+
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
+            base_url="https://api.openrouter.ai/api/v1",
        )
        service = LLMService([config])
+
        result = service.generate_response("Test prompt")
+
        assert result.success is False
        assert "API Error" in result.error_message
        assert result.content == ""
        assert result.provider == "openrouter"

+    @patch("requests.post")
    def test_fallback_to_second_provider(self, mock_post):
        """Test fallback to second provider when first fails."""
        # Mock first provider failing, second succeeding
        first_call = Mock()
+        first_call.side_effect = requests.exceptions.RequestException(
+            "First provider error"
+        )
+
        second_call = Mock()
        second_response = Mock()
        second_response.status_code = 200
        second_response.json.return_value = {
            "choices": [{"message": {"content": "Second provider response"}}],
+            "usage": {},
        }
        second_response.raise_for_status = Mock()
        second_call.return_value = second_response
+
        mock_post.side_effect = [first_call.side_effect, second_response]
+
        config1 = LLMConfig(
            provider="openrouter",
            api_key="key1",
            model_name="model1",
+            base_url="https://api1.com",
        )
        config2 = LLMConfig(
            provider="groq",
            api_key="key2",
            model_name="model2",
+            base_url="https://api2.com",
        )
+
        service = LLMService([config1, config2])
        result = service.generate_response("Test prompt")
+
        assert result.success is True
        assert result.content == "Second provider response"
        assert result.provider == "groq"
        assert mock_post.call_count == 2

+    @patch("requests.post")
    def test_all_providers_fail(self, mock_post):
        """Test when all providers fail."""
+        mock_post.side_effect = requests.exceptions.RequestException(
+            "All providers down"
+        )
+
+        config1 = LLMConfig(
+            provider="provider1", api_key="key1", model_name="model1", base_url="url1"
+        )
+        config2 = LLMConfig(
+            provider="provider2", api_key="key2", model_name="model2", base_url="url2"
+        )
+
        service = LLMService([config1, config2])
        result = service.generate_response("Test prompt")
+
        assert result.success is False
        assert "All providers failed" in result.error_message
        assert result.provider == "none"
        assert result.model == "none"

+    @patch("requests.post")
    def test_retry_logic(self, mock_post):
        """Test retry logic for failed requests."""
        # First call fails, second succeeds
        first_response = Mock()
+        first_response.side_effect = requests.exceptions.RequestException(
+            "Temporary error"
+        )
+
        second_response = Mock()
        second_response.status_code = 200
        second_response.json.return_value = {
            "choices": [{"message": {"content": "Success after retry"}}],
+            "usage": {},
        }
        second_response.raise_for_status = Mock()
+
        mock_post.side_effect = [first_response.side_effect, second_response]
+
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
+            base_url="https://api.openrouter.ai/api/v1",
        )
        service = LLMService([config])
+
        result = service.generate_response("Test prompt", max_retries=1)
+
        assert result.success is True
        assert result.content == "Success after retry"
        assert mock_post.call_count == 2

    def test_get_available_providers(self):
        """Test getting list of available providers."""
+        config1 = LLMConfig(
+            provider="openrouter", api_key="key1", model_name="model1", base_url="url1"
+        )
+        config2 = LLMConfig(
+            provider="groq", api_key="key2", model_name="model2", base_url="url2"
+        )
+
        service = LLMService([config1, config2])
        providers = service.get_available_providers()
+
        assert providers == ["openrouter", "groq"]

+    @patch("requests.post")
    def test_health_check(self, mock_post):
        """Test health check functionality."""
        # Mock successful health check

        mock_response.status_code = 200
        mock_response.json.return_value = {
            "choices": [{"message": {"content": "OK"}}],
+            "usage": {},
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response
+
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
+            base_url="https://api.openrouter.ai/api/v1",
        )
        service = LLMService([config])
+
        health_status = service.health_check()
+
        assert "openrouter" in health_status
        assert health_status["openrouter"]["status"] == "healthy"
        assert health_status["openrouter"]["model"] == "test-model"
        assert health_status["openrouter"]["response_time"] > 0

+    @patch("requests.post")
    def test_openrouter_specific_headers(self, mock_post):
        """Test that OpenRouter-specific headers are added."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "choices": [{"message": {"content": "Test"}}],
+            "usage": {},
        }
        mock_response.raise_for_status = Mock()
        mock_post.return_value = mock_response
+
        config = LLMConfig(
            provider="openrouter",
            api_key="test-key",
            model_name="test-model",
+            base_url="https://api.openrouter.ai/api/v1",
        )
        service = LLMService([config])
+
        service.generate_response("Test")
+
        # Check headers
        args, kwargs = mock_post.call_args
        headers = kwargs["headers"]
        assert "HTTP-Referer" in headers
        assert "X-Title" in headers
+        assert (
+            headers["HTTP-Referer"]
+            == "https://github.com/sethmcknight/msse-ai-engineering"
+        )
tests/test_rag/__init__.py
CHANGED
@@ -1 +1 @@
-# RAG Pipeline Tests
+# RAG Pipeline Tests