"""
Prompt Templates for Corporate Policy Q&A

This module contains predefined prompt templates optimized for
corporate policy question-answering with proper citation requirements.
"""

import re
from dataclasses import dataclass
from typing import Dict, List

# Matches "[Source: name]" as well as the "[Sources: a, b]" form that
# PromptTemplates.add_fallback_citations appends. Group 1 is "s" for the
# plural form (empty otherwise); group 2 is the text between ":" and "]".
_CITATION_PATTERN = re.compile(r"\[Source(s?):\s*([^\]]+)\]")


@dataclass
class PromptTemplate:
    """Template for generating prompts with context and citations."""

    # Instructions sent as the system message.
    system_prompt: str
    # User message with ``str.format`` placeholders such as ``{question}``
    # and ``{context}``.
    user_template: str
    # Citation pattern with a ``{filename}`` placeholder; empty when the
    # template does not ask for citations.
    citation_format: str


class PromptTemplates:
    """
    Collection of prompt templates for different types of policy questions.

    Templates are designed to ensure:
    - Accurate responses based on provided context
    - Proper citation of source documents
    - Adherence to corporate policy scope
    - Consistent formatting and tone
    """

    # System prompt for corporate policy assistant.
    # Built from adjacent string literals so that no line needs a lint
    # suppression marker: a "# noqa" written inside a triple-quoted string
    # becomes part of the prompt text that is sent to the model.
    SYSTEM_PROMPT = (
        "You are a helpful corporate policy assistant. Your job is to answer "
        "questions about company policies based ONLY on the provided context "
        "documents.\n"
        "\n"
        "IMPORTANT GUIDELINES:\n"
        "1. Answer questions using ONLY the information provided in the "
        "context\n"
        "2. If the context doesn't contain enough information to answer the "
        "question, say so explicitly\n"
        "3. Always cite your sources using the format: "
        "[Source: filename.md]\n"
        "4. Be accurate, concise, and professional\n"
        "5. If asked about topics not covered in the policies, politely "
        "redirect to HR or appropriate department\n"
        "6. Do not make assumptions or provide information not explicitly "
        "stated in the context\n"
        "\n"
        "Your responses should be helpful while staying strictly within the "
        "scope of the provided corporate policies."
    )

    @classmethod
    def get_policy_qa_template(cls) -> PromptTemplate:
        """
        Get the standard template for policy question-answering.

        Returns:
            PromptTemplate configured for corporate policy Q&A
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=(
                "Based on the following corporate policy documents, please "
                "answer this question: {question}\n"
                "\n"
                "CONTEXT DOCUMENTS:\n"
                "{context}\n"
                "\n"
                "Please provide a clear, accurate answer based on the "
                "information above. Include citations for all information "
                "using the format [Source: filename.md]."
            ),
            citation_format="[Source: {filename}]",
        )

    @classmethod
    def get_clarification_template(cls) -> PromptTemplate:
        """
        Get template for when clarification is needed.

        Returns:
            PromptTemplate for clarification requests
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=(
                "The user asked: {question}\n"
                "\n"
                "CONTEXT DOCUMENTS:\n"
                "{context}\n"
                "\n"
                "The provided context documents don't contain sufficient "
                "information to fully answer this question. Please provide "
                "a helpful response that:\n"
                "1. Acknowledges what information is available (if any)\n"
                "2. Clearly states what information is missing\n"
                "3. Suggests appropriate next steps (contact HR, check other "
                "resources, etc.)\n"
                "4. Cites any relevant sources using [Source: filename.md] "
                "format"
            ),
            citation_format="[Source: {filename}]",
        )

    @classmethod
    def get_off_topic_template(cls) -> PromptTemplate:
        """
        Get template for off-topic questions.

        Returns:
            PromptTemplate for redirecting off-topic questions
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=(
                "The user asked: {question}\n"
                "\n"
                "This question appears to be outside the scope of our "
                "corporate policies. Please provide a polite response "
                "that:\n"
                "1. Acknowledges the question\n"
                "2. Explains that this falls outside corporate policy "
                "documentation\n"
                "3. Suggests appropriate resources (HR, IT, management, "
                "etc.)\n"
                "4. Offers to help with any policy-related questions instead"
            ),
            # Off-topic answers are not grounded in any document.
            citation_format="",
        )

    @staticmethod
    def format_context(
        search_results: List[Dict], max_results: int = 5
    ) -> str:
        """
        Format search results into context for the prompt.

        Args:
            search_results: List of search result dicts. Each may carry
                ``metadata`` (a dict with ``filename``), ``content`` and
                ``similarity_score``; missing or ``None`` values fall back
                to ``"unknown"``, ``""`` and ``0.0`` respectively.
            max_results: Maximum number of leading results to include.

        Returns:
            Formatted context string for the prompt
        """
        if not search_results:
            return "No relevant policy documents found."

        context_parts = []
        for index, result in enumerate(search_results[:max_results], 1):
            # ``or`` fallbacks cover keys that are present but None, which
            # a ``dict.get`` default alone does not.
            metadata = result.get("metadata") or {}
            filename = metadata.get("filename") or "unknown"
            content = (result.get("content") or "").strip()
            similarity = result.get("similarity_score") or 0.0

            context_parts.append(
                f"Document {index}: {filename} "
                f"(relevance: {similarity:.2f})\n"
                f"Content: {content}\n"
            )

        return "\n---\n".join(context_parts)

    @staticmethod
    def extract_citations(response: str) -> List[str]:
        """
        Extract citations from LLM response.

        Recognizes ``[Source: filename]`` and the comma-separated
        ``[Sources: a, b]`` form produced by ``add_fallback_citations``.

        Args:
            response: Generated response text

        Returns:
            List of unique cited filenames, in order of first appearance
        """
        if not response:
            return []

        citations: List[str] = []
        for plural, body in _CITATION_PATTERN.findall(response):
            # Only the plural form is a list; a singular citation is kept
            # whole so a filename containing a comma is not split.
            candidates = body.split(",") if plural else [body]
            for candidate in candidates:
                filename = candidate.strip()
                if filename and filename not in citations:
                    citations.append(filename)

        return citations

    @staticmethod
    def validate_citations(
        response: str, available_sources: List[str]
    ) -> Dict[str, bool]:
        """
        Validate that all citations in response refer to available sources.

        A citation is valid when it is a substring of some available source
        or some available source is a substring of it.

        Args:
            response: Generated response text
            available_sources: List of available source filenames

        Returns:
            Dictionary mapping citations to their validity
        """
        # An empty source is a substring of every citation and would make
        # everything validate, so blank entries are ignored.
        sources = [source for source in available_sources if source]

        return {
            citation: any(
                citation in source or source in citation
                for source in sources
            )
            for citation in PromptTemplates.extract_citations(response)
        }

    @staticmethod
    def add_fallback_citations(
        response: str, search_results: List[Dict], max_sources: int = 3
    ) -> str:
        """
        Add citations to response if none were provided by LLM.

        Args:
            response: Generated response text
            search_results: Original search results used for context
            max_sources: Number of leading search results to draw
                filenames from

        Returns:
            Response with a trailing ``[Sources: ...]`` suffix when it had
            no citations and at least one filename is available; otherwise
            the response unchanged
        """
        # Already has citations (including a previously added fallback).
        if PromptTemplates.extract_citations(response):
            return response

        # No sources to cite.
        if not search_results:
            return response

        top_sources: List[str] = []
        for result in search_results[:max_sources]:
            filename = (result.get("metadata") or {}).get("filename") or ""
            if filename and filename not in top_sources:
                top_sources.append(filename)

        if not top_sources:
            return response

        return response + " [Sources: " + ", ".join(top_sources) + "]"