LiamKhoaLe committed
Commit fe5243a · Parent: 556224a

Upd memo managers

Files changed (5):
  1. memo/context.py +0 -196
  2. memo/conversation.py +723 -0
  3. memo/core.py +44 -24
  4. routes/chats.py +23 -50
  5. routes/reports.py +24 -7
memo/context.py CHANGED
@@ -79,199 +79,3 @@ async def get_legacy_context(user_id: str, question: str, memory_system,
     sem_text = await semantic_context(question, rest17, embedder, topk_sem)
 
     return recent_text, sem_text
-
-
-# ────────────────────────────── Memory Enhancement Functions ──────────────────────────────
-
-async def enhance_question_with_memory(user_id: str, question: str, memory, nvidia_rotator, embedder: EmbeddingClient) -> Tuple[str, str]:
-    """Enhance the user's question with relevant conversation history using STM (latest 3 messages)"""
-    try:
-        # Get recent conversation history (STM - latest 3 messages)
-        recent_memories = memory.recent(user_id, 3)
-
-        if not recent_memories:
-            logger.info("[CONTEXT_MANAGER] No recent conversation history found")
-            return question, ""
-
-        # Use NVIDIA to determine if recent memories are relevant to current question
-        if nvidia_rotator:
-            try:
-                from memo.nvidia import related_recent_context
-                relevant_context = await related_recent_context(question, recent_memories, nvidia_rotator)
-
-                if relevant_context:
-                    # Enhance the question with relevant context
-                    enhanced_question = await create_enhanced_prompt(question, relevant_context, nvidia_rotator)
-                    logger.info(f"[CONTEXT_MANAGER] Enhanced question with {len(relevant_context)} chars of relevant context")
-                    return enhanced_question, relevant_context
-                else:
-                    logger.info("[CONTEXT_MANAGER] No relevant recent context found")
-                    return question, ""
-
-            except Exception as e:
-                logger.warning(f"[CONTEXT_MANAGER] NVIDIA context enhancement failed: {e}")
-                # Fallback to semantic similarity
-                return await enhance_with_semantic_similarity(question, recent_memories, embedder)
-        else:
-            # Use semantic similarity if no NVIDIA rotator
-            return await enhance_with_semantic_similarity(question, recent_memories, embedder)
-
-    except Exception as e:
-        logger.error(f"[CONTEXT_MANAGER] Memory enhancement failed: {e}")
-        return question, ""
-
-
-async def enhance_instructions_with_memory(user_id: str, instructions: str, memory, nvidia_rotator, embedder: EmbeddingClient) -> Tuple[str, str]:
-    """Enhance the user's report instructions with relevant conversation history using STM (latest 3 messages)"""
-    try:
-        # Get recent conversation history (STM - latest 3 messages)
-        recent_memories = memory.recent(user_id, 3)
-
-        if not recent_memories:
-            logger.info("[CONTEXT_MANAGER] No recent conversation history found")
-            return instructions, ""
-
-        # Use NVIDIA to determine if recent memories are relevant to current instructions
-        if nvidia_rotator:
-            try:
-                from memo.nvidia import related_recent_context
-                relevant_context = await related_recent_context(instructions, recent_memories, nvidia_rotator)
-
-                if relevant_context:
-                    # Enhance the instructions with relevant context
-                    enhanced_instructions = await create_enhanced_report_prompt(instructions, relevant_context, nvidia_rotator)
-                    logger.info(f"[CONTEXT_MANAGER] Enhanced instructions with {len(relevant_context)} chars of relevant context")
-                    return enhanced_instructions, relevant_context
-                else:
-                    logger.info("[CONTEXT_MANAGER] No relevant recent context found")
-                    return instructions, ""
-
-            except Exception as e:
-                logger.warning(f"[CONTEXT_MANAGER] NVIDIA context enhancement failed: {e}")
-                # Fallback to semantic similarity
-                return await enhance_report_with_semantic_similarity(instructions, recent_memories, embedder)
-        else:
-            # Use semantic similarity if no NVIDIA rotator
-            return await enhance_report_with_semantic_similarity(instructions, recent_memories, embedder)
-
-    except Exception as e:
-        logger.error(f"[CONTEXT_MANAGER] Memory enhancement failed: {e}")
-        return instructions, ""
-
-
-async def enhance_with_semantic_similarity(question: str, recent_memories: List[str], embedder: EmbeddingClient) -> Tuple[str, str]:
-    """Enhance question using semantic similarity as fallback"""
-    try:
-        relevant_context = await semantic_context(question, recent_memories, embedder, 2)
-
-        if relevant_context:
-            # Simple enhancement by prepending context
-            enhanced_question = f"Based on our previous conversation:\n{relevant_context}\n\nNow, {question}"
-            logger.info(f"[CONTEXT_MANAGER] Enhanced question with semantic context: {len(relevant_context)} chars")
-            return enhanced_question, relevant_context
-        else:
-            return question, ""
-
-    except Exception as e:
-        logger.warning(f"[CONTEXT_MANAGER] Semantic enhancement failed: {e}")
-        return question, ""
-
-
-async def enhance_report_with_semantic_similarity(instructions: str, recent_memories: List[str], embedder: EmbeddingClient) -> Tuple[str, str]:
-    """Enhance report instructions using semantic similarity as fallback"""
-    try:
-        relevant_context = await semantic_context(instructions, recent_memories, embedder, 2)
-
-        if relevant_context:
-            # Simple enhancement by prepending context
-            enhanced_instructions = f"Based on our previous conversation:\n{relevant_context}\n\nNow, {instructions}"
-            logger.info(f"[CONTEXT_MANAGER] Enhanced instructions with semantic context: {len(relevant_context)} chars")
-            return enhanced_instructions, relevant_context
-        else:
-            return instructions, ""
-
-    except Exception as e:
-        logger.warning(f"[CONTEXT_MANAGER] Semantic enhancement failed: {e}")
-        return instructions, ""
-
-
-async def create_enhanced_prompt(original_question: str, relevant_context: str, nvidia_rotator) -> str:
-    """Use NVIDIA to create an enhanced prompt that incorporates relevant context intelligently"""
-    try:
-        from utils.api.router import generate_answer_with_model
-
-        sys_prompt = """You are an expert at enhancing user questions with relevant conversation context.
-
-Given a user's current question and relevant context from previous conversations, create an enhanced question that:
-1. Incorporates the relevant context naturally
-2. Maintains the user's original intent
-3. Provides better context for answering
-4. Flows naturally and doesn't sound forced
-
-The enhanced question should help the AI provide more detailed, contextual, and relevant answers.
-
-Return ONLY the enhanced question, no meta-commentary."""
-
-        user_prompt = f"""ORIGINAL QUESTION: {original_question}
-
-RELEVANT CONTEXT FROM PREVIOUS CONVERSATION:
-{relevant_context}
-
-Create an enhanced version of the question that incorporates this context naturally."""
-
-        selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
-        enhanced_question = await generate_answer_with_model(
-            selection=selection,
-            system_prompt=sys_prompt,
-            user_prompt=user_prompt,
-            gemini_rotator=None,
-            nvidia_rotator=nvidia_rotator
-        )
-
-        return enhanced_question.strip()
-
-    except Exception as e:
-        logger.warning(f"[CONTEXT_MANAGER] Prompt enhancement failed: {e}")
-        # Fallback to simple concatenation
-        return f"Based on our previous conversation:\n{relevant_context}\n\nNow, {original_question}"
-
-
-async def create_enhanced_report_prompt(original_instructions: str, relevant_context: str, nvidia_rotator) -> str:
-    """Use NVIDIA to create enhanced report instructions that incorporate relevant context intelligently"""
-    try:
-        from utils.api.router import generate_answer_with_model
-
-        sys_prompt = """You are an expert at enhancing report instructions with relevant conversation context.
-
-Given a user's current report instructions and relevant context from previous conversations, create enhanced instructions that:
-1. Incorporates the relevant context naturally
-2. Maintains the user's original intent for the report
-3. Provides better context for generating a comprehensive report
-4. Flows naturally and doesn't sound forced
-
-The enhanced instructions should help generate a more detailed, contextual, and relevant report.
-
-Return ONLY the enhanced instructions, no meta-commentary."""
-
-        user_prompt = f"""ORIGINAL REPORT INSTRUCTIONS: {original_instructions}
-
-RELEVANT CONTEXT FROM PREVIOUS CONVERSATION:
-{relevant_context}
-
-Create an enhanced version of the report instructions that incorporates this context naturally."""
-
-        selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
-        enhanced_instructions = await generate_answer_with_model(
-            selection=selection,
-            system_prompt=sys_prompt,
-            user_prompt=user_prompt,
-            gemini_rotator=None,
-            nvidia_rotator=nvidia_rotator
-        )
-
-        return enhanced_instructions.strip()
-
-    except Exception as e:
-        logger.warning(f"[CONTEXT_MANAGER] Prompt enhancement failed: {e}")
-        # Fallback to simple concatenation
-        return f"Based on our previous conversation:\n{relevant_context}\n\nNow, {original_instructions}"
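The helpers deleted above were the public entry points for prompt enhancement in memo/context.py; after this commit, callers go through the conversation manager added below in memo/conversation.py. A minimal migration sketch (hypothetical caller code, not part of the commit; it relies on the new MemorySystem facade from memo/core.py shown later in this diff):

    from typing import Optional
    from memo.core import get_memory_system

    async def build_enhanced_question(user_id: str, question: str,
                                      project_id: Optional[str] = None) -> str:
        memory = get_memory_system()
        # Replaces the removed enhance_question_with_memory(...) helper.
        recent_ctx, semantic_ctx, meta = await memory.get_smart_context(
            user_id, question, nvidia_rotator=None,
            project_id=project_id, conversation_mode="chat",
        )
        # The rewritten prompt (if any) travels in the metadata dict;
        # fall back to the raw question exactly as routes/chats.py does.
        return meta.get("enhanced_input", question)

The same call with conversation_mode="report" stands in for the removed enhance_instructions_with_memory.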
memo/conversation.py ADDED
@@ -0,0 +1,723 @@
+# ────────────────────────────── memo/conversation.py ──────────────────────────────
+"""
+Advanced Conversation Management
+
+Handles conversation continuity, context switching, memory consolidation,
+and edge cases for natural conversational flow.
+"""
+
+import re
+import time
+from typing import List, Dict, Any, Tuple, Optional, Set
+from datetime import datetime, timezone, timedelta
+
+from utils.logger import get_logger
+from utils.rag.embeddings import EmbeddingClient
+from memo.context import cosine_similarity, semantic_context
+
+logger = get_logger("CONVERSATION_MANAGER", __name__)
+
+class ConversationManager:
+    """
+    Advanced conversation manager that handles:
+    - Conversation continuity and context switching
+    - Memory consolidation and pruning
+    - Edge case handling for natural conversation flow
+    - Intelligent context retrieval
+    """
+
+    def __init__(self, memory_system, embedder: EmbeddingClient):
+        self.memory_system = memory_system
+        self.embedder = embedder
+        self.conversation_sessions = {}  # Track active conversation sessions
+        self.context_cache = {}  # Cache recent context for performance
+        self.memory_consolidation_threshold = 10  # Consolidate after 10 memories
+
+    async def get_smart_context(self, user_id: str, question: str,
+                                nvidia_rotator=None, project_id: Optional[str] = None,
+                                conversation_mode: str = "chat") -> Tuple[str, str, Dict[str, Any]]:
+        """
+        Get intelligent context for conversation with enhanced edge case handling.
+
+        Args:
+            user_id: User identifier
+            question: Current question/instruction
+            nvidia_rotator: NVIDIA API rotator for AI enhancement
+            project_id: Project context
+            conversation_mode: "chat" or "report"
+
+        Returns:
+            Tuple of (recent_context, semantic_context, metadata)
+        """
+        try:
+            # Check for conversation session continuity
+            session_info = self._get_or_create_session(user_id, question, conversation_mode)
+
+            # Get enhanced context based on conversation state
+            if session_info["is_continuation"]:
+                recent_context, sem_context = await self._get_continuation_context(
+                    user_id, question, session_info, nvidia_rotator, project_id
+                )
+            else:
+                recent_context, sem_context = await self._get_fresh_context(
+                    user_id, question, nvidia_rotator, project_id
+                )
+
+            # Enhance question/instructions with context if beneficial
+            enhanced_input, context_used = await self._enhance_input_with_context(
+                question, recent_context, sem_context, nvidia_rotator, conversation_mode
+            )
+
+            # Update session tracking
+            self._update_session(user_id, question, enhanced_input, context_used)
+
+            # Prepare metadata
+            metadata = {
+                "session_id": session_info["session_id"],
+                "is_continuation": session_info["is_continuation"],
+                "context_enhanced": context_used,
+                # Surfaced so callers (routes/chats.py, routes/reports.py) can read
+                # the rewritten prompt via context_metadata.get("enhanced_input", ...)
+                "enhanced_input": enhanced_input,
+                "conversation_depth": session_info["depth"],
+                "last_activity": session_info["last_activity"]
+            }
+
+            return recent_context, sem_context, metadata
+
+        except Exception as e:
+            logger.error(f"[CONVERSATION_MANAGER] Smart context failed: {e}")
+            return "", "", {"error": str(e)}
+
+    async def consolidate_memories(self, user_id: str, nvidia_rotator=None) -> Dict[str, Any]:
+        """
+        Consolidate and prune memories to prevent information overload.
+        """
+        try:
+            if not self.memory_system.is_enhanced_available():
+                return {"consolidated": 0, "pruned": 0}
+
+            # Get all memories for user
+            all_memories = self.memory_system.enhanced_memory.get_memories(user_id, limit=100)
+
+            if len(all_memories) < self.memory_consolidation_threshold:
+                return {"consolidated": 0, "pruned": 0}
+
+            # Group similar memories
+            memory_groups = await self._group_similar_memories(all_memories, nvidia_rotator)
+
+            # Consolidate each group
+            consolidated_count = 0
+            pruned_count = 0
+
+            for group in memory_groups:
+                if len(group) > 1:
+                    # Consolidate similar memories
+                    consolidated_memory = await self._consolidate_memory_group(group, nvidia_rotator)
+
+                    if consolidated_memory:
+                        # Remove old memories and add consolidated one
+                        for memory in group:
+                            self.memory_system.enhanced_memory.memories.delete_one({"_id": memory["_id"]})
+                            pruned_count += 1
+
+                        # Add consolidated memory
+                        self.memory_system.enhanced_memory.add_memory(
+                            user_id=user_id,
+                            content=consolidated_memory["content"],
+                            memory_type=consolidated_memory["memory_type"],
+                            importance="high",  # Consolidated memories are important
+                            tags=consolidated_memory["tags"] + ["consolidated"]
+                        )
+                        consolidated_count += 1
+
+            logger.info(f"[CONVERSATION_MANAGER] Consolidated {consolidated_count} groups, pruned {pruned_count} memories")
+            return {"consolidated": consolidated_count, "pruned": pruned_count}
+
+        except Exception as e:
+            logger.error(f"[CONVERSATION_MANAGER] Memory consolidation failed: {e}")
+            return {"consolidated": 0, "pruned": 0, "error": str(e)}
+
+    async def handle_context_switch(self, user_id: str, new_question: str,
+                                    nvidia_rotator=None) -> Dict[str, Any]:
+        """
+        Handle context switching when the user changes topics or asks unrelated questions.
+        """
+        try:
+            session_info = self.conversation_sessions.get(user_id, {})
+
+            if not session_info:
+                return {"is_context_switch": False, "confidence": 0.0}
+
+            # Check if this is a context switch
+            is_switch, confidence = await self._detect_context_switch(
+                session_info.get("last_question", ""), new_question, nvidia_rotator
+            )
+
+            if is_switch and confidence > 0.7:
+                # Clear recent context cache for fresh start
+                self.context_cache.pop(user_id, None)
+
+                # Update session to indicate context switch
+                session_info["context_switches"] = session_info.get("context_switches", 0) + 1
+                session_info["last_context_switch"] = time.time()
+
+                logger.info(f"[CONVERSATION_MANAGER] Context switch detected for user {user_id} (confidence: {confidence:.2f})")
+
+                return {
+                    "is_context_switch": True,
+                    "confidence": confidence,
+                    "switch_count": session_info["context_switches"]
+                }
+
+            return {"is_context_switch": False, "confidence": confidence}
+
+        except Exception as e:
+            logger.error(f"[CONVERSATION_MANAGER] Context switch detection failed: {e}")
+            return {"is_context_switch": False, "confidence": 0.0, "error": str(e)}
+
+    def get_conversation_insights(self, user_id: str) -> Dict[str, Any]:
+        """
+        Get insights about the user's conversation patterns.
+        """
+        try:
+            session_info = self.conversation_sessions.get(user_id, {})
+
+            if not session_info:
+                return {"status": "no_active_session"}
+
+            return {
+                "session_duration": time.time() - session_info.get("start_time", time.time()),
+                "message_count": session_info.get("message_count", 0),
+                "context_switches": session_info.get("context_switches", 0),
+                "last_activity": session_info.get("last_activity", 0),
+                "conversation_depth": session_info.get("depth", 0),
+                "enhancement_rate": session_info.get("enhancement_rate", 0.0)
+            }
+
+        except Exception as e:
+            logger.error(f"[CONVERSATION_MANAGER] Failed to get conversation insights: {e}")
+            return {"error": str(e)}
+
+    # ────────────────────────────── Private Helper Methods ──────────────────────────────
+
+    def _get_or_create_session(self, user_id: str, question: str, conversation_mode: str) -> Dict[str, Any]:
+        """Get or create conversation session for user"""
+        current_time = time.time()
+
+        if user_id not in self.conversation_sessions:
+            # New session
+            self.conversation_sessions[user_id] = {
+                "session_id": f"{user_id}_{int(current_time)}",
+                "start_time": current_time,
+                "last_activity": current_time,
+                "message_count": 0,
+                "context_switches": 0,
+                "depth": 0,
+                "enhancement_rate": 0.0,
+                "conversation_mode": conversation_mode,
+                "last_question": "",
+                "is_continuation": False
+            }
+            return self.conversation_sessions[user_id]
+
+        session = self.conversation_sessions[user_id]
+
+        # Check if this is a continuation (within 30 minutes and same mode)
+        time_since_last = current_time - session["last_activity"]
+        is_continuation = (time_since_last < 1800 and  # 30 minutes
+                           session["conversation_mode"] == conversation_mode)
+
+        session["is_continuation"] = is_continuation
+        session["last_activity"] = current_time
+        session["message_count"] += 1
+
+        return session
+
+    def _update_session(self, user_id: str, original_question: str,
+                        enhanced_input: str, context_used: bool):
+        """Update session with new information"""
+        if user_id not in self.conversation_sessions:
+            return
+
+        session = self.conversation_sessions[user_id]
+        session["last_question"] = original_question
+        session["depth"] += 1
+
+        # Update enhancement rate; max(..., 1) guards against division by zero
+        # on the first message of a brand-new session (message_count starts at 0)
+        total_enhancements = session.get("total_enhancements", 0)
+        if context_used:
+            total_enhancements += 1
+        session["total_enhancements"] = total_enhancements
+        session["enhancement_rate"] = total_enhancements / max(session["message_count"], 1)
+
+    async def _get_continuation_context(self, user_id: str, question: str,
+                                        session_info: Dict[str, Any], nvidia_rotator,
+                                        project_id: Optional[str]) -> Tuple[str, str]:
+        """Get context for conversation continuation"""
+        try:
+            # Use enhanced context retrieval with focus on recent conversation
+            # (local names recent_ctx/semantic_ctx avoid shadowing the imported
+            # semantic_context helper, which is called below)
+            if self.memory_system.is_enhanced_available():
+                recent_ctx, semantic_ctx = await self.memory_system.get_conversation_context(
+                    user_id, question, project_id
+                )
+            else:
+                # Fallback to legacy with enhanced selection
+                recent_memories = self.memory_system.recent(user_id, 5)  # More recent for continuation
+                rest_memories = self.memory_system.rest(user_id, 5)
+
+                recent_ctx = ""
+                if recent_memories and nvidia_rotator:
+                    try:
+                        from memo.nvidia import related_recent_context
+                        recent_ctx = await related_recent_context(question, recent_memories, nvidia_rotator)
+                    except Exception as e:
+                        logger.warning(f"[CONVERSATION_MANAGER] NVIDIA recent context failed: {e}")
+                        recent_ctx = await semantic_context(question, recent_memories, self.embedder, 3)
+
+                semantic_ctx = ""
+                if rest_memories:
+                    semantic_ctx = await semantic_context(question, rest_memories, self.embedder, 5)
+
+            return recent_ctx, semantic_ctx
+
+        except Exception as e:
+            logger.error(f"[CONVERSATION_MANAGER] Continuation context failed: {e}")
+            return "", ""
+
+    async def _get_fresh_context(self, user_id: str, question: str,
+                                 nvidia_rotator, project_id: Optional[str]) -> Tuple[str, str]:
+        """Get context for fresh conversation or context switch"""
+        try:
+            # Use standard context retrieval
+            if self.memory_system.is_enhanced_available():
+                recent_ctx, semantic_ctx = await self.memory_system.get_conversation_context(
+                    user_id, question, project_id
+                )
+            else:
+                # Legacy fallback
+                recent_memories = self.memory_system.recent(user_id, 3)
+                rest_memories = self.memory_system.rest(user_id, 3)
+
+                recent_ctx = await semantic_context(question, recent_memories, self.embedder, 2)
+                semantic_ctx = await semantic_context(question, rest_memories, self.embedder, 3)
+
+            return recent_ctx, semantic_ctx
+
+        except Exception as e:
+            logger.error(f"[CONVERSATION_MANAGER] Fresh context failed: {e}")
+            return "", ""
+
+    async def _enhance_input_with_context(self, original_input: str, recent_context: str,
+                                          semantic_context: str, nvidia_rotator,
+                                          conversation_mode: str) -> Tuple[str, bool]:
+        """Enhance input with relevant context if beneficial"""
+        try:
+            # Determine if enhancement would be beneficial
+            should_enhance = await self._should_enhance_input(
+                original_input, recent_context, semantic_context, nvidia_rotator
+            )
+
+            if not should_enhance:
+                return original_input, False
+
+            # Enhance based on conversation mode
+            if conversation_mode == "chat":
+                return await self._enhance_question(original_input, recent_context, semantic_context, nvidia_rotator)
+            else:  # report mode
+                return await self._enhance_instructions(original_input, recent_context, semantic_context, nvidia_rotator)
+
+        except Exception as e:
+            logger.warning(f"[CONVERSATION_MANAGER] Input enhancement failed: {e}")
+            return original_input, False
+
+    async def _should_enhance_input(self, original_input: str, recent_context: str,
+                                    semantic_context: str, nvidia_rotator) -> bool:
+        """Determine if input should be enhanced with context"""
+        try:
+            # Don't enhance if no context available
+            if not recent_context and not semantic_context:
+                return False
+
+            # Don't enhance very specific questions that seem complete
+            if len(original_input.split()) > 20:  # Long, detailed questions
+                return False
+
+            # Don't enhance if input already contains context indicators
+            context_indicators = ["based on", "from our", "as we discussed", "following up", "regarding"]
+            if any(indicator in original_input.lower() for indicator in context_indicators):
+                return False
+
+            # Use NVIDIA to determine if enhancement would be helpful
+            if nvidia_rotator:
+                try:
+                    from utils.api.router import generate_answer_with_model
+
+                    sys_prompt = """You are an expert at determining if a user's question would benefit from additional context.
+
+Given a user's question and available context, determine if enhancing the question with context would:
+1. Make the answer more relevant and helpful
+2. Provide better continuity in conversation
+3. Not make the question unnecessarily complex
+
+Respond with only "YES" or "NO"."""
+
+                    user_prompt = f"""USER QUESTION: {original_input}
+
+AVAILABLE CONTEXT:
+Recent: {recent_context[:200]}...
+Semantic: {semantic_context[:200]}...
+
+Should this question be enhanced with context?"""
+
+                    selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+                    response = await generate_answer_with_model(
+                        selection=selection,
+                        system_prompt=sys_prompt,
+                        user_prompt=user_prompt,
+                        gemini_rotator=None,
+                        nvidia_rotator=nvidia_rotator
+                    )
+
+                    return "YES" in response.upper()
+
+                except Exception as e:
+                    logger.warning(f"[CONVERSATION_MANAGER] Enhancement decision failed: {e}")
+
+            # Fallback: enhance if we have substantial context
+            total_context_length = len(recent_context) + len(semantic_context)
+            return total_context_length > 100
+
+        except Exception as e:
+            logger.warning(f"[CONVERSATION_MANAGER] Enhancement decision failed: {e}")
+            return False
+
+    async def _enhance_question(self, question: str, recent_context: str,
+                                semantic_context: str, nvidia_rotator) -> Tuple[str, bool]:
+        """Enhance question with context"""
+        try:
+            from utils.api.router import generate_answer_with_model
+
+            sys_prompt = """You are an expert at enhancing user questions with relevant conversation context.
+
+Given a user's question and relevant context, create an enhanced question that:
+1. Incorporates the context naturally and seamlessly
+2. Maintains the user's original intent
+3. Provides better context for answering
+4. Flows naturally and doesn't sound forced
+
+Return ONLY the enhanced question, no meta-commentary."""
+
+            context_text = ""
+            if recent_context:
+                context_text += f"Recent conversation:\n{recent_context}\n\n"
+            if semantic_context:
+                context_text += f"Related information:\n{semantic_context}\n\n"
+
+            user_prompt = f"""ORIGINAL QUESTION: {question}
+
+RELEVANT CONTEXT:
+{context_text}
+
+Create an enhanced version that incorporates this context naturally."""
+
+            selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+            enhanced_question = await generate_answer_with_model(
+                selection=selection,
+                system_prompt=sys_prompt,
+                user_prompt=user_prompt,
+                gemini_rotator=None,
+                nvidia_rotator=nvidia_rotator
+            )
+
+            return enhanced_question.strip(), True
+
+        except Exception as e:
+            logger.warning(f"[CONVERSATION_MANAGER] Question enhancement failed: {e}")
+            return question, False
+
+    async def _enhance_instructions(self, instructions: str, recent_context: str,
+                                    semantic_context: str, nvidia_rotator) -> Tuple[str, bool]:
+        """Enhance report instructions with context"""
+        try:
+            from utils.api.router import generate_answer_with_model
+
+            sys_prompt = """You are an expert at enhancing report instructions with relevant conversation context.
+
+Given report instructions and relevant context, create enhanced instructions that:
+1. Incorporates the context naturally and seamlessly
+2. Maintains the user's original intent for the report
+3. Provides better context for generating a comprehensive report
+4. Flows naturally and doesn't sound forced
+
+Return ONLY the enhanced instructions, no meta-commentary."""
+
+            context_text = ""
+            if recent_context:
+                context_text += f"Recent conversation:\n{recent_context}\n\n"
+            if semantic_context:
+                context_text += f"Related information:\n{semantic_context}\n\n"
+
+            user_prompt = f"""ORIGINAL REPORT INSTRUCTIONS: {instructions}
+
+RELEVANT CONTEXT:
+{context_text}
+
+Create an enhanced version that incorporates this context naturally."""
+
+            selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+            enhanced_instructions = await generate_answer_with_model(
+                selection=selection,
+                system_prompt=sys_prompt,
+                user_prompt=user_prompt,
+                gemini_rotator=None,
+                nvidia_rotator=nvidia_rotator
+            )
+
+            return enhanced_instructions.strip(), True
+
+        except Exception as e:
+            logger.warning(f"[CONVERSATION_MANAGER] Instructions enhancement failed: {e}")
+            return instructions, False
+
+    async def _detect_context_switch(self, last_question: str, new_question: str,
+                                     nvidia_rotator) -> Tuple[bool, float]:
+        """Detect if user has switched context/topic"""
+        try:
+            if not last_question or not new_question:
+                return False, 0.0
+
+            if nvidia_rotator:
+                try:
+                    from utils.api.router import generate_answer_with_model
+
+                    sys_prompt = """You are an expert at detecting context switches in conversations.
+
+Given two consecutive questions, determine if the user has switched to a completely different topic or context.
+
+Consider:
+- Different subject matter
+- Different intent or goal
+- No logical connection between questions
+- Change in conversation direction
+
+Respond with a JSON object: {"is_context_switch": true/false, "confidence": 0.0-1.0}"""
+
+                    user_prompt = f"""PREVIOUS QUESTION: {last_question}
+
+CURRENT QUESTION: {new_question}
+
+Is this a context switch?"""
+
+                    selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+                    response = await generate_answer_with_model(
+                        selection=selection,
+                        system_prompt=sys_prompt,
+                        user_prompt=user_prompt,
+                        gemini_rotator=None,
+                        nvidia_rotator=nvidia_rotator
+                    )
+
+                    # Parse JSON response; fall through to the keyword heuristic on failure
+                    import json
+                    try:
+                        result = json.loads(response.strip())
+                        return result.get("is_context_switch", False), result.get("confidence", 0.0)
+                    except Exception:
+                        pass
+
+                except Exception as e:
+                    logger.warning(f"[CONVERSATION_MANAGER] Context switch detection failed: {e}")
+
+            # Fallback: simple keyword-based detection
+            return self._simple_context_switch_detection(last_question, new_question)
+
+        except Exception as e:
+            logger.warning(f"[CONVERSATION_MANAGER] Context switch detection failed: {e}")
+            return False, 0.0
+
+    def _simple_context_switch_detection(self, last_question: str, new_question: str) -> Tuple[bool, float]:
+        """Simple keyword-based context switch detection"""
+        try:
+            # Extract keywords from both questions
+            last_words = set(re.findall(r'\b\w+\b', last_question.lower()))
+            new_words = set(re.findall(r'\b\w+\b', new_question.lower()))
+
+            # Calculate overlap
+            overlap = len(last_words.intersection(new_words))
+            total_unique = len(last_words.union(new_words))
+
+            if total_unique == 0:
+                return False, 0.0
+
+            similarity = overlap / total_unique
+
+            # Context switch if similarity is very low
+            is_switch = similarity < 0.1
+            confidence = 1.0 - similarity if is_switch else similarity
+
+            return is_switch, confidence
+
+        except Exception as e:
+            logger.warning(f"[CONVERSATION_MANAGER] Simple context switch detection failed: {e}")
+            return False, 0.0
+
+    async def _group_similar_memories(self, memories: List[Dict[str, Any]],
+                                      nvidia_rotator) -> List[List[Dict[str, Any]]]:
+        """Group similar memories for consolidation"""
+        try:
+            if not memories or len(memories) < 2:
+                return [memories] if memories else []
+
+            groups = []
+            used = set()
+
+            for i, memory in enumerate(memories):
+                if i in used:
+                    continue
+
+                group = [memory]
+                used.add(i)
+
+                # Find similar memories
+                for j, other_memory in enumerate(memories[i+1:], i+1):
+                    if j in used:
+                        continue
+
+                    # Calculate similarity
+                    similarity = await self._calculate_memory_similarity(memory, other_memory, nvidia_rotator)
+
+                    if similarity > 0.7:  # High similarity threshold
+                        group.append(other_memory)
+                        used.add(j)
+
+                groups.append(group)
+
+            return groups
+
+        except Exception as e:
+            logger.error(f"[CONVERSATION_MANAGER] Memory grouping failed: {e}")
+            return [memories] if memories else []
+
+    async def _calculate_memory_similarity(self, memory1: Dict[str, Any],
+                                           memory2: Dict[str, Any], nvidia_rotator) -> float:
+        """Calculate similarity between two memories"""
+        try:
+            # Use embedding similarity if available
+            if memory1.get("embedding") and memory2.get("embedding"):
+                return cosine_similarity(
+                    memory1["embedding"],
+                    memory2["embedding"]
+                )
+
+            # Fallback to content similarity
+            content1 = memory1.get("content", "")
+            content2 = memory2.get("content", "")
+
+            if not content1 or not content2:
+                return 0.0
+
+            # Simple word overlap similarity
+            words1 = set(re.findall(r'\b\w+\b', content1.lower()))
+            words2 = set(re.findall(r'\b\w+\b', content2.lower()))
+
+            if not words1 or not words2:
+                return 0.0
+
+            overlap = len(words1.intersection(words2))
+            total = len(words1.union(words2))
+
+            return overlap / total if total > 0 else 0.0
+
+        except Exception as e:
+            logger.warning(f"[CONVERSATION_MANAGER] Memory similarity calculation failed: {e}")
+            return 0.0
+
+    async def _consolidate_memory_group(self, group: List[Dict[str, Any]],
+                                        nvidia_rotator) -> Optional[Dict[str, Any]]:
+        """Consolidate a group of similar memories into one"""
+        try:
+            if not group or len(group) < 2:
+                return None
+
+            # Extract content from all memories
+            contents = [memory.get("content", "") for memory in group]
+            memory_types = list(set(memory.get("memory_type", "conversation") for memory in group))
+            tags = []
+            for memory in group:
+                tags.extend(memory.get("tags", []))
+
+            # Use NVIDIA to consolidate content
+            if nvidia_rotator:
+                try:
+                    from utils.api.router import generate_answer_with_model
+
+                    sys_prompt = """You are an expert at consolidating similar conversation memories.
+
+Given multiple similar conversation memories, create a single consolidated memory that:
+1. Preserves all important information
+2. Removes redundancy
+3. Maintains the essential context
+4. Is concise but comprehensive
+
+Return the consolidated content in the same format as the original memories."""
+
+                    user_prompt = f"""CONSOLIDATE THESE SIMILAR MEMORIES:
+
+{chr(10).join(f"Memory {i+1}: {content}" for i, content in enumerate(contents))}
+
+Create a single consolidated memory:"""
+
+                    selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+                    consolidated_content = await generate_answer_with_model(
+                        selection=selection,
+                        system_prompt=sys_prompt,
+                        user_prompt=user_prompt,
+                        gemini_rotator=None,
+                        nvidia_rotator=nvidia_rotator
+                    )
+
+                    return {
+                        "content": consolidated_content.strip(),
+                        "memory_type": memory_types[0] if memory_types else "conversation",
+                        "tags": list(set(tags)) + ["consolidated"]
+                    }
+
+                except Exception as e:
+                    logger.warning(f"[CONVERSATION_MANAGER] NVIDIA consolidation failed: {e}")
+
+            # Fallback: simple concatenation
+            consolidated_content = "\n\n".join(contents)
+            return {
+                "content": consolidated_content,
+                "memory_type": memory_types[0] if memory_types else "conversation",
+                "tags": list(set(tags)) + ["consolidated"]
+            }
+
+        except Exception as e:
+            logger.error(f"[CONVERSATION_MANAGER] Memory consolidation failed: {e}")
+            return None
+
+
+# ────────────────────────────── Global Instance ──────────────────────────────
+
+_conversation_manager: Optional[ConversationManager] = None
+
+def get_conversation_manager(memory_system=None, embedder: Optional[EmbeddingClient] = None) -> ConversationManager:
+    """Get the global conversation manager instance"""
+    global _conversation_manager
+
+    if _conversation_manager is None:
+        if not memory_system:
+            from memo.core import get_memory_system
+            memory_system = get_memory_system()
+        if not embedder:
+            from utils.rag.embeddings import EmbeddingClient
+            embedder = EmbeddingClient()
+
+        _conversation_manager = ConversationManager(memory_system, embedder)
+        logger.info("[CONVERSATION_MANAGER] Global conversation manager initialized")
+
+    return _conversation_manager
+
+def reset_conversation_manager():
+    """Reset the global conversation manager (for testing)"""
+    global _conversation_manager
+    _conversation_manager = None
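A quick way to exercise the new manager in isolation is to feed it stand-in dependencies. The stubs below are hypothetical (the real memory system and embedder live in memo/core.py and utils/rag/embeddings.py), so treat this as a sketch of the call pattern rather than a test of real retrieval:

    import asyncio
    from memo.conversation import ConversationManager

    class StubMemory:
        """Hypothetical stand-in for the legacy memory interface used by the fallbacks."""
        def is_enhanced_available(self) -> bool:
            return False
        def recent(self, user_id: str, n: int):
            return ["User asked about MongoDB indexing.",
                    "Assistant explained compound indexes."][:n]
        def rest(self, user_id: str, n: int):
            return []

    class StubEmbedder:
        """Hypothetical embedder returning constant unit vectors (avoids zero norms)."""
        def embed(self, texts):
            return [[1.0, 0.0, 0.0, 0.0] for _ in texts]

    async def main():
        mgr = ConversationManager(StubMemory(), StubEmbedder())
        recent, semantic, meta = await mgr.get_smart_context("u1", "How do I index a field?")
        print(meta["session_id"], meta["is_continuation"])  # fresh session, not a continuation
        switch = await mgr.handle_context_switch("u1", "What's the weather in Hanoi?")
        print(switch)  # keyword-overlap fallback flags the topic change
        print(mgr.get_conversation_insights("u1"))

    asyncio.run(main())

Without an nvidia_rotator, every AI-backed branch degrades to the semantic or keyword fallbacks, which is what makes the class exercisable offline.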
memo/core.py CHANGED
@@ -174,34 +174,54 @@ class MemorySystem:
             "enhanced_available": False
         }
 
+    async def consolidate_memories(self, user_id: str, nvidia_rotator=None) -> Dict[str, Any]:
+        """Consolidate and prune memories to prevent information overload"""
+        try:
+            from memo.conversation import get_conversation_manager
+            conversation_manager = get_conversation_manager(self, self.embedder)
+
+            return await conversation_manager.consolidate_memories(user_id, nvidia_rotator)
+        except Exception as e:
+            logger.error(f"[CORE_MEMORY] Memory consolidation failed: {e}")
+            return {"consolidated": 0, "pruned": 0, "error": str(e)}
+
+    async def handle_context_switch(self, user_id: str, new_question: str,
+                                    nvidia_rotator=None) -> Dict[str, Any]:
+        """Handle context switching when user changes topics"""
+        try:
+            from memo.conversation import get_conversation_manager
+            conversation_manager = get_conversation_manager(self, self.embedder)
+
+            return await conversation_manager.handle_context_switch(user_id, new_question, nvidia_rotator)
+        except Exception as e:
+            logger.error(f"[CORE_MEMORY] Context switch handling failed: {e}")
+            return {"is_context_switch": False, "confidence": 0.0, "error": str(e)}
+
+    def get_conversation_insights(self, user_id: str) -> Dict[str, Any]:
+        """Get insights about the user's conversation patterns"""
+        try:
+            from memo.conversation import get_conversation_manager
+            conversation_manager = get_conversation_manager(self, self.embedder)
+
+            return conversation_manager.get_conversation_insights(user_id)
+        except Exception as e:
+            logger.error(f"[CORE_MEMORY] Failed to get conversation insights: {e}")
+            return {"error": str(e)}
+
     async def get_smart_context(self, user_id: str, question: str,
-                                nvidia_rotator=None, project_id: Optional[str] = None) -> Tuple[str, str]:
-        """Get smart context using both NVIDIA and semantic similarity for optimal memory ability"""
+                                nvidia_rotator=None, project_id: Optional[str] = None,
+                                conversation_mode: str = "chat") -> Tuple[str, str, Dict[str, Any]]:
+        """Get smart context using advanced conversation management"""
         try:
-            if self.enhanced_available:
-                # Use enhanced context with NVIDIA integration if available
-                recent_context, semantic_context = await self._get_enhanced_context(user_id, question)
-
-                # If NVIDIA rotator is available, enhance recent context selection
-                if nvidia_rotator and recent_context:
-                    try:
-                        from memo.nvidia import related_recent_context
-                        recent_memories = self.legacy_memory.recent(user_id, 5)
-                        if recent_memories:
-                            nvidia_recent = await related_recent_context(question, recent_memories, nvidia_rotator)
-                            if nvidia_recent:
-                                recent_context = nvidia_recent
-                    except Exception as e:
-                        logger.warning(f"[CORE_MEMORY] NVIDIA context enhancement failed: {e}")
-
-                return recent_context, semantic_context
-            else:
-                # Use legacy context with NVIDIA enhancement if available
-                from memo.context import get_legacy_context
-                return await get_legacy_context(user_id, question, self, self.embedder, 3)
+            from memo.conversation import get_conversation_manager
+            conversation_manager = get_conversation_manager(self, self.embedder)
+
+            return await conversation_manager.get_smart_context(
+                user_id, question, nvidia_rotator, project_id, conversation_mode
+            )
         except Exception as e:
             logger.error(f"[CORE_MEMORY] Failed to get smart context: {e}")
-            return "", ""
+            return "", "", {"error": str(e)}
 
     # ────────────────────────────── Private Helper Methods ──────────────────────────────
 
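Note that the facade methods import get_conversation_manager inside the function body: memo/conversation.py itself lazily imports memo.core when no memory system is passed, so a module-level import in either direction would be circular. With the facade in place, a periodic maintenance task could drive the new consolidation API as in this hypothetical sketch (the job itself is not part of the commit):

    import asyncio
    from memo.core import get_memory_system

    async def nightly_memory_maintenance(user_ids):
        """Hypothetical cron-style job built on the new MemorySystem facade."""
        memory = get_memory_system()
        for user_id in user_ids:
            stats = await memory.consolidate_memories(user_id)  # nvidia_rotator is optional
            if "error" in stats:
                continue  # the facade already logged the failure
            print(f"{user_id}: merged {stats['consolidated']} groups, "
                  f"pruned {stats['pruned']} memories")

    # asyncio.run(nightly_memory_maintenance(["u1", "u2"]))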
routes/chats.py CHANGED
@@ -8,7 +8,7 @@ from helpers.setup import app, rag, logger, embedder, captioner, gemini_rotator,
 from helpers.models import ChatMessageResponse, ChatHistoryResponse, MessageResponse, ChatAnswerResponse, StatusUpdateResponse
 from utils.service.common import trim_text
 from .search import build_web_context
-from memo.context import enhance_question_with_memory
+# Removed: enhance_question_with_memory - now handled by the conversation manager
 from utils.api.router import select_model, generate_answer_with_model
 
 
@@ -237,10 +237,25 @@ async def _chat_impl(
     if session_id:
         update_chat_status(session_id, "receiving", "Receiving request...", 5)
 
-    # Step 1: Retrieve and enhance prompt with conversation history FIRST
-    enhanced_question, memory_context = await enhance_question_with_memory(
-        user_id, question, memory, nvidia_rotator, embedder
-    )
+    # Step 1: Retrieve and enhance the prompt with conversation history via the conversation manager
+    try:
+        recent_context, semantic_context, context_metadata = await memory.get_smart_context(
+            user_id, question, nvidia_rotator, project_id, "chat"
+        )
+        logger.info(f"[CHAT] Smart context retrieved: recent={len(recent_context)}, semantic={len(semantic_context)}")
+
+        # Check for context switch
+        context_switch_info = await memory.handle_context_switch(user_id, question, nvidia_rotator)
+        if context_switch_info.get("is_context_switch", False):
+            logger.info(f"[CHAT] Context switch detected (confidence: {context_switch_info.get('confidence', 0):.2f})")
+    except Exception as e:
+        logger.warning(f"[CHAT] Smart context failed, using fallback: {e}")
+        recent_context, semantic_context = "", ""
+        context_metadata = {}
+
+    # Use the enhanced question from smart context if available
+    enhanced_question = context_metadata.get("enhanced_input", question)
+    memory_context = (recent_context + "\n\n" + semantic_context) if recent_context or semantic_context else ""
     logger.info(f"[CHAT] Enhanced question with memory context: {len(memory_context)} chars")
 
     mentioned = set([m.group(0).strip() for m in re.finditer(r"\b[^\s/\\]+?\.(?:pdf|docx|doc)\b", question, re.IGNORECASE)])
@@ -300,51 +315,9 @@ async def _chat_impl(
     if extra:
         logger.info(f"[CHAT] Forced-include mentioned files into relevance: {extra}")
 
-    try:
-        from memo.history import get_history_manager
-        history_manager = get_history_manager(memory)
-        recent_related, semantic_related = await history_manager.related_recent_and_semantic_context(
-            user_id, question, embedder
-        )
-    except Exception as e:
-        logger.warning(f"[CHAT] Enhanced context retrieval failed, using fallback: {e}")
-        recent3 = memory.recent(user_id, 3)
-        if recent3:
-            sys = "Pick only items that directly relate to the new question. Output the selected items verbatim, no commentary. If none, output nothing."
-            numbered = [{"id": i+1, "text": s} for i, s in enumerate(recent3)]
-            user = f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\nSelect any related items and output ONLY their 'text' values concatenated."
-            try:
-                from utils.api.rotator import robust_post_json
-                key = nvidia_rotator.get_key()
-                url = "https://integrate.api.nvidia.com/v1/chat/completions"
-                payload = {
-                    "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
-                    "temperature": 0.0,
-                    "messages": [
-                        {"role": "system", "content": sys},
-                        {"role": "user", "content": user},
-                    ]
-                }
-                headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key or ''}"}
-                data = await robust_post_json(url, headers, payload, nvidia_rotator)
-                recent_related = data["choices"][0]["message"]["content"].strip()
-            except Exception as e:
-                logger.warning(f"Recent-related NVIDIA error: {e}")
-                recent_related = ""
-        else:
-            recent_related = ""
-        rest17 = memory.rest(user_id, 3)
-        if rest17:
-            import numpy as np
-            def _cosine(a: np.ndarray, b: np.ndarray) -> float:
-                denom = (np.linalg.norm(a) * np.linalg.norm(b)) or 1.0
-                return float(np.dot(a, b) / denom)
-            qv = np.array(embedder.embed([question])[0], dtype="float32")
-            mats = embedder.embed([s.strip() for s in rest17])
-            sims = [(_cosine(qv, np.array(v, dtype="float32")), s) for v, s in zip(mats, rest17)]
-            sims.sort(key=lambda x: x[0], reverse=True)
-            top = [s for (sc, s) in sims[:3] if sc > 0.15]
-            semantic_related = "\n\n".join(top) if top else ""
+    # Use context from smart context management (already retrieved above)
+    recent_related = recent_context
+    semantic_related = semantic_context
 
     logger.info(f"[CHAT] Starting enhanced vector search with relevant_files={relevant_files}")
 
routes/reports.py CHANGED
@@ -6,7 +6,7 @@ from fastapi import Form, HTTPException
 
 from helpers.setup import app, rag, logger, embedder, gemini_rotator, nvidia_rotator
 from .search import build_web_context
-from memo.context import enhance_instructions_with_memory
+# Removed: enhance_instructions_with_memory - now handled by the conversation manager
 from helpers.models import ReportResponse, StatusUpdateResponse
 from utils.service.common import trim_text
 from utils.api.router import select_model, generate_answer_with_model
@@ -46,17 +46,34 @@ async def generate_report(
     if not session_id:
         session_id = str(uuid.uuid4())
 
+    # Initialize memory system
+    from memo.core import get_memory_system
+    memory = get_memory_system()
+
     logger.info("[REPORT] User Q/report: %s", trim_text(instructions, 15).replace("\n", " "))
 
     # Update status: Receiving request
     update_report_status(session_id, "receiving", "Receiving request...", 5)
 
-    # Step 1: Retrieve and enhance prompt with conversation history FIRST
-    from memo.core import get_memory_system
-    memory = get_memory_system()
-    enhanced_instructions, memory_context = await enhance_instructions_with_memory(
-        user_id, instructions, memory, nvidia_rotator, embedder
-    )
+    # Get smart context with conversation management
+    try:
+        recent_context, semantic_context, context_metadata = await memory.get_smart_context(
+            user_id, instructions, nvidia_rotator, project_id, "report"
+        )
+        logger.info(f"[REPORT] Smart context retrieved: recent={len(recent_context)}, semantic={len(semantic_context)}")
+
+        # Check for context switch
+        context_switch_info = await memory.handle_context_switch(user_id, instructions, nvidia_rotator)
+        if context_switch_info.get("is_context_switch", False):
+            logger.info(f"[REPORT] Context switch detected (confidence: {context_switch_info.get('confidence', 0):.2f})")
+    except Exception as e:
+        logger.warning(f"[REPORT] Smart context failed, using fallback: {e}")
+        recent_context, semantic_context = "", ""
+        context_metadata = {}
+
+    # Use the enhanced instructions from smart context if available
+    enhanced_instructions = context_metadata.get("enhanced_input", instructions)
+    memory_context = (recent_context + "\n\n" + semantic_context) if recent_context or semantic_context else ""
     logger.info(f"[REPORT] Enhanced instructions with memory context: {len(memory_context)} chars")
 
     files_list = rag.list_files(user_id=user_id, project_id=project_id)