yashita13 commited on
Commit
bd0b2d4
·
verified ·
1 Parent(s): 9e1af10

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +386 -385
main.py CHANGED
@@ -1,385 +1,386 @@
1
- # backend/main.py
2
- from urllib.parse import unquote
3
- from typing import List, Optional
4
- import uuid
5
- import auth
6
- # --- Standard Imports ---
7
- from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, Form
8
- from pydantic import BaseModel
9
- from database import engine, get_db, SessionLocal
10
- from datetime import datetime
11
-
12
- from sqlalchemy.orm import Session
13
- from sqlalchemy.exc import OperationalError
14
- from sqlalchemy import text
15
- import os
16
- import shutil # Important for file operations
17
-
18
- from pipeline import highlight_text
19
- from ml_qna import qna as generate_ml_answer
20
-
21
- # from email_automation import download_attached_file
22
- # import imaplib
23
- from contextlib import asynccontextmanager
24
- from pipeline import pipeline_process_pdf, load_all_models
25
- from fastapi import BackgroundTasks
26
-
27
- # --- Middleware Import ---
28
- from fastapi.middleware.cors import CORSMiddleware
29
-
30
- # --- Local Module Imports ---
31
- import crud
32
- import models
33
- import schemas
34
- from database import engine, get_db
35
- from supabase_utils import upload_file_to_supabase
36
-
37
- # This creates/updates the database tables in your Neon database
38
- # based on your models.py file.
39
- models.Base.metadata.create_all(bind=engine)
40
-
41
- # --- (3) SETUP FOR LOADING MODELS ON STARTUP ---
42
- # This dictionary will hold our loaded models so we don't reload them on every request
43
- ml_models = {}
44
-
45
- @asynccontextmanager
46
- async def lifespan(app: FastAPI):
47
- # This code runs ONCE when the server starts up
48
- print("[INFO] Server starting up...")
49
-
50
- # --- ADD THIS ENTIRE BLOCK ---
51
- print("[INFO] Ensuring system 'automation_user' exists...")
52
- db = SessionLocal()
53
- try:
54
- # Check if the user already exists
55
- automation_user = crud.get_user(db, user_id="automation_user")
56
- if not automation_user:
57
- # If not, create it
58
- print("[INFO] 'automation_user' not found. Creating it now...")
59
- user_data = schemas.UserCreate(
60
- id="automation_user",
61
- name="Automation Service",
62
- department="System",
63
- role="system",
64
- password="automation_pass" # A placeholder password
65
- )
66
- crud.create_user(db, user_data)
67
- print("[INFO] 'automation_user' created successfully.")
68
- else:
69
- print("[INFO] 'automation_user' already exists.")
70
- finally:
71
- db.close() # Always close the database session
72
- # --- END OF BLOCK ---
73
-
74
- print("[INFO] Loading ML models...")
75
- tokenizer, model, nlp_model = load_all_models()
76
- ml_models["tokenizer"] = tokenizer
77
- ml_models["model"] = model
78
- ml_models["nlp_model"] = nlp_model
79
- print("[INFO] ML models loaded successfully and are ready.")
80
-
81
- yield
82
-
83
- ml_models.clear()
84
- print("[INFO] Server shutting down.")
85
-
86
-
87
- # @asynccontextmanager
88
- # async def lifespan(app: FastAPI):
89
- # # This code runs ONCE when the server starts up
90
- # print("[INFO] Server starting up. Loading ML models...")
91
- # tokenizer, model, nlp_model = load_all_models()
92
- # ml_models["tokenizer"] = tokenizer
93
- # ml_models["model"] = model
94
- # ml_models["nlp_model"] = nlp_model
95
- # print("[INFO] ML models loaded successfully and are ready.")
96
- # yield
97
- # # This code runs when the server shuts down
98
- # ml_models.clear()
99
- # print("[INFO] Server shutting down.")
100
-
101
-
102
- app = FastAPI(lifespan=lifespan)
103
-
104
- # This list now includes the new port your frontend is using
105
- origins = [
106
- "http://localhost:3000",
107
- "http://127.0.0.1:3000",
108
- "http://localhost:3003", # <-- ADD THIS LINE
109
- "http://127.0.0.1:3003", # <-- And this one for good measure
110
- ]
111
-
112
- app.add_middleware(
113
- CORSMiddleware,
114
- allow_origins=origins, # Use the updated list
115
- allow_credentials=True,
116
- allow_methods=["*"],
117
- allow_headers=["*"],
118
- )
119
-
120
- # --- LOCAL UPLOAD DIRECTORY for temporary storage ---
121
- UPLOAD_DIRECTORY = "uploads"
122
- os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)
123
-
124
- # --- Diagnostic Endpoints ---
125
- @app.get("/")
126
- def read_root():
127
- return {"status": "ok", "service": "kmrl-backend-service"}
128
-
129
- @app.get("/ping-db")
130
- def ping_db(db: Session = Depends(get_db)):
131
- try:
132
- db.execute(text("SELECT 1"))
133
- return {"status": "ok", "message": "Database connection successful."}
134
- except OperationalError as e:
135
- raise HTTPException(
136
- status_code=500,
137
- detail=f"Database connection failed: {str(e)}"
138
- )
139
-
140
- # --- User Management Endpoints ---
141
- @app.post("/users/", response_model=schemas.User)
142
- def create_user(user: schemas.UserCreate, db: Session = Depends(get_db)):
143
- db_user = crud.get_user(db, user_id=user.id)
144
- if db_user:
145
- raise HTTPException(status_code=400, detail="User ID already registered")
146
- return crud.create_user(db=db, user=user)
147
-
148
- @app.get("/users/{user_id}", response_model=schemas.User)
149
- def read_user(user_id: str, db: Session = Depends(get_db)):
150
- db_user = crud.get_user(db, user_id=user_id)
151
- if db_user is None:
152
- raise HTTPException(status_code=404, detail="User not found")
153
- return db_user
154
-
155
- # --- Document Management Endpoints ---
156
-
157
- @app.post("/documents/upload")
158
- def upload_document(
159
- # Optional fields for email automation, but required for frontend
160
- title: Optional[str] = Form(None),
161
- department: Optional[str] = Form(None),
162
- user_id: Optional[str] = Form(None),
163
- # The file is always required
164
- file: UploadFile = File(...),
165
- db: Session = Depends(get_db)
166
- ):
167
- # --- 1. Set Default Values & Validate User ---
168
- # If a title wasn't provided (from email), create a default one.
169
- final_title = title or f"Email Attachment - {file.filename}"
170
-
171
- # If a user_id wasn't provided, it MUST be the automation user.
172
- final_user_id = user_id or "automation_user"
173
-
174
- # If a department wasn't provided, set it to be auto-detected by the pipeline.
175
- final_department = department or "auto-detected"
176
-
177
- # Now, use these final variables to validate the user
178
- user = crud.get_user(db, user_id=final_user_id)
179
- if not user:
180
- raise HTTPException(status_code=404, detail=f"Uploader '{final_user_id}' not found")
181
-
182
- # --- 2. Upload Original File to Cloud ---
183
- print("Uploading original file to cloud storage...")
184
- public_url = upload_file_to_supabase(file.file, file.filename)
185
- if not public_url:
186
- raise HTTPException(status_code=500, detail="Could not upload file to cloud storage.")
187
- print("File uploaded successfully. Public URL:", public_url)
188
-
189
- file.file.seek(0) # Rewind file for local processing
190
-
191
- # --- 3. Create Initial Database Record ---
192
- # This now correctly matches the function in crud.py (which should not take highlighted_file_path)
193
- document_data = schemas.DocumentCreate(title=final_title, department=final_department)
194
- db_document = crud.create_document(db=db, document=document_data, file_path=public_url, user_id=final_user_id)
195
- print(f"Initial document record created in DB with ID: {db_document.id}")
196
-
197
- # --- 4. Save Local Copy & Run ML Pipeline ---
198
- local_file_path = os.path.join(UPLOAD_DIRECTORY, file.filename)
199
- with open(local_file_path, "wb") as buffer:
200
- shutil.copyfileobj(file.file, buffer)
201
-
202
- print("Starting ML pipeline processing...")
203
- ml_results = pipeline_process_pdf(
204
- pdf_path=local_file_path,
205
- clf_tokenizer=ml_models["tokenizer"],
206
- clf_model=ml_models["model"],
207
- nlp_model=ml_models["nlp_model"]
208
- )
209
- print("ML pipeline processing complete.")
210
-
211
- # --- 5. Upload Highlighted PDF (if created) ---
212
- highlighted_pdf_path = ml_results.get("highlighted_pdf")
213
- highlighted_public_url = None
214
- if highlighted_pdf_path and os.path.exists(highlighted_pdf_path):
215
- print("Uploading highlighted file to cloud storage...")
216
- with open(highlighted_pdf_path, "rb") as f:
217
- highlighted_filename = os.path.basename(highlighted_pdf_path)
218
- highlighted_public_url = upload_file_to_supabase(f, highlighted_filename)
219
- print("Highlighted PDF uploaded successfully.")
220
-
221
- # ... (after the ML pipeline runs) ...
222
-
223
- # --- (6) UPDATE THE DATABASE RECORD WITH ML RESULTS ---
224
- print("Updating database record with ML results...")
225
- final_document = crud.update_document_with_ml_results(
226
- db,
227
- document_id=db_document.id,
228
- ml_results=ml_results,
229
- highlighted_file_path=highlighted_public_url
230
- )
231
- print("Database record updated successfully.")
232
-
233
- # --- (7) CREATE NOTIFICATION FOR THE DEPARTMENT ---
234
- # The ML results contain the department the document was routed to.
235
- routed_department = final_document.department
236
- if routed_department and routed_department != "Unknown":
237
- notification_message = f"New document '{final_document.title}' has been assigned to your department."
238
- crud.create_notification(
239
- db=db,
240
- document_id=final_document.id,
241
- department=routed_department,
242
- message=notification_message
243
- )
244
- print(f"Notification created for department: {routed_department}")
245
-
246
-
247
- # --- 8. Cleanup Local Files ---
248
- try:
249
- if os.path.exists(local_file_path):
250
- os.remove(local_file_path)
251
- if highlighted_pdf_path and os.path.exists(highlighted_pdf_path):
252
- os.remove(highlighted_pdf_path)
253
- except OSError as e:
254
- print(f"Error during file cleanup: {e}")
255
-
256
- # --- 9. Return Final Response ---
257
- return {
258
- "message": "Document processed and all data saved successfully.",
259
- "document_info": schemas.Document.model_validate(final_document),
260
- "highlighted_pdf_url": highlighted_public_url
261
- }
262
-
263
- # --- Read Endpoints ---
264
- @app.get("/documents/", response_model=list[schemas.Document])
265
- def read_all_documents(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
266
- documents = crud.get_all_documents(db, skip=skip, limit=limit)
267
- return documents
268
-
269
- @app.get("/documents/{department}", response_model=list[schemas.Document])
270
- def read_documents_for_department(department: str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
271
- documents = crud.get_documents_by_department(db, department=department, skip=skip, limit=limit)
272
- return documents
273
-
274
- # --- ADD THESE NEW ENDPOINTS FOR Q&A ---
275
-
276
- def run_ml_qna_in_background(question_id: uuid.UUID, pinecone_pdf_id: str, question_text: str):
277
- print(f"[BACKGROUND TASK] Starting ML RAG pipeline for question ID: {question_id}")
278
-
279
- # Call the ML function to get the answer
280
- answer_text = generate_ml_answer(
281
- pdf_id=pinecone_pdf_id, # <--- This now uses the correct filename ID
282
- query=question_text
283
- )
284
- print(f"[BACKGROUND TASK] Answer generated: {answer_text[:100]}...")
285
-
286
- # Use the CRUD function to save the answer to the database
287
- db = SessionLocal()
288
- try:
289
- # Use the new 'db' session to update the database
290
- crud.update_question_with_answer(
291
- db=db,
292
- question_id=question_id,
293
- answer_text=answer_text
294
- )
295
- print(f"[BACKGROUND TASK] Answer saved to database for question ID: {question_id}")
296
- finally:
297
- db.close()
298
-
299
- @app.post("/documents/{document_id}/questions", response_model=schemas.Question)
300
- def ask_question_on_document(
301
- document_id: uuid.UUID,
302
- question: schemas.QuestionCreate,
303
- background_tasks: BackgroundTasks,
304
- db: Session = Depends(get_db)
305
- ):
306
- """
307
- Endpoint for the frontend to submit a new question.
308
- It saves the question, calls the ML RAG pipeline to generate a real answer,
309
- and saves the answer to the database.
310
- """
311
- # First, fetch the document to get its uploader's ID
312
- document = crud.get_document_by_id(db, document_id=document_id)
313
- if not document:
314
- raise HTTPException(status_code=404, detail="Document not found")
315
-
316
- user_id_who_asked = document.uploader_id
317
-
318
- # Create the question in the database with a NULL answer first
319
- db_question = crud.create_question(
320
- db=db,
321
- document_id=document_id,
322
- user_id=user_id_who_asked,
323
- question=question
324
- )
325
- print(f"New question saved with ID: {db_question.id}. Triggering background ML task.")
326
-
327
-
328
- pinecone_pdf_id = os.path.splitext(os.path.basename(unquote(document.file_path)))[0]
329
-
330
- background_tasks.add_task(
331
- run_ml_qna_in_background,
332
- db_question.id,
333
- pinecone_pdf_id, # <--- Pass the correct filename ID
334
- question.question_text,
335
- )
336
- # --- END OF KEY CHANGE ---
337
-
338
- # Return the new question object to the frontend immediately.
339
- # The frontend will see that `answer_text` is still null.
340
- return db_question
341
-
342
-
343
- @app.get("/documents/{document_id}/questions", response_model=List[schemas.Question])
344
- def get_document_questions(
345
- document_id: uuid.UUID,
346
- db: Session = Depends(get_db) # Ensure there are no typos like 'get_d b'
347
- ):
348
- """
349
- Endpoint for the frontend to retrieve the full conversation history
350
- (all questions and their answers) for a document.
351
- """
352
- return crud.get_questions_for_document(db=db, document_id=document_id)
353
-
354
-
355
- # --- ADD THIS NEW ENDPOINT FOR EMAIL AUTOMATION ---
356
-
357
-
358
- @app.patch("/questions/{question_id}/answer")
359
- def submit_answer(
360
- question_id: uuid.UUID,
361
- answer: schemas.Answer,
362
- db: Session = Depends(get_db)
363
- ):
364
- """
365
- INTERNAL ENDPOINT for the ML service to submit its generated answer
366
- for a question that has already been created.
367
- """
368
- updated_question = crud.update_question_with_answer(
369
- db=db,
370
- question_id=question_id,
371
- answer_text=answer.answer_text
372
- )
373
- if not updated_question:
374
- raise HTTPException(status_code=404, detail="Question not found")
375
-
376
- print(f"Answer submitted for question ID: {question_id}")
377
- return {"status": "success", "question": updated_question}
378
-
379
-
380
- # --- NEW ENDPOINT FOR NOTIFICATIONS ---
381
- @app.get("/notifications/{department}", response_model=List[schemas.Notification])
382
- def read_notifications(department: str, db: Session = Depends(get_db)):
383
- """Fetches unread notifications for a given department."""
384
- notifications = crud.get_notifications_for_department(db, department=department)
385
- return notifications
 
 
1
+ # backend/main.py
2
+ from urllib.parse import unquote
3
+ from typing import List, Optional
4
+ import uuid
5
+ import auth
6
+ # --- Standard Imports ---
7
+ from fastapi import FastAPI, Depends, HTTPException, UploadFile, File, Form
8
+ from pydantic import BaseModel
9
+ from database import engine, get_db, SessionLocal
10
+ from datetime import datetime
11
+
12
+ from sqlalchemy.orm import Session
13
+ from sqlalchemy.exc import OperationalError
14
+ from sqlalchemy import text
15
+ import os
16
+ import shutil # Important for file operations
17
+
18
+ from pipeline import highlight_text
19
+ from ml_qna import qna as generate_ml_answer
20
+
21
+ # from email_automation import download_attached_file
22
+ # import imaplib
23
+ from contextlib import asynccontextmanager
24
+ from pipeline import pipeline_process_pdf, load_all_models
25
+ from fastapi import BackgroundTasks
26
+
27
+ # --- Middleware Import ---
28
+ from fastapi.middleware.cors import CORSMiddleware
29
+
30
+ # --- Local Module Imports ---
31
+ import crud
32
+ import models
33
+ import schemas
34
+ from database import engine, get_db
35
+ from supabase_utils import upload_file_to_supabase
36
+
37
+ # This creates/updates the database tables in your Neon database
38
+ # based on your models.py file.
39
+ models.Base.metadata.create_all(bind=engine)
40
+
41
+ # --- (3) SETUP FOR LOADING MODELS ON STARTUP ---
42
+ # This dictionary will hold our loaded models so we don't reload them on every request
43
+ ml_models = {}
44
+
45
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: runs once at startup and once at shutdown.

    Startup work:
      1. Ensure the system 'automation_user' row exists (it is the default
         uploader for documents ingested by the email automation).
      2. Load the ML models a single time and cache them in the module-level
         ``ml_models`` dict so request handlers never reload them.

    Shutdown work: drop the cached model references.
    """
    print("[INFO] Server starting up...")

    # Seed the system user needed by the email-automation upload path.
    print("[INFO] Ensuring system 'automation_user' exists...")
    session = SessionLocal()
    try:
        existing = crud.get_user(session, user_id="automation_user")
        if existing:
            print("[INFO] 'automation_user' already exists.")
        else:
            print("[INFO] 'automation_user' not found. Creating it now...")
            seed_user = schemas.UserCreate(
                id="automation_user",
                name="Automation Service",
                department="System",
                role="system",
                password="automation_pass",  # A placeholder password
            )
            crud.create_user(session, seed_user)
            print("[INFO] 'automation_user' created successfully.")
    finally:
        session.close()  # Always close the database session

    # Load the models once; handlers read them from ml_models.
    print("[INFO] Loading ML models...")
    tokenizer, model, nlp_model = load_all_models()
    ml_models["tokenizer"] = tokenizer
    ml_models["model"] = model
    ml_models["nlp_model"] = nlp_model
    print("[INFO] ML models loaded successfully and are ready.")

    yield

    # Shutdown: release the cached models.
    ml_models.clear()
    print("[INFO] Server shutting down.")
85
+
86
+
87
+ # @asynccontextmanager
88
+ # async def lifespan(app: FastAPI):
89
+ # # This code runs ONCE when the server starts up
90
+ # print("[INFO] Server starting up. Loading ML models...")
91
+ # tokenizer, model, nlp_model = load_all_models()
92
+ # ml_models["tokenizer"] = tokenizer
93
+ # ml_models["model"] = model
94
+ # ml_models["nlp_model"] = nlp_model
95
+ # print("[INFO] ML models loaded successfully and are ready.")
96
+ # yield
97
+ # # This code runs when the server shuts down
98
+ # ml_models.clear()
99
+ # print("[INFO] Server shutting down.")
100
+
101
+
102
app = FastAPI(lifespan=lifespan)

# Browser origins allowed to call this API (local dev frontends on ports
# 3000/3003 plus the deployed Vercel frontend).
origins = [
    "http://localhost:3000",
    "http://127.0.0.1:3000",
    "https://kochi-metro-document.vercel.app",
    "http://localhost:3003",
    "http://127.0.0.1:3003",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Local scratch directory for temporary copies of uploaded files while the
# ML pipeline processes them; created up front so handlers can assume it exists.
UPLOAD_DIRECTORY = "uploads"
os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)
124
+
125
+ # --- Diagnostic Endpoints ---
126
@app.get("/")
def read_root():
    """Health-check endpoint: confirms the API process is alive."""
    return {"status": "ok", "service": "kmrl-backend-service"}
129
+
130
@app.get("/ping-db")
def ping_db(db: Session = Depends(get_db)):
    """Verify database connectivity by issuing a trivial query.

    Raises:
        HTTPException 500: if the database cannot be reached.
    """
    try:
        db.execute(text("SELECT 1"))
    except OperationalError as e:
        raise HTTPException(
            status_code=500,
            detail=f"Database connection failed: {str(e)}"
        )
    return {"status": "ok", "message": "Database connection successful."}
140
+
141
+ # --- User Management Endpoints ---
142
@app.post("/users/", response_model=schemas.User)
def create_user(user: schemas.UserCreate, db: Session = Depends(get_db)):
    """Register a new user; duplicate IDs are rejected with a 400."""
    if crud.get_user(db, user_id=user.id):
        raise HTTPException(status_code=400, detail="User ID already registered")
    return crud.create_user(db=db, user=user)
148
+
149
@app.get("/users/{user_id}", response_model=schemas.User)
def read_user(user_id: str, db: Session = Depends(get_db)):
    """Fetch a single user by ID, or 404 if no such user exists."""
    found = crud.get_user(db, user_id=user_id)
    if found is None:
        raise HTTPException(status_code=404, detail="User not found")
    return found
155
+
156
+ # --- Document Management Endpoints ---
157
+
158
@app.post("/documents/upload")
def upload_document(
    # Optional fields for email automation, but required for frontend
    title: Optional[str] = Form(None),
    department: Optional[str] = Form(None),
    user_id: Optional[str] = Form(None),
    # The file is always required
    file: UploadFile = File(...),
    db: Session = Depends(get_db)
):
    """Upload and process a document end-to-end.

    Steps:
      1. Fill in defaults for email-automation uploads and validate the uploader.
      2. Upload the original file to Supabase cloud storage.
      3. Create the initial DB record pointing at the public URL.
      4. Save a local copy and run the ML pipeline on it.
      5. Upload the highlighted PDF produced by the pipeline (if any).
      6. Update the DB record with the ML results.
      7. Create a notification for the routed department.
      8. Clean up local temp files — now in a ``finally`` so they are removed
         even when the pipeline raises (previously they leaked on error).

    Raises:
        HTTPException 404: uploader not found.
        HTTPException 500: cloud upload failed.
    """
    # --- 1. Set Default Values & Validate User ---
    # Email-automation uploads omit these fields; fall back to system values.
    final_title = title or f"Email Attachment - {file.filename}"
    final_user_id = user_id or "automation_user"
    final_department = department or "auto-detected"

    user = crud.get_user(db, user_id=final_user_id)
    if not user:
        raise HTTPException(status_code=404, detail=f"Uploader '{final_user_id}' not found")

    # --- 2. Upload Original File to Cloud ---
    print("Uploading original file to cloud storage...")
    public_url = upload_file_to_supabase(file.file, file.filename)
    if not public_url:
        raise HTTPException(status_code=500, detail="Could not upload file to cloud storage.")
    print("File uploaded successfully. Public URL:", public_url)

    file.file.seek(0)  # Rewind file for local processing

    # --- 3. Create Initial Database Record ---
    document_data = schemas.DocumentCreate(title=final_title, department=final_department)
    db_document = crud.create_document(db=db, document=document_data, file_path=public_url, user_id=final_user_id)
    print(f"Initial document record created in DB with ID: {db_document.id}")

    # --- 4. Save Local Copy & Run ML Pipeline ---
    # basename() strips any directory components from the client-supplied
    # filename (e.g. "../../etc/passwd"), preventing path traversal out of
    # UPLOAD_DIRECTORY. The filename stem itself is kept unchanged because
    # the Q&A pipeline keys its index on it (see ask_question_on_document).
    safe_name = os.path.basename(file.filename)
    local_file_path = os.path.join(UPLOAD_DIRECTORY, safe_name)
    highlighted_pdf_path = None
    try:
        with open(local_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        print("Starting ML pipeline processing...")
        ml_results = pipeline_process_pdf(
            pdf_path=local_file_path,
            clf_tokenizer=ml_models["tokenizer"],
            clf_model=ml_models["model"],
            nlp_model=ml_models["nlp_model"]
        )
        print("ML pipeline processing complete.")

        # --- 5. Upload Highlighted PDF (if created) ---
        highlighted_pdf_path = ml_results.get("highlighted_pdf")
        highlighted_public_url = None
        if highlighted_pdf_path and os.path.exists(highlighted_pdf_path):
            print("Uploading highlighted file to cloud storage...")
            with open(highlighted_pdf_path, "rb") as f:
                highlighted_filename = os.path.basename(highlighted_pdf_path)
                highlighted_public_url = upload_file_to_supabase(f, highlighted_filename)
            print("Highlighted PDF uploaded successfully.")

        # --- 6. Update the Database Record with ML Results ---
        print("Updating database record with ML results...")
        final_document = crud.update_document_with_ml_results(
            db,
            document_id=db_document.id,
            ml_results=ml_results,
            highlighted_file_path=highlighted_public_url
        )
        print("Database record updated successfully.")

        # --- 7. Create Notification for the Routed Department ---
        # The pipeline routes the document; notify that department unless
        # routing failed ("Unknown").
        routed_department = final_document.department
        if routed_department and routed_department != "Unknown":
            notification_message = f"New document '{final_document.title}' has been assigned to your department."
            crud.create_notification(
                db=db,
                document_id=final_document.id,
                department=routed_department,
                message=notification_message
            )
            print(f"Notification created for department: {routed_department}")
    finally:
        # --- 8. Cleanup Local Files ---
        # Runs even when the pipeline or uploads raise, so temp files
        # never accumulate in UPLOAD_DIRECTORY.
        try:
            if os.path.exists(local_file_path):
                os.remove(local_file_path)
            if highlighted_pdf_path and os.path.exists(highlighted_pdf_path):
                os.remove(highlighted_pdf_path)
        except OSError as e:
            print(f"Error during file cleanup: {e}")

    # --- 9. Return Final Response ---
    return {
        "message": "Document processed and all data saved successfully.",
        "document_info": schemas.Document.model_validate(final_document),
        "highlighted_pdf_url": highlighted_public_url
    }
263
+
264
+ # --- Read Endpoints ---
265
@app.get("/documents/", response_model=list[schemas.Document])
def read_all_documents(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
    """List every document, with simple offset/limit pagination."""
    return crud.get_all_documents(db, skip=skip, limit=limit)
269
+
270
@app.get("/documents/{department}", response_model=list[schemas.Document])
def read_documents_for_department(department: str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
    """List the documents routed to one department, paginated by skip/limit."""
    return crud.get_documents_by_department(db, department=department, skip=skip, limit=limit)
274
+
275
+ # --- ADD THESE NEW ENDPOINTS FOR Q&A ---
276
+
277
def run_ml_qna_in_background(question_id: uuid.UUID, pinecone_pdf_id: str, question_text: str):
    """Background task: run the RAG pipeline for a question and save the answer.

    Args:
        question_id: Primary key of the already-created question row.
        pinecone_pdf_id: Filename-derived ID the document was indexed under.
        question_text: The user's question text.
    """
    print(f"[BACKGROUND TASK] Starting ML RAG pipeline for question ID: {question_id}")

    # Call the ML function to get the answer. An exception here previously
    # propagated out of the background task with the question left stuck in a
    # "no answer" state; log it explicitly and bail out instead.
    try:
        answer_text = generate_ml_answer(
            pdf_id=pinecone_pdf_id,
            query=question_text
        )
    except Exception as e:
        print(f"[BACKGROUND TASK] RAG pipeline failed for question {question_id}: {e}")
        return

    # Guard the preview slice: the ML layer may plausibly return None on a
    # retrieval miss, and None[:100] would raise TypeError.
    preview = (answer_text or "")[:100]
    print(f"[BACKGROUND TASK] Answer generated: {preview}...")

    # Background tasks can't use the request-scoped session; open a fresh one
    # and always close it.
    db = SessionLocal()
    try:
        crud.update_question_with_answer(
            db=db,
            question_id=question_id,
            answer_text=answer_text
        )
        print(f"[BACKGROUND TASK] Answer saved to database for question ID: {question_id}")
    finally:
        db.close()
299
+
300
@app.post("/documents/{document_id}/questions", response_model=schemas.Question)
def ask_question_on_document(
    document_id: uuid.UUID,
    question: schemas.QuestionCreate,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Submit a new question about a document.

    Persists the question with a null answer, schedules the ML RAG pipeline
    as a background task to generate and save the answer, and returns the
    question row immediately (the frontend polls for the answer).

    Raises:
        HTTPException 404: document not found.
    """
    # The question is attributed to the document's uploader.
    document = crud.get_document_by_id(db, document_id=document_id)
    if document is None:
        raise HTTPException(status_code=404, detail="Document not found")

    # Create the row first so the frontend has an ID to poll on.
    db_question = crud.create_question(
        db=db,
        document_id=document_id,
        user_id=document.uploader_id,
        question=question
    )
    print(f"New question saved with ID: {db_question.id}. Triggering background ML task.")

    # The vector index is keyed on the URL-decoded filename stem of the
    # stored file, so derive that ID from the document's public file path.
    decoded_name = os.path.basename(unquote(document.file_path))
    pinecone_pdf_id = os.path.splitext(decoded_name)[0]

    background_tasks.add_task(
        run_ml_qna_in_background,
        db_question.id,
        pinecone_pdf_id,
        question.question_text,
    )

    # Return immediately; answer_text stays null until the task completes.
    return db_question
342
+
343
+
344
@app.get("/documents/{document_id}/questions", response_model=List[schemas.Question])
def get_document_questions(
    document_id: uuid.UUID,
    db: Session = Depends(get_db)
):
    """Return the full conversation history (questions and their answers)
    for one document."""
    return crud.get_questions_for_document(db=db, document_id=document_id)
354
+
355
+
356
+ # --- ADD THIS NEW ENDPOINT FOR EMAIL AUTOMATION ---
357
+
358
+
359
@app.patch("/questions/{question_id}/answer")
def submit_answer(
    question_id: uuid.UUID,
    answer: schemas.Answer,
    db: Session = Depends(get_db)
):
    """Internal endpoint for the ML service: attach a generated answer to an
    already-created question.

    Raises:
        HTTPException 404: question not found.
    """
    updated_question = crud.update_question_with_answer(
        db=db,
        question_id=question_id,
        answer_text=answer.answer_text
    )
    if updated_question is None:
        raise HTTPException(status_code=404, detail="Question not found")

    print(f"Answer submitted for question ID: {question_id}")
    return {"status": "success", "question": updated_question}
379
+
380
+
381
+ # --- NEW ENDPOINT FOR NOTIFICATIONS ---
382
@app.get("/notifications/{department}", response_model=List[schemas.Notification])
def read_notifications(department: str, db: Session = Depends(get_db)):
    """Return the unread notifications queued for one department."""
    return crud.get_notifications_for_department(db, department=department)