from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import warnings

# Silence a noisy deprecation warning from the torch/transformers stack
warnings.filterwarnings("ignore", message=".*_pytree_node.*")

import uvicorn
import os
import tempfile
import aiofiles
from datetime import datetime
import traceback
import logging
from typing import List
import time
from fastapi.responses import JSONResponse
from dotenv import load_dotenv

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Material Summarizer API")

load_dotenv()

# Get URLs from environment
FRONTEND_URL = os.getenv('FRONTEND_URL')
BACKEND_URL = os.getenv('BACKEND_URL', 'http://localhost:5000')

# CORS middleware; drop unset origins so a missing FRONTEND_URL doesn't
# put None into the allow-list
app.add_middleware(
    CORSMiddleware,
    allow_origins=[url for url in (FRONTEND_URL, BACKEND_URL) if url],  # Adjust in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
    max_age=600,
)

# Import processing functions; the API degrades to health-check-only mode
# if the AI stack is unavailable
try:
    from document_parser import parse_document
    from summarizer import summarize_text
    from utils import chunked_summarize
    DEPENDENCIES_LOADED = True
    logger.info("All AI dependencies loaded successfully")
except ImportError as e:
    logger.error(f"Import error: {e}")
    DEPENDENCIES_LOADED = False


@app.on_event("startup")
async def startup_event():
    """Handle startup events"""
    logger.info("Application startup initiated")
    # Load the model on startup to avoid cold-start delays on the first request
    try:
        from summarizer import get_summarizer
        get_summarizer()  # Pre-load the model
        logger.info("Models pre-loaded successfully")
    except Exception as e:
        logger.warning(f"Model pre-loading failed: {e}")


@app.get("/")
async def root():
    return {"message": "Material Summarizer API", "status": "running"}


@app.get("/health")
async def health_check():
    """Health check endpoint specifically for Hugging Face Spaces"""
    status = "healthy" if DEPENDENCIES_LOADED else "missing_dependencies"
    return JSONResponse(
        content={
            "status": status,
            "service": "material-summarizer",
            "dependencies_loaded": DEPENDENCIES_LOADED,
            "timestamp": time.time()
        },
        status_code=200 if DEPENDENCIES_LOADED else 503
    )


@app.get("/ping")
async def ping():
    """Simple ping endpoint for load balancers"""
    return JSONResponse(
        content={"status": "ok", "timestamp": time.time()},
        status_code=200
    )


@app.post("/summarize-document")
async def summarize_document(
    file: UploadFile = File(...),
    max_summary_length: int = 1000,
    chunk_size: int = 1500
):
    """
    Summarize an uploaded document (PDF, DOCX, TXT, etc.)
    """
    if not DEPENDENCIES_LOADED:
        raise HTTPException(
            status_code=500,
            detail="Required AI dependencies not loaded. Check server logs."
        )

    temp_file_path = None
    try:
        # Validate file type
        allowed_extensions = {'.pdf', '.docx', '.doc', '.txt', '.pptx', '.ppt'}
        file_extension = os.path.splitext(file.filename)[1].lower()
        if file_extension not in allowed_extensions:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported document format. Allowed: {', '.join(sorted(allowed_extensions))}"
            )

        # Create a unique temporary file; mkstemp avoids name collisions and
        # path-traversal issues from untrusted filenames, while keeping the
        # extension the parser dispatches on
        fd, temp_file_path = tempfile.mkstemp(suffix=file_extension)
        os.close(fd)

        # Save the uploaded file
        logger.info(f"Saving uploaded file: {file.filename}")
        async with aiofiles.open(temp_file_path, 'wb') as out_file:
            content = await file.read()
            await out_file.write(content)

        start_time = datetime.now()

        # 1. Parse document
        logger.info("Step 1: Parsing document...")
        if not os.path.exists(temp_file_path):
            raise HTTPException(status_code=500, detail="Document file not found after upload")
        document_text = parse_document(temp_file_path, file_extension)
        logger.info(f"Extracted text length: {len(document_text)} characters")
        if not document_text or len(document_text.strip()) < 10:
            raise HTTPException(status_code=500, detail="Document parsing failed or content too short")

        # 2. Summarize text with chunking
        logger.info("Step 2: Generating summary...")

        def custom_summarize_func(text):
            return summarize_text(
                text,
                model_name="facebook/bart-large-cnn",
                max_length=max_summary_length,
                min_length=min(100, max_summary_length // 3)
            )

        final_summary = chunked_summarize(
            text=document_text,
            summarize_func=custom_summarize_func,
            max_chunk_size=chunk_size
        )

        if not final_summary or len(final_summary.strip()) < 10:
            raise HTTPException(status_code=500, detail="Summary generation failed")

        processing_time = (datetime.now() - start_time).total_seconds()
        logger.info(f"Summarization completed in {processing_time:.2f} seconds")

        return {
            "success": True,
            "summary": final_summary,
            "original_length": len(document_text),
            "summary_length": len(final_summary),
            "processing_time": processing_time,
            "file_type": file_extension
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing document: {str(e)}")
        logger.error(traceback.format_exc())
        raise HTTPException(
            status_code=500,
            detail=f"Document processing failed: {str(e)}"
        )
    finally:
        # Clean up the temporary file
        try:
            if temp_file_path and os.path.exists(temp_file_path):
                os.remove(temp_file_path)
                logger.info(f"Cleaned up: {temp_file_path}")
        except Exception as cleanup_error:
            logger.error(f"Cleanup error: {cleanup_error}")


@app.post("/batch-summarize")
async def batch_summarize_documents(files: List[UploadFile] = File(...)):
    """
    Summarize multiple documents in batch
    """
    if not DEPENDENCIES_LOADED:
        raise HTTPException(
            status_code=500,
            detail="Required AI dependencies not loaded. Check server logs."
        )

    results = []
    for file in files:
        try:
            # Reuse the single-document endpoint function with its defaults
            result = await summarize_document(file)
            result["filename"] = file.filename
            results.append(result)
        except Exception as e:
            results.append({
                "success": False,
                "filename": file.filename,
                "error": str(e)
            })

    return {
        "success": True,
        "processed_files": len(results),
        "results": results
    }


if __name__ == "__main__":
    logger.info("Starting Material Summarizer Server...")
    logger.info("Dependencies loaded: %s", DEPENDENCIES_LOADED)
    if not DEPENDENCIES_LOADED:
        logger.error("CRITICAL: AI dependencies not loaded. Document processing will not work!")
    port = int(os.environ.get("MATERIAL_PORT", 7860))
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=port,
        reload=False
    )
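# ---------------------------------------------------------------------------
# Example usage: a minimal sketch, assuming the server is running locally on
# port 7860 and that a file named "notes.pdf" exists (both the host/port and
# the filename are illustrative, not part of this module):
#
#   curl -X POST "http://localhost:7860/summarize-document?max_summary_length=500" \
#        -F "file=@notes.pdf"
#
# Or from Python with the `requests` library:
#
#   import requests
#
#   with open("notes.pdf", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/summarize-document",
#           params={"max_summary_length": 500, "chunk_size": 1200},
#           files={"file": ("notes.pdf", f, "application/pdf")},
#       )
#   resp.raise_for_status()
#   print(resp.json()["summary"])
# ---------------------------------------------------------------------------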