venni16 committed (verified)
Commit 49000c7 · 1 Parent(s): 5335722

Update app.py

Files changed (1)
  1. app.py +205 -203
app.py CHANGED
@@ -1,204 +1,206 @@
- from fastapi import FastAPI, UploadFile, File, HTTPException
- from fastapi.middleware.cors import CORSMiddleware
- import uvicorn
- import os
- import tempfile
- import aiofiles
- from datetime import datetime
- import traceback
- import logging
- from typing import List, Optional
-
- # Setup logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- app = FastAPI(title="Material Summarizer API")
-
- from dotenv import load_dotenv
- load_dotenv()
-
- # Get URLs from environment
- FRONTEND_URL = os.getenv('FRONTEND_URL')
- BACKEND_URL = os.getenv('BACKEND_URL', 'http://localhost:5000')
-
- # CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["FRONTEND_URL, BACKEND_URL"],  # Adjust in production
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Import processing functions
- try:
-     from document_parser import parse_document
-     from summarizer import summarize_text
-     from utils import chunked_summarize
-     DEPENDENCIES_LOADED = True
-     logger.info("All AI dependencies loaded successfully")
- except ImportError as e:
-     logger.error(f"Import error: {e}")
-     DEPENDENCIES_LOADED = False
-
- @app.get("/")
- async def root():
-     return {"message": "Material Summarizer API", "status": "running"}
-
- @app.get("/health")
- async def health_check():
-     status = "healthy" if DEPENDENCIES_LOADED else "missing_dependencies"
-     return {
-         "status": status,
-         "service": "material-summarizer",
-         "dependencies_loaded": DEPENDENCIES_LOADED
-     }
-
- @app.post("/summarize-document")
- async def summarize_document(
-     file: UploadFile = File(...),
-     max_summary_length: Optional[int] = 1000,
-     chunk_size: Optional[int] = 1500
- ):
-     """
-     Summarize uploaded document (PDF, DOCX, TXT, etc.)
-     """
-     if not DEPENDENCIES_LOADED:
-         raise HTTPException(
-             status_code=500,
-             detail="Required AI dependencies not loaded. Check server logs."
-         )
-
-     temp_file_path = None
-
-     try:
-         # Validate file type
-         allowed_extensions = {'.pdf', '.docx', '.doc', '.txt', '.pptx', '.ppt'}
-         file_extension = os.path.splitext(file.filename)[1].lower()
-
-         if file_extension not in allowed_extensions:
-             raise HTTPException(
-                 status_code=400,
-                 detail=f"Unsupported document format. Allowed: {', '.join(allowed_extensions)}"
-             )
-
-         # Create temporary file
-         temp_file_path = f"temp_{file.filename}"
-
-         # Save uploaded file
-         logger.info(f"Saving uploaded file: {file.filename}")
-         async with aiofiles.open(temp_file_path, 'wb') as out_file:
-             content = await file.read()
-             await out_file.write(content)
-
-         start_time = datetime.now()
-
-         # 1. Parse document
-         logger.info("Step 1: Parsing document...")
-         if not os.path.exists(temp_file_path):
-             raise HTTPException(status_code=500, detail="Document file not found after upload")
-
-         document_text = parse_document(temp_file_path, file_extension)
-         logger.info(f"Extracted text length: {len(document_text)} characters")
-
-         if not document_text or len(document_text.strip()) < 10:
-             raise HTTPException(status_code=500, detail="Document parsing failed or content too short")
-
-         # 2. Summarize text with chunking
-         logger.info("Step 2: Generating summary...")
-
-         def custom_summarize_func(text):
-             return summarize_text(
-                 text,
-                 model_name="facebook/bart-large-cnn",
-                 max_length=max_summary_length,
-                 min_length=min(100, max_summary_length // 3)
-             )
-
-         final_summary = chunked_summarize(
-             text=document_text,
-             summarize_func=custom_summarize_func,
-             max_chunk_size=chunk_size
-         )
-
-         if not final_summary or len(final_summary.strip()) < 10:
-             raise HTTPException(status_code=500, detail="Summary generation failed")
-
-         processing_time = (datetime.now() - start_time).total_seconds()
-
-         logger.info(f"Summarization completed in {processing_time:.2f} seconds")
-
-         return {
-             "success": True,
-             "summary": final_summary,
-             "original_length": len(document_text),
-             "summary_length": len(final_summary),
-             "processing_time": processing_time,
-             "file_type": file_extension
-         }
-
-     except HTTPException:
-         raise
-     except Exception as e:
-         logger.error(f"Error processing document: {str(e)}")
-         logger.error(traceback.format_exc())
-         raise HTTPException(
-             status_code=500,
-             detail=f"Document processing failed: {str(e)}"
-         )
-     finally:
-         # Cleanup temporary files
-         try:
-             if temp_file_path and os.path.exists(temp_file_path):
-                 os.remove(temp_file_path)
-                 logger.info(f"Cleaned up: {temp_file_path}")
-         except Exception as cleanup_error:
-             logger.error(f"Cleanup error: {cleanup_error}")
-
- @app.post("/batch-summarize")
- async def batch_summarize_documents(files: List[UploadFile] = File(...)):
-     """
-     Summarize multiple documents in batch
-     """
-     if not DEPENDENCIES_LOADED:
-         raise HTTPException(
-             status_code=500,
-             detail="Required AI dependencies not loaded. Check server logs."
-         )
-
-     results = []
-
-     for file in files:
-         try:
-             # Use the single document summarization function
-             result = await summarize_document(file)
-             result["filename"] = file.filename
-             results.append(result)
-         except Exception as e:
-             results.append({
-                 "success": False,
-                 "filename": file.filename,
-                 "error": str(e)
-             })
-
-     return {
-         "success": True,
-         "processed_files": len(results),
-         "results": results
-     }
-
- if __name__ == "__main__":
-     logger.info("Starting Material Summarizer Server...")
-     logger.info("Dependencies loaded: %s", DEPENDENCIES_LOADED)
-
-     if not DEPENDENCIES_LOADED:
-         logger.error("CRITICAL: AI dependencies not loaded. Document processing will not work!")
-
-     port = int(os.environ.get("MATERIAL_PORT", 7861))
-     uvicorn.run(
-         "app:app",
-         host="0.0.0.0",
-         port=port,
-         reload=False
-     )
+ from fastapi import FastAPI, UploadFile, File, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ import warnings
+ warnings.filterwarnings("ignore", message=".*_pytree_node.*")
+ import uvicorn
+ import os
+ import tempfile
+ import aiofiles
+ from datetime import datetime
+ import traceback
+ import logging
+ from typing import List, Optional
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI(title="Material Summarizer API")
+
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ # Get URLs from environment
+ FRONTEND_URL = os.getenv('FRONTEND_URL')
+ BACKEND_URL = os.getenv('BACKEND_URL', 'http://localhost:5000')
+
+ # CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["FRONTEND_URL, BACKEND_URL"],  # Adjust in production
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Import processing functions
+ try:
+     from document_parser import parse_document
+     from summarizer import summarize_text
+     from utils import chunked_summarize
+     DEPENDENCIES_LOADED = True
+     logger.info("All AI dependencies loaded successfully")
+ except ImportError as e:
+     logger.error(f"Import error: {e}")
+     DEPENDENCIES_LOADED = False
+
+ @app.get("/")
+ async def root():
+     return {"message": "Material Summarizer API", "status": "running"}
+
+ @app.get("/health")
+ async def health_check():
+     status = "healthy" if DEPENDENCIES_LOADED else "missing_dependencies"
+     return {
+         "status": status,
+         "service": "material-summarizer",
+         "dependencies_loaded": DEPENDENCIES_LOADED
+     }
+
+ @app.post("/summarize-document")
+ async def summarize_document(
+     file: UploadFile = File(...),
+     max_summary_length: Optional[int] = 1000,
+     chunk_size: Optional[int] = 1500
+ ):
+     """
+     Summarize uploaded document (PDF, DOCX, TXT, etc.)
+     """
+     if not DEPENDENCIES_LOADED:
+         raise HTTPException(
+             status_code=500,
+             detail="Required AI dependencies not loaded. Check server logs."
+         )
+
+     temp_file_path = None
+
+     try:
+         # Validate file type
+         allowed_extensions = {'.pdf', '.docx', '.doc', '.txt', '.pptx', '.ppt'}
+         file_extension = os.path.splitext(file.filename)[1].lower()
+
+         if file_extension not in allowed_extensions:
+             raise HTTPException(
+                 status_code=400,
+                 detail=f"Unsupported document format. Allowed: {', '.join(allowed_extensions)}"
+             )
+
+         # Create temporary file
+         temp_file_path = f"temp_{file.filename}"
+
+         # Save uploaded file
+         logger.info(f"Saving uploaded file: {file.filename}")
+         async with aiofiles.open(temp_file_path, 'wb') as out_file:
+             content = await file.read()
+             await out_file.write(content)
+
+         start_time = datetime.now()
+
+         # 1. Parse document
+         logger.info("Step 1: Parsing document...")
+         if not os.path.exists(temp_file_path):
+             raise HTTPException(status_code=500, detail="Document file not found after upload")
+
+         document_text = parse_document(temp_file_path, file_extension)
+         logger.info(f"Extracted text length: {len(document_text)} characters")
+
+         if not document_text or len(document_text.strip()) < 10:
+             raise HTTPException(status_code=500, detail="Document parsing failed or content too short")
+
+         # 2. Summarize text with chunking
+         logger.info("Step 2: Generating summary...")
+
+         def custom_summarize_func(text):
+             return summarize_text(
+                 text,
+                 model_name="facebook/bart-large-cnn",
+                 max_length=max_summary_length,
+                 min_length=min(100, max_summary_length // 3)
+             )
+
+         final_summary = chunked_summarize(
+             text=document_text,
+             summarize_func=custom_summarize_func,
+             max_chunk_size=chunk_size
+         )
+
+         if not final_summary or len(final_summary.strip()) < 10:
+             raise HTTPException(status_code=500, detail="Summary generation failed")
+
+         processing_time = (datetime.now() - start_time).total_seconds()
+
+         logger.info(f"Summarization completed in {processing_time:.2f} seconds")
+
+         return {
+             "success": True,
+             "summary": final_summary,
+             "original_length": len(document_text),
+             "summary_length": len(final_summary),
+             "processing_time": processing_time,
+             "file_type": file_extension
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         logger.error(f"Error processing document: {str(e)}")
+         logger.error(traceback.format_exc())
+         raise HTTPException(
+             status_code=500,
+             detail=f"Document processing failed: {str(e)}"
+         )
+     finally:
+         # Cleanup temporary files
+         try:
+             if temp_file_path and os.path.exists(temp_file_path):
+                 os.remove(temp_file_path)
+                 logger.info(f"Cleaned up: {temp_file_path}")
+         except Exception as cleanup_error:
+             logger.error(f"Cleanup error: {cleanup_error}")
+
+ @app.post("/batch-summarize")
+ async def batch_summarize_documents(files: List[UploadFile] = File(...)):
+     """
+     Summarize multiple documents in batch
+     """
+     if not DEPENDENCIES_LOADED:
+         raise HTTPException(
+             status_code=500,
+             detail="Required AI dependencies not loaded. Check server logs."
+         )
+
+     results = []
+
+     for file in files:
+         try:
+             # Use the single document summarization function
+             result = await summarize_document(file)
+             result["filename"] = file.filename
+             results.append(result)
+         except Exception as e:
+             results.append({
+                 "success": False,
+                 "filename": file.filename,
+                 "error": str(e)
+             })
+
+     return {
+         "success": True,
+         "processed_files": len(results),
+         "results": results
+     }
+
+ if __name__ == "__main__":
+     logger.info("Starting Material Summarizer Server...")
+     logger.info("Dependencies loaded: %s", DEPENDENCIES_LOADED)
+
+     if not DEPENDENCIES_LOADED:
+         logger.error("CRITICAL: AI dependencies not loaded. Document processing will not work!")
+
+     port = int(os.environ.get("MATERIAL_PORT", 7861))
+     uvicorn.run(
+         "app:app",
+         host="0.0.0.0",
+         port=port,
+         reload=False
+     )
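
One detail present in both the old and new version of the file: the CORS middleware passes the single literal string "FRONTEND_URL, BACKEND_URL" as its allowed origin rather than the values read from the environment a few lines earlier. If the intent is to whitelist those two origins, the configuration would presumably look closer to the following self-contained sketch; the variable names come from the file itself, while filtering out unset values is an assumption, not part of the commit.

import os
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI(title="Material Summarizer API")

FRONTEND_URL = os.getenv("FRONTEND_URL")
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5000")

# Presumed intent: whitelist the two configured origins, skipping any that are unset.
allowed_origins = [origin for origin in (FRONTEND_URL, BACKEND_URL) if origin]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)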
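
For context on how the /summarize-document endpoint above is meant to be called, here is a minimal client sketch. It assumes the service is running locally on the default MATERIAL_PORT of 7861 and that the requests package is installed; the host, sample file name, and parameter values are illustrative and not part of the commit.

# Hypothetical client: upload one document and print the returned summary.
# The endpoint path, form field name ("file"), and query parameters mirror
# the handler signature in app.py; host, port, and file name are assumptions.
import requests

url = "http://localhost:7861/summarize-document"
with open("lecture_notes.pdf", "rb") as f:
    response = requests.post(
        url,
        files={"file": ("lecture_notes.pdf", f, "application/pdf")},
        params={"max_summary_length": 800, "chunk_size": 1500},
        timeout=300,  # summarization on CPU can take a while
    )

response.raise_for_status()
payload = response.json()
print(payload["summary"])
print(f"Processed {payload['original_length']} characters in {payload['processing_time']:.2f}s")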