Spaces:
Running
Running
| from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks | |
| from fastapi.responses import HTMLResponse, FileResponse | |
| import os | |
| import tempfile | |
| import shutil | |
| from pathlib import Path | |
| import asyncio | |
| from typing import Dict, Optional | |
| import uuid | |
# Application object for the service; the title and version given here are
# passed straight to the FastAPI constructor.
app = FastAPI(
    title="MinerU PDF Converter",
    version="0.2.0",
)
async def root():
    """Return a small JSON-style greeting describing the running service.

    Returns:
        A dict with a fixed ``message``, a fixed ``status`` of ``"running"``
        and an ``environment`` entry holding the ``SPACE_ID`` environment
        variable, or ``"local"`` when that variable is not set.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; presumably it is served at "/" - confirm.
    space_id = os.environ.get("SPACE_ID", "local")
    return dict(
        message="Hello World from MinerU PDF Converter!",
        status="running",
        environment=space_id,
    )
async def health_check():
    """Report that the service is alive.

    Returns:
        A constant dict: ``status`` is ``"healthy"`` and ``service`` is
        ``"pdf2md"``.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; presumably it is served at "/health" - confirm.
    return dict(status="healthy", service="pdf2md")
async def test_page():
    """Return a static HTML page confirming that the service is running.

    The page links to ``/docs`` and ``/health``.

    Returns:
        The HTML document as a string.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; presumably it is served at "/test" as an HTML response - confirm.
    # The markup is kept flush-left so the returned text is exactly what the
    # literal contains.
    page = """
<html>
<head>
<title>PDF to Markdown - Test</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
.status {
background: #e8f5e9;
padding: 10px;
border-radius: 5px;
margin: 20px 0;
}
</style>
</head>
<body>
<h1>PDF to Markdown Converter</h1>
<div class="status">
✅ Service is running!
</div>
<p>This is a test deployment. Full functionality coming soon.</p>
<p>
<a href="/docs">API Documentation</a> |
<a href="/health">Health Check</a>
</p>
</body>
</html>
"""
    return page
async def api_info():
    """Describe the API: its name, version and a table of endpoints.

    Returns:
        A dict with ``name``, ``version`` and ``endpoints``; the latter maps
        each path to a short human-readable description.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; presumably it is served at "/api/info" - confirm.
    endpoints = {
        "/": "Main endpoint",
        "/health": "Health check",
        "/test": "Test HTML page",
        "/docs": "FastAPI automatic documentation",
        "/api/info": "This endpoint",
        "/api/convert": "Convert PDF to Markdown (POST)",
        "/api/status/{task_id}": "Check conversion status",
        "/api/download/{task_id}": "Download converted markdown",
    }
    info = {"name": "PDF to Markdown Converter API", "version": "0.2.0"}
    info["endpoints"] = endpoints
    return info
# In-memory registry of conversion tasks, keyed by task ID. Each value is a
# plain dict describing one task (status, filename, result, error, temp_dir).
# It starts empty and lives only as long as this module is loaded.
conversion_tasks: Dict[str, dict] = dict()
async def convert_pdf(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...)
):
    """Accept a PDF upload and schedule its conversion to Markdown.

    The upload is saved into a fresh temporary directory, a task record is
    created in ``conversion_tasks`` and ``process_pdf_conversion`` is queued
    as a background task.

    Args:
        background_tasks: Queue used to run the conversion after the
            response has been produced.
        file: The uploaded file. Its name must end in ``.pdf``
            (case-insensitive).

    Returns:
        A dict with the new ``task_id``, the initial ``status``
        (``"processing"``), a ``message`` and the ``check_status_url``.

    Raises:
        HTTPException: 400 when the upload has no name or is not a ``.pdf``
            file; 500 when the upload cannot be written to disk.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; presumably it is served as POST "/api/convert" - confirm.

    # The file name is client-controlled and may be missing altogether.
    original_name = file.filename or ""
    # Keep only the final path component (treating "\" as a separator too) so
    # names such as "../../x.pdf" or "/etc/x.pdf" cannot place the upload
    # outside the temporary directory.
    safe_name = os.path.basename(original_name.replace("\\", "/"))
    if not safe_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    # Generate unique task ID
    task_id = str(uuid.uuid4())

    # Save uploaded file
    temp_dir = Path(tempfile.mkdtemp())
    pdf_path = temp_dir / safe_name
    try:
        with open(pdf_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    except Exception as e:
        # Best-effort cleanup: a failure while removing the directory must
        # not hide the original save error from the client.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to save file: {str(e)}"
        ) from e

    # Initialize task status
    conversion_tasks[task_id] = {
        "status": "processing",
        "filename": safe_name,
        "result": None,
        "error": None,
        "temp_dir": str(temp_dir),
    }

    # Start conversion in background
    background_tasks.add_task(process_pdf_conversion, task_id, str(pdf_path))

    return {
        "task_id": task_id,
        "status": "processing",
        "message": "PDF conversion started",
        "check_status_url": f"/api/status/{task_id}",
    }
async def process_pdf_conversion(task_id: str, pdf_path: str):
    """Write a placeholder Markdown file for a PDF and record the outcome.

    The real conversion is not implemented yet: after a short simulated
    delay, a stub ``.md`` file is written next to the PDF. The matching
    entry in ``conversion_tasks`` is then marked ``"completed"`` with the
    output path in ``result``, or ``"failed"`` with the error text in
    ``error``.

    Args:
        task_id: Key of the task record in ``conversion_tasks``.
        pdf_path: Path of the saved PDF; the output uses the same path with
            the suffix replaced by ``.md``.
    """
    try:
        # For now, just simulate conversion
        await asyncio.sleep(2)  # Simulate processing

        # Create a dummy markdown file
        source = Path(pdf_path)
        output_path = source.with_suffix('.md')
        # Explicit UTF-8: the heading embeds the uploaded file's name, which
        # may contain characters the platform's default encoding cannot
        # represent.
        with open(output_path, 'w', encoding="utf-8") as f:
            f.write(f"# Converted from {source.name}\n\n")
            f.write("This is a placeholder conversion. Full MinerU integration coming soon.\n")

        conversion_tasks[task_id]["status"] = "completed"
        conversion_tasks[task_id]["result"] = str(output_path)
    except Exception as e:
        # This runs as a background task, so the failure is recorded on the
        # task record rather than raised.
        conversion_tasks[task_id]["status"] = "failed"
        conversion_tasks[task_id]["error"] = str(e)
async def get_conversion_status(task_id: str):
    """Report the current state of a conversion task.

    Args:
        task_id: Key of the task in ``conversion_tasks``.

    Returns:
        A dict with ``task_id``, ``status`` and ``filename``. A completed
        task also carries ``download_url``; a failed task carries ``error``.

    Raises:
        HTTPException: 404 when no task with this ID is registered.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; presumably it is served at "/api/status/{task_id}" - confirm.
    if task_id not in conversion_tasks:
        raise HTTPException(status_code=404, detail="Task not found")

    record = conversion_tasks[task_id]
    state = record["status"]
    payload = dict(task_id=task_id, status=state, filename=record["filename"])

    # Only finished tasks get an extra field; "processing" adds nothing.
    if state == "completed":
        payload["download_url"] = f"/api/download/{task_id}"
    elif state == "failed":
        payload["error"] = record["error"]
    return payload
async def download_converted_file(task_id: str):
    """Serve the Markdown file produced for a completed conversion task.

    Args:
        task_id: Key of the task in ``conversion_tasks``.

    Returns:
        A ``FileResponse`` for the stored result path, with media type
        ``text/markdown`` and the result's base name as the file name.

    Raises:
        HTTPException: 404 when the task is unknown, 400 when its status is
            not ``"completed"``, and 404 when the result path is empty or
            the file no longer exists.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; presumably it is served at "/api/download/{task_id}" - confirm.
    if task_id not in conversion_tasks:
        raise HTTPException(status_code=404, detail="Task not found")

    record = conversion_tasks[task_id]
    if record["status"] != "completed":
        raise HTTPException(status_code=400, detail="Conversion not completed")

    result_path = record["result"]
    # A completed task must still have a result path that exists on disk.
    if not (result_path and Path(result_path).exists()):
        raise HTTPException(status_code=404, detail="Converted file not found")

    download_name = Path(result_path).name
    return FileResponse(
        result_path,
        media_type="text/markdown",
        filename=download_name,
    )