Spaces:
Running
Running
| import runpod | |
| import base64 | |
| import fitz # PyMuPDF | |
def handler(job):
    """Convert a base64-encoded PDF into simple Markdown text.

    Args:
        job: Job payload mapping. ``job["input"]`` is read for:
            ``pdf_base64`` (required): the PDF bytes, base64-encoded.
            ``filename`` (optional): used as the Markdown title and echoed
            back in the result; defaults to ``"document.pdf"``.

    Returns:
        On success, a dict with keys ``markdown``, ``filename``,
        ``status`` (``"success"``) and ``pages`` (page count).
        On failure, a dict with keys ``error`` and ``status``
        (``"failed"``). Exceptions are caught and reported in the
        returned dict rather than propagated.
    """
    try:
        job_input = job["input"]

        # Get PDF data from base64
        pdf_base64 = job_input.get("pdf_base64")
        filename = job_input.get("filename", "document.pdf")

        if not pdf_base64:
            return {"error": "No PDF data provided", "status": "failed"}

        # Decode base64 PDF
        pdf_data = base64.b64decode(pdf_base64)

        # Extract text using PyMuPDF
        doc = fitz.open(stream=pdf_data, filetype="pdf")
        try:
            # Take the page count while the document is still open: len()
            # on a closed PyMuPDF document raises, which previously turned
            # every successful extraction into a "failed" response.
            page_count = len(doc)
            page_sections = [
                f"\n\n--- Page {page_num} ---\n\n{page.get_text()}"
                for page_num, page in enumerate(doc, start=1)
            ]
        finally:
            # Release the document even if text extraction raised.
            doc.close()

        # Convert to simple markdown
        markdown_content = "".join(
            [
                f"# {filename}\n\n",
                "*Extracted using PyMuPDF (simplified version)*\n\n",
                *page_sections,
            ]
        )

        return {
            "markdown": markdown_content,
            "filename": filename,
            "status": "success",
            "pages": page_count,
        }
    except Exception as e:
        # Top-level boundary of the worker: report the failure in the
        # response payload instead of raising.
        return {
            "error": str(e),
            "status": "failed",
        }
# RunPod serverless entrypoint: register `handler` with the RunPod runtime.
worker_config = {"handler": handler}
runpod.serverless.start(worker_config)