Spaces:

kawaiipeace
/

vms-fleet-receipt-reading

Runtime error

App Files Files Community

kawaiipeace committed on Jun 26

Commit

1064ef9

1 Parent(s): 3470a1b

model locally

Browse files

Files changed (4) hide show

app.py +59 -61
app_bk.py +121 -0
dockerfile.hf +10 -0
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,43 +1,39 @@
 import os
-from fastapi import FastAPI, HTTPException, Header, UploadFile, File
 from fastapi.middleware.cors import CORSMiddleware
-import gradio as gr
-from typhoon_ocr import ocr_document
 from pdf2image import convert_from_bytes
 from PIL import Image
-import re
-from dotenv import load_dotenv
-# --- Load environment variables from .env ---
-load_dotenv()
-# --- Config ---
-API_KEY = os.getenv("API_KEY")
-TYPHOON_API_KEY = os.getenv("TYPHOON_OCR_API_KEY")
-TYPHOON_BASE_URL = os.getenv("TYPHOON_BASE_URL", "https://api.opentyphoon.ai/v1")
-# --- FastAPI App ---
-app = FastAPI()
-# CORS (optional for public usage)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
 def extract_fields_regex(text: str) -> dict:
-    # Preprocess text
-    text = re.sub(r"<.*?>", "", text)                  # Strip tags
-    text = re.sub(r"\n+", "\n", text)                  # Collapse newlines
-    text = re.sub(r"\s{2,}", " ", text)                # Collapse multiple spaces
-    text = re.sub(r"\t+", " ", text)
     patterns = {
         "เลขที่ผู้เสียภาษี": r"(?:TAX\s*ID|เลขที่ผู้เสียภาษี)[\s:\-\.]*([\d]{10,13})",
-        # Updated pattern for correct tax invoice number
         "เลขที่ใบกำกับภาษี": r"(?:TAX\s*INV\.?|เลขที่ใบกำกับภาษี|ใบกำกับ)[\s:\-\.]*([\d]{8,20})",
         "จำนวนเงิน": r"(?:AMOUNT\s*THB|จำนวนเงิน|รวมเงิน)[\s:\-\.]*([\d,]+\.\d{2})",
         "ราคาต่อลิตร": r"(?:Baht\/Litr\.?|Bath\/Ltr\.?|ราคาต่อลิตร|ราคา\/ลิตร|ราคาน้ำมัน)[\s:\-\.]*([\d,]+\.\d{2})",
@@ -51,71 +47,73 @@ def extract_fields_regex(text: str) -> dict:
     for field, pattern in patterns.items():
         match = re.search(pattern, text, re.IGNORECASE)
         results[field] = match.group(1).strip() if match else None
-    # Optional fallback if regex fails
-    # if not results["เลขที่ใบกำกับภาษี"]:
-    #     match = re.search(r"TAX\s*INV\.?\s*</td>\s*<td>\s*([\d\-]+)", text, re.IGNORECASE)
-    # if match:
-    #     results["เลขที่ใบกำกับภาษี"] = match.group(1).strip()
     return results
 def pdf_to_image(file_bytes: bytes) -> Image.Image:
     images = convert_from_bytes(file_bytes)
-    return images[0]  # First page only
-# --- API Endpoint ---
-@app.post("/api/ocr_receipt")
-async def ocr_receipt(
-    file: UploadFile = File(...),
-    x_api_key: str | None = Header(None),
-):
-    if API_KEY and x_api_key != API_KEY:
-        raise HTTPException(status_code=401, detail="Invalid API key")
     content = await file.read()
     try:
-        # Handle PDF and image
         if file.filename.lower().endswith(".pdf"):
             image = pdf_to_image(content)
-            raw_output = ocr_document(image, task_type="structure")
         else:
-            raw_output = ocr_document(content, task_type="structure")
-        text = raw_output if isinstance(raw_output, str) else raw_output.get("text", "")
-        extracted = extract_fields_regex(text)
         return {
             "raw_ocr": text,
-            "extracted_fields": extracted,
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
-# --- Gradio UI ---
-def gradio_interface(image_path: str | Image.Image):
     if isinstance(image_path, str) and image_path.lower().endswith(".pdf"):
         with open(image_path, "rb") as f:
             image = pdf_to_image(f.read())
     else:
         image = image_path
-    raw = ocr_document(image, task_type="structure")
-    text = raw if isinstance(raw, str) else raw.get("text", "")
     extracted = extract_fields_regex(text)
     return text, extracted
 with gr.Blocks() as demo:
-    gr.Markdown("# 🧾 แปลงและตรวจสอบใบเสร็จ")
     with gr.Row():
         img = gr.Image(type="filepath", label="อัปโหลดไฟล์ PDF หรือรูปภาพ")
-        out_text = gr.Textbox(label="ข้อความทั้งหมด", lines=10)
-        out_fields = gr.JSON(label="ข้อความที่ดึงออกมา")
-    btn = gr.Button("ประมวลผลใบเสร็จ")
     btn.click(fn=gradio_interface, inputs=img, outputs=[out_text, out_fields])
-# --- Mount Gradio on FastAPI ---
-# app = gr.mount_gradio_app(app, demo, path="/ui")
-demo.launch(share=False)

 import os
+import re
+from fastapi import FastAPI, HTTPException, UploadFile, File
 from fastapi.middleware.cors import CORSMiddleware
 from pdf2image import convert_from_bytes
 from PIL import Image
+from transformers import pipeline
+import torch
+import gradio as gr
+# -------------------------
+# Load Hugging Face Model
+# -------------------------
+# Build the OCR pipeline once at import time; ocr_document() reuses this object.
+# NOTE(review): the "document-question-answering" task normally expects a
+# question alongside the image, while ocr_document() calls the pipeline with
+# the image only -- confirm this task matches the model before relying on it.
+ocr_pipeline = pipeline(
+    task="document-question-answering",
+    model="scb10x/typhoon-ocr-7b",
+    # First CUDA device when one is available, otherwise -1.
+    device=0 if torch.cuda.is_available() else -1
 )
+# -------------------------
+# OCR Wrapper
+# -------------------------
+def ocr_document(image):
+    """Run the module-level OCR pipeline on *image* and return its text.
+
+    Args:
+        image: Whatever ``ocr_pipeline`` accepts as an image input.
+
+    Returns:
+        The ``generated_text`` of the first result when present, otherwise
+        the first result's ``answer`` field, otherwise ``str()`` of the raw
+        pipeline output.
+    """
+    result = ocr_pipeline(image)
+    if not (result and isinstance(result, list)):
+        return str(result)
+    first = result[0]
+    if isinstance(first, dict):
+        # Question-answering style pipelines report "answer" rather than
+        # "generated_text"; accept either instead of raising KeyError.
+        for key in ("generated_text", "answer"):
+            if key in first:
+                return first[key]
+    return str(result)
+# -------------------------
+# Regex Field Extractor
+# -------------------------
 def extract_fields_regex(text: str) -> dict:
+    text = re.sub(r"<.*?>", "", text)       # remove tags
+    text = re.sub(r"\s{2,}", " ", text)     # collapse spaces
+    text = re.sub(r"\n{2,}", "\n", text)    # collapse newlines
     patterns = {
         "เลขที่ผู้เสียภาษี": r"(?:TAX\s*ID|เลขที่ผู้เสียภาษี)[\s:\-\.]*([\d]{10,13})",
         "เลขที่ใบกำกับภาษี": r"(?:TAX\s*INV\.?|เลขที่ใบกำกับภาษี|ใบกำกับ)[\s:\-\.]*([\d]{8,20})",
         "จำนวนเงิน": r"(?:AMOUNT\s*THB|จำนวนเงิน|รวมเงิน)[\s:\-\.]*([\d,]+\.\d{2})",
         "ราคาต่อลิตร": r"(?:Baht\/Litr\.?|Bath\/Ltr\.?|ราคาต่อลิตร|ราคา\/ลิตร|ราคาน้ำมัน)[\s:\-\.]*([\d,]+\.\d{2})",
     for field, pattern in patterns.items():
         match = re.search(pattern, text, re.IGNORECASE)
         results[field] = match.group(1).strip() if match else None
     return results
+# -------------------------
+# PDF Handling
+# -------------------------
 def pdf_to_image(file_bytes: bytes) -> Image.Image:
+    """Render the first page of a PDF as a PIL image.
+
+    Args:
+        file_bytes: Raw bytes of the PDF document.
+
+    Returns:
+        The first page as a ``PIL.Image.Image``.
+
+    Raises:
+        ValueError: If the conversion yields no pages.
+    """
+    # Only page 1 is used, so ask pdf2image to rasterise just that page
+    # instead of converting the whole document and discarding the rest.
+    images = convert_from_bytes(file_bytes, first_page=1, last_page=1)
+    if not images:
+        raise ValueError("PDF contains no pages")
+    return images[0]
+# -------------------------
+# FastAPI App
+# -------------------------
+# HTTP API application object; the /api/ocr_receipt route is registered on it.
+app = FastAPI()
+# Optional: Allow all CORS (customize for security)
+# NOTE(review): wildcard origins/methods/headers accept cross-origin calls
+# from any site -- tighten this list before exposing non-public data.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.post("/api/ocr_receipt")
+async def ocr_receipt(file: UploadFile = File(...)):
+    """OCR an uploaded receipt (PDF or image) and extract its fields.
+
+    Args:
+        file: The uploaded document. Names ending in ``.pdf`` are rendered
+            to an image first; anything else is opened as an image.
+
+    Returns:
+        A dict with ``raw_ocr`` (the OCR text) and ``extracted_fields``
+        (the output of ``extract_fields_regex``).
+
+    Raises:
+        HTTPException: Status 500 carrying the error message when any step
+            of the conversion, OCR, or extraction fails.
+    """
     content = await file.read()
+    # Uploads may arrive without a filename; treat those as images rather
+    # than failing on None.lower().
+    filename = (file.filename or "").lower()
     try:
+        # Convert PDF to image or use image directly
+        if filename.endswith(".pdf"):
             image = pdf_to_image(content)
         else:
+            image = Image.open(file.file)
+        text = ocr_document(image)
+        fields = extract_fields_regex(text)
         return {
             "raw_ocr": text,
+            "extracted_fields": fields,
         }
     except Exception as e:
+        # Chain the cause so server-side tracebacks keep the original error.
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# -------------------------
+# Gradio UI
+# -------------------------
+def gradio_interface(image_path):
+    """Gradio callback: OCR the uploaded file and extract receipt fields.
+
+    Args:
+        image_path: Value supplied by the upload component. A string ending
+            in ``.pdf`` is read from disk and rendered to an image; any
+            other value is handed to ``ocr_document`` unchanged.
+
+    Returns:
+        A ``(text, fields)`` tuple for the text box and JSON outputs. Both
+        are empty when nothing was uploaded.
+    """
+    if image_path is None:
+        # Button pressed with nothing uploaded: show empty outputs instead
+        # of sending None to the OCR pipeline.
+        return "", {}
     if isinstance(image_path, str) and image_path.lower().endswith(".pdf"):
         with open(image_path, "rb") as f:
             image = pdf_to_image(f.read())
     else:
         image = image_path
+    text = ocr_document(image)
     extracted = extract_fields_regex(text)
     return text, extracted
 with gr.Blocks() as demo:
+    gr.Markdown("# 🧾 OCR ใบเสร็จ (Thai Receipt Scanner)")
+    # One row holding the upload widget, the raw-text box and the
+    # extracted-fields viewer.
     with gr.Row():
         img = gr.Image(type="filepath", label="อัปโหลดไฟล์ PDF หรือรูปภาพ")
+        out_text = gr.Textbox(label="ข้อความทั้งหมด", lines=12)
+        out_fields = gr.JSON(label="ฟิลด์ที่ดึงออกมา")
+    btn = gr.Button("ประมวลผล")
+    # Clicking the button passes the uploaded file path to gradio_interface
+    # and shows its (text, fields) return values in the two outputs.
     btn.click(fn=gradio_interface, inputs=img, outputs=[out_text, out_fields])
+# For Hugging Face Spaces — no uvicorn needed
+# NOTE(review): only the Gradio UI is launched here; nothing visible in this
+# file serves the FastAPI `app`, so /api/ocr_receipt may be unreachable --
+# confirm how the API is meant to be exposed.
+demo.launch()

app_bk.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import os
+from fastapi import FastAPI, HTTPException, Header, UploadFile, File
+from fastapi.middleware.cors import CORSMiddleware
+import gradio as gr
+from typhoon_ocr import ocr_document
+from pdf2image import convert_from_bytes
+from PIL import Image
+import re
+from dotenv import load_dotenv
+# --- Load environment variables from .env ---
+load_dotenv()
+# --- Config ---
+# Shared secret compared against the `x_api_key` header parameter in
+# ocr_receipt; when it is unset (falsy) that check is skipped.
+API_KEY = os.getenv("API_KEY")
+# NOTE(review): the two TYPHOON_* values are read here but not referenced
+# again in this file -- presumably typhoon_ocr picks them up from the
+# environment itself; confirm.
+TYPHOON_API_KEY = os.getenv("TYPHOON_OCR_API_KEY")
+TYPHOON_BASE_URL = os.getenv("TYPHOON_BASE_URL", "https://api.opentyphoon.ai/v1")
+# --- FastAPI App ---
+app = FastAPI()
+# CORS (optional for public usage)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+def extract_fields_regex(text: str) -> dict:
+    """Pull the known receipt fields out of OCR text with regular expressions.
+
+    The text is first normalised (markup tags removed, whitespace runs
+    collapsed), then each field's pattern is searched case-insensitively.
+
+    Args:
+        text: Raw OCR output, possibly containing HTML-like tags.
+
+    Returns:
+        A dict keyed by the Thai field label. Each value is the first
+        captured match with surrounding whitespace stripped, or ``None``
+        when the pattern does not match.
+    """
+    # Normalisation passes, applied in this exact order.
+    cleanup_steps = (
+        (r"<.*?>", ""),      # strip tags
+        (r"\n+", "\n"),      # collapse newlines
+        (r"\s{2,}", " "),    # collapse whitespace runs
+        (r"\t+", " "),       # remaining tabs become spaces
+    )
+    for needle, replacement in cleanup_steps:
+        text = re.sub(needle, replacement, text)
+    field_patterns = {
+        "เลขที่ผู้เสียภาษี": r"(?:TAX\s*ID|เลขที่ผู้เสียภาษี)[\s:\-\.]*([\d]{10,13})",
+        "เลขที่ใบกำกับภาษี": r"(?:TAX\s*INV\.?|เลขที่ใบกำกับภาษี|ใบกำกับ)[\s:\-\.]*([\d]{8,20})",
+        "จำนวนเงิน": r"(?:AMOUNT\s*THB|จำนวนเงิน|รวมเงิน)[\s:\-\.]*([\d,]+\.\d{2})",
+        "ราคาต่อลิตร": r"(?:Baht\/Litr\.?|Bath\/Ltr\.?|ราคาต่อลิตร|ราคา\/ลิตร|ราคาน้ำมัน)[\s:\-\.]*([\d,]+\.\d{2})",
+        "ลิตร": r"(?:Ltr\.?|Ltrs?\.?|ลิตร)[\s:\-\.]*([\d,]+\.\d{3})",
+        "ภาษีมูลค่าเพิ่ม": r"(?:VAT|ภาษีมูลค่าเพิ่ม)[\s:\-\.]*([\d,]+\.\d{2})",
+        "ยอดรวม": r"(?:TOTAL\s*THB|ยอดรวม|รวมทั้งสิ้น|รวมเงินทั้งสิ้น)[\s:\-\.]*([\d,]+\.\d{2})",
+        "วันที่": r"(?:DATE|วันที่|ออกใบกำกับวันที่)[\s:\-\.]*([\d]{2}/[\d]{2}/[\d]{2,4})",
+    }
+    def first_capture(regex):
+        # First capture group of the first match, trimmed; None when absent.
+        found = re.search(regex, text, re.IGNORECASE)
+        return found.group(1).strip() if found else None
+    return {label: first_capture(regex) for label, regex in field_patterns.items()}
+def pdf_to_image(file_bytes: bytes) -> Image.Image:
+    """Render the first page of a PDF as a PIL image.
+
+    Args:
+        file_bytes: Raw bytes of the PDF document.
+
+    Returns:
+        The first page as a ``PIL.Image.Image``.
+
+    Raises:
+        ValueError: If the conversion yields no pages.
+    """
+    # Only page 1 is used, so ask pdf2image to rasterise just that page
+    # instead of converting the whole document and discarding the rest.
+    images = convert_from_bytes(file_bytes, first_page=1, last_page=1)
+    if not images:
+        raise ValueError("PDF contains no pages")
+    return images[0]
+# --- API Endpoint ---
+@app.post("/api/ocr_receipt")
+async def ocr_receipt(
+    file: UploadFile = File(...),
+    x_api_key: str | None = Header(None),
+):
+    """OCR an uploaded receipt (PDF or image) and extract its fields.
+
+    Args:
+        file: The uploaded document. Names ending in ``.pdf`` are rendered
+            to an image first; anything else is sent to OCR as raw bytes.
+        x_api_key: Caller-supplied key; checked only when ``API_KEY`` is set.
+
+    Returns:
+        A dict with ``raw_ocr`` (the OCR text) and ``extracted_fields``
+        (the output of ``extract_fields_regex``).
+
+    Raises:
+        HTTPException: Status 401 when ``API_KEY`` is set and the supplied
+            key differs; status 500 carrying the error message when
+            conversion, OCR, or extraction fails.
+    """
+    if API_KEY and x_api_key != API_KEY:
+        raise HTTPException(status_code=401, detail="Invalid API key")
+    content = await file.read()
+    # Uploads may arrive without a filename; treat those as images rather
+    # than failing on None.lower().
+    filename = (file.filename or "").lower()
+    try:
+        # Handle PDF and image
+        if filename.endswith(".pdf"):
+            image = pdf_to_image(content)
+            raw_output = ocr_document(image, task_type="structure")
+        else:
+            raw_output = ocr_document(content, task_type="structure")
+        # The OCR call may return plain text or a mapping with a "text" entry.
+        text = raw_output if isinstance(raw_output, str) else raw_output.get("text", "")
+        extracted = extract_fields_regex(text)
+        return {
+            "raw_ocr": text,
+            "extracted_fields": extracted,
+        }
+    except Exception as e:
+        # Chain the cause so server-side tracebacks keep the original error.
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# --- Gradio UI ---
+def gradio_interface(image_path: str | Image.Image):
+    """Gradio callback: OCR the uploaded file and extract receipt fields.
+
+    Args:
+        image_path: Value supplied by the upload component. A string ending
+            in ``.pdf`` is read from disk and rendered to an image; any
+            other value is handed to ``ocr_document`` unchanged.
+
+    Returns:
+        A ``(text, fields)`` tuple for the text box and JSON outputs. Both
+        are empty when nothing was uploaded.
+    """
+    if image_path is None:
+        # Button pressed with nothing uploaded: show empty outputs instead
+        # of sending None to the OCR call.
+        return "", {}
+    if isinstance(image_path, str) and image_path.lower().endswith(".pdf"):
+        with open(image_path, "rb") as f:
+            image = pdf_to_image(f.read())
+    else:
+        image = image_path
+    raw = ocr_document(image, task_type="structure")
+    # The OCR call may return plain text or a mapping with a "text" entry.
+    text = raw if isinstance(raw, str) else raw.get("text", "")
+    extracted = extract_fields_regex(text)
+    return text, extracted
+with gr.Blocks() as demo:
+    gr.Markdown("# 🧾 แปลงและตรวจสอบใบเสร็จ")
+    # One row holding the upload widget, the raw-text box and the
+    # extracted-fields viewer.
+    with gr.Row():
+        img = gr.Image(type="filepath", label="อัปโหลดไฟล์ PDF หรือรูปภาพ")
+        out_text = gr.Textbox(label="ข้อความทั้งหมด", lines=10)
+        out_fields = gr.JSON(label="ข้อความที่ดึงออกมา")
+    btn = gr.Button("ประมวลผลใบเสร็จ")
+    # Clicking the button passes the uploaded file path to gradio_interface
+    # and shows its (text, fields) return values in the two outputs.
+    btn.click(fn=gradio_interface, inputs=img, outputs=[out_text, out_fields])
+# --- Mount Gradio on FastAPI ---
+# NOTE(review): the mount below is disabled, so only the Gradio UI is
+# launched and nothing visible in this file serves the FastAPI `app` --
+# confirm how /api/ocr_receipt is meant to be exposed.
+# app = gr.mount_gradio_app(app, demo, path="/ui")
+demo.launch(share=False)

dockerfile.hf ADDED Viewed

	@@ -0,0 +1,10 @@

+FROM huggingface/transformers-pytorch-gpu:4.41.1
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --upgrade pip && pip install -r requirements.txt
+COPY . .
+CMD ["python3", "app.py"]

requirements.txt CHANGED Viewed

@@ -7,3 +7,4 @@ pdf2image
 Pillow
 python-dotenv
 openai

 Pillow
 python-dotenv
 openai
+transformers>=4.41.1