import gradio as gr
import re
from pathlib import Path

import jaconv
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoImageProcessor, AutoModelForVision2Seq

pretrained_model_name_or_path = "jzhang533/manga-ocr-base-2025"
feature_extractor = AutoImageProcessor.from_pretrained(pretrained_model_name_or_path, use_fast=True)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
model = AutoModelForVision2Seq.from_pretrained(pretrained_model_name_or_path)
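
# Note: torch is imported but the model is left on CPU here. As an optional
# tweak (not part of the original app), it could be moved to GPU with
# model.to("cuda") when torch.cuda.is_available(); the pixel_values tensor in
# inference() below would then need to be moved to the same device as well.
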
def post_process(text):
    """Normalize raw OCR output: drop all whitespace, collapse ellipses and
    runs of dots/middle dots into plain periods, then convert half-width
    ASCII and digits to full-width with jaconv."""
    text = "".join(text.split())
    text = text.replace("…", "...")
    text = re.sub("[・.]{2,}", lambda x: (x.end() - x.start()) * ".", text)
    text = jaconv.h2z(text, ascii=True, digit=True)
    return text
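
# For example (a sketch of the normalization; assumes jaconv.h2z(ascii=True)
# maps the ASCII "." to its full-width form, which it does):
#   "です ・・・"  ->  "です..."  ->  "です．．．"
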
def inference(img_or_path):
    # Accept either a path to an image file or an already-loaded PIL image.
    if isinstance(img_or_path, (str, Path)):
        img = Image.open(img_or_path)
    elif isinstance(img_or_path, Image.Image):
        img = img_or_path
    else:
        raise ValueError(f"img_or_path must be a path or PIL.Image, instead got: {img_or_path}")

    # Preprocess the image, generate token ids, decode them, and normalize the text.
    pixel_values = feature_extractor(img, return_tensors="pt").pixel_values
    x = pixel_values.squeeze()
    x = model.generate(x[None], max_length=300)[0].cpu()
    x = tokenizer.decode(x, skip_special_tokens=True)
    x = post_process(x)
    return x
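
# Example (hypothetical local check, not wired into the Space UI):
#   inference("00.jpg") returns the recognized text of the first bundled
#   example image as full-width Japanese text.
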
title = 'MangaOCR demo'
description = '''
- This demo is derived from: <https://github.com/kha-white/manga-ocr>
- Model used: <https://huggingface.co/jzhang533/manga-ocr-base-2025> (trained with the scripts in [kha-white/manga-ocr](https://github.com/kha-white/manga-ocr), with several tweaks)
- Datasets used to train the model: [Manga109-s](http://www.manga109.org/en/download_s.html) and synthetic data.
'''
examples = [
    ['00.jpg'],
    ['01.jpg'],
    ['02.jpg'],
    ['03.jpg'],
    ['04.jpg'],
    ['05.jpg'],
    ['06.jpg'],
    ['07.jpg'],
]
gr.Interface(
    inference,
    inputs=[
        gr.Image(label="Upload Japanese Manga Image", type="filepath")
    ],
    outputs="text",
    title=title,
    description=description,
    examples=examples,
).launch()