File size: 2,113 Bytes
8d056b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97b3d3f
8d056b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
import re
from pathlib import Path

import jaconv
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoImageProcessor, AutoModelForVision2Seq

# Checkpoint identifier handed to every from_pretrained() call below, so the
# image processor, tokenizer and model all come from the same source.
pretrained_model_name_or_path = "jzhang533/manga-ocr-base-2025"

# The three components are created once, when the module is executed, and
# are then read as module globals by inference().
feature_extractor = AutoImageProcessor.from_pretrained(
    pretrained_model_name_or_path,
    use_fast=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path,
)
model = AutoModelForVision2Seq.from_pretrained(
    pretrained_model_name_or_path,
)


def post_process(text):
    """Normalize decoded OCR text.

    The steps run in this order:

    1. Remove every whitespace character.
    2. Replace each "…" with three ASCII dots.
    3. Replace every run of two or more "・" / "." characters with the same
       number of ASCII dots.
    4. Pass the result through ``jaconv.h2z`` (half-width to full-width
       conversion) with ASCII and digit conversion switched on.

    Args:
        text: The string to clean up.

    Returns:
        The normalized string.
    """
    def _same_length_dots(match):
        # Only the characters change; the run keeps its original length.
        return "." * len(match.group(0))

    without_spaces = "".join(text.split())
    ellipsis_expanded = without_spaces.replace("…", "...")
    dots_unified = re.sub("[・.]{2,}", _same_length_dots, ellipsis_expanded)
    return jaconv.h2z(dots_unified, ascii=True, digit=True)


def inference(img_or_path):
    """Run OCR on one image and return the recognized text.

    Args:
        img_or_path: Either a filesystem path (``str`` or ``pathlib.Path``)
            or an already opened ``PIL.Image.Image``.

    Returns:
        The decoded text after ``post_process`` normalization.

    Raises:
        ValueError: If ``img_or_path`` is neither a path nor a PIL image
            (``None`` falls into this case as well).
    """
    if isinstance(img_or_path, (str, Path)):
        # Open inside a context manager so the file handle is released right
        # away; convert() loads the pixels and returns an independent image.
        with Image.open(img_or_path) as opened:
            img = opened.convert("RGB")
    elif isinstance(img_or_path, Image.Image):
        # Leave RGB input untouched; normalize every other mode (grayscale,
        # palette, alpha, ...) to three channels.
        img = img_or_path if img_or_path.mode == "RGB" else img_or_path.convert("RGB")
    else:
        raise ValueError(f"img_or_path must be a path or PIL.Image, instead got: {img_or_path}")

    # NOTE(review): the processor/model are presumably set up for 3-channel
    # input (the upstream manga-ocr pipeline also converts to RGB) - confirm.
    pixel_values = feature_extractor(img, return_tensors="pt").pixel_values

    # squeeze() drops size-one axes and [None] puts a single leading axis
    # back, so generate() receives a batch containing one image.
    batch = pixel_values.squeeze()[None]
    token_ids = model.generate(batch, max_length=300)[0].cpu()

    decoded = tokenizer.decode(token_ids, skip_special_tokens=True)
    return post_process(decoded)

# Heading text for the demo page.
title = 'MangaOCR demo'

# Markdown bullet list describing the project, the model and the training
# data. Written as adjacent literals; the value starts and ends with a
# newline, exactly like a triple-quoted block would.
description = (
    '\n'
    '- This is derived from : <https://github.com/kha-white/manga-ocr>\n'
    '- The model being used : <https://huggingface.co/jzhang533/manga-ocr-base-2025>'
    ' (trained using scripts in [kha-white/manga-ocr](https://github.com/kha-white/manga-ocr)'
    ' with several tweaks)\n'
    '- Dataset being used to train the model:'
    ' [manga109-s](http://www.manga109.org/en/download_s.html) and synthetic data.\n'
)

# One single-element row per sample image: 00.jpg through 07.jpg.
examples = [[f'{index:02d}.jpg'] for index in range(8)]

# Build the web UI: a single image upload, delivered to inference() as a
# file path (type="filepath"), with the returned string shown as text.
demo = gr.Interface(
    fn=inference,
    inputs=[gr.Image(label="Upload Japanese Manga Image", type="filepath")],
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)

# Start serving as soon as the module is executed.
demo.launch()