Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	| import gradio as gr | |
| import re | |
| from pathlib import Path | |
| import jaconv | |
| import torch | |
| from PIL import Image | |
| from transformers import AutoTokenizer, AutoImageProcessor, AutoModelForVision2Seq | |
# Checkpoint identifier shared by the image processor, tokenizer and model.
pretrained_model_name_or_path = "jzhang533/manga-ocr-base-2025"

# All three components are loaded once at import time and reused per request.
feature_extractor = AutoImageProcessor.from_pretrained(
    pretrained_model_name_or_path,
    use_fast=True,
)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
model = AutoModelForVision2Seq.from_pretrained(pretrained_model_name_or_path)
def post_process(text):
    """Normalise raw decoder output into the final OCR string.

    Steps, in order:
      1. Remove every whitespace character.
      2. Expand each horizontal-ellipsis character into three ASCII periods.
      3. Rewrite any run of two or more ``・``/``.`` characters as the same
         number of ASCII periods.
      4. Pass the result through ``jaconv.h2z`` with ASCII and digit
         conversion enabled.

    Args:
        text: The decoded model output.

    Returns:
        The normalised string.
    """
    compact = "".join(text.split())
    expanded = compact.replace("…", "...")

    def _as_periods(match):
        # Keep the run length, but make every character a plain period.
        return "." * len(match.group(0))

    unified = re.sub("[・.]{2,}", _as_periods, expanded)
    return jaconv.h2z(unified, ascii=True, digit=True)
def inference(img_or_path):
    """Run OCR on a single image and return the recognised text.

    Args:
        img_or_path: A filesystem path (``str`` or ``pathlib.Path``) to an
            image file, or an already-loaded ``PIL.Image.Image``.

    Returns:
        The decoded text after ``post_process`` normalisation.

    Raises:
        ValueError: If ``img_or_path`` is neither a path nor a PIL image
            (this includes ``None``).
    """
    if isinstance(img_or_path, (str, Path)):
        # convert() decodes the pixels into a new image, so the underlying
        # file handle can be released as soon as the ``with`` block exits.
        with Image.open(img_or_path) as opened:
            img = opened.convert("RGB")
    elif isinstance(img_or_path, Image.Image):
        # Normalise RGBA / palette / greyscale inputs to three channels.
        img = img_or_path.convert("RGB")
    else:
        raise ValueError(f"img_or_path must be a path or PIL.Image, instead got: {img_or_path}")

    # A single image is passed in, so the processor output is used as the
    # batch directly instead of squeezing and re-adding the leading axis.
    pixel_values = feature_extractor(img, return_tensors="pt").pixel_values
    token_ids = model.generate(pixel_values.to(model.device), max_length=300)[0].cpu()
    decoded = tokenizer.decode(token_ids, skip_special_tokens=True)
    return post_process(decoded)
# Text shown at the top of the Gradio page.
title = 'MangaOCR demo'
description = '''
- This is derived from : <https://github.com/kha-white/manga-ocr>
- The model being used : <https://huggingface.co/jzhang533/manga-ocr-base-2025> (trained using scripts in [kha-white/manga-ocr](https://github.com/kha-white/manga-ocr) with several tweaks)
- Dataset being used to train the model: [manga109-s](http://www.manga109.org/en/download_s.html) and synthetic data.
'''

# One example row per sample image; each row holds the single image input.
examples = [
    [sample]
    for sample in (
        '00.jpg',
        '01.jpg',
        '02.jpg',
        '03.jpg',
        '04.jpg',
        '05.jpg',
        '06.jpg',
        '07.jpg',
    )
]

# The image component hands ``inference`` a file path (type="filepath").
image_input = gr.Image(label="Upload Japanese Manga Image", type="filepath")

demo = gr.Interface(
    inference,
    inputs=[image_input],
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)
demo.launch()
