Multicentury-HTR-Demo

Running

App Files Files Community

MikkoLipsanen committed 30 days ago

Commit

f00417c

verified ·

1 Parent(s): eee8423

Update app.py code to support new ONNX text recognition model

Browse files

Files changed (1) hide show

app.py +13 -12

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from optimum.onnxruntime import ORTModelForVision2Seq
 from transformers import TrOCRProcessor
-from huggingface_hub import login
 import gradio as gr
 import numpy as np
 import onnxruntime
@@ -16,8 +16,11 @@ from onnx_text_recognition import TextRecognition
 LINE_MODEL_PATH = "Kansallisarkisto/multicentury-textline-detection"
 REGION_MODEL_PATH = "Kansallisarkisto/court-records-region-detection"
-TROCR_PROCESSOR_PATH = "Kansallisarkisto/multicentury-htr-model-onnx"
-TROCR_MODEL_PATH = "Kansallisarkisto/multicentury-htr-model-onnx"
 # Allowed source paths for input images
 ALLOWED_SOURCES = ('https://astia.narc.fi', '/tmp/gradio')
@@ -51,7 +54,6 @@ def get_recognizer():
     """Initialize text recognition class."""
     try:
         recognizer = TextRecognition(
-                        processor_path = TROCR_PROCESSOR_PATH,
                         model_path = TROCR_MODEL_PATH,
                         device = 'cuda:0',
                         half_precision = True,
@@ -79,9 +81,8 @@ def merge_lines(segment_predictions):
 def get_text_predictions(image, segment_predictions, recognizer):
     """Collects text prediction data into dicts based on detected text regions."""
     img_lines = merge_lines(segment_predictions)
-    height, width = segment_predictions[0]['img_shape']
     # Process all lines of an image
-    texts = recognizer.process_lines(img_lines, image, height, width)
     return texts
 def is_allowed_source(file_path):
@@ -123,7 +124,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), title="Multicentury HTR Demo") as d
     gr.Markdown("# Multicentury HTR Demo")
     gr.Markdown("""The HTR pipeline contains three components: text region detection, textline detection and handwritten text recognition.
     The components run machine learning models that have been trained at the National Archives of Finland using mostly handwritten documents
-    from 17th, 18th, 19th and 20th centuries.
     Input image can be uploaded using the *Input image* window in the *Text content* tab, and the predicted text content will appear to the window
     on the right side of the image. Results of text region and text line detection can be viewed in the *Text regions* and *Text lines* tabs.
@@ -131,17 +132,17 @@ with gr.Blocks(theme=gr.themes.Monochrome(), title="Multicentury HTR Demo") as d
     Please note that this is a demo. 24/7 functionality is not guaranteed.
-    # Monen vuosisadan käsialantunnistus malli
-    Käsialantunnistus putkessa on kolme mallia: Tekstialueen tunnistus, tekstirivien tunnistus ja tekstintunnistus. Mallit on koulutettu pääosin
-    käsinkirjoitetulla Kansallisarkiston aineistolla, joka ajoittuu 1600-luvulta 1900-luvulle.
     Tunnistettavan kuvan voi ladata *Input image* nimiseen laatikkoon *Text content* välilehdellä. Prosessointi käynnistetään *Process image*
-    painikkeesta ja kuva on prosessoitu tunnistettu teksti ilmaantuu oikeaan laatikkoon nimeltä *Predicted text content*. Tekstialueen ja
     tekstirivien tunnistuksia voi tarkastella *Text regions* ja *Text lines* välilehdiltä. Parhaimman lopputuloksen saa hyvälaatuisilla kuvilla,
     joissa on normaalin kirjan mukainen taitto.
-    Huom! Tämä on demo sovellus. Ympärivuorokautista toimivuutta ei luvata.
     """)
     with gr.Tab("Text content"):

 from optimum.onnxruntime import ORTModelForVision2Seq
+from huggingface_hub import login, snapshot_download
 from transformers import TrOCRProcessor
 import gradio as gr
 import numpy as np
 import onnxruntime
 LINE_MODEL_PATH = "Kansallisarkisto/multicentury-textline-detection"
 REGION_MODEL_PATH = "Kansallisarkisto/court-records-region-detection"
+# Download repository to cache
+TROCR_MODEL_PATH = snapshot_download(
+    repo_id="Kansallisarkisto/multicentury-htr-model-small-onnx"
+)
 # Allowed source paths for input images
 ALLOWED_SOURCES = ('https://astia.narc.fi', '/tmp/gradio')
     """Initialize text recognition class."""
     try:
         recognizer = TextRecognition(
                         model_path = TROCR_MODEL_PATH,
                         device = 'cuda:0',
                         half_precision = True,
 def get_text_predictions(image, segment_predictions, recognizer):
     """Collects text prediction data into dicts based on detected text regions."""
     img_lines = merge_lines(segment_predictions)
     # Process all lines of an image
+    texts = recognizer.process_lines(img_lines, image)
     return texts
 def is_allowed_source(file_path):
     gr.Markdown("# Multicentury HTR Demo")
     gr.Markdown("""The HTR pipeline contains three components: text region detection, textline detection and handwritten text recognition.
     The components run machine learning models that have been trained at the National Archives of Finland using mostly handwritten documents
+    from 16th, 17th, 18th, 19th and 20th centuries.
     Input image can be uploaded using the *Input image* window in the *Text content* tab, and the predicted text content will appear to the window
     on the right side of the image. Results of text region and text line detection can be viewed in the *Text regions* and *Text lines* tabs.
     Please note that this is a demo. 24/7 functionality is not guaranteed.
+    # Monen vuosisadan käsialantunnistusmalli
+    Käsialantunnistusputkessa on kolme mallia: Tekstialueen tunnistus, tekstirivien tunnistus ja tekstintunnistus. Mallit on koulutettu pääosin
+    käsinkirjoitetulla Kansallisarkiston aineistolla, joka ajoittuu 1500-luvulta 1900-luvulle.
     Tunnistettavan kuvan voi ladata *Input image* nimiseen laatikkoon *Text content* välilehdellä. Prosessointi käynnistetään *Process image*
+    painikkeesta, ja kun kuva on prosessoitu, tunnistettu teksti ilmaantuu oikeaan laatikkoon nimeltä *Predicted text content*. Tekstialueen ja
     tekstirivien tunnistuksia voi tarkastella *Text regions* ja *Text lines* välilehdiltä. Parhaimman lopputuloksen saa hyvälaatuisilla kuvilla,
     joissa on normaalin kirjan mukainen taitto.
+    Huom! Tämä on demosovellus. Ympärivuorokautista toimivuutta ei luvata.
     """)
     with gr.Tab("Text content"):