Spaces:

shreyasvaidya
/

Image_to_text_translation

Runtime error

App Files Files Community

shreyasvaidya committed on Dec 25, 2024

Commit

9eb10ea

verified ·

1 Parent(s): f52e5d3

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

README.md +2 -2
app.py +16 -15

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: "IndicPhotoOCR"
 colorFrom: "purple"
 colorTo: "pink"
 sdk: "gradio"
@@ -15,7 +15,7 @@ app_port: 7865
 <p align="center">
   <img src="./static/pics/bharatOCR.png" alt="BharatOCR Logo" width="25%">
   <h3 align="center">
-IndicPhotoOCR - Comprehensive Scene Text Recognition Toolkit </br> across 13 Indian Languages
   </h3>
 </p>
 <div align="center">

 ---
+title: "Image_to_text_translation"
 colorFrom: "purple"
 colorTo: "pink"
 sdk: "gradio"
 <p align="center">
   <img src="./static/pics/bharatOCR.png" alt="BharatOCR Logo" width="25%">
   <h3 align="center">
+Scene Text to Text Translation
   </h3>
 </p>
 <div align="center">

app.py CHANGED Viewed

@@ -20,8 +20,8 @@ DEVICE = "cpu"
 # Initialize the OCR object for text detection and recognition
 ocr = OCR(device="cpu", verbose=False)
-def translate_en_hin(given_str):
-    model_name = "ai4bharat/indictrans2-en-indic-1B"
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
@@ -30,7 +30,7 @@ def translate_en_hin(given_str):
     model = model.to(DEVICE)
     model.eval()
-    src_lang, tgt_lang = "eng_Latn", "hin_Deva"
     batch = ip.preprocess_batch(
         [given_str],
@@ -276,20 +276,21 @@ def process_image(image):
     for id,bbox in enumerate(detections):
         # Identify the script and crop the image to this region
         script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
-        script_lang = "english"
-        if script_lang:  # Only proceed if a script language is identified
-            # Recognize text in the cropped area
-            recognized_text = ocr.recognise(cropped_path, "english")
-            x1 = min([bbox[i][0] for i in range(len(bbox))])
-            y1 = min([bbox[i][1] for i in range(len(bbox))])
-            x2 = max([bbox[i][0] for i in range(len(bbox))])
-            y2 = max([bbox[i][1] for i in range(len(bbox))])
             recognized_texts[f"img_{id}"] = {"txt":recognized_text,"bbox":[x1,y1,x2,y2]}
     # Combine recognized texts into a single string for display
-    return output_image, translate_en_hin(detect_para(recognized_texts))
 # Custom HTML for interface header with logos and alignment
 interface_html = """

 # Initialize the OCR object for text detection and recognition
 ocr = OCR(device="cpu", verbose=False)
+def translate(given_str,lang):
+    model_name = "ai4bharat/indictrans2-en-indic-1B" if lang=="english" else "ai4bharat/indictrans2-indic-en-1B"
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
     model = model.to(DEVICE)
     model.eval()
+    src_lang, tgt_lang = "eng_Latn", "hin_Deva" if lang=="english" else "hin_Deva", "eng_Latn"
     batch = ip.preprocess_batch(
         [given_str],
     for id,bbox in enumerate(detections):
         # Identify the script and crop the image to this region
         script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
+        x1 = min([bbox[i][0] for i in range(len(bbox))])
+        y1 = min([bbox[i][1] for i in range(len(bbox))])
+        x2 = max([bbox[i][0] for i in range(len(bbox))])
+        y2 = max([bbox[i][1] for i in range(len(bbox))])
+        if script_lang:
+            recognized_text = ocr.recognise(cropped_path,script_lang)
             recognized_texts[f"img_{id}"] = {"txt":recognized_text,"bbox":[x1,y1,x2,y2]}
+            translated = translate(recognized_texts,script_lang)
     # Combine recognized texts into a single string for display
+    return output_image, translated
 # Custom HTML for interface header with logos and alignment
 interface_html = """