fixed exe error: File "Lib\site-packages\PyInstaller\hooks\rthooks\pyi_rth_multiprocessing.py", line 43, in _freeze_support ValueError: not enough values to unpack (expected 2, got 1)
Browse files- .gitattributes +1 -0
- parser_sevenof9_v1_1_en.exe +3 -0
- parser_sevenof9_v1_1_en.py +430 -0
    	
        .gitattributes
    CHANGED
    
    | @@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
             
            parser_sevenof9_v1_de.exe filter=lfs diff=lfs merge=lfs -text
         | 
| 37 | 
             
            parser_sevenof9_v1_en.exe filter=lfs diff=lfs merge=lfs -text
         | 
|  | 
|  | |
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
             
            parser_sevenof9_v1_de.exe filter=lfs diff=lfs merge=lfs -text
         | 
| 37 | 
             
            parser_sevenof9_v1_en.exe filter=lfs diff=lfs merge=lfs -text
         | 
| 38 | 
            +
            parser_sevenof9_v1_1_en.exe filter=lfs diff=lfs merge=lfs -text
         | 
    	
        parser_sevenof9_v1_1_en.exe
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:aabef5ef27754c71b86dee6e8cfd83b1d688278cceb383243236548d045e03d1
         | 
| 3 | 
            +
            size 25576083
         | 
    	
        parser_sevenof9_v1_1_en.py
    ADDED
    
    | @@ -0,0 +1,430 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            import sys
         | 
| 3 | 
            +
            import tkinter as tk # internal
         | 
| 4 | 
            +
            from tkinter import filedialog, messagebox # internal
         | 
| 5 | 
            +
            import subprocess
         | 
| 6 | 
            +
            import threading
         | 
| 7 | 
            +
            import tempfile
         | 
| 8 | 
            +
            import shutil
         | 
| 9 | 
            +
            import json
         | 
| 10 | 
            +
            import logging
         | 
| 11 | 
            +
            import pdfplumber
         | 
| 12 | 
            +
            from pdfplumber.utils import get_bbox_overlap, obj_to_bbox
         | 
| 13 | 
            +
            from pdfplumber.utils.exceptions import PdfminerException
         | 
| 14 | 
            +
            from joblib import delayed, cpu_count, parallel_backend, Parallel
         | 
| 15 | 
            +
            import multiprocessing # internal
         | 
| 16 | 
            +
            from multiprocessing import Pool # internal
         | 
| 17 | 
            +
             | 
| 18 | 
            +
             | 
| 19 | 
            +
            # ========================
         | 
| 20 | 
            +
            # Parser Configuration
         | 
| 21 | 
            +
            # ========================
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            # Keyword arguments passed to pdfplumber's ``page.extract_words`` for every page.
            TEXT_EXTRACTION_SETTINGS = dict(
                x_tolerance=1,
                y_tolerance=3,
                keep_blank_chars=False,
                use_text_flow=True,
            )

            # PDFs with at most this many pages are processed page by page in a single
            # process; longer PDFs are split across a process pool.
            PARALLEL_THRESHOLD = 16

            # On Windows everything written to stderr is discarded.
            # NOTE(review): presumably required for the windowed PyInstaller build, where
            # stderr may be unusable -- confirm before relying on this.
            if sys.platform == "win32":
                sys.stderr = open(os.devnull, 'w')
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            def suppress_pdfminer_logging():
                """Set the ``pdfminer`` logger and its listed sub-loggers to level ERROR."""
                submodules = (
                    "pdfparser",
                    "pdfdocument",
                    "pdfpage",
                    "converter",
                    "layout",
                    "cmapdb",
                    "utils",
                )
                noisy_loggers = ["pdfminer"]
                noisy_loggers.extend(f"pdfminer.{module}" for module in submodules)
                for name in noisy_loggers:
                    logging.getLogger(name).setLevel(logging.ERROR)
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            def clean_cell_text(text):
                """Normalise the text of one table cell.

                Hyphenated line breaks ("-" followed by a newline) are removed so the two
                word halves join, remaining newlines become spaces, and every run of
                whitespace collapses to a single space.

                Args:
                    text: Raw cell content.

                Returns:
                    The cleaned string, or "" when ``text`` is not a ``str``.
                """
                if isinstance(text, str):
                    dehyphenated = text.replace("-\n", "")
                    tokens = dehyphenated.replace("\n", " ").split()
                    return " ".join(tokens)
                return ""
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            def safe_join(row):
                """Return the cells of ``row`` as a list of cleaned strings.

                ``None`` cells become ""; every other cell is converted with ``str`` and
                passed through ``clean_cell_text``.
                """
                cleaned = []
                for cell in row:
                    if cell is None:
                        cleaned.append("")
                    else:
                        cleaned.append(clean_cell_text(str(cell)))
                return cleaned
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            def clamp_bbox(bbox, page_width, page_height):
                """Restrict a bounding box to the page rectangle.

                Args:
                    bbox: ``(x0, top, x1, bottom)`` coordinates.
                    page_width: Upper limit for the x coordinates (lower limit is 0).
                    page_height: Upper limit for the y coordinates (lower limit is 0).

                Returns:
                    A ``(x0, top, x1, bottom)`` tuple with every value clamped to its range.
                """
                def _clamp(value, upper):
                    return max(0, min(value, upper))

                left, upper_edge, right, lower_edge = bbox
                return (
                    _clamp(left, page_width),
                    _clamp(upper_edge, page_height),
                    _clamp(right, page_width),
                    _clamp(lower_edge, page_height),
                )
         | 
| 64 | 
            +
             | 
| 65 | 
            +
            def process_page(args):
                """Extract the running text and the tables of a single PDF page.

                Args:
                    args: Tuple ``(page_number, pdf_path, text_settings)``: the zero-based
                        index into ``pdf.pages``, the path of the PDF to open, and the
                        keyword arguments forwarded to ``page.extract_words``.

                Returns:
                    Tuple ``(page_number, output)``. ``output`` starts with a
                    "Page N" line, followed by the text found outside of table areas and
                    then one JSON block per non-empty table. If anything raises, ``output``
                    is an "[ERROR] ..." message instead.
                """
                suppress_pdfminer_logging()
                try:
                    page_number, pdf_path, text_settings = args
                    with pdfplumber.open(pdf_path) as pdf:
                        page = pdf.pages[page_number]
                        width, height = page.width, page.height

                        table_bboxes = []
                        table_json_outputs = []

                        for table in page.find_tables():
                            bbox = clamp_bbox(table.bbox, width, height)
                            # Recorded before the emptiness check, so the area of an empty
                            # table is still excluded from the running text below.
                            table_bboxes.append(bbox)

                            if not page.crop(bbox).chars:
                                continue

                            table_data = table.extract()
                            if table_data:
                                # The first row supplies the keys for every following row.
                                headers = safe_join(table_data[0])
                                json_table = [
                                    dict(zip(headers, safe_join(row)))
                                    for row in table_data[1:]
                                ]
                                table_json_outputs.append(
                                    json.dumps(json_table, indent=1, ensure_ascii=False)
                                )

                        # A word counts as "inside a table" when its top-left corner lies
                        # within one of the table bounding boxes.
                        words_outside_tables = [
                            word for word in page.extract_words(**text_settings)
                            if not any(
                                bbox[0] <= float(word['x0']) <= bbox[2] and
                                bbox[1] <= float(word['top']) <= bbox[3]
                                for bbox in table_bboxes
                            )
                        ]

                        # Group words into lines: a new line starts whenever the vertical
                        # position moves more than 10 units away from the line's first word.
                        lines = []
                        current_line = []
                        current_y = None
                        for word in words_outside_tables:
                            if current_y is None or abs(word['top'] - current_y) > 10:
                                if current_line:
                                    lines.append(" ".join(current_line))
                                current_line = [word['text']]
                                current_y = word['top']
                            else:
                                current_line.append(word['text'])
                        if current_line:
                            lines.append(" ".join(current_line))

                        parts = [
                            f"Page {page_number + 1}\n",
                            "\n".join(lines).strip() + "\n",
                        ]
                        parts.extend(
                            f'"table {idx}":\n{table}\n'
                            for idx, table in enumerate(table_json_outputs, start=1)
                        )
                        return page_number, "".join(parts)

                except Exception as e:
                    # Deliberately broad: one bad page is reported as text and must not
                    # abort the processing of the other pages.
                    return args[0], f"[ERROR] Page {args[0]+1} ({args[1]}): {str(e)}"
         | 
| 129 | 
            +
             | 
| 130 | 
            +
            def process_pdf(pdf_path):
                """Convert one PDF into a text file written next to it.

                The pages are processed serially when the PDF has at most
                ``PARALLEL_THRESHOLD`` pages and through ``run_parallel`` otherwise. The
                combined output is written to ``<pdf name without extension>.txt`` in the
                PDF's directory.

                Args:
                    pdf_path: Path of the PDF file to convert.

                Returns:
                    A status message: "[INFO] Processing complete: ..." on success, or an
                    "[ERROR] ..." / "[INFO] ... interrupted ..." message otherwise. The
                    function never raises for ordinary errors; callers print the result.
                """
                suppress_pdfminer_logging()
                try:
                    if not os.path.exists(pdf_path):
                        return f"[ERROR] File not found: {pdf_path}"

                    print(f"[INFO] Starting processing: {pdf_path}")
                    try:
                        with pdfplumber.open(pdf_path) as pdf:
                            num_pages = len(pdf.pages)
                    except PdfminerException as e:
                        return f"[ERROR] Cannot open PDF: {pdf_path} – {str(e)}"
                    except Exception as e:
                        return f"[ERROR] General error opening PDF: {pdf_path} – {str(e)}"

                    pages = [(i, pdf_path, TEXT_EXTRACTION_SETTINGS) for i in range(num_pages)]

                    try:
                        results = run_serial(pages) if num_pages <= PARALLEL_THRESHOLD else run_parallel(pages)
                    except (EOFError, BrokenPipeError, KeyboardInterrupt):
                        return "[INFO] Processing was interrupted."

                    # Pool results are re-ordered by page number before joining.
                    sorted_results = sorted(results, key=lambda x: x[0])
                    final_output = "\n".join(text for _, text in sorted_results)

                    base_name = os.path.splitext(os.path.basename(pdf_path))[0]
                    output_dir = os.path.dirname(pdf_path)
                    output_path = os.path.join(output_dir, f"{base_name}.txt")

                    with open(output_path, "w", encoding="utf-8", errors="ignore") as f:
                        f.write(final_output)

                    # Return the completion message instead of printing it: every other
                    # path returns its status, and callers print the return value, so
                    # falling through here used to make them print "None".
                    return f"[INFO] Processing complete: {output_path}"

                except (EOFError, BrokenPipeError, KeyboardInterrupt):
                    return "[INFO] Processing interrupted by user."
                except Exception as e:
                    return f"[ERROR] Unexpected error with '{pdf_path}': {str(e)}"
         | 
| 168 | 
            +
             | 
| 169 | 
            +
            def run_serial(pages):
                """Run ``process_page`` on every job in ``pages``, in order, in this process.

                Returns:
                    List with one ``process_page`` result per job.
                """
                results = []
                for job in pages:
                    results.append(process_page(job))
                return results
         | 
| 171 | 
            +
             | 
| 172 | 
            +
            def run_parallel(pages):
                """Run ``process_page`` on every job in ``pages`` using a process pool.

                The pool size is the CPU count minus two (at least one), capped at the
                number of jobs.

                Args:
                    pages: List of ``process_page`` argument tuples.

                Returns:
                    List with one ``process_page`` result per job, in input order.
                    An empty input yields an empty list without starting a pool.
                """
                if not pages:
                    # min(..., len(pages)) would be 0 and Pool(processes=0) raises ValueError.
                    return []
                available_cores = max(1, cpu_count() - 2)
                num_cores = min(available_cores, len(pages))
                print(f"Starting parallel processing with {num_cores} cores...")
                with Pool(processes=num_cores) as pool:
                    return pool.map(process_page, pages)
         | 
| 178 | 
            +
             | 
| 179 | 
            +
            def process_pdfs_main():
                """Command-line entry point: convert every PDF path in ``sys.argv[1:]``.

                Each path is opened once to count its pages. PDFs with at most
                ``PARALLEL_THRESHOLD`` pages are converted concurrently, one file per
                joblib worker (phase 1). Larger PDFs are then converted one after
                another with ``process_pdf`` (phase 2). Missing or unreadable files are
                reported and skipped. All status messages are printed to stdout.
                """
                suppress_pdfminer_logging()
                pdf_files = sys.argv[1:]
                if not pdf_files:
                    print("No PDF files provided.")
                    return

                small_pdfs = []
                large_pdfs = []

                for path in pdf_files:
                    if not os.path.exists(path):
                        print(f"File not found: {path}")
                        continue
                    try:
                        with pdfplumber.open(path) as pdf:
                            is_small = len(pdf.pages) <= PARALLEL_THRESHOLD
                    except PdfminerException as e:
                        # pdfplumber raises this for any pdfminer failure while opening,
                        # not only for encrypted files, so report the underlying error.
                        print(f"[ERROR] Cannot open PDF (password-protected or damaged), skipped: {path} – {str(e)}")
                    except Exception as e:
                        print(f"[ERROR] Error opening {path}: {str(e)}")
                    else:
                        (small_pdfs if is_small else large_pdfs).append(path)

                if small_pdfs:
                    available_cores = max(1, cpu_count() - 2)
                    num_cores = min(available_cores, len(small_pdfs))
                    print(f"\n[Phase 1] Starting parallel processing of small PDFs with {num_cores} cores...")
                    results = Parallel(n_jobs=num_cores)(
                        delayed(process_pdf)(path) for path in small_pdfs
                    )
                    for message in results:
                        # A result without a message must not show up as "None".
                        if message is not None:
                            print(message)

                for path in large_pdfs:
                    print(f"\n[Phase 2] Processing large PDF: {os.path.basename(path)}")
                    message = process_pdf(path)
                    if message is not None:
                        print(message)
         | 
| 217 | 
            +
             | 
| 218 | 
            +
             | 
| 219 | 
            +
            # ========================
         | 
| 220 | 
            +
            # GUI Class
         | 
| 221 | 
            +
            # ========================
         | 
| 222 | 
            +
             | 
| 223 | 
            +
            class FileManager:
         | 
| 224 | 
            +
                def __init__(self, master):
                    """Build the main window: file list, action buttons, preview and progress log.

                    Args:
                        master: Tk widget (the window) that hosts all controls.
                    """
                    self.master = master
                    master.title("Parser-Sevenof9")
                    self.files = []
                    self.last_selected_index = None

                    # --- list of selected PDF paths with a vertical scrollbar ---
                    self.label = tk.Label(master, text="Selected PDF files:")
                    self.label.pack(pady=5)

                    list_area = tk.Frame(master)
                    list_area.pack(pady=5)

                    list_scroll = tk.Scrollbar(list_area)
                    self.listbox = tk.Listbox(
                        list_area,
                        selectmode=tk.MULTIPLE,
                        width=80,
                        height=6,
                        yscrollcommand=list_scroll.set,
                    )
                    list_scroll.config(command=self.listbox.yview)

                    self.listbox.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
                    list_scroll.pack(side=tk.RIGHT, fill=tk.Y)

                    for sequence, handler in (
                        ("<<ListboxSelect>>", self.show_text_file),
                        ("<Button-1>", self.on_listbox_click),
                        ("<Shift-Button-1>", self.on_listbox_shift_click),
                    ):
                        self.listbox.bind(sequence, handler)

                    # --- right-click menu on the list ---
                    self.context_menu = tk.Menu(master, tearoff=0)
                    self.context_menu.add_command(label="Remove selected", command=self.remove_file)
                    self.listbox.bind("<Button-3>", self.show_context_menu)

                    # --- row of list-management buttons ---
                    self.frame = tk.Frame(master)
                    self.frame.pack(pady=10)

                    for caption, action in (
                        ("Add Folder", self.add_folder),
                        ("Select Files", self.add_file),
                        ("Remove Selected", self.remove_file),
                        ("Remove All", self.remove_all),
                    ):
                        tk.Button(self.frame, text=caption, command=action).pack(side=tk.LEFT, padx=5)

                    tk.Button(master, text="Stop", command=self.stop_parser).pack(pady=5)
                    # Handle of the running parser subprocess; assigned by run_parser.
                    self.parser_process = None

                    tk.Button(master, text="Start Parser", command=self.start_parser).pack(pady=10)

                    # --- text pane with a vertical scrollbar ---
                    preview_area = tk.Frame(master)
                    preview_area.pack(padx=10, pady=5)

                    preview_scroll = tk.Scrollbar(preview_area)
                    self.text_widget = tk.Text(
                        preview_area,
                        height=15,
                        width=100,
                        wrap=tk.WORD,
                        yscrollcommand=preview_scroll.set,
                    )
                    preview_scroll.config(command=self.text_widget.yview)

                    self.text_widget.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
                    preview_scroll.pack(side=tk.RIGHT, fill=tk.Y)

                    # --- read-only progress log ---
                    tk.Label(master, text="Progress:").pack()

                    log_area = tk.Frame(master)
                    log_area.pack(padx=10, pady=5)

                    log_scroll = tk.Scrollbar(log_area)
                    self.progress_text = tk.Text(
                        log_area,
                        height=8,
                        width=100,
                        state=tk.DISABLED,
                        yscrollcommand=log_scroll.set,
                    )
                    log_scroll.config(command=self.progress_text.yview)

                    self.progress_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
                    log_scroll.pack(side=tk.RIGHT, fill=tk.Y)
         | 
| 284 | 
            +
             | 
| 285 | 
            +
                def on_listbox_click(self, event):
         | 
| 286 | 
            +
                    index = self.listbox.nearest(event.y)
         | 
| 287 | 
            +
                    self.listbox.selection_clear(0, tk.END)
         | 
| 288 | 
            +
                    self.listbox.selection_set(index)
         | 
| 289 | 
            +
                    self.last_selected_index = index
         | 
| 290 | 
            +
                    self.show_text_file(None)
         | 
| 291 | 
            +
                    return "break"
         | 
| 292 | 
            +
             | 
| 293 | 
            +
                def on_listbox_shift_click(self, event):
         | 
| 294 | 
            +
                    index = self.listbox.nearest(event.y)
         | 
| 295 | 
            +
                    if self.last_selected_index is None:
         | 
| 296 | 
            +
                        self.last_selected_index = index
         | 
| 297 | 
            +
                    start, end = sorted((self.last_selected_index, index))
         | 
| 298 | 
            +
                    self.listbox.selection_clear(0, tk.END)
         | 
| 299 | 
            +
                    for i in range(start, end + 1):
         | 
| 300 | 
            +
                        self.listbox.selection_set(i)
         | 
| 301 | 
            +
                    return "break"
         | 
| 302 | 
            +
             | 
| 303 | 
            +
                def show_context_menu(self, event):
         | 
| 304 | 
            +
                    if self.listbox.curselection():
         | 
| 305 | 
            +
                        self.context_menu.tk_popup(event.x_root, event.y_root)
         | 
| 306 | 
            +
             | 
| 307 | 
            +
                def add_folder(self):
                    """Ask for a folder and add every ``.pdf`` file below it to the list.

                    The folder is walked recursively, the extension check is
                    case-insensitive, and paths already in ``self.files`` are skipped.
                    Nothing happens when the dialog is cancelled.
                    """
                    folder = filedialog.askdirectory(title="Select Folder")
                    if not folder:
                        return
                    for directory, _subdirs, filenames in os.walk(folder):
                        for filename in filenames:
                            if not filename.lower().endswith(".pdf"):
                                continue
                            candidate = os.path.join(directory, filename)
                            if candidate in self.files:
                                continue
                            self.files.append(candidate)
                            self.listbox.insert(tk.END, candidate)
         | 
| 318 | 
            +
             | 
| 319 | 
            +
                def add_file(self):
                    """Ask for one or more PDF files and add the new ones to the list."""
                    chosen = filedialog.askopenfilenames(
                        title="Select PDF Files",
                        filetypes=[("PDF Files", "*.pdf")],
                    )
                    for candidate in chosen:
                        if candidate in self.files:
                            continue  # already listed
                        self.files.append(candidate)
                        self.listbox.insert(tk.END, candidate)
         | 
| 325 | 
            +
             | 
| 326 | 
            +
                def remove_file(self):
         | 
| 327 | 
            +
                    selection = self.listbox.curselection()
         | 
| 328 | 
            +
                    if not selection:
         | 
| 329 | 
            +
                        messagebox.showwarning("Notice", "Please select an entry to remove.")
         | 
| 330 | 
            +
                        return
         | 
| 331 | 
            +
                    for index in reversed(selection):
         | 
| 332 | 
            +
                        self.listbox.delete(index)
         | 
| 333 | 
            +
                        del self.files[index]
         | 
| 334 | 
            +
                    self.text_widget.delete(1.0, tk.END)
         | 
| 335 | 
            +
             | 
| 336 | 
            +
                def remove_all(self):
         | 
| 337 | 
            +
                    self.listbox.delete(0, tk.END)
         | 
| 338 | 
            +
                    self.files.clear()
         | 
| 339 | 
            +
                    self.text_widget.delete(1.0, tk.END)
         | 
| 340 | 
            +
             | 
| 341 | 
            +
                def start_parser(self):
         | 
| 342 | 
            +
                    if not self.files:
         | 
| 343 | 
            +
                        messagebox.showinfo("No Files", "Please select at least one file.")
         | 
| 344 | 
            +
                        return
         | 
| 345 | 
            +
                    self.progress_text.config(state=tk.NORMAL)
         | 
| 346 | 
            +
                    self.progress_text.delete(1.0, tk.END)
         | 
| 347 | 
            +
                    self.progress_text.insert(tk.END, "Starting parser...\n")
         | 
| 348 | 
            +
                    self.progress_text.config(state=tk.DISABLED)
         | 
| 349 | 
            +
                    thread = threading.Thread(target=self.run_parser)
         | 
| 350 | 
            +
                    thread.start()
         | 
| 351 | 
            +
             | 
| 352 | 
            +
                def stop_parser(self):
         | 
| 353 | 
            +
                    if self.parser_process and self.parser_process.poll() is None:
         | 
| 354 | 
            +
                        self.parser_process.terminate()
         | 
| 355 | 
            +
                        self.append_progress_text("Parser process was stopped.\n")
         | 
| 356 | 
            +
                    else:
         | 
| 357 | 
            +
                        self.append_progress_text("No active parser process to stop.\n")
         | 
| 358 | 
            +
             | 
| 359 | 
            +
                def run_parser(self):
                    """Run the parser in a child process and stream its output to the UI.

                    Re-launches this program with the queued file paths as command-line
                    arguments; the entry point routes any invocation that has arguments
                    to ``process_pdfs_main``.  Every line the child writes to stdout or
                    stderr is forwarded to the progress pane.  When the child exits, a
                    success or error note is appended and a dialog is shown depending
                    on its return code.  Any exception raised here is reported the same
                    way instead of propagating.  ``self.parser_process`` holds the
                    child while it runs and is reset to ``None`` afterwards.
                    """
                    try:
                        if getattr(sys, "frozen", False):
                            # Frozen (PyInstaller) build: sys.executable is the
                            # application itself, and sys.argv[0] names that same
                            # executable.  Passing it again would hand the child the
                            # exe path as its first argument, ahead of the PDFs.
                            command = [sys.executable]
                        else:
                            command = [sys.executable, os.path.abspath(sys.argv[0])]
                        command.extend(self.files)

                        process = subprocess.Popen(
                            command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,  # interleave errors with output
                            text=True,
                            encoding='utf-8',
                            errors='ignore',
                            bufsize=4096
                        )
                        self.parser_process = process

                        # The with-block closes the pipe even if forwarding fails.
                        with process.stdout:
                            for line in process.stdout:
                                self.append_progress_text(line)
                        process.wait()

                        if process.returncode == 0:
                            self.append_progress_text("\nParser finished successfully.\n")
                            self.show_messagebox_threadsafe("Parser Done", "The parser was executed successfully.")
                        else:
                            self.append_progress_text("\nError while running the parser.\n")
                            self.show_messagebox_threadsafe("Error", "Error while running the parser.")
                    except Exception as e:
                        self.append_progress_text(f"Error: {e}\n")
                        self.show_messagebox_threadsafe("Error", f"Error during execution:\n{e}")
                    finally:
                        self.parser_process = None
         | 
| 387 | 
            +
             | 
| 388 | 
            +
                def append_progress_text(self, text):
         | 
| 389 | 
            +
                    self.progress_text.after(0, lambda: self._insert_text(text))
         | 
| 390 | 
            +
             | 
| 391 | 
            +
                def _insert_text(self, text):
         | 
| 392 | 
            +
                    self.progress_text.config(state=tk.NORMAL)
         | 
| 393 | 
            +
                    self.progress_text.insert(tk.END, text)
         | 
| 394 | 
            +
                    self.progress_text.see(tk.END)
         | 
| 395 | 
            +
                    self.progress_text.config(state=tk.DISABLED)
         | 
| 396 | 
            +
             | 
| 397 | 
            +
                def show_messagebox_threadsafe(self, title, message):
         | 
| 398 | 
            +
                    self.master.after(0, lambda: messagebox.showinfo(title, message))
         | 
| 399 | 
            +
             | 
| 400 | 
            +
                def show_text_file(self, event):
         | 
| 401 | 
            +
                    selection = self.listbox.curselection()
         | 
| 402 | 
            +
                    if not selection:
         | 
| 403 | 
            +
                        return
         | 
| 404 | 
            +
                    index = selection[0]
         | 
| 405 | 
            +
                    path = self.files[index]
         | 
| 406 | 
            +
                    txt_path = os.path.splitext(path)[0] + ".txt"
         | 
| 407 | 
            +
                    self.text_widget.delete(1.0, tk.END)
         | 
| 408 | 
            +
                    if os.path.exists(txt_path):
         | 
| 409 | 
            +
                        try:
         | 
| 410 | 
            +
                            with open(txt_path, "r", encoding="utf-8", errors="ignore") as f:
         | 
| 411 | 
            +
                                self.text_widget.insert(tk.END, f.read())
         | 
| 412 | 
            +
                        except Exception as e:
         | 
| 413 | 
            +
                            self.text_widget.insert(tk.END, f"Error loading text file:\n{e}")
         | 
| 414 | 
            +
                    else:
         | 
| 415 | 
            +
                        self.text_widget.insert(tk.END, "[No corresponding .txt file found]")
         | 
| 416 | 
            +
             | 
| 417 | 
            +
            # ========================
         | 
| 418 | 
            +
            # Entry Point
         | 
| 419 | 
            +
            # ========================
         | 
| 420 | 
            +
             | 
| 421 | 
            +
            if __name__ == "__main__":
                # Has to run before anything else in main so multiprocessing works on
                # Windows.
                multiprocessing.freeze_support()

                if len(sys.argv) <= 1:
                    # No command-line arguments: start the graphical file manager.
                    root = tk.Tk()
                    app = FileManager(root)
                    root.mainloop()
                else:
                    # Arguments present: run the parser entry point without a GUI.
                    process_pdfs_main()
         | 
| 430 | 
            +
             |