Upload 14 files
- .gitattributes +1 -0
- LICENSE +21 -0
- README.md +226 -3
- app.py +93 -0
- assets/dia.jpg +3 -0
- configs/hyp_augment.yaml +23 -0
- configs/ornaments.yaml +11 -0
- requirements.txt +11 -0
- results/after.png +0 -0
- results/before.png +0 -0
- src/dataset_tools/convert_via_to_yolo.py +80 -0
- src/dataset_tools/split_dataset.py +126 -0
- src/eval.py +26 -0
- src/infer.py +43 -0
- src/train.py +53 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/dia.jpg filter=lfs diff=lfs merge=lfs -text
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Martin Badrous

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,3 +1,226 @@
---
language: en
license: mit
tags:
- computer-vision
- object-detection
- yolov8
- document-analysis
- heritage-ai
- pytorch
pipeline_tag: object-detection
model-index:
- name: TypoRef YOLOv8 Historical Document Detection
  results:
  - task:
      type: object-detection
      name: Object Detection
    dataset:
      name: TypoRef Historical Prints
      type: document-images
    metrics:
    - name: mAP
      type: map
      value: 0.95
---

# 📜 TypoRef YOLOv8 Historical Document Detection

**Author:** Martin Badrous

This repository packages an industrial research project on detecting decorative elements in historical documents. It provides a clear, reproducible pipeline built with YOLOv8 for local training and a ready-to-deploy [Gradio](https://gradio.app) demo for inference. The aim is to automatically find **lettrines**, **illustrations**, **bandeaux**, and **vignettes** in scanned pages from 16th–18th century printed works. Such detection enables large-scale digital humanities projects by highlighting and indexing ornamental content in cultural heritage collections.

---

## 🧾 Overview

The **TypoRef dataset** comprises high-resolution scans of printed books from the TypoRef corpus. Annotators labeled four types of graphical elements: `lettrine` (decorative initials), `illustration` (engraved images), `bandeau` (horizontal bands), and `vignette` (small ornaments). We fine-tune YOLOv8 on these images using annotation files converted to the YOLO format.

The training script in this repository wraps the **Ultralytics YOLOv8 API**, exposing command-line parameters for the data path, model backbone, image size, batch size, epoch count, augmentation hyper-parameters, and deterministic seeding. The evaluation and inference scripts mirror the training CLI for consistency.

Once trained, the model reaches **mAP ≈ 0.95** on held-out validation pages (computed with the COCO AP metric averaged across classes). Inference runs in real time on consumer GPUs, making it suitable for production pipelines.

---

## 🗃️ Dataset

The dataset used to train this model originates from the TypoRef collection of historical prints. Each page was scanned at 300–600 dpi and annotated with bounding boxes around ornaments. Labels and images must be organised into a **YOLO dataset structure**. A sample dataset configuration (`configs/ornaments.yaml`) is provided and expects the following folder structure relative to the file:

```text
dataset_yolo/
├── train/
│   ├── images/
│   └── labels/
├── val/
│   ├── images/
│   └── labels/
└── test/
    ├── images/
    └── labels/
```

If you start from VIA annotation JSON files, use `src/dataset_tools/convert_via_to_yolo.py` to convert them to YOLO text labels, then split the data into train/val/test sets with `src/dataset_tools/split_dataset.py`.
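For example, assuming your VIA export and images live under `annotations/` and `dataset/images/` (placeholder paths; adjust to your layout), and using the class order defined in `configs/ornaments.yaml`:

```bash
# 1. Convert VIA JSON annotations to YOLO .txt labels
python src/dataset_tools/convert_via_to_yolo.py \
  --via_json annotations/via_project.json \
  --images_dir dataset/images \
  --labels_dir dataset/labels \
  --class_map lettrine illustration bandeau vignette

# 2. Shuffle and split into train/val/test (80/10/10)
python src/dataset_tools/split_dataset.py \
  --data_dir dataset/images \
  --labels_dir dataset/labels \
  --output_dir dataset_yolo \
  --train_ratio 0.8 --val_ratio 0.1 --test_ratio 0.1
```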
---

## 🛠️ Training

Install the dependencies and run the training script:

```bash
python3 -m venv venv && source venv/bin/activate
pip install -r requirements.txt

# Train YOLOv8 on the TypoRef dataset
python src/train.py \
  --data configs/ornaments.yaml \
  --model yolov8s.pt \
  --imgsz 1024 \
  --epochs 100 \
  --batch 8 \
  --project runs/typoref \
  --name yolov8s_typoref
```

Checkpoints and logs will be saved under `runs/typoref/`.
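After training, you can evaluate the best checkpoint on the validation split with `src/eval.py` (a thin wrapper around Ultralytics' `model.val()`); for example:

```bash
python src/eval.py \
  --weights runs/typoref/yolov8s_typoref/weights/best.pt \
  --data configs/ornaments.yaml \
  --imgsz 1024
```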
---

## 🔍 Inference

To perform inference on a folder of images using a trained model:

```bash
python src/infer.py \
  --weights runs/typoref/yolov8s_typoref/weights/best.pt \
  --source path/to/page_images \
  --imgsz 1024 \
  --conf 0.25 \
  --save_txt --save_conf
```

The predictions (annotated images and, with `--save_txt`, label files) will be written under `runs/predict/`, one subdirectory per run. You can visualise them using the example Gradio app or the provided scripts.

---

## 🧠 Model Architecture & Training Details

- **Backbone:** YOLOv8 (choose from `yolov8n.pt`, `yolov8s.pt`, etc.)
- **Input size:** 1024×1024 pixels
- **Batch size:** 8
- **Epochs:** 100
- **Optimisation:** SGD with momentum, weight decay, and the learning-rate schedule given in `configs/hyp_augment.yaml`
- **Augmentations:** horizontal flips, scale jittering, colour jitter, mosaic, and mixup
- **Metrics:** mAP@50–95 ≈ 0.95 on the validation set

The training pipeline is deterministic when `--seed` is set. See `configs/hyp_augment.yaml` for the full list of augmentation hyper-parameters.
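As a minimal sketch, a fully reproducible run only needs the seed flag (all other options keep their defaults from `src/train.py`):

```bash
# deterministic=True is already set inside train.py; --seed fixes the RNG state
python src/train.py --data configs/ornaments.yaml --seed 42
```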
---

## 📊 Performance Metrics

| Metric | Value |
|-------:|------:|
| mAP@50–95 | 0.95 |
| Precision | 0.94 |
| Recall | 0.93 |
| FPS (RTX 3060) | > 60 |

These numbers are indicative for the typographical-ornament detection task and may vary with dataset size and augmentation settings.

---

## 🖼️ Before & After Example

The following images illustrate how YOLOv8 detects ornaments. The **left** image shows a plain page with several decorative elements; the **right** image overlays bounding boxes on those ornaments. Both images are synthetic and provided for demonstration purposes only.

| Synthetic Page | Detection Result |
|---------------|------------------|
|  |  |

---

## 🎛️ Demo Application

A Gradio demo is included in `app.py`. It loads a YOLOv8 model and provides an intuitive drag-and-drop interface for inference. To run the demo locally:

```bash
python app.py
```

By default `app.py` loads the generic `yolov8n.pt` checkpoint so the demo runs without custom weights. To use the fine-tuned model, download the weights from `martinbadrous/TypoRef-YOLOv8-Historical-Document-Detection` (or use a local checkpoint) and update the `model_path` string in `app.py` accordingly.

---

## 📖 Citation

If you use this repository or the model in your research, please cite it as follows:

```bibtex
@misc{badrous2025typoref,
  author       = {Martin Badrous},
  title        = {TypoRef YOLOv8 Historical Document Detection},
  year         = {2025},
  howpublished = {Hugging Face repository},
  url          = {https://huggingface.co/martinbadrous/TypoRef-YOLOv8-Historical-Document-Detection}
}
```

---

## 👤 Contact

For questions or collaboration requests, feel free to email **[email protected]**.

---

## 🪪 License

This project is released under the MIT License. See the [LICENSE](LICENSE) file for details.
app.py
ADDED
@@ -0,0 +1,93 @@
"""
Gradio demo for the TypoRef YOLOv8 Historical Document Detector.

This script defines a simple Gradio interface that allows a user to upload
an image of a historical document page. The interface loads a YOLOv8
object detection model and applies it to the input image, overlaying
bounding boxes around detected ornaments, typography and other decorative
elements. The resulting annotated image is returned for display in the
browser.

By default the demo uses a small pretrained YOLOv8 model
(``yolov8n.pt``), which Ultralytics downloads automatically, so that it
can run without any custom weights. If you have uploaded your own
fine-tuned weights to Hugging Face
(e.g. ``martinbadrous/TypoRef-YOLOv8-Historical-Document-Detection``),
download the ``.pt`` file and pass its local path as ``model_path`` below.

To launch the demo locally run ``python app.py``. When running as a
Hugging Face Space this file will be executed automatically.
"""
import gradio as gr
from PIL import Image
from ultralytics import YOLO


def load_model(model_path: str = "yolov8n.pt") -> YOLO:
    """Load a YOLOv8 model from a local path or a standard checkpoint name.

    Args:
        model_path: Either a local path to a ``.pt`` file or the name of a
            standard Ultralytics checkpoint (e.g. ``yolov8n.pt``), which is
            downloaded automatically. Defaults to the YOLOv8 nano model.

    Returns:
        An instance of ``ultralytics.YOLO`` ready for inference.
    """
    return YOLO(model_path)


def detect_objects(img: Image.Image, model: YOLO) -> Image.Image:
    """Run object detection on a single image and return a plotted result.

    The YOLOv8 model returns a list of ``ultralytics`` ``Results`` objects;
    the first element contains the detections for the provided image. The
    ``plot`` method draws the bounding boxes and class labels onto a BGR
    numpy array, which is converted back into an RGB PIL image for display
    in the Gradio interface.

    Args:
        img: Input PIL image of a document page.
        model: A loaded YOLOv8 model.

    Returns:
        PIL image with detection boxes overlaid.
    """
    results = model(img)
    plotted = results[0].plot()  # BGR numpy array
    return Image.fromarray(plotted[..., ::-1])  # reverse channels: BGR -> RGB


def build_interface() -> gr.Interface:
    """Construct and return the Gradio interface for the detection demo."""
    model = load_model("yolov8n.pt")

    def _predict(image: Image.Image) -> Image.Image:
        return detect_objects(image, model)

    title = "TypoRef YOLOv8: Historical Document Ornament Detection"
    description = (
        "Upload a scanned page from a historical book to see how a YOLOv8 model "
        "detects graphical ornaments, typography and decorations. This demo "
        "illustrates cultural heritage AI applied to the TypoRef dataset (16th–18th "
        "century prints). Replace the underlying model with your own fine-tuned "
        "weights by modifying the `model_path` in `app.py`."
    )

    iface = gr.Interface(
        fn=_predict,
        inputs=gr.Image(type="pil", label="Upload a document page"),
        outputs=gr.Image(type="pil", label="Detected ornaments & typography"),
        title=title,
        description=description,
        allow_flagging="never",
    )
    return iface


if __name__ == "__main__":
    interface = build_interface()
    interface.launch()
assets/dia.jpg
ADDED
(stored with Git LFS)
configs/hyp_augment.yaml
ADDED
@@ -0,0 +1,23 @@
lr0: 0.01
lrf: 0.01
momentum: 0.937
weight_decay: 0.0005
warmup_epochs: 3.0
warmup_momentum: 0.8
warmup_bias_lr: 0.1
box: 7.5
cls: 0.5
dfl: 1.5
hsv_h: 0.015
hsv_s: 0.7
hsv_v: 0.4
degrees: 1.5
translate: 0.05
scale: 0.5
shear: 1.0
perspective: 0.0
flipud: 0.0
fliplr: 0.5
mosaic: 0.8
mixup: 0.1
copy_paste: 0.0
configs/ornaments.yaml
ADDED
@@ -0,0 +1,11 @@
# YOLO dataset configuration for TypoRef ornaments
path: dataset_yolo
train: train/images
val: val/images
test: test/images

names:
  0: lettrine
  1: illustration
  2: bandeau
  3: vignette
requirements.txt
ADDED
@@ -0,0 +1,11 @@
ultralytics>=8.2.0
opencv-python>=4.7.0
numpy>=1.23.0
pandas>=1.5.0
matplotlib>=3.7.0
pyyaml>=6.0
tqdm>=4.66.0
seaborn>=0.13.0
requests>=2.32.0
Pillow>=10.0.0
gradio>=4.10.0
results/after.png
ADDED
results/before.png
ADDED
src/dataset_tools/convert_via_to_yolo.py
ADDED
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""
Convert VIA 1.x/2.x annotations to YOLO format.

This script expects a VIA JSON file and writes corresponding label
files into the specified labels directory. It uses a list of class
names provided via --class_map to assign class IDs.
"""
import argparse
import json
from pathlib import Path


def parse_args():
    p = argparse.ArgumentParser(description="Convert VIA JSON to YOLO labels")
    p.add_argument("--via_json", type=str, required=True, help="Path to VIA JSON file")
    p.add_argument("--images_dir", type=str, required=True, help="Directory containing images")
    p.add_argument("--labels_dir", type=str, required=True, help="Directory to write YOLO labels")
    p.add_argument("--class_map", nargs="+", required=True, help="List of class names (order defines class IDs)")
    return p.parse_args()


def yolo_line(xc, yc, w, h, iw, ih, cls_id):
    # YOLO format: class_id x_center y_center width height, normalised to [0, 1]
    return f"{cls_id} {xc/iw:.6f} {yc/ih:.6f} {w/iw:.6f} {h/ih:.6f}\n"


def main():
    args = parse_args()
    labels_dir = Path(args.labels_dir)
    labels_dir.mkdir(parents=True, exist_ok=True)
    data = json.loads(Path(args.via_json).read_text(encoding="utf-8"))
    class_to_id = {c: i for i, c in enumerate(args.class_map)}
    # Support VIA 2.x structure with 'metadata' and 'file' keys
    if isinstance(data, dict) and 'metadata' in data and 'file' in data:
        files = data['file']
        meta = data['metadata']
        for _, m in meta.items():
            fid = str(m['fid'])
            fname = files[fid]['fname']
            iw = files[fid].get('width')
            ih = files[fid].get('height')
            if iw is None or ih is None:
                continue  # cannot normalise boxes without image dimensions
            lines = []
            for reg in m.get('regions', []):
                if reg.get('type') != 'rect':
                    continue
                x, y, w, h = reg['x'], reg['y'], reg['width'], reg['height']
                xc, yc = x + w / 2.0, y + h / 2.0
                label = reg.get('tags', [''])[0] if reg.get('tags') else reg.get('title', '')
                if label not in class_to_id:
                    continue
                lines.append(yolo_line(xc, yc, w, h, iw, ih, class_to_id[label]))
            if lines:
                (labels_dir / (Path(fname).stem + '.txt')).write_text(''.join(lines), encoding='utf-8')
    else:
        # Support VIA 1.x structure where keys are filenames
        for fname, item in data.items():
            regions = item.get('regions', [])
            iw = item.get('width')
            ih = item.get('height')
            if iw is None or ih is None:
                try:
                    import cv2  # only import if needed
                    im = cv2.imread(str(Path(args.images_dir) / fname))
                    ih, iw = im.shape[:2]
                except Exception:
                    continue
            lines = []
            for r in regions:
                s = r.get('shape_attributes', {})
                if s.get('name') != 'rect':
                    continue
                x, y, w, h = s['x'], s['y'], s['width'], s['height']
                xc, yc = x + w / 2.0, y + h / 2.0
                label = r.get('region_attributes', {}).get('class', '')
                if label not in class_to_id:
                    continue
                lines.append(yolo_line(xc, yc, w, h, iw, ih, class_to_id[label]))
            if lines:
                (labels_dir / (Path(fname).stem + '.txt')).write_text(''.join(lines), encoding='utf-8')
    print('Conversion completed. Labels saved to', labels_dir)


if __name__ == '__main__':
    main()
src/dataset_tools/split_dataset.py
ADDED
@@ -0,0 +1,126 @@
"""
Utility script to split a YOLO dataset into train/val/test subsets.

Given a directory of images and labels in YOLO format, this script splits
the dataset into train, validation and (optionally) test subsets
according to user-specified ratios. The image list is shuffled before
splitting (a simple random split; classes are not stratified). Each
resulting subset is written to its own directory containing the
corresponding images and label files.

Example usage:

    python split_dataset.py --data_dir dataset/images --labels_dir dataset/labels \
        --output_dir data_split --train_ratio 0.8 --val_ratio 0.1 --test_ratio 0.1
"""

import argparse
import random
import shutil
from pathlib import Path
from typing import List, Tuple


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Split a YOLO dataset into train/val/test.")
    parser.add_argument(
        "--data_dir",
        type=str,
        required=True,
        help="Directory containing image files (e.g. JPG/PNG).",
    )
    parser.add_argument(
        "--labels_dir",
        type=str,
        required=True,
        help="Directory containing YOLO label files (.txt) with the same base names as images.",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="data_split",
        help="Output directory to save the split dataset.",
    )
    parser.add_argument(
        "--train_ratio",
        type=float,
        default=0.8,
        help="Fraction of data to use for the training set.",
    )
    parser.add_argument(
        "--val_ratio",
        type=float,
        default=0.1,
        help="Fraction of data to use for the validation set.",
    )
    parser.add_argument(
        "--test_ratio",
        type=float,
        default=0.1,
        help="Fraction for the test set (the remainder after train and val). If zero, no test set is written.",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed for reproducible splits.",
    )
    return parser.parse_args()


def list_images(data_dir: str) -> List[Path]:
    """Return a list of image file paths in the given directory."""
    exts = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"}
    return [p for p in Path(data_dir).iterdir() if p.suffix.lower() in exts]


def split_indices(n: int, train_ratio: float, val_ratio: float, seed: int) -> Tuple[List[int], List[int], List[int]]:
    """Shuffle and split indices into train/val/test lists."""
    indices = list(range(n))
    random.seed(seed)
    random.shuffle(indices)
    n_train = int(n * train_ratio)
    n_val = int(n * val_ratio)
    train_idx = indices[:n_train]
    val_idx = indices[n_train : n_train + n_val]
    test_idx = indices[n_train + n_val :]
    return train_idx, val_idx, test_idx


def copy_files(indices: List[int], images: List[Path], labels_dir: Path, dest_image_dir: Path, dest_label_dir: Path) -> None:
    """Copy images and corresponding label files to destination directories."""
    dest_image_dir.mkdir(parents=True, exist_ok=True)
    dest_label_dir.mkdir(parents=True, exist_ok=True)
    for idx in indices:
        img_path = images[idx]
        lbl_path = labels_dir / (img_path.stem + ".txt")
        shutil.copy2(img_path, dest_image_dir / img_path.name)
        if lbl_path.exists():
            shutil.copy2(lbl_path, dest_label_dir / lbl_path.name)


def main() -> None:
    args = parse_args()
    images = list_images(args.data_dir)
    if not images:
        raise ValueError(f"No images found in {args.data_dir}")
    train_idx, val_idx, test_idx = split_indices(len(images), args.train_ratio, args.val_ratio, args.seed)
    output_dir = Path(args.output_dir)
    # Copy train set
    copy_files(train_idx, images, Path(args.labels_dir), output_dir / "train" / "images", output_dir / "train" / "labels")
    # Copy validation set
    copy_files(val_idx, images, Path(args.labels_dir), output_dir / "val" / "images", output_dir / "val" / "labels")
    # Copy test set if test_ratio > 0
    if args.test_ratio > 0 and test_idx:
        copy_files(test_idx, images, Path(args.labels_dir), output_dir / "test" / "images", output_dir / "test" / "labels")
    print(
        f"Dataset split completed.\n"
        f"Train images: {len(train_idx)}, Val images: {len(val_idx)}, Test images: {len(test_idx)}\n"
        f"Output directory: {output_dir}"
    )


if __name__ == "__main__":
    main()
src/eval.py
ADDED
@@ -0,0 +1,26 @@
#!/usr/bin/env python3
"""
Evaluate a trained YOLOv8 model on the TypoRef dataset.
"""
import argparse
from ultralytics import YOLO


def parse_args():
    p = argparse.ArgumentParser(description="Evaluate a YOLOv8 model on TypoRef")
    p.add_argument("--weights", type=str, required=True, help="Path to trained weights")
    p.add_argument("--data", type=str, default="configs/ornaments.yaml", help="Path to data config")
    p.add_argument("--imgsz", type=int, default=1024, help="Image size")
    p.add_argument("--batch", type=int, default=8, help="Batch size for evaluation")
    return p.parse_args()


def main():
    args = parse_args()
    model = YOLO(args.weights)
    metrics = model.val(data=args.data, imgsz=args.imgsz, batch=args.batch, plots=True)
    print(metrics)


if __name__ == "__main__":
    main()
src/infer.py
ADDED
@@ -0,0 +1,43 @@
#!/usr/bin/env python3
"""
Run inference with a trained YOLOv8 model on one or more images.
"""
import argparse
from ultralytics import YOLO


def parse_args():
    p = argparse.ArgumentParser(description="Inference with YOLOv8 for TypoRef")
    p.add_argument("--weights", type=str, required=True, help="Path to trained weights")
    p.add_argument("--source", type=str, required=True, help="Image file or directory")
    p.add_argument("--imgsz", type=int, default=1024, help="Image size for inference")
    p.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
    p.add_argument("--iou", type=float, default=0.45, help="IoU threshold")
    p.add_argument("--device", type=str, default="", help="Device to run on (cpu or cuda:0)")
    p.add_argument("--save_txt", action="store_true", help="Save predictions to .txt files")
    p.add_argument("--save_conf", action="store_true", help="Save confidence scores")
    p.add_argument("--project", type=str, default="runs/predict", help="Output project directory")
    p.add_argument("--name", type=str, default="exp", help="Name of the prediction run")
    return p.parse_args()


def main():
    args = parse_args()
    model = YOLO(args.weights)
    model.predict(
        source=args.source,
        imgsz=args.imgsz,
        conf=args.conf,
        iou=args.iou,
        device=args.device,
        save=True,
        save_txt=args.save_txt,
        save_conf=args.save_conf,
        project=args.project,
        name=args.name,
    )
    print(f"Predictions saved to {args.project}/{args.name}")


if __name__ == "__main__":
    main()
src/train.py
ADDED
@@ -0,0 +1,53 @@
#!/usr/bin/env python3
"""
Train YOLOv8 on the TypoRef historical document dataset.

This script wraps the Ultralytics YOLO API with a simple command-line
interface. It allows you to specify the dataset configuration file,
model backbone, image size, number of epochs, batch size, project
directory, and experiment name. Additional hyper-parameters can be
passed via --hyp to override defaults in `configs/hyp_augment.yaml`.
"""
import argparse
from ultralytics import YOLO


def parse_args():
    p = argparse.ArgumentParser(description="Train YOLOv8 for TypoRef document detection")
    p.add_argument("--data", type=str, default="configs/ornaments.yaml", help="Path to data config")
    p.add_argument("--model", type=str, default="yolov8s.pt", help="YOLOv8 backbone model")
    p.add_argument("--imgsz", type=int, default=1024, help="Input image size")
    p.add_argument("--epochs", type=int, default=100, help="Number of training epochs")
    p.add_argument("--batch", type=int, default=8, help="Batch size")
    p.add_argument("--workers", type=int, default=8, help="Number of dataloader workers")
    p.add_argument("--project", type=str, default="runs/typoref", help="Project directory")
    p.add_argument("--name", type=str, default="exp", help="Experiment name")
    p.add_argument("--hyp", type=str, default="configs/hyp_augment.yaml", help="Hyper-parameter file")
    p.add_argument("--patience", type=int, default=30, help="Early stopping patience")
    p.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility")
    return p.parse_args()


def main():
    args = parse_args()
    model = YOLO(args.model)
    results = model.train(
        data=args.data,
        imgsz=args.imgsz,
        epochs=args.epochs,
        batch=args.batch,
        workers=args.workers,
        project=args.project,
        name=args.name,
        cache=True,
        amp=True,
        deterministic=True,
        patience=args.patience,
        seed=args.seed,
        cfg=args.hyp,  # hyper-parameter/augmentation overrides
    )
    print(results)


if __name__ == "__main__":
    main()