Spaces:
Sleeping
Sleeping
Create reading_order.py
Browse files- reading_order.py +106 -0
reading_order.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
class OrderPolygons:
    """Compute a reading order for text-line bounding boxes.

    Every line is a sequence of four coordinates laid out as
    ``(x_max, x_min, y_max, y_min)``, i.e. index 0 is the right edge,
    index 1 the left edge, index 2 the bottom edge and index 3 the top edge
    (the y axis grows downwards).

    Args:
        text_direction: ``'rl'`` for right-to-left text; any other value
            (default ``'lr'``) selects left-to-right ordering.
    """

    def __init__(self, text_direction = 'lr'):
        self.text_direction = text_direction

    # Defines whether two lines overlap vertically
    def _y_overlaps(self, u, v):
        """Return True if the vertical extents of ``u`` and ``v`` intersect."""
        # u_y_min < v_y_max and u_y_max > v_y_min
        return u[3] < v[2] and u[2] > v[3]

    # Defines whether two lines overlap horizontally
    def _x_overlaps(self, u, v):
        """Return True if the horizontal extents of ``u`` and ``v`` intersect."""
        # u_x_min < v_x_max and u_x_max > v_x_min
        return u[1] < v[0] and u[0] > v[1]

    # Defines whether one line (u) is above the other (v)
    def _above(self, u, v):
        """Return True if the top edge of ``u`` lies above the top edge of ``v``."""
        # u_y_min < v_y_min
        return u[3] < v[3]

    # Defines whether one line (u) is left of the other (v)
    def _left_of(self, u, v):
        """Return True if ``u`` ends (right edge) before ``v`` starts (left edge)."""
        # u_x_max < v_x_min
        return u[0] < v[1]

    def _same_line(self, a, b):
        """Return True if ``a`` and ``b`` are the same line.

        Lines are compared by value.  Going through ``tuple`` keeps the
        comparison a single boolean even when the lines are NumPy arrays,
        for which a plain ``a == b`` yields an element-wise array that
        cannot be used in a boolean context.
        """
        return a is b or tuple(a) == tuple(b)

    # Defines whether one line (w) overlaps with two others (u,v)
    def _separates(self, w, u, v):
        """Return 1 if line ``w`` lies between ``u`` and ``v``, else 0.

        ``w`` separates the pair when it is neither of the two lines, its
        vertical extent is not entirely above or below both of them, and it
        horizontally reaches from before the right edge of ``u`` to past the
        left edge of ``v``.
        """
        if self._same_line(w, u) or self._same_line(w, v):
            return 0
        # w_y_max < (min(u_y_min, v_y_min)): w is entirely above both lines
        if w[2] < min(u[3], v[3]):
            return 0
        # w_y_min > max(u_y_max, v_y_max): w is entirely below both lines
        if w[3] > max(u[2], v[2]):
            return 0
        # w_x_min < u_x_max and w_x_max > v_x_min
        if w[1] < u[0] and w[0] > v[1]:
            return 1
        return 0

    # Slightly modified version of the Kraken implementation at
    # https://github.com/mittagessen/kraken/blob/master/kraken/lib/segmentation.py
    def reading_order(self, lines):
        """Compute the partial reading order of ``lines``.

        Args:
            lines: iterable of lines, each given as
                ``(x_max, x_min, y_max, y_min)``.

        Returns:
            A square ``numpy`` array of unsigned bytes where
            ``order[i, j] == 1`` means line ``i`` comes before line ``j``
            in reading order.
        """
        # Materialise once: the input is traversed many times below.
        lines = list(lines)

        # Array where the order of precedence between the lines is defined
        order = np.zeros((len(lines), len(lines)), 'B')

        # Defines reading direction: default is from left to right
        if self.text_direction == 'rl':
            def horizontal_order(u, v):
                return not self._left_of(u, v)
        else:
            horizontal_order = self._left_of

        for i, u in enumerate(lines):
            for j, v in enumerate(lines):
                if self._x_overlaps(u, v):
                    if self._above(u, v):
                        # line u is placed before line v in reading order
                        order[i, j] = 1
                    continue

                # Lines side by side: u precedes v only if it comes first
                # horizontally, and either both lines share some vertical
                # extent or no third line sits between them.  The cheap
                # checks run first so the O(n) separator scan is skipped
                # whenever its outcome cannot matter.
                if not horizontal_order(u, v):
                    continue
                if self._y_overlaps(u, v) or not any(
                    self._separates(w, u, v) for w in lines
                ):
                    order[i, j] = 1

        return order

    # Taken from the Kraken implementation at
    # https://github.com/mittagessen/kraken/blob/master/kraken/lib/segmentation.py
    def topsort(self, order):
        """Topologically sort the indices of a precedence matrix.

        Args:
            order: square binary array where a non-zero ``order[i, j]``
                means ``i`` must come before ``j``.

        Returns:
            A list of plain ``int`` indices in which every index appears
            after all of its (transitive) predecessors.  Indices are
            explored in ascending order, so ties keep their input order.
        """
        order = np.asarray(order)
        n = len(order)
        visited = [False] * n
        result = []

        # Depth-first post-order traversal with an explicit stack instead of
        # recursion, so long precedence chains (e.g. one tall column of
        # lines) cannot exceed Python's recursion limit.
        for root in range(n):
            if visited[root]:
                continue
            visited[root] = True
            stack = [(root, iter(np.flatnonzero(order[:, root]).tolist()))]
            while stack:
                node, predecessors = stack[-1]
                for pred in predecessors:
                    if not visited[pred]:
                        visited[pred] = True
                        stack.append(
                            (pred, iter(np.flatnonzero(order[:, pred]).tolist()))
                        )
                        break
                else:
                    # Every predecessor has been emitted; emit this node.
                    stack.pop()
                    result.append(node)

        return result

    def order(self, lines):
        """Return the indices of ``lines`` sorted into reading order."""
        precedence = self.reading_order(lines)
        return self.topsort(precedence)
|