Spaces:
Sleeping
Sleeping
Delete utils.py
Browse files
utils.py
DELETED
|
@@ -1,140 +0,0 @@
|
|
| 1 |
-
import numpy as np
|
| 2 |
-
from datasets import load_metric
|
| 3 |
-
from PIL import ImageDraw, ImageFont
|
| 4 |
-
import pandas as pd
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
# Load the "seqeval" metric once, when this module is imported. Nothing in the
# visible part of this file reads `metric` afterwards.
# NOTE(review): `datasets.load_metric` appears to be deprecated in newer
# `datasets` releases in favour of the separate `evaluate` package -- confirm
# against the pinned `datasets` version before relying on this import.
metric = load_metric("seqeval")
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
def unnormalize_box(bbox, width, height, scale=1000):
    """Convert a box from the normalized ``0..scale`` grid to image units.

    Args:
        bbox: Indexable with at least four numbers. Indices 0 and 2 are
            scaled by ``width``; indices 1 and 3 are scaled by ``height``.
        width: Image width used for the horizontal coordinates.
        height: Image height used for the vertical coordinates.
        scale: Size of the normalized grid. Defaults to 1000, the value that
            used to be hard-coded, so existing callers are unaffected.

    Returns:
        A list of four numbers: ``extent * (coordinate / scale)`` for each of
        the four coordinates. No rounding is applied.

    Raises:
        IndexError: If ``bbox`` has fewer than four elements.
        ZeroDivisionError: If ``scale`` is zero.
    """
    # Extent that each of the four coordinates is measured against.
    extents = (width, height, width, height)
    return [extent * (bbox[i] / scale) for i, extent in enumerate(extents)]
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def normalize_box(bbox, width, height, scale=1000):
    """Convert a box from image units to the integer ``0..scale`` grid.

    Args:
        bbox: Indexable with at least four numbers. Indices 0 and 2 are
            divided by ``width``; indices 1 and 3 are divided by ``height``.
        width: Image width used for the horizontal coordinates.
        height: Image height used for the vertical coordinates.
        scale: Size of the normalized grid. Defaults to 1000, the value that
            used to be hard-coded, so existing callers are unaffected.

    Returns:
        A list of four ints: ``int((coordinate / extent) * scale)``. ``int``
        truncates toward zero, and values are not clamped to the grid.

    Raises:
        IndexError: If ``bbox`` has fewer than four elements.
        ZeroDivisionError: If ``width`` or ``height`` is zero.
    """
    # Extent that each of the four coordinates is measured against.
    extents = (width, height, width, height)
    return [int((bbox[i] / extent) * scale) for i, extent in enumerate(extents)]
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def draw_output(image, true_predictions, true_boxes):
    """Draw every predicted box and its label on ``image`` in red.

    Args:
        image: Image passed to ``ImageDraw.Draw``; the drawing is applied to
            this object and the same object is returned.
        true_predictions: Iterable of label strings. A falsy label (for
            example ``''`` or ``None``) is rendered as ``'other'``.
        true_boxes: Iterable of boxes paired positionally with
            ``true_predictions``. Each box is passed as-is to
            ``draw.rectangle``; its first two elements anchor the label text.

    Returns:
        The ``image`` argument, after drawing.
    """
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    # zip() stops at the shorter input, so unmatched extras are ignored.
    for prediction, box in zip(true_predictions, true_boxes):
        predicted_label = (prediction or 'other').lower()
        draw.rectangle(box, outline='red')
        # Place the label slightly right of and above the box's first corner.
        draw.text((box[0] + 10, box[1] - 10),
                  text=predicted_label, fill='red', font=font)

    return image
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
def create_df(true_texts,
              true_predictions,
              chosen_labels=('SHOP_NAME', 'ADDR', 'TITLE', 'PHONE',
                             'PRODUCT_NAME', 'AMOUNT', 'UNIT', 'UPRICE', 'SUB_TPRICE', 'UDISCOUNT',
                             'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
                             'RECEMONEY', 'REMAMONEY',
                             'BILLID', 'DATETIME', 'CASHIER')
              ):
    """Build a DataFrame of the recognised fields, grouped by product.

    Args:
        true_texts: Iterable of text fragments.
        true_predictions: Iterable of labels paired positionally with
            ``true_texts``.
        chosen_labels: Container of labels to keep; pairs whose label is not
            in it are skipped. The default is an immutable tuple so it cannot
            be shared and mutated across calls.

    Returns:
        A ``pandas.DataFrame`` with columns ``text``, ``class_label`` and
        ``product_id``. ``product_id`` is a running index that increases at
        each ``PRODUCT_NAME`` and is set on ``PRODUCT_NAME``, ``AMOUNT``,
        ``UPRICE`` and ``SUB_TPRICE`` rows; all other rows get ``''``.
    """
    # Labels whose text is passed through reformat() to strip punctuation
    # and, where possible, convert to int.
    numeric_labels = frozenset((
        'AMOUNT', 'UNIT', 'UDISCOUNT', 'UPRICE', 'SUB_TPRICE',
        'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
        'RECEMONEY', 'REMAMONEY',
    ))
    # Labels that belong to a single product line and carry its product_id.
    per_product_labels = frozenset(('AMOUNT', 'SUB_TPRICE', 'UPRICE', 'PRODUCT_NAME'))

    data = {'text': [], 'class_label': [], 'product_id': []}
    # Starts at -1 so the first PRODUCT_NAME becomes product 0. Per-product
    # fields seen before any PRODUCT_NAME are therefore tagged -1.
    product_id = -1
    for text, prediction in zip(true_texts, true_predictions):
        if prediction not in chosen_labels:
            continue

        if prediction == 'PRODUCT_NAME':
            product_id += 1

        if prediction in numeric_labels:
            text = reformat(text)

        data['product_id'].append(product_id if prediction in per_product_labels else '')
        data['class_label'].append(prediction)
        data['text'].append(text)

    return pd.DataFrame(data)
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
# Deletes the punctuation characters . , : / | \ ) ( - ; _ in one pass.
_REFORMAT_STRIP_TABLE = str.maketrans('', '', '.,:/|\\)(-;_')


def reformat(text: str):
    """Strip separator punctuation from ``text`` and convert it to ``int``.

    Args:
        text: Raw text of a numeric field, e.g. ``'12.500'``.

    Returns:
        The ``int`` parsed from the stripped text. If the stripped text is
        not a valid integer, the stripped text is returned instead. If
        ``text`` is not a ``str`` at all, it is returned unchanged.
    """
    try:
        text = text.translate(_REFORMAT_STRIP_TABLE)
        return int(text)
    except (ValueError, AttributeError, TypeError):
        # ValueError: the stripped text is not an integer.
        # AttributeError / TypeError: the input is not a str (best effort:
        # hand it back as-is rather than crash the caller).
        return text
|
| 102 |
-
|
| 103 |
-
def _first_text_for_label(product_info, label):
    """Return the first ``text`` value in ``product_info`` with this label.

    Raises:
        IndexError: If no row has ``class_label == label``.
    """
    return product_info.loc[product_info['class_label'] == label, 'text'].iloc[0]


def find_product(product_name, df):
    """Look up a product by name and describe its quantity and prices.

    The search is a case-insensitive literal substring match against the
    ``text`` of ``PRODUCT_NAME`` rows. The query is never interpreted as a
    regular expression, so characters such as ``(`` or ``+`` are safe.

    Args:
        product_name: Text to search for.
        df: DataFrame with ``text``, ``class_label`` and ``product_id``
            columns.

    Returns:
        A summary string for the first matching product. Fields that are
        missing for that product are left empty, and a message is printed for
        each. If no product matches, a "not found" message is returned.
    """
    needle = product_name.lower()
    product_df = df[df['class_label'] == 'PRODUCT_NAME']
    # Both sides are lower-cased, so a plain literal containment test is
    # enough; regex=False keeps user input from being parsed as a pattern.
    mask = product_df['text'].str.lower().str.contains(needle, regex=False, na=False)
    if not mask.any():
        return "Không tìm thấy item nào phù hợp."

    # Use the first matching product and gather every row that shares its id.
    product_id = product_df.loc[mask, 'product_id'].iloc[0]
    product_info = df[df['product_id'] == product_id]

    prod_name = _first_text_for_label(product_info, 'PRODUCT_NAME')

    details = {}
    for label, description in (('AMOUNT', 'amount'),
                               ('UPRICE', 'unit price'),
                               ('SUB_TPRICE', 'sub total price')):
        try:
            details[label] = _first_text_for_label(product_info, label)
        except IndexError:
            # The product has no row with this label; report it and leave
            # the field blank in the summary.
            print(f"Error: cannot find {description}")
            details[label] = ''

    amount = details['AMOUNT']
    uprice = details['UPRICE']
    sub_tprice = details['SUB_TPRICE']
    return f"Sản phẩm: {prod_name}\n Số lượng: {amount}\n Đơn giá: {uprice}\n Thành tiền: {sub_tprice}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|