Upload folder using huggingface_hub
Browse files- best-model.pt +3 -0
- dev.tsv +0 -0
- final-model.pt +3 -0
- loss.tsv +11 -0
- runs/events.out.tfevents.1697037532.c8b2203b18a8.1914.1 +3 -0
- test.tsv +0 -0
- training.log +262 -0
best-model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbc27b63e798005eca77116e5897e73c51421d24a34dd99116f0603150fab60f
|
| 3 |
+
size 870793839
|
dev.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
final-model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34d51ea279dcdebcc07cdbf7c5de267da63b6d3a4bae8ca2c82c69e897b63f90
|
| 3 |
+
size 870793956
|
loss.tsv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
|
| 2 |
+
1 15:26:04 0.0002 1.1462 0.2301 0.0000 0.0000 0.0000 0.0000
|
| 3 |
+
2 15:33:11 0.0001 0.1361 0.1178 0.7028 0.7376 0.7198 0.5805
|
| 4 |
+
3 15:40:23 0.0001 0.0826 0.0953 0.8369 0.7211 0.7747 0.6439
|
| 5 |
+
4 15:48:19 0.0001 0.0528 0.0786 0.8614 0.8151 0.8376 0.7367
|
| 6 |
+
5 15:55:23 0.0001 0.0363 0.0809 0.8510 0.8440 0.8475 0.7502
|
| 7 |
+
6 16:02:46 0.0001 0.0281 0.0903 0.8772 0.8337 0.8549 0.7606
|
| 8 |
+
7 16:10:14 0.0001 0.0218 0.1077 0.8772 0.8264 0.8511 0.7540
|
| 9 |
+
8 16:18:18 0.0000 0.0176 0.1181 0.8662 0.8295 0.8475 0.7484
|
| 10 |
+
9 16:25:52 0.0000 0.0141 0.1251 0.8625 0.8357 0.8489 0.7505
|
| 11 |
+
10 16:33:16 0.0000 0.0123 0.1305 0.8601 0.8192 0.8392 0.7356
|
runs/events.out.tfevents.1697037532.c8b2203b18a8.1914.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4674d8f872727f7b9942284b23c60b16172550880ddbefe9e2e825f8e70b7e23
|
| 3 |
+
size 407048
|
test.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training.log
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2023-10-11 15:18:52,762 ----------------------------------------------------------------------------------------------------
|
| 2 |
+
2023-10-11 15:18:52,764 Model: "SequenceTagger(
|
| 3 |
+
(embeddings): ByT5Embeddings(
|
| 4 |
+
(model): T5EncoderModel(
|
| 5 |
+
(shared): Embedding(384, 1472)
|
| 6 |
+
(encoder): T5Stack(
|
| 7 |
+
(embed_tokens): Embedding(384, 1472)
|
| 8 |
+
(block): ModuleList(
|
| 9 |
+
(0): T5Block(
|
| 10 |
+
(layer): ModuleList(
|
| 11 |
+
(0): T5LayerSelfAttention(
|
| 12 |
+
(SelfAttention): T5Attention(
|
| 13 |
+
(q): Linear(in_features=1472, out_features=384, bias=False)
|
| 14 |
+
(k): Linear(in_features=1472, out_features=384, bias=False)
|
| 15 |
+
(v): Linear(in_features=1472, out_features=384, bias=False)
|
| 16 |
+
(o): Linear(in_features=384, out_features=1472, bias=False)
|
| 17 |
+
(relative_attention_bias): Embedding(32, 6)
|
| 18 |
+
)
|
| 19 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
| 20 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 21 |
+
)
|
| 22 |
+
(1): T5LayerFF(
|
| 23 |
+
(DenseReluDense): T5DenseGatedActDense(
|
| 24 |
+
(wi_0): Linear(in_features=1472, out_features=3584, bias=False)
|
| 25 |
+
(wi_1): Linear(in_features=1472, out_features=3584, bias=False)
|
| 26 |
+
(wo): Linear(in_features=3584, out_features=1472, bias=False)
|
| 27 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 28 |
+
(act): NewGELUActivation()
|
| 29 |
+
)
|
| 30 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
| 31 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 32 |
+
)
|
| 33 |
+
)
|
| 34 |
+
)
|
| 35 |
+
(1-11): 11 x T5Block(
|
| 36 |
+
(layer): ModuleList(
|
| 37 |
+
(0): T5LayerSelfAttention(
|
| 38 |
+
(SelfAttention): T5Attention(
|
| 39 |
+
(q): Linear(in_features=1472, out_features=384, bias=False)
|
| 40 |
+
(k): Linear(in_features=1472, out_features=384, bias=False)
|
| 41 |
+
(v): Linear(in_features=1472, out_features=384, bias=False)
|
| 42 |
+
(o): Linear(in_features=384, out_features=1472, bias=False)
|
| 43 |
+
)
|
| 44 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
| 45 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 46 |
+
)
|
| 47 |
+
(1): T5LayerFF(
|
| 48 |
+
(DenseReluDense): T5DenseGatedActDense(
|
| 49 |
+
(wi_0): Linear(in_features=1472, out_features=3584, bias=False)
|
| 50 |
+
(wi_1): Linear(in_features=1472, out_features=3584, bias=False)
|
| 51 |
+
(wo): Linear(in_features=3584, out_features=1472, bias=False)
|
| 52 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 53 |
+
(act): NewGELUActivation()
|
| 54 |
+
)
|
| 55 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
| 56 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 57 |
+
)
|
| 58 |
+
)
|
| 59 |
+
)
|
| 60 |
+
)
|
| 61 |
+
(final_layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
| 62 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
| 63 |
+
)
|
| 64 |
+
)
|
| 65 |
+
)
|
| 66 |
+
(locked_dropout): LockedDropout(p=0.5)
|
| 67 |
+
(linear): Linear(in_features=1472, out_features=13, bias=True)
|
| 68 |
+
(loss_function): CrossEntropyLoss()
|
| 69 |
+
)"
|
| 70 |
+
2023-10-11 15:18:52,764 ----------------------------------------------------------------------------------------------------
|
| 71 |
+
2023-10-11 15:18:52,764 MultiCorpus: 5777 train + 722 dev + 723 test sentences
|
| 72 |
+
- NER_ICDAR_EUROPEANA Corpus: 5777 train + 722 dev + 723 test sentences - /root/.flair/datasets/ner_icdar_europeana/nl
|
| 73 |
+
2023-10-11 15:18:52,764 ----------------------------------------------------------------------------------------------------
|
| 74 |
+
2023-10-11 15:18:52,764 Train: 5777 sentences
|
| 75 |
+
2023-10-11 15:18:52,765 (train_with_dev=False, train_with_test=False)
|
| 76 |
+
2023-10-11 15:18:52,765 ----------------------------------------------------------------------------------------------------
|
| 77 |
+
2023-10-11 15:18:52,765 Training Params:
|
| 78 |
+
2023-10-11 15:18:52,765 - learning_rate: "0.00016"
|
| 79 |
+
2023-10-11 15:18:52,765 - mini_batch_size: "8"
|
| 80 |
+
2023-10-11 15:18:52,765 - max_epochs: "10"
|
| 81 |
+
2023-10-11 15:18:52,765 - shuffle: "True"
|
| 82 |
+
2023-10-11 15:18:52,765 ----------------------------------------------------------------------------------------------------
|
| 83 |
+
2023-10-11 15:18:52,765 Plugins:
|
| 84 |
+
2023-10-11 15:18:52,765 - TensorboardLogger
|
| 85 |
+
2023-10-11 15:18:52,765 - LinearScheduler | warmup_fraction: '0.1'
|
| 86 |
+
2023-10-11 15:18:52,765 ----------------------------------------------------------------------------------------------------
|
| 87 |
+
2023-10-11 15:18:52,765 Final evaluation on model from best epoch (best-model.pt)
|
| 88 |
+
2023-10-11 15:18:52,765 - metric: "('micro avg', 'f1-score')"
|
| 89 |
+
2023-10-11 15:18:52,765 ----------------------------------------------------------------------------------------------------
|
| 90 |
+
2023-10-11 15:18:52,766 Computation:
|
| 91 |
+
2023-10-11 15:18:52,766 - compute on device: cuda:0
|
| 92 |
+
2023-10-11 15:18:52,766 - embedding storage: none
|
| 93 |
+
2023-10-11 15:18:52,766 ----------------------------------------------------------------------------------------------------
|
| 94 |
+
2023-10-11 15:18:52,766 Model training base path: "hmbench-icdar/nl-hmbyt5-preliminary/byt5-small-historic-multilingual-span20-flax-bs8-wsFalse-e10-lr0.00016-poolingfirst-layers-1-crfFalse-1"
|
| 95 |
+
2023-10-11 15:18:52,766 ----------------------------------------------------------------------------------------------------
|
| 96 |
+
2023-10-11 15:18:52,766 ----------------------------------------------------------------------------------------------------
|
| 97 |
+
2023-10-11 15:18:52,766 Logging anything other than scalars to TensorBoard is currently not supported.
|
| 98 |
+
2023-10-11 15:19:33,021 epoch 1 - iter 72/723 - loss 2.58640498 - time (sec): 40.25 - samples/sec: 437.63 - lr: 0.000016 - momentum: 0.000000
|
| 99 |
+
2023-10-11 15:20:15,240 epoch 1 - iter 144/723 - loss 2.54575181 - time (sec): 82.47 - samples/sec: 450.66 - lr: 0.000032 - momentum: 0.000000
|
| 100 |
+
2023-10-11 15:20:55,763 epoch 1 - iter 216/723 - loss 2.39945434 - time (sec): 122.99 - samples/sec: 435.33 - lr: 0.000048 - momentum: 0.000000
|
| 101 |
+
2023-10-11 15:21:37,028 epoch 1 - iter 288/723 - loss 2.17892498 - time (sec): 164.26 - samples/sec: 435.94 - lr: 0.000064 - momentum: 0.000000
|
| 102 |
+
2023-10-11 15:22:20,557 epoch 1 - iter 360/723 - loss 1.92804078 - time (sec): 207.79 - samples/sec: 438.73 - lr: 0.000079 - momentum: 0.000000
|
| 103 |
+
2023-10-11 15:23:02,576 epoch 1 - iter 432/723 - loss 1.71210703 - time (sec): 249.81 - samples/sec: 432.91 - lr: 0.000095 - momentum: 0.000000
|
| 104 |
+
2023-10-11 15:23:45,206 epoch 1 - iter 504/723 - loss 1.52388016 - time (sec): 292.44 - samples/sec: 429.48 - lr: 0.000111 - momentum: 0.000000
|
| 105 |
+
2023-10-11 15:24:25,404 epoch 1 - iter 576/723 - loss 1.37250291 - time (sec): 332.64 - samples/sec: 428.17 - lr: 0.000127 - momentum: 0.000000
|
| 106 |
+
2023-10-11 15:25:04,451 epoch 1 - iter 648/723 - loss 1.25612134 - time (sec): 371.68 - samples/sec: 425.47 - lr: 0.000143 - momentum: 0.000000
|
| 107 |
+
2023-10-11 15:25:44,336 epoch 1 - iter 720/723 - loss 1.14790086 - time (sec): 411.57 - samples/sec: 427.20 - lr: 0.000159 - momentum: 0.000000
|
| 108 |
+
2023-10-11 15:25:45,493 ----------------------------------------------------------------------------------------------------
|
| 109 |
+
2023-10-11 15:25:45,494 EPOCH 1 done: loss 1.1462 - lr: 0.000159
|
| 110 |
+
2023-10-11 15:26:04,954 DEV : loss 0.23013751208782196 - f1-score (micro avg) 0.0
|
| 111 |
+
2023-10-11 15:26:04,983 ----------------------------------------------------------------------------------------------------
|
| 112 |
+
2023-10-11 15:26:44,924 epoch 2 - iter 72/723 - loss 0.18779348 - time (sec): 39.94 - samples/sec: 451.51 - lr: 0.000158 - momentum: 0.000000
|
| 113 |
+
2023-10-11 15:27:25,506 epoch 2 - iter 144/723 - loss 0.16703311 - time (sec): 80.52 - samples/sec: 441.10 - lr: 0.000156 - momentum: 0.000000
|
| 114 |
+
2023-10-11 15:28:05,904 epoch 2 - iter 216/723 - loss 0.16733631 - time (sec): 120.92 - samples/sec: 427.45 - lr: 0.000155 - momentum: 0.000000
|
| 115 |
+
2023-10-11 15:28:47,010 epoch 2 - iter 288/723 - loss 0.16076280 - time (sec): 162.02 - samples/sec: 428.06 - lr: 0.000153 - momentum: 0.000000
|
| 116 |
+
2023-10-11 15:29:27,132 epoch 2 - iter 360/723 - loss 0.15822813 - time (sec): 202.15 - samples/sec: 432.35 - lr: 0.000151 - momentum: 0.000000
|
| 117 |
+
2023-10-11 15:30:06,139 epoch 2 - iter 432/723 - loss 0.15494629 - time (sec): 241.15 - samples/sec: 434.81 - lr: 0.000149 - momentum: 0.000000
|
| 118 |
+
2023-10-11 15:30:46,534 epoch 2 - iter 504/723 - loss 0.14781553 - time (sec): 281.55 - samples/sec: 437.97 - lr: 0.000148 - momentum: 0.000000
|
| 119 |
+
2023-10-11 15:31:28,218 epoch 2 - iter 576/723 - loss 0.14089350 - time (sec): 323.23 - samples/sec: 442.36 - lr: 0.000146 - momentum: 0.000000
|
| 120 |
+
2023-10-11 15:32:07,820 epoch 2 - iter 648/723 - loss 0.13806927 - time (sec): 362.83 - samples/sec: 438.46 - lr: 0.000144 - momentum: 0.000000
|
| 121 |
+
2023-10-11 15:32:47,840 epoch 2 - iter 720/723 - loss 0.13620972 - time (sec): 402.86 - samples/sec: 435.58 - lr: 0.000142 - momentum: 0.000000
|
| 122 |
+
2023-10-11 15:32:49,348 ----------------------------------------------------------------------------------------------------
|
| 123 |
+
2023-10-11 15:32:49,349 EPOCH 2 done: loss 0.1361 - lr: 0.000142
|
| 124 |
+
2023-10-11 15:33:11,468 DEV : loss 0.11784520745277405 - f1-score (micro avg) 0.7198
|
| 125 |
+
2023-10-11 15:33:11,498 saving best model
|
| 126 |
+
2023-10-11 15:33:12,500 ----------------------------------------------------------------------------------------------------
|
| 127 |
+
2023-10-11 15:33:56,445 epoch 3 - iter 72/723 - loss 0.08434424 - time (sec): 43.94 - samples/sec: 393.04 - lr: 0.000140 - momentum: 0.000000
|
| 128 |
+
2023-10-11 15:34:38,669 epoch 3 - iter 144/723 - loss 0.09138352 - time (sec): 86.17 - samples/sec: 399.52 - lr: 0.000139 - momentum: 0.000000
|
| 129 |
+
2023-10-11 15:35:17,265 epoch 3 - iter 216/723 - loss 0.08550606 - time (sec): 124.76 - samples/sec: 416.98 - lr: 0.000137 - momentum: 0.000000
|
| 130 |
+
2023-10-11 15:35:57,111 epoch 3 - iter 288/723 - loss 0.09254066 - time (sec): 164.61 - samples/sec: 417.05 - lr: 0.000135 - momentum: 0.000000
|
| 131 |
+
2023-10-11 15:36:38,099 epoch 3 - iter 360/723 - loss 0.09054215 - time (sec): 205.60 - samples/sec: 424.30 - lr: 0.000133 - momentum: 0.000000
|
| 132 |
+
2023-10-11 15:37:16,378 epoch 3 - iter 432/723 - loss 0.09029129 - time (sec): 243.88 - samples/sec: 426.52 - lr: 0.000132 - momentum: 0.000000
|
| 133 |
+
2023-10-11 15:37:56,089 epoch 3 - iter 504/723 - loss 0.08953230 - time (sec): 283.59 - samples/sec: 428.48 - lr: 0.000130 - momentum: 0.000000
|
| 134 |
+
2023-10-11 15:38:36,917 epoch 3 - iter 576/723 - loss 0.08628376 - time (sec): 324.42 - samples/sec: 430.75 - lr: 0.000128 - momentum: 0.000000
|
| 135 |
+
2023-10-11 15:39:16,306 epoch 3 - iter 648/723 - loss 0.08375845 - time (sec): 363.80 - samples/sec: 434.46 - lr: 0.000126 - momentum: 0.000000
|
| 136 |
+
2023-10-11 15:39:58,736 epoch 3 - iter 720/723 - loss 0.08263566 - time (sec): 406.23 - samples/sec: 432.38 - lr: 0.000125 - momentum: 0.000000
|
| 137 |
+
2023-10-11 15:40:00,098 ----------------------------------------------------------------------------------------------------
|
| 138 |
+
2023-10-11 15:40:00,099 EPOCH 3 done: loss 0.0826 - lr: 0.000125
|
| 139 |
+
2023-10-11 15:40:23,121 DEV : loss 0.09526825696229935 - f1-score (micro avg) 0.7747
|
| 140 |
+
2023-10-11 15:40:23,153 saving best model
|
| 141 |
+
2023-10-11 15:40:25,742 ----------------------------------------------------------------------------------------------------
|
| 142 |
+
2023-10-11 15:41:11,486 epoch 4 - iter 72/723 - loss 0.07098217 - time (sec): 45.74 - samples/sec: 395.35 - lr: 0.000123 - momentum: 0.000000
|
| 143 |
+
2023-10-11 15:41:56,161 epoch 4 - iter 144/723 - loss 0.06244011 - time (sec): 90.41 - samples/sec: 387.90 - lr: 0.000121 - momentum: 0.000000
|
| 144 |
+
2023-10-11 15:42:41,898 epoch 4 - iter 216/723 - loss 0.05892834 - time (sec): 136.15 - samples/sec: 388.41 - lr: 0.000119 - momentum: 0.000000
|
| 145 |
+
2023-10-11 15:43:27,740 epoch 4 - iter 288/723 - loss 0.05706105 - time (sec): 181.99 - samples/sec: 386.02 - lr: 0.000117 - momentum: 0.000000
|
| 146 |
+
2023-10-11 15:44:13,629 epoch 4 - iter 360/723 - loss 0.05540965 - time (sec): 227.88 - samples/sec: 382.82 - lr: 0.000116 - momentum: 0.000000
|
| 147 |
+
2023-10-11 15:44:57,779 epoch 4 - iter 432/723 - loss 0.05519746 - time (sec): 272.03 - samples/sec: 381.49 - lr: 0.000114 - momentum: 0.000000
|
| 148 |
+
2023-10-11 15:45:42,220 epoch 4 - iter 504/723 - loss 0.05511683 - time (sec): 316.47 - samples/sec: 382.27 - lr: 0.000112 - momentum: 0.000000
|
| 149 |
+
2023-10-11 15:46:28,521 epoch 4 - iter 576/723 - loss 0.05612426 - time (sec): 362.77 - samples/sec: 384.07 - lr: 0.000110 - momentum: 0.000000
|
| 150 |
+
2023-10-11 15:47:13,618 epoch 4 - iter 648/723 - loss 0.05489093 - time (sec): 407.87 - samples/sec: 385.40 - lr: 0.000109 - momentum: 0.000000
|
| 151 |
+
2023-10-11 15:47:57,365 epoch 4 - iter 720/723 - loss 0.05280893 - time (sec): 451.62 - samples/sec: 389.22 - lr: 0.000107 - momentum: 0.000000
|
| 152 |
+
2023-10-11 15:47:58,594 ----------------------------------------------------------------------------------------------------
|
| 153 |
+
2023-10-11 15:47:58,594 EPOCH 4 done: loss 0.0528 - lr: 0.000107
|
| 154 |
+
2023-10-11 15:48:19,766 DEV : loss 0.07857168465852737 - f1-score (micro avg) 0.8376
|
| 155 |
+
2023-10-11 15:48:19,797 saving best model
|
| 156 |
+
2023-10-11 15:48:22,416 ----------------------------------------------------------------------------------------------------
|
| 157 |
+
2023-10-11 15:49:02,536 epoch 5 - iter 72/723 - loss 0.02925552 - time (sec): 40.12 - samples/sec: 445.20 - lr: 0.000105 - momentum: 0.000000
|
| 158 |
+
2023-10-11 15:49:40,160 epoch 5 - iter 144/723 - loss 0.03139574 - time (sec): 77.74 - samples/sec: 444.68 - lr: 0.000103 - momentum: 0.000000
|
| 159 |
+
2023-10-11 15:50:18,818 epoch 5 - iter 216/723 - loss 0.03596320 - time (sec): 116.40 - samples/sec: 441.32 - lr: 0.000101 - momentum: 0.000000
|
| 160 |
+
2023-10-11 15:50:58,827 epoch 5 - iter 288/723 - loss 0.03461253 - time (sec): 156.41 - samples/sec: 442.52 - lr: 0.000100 - momentum: 0.000000
|
| 161 |
+
2023-10-11 15:51:39,726 epoch 5 - iter 360/723 - loss 0.03830367 - time (sec): 197.31 - samples/sec: 445.19 - lr: 0.000098 - momentum: 0.000000
|
| 162 |
+
2023-10-11 15:52:18,234 epoch 5 - iter 432/723 - loss 0.03612181 - time (sec): 235.81 - samples/sec: 442.09 - lr: 0.000096 - momentum: 0.000000
|
| 163 |
+
2023-10-11 15:52:57,653 epoch 5 - iter 504/723 - loss 0.03611999 - time (sec): 275.23 - samples/sec: 442.66 - lr: 0.000094 - momentum: 0.000000
|
| 164 |
+
2023-10-11 15:53:39,398 epoch 5 - iter 576/723 - loss 0.03635599 - time (sec): 316.98 - samples/sec: 443.94 - lr: 0.000093 - momentum: 0.000000
|
| 165 |
+
2023-10-11 15:54:18,572 epoch 5 - iter 648/723 - loss 0.03657235 - time (sec): 356.15 - samples/sec: 442.26 - lr: 0.000091 - momentum: 0.000000
|
| 166 |
+
2023-10-11 15:54:59,785 epoch 5 - iter 720/723 - loss 0.03624887 - time (sec): 397.36 - samples/sec: 442.18 - lr: 0.000089 - momentum: 0.000000
|
| 167 |
+
2023-10-11 15:55:01,035 ----------------------------------------------------------------------------------------------------
|
| 168 |
+
2023-10-11 15:55:01,036 EPOCH 5 done: loss 0.0363 - lr: 0.000089
|
| 169 |
+
2023-10-11 15:55:23,163 DEV : loss 0.08089756220579147 - f1-score (micro avg) 0.8475
|
| 170 |
+
2023-10-11 15:55:23,201 saving best model
|
| 171 |
+
2023-10-11 15:55:25,804 ----------------------------------------------------------------------------------------------------
|
| 172 |
+
2023-10-11 15:56:06,123 epoch 6 - iter 72/723 - loss 0.02940130 - time (sec): 40.31 - samples/sec: 417.22 - lr: 0.000087 - momentum: 0.000000
|
| 173 |
+
2023-10-11 15:56:47,163 epoch 6 - iter 144/723 - loss 0.03149005 - time (sec): 81.35 - samples/sec: 418.21 - lr: 0.000085 - momentum: 0.000000
|
| 174 |
+
2023-10-11 15:57:29,010 epoch 6 - iter 216/723 - loss 0.03196977 - time (sec): 123.20 - samples/sec: 421.16 - lr: 0.000084 - momentum: 0.000000
|
| 175 |
+
2023-10-11 15:58:11,799 epoch 6 - iter 288/723 - loss 0.02911791 - time (sec): 165.99 - samples/sec: 421.81 - lr: 0.000082 - momentum: 0.000000
|
| 176 |
+
2023-10-11 15:58:56,449 epoch 6 - iter 360/723 - loss 0.02913213 - time (sec): 210.64 - samples/sec: 419.65 - lr: 0.000080 - momentum: 0.000000
|
| 177 |
+
2023-10-11 15:59:41,504 epoch 6 - iter 432/723 - loss 0.02849520 - time (sec): 255.70 - samples/sec: 412.23 - lr: 0.000078 - momentum: 0.000000
|
| 178 |
+
2023-10-11 16:00:22,165 epoch 6 - iter 504/723 - loss 0.02767609 - time (sec): 296.36 - samples/sec: 412.30 - lr: 0.000077 - momentum: 0.000000
|
| 179 |
+
2023-10-11 16:01:04,601 epoch 6 - iter 576/723 - loss 0.02699882 - time (sec): 338.79 - samples/sec: 412.62 - lr: 0.000075 - momentum: 0.000000
|
| 180 |
+
2023-10-11 16:01:45,681 epoch 6 - iter 648/723 - loss 0.02774246 - time (sec): 379.87 - samples/sec: 419.60 - lr: 0.000073 - momentum: 0.000000
|
| 181 |
+
2023-10-11 16:02:24,287 epoch 6 - iter 720/723 - loss 0.02783453 - time (sec): 418.48 - samples/sec: 420.01 - lr: 0.000071 - momentum: 0.000000
|
| 182 |
+
2023-10-11 16:02:25,435 ----------------------------------------------------------------------------------------------------
|
| 183 |
+
2023-10-11 16:02:25,436 EPOCH 6 done: loss 0.0281 - lr: 0.000071
|
| 184 |
+
2023-10-11 16:02:46,632 DEV : loss 0.09028322994709015 - f1-score (micro avg) 0.8549
|
| 185 |
+
2023-10-11 16:02:46,667 saving best model
|
| 186 |
+
2023-10-11 16:02:49,267 ----------------------------------------------------------------------------------------------------
|
| 187 |
+
2023-10-11 16:03:30,122 epoch 7 - iter 72/723 - loss 0.02328474 - time (sec): 40.85 - samples/sec: 438.26 - lr: 0.000069 - momentum: 0.000000
|
| 188 |
+
2023-10-11 16:04:09,577 epoch 7 - iter 144/723 - loss 0.01939381 - time (sec): 80.31 - samples/sec: 434.55 - lr: 0.000068 - momentum: 0.000000
|
| 189 |
+
2023-10-11 16:04:49,205 epoch 7 - iter 216/723 - loss 0.01823087 - time (sec): 119.93 - samples/sec: 432.07 - lr: 0.000066 - momentum: 0.000000
|
| 190 |
+
2023-10-11 16:05:30,859 epoch 7 - iter 288/723 - loss 0.01970687 - time (sec): 161.59 - samples/sec: 426.14 - lr: 0.000064 - momentum: 0.000000
|
| 191 |
+
2023-10-11 16:06:11,413 epoch 7 - iter 360/723 - loss 0.02102339 - time (sec): 202.14 - samples/sec: 427.93 - lr: 0.000062 - momentum: 0.000000
|
| 192 |
+
2023-10-11 16:06:56,523 epoch 7 - iter 432/723 - loss 0.02184920 - time (sec): 247.25 - samples/sec: 422.29 - lr: 0.000061 - momentum: 0.000000
|
| 193 |
+
2023-10-11 16:07:39,113 epoch 7 - iter 504/723 - loss 0.02240161 - time (sec): 289.84 - samples/sec: 422.63 - lr: 0.000059 - momentum: 0.000000
|
| 194 |
+
2023-10-11 16:08:19,982 epoch 7 - iter 576/723 - loss 0.02113173 - time (sec): 330.71 - samples/sec: 422.97 - lr: 0.000057 - momentum: 0.000000
|
| 195 |
+
2023-10-11 16:09:02,449 epoch 7 - iter 648/723 - loss 0.02209098 - time (sec): 373.18 - samples/sec: 421.17 - lr: 0.000055 - momentum: 0.000000
|
| 196 |
+
2023-10-11 16:09:48,752 epoch 7 - iter 720/723 - loss 0.02183950 - time (sec): 419.48 - samples/sec: 418.39 - lr: 0.000053 - momentum: 0.000000
|
| 197 |
+
2023-10-11 16:09:50,406 ----------------------------------------------------------------------------------------------------
|
| 198 |
+
2023-10-11 16:09:50,406 EPOCH 7 done: loss 0.0218 - lr: 0.000053
|
| 199 |
+
2023-10-11 16:10:14,848 DEV : loss 0.1077359989285469 - f1-score (micro avg) 0.8511
|
| 200 |
+
2023-10-11 16:10:14,891 ----------------------------------------------------------------------------------------------------
|
| 201 |
+
2023-10-11 16:11:04,173 epoch 8 - iter 72/723 - loss 0.01776157 - time (sec): 49.28 - samples/sec: 380.87 - lr: 0.000052 - momentum: 0.000000
|
| 202 |
+
2023-10-11 16:11:49,030 epoch 8 - iter 144/723 - loss 0.01607038 - time (sec): 94.14 - samples/sec: 382.08 - lr: 0.000050 - momentum: 0.000000
|
| 203 |
+
2023-10-11 16:12:35,654 epoch 8 - iter 216/723 - loss 0.01527344 - time (sec): 140.76 - samples/sec: 376.84 - lr: 0.000048 - momentum: 0.000000
|
| 204 |
+
2023-10-11 16:13:21,937 epoch 8 - iter 288/723 - loss 0.01465974 - time (sec): 187.04 - samples/sec: 373.55 - lr: 0.000046 - momentum: 0.000000
|
| 205 |
+
2023-10-11 16:14:08,463 epoch 8 - iter 360/723 - loss 0.01544379 - time (sec): 233.57 - samples/sec: 376.05 - lr: 0.000045 - momentum: 0.000000
|
| 206 |
+
2023-10-11 16:14:53,901 epoch 8 - iter 432/723 - loss 0.01555183 - time (sec): 279.01 - samples/sec: 380.63 - lr: 0.000043 - momentum: 0.000000
|
| 207 |
+
2023-10-11 16:15:38,074 epoch 8 - iter 504/723 - loss 0.01639593 - time (sec): 323.18 - samples/sec: 384.22 - lr: 0.000041 - momentum: 0.000000
|
| 208 |
+
2023-10-11 16:16:23,453 epoch 8 - iter 576/723 - loss 0.01765329 - time (sec): 368.56 - samples/sec: 386.50 - lr: 0.000039 - momentum: 0.000000
|
| 209 |
+
2023-10-11 16:17:07,576 epoch 8 - iter 648/723 - loss 0.01757496 - time (sec): 412.68 - samples/sec: 383.80 - lr: 0.000037 - momentum: 0.000000
|
| 210 |
+
2023-10-11 16:17:52,827 epoch 8 - iter 720/723 - loss 0.01756128 - time (sec): 457.93 - samples/sec: 383.62 - lr: 0.000036 - momentum: 0.000000
|
| 211 |
+
2023-10-11 16:17:54,262 ----------------------------------------------------------------------------------------------------
|
| 212 |
+
2023-10-11 16:17:54,262 EPOCH 8 done: loss 0.0176 - lr: 0.000036
|
| 213 |
+
2023-10-11 16:18:18,043 DEV : loss 0.11811362206935883 - f1-score (micro avg) 0.8475
|
| 214 |
+
2023-10-11 16:18:18,077 ----------------------------------------------------------------------------------------------------
|
| 215 |
+
2023-10-11 16:19:02,582 epoch 9 - iter 72/723 - loss 0.02085559 - time (sec): 44.50 - samples/sec: 421.75 - lr: 0.000034 - momentum: 0.000000
|
| 216 |
+
2023-10-11 16:19:44,728 epoch 9 - iter 144/723 - loss 0.01556397 - time (sec): 86.65 - samples/sec: 407.11 - lr: 0.000032 - momentum: 0.000000
|
| 217 |
+
2023-10-11 16:20:29,081 epoch 9 - iter 216/723 - loss 0.01455073 - time (sec): 131.00 - samples/sec: 411.44 - lr: 0.000030 - momentum: 0.000000
|
| 218 |
+
2023-10-11 16:21:15,015 epoch 9 - iter 288/723 - loss 0.01377233 - time (sec): 176.94 - samples/sec: 403.57 - lr: 0.000028 - momentum: 0.000000
|
| 219 |
+
2023-10-11 16:21:59,624 epoch 9 - iter 360/723 - loss 0.01410060 - time (sec): 221.54 - samples/sec: 400.43 - lr: 0.000027 - momentum: 0.000000
|
| 220 |
+
2023-10-11 16:22:39,825 epoch 9 - iter 432/723 - loss 0.01447392 - time (sec): 261.75 - samples/sec: 406.36 - lr: 0.000025 - momentum: 0.000000
|
| 221 |
+
2023-10-11 16:23:22,295 epoch 9 - iter 504/723 - loss 0.01453168 - time (sec): 304.22 - samples/sec: 410.26 - lr: 0.000023 - momentum: 0.000000
|
| 222 |
+
2023-10-11 16:24:03,364 epoch 9 - iter 576/723 - loss 0.01468837 - time (sec): 345.29 - samples/sec: 409.78 - lr: 0.000021 - momentum: 0.000000
|
| 223 |
+
2023-10-11 16:24:47,724 epoch 9 - iter 648/723 - loss 0.01396882 - time (sec): 389.65 - samples/sec: 406.74 - lr: 0.000020 - momentum: 0.000000
|
| 224 |
+
2023-10-11 16:25:30,313 epoch 9 - iter 720/723 - loss 0.01410103 - time (sec): 432.23 - samples/sec: 406.78 - lr: 0.000018 - momentum: 0.000000
|
| 225 |
+
2023-10-11 16:25:31,403 ----------------------------------------------------------------------------------------------------
|
| 226 |
+
2023-10-11 16:25:31,403 EPOCH 9 done: loss 0.0141 - lr: 0.000018
|
| 227 |
+
2023-10-11 16:25:52,109 DEV : loss 0.12514764070510864 - f1-score (micro avg) 0.8489
|
| 228 |
+
2023-10-11 16:25:52,145 ----------------------------------------------------------------------------------------------------
|
| 229 |
+
2023-10-11 16:26:32,572 epoch 10 - iter 72/723 - loss 0.00691167 - time (sec): 40.43 - samples/sec: 414.99 - lr: 0.000016 - momentum: 0.000000
|
| 230 |
+
2023-10-11 16:27:18,626 epoch 10 - iter 144/723 - loss 0.01193292 - time (sec): 86.48 - samples/sec: 407.69 - lr: 0.000014 - momentum: 0.000000
|
| 231 |
+
2023-10-11 16:28:03,587 epoch 10 - iter 216/723 - loss 0.01238731 - time (sec): 131.44 - samples/sec: 398.85 - lr: 0.000012 - momentum: 0.000000
|
| 232 |
+
2023-10-11 16:28:47,658 epoch 10 - iter 288/723 - loss 0.01210096 - time (sec): 175.51 - samples/sec: 394.39 - lr: 0.000011 - momentum: 0.000000
|
| 233 |
+
2023-10-11 16:29:30,706 epoch 10 - iter 360/723 - loss 0.01176008 - time (sec): 218.56 - samples/sec: 401.34 - lr: 0.000009 - momentum: 0.000000
|
| 234 |
+
2023-10-11 16:30:14,701 epoch 10 - iter 432/723 - loss 0.01133392 - time (sec): 262.55 - samples/sec: 403.84 - lr: 0.000007 - momentum: 0.000000
|
| 235 |
+
2023-10-11 16:30:53,960 epoch 10 - iter 504/723 - loss 0.01234490 - time (sec): 301.81 - samples/sec: 411.19 - lr: 0.000005 - momentum: 0.000000
|
| 236 |
+
2023-10-11 16:31:35,278 epoch 10 - iter 576/723 - loss 0.01209169 - time (sec): 343.13 - samples/sec: 410.10 - lr: 0.000004 - momentum: 0.000000
|
| 237 |
+
2023-10-11 16:32:15,815 epoch 10 - iter 648/723 - loss 0.01228765 - time (sec): 383.67 - samples/sec: 412.84 - lr: 0.000002 - momentum: 0.000000
|
| 238 |
+
2023-10-11 16:32:54,288 epoch 10 - iter 720/723 - loss 0.01234272 - time (sec): 422.14 - samples/sec: 415.95 - lr: 0.000000 - momentum: 0.000000
|
| 239 |
+
2023-10-11 16:32:55,551 ----------------------------------------------------------------------------------------------------
|
| 240 |
+
2023-10-11 16:32:55,552 EPOCH 10 done: loss 0.0123 - lr: 0.000000
|
| 241 |
+
2023-10-11 16:33:16,205 DEV : loss 0.13051000237464905 - f1-score (micro avg) 0.8392
|
| 242 |
+
2023-10-11 16:33:17,164 ----------------------------------------------------------------------------------------------------
|
| 243 |
+
2023-10-11 16:33:17,166 Loading model from best epoch ...
|
| 244 |
+
2023-10-11 16:33:21,267 SequenceTagger predicts: Dictionary with 13 tags: O, S-LOC, B-LOC, E-LOC, I-LOC, S-PER, B-PER, E-PER, I-PER, S-ORG, B-ORG, E-ORG, I-ORG
|
| 245 |
+
2023-10-11 16:33:42,968
|
| 246 |
+
Results:
|
| 247 |
+
- F-score (micro) 0.8506
|
| 248 |
+
- F-score (macro) 0.7523
|
| 249 |
+
- Accuracy 0.7511
|
| 250 |
+
|
| 251 |
+
By class:
|
| 252 |
+
precision recall f1-score support
|
| 253 |
+
|
| 254 |
+
PER 0.8497 0.8444 0.8470 482
|
| 255 |
+
LOC 0.9109 0.8930 0.9019 458
|
| 256 |
+
ORG 0.5614 0.4638 0.5079 69
|
| 257 |
+
|
| 258 |
+
micro avg 0.8609 0.8404 0.8506 1009
|
| 259 |
+
macro avg 0.7740 0.7337 0.7523 1009
|
| 260 |
+
weighted avg 0.8578 0.8404 0.8487 1009
|
| 261 |
+
|
| 262 |
+
2023-10-11 16:33:42,969 ----------------------------------------------------------------------------------------------------
|