Model save
Browse files- README.md +107 -22
- model.safetensors +1 -1
README.md
CHANGED
|
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 14 |
|
| 15 |
This model was trained from scratch on an unknown dataset.
|
| 16 |
It achieves the following results on the evaluation set:
|
| 17 |
-
- Loss:
|
| 18 |
|
| 19 |
## Model description
|
| 20 |
|
|
@@ -46,27 +46,112 @@ The following hyperparameters were used during training:
|
|
| 46 |
|
| 47 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 48 |
|:-------------:|:------:|:-----:|:---------------:|
|
| 49 |
-
|
|
| 50 |
-
|
|
| 51 |
-
|
|
| 52 |
-
|
|
| 53 |
-
|
|
| 54 |
-
|
|
| 55 |
-
| 3.
|
| 56 |
-
| 3.
|
| 57 |
-
| 3.
|
| 58 |
-
| 3.
|
| 59 |
-
| 3.
|
| 60 |
-
| 3.
|
| 61 |
-
| 3.
|
| 62 |
-
| 3.
|
| 63 |
-
| 3.
|
| 64 |
-
| 3.
|
| 65 |
-
| 3.
|
| 66 |
-
| 3.
|
| 67 |
-
| 3.
|
| 68 |
-
| 3.
|
| 69 |
-
| 3.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
### Framework versions
|
|
|
|
| 14 |
|
| 15 |
This model was trained from scratch on an unknown dataset.
|
| 16 |
It achieves the following results on the evaluation set:
|
| 17 |
+
- Loss: 2.8779
|
| 18 |
|
| 19 |
## Model description
|
| 20 |
|
|
|
|
| 46 |
|
| 47 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 48 |
|:-------------:|:------:|:-----:|:---------------:|
|
| 49 |
+
| 6.6923 | 0.0281 | 500 | 6.6275 |
|
| 50 |
+
| 5.7396 | 0.0561 | 1000 | 5.7512 |
|
| 51 |
+
| 5.3007 | 0.0842 | 1500 | 5.2374 |
|
| 52 |
+
| 4.6605 | 0.1123 | 2000 | 4.6288 |
|
| 53 |
+
| 4.2878 | 0.1403 | 2500 | 4.2100 |
|
| 54 |
+
| 4.0088 | 0.1684 | 3000 | 3.9680 |
|
| 55 |
+
| 3.8332 | 0.1965 | 3500 | 3.8331 |
|
| 56 |
+
| 3.7552 | 0.2246 | 4000 | 3.7197 |
|
| 57 |
+
| 3.6096 | 0.2526 | 4500 | 3.6527 |
|
| 58 |
+
| 3.6151 | 0.2807 | 5000 | 3.5885 |
|
| 59 |
+
| 3.4574 | 0.3088 | 5500 | 3.5467 |
|
| 60 |
+
| 3.4561 | 0.3368 | 6000 | 3.4924 |
|
| 61 |
+
| 3.3655 | 0.3649 | 6500 | 3.4318 |
|
| 62 |
+
| 3.4202 | 0.3930 | 7000 | 3.3960 |
|
| 63 |
+
| 3.3726 | 0.4210 | 7500 | 3.3732 |
|
| 64 |
+
| 3.3313 | 0.4491 | 8000 | 3.3337 |
|
| 65 |
+
| 3.3411 | 0.4772 | 8500 | 3.3040 |
|
| 66 |
+
| 3.3544 | 0.5052 | 9000 | 3.2786 |
|
| 67 |
+
| 3.2502 | 0.5333 | 9500 | 3.2705 |
|
| 68 |
+
| 3.2733 | 0.5614 | 10000 | 3.2517 |
|
| 69 |
+
| 3.2467 | 0.5895 | 10500 | 3.2253 |
|
| 70 |
+
| 3.2516 | 0.6175 | 11000 | 3.2078 |
|
| 71 |
+
| 3.1775 | 0.6456 | 11500 | 3.1942 |
|
| 72 |
+
| 3.207 | 0.6737 | 12000 | 3.1785 |
|
| 73 |
+
| 3.1331 | 0.7017 | 12500 | 3.1725 |
|
| 74 |
+
| 3.1398 | 0.7298 | 13000 | 3.1542 |
|
| 75 |
+
| 3.1569 | 0.7579 | 13500 | 3.1462 |
|
| 76 |
+
| 3.148 | 0.7859 | 14000 | 3.1280 |
|
| 77 |
+
| 3.1547 | 0.8140 | 14500 | 3.1201 |
|
| 78 |
+
| 3.1583 | 0.8421 | 15000 | 3.1070 |
|
| 79 |
+
| 3.103 | 0.8702 | 15500 | 3.0974 |
|
| 80 |
+
| 3.0527 | 0.8982 | 16000 | 3.0862 |
|
| 81 |
+
| 3.0612 | 0.9263 | 16500 | 3.0764 |
|
| 82 |
+
| 3.0707 | 0.9544 | 17000 | 3.0663 |
|
| 83 |
+
| 3.0632 | 0.9824 | 17500 | 3.0607 |
|
| 84 |
+
| 3.0317 | 1.0105 | 18000 | 3.0491 |
|
| 85 |
+
| 3.0108 | 1.0386 | 18500 | 3.0479 |
|
| 86 |
+
| 3.035 | 1.0666 | 19000 | 3.0415 |
|
| 87 |
+
| 3.0164 | 1.0947 | 19500 | 3.0324 |
|
| 88 |
+
| 2.9707 | 1.1228 | 20000 | 3.0250 |
|
| 89 |
+
| 3.0121 | 1.1508 | 20500 | 3.0211 |
|
| 90 |
+
| 2.9887 | 1.1789 | 21000 | 3.0135 |
|
| 91 |
+
| 2.9933 | 1.2070 | 21500 | 3.0050 |
|
| 92 |
+
| 2.9535 | 1.2351 | 22000 | 3.0005 |
|
| 93 |
+
| 2.9651 | 1.2631 | 22500 | 2.9931 |
|
| 94 |
+
| 2.9965 | 1.2912 | 23000 | 2.9875 |
|
| 95 |
+
| 2.9886 | 1.3193 | 23500 | 2.9819 |
|
| 96 |
+
| 2.92 | 1.3473 | 24000 | 2.9752 |
|
| 97 |
+
| 2.9263 | 1.3754 | 24500 | 2.9717 |
|
| 98 |
+
| 2.8707 | 1.4035 | 25000 | 2.9691 |
|
| 99 |
+
| 2.923 | 1.4315 | 25500 | 2.9627 |
|
| 100 |
+
| 2.9615 | 1.4596 | 26000 | 2.9555 |
|
| 101 |
+
| 2.9154 | 1.4877 | 26500 | 2.9518 |
|
| 102 |
+
| 2.9112 | 1.5157 | 27000 | 2.9481 |
|
| 103 |
+
| 2.9033 | 1.5438 | 27500 | 2.9433 |
|
| 104 |
+
| 2.9711 | 1.5719 | 28000 | 2.9379 |
|
| 105 |
+
| 2.8926 | 1.6000 | 28500 | 2.9344 |
|
| 106 |
+
| 2.9006 | 1.6280 | 29000 | 2.9301 |
|
| 107 |
+
| 2.9529 | 1.6561 | 29500 | 2.9263 |
|
| 108 |
+
| 2.8718 | 1.6842 | 30000 | 2.9223 |
|
| 109 |
+
| 2.8989 | 1.7122 | 30500 | 2.9188 |
|
| 110 |
+
| 2.9101 | 1.7403 | 31000 | 2.9149 |
|
| 111 |
+
| 2.9219 | 1.7684 | 31500 | 2.9120 |
|
| 112 |
+
| 2.9225 | 1.7964 | 32000 | 2.9082 |
|
| 113 |
+
| 2.8855 | 1.8245 | 32500 | 2.9058 |
|
| 114 |
+
| 2.8643 | 1.8526 | 33000 | 2.9026 |
|
| 115 |
+
| 2.8996 | 1.8806 | 33500 | 2.8999 |
|
| 116 |
+
| 2.9717 | 1.9087 | 34000 | 2.8974 |
|
| 117 |
+
| 2.8536 | 1.9368 | 34500 | 2.8960 |
|
| 118 |
+
| 2.8435 | 1.9649 | 35000 | 2.8928 |
|
| 119 |
+
| 2.861 | 1.9929 | 35500 | 2.8906 |
|
| 120 |
+
| 2.7977 | 2.0210 | 36000 | 2.8894 |
|
| 121 |
+
| 2.8228 | 2.0491 | 36500 | 2.8895 |
|
| 122 |
+
| 2.8064 | 2.0771 | 37000 | 2.8874 |
|
| 123 |
+
| 2.827 | 2.1052 | 37500 | 2.8863 |
|
| 124 |
+
| 2.8202 | 2.1333 | 38000 | 2.8852 |
|
| 125 |
+
| 2.8581 | 2.1613 | 38500 | 2.8840 |
|
| 126 |
+
| 2.8004 | 2.1894 | 39000 | 2.8828 |
|
| 127 |
+
| 2.7771 | 2.2175 | 39500 | 2.8820 |
|
| 128 |
+
| 2.8305 | 2.2456 | 40000 | 2.8814 |
|
| 129 |
+
| 2.8659 | 2.2736 | 40500 | 2.8806 |
|
| 130 |
+
| 2.8176 | 2.3017 | 41000 | 2.8804 |
|
| 131 |
+
| 2.8101 | 2.3298 | 41500 | 2.8797 |
|
| 132 |
+
| 2.7866 | 2.3578 | 42000 | 2.8792 |
|
| 133 |
+
| 2.8114 | 2.3859 | 42500 | 2.8789 |
|
| 134 |
+
| 2.8204 | 2.4140 | 43000 | 2.8786 |
|
| 135 |
+
| 2.861 | 2.4420 | 43500 | 2.8782 |
|
| 136 |
+
| 2.8333 | 2.4701 | 44000 | 2.8781 |
|
| 137 |
+
| 2.8111 | 2.4982 | 44500 | 2.8781 |
|
| 138 |
+
| 2.8145 | 2.5262 | 45000 | 2.8779 |
|
| 139 |
+
| 2.8349 | 2.5543 | 45500 | 2.8778 |
|
| 140 |
+
| 2.8058 | 2.5824 | 46000 | 2.8778 |
|
| 141 |
+
| 2.7971 | 2.6105 | 46500 | 2.8778 |
|
| 142 |
+
| 2.8267 | 2.6385 | 47000 | 2.8777 |
|
| 143 |
+
| 2.8172 | 2.6666 | 47500 | 2.8777 |
|
| 144 |
+
| 2.8355 | 2.6947 | 48000 | 2.8778 |
|
| 145 |
+
| 2.8189 | 2.7227 | 48500 | 2.8778 |
|
| 146 |
+
| 2.8155 | 2.7508 | 49000 | 2.8778 |
|
| 147 |
+
| 2.8542 | 2.7789 | 49500 | 2.8778 |
|
| 148 |
+
| 2.8476 | 2.8069 | 50000 | 2.8779 |
|
| 149 |
+
| 2.8288 | 2.8350 | 50500 | 2.8779 |
|
| 150 |
+
| 2.8399 | 2.8631 | 51000 | 2.8779 |
|
| 151 |
+
| 2.767 | 2.8911 | 51500 | 2.8779 |
|
| 152 |
+
| 2.8238 | 2.9192 | 52000 | 2.8779 |
|
| 153 |
+
| 2.7982 | 2.9473 | 52500 | 2.8779 |
|
| 154 |
+
| 2.7844 | 2.9754 | 53000 | 2.8779 |
|
| 155 |
|
| 156 |
|
| 157 |
### Framework versions
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 324662984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e5e9be4754d1c5ef1f96df9af7599e4bdabd7522da0cd670e098e0974cbde0b
|
| 3 |
size 324662984
|