Saving weights and logs of step 10000
events.out.tfevents.1640023857.t1v-n-8eba1090-w-0.1757053.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0dff3582a3f326fc45154024684d65c41df3357136ac4ff101cfb162d6ab4779
+size 1471447
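
The added file is a Git LFS pointer for the TensorBoard event log written up to step 10000. As a minimal sketch (not part of the commit), and assuming the actual log has been fetched with git lfs pull, its scalar summaries could be read with TensorBoard's EventAccumulator; the "train_loss" tag below is an assumption for illustration.

# Sketch: inspect the event log added in this commit.
# Assumes git lfs pull has replaced the LFS pointer with the real file.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("events.out.tfevents.1640023857.t1v-n-8eba1090-w-0.1757053.0.v2")
acc.Reload()                             # parse the event file
print(acc.Tags()["scalars"])             # list the scalar tags actually logged
for event in acc.Scalars("train_loss"):  # tag name is an assumption
    print(event.step, event.value)
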
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dc5b3ca96a41ec669cb35943c8c732e01b6a98c0da4c9d79947da7eab843a2cc
 size 1421662309
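
The updated flax_model.msgpack pointer tracks the ~1.4 GB Flax checkpoint saved at step 10000 via Git LFS; only the sha256 oid changes. Below is a minimal sketch of reading the raw parameter tree, assuming the real file has been pulled; it is one way to inspect the checkpoint, not necessarily how the training script restores it.

# Sketch: restore the parameter pytree from the msgpack checkpoint.
# Assumes git lfs pull has fetched the actual file behind the pointer.
import jax
from flax import serialization

with open("flax_model.msgpack", "rb") as f:
    params = serialization.msgpack_restore(f.read())

print(list(params.keys()))  # top-level module names
num_params = sum(leaf.size for leaf in jax.tree_util.tree_leaves(params))
print(f"{num_params:,} parameters")  # rough size check
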
start_train.sh
CHANGED
@@ -11,8 +11,8 @@ python3 run_mlm_flax.py \
     --max_seq_length="512" \
     --pad_to_max_length \
     --preprocessing_num_workers="64" \
-    --per_device_train_batch_size="
-    --per_device_eval_batch_size="
+    --per_device_train_batch_size="8" \
+    --per_device_eval_batch_size="8" \
     --adam_beta1="0.9" \
     --adam_beta2="0.98" \
     --adam_epsilon="1e-6" \
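
The start_train.sh change fills in the per-device batch sizes, setting both training and eval to 8 per device. As a rough sketch of what that implies: the Flax MLM example script typically scales the per-device value by the number of visible devices, so on an 8-core TPU the effective train batch would be 64. The device count below is an assumption, suggested only by the TPU VM hostname in the event-file name.

# Sketch: effective global batch size implied by the new setting.
# The device count is an assumption (8 on a TPU v3-8 VM).
import jax

per_device_train_batch_size = 8  # value introduced in this commit
global_batch_size = per_device_train_batch_size * jax.device_count()
print(global_batch_size)         # 64 if 8 devices are visible
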