#!/usr/bin/env bash
# Launch Flax T5-base masked-LM pretraining for Bengali on the mC4 "bn" config.
# Requires: run_t5_mlm_flax.py (HuggingFace Flax example) in the CWD and a
# pre-trained tokenizer at ${TOKENIZER_DIR}.
set -euo pipefail

readonly EXP_FOLDER="dumped/bengali_t5_base"
# NOTE(review): CACHE_DIR is created but never passed to the trainer — confirm
# whether a --cache_dir flag was intended; left as-is to preserve behavior.
readonly CACHE_DIR="${EXP_FOLDER}/"
readonly MODEL_CKPT="${EXP_FOLDER}/"
readonly TOKENIZER_DIR="dumped/bengali_t5_base/tokenizer"
readonly MODEL_CONFIG="t5-base"   # architecture config only; weights are trained from scratch
readonly MAX_SEQ_LEN=512
readonly NUM_THREAD=50            # dataset preprocessing workers
readonly DATASET_NAME="mc4"
readonly DATASET_CONFIG_NAME="bn"

# CACHE_DIR and MODEL_CKPT both resolve to EXP_FOLDER, so these three
# mkdir calls are idempotent over the same directory.
mkdir -p -- "$EXP_FOLDER" "$CACHE_DIR" "$MODEL_CKPT"

python -u run_t5_mlm_flax.py \
  --output_dir "${MODEL_CKPT}" \
  --model_type "t5" \
  --config_name "${MODEL_CONFIG}" \
  --tokenizer_name "${TOKENIZER_DIR}" \
  --dataset_name "${DATASET_NAME}" \
  --dataset_config_name "${DATASET_CONFIG_NAME}" \
  --max_seq_length "${MAX_SEQ_LEN}" \
  --per_device_train_batch_size 8 \
  --per_device_eval_batch_size 8 \
  --adafactor \
  --learning_rate 1e-3 \
  --weight_decay 0.001 \
  --warmup_steps 5000 \
  --overwrite_output_dir \
  --num_train_epochs 10 \
  --logging_steps 500 \
  --save_steps 2500 \
  --eval_steps 7500 \
  --preprocessing_num_workers "${NUM_THREAD}" \
  --dtype bfloat16