End of training
Browse files- all_results.json +14 -0
- debugger_ovh_transformers.ipynb +195 -1
- eval_results.json +9 -0
- train_results.json +8 -0
- trainer_state.json +25 -0
all_results.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 0.0,
|
| 3 |
+
"eval_loss": 140.0673828125,
|
| 4 |
+
"eval_runtime": 217.9185,
|
| 5 |
+
"eval_samples": 9184,
|
| 6 |
+
"eval_samples_per_second": 42.144,
|
| 7 |
+
"eval_steps_per_second": 5.268,
|
| 8 |
+
"eval_wer": 1.119321698229979,
|
| 9 |
+
"train_loss": 67.9575927734375,
|
| 10 |
+
"train_runtime": 3.7102,
|
| 11 |
+
"train_samples": 30002,
|
| 12 |
+
"train_samples_per_second": 5.391,
|
| 13 |
+
"train_steps_per_second": 2.695
|
| 14 |
+
}
|
debugger_ovh_transformers.ipynb
CHANGED
|
@@ -426,7 +426,201 @@
|
|
| 426 |
"\n",
|
| 427 |
"Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at hf-test/xls-r-dummy and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
|
| 428 |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
| 429 |
-
"preprocess datasets:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
]
|
| 431 |
}
|
| 432 |
],
|
|
|
|
| 426 |
"\n",
|
| 427 |
"Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at hf-test/xls-r-dummy and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
|
| 428 |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
| 429 |
+
"preprocess datasets: 30002ex [03:40, 136.35ex/s]\n",
|
| 430 |
+
"preprocess datasets: 9184ex [01:06, 137.56ex/s]\n",
|
| 431 |
+
"100%|ββββββββββββββββββββββββββββββββββββββββββ| 31/31 [00:00<00:00, 809.78ba/s]\n",
|
| 432 |
+
"100%|ββββββββββββββββββββββββββββββββββββββββββ| 10/10 [00:00<00:00, 874.45ba/s]\n",
|
| 433 |
+
"Configuration saved in ./preprocessor_config.json\n",
|
| 434 |
+
"tokenizer config file saved in ./tokenizer_config.json\n",
|
| 435 |
+
"Special tokens file saved in ./special_tokens_map.json\n",
|
| 436 |
+
"added tokens file saved in ./added_tokens.json\n",
|
| 437 |
+
"Configuration saved in ./config.json\n",
|
| 438 |
+
"loading feature extractor configuration file ./preprocessor_config.json\n",
|
| 439 |
+
"loading configuration file ./config.json\n",
|
| 440 |
+
"Model config Wav2Vec2Config {\n",
|
| 441 |
+
" \"_name_or_path\": \"./\",\n",
|
| 442 |
+
" \"activation_dropout\": 0.0,\n",
|
| 443 |
+
" \"adapter_kernel_size\": 3,\n",
|
| 444 |
+
" \"adapter_stride\": 2,\n",
|
| 445 |
+
" \"add_adapter\": false,\n",
|
| 446 |
+
" \"apply_spec_augment\": true,\n",
|
| 447 |
+
" \"architectures\": [\n",
|
| 448 |
+
" \"Wav2Vec2Model\"\n",
|
| 449 |
+
" ],\n",
|
| 450 |
+
" \"attention_dropout\": 0.0,\n",
|
| 451 |
+
" \"bos_token_id\": 1,\n",
|
| 452 |
+
" \"classifier_proj_size\": 256,\n",
|
| 453 |
+
" \"codevector_dim\": 256,\n",
|
| 454 |
+
" \"contrastive_logits_temperature\": 0.1,\n",
|
| 455 |
+
" \"conv_bias\": false,\n",
|
| 456 |
+
" \"conv_dim\": [\n",
|
| 457 |
+
" 32,\n",
|
| 458 |
+
" 32,\n",
|
| 459 |
+
" 32\n",
|
| 460 |
+
" ],\n",
|
| 461 |
+
" \"conv_kernel\": [\n",
|
| 462 |
+
" 8,\n",
|
| 463 |
+
" 8,\n",
|
| 464 |
+
" 8\n",
|
| 465 |
+
" ],\n",
|
| 466 |
+
" \"conv_stride\": [\n",
|
| 467 |
+
" 4,\n",
|
| 468 |
+
" 4,\n",
|
| 469 |
+
" 4\n",
|
| 470 |
+
" ],\n",
|
| 471 |
+
" \"ctc_loss_reduction\": \"mean\",\n",
|
| 472 |
+
" \"ctc_zero_infinity\": false,\n",
|
| 473 |
+
" \"diversity_loss_weight\": 0.1,\n",
|
| 474 |
+
" \"do_stable_layer_norm\": true,\n",
|
| 475 |
+
" \"eos_token_id\": 2,\n",
|
| 476 |
+
" \"feat_extract_activation\": \"gelu\",\n",
|
| 477 |
+
" \"feat_extract_dropout\": 0.0,\n",
|
| 478 |
+
" \"feat_extract_norm\": \"layer\",\n",
|
| 479 |
+
" \"feat_proj_dropout\": 0.0,\n",
|
| 480 |
+
" \"feat_quantizer_dropout\": 0.0,\n",
|
| 481 |
+
" \"final_dropout\": 0.0,\n",
|
| 482 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 483 |
+
" \"hidden_dropout\": 0.0,\n",
|
| 484 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 485 |
+
" \"hidden_size\": 16,\n",
|
| 486 |
+
" \"initializer_range\": 0.02,\n",
|
| 487 |
+
" \"intermediate_size\": 20,\n",
|
| 488 |
+
" \"layer_norm_eps\": 1e-05,\n",
|
| 489 |
+
" \"layerdrop\": 0.0,\n",
|
| 490 |
+
" \"mask_feature_length\": 10,\n",
|
| 491 |
+
" \"mask_feature_min_masks\": 0,\n",
|
| 492 |
+
" \"mask_feature_prob\": 0.0,\n",
|
| 493 |
+
" \"mask_time_length\": 10,\n",
|
| 494 |
+
" \"mask_time_min_masks\": 2,\n",
|
| 495 |
+
" \"mask_time_prob\": 0.05,\n",
|
| 496 |
+
" \"model_type\": \"wav2vec2\",\n",
|
| 497 |
+
" \"num_adapter_layers\": 3,\n",
|
| 498 |
+
" \"num_attention_heads\": 2,\n",
|
| 499 |
+
" \"num_codevector_groups\": 2,\n",
|
| 500 |
+
" \"num_codevectors_per_group\": 320,\n",
|
| 501 |
+
" \"num_conv_pos_embedding_groups\": 2,\n",
|
| 502 |
+
" \"num_conv_pos_embeddings\": 16,\n",
|
| 503 |
+
" \"num_feat_extract_layers\": 3,\n",
|
| 504 |
+
" \"num_hidden_layers\": 4,\n",
|
| 505 |
+
" \"num_negatives\": 10,\n",
|
| 506 |
+
" \"output_hidden_size\": 16,\n",
|
| 507 |
+
" \"pad_token_id\": 51,\n",
|
| 508 |
+
" \"proj_codevector_dim\": 256,\n",
|
| 509 |
+
" \"tdnn_dilation\": [\n",
|
| 510 |
+
" 1,\n",
|
| 511 |
+
" 2,\n",
|
| 512 |
+
" 3,\n",
|
| 513 |
+
" 1,\n",
|
| 514 |
+
" 1\n",
|
| 515 |
+
" ],\n",
|
| 516 |
+
" \"tdnn_dim\": [\n",
|
| 517 |
+
" 512,\n",
|
| 518 |
+
" 512,\n",
|
| 519 |
+
" 512,\n",
|
| 520 |
+
" 512,\n",
|
| 521 |
+
" 1500\n",
|
| 522 |
+
" ],\n",
|
| 523 |
+
" \"tdnn_kernel\": [\n",
|
| 524 |
+
" 5,\n",
|
| 525 |
+
" 3,\n",
|
| 526 |
+
" 3,\n",
|
| 527 |
+
" 1,\n",
|
| 528 |
+
" 1\n",
|
| 529 |
+
" ],\n",
|
| 530 |
+
" \"torch_dtype\": \"float32\",\n",
|
| 531 |
+
" \"transformers_version\": \"4.17.0.dev0\",\n",
|
| 532 |
+
" \"use_weighted_layer_sum\": false,\n",
|
| 533 |
+
" \"vocab_size\": 54,\n",
|
| 534 |
+
" \"xvector_output_dim\": 512\n",
|
| 535 |
+
"}\n",
|
| 536 |
+
"\n",
|
| 537 |
+
"loading feature extractor configuration file ./preprocessor_config.json\n",
|
| 538 |
+
"Feature extractor Wav2Vec2FeatureExtractor {\n",
|
| 539 |
+
" \"do_normalize\": true,\n",
|
| 540 |
+
" \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
|
| 541 |
+
" \"feature_size\": 1,\n",
|
| 542 |
+
" \"padding_side\": \"right\",\n",
|
| 543 |
+
" \"padding_value\": 0.0,\n",
|
| 544 |
+
" \"return_attention_mask\": false,\n",
|
| 545 |
+
" \"sampling_rate\": 16000\n",
|
| 546 |
+
"}\n",
|
| 547 |
+
"\n",
|
| 548 |
+
"Didn't find file ./tokenizer.json. We won't load it.\n",
|
| 549 |
+
"loading file ./vocab.json\n",
|
| 550 |
+
"loading file ./tokenizer_config.json\n",
|
| 551 |
+
"loading file ./added_tokens.json\n",
|
| 552 |
+
"loading file ./special_tokens_map.json\n",
|
| 553 |
+
"loading file None\n",
|
| 554 |
+
"Adding <s> to the vocabulary\n",
|
| 555 |
+
"Adding </s> to the vocabulary\n",
|
| 556 |
+
"/workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
|
| 557 |
+
"01/31/2022 17:18:19 - WARNING - huggingface_hub.repository - /workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
|
| 558 |
+
"max_steps is given, it will override any value given in num_train_epochs\n",
|
| 559 |
+
"Using amp half precision backend\n",
|
| 560 |
+
"The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
|
| 561 |
+
"/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
| 562 |
+
" warnings.warn(\n",
|
| 563 |
+
"***** Running training *****\n",
|
| 564 |
+
" Num examples = 30002\n",
|
| 565 |
+
" Num Epochs = 1\n",
|
| 566 |
+
" Instantaneous batch size per device = 2\n",
|
| 567 |
+
" Total train batch size (w. parallel, distributed & accumulation) = 2\n",
|
| 568 |
+
" Gradient Accumulation steps = 1\n",
|
| 569 |
+
" Total optimization steps = 10\n",
|
| 570 |
+
" 50%|ββββββββββββββββββββββ | 5/10 [00:00<00:00, 10.35it/s]Saving model checkpoint to ./checkpoint-5\n",
|
| 571 |
+
"Configuration saved in ./checkpoint-5/config.json\n",
|
| 572 |
+
"Model weights saved in ./checkpoint-5/pytorch_model.bin\n",
|
| 573 |
+
"Configuration saved in ./checkpoint-5/preprocessor_config.json\n",
|
| 574 |
+
"Configuration saved in ./preprocessor_config.json\n",
|
| 575 |
+
" 90%|ββββββββββββββββββββββββββββββββββββββββ | 9/10 [00:03<00:00, 2.20it/s]Saving model checkpoint to ./checkpoint-10\n",
|
| 576 |
+
"Configuration saved in ./checkpoint-10/config.json\n",
|
| 577 |
+
"Model weights saved in ./checkpoint-10/pytorch_model.bin\n",
|
| 578 |
+
"Configuration saved in ./checkpoint-10/preprocessor_config.json\n",
|
| 579 |
+
"Deleting older checkpoint [checkpoint-5] due to args.save_total_limit\n",
|
| 580 |
+
"\n",
|
| 581 |
+
"\n",
|
| 582 |
+
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
|
| 583 |
+
"\n",
|
| 584 |
+
"\n",
|
| 585 |
+
"{'train_runtime': 3.7102, 'train_samples_per_second': 5.391, 'train_steps_per_second': 2.695, 'train_loss': 67.9575927734375, 'epoch': 0.0}\n",
|
| 586 |
+
"100%|βββββββββββββββββββββββββββββββββββββββββββ| 10/10 [00:03<00:00, 2.70it/s]\n",
|
| 587 |
+
"Saving model checkpoint to ./\n",
|
| 588 |
+
"Configuration saved in ./config.json\n",
|
| 589 |
+
"Model weights saved in ./pytorch_model.bin\n",
|
| 590 |
+
"Configuration saved in ./preprocessor_config.json\n",
|
| 591 |
+
"Saving model checkpoint to ./\n",
|
| 592 |
+
"Configuration saved in ./config.json\n",
|
| 593 |
+
"Model weights saved in ./pytorch_model.bin\n",
|
| 594 |
+
"Configuration saved in ./preprocessor_config.json\n",
|
| 595 |
+
"Several commits (2) will be pushed upstream.\n",
|
| 596 |
+
"01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - Several commits (2) will be pushed upstream.\n",
|
| 597 |
+
"The progress bars may be unreliable.\n",
|
| 598 |
+
"01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - The progress bars may be unreliable.\n",
|
| 599 |
+
"Everything up-to-date\n",
|
| 600 |
+
"\n",
|
| 601 |
+
"01/31/2022 17:18:27 - WARNING - huggingface_hub.repository - Everything up-to-date\n",
|
| 602 |
+
"\n",
|
| 603 |
+
"Dropping the following result as it does not have all the necessary fields:\n",
|
| 604 |
+
"{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'ab'}}\n",
|
| 605 |
+
"To https://huggingface.co/masapasa/xls-r-ab-test\n",
|
| 606 |
+
" b50c32e..4e53539 main -> main\n",
|
| 607 |
+
"\n",
|
| 608 |
+
"01/31/2022 17:18:33 - WARNING - huggingface_hub.repository - To https://huggingface.co/masapasa/xls-r-ab-test\n",
|
| 609 |
+
" b50c32e..4e53539 main -> main\n",
|
| 610 |
+
"\n",
|
| 611 |
+
"***** train metrics *****\n",
|
| 612 |
+
" epoch = 0.0\n",
|
| 613 |
+
" train_loss = 67.9576\n",
|
| 614 |
+
" train_runtime = 0:00:03.71\n",
|
| 615 |
+
" train_samples = 30002\n",
|
| 616 |
+
" train_samples_per_second = 5.391\n",
|
| 617 |
+
" train_steps_per_second = 2.695\n",
|
| 618 |
+
"01/31/2022 17:18:36 - INFO - __main__ - *** Evaluate ***\n",
|
| 619 |
+
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
|
| 620 |
+
"***** Running Evaluation *****\n",
|
| 621 |
+
" Num examples = 9184\n",
|
| 622 |
+
" Batch size = 8\n",
|
| 623 |
+
" 68%|βββββββββββββββββββββββββββ | 777/1148 [02:07<01:06, 5.55it/s]"
|
| 624 |
]
|
| 625 |
}
|
| 626 |
],
|
eval_results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 0.0,
|
| 3 |
+
"eval_loss": 140.0673828125,
|
| 4 |
+
"eval_runtime": 217.9185,
|
| 5 |
+
"eval_samples": 9184,
|
| 6 |
+
"eval_samples_per_second": 42.144,
|
| 7 |
+
"eval_steps_per_second": 5.268,
|
| 8 |
+
"eval_wer": 1.119321698229979
|
| 9 |
+
}
|
train_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 0.0,
|
| 3 |
+
"train_loss": 67.9575927734375,
|
| 4 |
+
"train_runtime": 3.7102,
|
| 5 |
+
"train_samples": 30002,
|
| 6 |
+
"train_samples_per_second": 5.391,
|
| 7 |
+
"train_steps_per_second": 2.695
|
| 8 |
+
}
|
trainer_state.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.0006666222251849877,
|
| 5 |
+
"global_step": 10,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 0.0,
|
| 12 |
+
"step": 10,
|
| 13 |
+
"total_flos": 334514838528.0,
|
| 14 |
+
"train_loss": 67.9575927734375,
|
| 15 |
+
"train_runtime": 3.7102,
|
| 16 |
+
"train_samples_per_second": 5.391,
|
| 17 |
+
"train_steps_per_second": 2.695
|
| 18 |
+
}
|
| 19 |
+
],
|
| 20 |
+
"max_steps": 10,
|
| 21 |
+
"num_train_epochs": 1,
|
| 22 |
+
"total_flos": 334514838528.0,
|
| 23 |
+
"trial_name": null,
|
| 24 |
+
"trial_params": null
|
| 25 |
+
}
|