Training in progress, step 100
Browse files- fine-tune-whisper-streaming.ipynb +32 -210
- pytorch_model.bin +1 -1
- runs/Dec06_02-49-42_04812eda968b/events.out.tfevents.1670295010.04812eda968b.1031606.3 +2 -2
- runs/Dec06_03-53-43_04812eda968b/1670298849.4736288/events.out.tfevents.1670298849.04812eda968b.1031606.6 +3 -0
- runs/Dec06_03-53-43_04812eda968b/events.out.tfevents.1670298849.04812eda968b.1031606.5 +3 -0
- runs/Dec06_03-57-56_04812eda968b/1670299090.1920335/events.out.tfevents.1670299090.04812eda968b.1031606.8 +3 -0
- runs/Dec06_03-57-56_04812eda968b/events.out.tfevents.1670299090.04812eda968b.1031606.7 +3 -0
- runs/Dec06_06-58-23_04812eda968b/1670309913.1115/events.out.tfevents.1670309913.04812eda968b.2449868.1 +3 -0
- runs/Dec06_06-58-23_04812eda968b/events.out.tfevents.1670309913.04812eda968b.2449868.0 +3 -0
- runs/Dec06_07-40-22_04812eda968b/1670312431.563092/events.out.tfevents.1670312431.04812eda968b.2650899.1 +3 -0
- runs/Dec06_07-40-22_04812eda968b/events.out.tfevents.1670312431.04812eda968b.2650899.0 +3 -0
- training_args.bin +1 -1
fine-tune-whisper-streaming.ipynb
CHANGED
|
@@ -684,36 +684,28 @@
|
|
| 684 |
},
|
| 685 |
{
|
| 686 |
"cell_type": "code",
|
| 687 |
-
"execution_count":
|
| 688 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
| 689 |
"metadata": {},
|
| 690 |
-
"outputs": [
|
| 691 |
-
{
|
| 692 |
-
"name": "stderr",
|
| 693 |
-
"output_type": "stream",
|
| 694 |
-
"text": [
|
| 695 |
-
"PyTorch: setting up devices\n"
|
| 696 |
-
]
|
| 697 |
-
}
|
| 698 |
-
],
|
| 699 |
"source": [
|
| 700 |
"from transformers import Seq2SeqTrainingArguments\n",
|
| 701 |
"\n",
|
| 702 |
"training_args = Seq2SeqTrainingArguments(\n",
|
| 703 |
" output_dir=\"./\",\n",
|
| 704 |
-
" per_device_train_batch_size=
|
| 705 |
" gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
|
| 706 |
" learning_rate=1e-5,\n",
|
| 707 |
" warmup_steps=500,\n",
|
| 708 |
-
" max_steps=
|
| 709 |
" gradient_checkpointing=True,\n",
|
| 710 |
" fp16=True,\n",
|
| 711 |
" evaluation_strategy=\"steps\",\n",
|
| 712 |
-
" per_device_eval_batch_size=
|
| 713 |
" predict_with_generate=True,\n",
|
| 714 |
" generation_max_length=225,\n",
|
| 715 |
-
" save_steps=
|
| 716 |
-
" eval_steps=
|
| 717 |
" logging_steps=1, #25,\n",
|
| 718 |
" report_to=[\"tensorboard\"],\n",
|
| 719 |
" load_best_model_at_end=True,\n",
|
|
@@ -742,7 +734,7 @@
|
|
| 742 |
},
|
| 743 |
{
|
| 744 |
"cell_type": "code",
|
| 745 |
-
"execution_count":
|
| 746 |
"id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
|
| 747 |
"metadata": {},
|
| 748 |
"outputs": [],
|
|
@@ -771,7 +763,7 @@
|
|
| 771 |
},
|
| 772 |
{
|
| 773 |
"cell_type": "code",
|
| 774 |
-
"execution_count":
|
| 775 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
| 776 |
"metadata": {},
|
| 777 |
"outputs": [
|
|
@@ -792,7 +784,7 @@
|
|
| 792 |
" args=training_args,\n",
|
| 793 |
" model=model,\n",
|
| 794 |
" train_dataset=vectorized_datasets[\"train\"],\n",
|
| 795 |
-
" eval_dataset=vectorized_datasets[\"test\"].take(
|
| 796 |
" data_collator=data_collator,\n",
|
| 797 |
" compute_metrics=compute_metrics,\n",
|
| 798 |
" tokenizer=processor,\n",
|
|
@@ -810,7 +802,7 @@
|
|
| 810 |
},
|
| 811 |
{
|
| 812 |
"cell_type": "code",
|
| 813 |
-
"execution_count":
|
| 814 |
"id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
|
| 815 |
"metadata": {},
|
| 816 |
"outputs": [
|
|
@@ -867,14 +859,14 @@
|
|
| 867 |
"/home/jovyan/whisper/lib/python3.10/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
| 868 |
" warnings.warn(\n",
|
| 869 |
"***** Running training *****\n",
|
| 870 |
-
" Num examples =
|
| 871 |
" Num Epochs = 9223372036854775807\n",
|
| 872 |
-
" Instantaneous batch size per device =
|
| 873 |
-
" Total train batch size (w. parallel, distributed & accumulation) =
|
| 874 |
" Gradient Accumulation steps = 1\n",
|
| 875 |
-
" Total optimization steps =
|
| 876 |
" Number of trainable parameters = 241734912\n",
|
| 877 |
-
"Reading metadata...: 18211it [00:02,
|
| 878 |
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 879 |
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 880 |
" warnings.warn('Was asked to gather along dimension 0, but all '\n"
|
|
@@ -886,8 +878,8 @@
|
|
| 886 |
"\n",
|
| 887 |
" <div>\n",
|
| 888 |
" \n",
|
| 889 |
-
" <progress value='
|
| 890 |
-
" [
|
| 891 |
" </div>\n",
|
| 892 |
" <table border=\"1\" class=\"dataframe\">\n",
|
| 893 |
" <thead>\n",
|
|
@@ -900,58 +892,10 @@
|
|
| 900 |
" </thead>\n",
|
| 901 |
" <tbody>\n",
|
| 902 |
" <tr>\n",
|
| 903 |
-
" <td>
|
| 904 |
-
" <td>
|
| 905 |
-
" <td>
|
| 906 |
-
" <td>
|
| 907 |
-
" </tr>\n",
|
| 908 |
-
" <tr>\n",
|
| 909 |
-
" <td>20</td>\n",
|
| 910 |
-
" <td>0.499800</td>\n",
|
| 911 |
-
" <td>0.709538</td>\n",
|
| 912 |
-
" <td>55.380577</td>\n",
|
| 913 |
-
" </tr>\n",
|
| 914 |
-
" <tr>\n",
|
| 915 |
-
" <td>30</td>\n",
|
| 916 |
-
" <td>0.432000</td>\n",
|
| 917 |
-
" <td>0.702946</td>\n",
|
| 918 |
-
" <td>51.443570</td>\n",
|
| 919 |
-
" </tr>\n",
|
| 920 |
-
" <tr>\n",
|
| 921 |
-
" <td>40</td>\n",
|
| 922 |
-
" <td>0.511800</td>\n",
|
| 923 |
-
" <td>0.694216</td>\n",
|
| 924 |
-
" <td>49.606299</td>\n",
|
| 925 |
-
" </tr>\n",
|
| 926 |
-
" <tr>\n",
|
| 927 |
-
" <td>50</td>\n",
|
| 928 |
-
" <td>0.398100</td>\n",
|
| 929 |
-
" <td>0.682893</td>\n",
|
| 930 |
-
" <td>48.556430</td>\n",
|
| 931 |
-
" </tr>\n",
|
| 932 |
-
" <tr>\n",
|
| 933 |
-
" <td>60</td>\n",
|
| 934 |
-
" <td>0.473400</td>\n",
|
| 935 |
-
" <td>0.669629</td>\n",
|
| 936 |
-
" <td>46.719160</td>\n",
|
| 937 |
-
" </tr>\n",
|
| 938 |
-
" <tr>\n",
|
| 939 |
-
" <td>70</td>\n",
|
| 940 |
-
" <td>0.388100</td>\n",
|
| 941 |
-
" <td>0.648503</td>\n",
|
| 942 |
-
" <td>48.818898</td>\n",
|
| 943 |
-
" </tr>\n",
|
| 944 |
-
" <tr>\n",
|
| 945 |
-
" <td>80</td>\n",
|
| 946 |
-
" <td>0.420200</td>\n",
|
| 947 |
-
" <td>0.626310</td>\n",
|
| 948 |
-
" <td>50.656168</td>\n",
|
| 949 |
-
" </tr>\n",
|
| 950 |
-
" <tr>\n",
|
| 951 |
-
" <td>90</td>\n",
|
| 952 |
-
" <td>0.342300</td>\n",
|
| 953 |
-
" <td>0.602524</td>\n",
|
| 954 |
-
" <td>50.918635</td>\n",
|
| 955 |
" </tr>\n",
|
| 956 |
" </tbody>\n",
|
| 957 |
"</table><p>"
|
|
@@ -969,142 +913,20 @@
|
|
| 969 |
"text": [
|
| 970 |
"***** Running Evaluation *****\n",
|
| 971 |
" Num examples: Unknown\n",
|
| 972 |
-
" Batch size =
|
| 973 |
-
"Reading metadata...: 8693it [00:01,
|
| 974 |
-
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 975 |
-
"Saving model checkpoint to ./checkpoint-10\n",
|
| 976 |
-
"Configuration saved in ./checkpoint-10/config.json\n",
|
| 977 |
-
"Model weights saved in ./checkpoint-10/pytorch_model.bin\n",
|
| 978 |
-
"Feature extractor saved in ./checkpoint-10/preprocessor_config.json\n",
|
| 979 |
-
"tokenizer config file saved in ./checkpoint-10/tokenizer_config.json\n",
|
| 980 |
-
"Special tokens file saved in ./checkpoint-10/special_tokens_map.json\n",
|
| 981 |
-
"added tokens file saved in ./checkpoint-10/added_tokens.json\n",
|
| 982 |
-
"Feature extractor saved in ./preprocessor_config.json\n",
|
| 983 |
-
"tokenizer config file saved in ./tokenizer_config.json\n",
|
| 984 |
-
"Special tokens file saved in ./special_tokens_map.json\n",
|
| 985 |
-
"added tokens file saved in ./added_tokens.json\n",
|
| 986 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 987 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n",
|
| 988 |
-
"***** Running Evaluation *****\n",
|
| 989 |
-
" Num examples: Unknown\n",
|
| 990 |
-
" Batch size = 32\n",
|
| 991 |
-
"Reading metadata...: 8693it [00:02, 3799.36it/s]\n",
|
| 992 |
-
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 993 |
-
"Saving model checkpoint to ./checkpoint-20\n",
|
| 994 |
-
"Configuration saved in ./checkpoint-20/config.json\n",
|
| 995 |
-
"Model weights saved in ./checkpoint-20/pytorch_model.bin\n",
|
| 996 |
-
"Feature extractor saved in ./checkpoint-20/preprocessor_config.json\n",
|
| 997 |
-
"tokenizer config file saved in ./checkpoint-20/tokenizer_config.json\n",
|
| 998 |
-
"Special tokens file saved in ./checkpoint-20/special_tokens_map.json\n",
|
| 999 |
-
"added tokens file saved in ./checkpoint-20/added_tokens.json\n",
|
| 1000 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 1001 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n",
|
| 1002 |
-
"***** Running Evaluation *****\n",
|
| 1003 |
-
" Num examples: Unknown\n",
|
| 1004 |
-
" Batch size = 32\n",
|
| 1005 |
-
"Reading metadata...: 8693it [00:02, 3860.16it/s]\n",
|
| 1006 |
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 1007 |
-
"Saving model checkpoint to ./checkpoint-
|
| 1008 |
-
"Configuration saved in ./checkpoint-
|
| 1009 |
-
"Model weights saved in ./checkpoint-
|
| 1010 |
-
"Feature extractor saved in ./checkpoint-
|
| 1011 |
-
"tokenizer config file saved in ./checkpoint-
|
| 1012 |
-
"Special tokens file saved in ./checkpoint-
|
| 1013 |
-
"added tokens file saved in ./checkpoint-
|
| 1014 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 1015 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n",
|
| 1016 |
-
"***** Running Evaluation *****\n",
|
| 1017 |
-
" Num examples: Unknown\n",
|
| 1018 |
-
" Batch size = 32\n",
|
| 1019 |
-
"Reading metadata...: 8693it [00:01, 7522.25it/s]\n",
|
| 1020 |
-
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 1021 |
-
"Saving model checkpoint to ./checkpoint-40\n",
|
| 1022 |
-
"Configuration saved in ./checkpoint-40/config.json\n",
|
| 1023 |
-
"Model weights saved in ./checkpoint-40/pytorch_model.bin\n",
|
| 1024 |
-
"Feature extractor saved in ./checkpoint-40/preprocessor_config.json\n",
|
| 1025 |
-
"tokenizer config file saved in ./checkpoint-40/tokenizer_config.json\n",
|
| 1026 |
-
"Special tokens file saved in ./checkpoint-40/special_tokens_map.json\n",
|
| 1027 |
-
"added tokens file saved in ./checkpoint-40/added_tokens.json\n",
|
| 1028 |
"Feature extractor saved in ./preprocessor_config.json\n",
|
| 1029 |
"tokenizer config file saved in ./tokenizer_config.json\n",
|
| 1030 |
"Special tokens file saved in ./special_tokens_map.json\n",
|
| 1031 |
-
"added tokens file saved in ./added_tokens.json\n"
|
| 1032 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 1033 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n",
|
| 1034 |
-
"***** Running Evaluation *****\n",
|
| 1035 |
-
" Num examples: Unknown\n",
|
| 1036 |
-
" Batch size = 32\n",
|
| 1037 |
-
"Reading metadata...: 8693it [00:02, 4089.09it/s]\n",
|
| 1038 |
-
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 1039 |
-
"Saving model checkpoint to ./checkpoint-50\n",
|
| 1040 |
-
"Configuration saved in ./checkpoint-50/config.json\n",
|
| 1041 |
-
"Model weights saved in ./checkpoint-50/pytorch_model.bin\n",
|
| 1042 |
-
"Feature extractor saved in ./checkpoint-50/preprocessor_config.json\n",
|
| 1043 |
-
"tokenizer config file saved in ./checkpoint-50/tokenizer_config.json\n",
|
| 1044 |
-
"Special tokens file saved in ./checkpoint-50/special_tokens_map.json\n",
|
| 1045 |
-
"added tokens file saved in ./checkpoint-50/added_tokens.json\n",
|
| 1046 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 1047 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n",
|
| 1048 |
-
"***** Running Evaluation *****\n",
|
| 1049 |
-
" Num examples: Unknown\n",
|
| 1050 |
-
" Batch size = 32\n",
|
| 1051 |
-
"Reading metadata...: 8693it [00:02, 3824.34it/s]\n",
|
| 1052 |
-
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 1053 |
-
"Saving model checkpoint to ./checkpoint-60\n",
|
| 1054 |
-
"Configuration saved in ./checkpoint-60/config.json\n",
|
| 1055 |
-
"Model weights saved in ./checkpoint-60/pytorch_model.bin\n",
|
| 1056 |
-
"Feature extractor saved in ./checkpoint-60/preprocessor_config.json\n",
|
| 1057 |
-
"tokenizer config file saved in ./checkpoint-60/tokenizer_config.json\n",
|
| 1058 |
-
"Special tokens file saved in ./checkpoint-60/special_tokens_map.json\n",
|
| 1059 |
-
"added tokens file saved in ./checkpoint-60/added_tokens.json\n",
|
| 1060 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 1061 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n",
|
| 1062 |
-
"***** Running Evaluation *****\n",
|
| 1063 |
-
" Num examples: Unknown\n",
|
| 1064 |
-
" Batch size = 32\n",
|
| 1065 |
-
"Reading metadata...: 8693it [00:01, 7724.39it/s]\n",
|
| 1066 |
-
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 1067 |
-
"Saving model checkpoint to ./checkpoint-70\n",
|
| 1068 |
-
"Configuration saved in ./checkpoint-70/config.json\n",
|
| 1069 |
-
"Model weights saved in ./checkpoint-70/pytorch_model.bin\n",
|
| 1070 |
-
"Feature extractor saved in ./checkpoint-70/preprocessor_config.json\n",
|
| 1071 |
-
"tokenizer config file saved in ./checkpoint-70/tokenizer_config.json\n",
|
| 1072 |
-
"Special tokens file saved in ./checkpoint-70/special_tokens_map.json\n",
|
| 1073 |
-
"added tokens file saved in ./checkpoint-70/added_tokens.json\n",
|
| 1074 |
-
"Feature extractor saved in ./preprocessor_config.json\n",
|
| 1075 |
-
"tokenizer config file saved in ./tokenizer_config.json\n",
|
| 1076 |
-
"Special tokens file saved in ./special_tokens_map.json\n",
|
| 1077 |
-
"added tokens file saved in ./added_tokens.json\n",
|
| 1078 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 1079 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n",
|
| 1080 |
-
"***** Running Evaluation *****\n",
|
| 1081 |
-
" Num examples: Unknown\n",
|
| 1082 |
-
" Batch size = 32\n",
|
| 1083 |
-
"Reading metadata...: 8693it [00:02, 3249.16it/s]\n",
|
| 1084 |
-
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 1085 |
-
"Saving model checkpoint to ./checkpoint-80\n",
|
| 1086 |
-
"Configuration saved in ./checkpoint-80/config.json\n",
|
| 1087 |
-
"Model weights saved in ./checkpoint-80/pytorch_model.bin\n",
|
| 1088 |
-
"Feature extractor saved in ./checkpoint-80/preprocessor_config.json\n",
|
| 1089 |
-
"tokenizer config file saved in ./checkpoint-80/tokenizer_config.json\n",
|
| 1090 |
-
"Special tokens file saved in ./checkpoint-80/special_tokens_map.json\n",
|
| 1091 |
-
"added tokens file saved in ./checkpoint-80/added_tokens.json\n",
|
| 1092 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 1093 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n",
|
| 1094 |
-
"***** Running Evaluation *****\n",
|
| 1095 |
-
" Num examples: Unknown\n",
|
| 1096 |
-
" Batch size = 32\n",
|
| 1097 |
-
"Reading metadata...: 8693it [00:02, 3111.55it/s]\n",
|
| 1098 |
-
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 1099 |
-
"Saving model checkpoint to ./checkpoint-90\n",
|
| 1100 |
-
"Configuration saved in ./checkpoint-90/config.json\n",
|
| 1101 |
-
"Model weights saved in ./checkpoint-90/pytorch_model.bin\n",
|
| 1102 |
-
"Feature extractor saved in ./checkpoint-90/preprocessor_config.json\n",
|
| 1103 |
-
"tokenizer config file saved in ./checkpoint-90/tokenizer_config.json\n",
|
| 1104 |
-
"Special tokens file saved in ./checkpoint-90/special_tokens_map.json\n",
|
| 1105 |
-
"added tokens file saved in ./checkpoint-90/added_tokens.json\n",
|
| 1106 |
-
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 1107 |
-
" warnings.warn('Was asked to gather along dimension 0, but all '\n"
|
| 1108 |
]
|
| 1109 |
}
|
| 1110 |
],
|
|
|
|
| 684 |
},
|
| 685 |
{
|
| 686 |
"cell_type": "code",
|
| 687 |
+
"execution_count": 19,
|
| 688 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
| 689 |
"metadata": {},
|
| 690 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 691 |
"source": [
|
| 692 |
"from transformers import Seq2SeqTrainingArguments\n",
|
| 693 |
"\n",
|
| 694 |
"training_args = Seq2SeqTrainingArguments(\n",
|
| 695 |
" output_dir=\"./\",\n",
|
| 696 |
+
" per_device_train_batch_size=50,\n",
|
| 697 |
" gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
|
| 698 |
" learning_rate=1e-5,\n",
|
| 699 |
" warmup_steps=500,\n",
|
| 700 |
+
" max_steps=3000, #5000,\n",
|
| 701 |
" gradient_checkpointing=True,\n",
|
| 702 |
" fp16=True,\n",
|
| 703 |
" evaluation_strategy=\"steps\",\n",
|
| 704 |
+
" per_device_eval_batch_size=5,\n",
|
| 705 |
" predict_with_generate=True,\n",
|
| 706 |
" generation_max_length=225,\n",
|
| 707 |
+
" save_steps=100, #100,\n",
|
| 708 |
+
" eval_steps=100,\n",
|
| 709 |
" logging_steps=1, #25,\n",
|
| 710 |
" report_to=[\"tensorboard\"],\n",
|
| 711 |
" load_best_model_at_end=True,\n",
|
|
|
|
| 734 |
},
|
| 735 |
{
|
| 736 |
"cell_type": "code",
|
| 737 |
+
"execution_count": 20,
|
| 738 |
"id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
|
| 739 |
"metadata": {},
|
| 740 |
"outputs": [],
|
|
|
|
| 763 |
},
|
| 764 |
{
|
| 765 |
"cell_type": "code",
|
| 766 |
+
"execution_count": 21,
|
| 767 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
| 768 |
"metadata": {},
|
| 769 |
"outputs": [
|
|
|
|
| 784 |
" args=training_args,\n",
|
| 785 |
" model=model,\n",
|
| 786 |
" train_dataset=vectorized_datasets[\"train\"],\n",
|
| 787 |
+
" eval_dataset=vectorized_datasets[\"test\"].take(500),\n",
|
| 788 |
" data_collator=data_collator,\n",
|
| 789 |
" compute_metrics=compute_metrics,\n",
|
| 790 |
" tokenizer=processor,\n",
|
|
|
|
| 802 |
},
|
| 803 |
{
|
| 804 |
"cell_type": "code",
|
| 805 |
+
"execution_count": 22,
|
| 806 |
"id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
|
| 807 |
"metadata": {},
|
| 808 |
"outputs": [
|
|
|
|
| 859 |
"/home/jovyan/whisper/lib/python3.10/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
| 860 |
" warnings.warn(\n",
|
| 861 |
"***** Running training *****\n",
|
| 862 |
+
" Num examples = 600000\n",
|
| 863 |
" Num Epochs = 9223372036854775807\n",
|
| 864 |
+
" Instantaneous batch size per device = 50\n",
|
| 865 |
+
" Total train batch size (w. parallel, distributed & accumulation) = 200\n",
|
| 866 |
" Gradient Accumulation steps = 1\n",
|
| 867 |
+
" Total optimization steps = 3000\n",
|
| 868 |
" Number of trainable parameters = 241734912\n",
|
| 869 |
+
"Reading metadata...: 18211it [00:02, 8504.35it/s] \n",
|
| 870 |
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 871 |
"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
|
| 872 |
" warnings.warn('Was asked to gather along dimension 0, but all '\n"
|
|
|
|
| 878 |
"\n",
|
| 879 |
" <div>\n",
|
| 880 |
" \n",
|
| 881 |
+
" <progress value='101' max='3000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
| 882 |
+
" [ 101/3000 25:24 < 12:24:00, 0.06 it/s, Epoch 0.03/9223372036854775807]\n",
|
| 883 |
" </div>\n",
|
| 884 |
" <table border=\"1\" class=\"dataframe\">\n",
|
| 885 |
" <thead>\n",
|
|
|
|
| 892 |
" </thead>\n",
|
| 893 |
" <tbody>\n",
|
| 894 |
" <tr>\n",
|
| 895 |
+
" <td>100</td>\n",
|
| 896 |
+
" <td>1.069800</td>\n",
|
| 897 |
+
" <td>1.207340</td>\n",
|
| 898 |
+
" <td>142.617124</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 899 |
" </tr>\n",
|
| 900 |
" </tbody>\n",
|
| 901 |
"</table><p>"
|
|
|
|
| 913 |
"text": [
|
| 914 |
"***** Running Evaluation *****\n",
|
| 915 |
" Num examples: Unknown\n",
|
| 916 |
+
" Batch size = 20\n",
|
| 917 |
+
"Reading metadata...: 8693it [00:01, 7656.96it/s]\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 918 |
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
|
| 919 |
+
"Saving model checkpoint to ./checkpoint-100\n",
|
| 920 |
+
"Configuration saved in ./checkpoint-100/config.json\n",
|
| 921 |
+
"Model weights saved in ./checkpoint-100/pytorch_model.bin\n",
|
| 922 |
+
"Feature extractor saved in ./checkpoint-100/preprocessor_config.json\n",
|
| 923 |
+
"tokenizer config file saved in ./checkpoint-100/tokenizer_config.json\n",
|
| 924 |
+
"Special tokens file saved in ./checkpoint-100/special_tokens_map.json\n",
|
| 925 |
+
"added tokens file saved in ./checkpoint-100/added_tokens.json\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 926 |
"Feature extractor saved in ./preprocessor_config.json\n",
|
| 927 |
"tokenizer config file saved in ./tokenizer_config.json\n",
|
| 928 |
"Special tokens file saved in ./special_tokens_map.json\n",
|
| 929 |
+
"added tokens file saved in ./added_tokens.json\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 930 |
]
|
| 931 |
}
|
| 932 |
],
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 967099139
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c39156e0baeaa2a0bbfb0619976fb64003fc312ae1fa5a05b6b98b78c47fa454
|
| 3 |
size 967099139
|
runs/Dec06_02-49-42_04812eda968b/events.out.tfevents.1670295010.04812eda968b.1031606.3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a0738a5ca78b0a1c5521ecdaa8f89a160e520d861e79d9fdf3d6dad00e17eed
|
| 3 |
+
size 23058
|
runs/Dec06_03-53-43_04812eda968b/1670298849.4736288/events.out.tfevents.1670298849.04812eda968b.1031606.6
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:040321fe8626593fba40aa3fe2d1189172b567f9b9aa5c2d31edbbfe5cf21f6b
|
| 3 |
+
size 5701
|
runs/Dec06_03-53-43_04812eda968b/events.out.tfevents.1670298849.04812eda968b.1031606.5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0254f34c208105396800997d02b76e1a43ec2f6420e112c391bb30cb4cdb44ce
|
| 3 |
+
size 5580
|
runs/Dec06_03-57-56_04812eda968b/1670299090.1920335/events.out.tfevents.1670299090.04812eda968b.1031606.8
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf3889b8a853fef26f36df5d508da50972ac19277696c5fac25525c05032bb5d
|
| 3 |
+
size 5701
|
runs/Dec06_03-57-56_04812eda968b/events.out.tfevents.1670299090.04812eda968b.1031606.7
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fd9e1f17d3a11a984e8cc89e14fd25e9b304fff02ca7d0a91f4472462294bb1
|
| 3 |
+
size 19594
|
runs/Dec06_06-58-23_04812eda968b/1670309913.1115/events.out.tfevents.1670309913.04812eda968b.2449868.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f2e0f225220c341b5870d143fa4387b7094084eb8ec22a18d8836f017cbaad4
|
| 3 |
+
size 5701
|
runs/Dec06_06-58-23_04812eda968b/events.out.tfevents.1670309913.04812eda968b.2449868.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f895155f75dec6d2940f49f5c2da89db62ffa5ff6260646f4a8e92b6fe180ef7
|
| 3 |
+
size 19594
|
runs/Dec06_07-40-22_04812eda968b/1670312431.563092/events.out.tfevents.1670312431.04812eda968b.2650899.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e158bd332d51cec6795201a51029faadec007da2bee3c4f4e3562dc287a3fb17
|
| 3 |
+
size 5701
|
runs/Dec06_07-40-22_04812eda968b/events.out.tfevents.1670312431.04812eda968b.2650899.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9133eed6ce04b45b52ff52dc5a94d9f934513443190fe9cb6f4fecd2ae1e64d
|
| 3 |
+
size 19905
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ac204e2762aaab2d3f59ce61d73b8a7fb5e1f9724f92d59b71383f7a17e163f
|
| 3 |
size 3503
|