Updated training script

my-training.py +25 -25

@@ -45,8 +45,8 @@ print(f"\n\n Device to be used: {device} \n\n")
 
 
 ## 2. Setting Up Variables
-model_name = "openai/whisper-tiny"
-
+# model_name = "openai/whisper-tiny"
+model_name = "openai/whisper-small"
 # model_name = "openai/whisper-large-v2"
 
 language = "Bengali"
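
The variables in this hunk presumably feed a model/processor load step further down the script (the nearby print references a task variable that is outside this diff). A minimal sketch of that step, assuming task = "transcribe" and the stock transformers Whisper classes:

from transformers import WhisperProcessor, WhisperForConditionalGeneration

task = "transcribe"  # assumed; the script defines task outside this diff

# The processor bundles the feature extractor and the language-aware tokenizer.
processor = WhisperProcessor.from_pretrained(model_name, language=language, task=task)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
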
@@ -57,8 +57,8 @@ print(f"\n\n Loading {model_name} for {language} to {task}...this might take a w
 ## 3. Setting Up Training Args
 output_dir = "./"
 overwrite_output_dir = True
-
-max_steps = 5
+max_steps = 40000
+# max_steps = 5
 per_device_train_batch_size = 4
 # per_device_train_batch_size = 1
 per_device_eval_batch_size = 32
@@ -68,18 +68,18 @@ gradient_accumulation_steps = 128
 dataloader_num_workers = 4
 gradient_checkpointing = False
 evaluation_strategy = "steps"
-eval_steps = 5
-
+# eval_steps = 5
+eval_steps = 1000
 save_strategy = "steps"
-
-save_steps = 5
+save_steps = 1000
+# save_steps = 5
 save_total_limit = 5
 learning_rate = 1e-5
 lr_scheduler_type = "cosine"  # "constant", "constant_with_warmup", "cosine", "cosine_with_restarts", "linear" (default), "polynomial", "inverse_sqrt"
-
-warmup_steps = 1
-
-logging_steps = 1
+warmup_steps = 15000  # (1 epoch)
+# warmup_steps = 1
+logging_steps = 25
+# logging_steps = 1
 # weight_decay = 0.01
 weight_decay = 0
 dropout = 0.1  # values above 0.1 hurt performance, so stay in the 0.0-0.1 range
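
A quick sanity check on these numbers: with per_device_train_batch_size = 4 and the gradient_accumulation_steps = 128 visible in the hunk header, each optimizer step consumes 4 × 128 = 512 examples per device, so evaluating and saving every 1000 steps means roughly every 512,000 training examples on a single GPU.
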
@@ -93,8 +93,8 @@ tf32 = True
 generation_max_length = 448
 report_to = ["tensorboard"]
 predict_with_generate = True
-
-push_to_hub = False
+push_to_hub = True
+# push_to_hub = False
 freeze_feature_encoder = False
 early_stopping_patience = 10
 apply_spec_augment = True
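
These variables are presumably passed into a transformers Seq2SeqTrainingArguments later in the script; the construction itself is outside this diff. A minimal sketch of how the renamed values would be wired up, assuming the stock Seq2SeqTrainingArguments API:

from transformers import Seq2SeqTrainingArguments

# Sketch only: mirrors the variables above; the real call site is not shown in this diff.
training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=overwrite_output_dir,
    max_steps=max_steps,                     # 40000 after this change
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    dataloader_num_workers=dataloader_num_workers,
    gradient_checkpointing=gradient_checkpointing,
    evaluation_strategy=evaluation_strategy,
    eval_steps=eval_steps,                   # 1000 after this change
    save_strategy=save_strategy,
    save_steps=save_steps,                   # 1000 after this change
    save_total_limit=save_total_limit,
    learning_rate=learning_rate,
    lr_scheduler_type=lr_scheduler_type,
    warmup_steps=warmup_steps,               # 15000 after this change
    logging_steps=logging_steps,             # 25 after this change
    weight_decay=weight_decay,
    tf32=tf32,
    generation_max_length=generation_max_length,
    report_to=report_to,
    predict_with_generate=predict_with_generate,
    push_to_hub=push_to_hub,                 # True after this change
)
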
@@ -120,17 +120,17 @@ google_fleurs["test"] = load_dataset("google/fleurs", "bn_in", split="test", cac
 
 
 ## 5. Small Subset for Testing
-common_voice['train'] = common_voice['train'].select(range(50))
-common_voice['test'] = common_voice['test'].select(range(50))
-google_fleurs['train'] = google_fleurs['train'].select(range(50))
-google_fleurs['test'] = google_fleurs['test'].select(range(50))
-openslr['train'] = openslr['train'].select(range(50))
-
-print("\n\n For testing, the small subsets are:")
-print(common_voice)
-print(google_fleurs)
-print(openslr)
-print("\n")
+# common_voice['train'] = common_voice['train'].select(range(50))
+# common_voice['test'] = common_voice['test'].select(range(50))
+# google_fleurs['train'] = google_fleurs['train'].select(range(50))
+# google_fleurs['test'] = google_fleurs['test'].select(range(50))
+# openslr['train'] = openslr['train'].select(range(50))
+
+# print("\n\n For testing, the small subsets are:")
+# print(common_voice)
+# print(google_fleurs)
+# print(openslr)
+# print("\n")
 
 
 ## Removing bad samples from common_voice based on upvotes and downvotes
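
The section named in that last context line filters Common Voice by vote counts; its code is not part of this diff. A plausible sketch, assuming the dataset exposes the standard Common Voice up_votes / down_votes columns:

# Sketch only: keep clips that received more upvotes than downvotes.
common_voice["train"] = common_voice["train"].filter(
    lambda sample: sample["up_votes"] > sample["down_votes"]
)
common_voice["test"] = common_voice["test"].filter(
    lambda sample: sample["up_votes"] > sample["down_votes"]
)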