{
    "input_wavs_dir": "/private/home/adampolyak/datasets/LJ/LJSpeech-1.1/wavs_16khz_padded",
    "input_training_file": "/large_experiments/ust/annl/datasets/tts/LJSpeech/filelist/mhubert_vp_en_es_fr_it3_400k/lj_train_layer11_hubert1000_filelist.txt",
    "input_validation_file": "/large_experiments/ust/annl/datasets/tts/LJSpeech/filelist/mhubert_vp_en_es_fr_it3_400k/lj_dev_layer11_hubert1000_filelist.txt",
    "resblock": "1",
    "num_gpus": 0,
    "batch_size": 16,
    "learning_rate": 0.0002,
    "adam_b1": 0.8,
    "adam_b2": 0.99,
    "lr_decay": 0.999,
    "seed": 1234,
    "upsample_rates": [5,4,4,2,2],
    "upsample_kernel_sizes": [11,8,8,4,4],
    "upsample_initial_channel": 512,
    "resblock_kernel_sizes": [3,7,11],
    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
    "num_embeddings": 1000,
    "embedding_dim": 128,
    "model_in_dim": 128,
    "segment_size": 8960,
    "code_hop_size": 320,
    "f0": false,
    "num_mels": 80,
    "num_freq": 1025,
    "n_fft": 1024,
    "hop_size": 256,
    "win_size": 1024,
    "dur_prediction_weight": 1.0,
    "dur_predictor_params": {
        "encoder_embed_dim": 128,
        "var_pred_hidden_dim": 128,
        "var_pred_kernel_size": 3,
        "var_pred_dropout": 0.5
    },
    "sampling_rate": 16000,
    "fmin": 0,
    "fmax": 8000,
    "fmax_for_loss": null,
    "num_workers": 4,
    "dist_config": {
        "dist_backend": "nccl",
        "dist_url": "env://"
    }
}