English
Blinorot committed on
Commit
30ea5ba
·
verified ·
1 Parent(s): 6c5a138

Upload selected files from dummy subfolders

Browse files
32x32_librispeech_group_2_2_0_0_mse_ssim_raw_ssim_Unet8M/checkpoint-epoch100.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9573e972f86167543de233ec198bc134e72428ba09e9c04ea9e9bd9b4885425
3
+ size 98066442
32x32_librispeech_group_2_2_0_0_mse_ssim_raw_ssim_Unet8M/config.yaml ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ _target_: src.model.LenslessWrapper
3
+ use_loader: false
4
+ loader_kwargs: null
5
+ use_batch_video_version: false
6
+ freeze_weights: false
7
+ psf_path: data/digicam_psf/SIM_psf.png
8
+ psf_loader_kwargs:
9
+ downsample: 8
10
+ return_bg: false
11
+ grayscale_psf: true
12
+ recon_name: UnrolledADMM
13
+ recon_kwargs:
14
+ post_process:
15
+ _target_: lensless.recon.drunet.network_unet.UNetRes
16
+ in_nc: 2
17
+ out_nc: 1
18
+ nc:
19
+ - 32
20
+ - 64
21
+ - 128
22
+ - 256
23
+ nb: 4
24
+ act_mode: R
25
+ downsample_mode: strideconv
26
+ upsample_mode: convtranspose
27
+ psf_residual: false
28
+ skip_unrolled: true
29
+ return_intermediate: false
30
+ writer:
31
+ _target_: src.logger.WandBWriter
32
+ project_name: lenslessmic
33
+ entity: null
34
+ run_name: 32x32_librispeech_group_2_2_0_0_mse_ssim_raw_ssim_Unet8M
35
+ mode: online
36
+ loss_names:
37
+ - loss
38
+ - codec_mse_loss
39
+ - codec_ssim_loss
40
+ - raw_codec_ssim_loss
41
+ - raw_codec_l1_loss
42
+ - audio_l1_loss
43
+ - audio_sisdr_loss
44
+ - audio_stft_loss
45
+ - audio_mel_loss
46
+ log_checkpoints: false
47
+ id_length: 8
48
+ names:
49
+ - input-1:frame
50
+ - input-2:frame
51
+ - input-3:frame
52
+ - input-4:frame
53
+ figsize:
54
+ - 15
55
+ - 15
56
+ sample_rate: 16000
57
+ run_id: kfhqvxqm
58
+ metrics:
59
+ device: auto
60
+ train:
61
+ - _target_: src.metrics.SISDRMetric
62
+ name: SISDR
63
+ - _target_: src.metrics.PSNRMetric
64
+ name: PSNR
65
+ - _target_: src.metrics.QuantizationMatchMetric
66
+ name: QuantizationMatch-all
67
+ codebook_index: all
68
+ inference:
69
+ - _target_: src.metrics.SISDRMetric
70
+ name: SISDR
71
+ - _target_: src.metrics.STOIMetric
72
+ name: STOI
73
+ - _target_: src.metrics.WERMetric
74
+ name: WER
75
+ - _target_: src.metrics.PESQMetric
76
+ name: PESQ
77
+ - _target_: src.metrics.MelMetric
78
+ name: Mel
79
+ audio_mel_config:
80
+ n_mels:
81
+ - 5
82
+ - 10
83
+ - 20
84
+ - 40
85
+ - 80
86
+ - 160
87
+ - 320
88
+ window_lengths:
89
+ - 32
90
+ - 64
91
+ - 128
92
+ - 256
93
+ - 512
94
+ - 1024
95
+ - 2048
96
+ mel_fmin:
97
+ - 0
98
+ - 0
99
+ - 0
100
+ - 0
101
+ - 0
102
+ - 0
103
+ - 0
104
+ mel_fmax:
105
+ - null
106
+ - null
107
+ - null
108
+ - null
109
+ - null
110
+ - null
111
+ - null
112
+ pow: 1.0
113
+ clamp_eps: 1.0e-05
114
+ mag_weight: 0.0
115
+ - _target_: src.metrics.STFTMetric
116
+ name: STFT
117
+ audio_stft_config:
118
+ window_lengths:
119
+ - 2048
120
+ - 512
121
+ - _target_: src.metrics.QuantizationMatchMetric
122
+ name: QuantizationMatch-all
123
+ codebook_index: all
124
+ - _target_: src.metrics.QuantizationMatchMetric
125
+ name: QuantizationMatch-1
126
+ codebook_index: 1
127
+ - _target_: src.metrics.QuantizationMatchMetric
128
+ name: QuantizationMatch-2
129
+ codebook_index: 2
130
+ - _target_: src.metrics.PSNRMetric
131
+ name: PSNR
132
+ - _target_: src.metrics.SSIMMetric
133
+ name: SSIM
134
+ - _target_: src.metrics.GMSDMetric
135
+ name: GMSD
136
+ - _target_: src.metrics.MSEMetric
137
+ name: MSE
138
+ normalized: false
139
+ - _target_: src.metrics.MSEMetric
140
+ name: NormMSE
141
+ normalized: true
142
+ datasets:
143
+ train:
144
+ _target_: src.datasets.LibrispeechDataset
145
+ max_audio_length: 3
146
+ part: train-clean-100
147
+ roi_kwargs: ${reconstruction.roi_kwargs}
148
+ codec_name: ${codec.codec_name}
149
+ lensless_tag: measurement_group_2_2_0_0
150
+ instance_transforms: ${transforms.instance_transforms.train}
151
+ sim_psf_config: ${psf}
152
+ test:
153
+ _target_: src.datasets.LibrispeechDataset
154
+ limit: 1
155
+ max_audio_length: 3
156
+ part: test-clean
157
+ roi_kwargs: ${reconstruction.roi_kwargs}
158
+ codec_name: ${codec.codec_name}
159
+ lensless_tag: measurement_group_2_2_0_0
160
+ instance_transforms: ${transforms.instance_transforms.inference}
161
+ sim_psf_config: ${psf}
162
+ dataloader:
163
+ train:
164
+ _target_: torch.utils.data.DataLoader
165
+ batch_size: 1
166
+ num_workers: 2
167
+ pin_memory: true
168
+ inference:
169
+ _target_: torch.utils.data.DataLoader
170
+ batch_size: 1
171
+ num_workers: 2
172
+ pin_memory: true
173
+ transforms:
174
+ instance_transforms:
175
+ train:
176
+ all:
177
+ _target_: torchvision.transforms.v2.Compose
178
+ transforms:
179
+ - _target_: src.transforms.PadCrop
180
+ length: 4
181
+ pad_format: replicated
182
+ random_crop: true
183
+ ratio: null
184
+ frames_per_lensless: 4
185
+ inference: null
186
+ batch_transforms:
187
+ train: null
188
+ inference: null
189
+ codec:
190
+ _target_: src.transforms.CodecEncoderDecoder
191
+ codec_cls: ${resolve_class:dac.DAC}
192
+ codec_weights_path: data/dac_exps/${codec.codec_name}/latest/dac/weights.pth
193
+ codec_add_root_path: true
194
+ codec_kwargs: null
195
+ codec_name: 32x32_120_16khz_original
196
+ eval_mode: true
197
+ freeze_weights: true
198
+ reconstruction:
199
+ roi_kwargs:
200
+ top_left:
201
+ - 65
202
+ - 118
203
+ height: 256
204
+ width: 256
205
+ group_frames_kwargs:
206
+ n_rows: 2
207
+ n_cols: 2
208
+ row_space: 0
209
+ col_space: 0
210
+ resize_coef: 4
211
+ normalize_lensless: true
212
+ corners_list: null
213
+ psf:
214
+ slm: adafruit
215
+ sensor: rpi_hq
216
+ downsample: 8
217
+ rotate: -0.8
218
+ vertical_shift: -20
219
+ horizontal_shift: -20
220
+ flipud: true
221
+ use_waveprop: true
222
+ deadspace: true
223
+ scene2mask: 0.3
224
+ mask2sensor: 0.004
225
+ grayscale: true
226
+ lr_scheduler:
227
+ _target_: torch.optim.lr_scheduler.ConstantLR
228
+ factor: 1
229
+ optimizer:
230
+ _target_: torch.optim.Adam
231
+ lr: 0.0001
232
+ loss_function:
233
+ _target_: src.loss.ReconstructionLoss
234
+ codec_mse_coef: 1
235
+ codec_ssim_coef: 1
236
+ codec_gmsd_coef: 0
237
+ raw_codec_ssim_coef: 1
238
+ raw_codec_l1_coef: 0
239
+ audio_l1_coef: 0
240
+ audio_sisdr_coef: 0
241
+ audio_stft_coef: 0
242
+ audio_mel_coef: 0
243
+ audio_stft_config:
244
+ window_lengths:
245
+ - 2048
246
+ - 512
247
+ audio_mel_config:
248
+ n_mels:
249
+ - 5
250
+ - 10
251
+ - 20
252
+ - 40
253
+ window_lengths:
254
+ - 32
255
+ - 64
256
+ - 128
257
+ - 256
258
+ mel_fmin:
259
+ - 0
260
+ - 0
261
+ - 0
262
+ - 0
263
+ mel_fmax:
264
+ - null
265
+ - null
266
+ - null
267
+ - null
268
+ pow: 1.0
269
+ clamp_eps: 1.0e-05
270
+ mag_weight: 0.0
271
+ resize_coef: ${reconstruction.resize_coef}
272
+ group_frames_kwargs: ${reconstruction.group_frames_kwargs}
273
+ ssim_kernel: 7
274
+ ssim_sigma: 0.5
275
+ raw_ssim_kernel: 11
276
+ trainer:
277
+ log_step: 50
278
+ n_epochs: 100
279
+ epoch_len: 500
280
+ device_tensors:
281
+ - lensless_codec_video
282
+ - lensed_codec_video
283
+ - lensless_psf
284
+ - audio
285
+ - pad_mask
286
+ resume_from: null
287
+ device: auto
288
+ override: true
289
+ monitor: max test_PSNR
290
+ save_period: 5
291
+ early_stop: ${trainer.n_epochs}
292
+ save_dir: saved
293
+ seed: 1
294
+ skip_NaN: true