English
Blinorot committed on
Commit
b695617
·
verified ·
1 Parent(s): d56045c

Upload selected files from dummy subfolders

Browse files
32x32_librispeech_other_mse_ssim_raw_ssim_PSF_Unet4M_U5_Unet4M_ft/checkpoint-epoch30.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaaa4543541cb0291a89d26b4f02c50bd78cc3a24ee09762406a906bca218842
3
+ size 97419290
32x32_librispeech_other_mse_ssim_raw_ssim_PSF_Unet4M_U5_Unet4M_ft/config.yaml ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ _target_: src.model.LenslessWrapper
3
+ use_loader: false
4
+ loader_kwargs: null
5
+ use_batch_video_version: false
6
+ freeze_weights: false
7
+ psf_path: data/digicam_psf/SIM_psf.png
8
+ psf_loader_kwargs:
9
+ downsample: 8
10
+ return_bg: false
11
+ grayscale_psf: true
12
+ recon_name: UnrolledADMM
13
+ recon_kwargs:
14
+ n_iter: 5
15
+ mu1: 0.0001
16
+ mu2: 0.0001
17
+ mu3: 0.0001
18
+ tau: 0.0002
19
+ pre_process:
20
+ _target_: lensless.recon.drunet.network_unet.UNetRes
21
+ in_nc: 2
22
+ out_nc: 1
23
+ nc:
24
+ - 32
25
+ - 64
26
+ - 112
27
+ - 128
28
+ nb: 4
29
+ act_mode: R
30
+ downsample_mode: strideconv
31
+ upsample_mode: convtranspose
32
+ post_process:
33
+ _target_: lensless.recon.drunet.network_unet.UNetRes
34
+ in_nc: 2
35
+ out_nc: 1
36
+ nc:
37
+ - 32
38
+ - 64
39
+ - 116
40
+ - 128
41
+ nb: 4
42
+ act_mode: R
43
+ downsample_mode: strideconv
44
+ upsample_mode: convtranspose
45
+ psf_network:
46
+ _target_: lensless.recon.drunet.network_unet.UNetRes
47
+ in_nc: 2
48
+ out_nc: 1
49
+ nc:
50
+ - 4
51
+ - 8
52
+ - 16
53
+ - 32
54
+ nb: 4
55
+ act_mode: R
56
+ downsample_mode: strideconv
57
+ upsample_mode: convtranspose
58
+ psf_residual: false
59
+ skip_unrolled: false
60
+ return_intermediate: false
61
+ writer:
62
+ _target_: src.logger.WandBWriter
63
+ project_name: lenslessmic
64
+ entity: null
65
+ run_name: 32x32_librispeech_other_mse_ssim_raw_ssim_PSF_Unet4M_U5_Unet4M_ft
66
+ mode: online
67
+ loss_names:
68
+ - loss
69
+ - codec_mse_loss
70
+ - codec_ssim_loss
71
+ - raw_codec_ssim_loss
72
+ - raw_codec_l1_loss
73
+ - audio_l1_loss
74
+ - audio_sisdr_loss
75
+ - audio_stft_loss
76
+ - audio_mel_loss
77
+ log_checkpoints: false
78
+ id_length: 8
79
+ names:
80
+ - input-1:frame
81
+ - input-2:frame
82
+ - input-3:frame
83
+ - input-4:frame
84
+ figsize:
85
+ - 15
86
+ - 15
87
+ sample_rate: 16000
88
+ run_id: hncosdnr
89
+ metrics:
90
+ device: auto
91
+ train:
92
+ - _target_: src.metrics.SISDRMetric
93
+ name: SISDR
94
+ - _target_: src.metrics.PSNRMetric
95
+ name: PSNR
96
+ - _target_: src.metrics.QuantizationMatchMetric
97
+ name: QuantizationMatch-all
98
+ codebook_index: all
99
+ inference:
100
+ - _target_: src.metrics.SISDRMetric
101
+ name: SISDR
102
+ - _target_: src.metrics.STOIMetric
103
+ name: STOI
104
+ - _target_: src.metrics.WERMetric
105
+ name: WER
106
+ - _target_: src.metrics.PESQMetric
107
+ name: PESQ
108
+ - _target_: src.metrics.MelMetric
109
+ name: Mel
110
+ audio_mel_config:
111
+ n_mels:
112
+ - 5
113
+ - 10
114
+ - 20
115
+ - 40
116
+ - 80
117
+ - 160
118
+ - 320
119
+ window_lengths:
120
+ - 32
121
+ - 64
122
+ - 128
123
+ - 256
124
+ - 512
125
+ - 1024
126
+ - 2048
127
+ mel_fmin:
128
+ - 0
129
+ - 0
130
+ - 0
131
+ - 0
132
+ - 0
133
+ - 0
134
+ - 0
135
+ mel_fmax:
136
+ - null
137
+ - null
138
+ - null
139
+ - null
140
+ - null
141
+ - null
142
+ - null
143
+ pow: 1.0
144
+ clamp_eps: 1.0e-05
145
+ mag_weight: 0.0
146
+ - _target_: src.metrics.STFTMetric
147
+ name: STFT
148
+ audio_stft_config:
149
+ window_lengths:
150
+ - 2048
151
+ - 512
152
+ - _target_: src.metrics.QuantizationMatchMetric
153
+ name: QuantizationMatch-all
154
+ codebook_index: all
155
+ - _target_: src.metrics.QuantizationMatchMetric
156
+ name: QuantizationMatch-1
157
+ codebook_index: 1
158
+ - _target_: src.metrics.QuantizationMatchMetric
159
+ name: QuantizationMatch-2
160
+ codebook_index: 2
161
+ - _target_: src.metrics.PSNRMetric
162
+ name: PSNR
163
+ - _target_: src.metrics.SSIMMetric
164
+ name: SSIM
165
+ - _target_: src.metrics.GMSDMetric
166
+ name: GMSD
167
+ - _target_: src.metrics.MSEMetric
168
+ name: MSE
169
+ normalized: false
170
+ - _target_: src.metrics.MSEMetric
171
+ name: NormMSE
172
+ normalized: true
173
+ datasets:
174
+ train:
175
+ _target_: src.datasets.LibrispeechDataset
176
+ max_audio_length: 3
177
+ part:
178
+ - train-other-500
179
+ - train-clean-100
180
+ roi_kwargs: ${reconstruction.roi_kwargs}
181
+ codec_name: ${codec.codec_name}
182
+ lensless_tag: measurement
183
+ instance_transforms: ${transforms.instance_transforms.train}
184
+ sim_psf_config: ${psf}
185
+ test:
186
+ _target_: src.datasets.LibrispeechDataset
187
+ limit: 1
188
+ max_audio_length: 3
189
+ part: test-clean
190
+ roi_kwargs: ${reconstruction.roi_kwargs}
191
+ codec_name: ${codec.codec_name}
192
+ lensless_tag: measurement
193
+ instance_transforms: ${transforms.instance_transforms.inference}
194
+ sim_psf_config: ${psf}
195
+ test_other:
196
+ _target_: src.datasets.LibrispeechDataset
197
+ limit: 1
198
+ max_audio_length: 3
199
+ part: test-other
200
+ roi_kwargs: ${reconstruction.roi_kwargs}
201
+ codec_name: ${codec.codec_name}
202
+ lensless_tag: measurement
203
+ instance_transforms: ${transforms.instance_transforms.inference}
204
+ sim_psf_config: ${psf}
205
+ dataloader:
206
+ train:
207
+ _target_: torch.utils.data.DataLoader
208
+ batch_size: 1
209
+ num_workers: 2
210
+ pin_memory: true
211
+ inference:
212
+ _target_: torch.utils.data.DataLoader
213
+ batch_size: 1
214
+ num_workers: 2
215
+ pin_memory: true
216
+ transforms:
217
+ instance_transforms:
218
+ train:
219
+ all:
220
+ _target_: torchvision.transforms.v2.Compose
221
+ transforms:
222
+ - _target_: src.transforms.PadCrop
223
+ length: 4
224
+ pad_format: replicated
225
+ random_crop: true
226
+ ratio: null
227
+ frames_per_lensless: 1
228
+ inference: null
229
+ batch_transforms:
230
+ train: null
231
+ inference: null
232
+ codec:
233
+ _target_: src.transforms.CodecEncoderDecoder
234
+ codec_cls: ${resolve_class:dac.DAC}
235
+ codec_weights_path: data/dac_exps/${codec.codec_name}/latest/dac/weights.pth
236
+ codec_add_root_path: true
237
+ codec_kwargs: null
238
+ codec_name: 32x32_120_16khz_original
239
+ eval_mode: true
240
+ freeze_weights: true
241
+ reconstruction:
242
+ roi_kwargs:
243
+ top_left:
244
+ - 65
245
+ - 118
246
+ height: 256
247
+ width: 256
248
+ resize_coef: 8
249
+ group_frames_kwargs: null
250
+ normalize_lensless: true
251
+ corners_list: null
252
+ psf:
253
+ slm: adafruit
254
+ sensor: rpi_hq
255
+ downsample: 8
256
+ rotate: -0.8
257
+ vertical_shift: -20
258
+ horizontal_shift: -20
259
+ flipud: true
260
+ use_waveprop: true
261
+ deadspace: true
262
+ scene2mask: 0.3
263
+ mask2sensor: 0.004
264
+ grayscale: true
265
+ lr_scheduler:
266
+ _target_: torch.optim.lr_scheduler.ConstantLR
267
+ factor: 1
268
+ optimizer:
269
+ _target_: torch.optim.Adam
270
+ lr: 5.0e-05
271
+ loss_function:
272
+ _target_: src.loss.ReconstructionLoss
273
+ codec_mse_coef: 1
274
+ codec_ssim_coef: 1
275
+ codec_gmsd_coef: 0
276
+ raw_codec_ssim_coef: 1
277
+ raw_codec_l1_coef: 0
278
+ audio_l1_coef: 0
279
+ audio_sisdr_coef: 0
280
+ audio_stft_coef: 0
281
+ audio_mel_coef: 0
282
+ audio_stft_config:
283
+ window_lengths:
284
+ - 2048
285
+ - 512
286
+ audio_mel_config:
287
+ n_mels:
288
+ - 5
289
+ - 10
290
+ - 20
291
+ - 40
292
+ window_lengths:
293
+ - 32
294
+ - 64
295
+ - 128
296
+ - 256
297
+ mel_fmin:
298
+ - 0
299
+ - 0
300
+ - 0
301
+ - 0
302
+ mel_fmax:
303
+ - null
304
+ - null
305
+ - null
306
+ - null
307
+ pow: 1.0
308
+ clamp_eps: 1.0e-05
309
+ mag_weight: 0.0
310
+ resize_coef: ${reconstruction.resize_coef}
311
+ group_frames_kwargs: ${reconstruction.group_frames_kwargs}
312
+ ssim_kernel: 7
313
+ ssim_sigma: 0.5
314
+ raw_ssim_kernel: 11
315
+ trainer:
316
+ log_step: 50
317
+ n_epochs: 100
318
+ epoch_len: 500
319
+ device_tensors:
320
+ - lensless_codec_video
321
+ - lensed_codec_video
322
+ - lensless_psf
323
+ - audio
324
+ - pad_mask
325
+ resume_from: null
326
+ device: auto
327
+ override: true
328
+ monitor: max test_PSNR
329
+ save_period: 5
330
+ early_stop: ${trainer.n_epochs}
331
+ save_dir: saved
332
+ seed: 1
333
+ from_pretrained: ROOT_PATH/data/lensless_exps/32x32_librispeech_mse_ssim_raw_ssim_PSF_Unet4M_U5_Unet4M/checkpoint-epoch100.pth