Upload selected files from dummy subfolders

Browse files

Files changed (2) hide show

32x32_librispeech_group_2_2_0_0_mse_ssim_raw_ssim_Unet8M/checkpoint-epoch100.pth +3 -0
32x32_librispeech_group_2_2_0_0_mse_ssim_raw_ssim_Unet8M/config.yaml +294 -0

32x32_librispeech_group_2_2_0_0_mse_ssim_raw_ssim_Unet8M/checkpoint-epoch100.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9573e972f86167543de233ec198bc134e72428ba09e9c04ea9e9bd9b4885425
+size 98066442

32x32_librispeech_group_2_2_0_0_mse_ssim_raw_ssim_Unet8M/config.yaml ADDED Viewed

	@@ -0,0 +1,294 @@

+model:
+  _target_: src.model.LenslessWrapper
+  use_loader: false
+  loader_kwargs: null
+  use_batch_video_version: false
+  freeze_weights: false
+  psf_path: data/digicam_psf/SIM_psf.png
+  psf_loader_kwargs:
+    downsample: 8
+    return_bg: false
+  grayscale_psf: true
+  recon_name: UnrolledADMM
+  recon_kwargs:
+    post_process:
+      _target_: lensless.recon.drunet.network_unet.UNetRes
+      in_nc: 2
+      out_nc: 1
+      nc:
+      - 32
+      - 64
+      - 128
+      - 256
+      nb: 4
+      act_mode: R
+      downsample_mode: strideconv
+      upsample_mode: convtranspose
+    psf_residual: false
+    skip_unrolled: true
+    return_intermediate: false
+writer:
+  _target_: src.logger.WandBWriter
+  project_name: lenslessmic
+  entity: null
+  run_name: 32x32_librispeech_group_2_2_0_0_mse_ssim_raw_ssim_Unet8M
+  mode: online
+  loss_names:
+  - loss
+  - codec_mse_loss
+  - codec_ssim_loss
+  - raw_codec_ssim_loss
+  - raw_codec_l1_loss
+  - audio_l1_loss
+  - audio_sisdr_loss
+  - audio_stft_loss
+  - audio_mel_loss
+  log_checkpoints: false
+  id_length: 8
+  names:
+  - input-1:frame
+  - input-2:frame
+  - input-3:frame
+  - input-4:frame
+  figsize:
+  - 15
+  - 15
+  sample_rate: 16000
+  run_id: kfhqvxqm
+metrics:
+  device: auto
+  train:
+  - _target_: src.metrics.SISDRMetric
+    name: SISDR
+  - _target_: src.metrics.PSNRMetric
+    name: PSNR
+  - _target_: src.metrics.QuantizationMatchMetric
+    name: QuantizationMatch-all
+    codebook_index: all
+  inference:
+  - _target_: src.metrics.SISDRMetric
+    name: SISDR
+  - _target_: src.metrics.STOIMetric
+    name: STOI
+  - _target_: src.metrics.WERMetric
+    name: WER
+  - _target_: src.metrics.PESQMetric
+    name: PESQ
+  - _target_: src.metrics.MelMetric
+    name: Mel
+    audio_mel_config:
+      n_mels:
+      - 5
+      - 10
+      - 20
+      - 40
+      - 80
+      - 160
+      - 320
+      window_lengths:
+      - 32
+      - 64
+      - 128
+      - 256
+      - 512
+      - 1024
+      - 2048
+      mel_fmin:
+      - 0
+      - 0
+      - 0
+      - 0
+      - 0
+      - 0
+      - 0
+      mel_fmax:
+      - null
+      - null
+      - null
+      - null
+      - null
+      - null
+      - null
+      pow: 1.0
+      clamp_eps: 1.0e-05
+      mag_weight: 0.0
+  - _target_: src.metrics.STFTMetric
+    name: STFT
+    audio_stft_config:
+      window_lengths:
+      - 2048
+      - 512
+  - _target_: src.metrics.QuantizationMatchMetric
+    name: QuantizationMatch-all
+    codebook_index: all
+  - _target_: src.metrics.QuantizationMatchMetric
+    name: QuantizationMatch-1
+    codebook_index: 1
+  - _target_: src.metrics.QuantizationMatchMetric
+    name: QuantizationMatch-2
+    codebook_index: 2
+  - _target_: src.metrics.PSNRMetric
+    name: PSNR
+  - _target_: src.metrics.SSIMMetric
+    name: SSIM
+  - _target_: src.metrics.GMSDMetric
+    name: GMSD
+  - _target_: src.metrics.MSEMetric
+    name: MSE
+    normalized: false
+  - _target_: src.metrics.MSEMetric
+    name: NormMSE
+    normalized: true
+datasets:
+  train:
+    _target_: src.datasets.LibrispeechDataset
+    max_audio_length: 3
+    part: train-clean-100
+    roi_kwargs: ${reconstruction.roi_kwargs}
+    codec_name: ${codec.codec_name}
+    lensless_tag: measurement_group_2_2_0_0
+    instance_transforms: ${transforms.instance_transforms.train}
+    sim_psf_config: ${psf}
+  test:
+    _target_: src.datasets.LibrispeechDataset
+    limit: 1
+    max_audio_length: 3
+    part: test-clean
+    roi_kwargs: ${reconstruction.roi_kwargs}
+    codec_name: ${codec.codec_name}
+    lensless_tag: measurement_group_2_2_0_0
+    instance_transforms: ${transforms.instance_transforms.inference}
+    sim_psf_config: ${psf}
+dataloader:
+  train:
+    _target_: torch.utils.data.DataLoader
+    batch_size: 1
+    num_workers: 2
+    pin_memory: true
+  inference:
+    _target_: torch.utils.data.DataLoader
+    batch_size: 1
+    num_workers: 2
+    pin_memory: true
+transforms:
+  instance_transforms:
+    train:
+      all:
+        _target_: torchvision.transforms.v2.Compose
+        transforms:
+        - _target_: src.transforms.PadCrop
+          length: 4
+          pad_format: replicated
+          random_crop: true
+          ratio: null
+          frames_per_lensless: 4
+    inference: null
+  batch_transforms:
+    train: null
+    inference: null
+codec:
+  _target_: src.transforms.CodecEncoderDecoder
+  codec_cls: ${resolve_class:dac.DAC}
+  codec_weights_path: data/dac_exps/${codec.codec_name}/latest/dac/weights.pth
+  codec_add_root_path: true
+  codec_kwargs: null
+  codec_name: 32x32_120_16khz_original
+  eval_mode: true
+  freeze_weights: true
+reconstruction:
+  roi_kwargs:
+    top_left:
+    - 65
+    - 118
+    height: 256
+    width: 256
+  group_frames_kwargs:
+    n_rows: 2
+    n_cols: 2
+    row_space: 0
+    col_space: 0
+  resize_coef: 4
+  normalize_lensless: true
+  corners_list: null
+psf:
+  slm: adafruit
+  sensor: rpi_hq
+  downsample: 8
+  rotate: -0.8
+  vertical_shift: -20
+  horizontal_shift: -20
+  flipud: true
+  use_waveprop: true
+  deadspace: true
+  scene2mask: 0.3
+  mask2sensor: 0.004
+  grayscale: true
+lr_scheduler:
+  _target_: torch.optim.lr_scheduler.ConstantLR
+  factor: 1
+optimizer:
+  _target_: torch.optim.Adam
+  lr: 0.0001
+loss_function:
+  _target_: src.loss.ReconstructionLoss
+  codec_mse_coef: 1
+  codec_ssim_coef: 1
+  codec_gmsd_coef: 0
+  raw_codec_ssim_coef: 1
+  raw_codec_l1_coef: 0
+  audio_l1_coef: 0
+  audio_sisdr_coef: 0
+  audio_stft_coef: 0
+  audio_mel_coef: 0
+  audio_stft_config:
+    window_lengths:
+    - 2048
+    - 512
+  audio_mel_config:
+    n_mels:
+    - 5
+    - 10
+    - 20
+    - 40
+    window_lengths:
+    - 32
+    - 64
+    - 128
+    - 256
+    mel_fmin:
+    - 0
+    - 0
+    - 0
+    - 0
+    mel_fmax:
+    - null
+    - null
+    - null
+    - null
+    pow: 1.0
+    clamp_eps: 1.0e-05
+    mag_weight: 0.0
+  resize_coef: ${reconstruction.resize_coef}
+  group_frames_kwargs: ${reconstruction.group_frames_kwargs}
+  ssim_kernel: 7
+  ssim_sigma: 0.5
+  raw_ssim_kernel: 11
+trainer:
+  log_step: 50
+  n_epochs: 100
+  epoch_len: 500
+  device_tensors:
+  - lensless_codec_video
+  - lensed_codec_video
+  - lensless_psf
+  - audio
+  - pad_mask
+  resume_from: null
+  device: auto
+  override: true
+  monitor: max test_PSNR
+  save_period: 5
+  early_stop: ${trainer.n_epochs}
+  save_dir: saved
+  seed: 1
+  skip_NaN: true