Spaces:

mattricesound
/

RemFx

Runtime error

App Files Files Community

mattricesound committed on Aug 3, 2023

Commit

568c3f1

1 Parent(s): 836d971

Update to latest classifier inference

Browse files

Files changed (10) hide show

README.md +18 -24
cfg/exp/5-5_full_cls.yaml +2 -2
cfg/exp/5-5_full_cls_dynamic.yaml +1 -1
remfx/classifier.py +2 -15
remfx/datasets.py +1 -1
remfx/effects.py +0 -2
remfx/models.py +3 -13
remfx/tcn.py +0 -1
scripts/test.py +2 -1
setup.py +9 -0

README.md CHANGED Viewed

@@ -10,14 +10,19 @@ git clone https://github.com/mhrice/RemFx.git
 cd RemFx
 git submodule update --init --recursive
 pip install -e . ./umx
 ```
 # Usage
 This repo can be used for many different tasks. Here are some examples.
 ## Run RemFX Detect on a single file
 First, download the checkpoints from [Zenodo](https://zenodo.org/record/8179396):
 ```
 scripts/download_checkpoints.sh
-scripts/remfx_detect.sh wet.wav -o dry.wav
 ```
 ## Download the [General Purpose Audio Effect Removal evaluation datasets](https://zenodo.org/record/8187288)
 ```
@@ -69,6 +74,18 @@ If you have generated the dataset separately (see Generate datasets used in the
 Also note that the training assumes you have a GPU. To train on CPU, set `accelerator=null` in the config or command-line.
 ## Evaluate models on the General Purpose Audio Effect Removal evaluation datasets (Table 4 from the paper)
 First download the General Purpose Audio Effect Removal evaluation datasets (see above).
 To use the pretrained RemFX model, download the checkpoints
@@ -148,26 +165,3 @@ Some relevant dataset/training parameters descriptions
 - `distortion`
 - `reverb`
 - `delay`
-<!-- # DO WE NEED THIS?
-## Evaluate RemFXwith a custom directory
-Assumes directory is structured as
-- root
-    - clean
-        - file1.wav
-        - file2.wav
-        - file3.wav
-    - effected
-        - file1.wav
-        - file2.wav
-        - file3.wav
-First set the dataset root:
-```
-export DATASET_ROOT={path/to/datasets}
-```
-Then run
-```
-python scripts/chain_inference.py +exp=chain_inference_custom
-``` -->

 cd RemFx
 git submodule update --init --recursive
 pip install -e . ./umx
+pip install --no-deps hearbaseline
 ```
+Due to incompatibilities between hearbaseline's dependencies (namely numpy/numba) and our other packages, we need to install hearbaseline with no dependencies.
 # Usage
 This repo can be used for many different tasks. Here are some examples.
 ## Run RemFX Detect on a single file
 First, download the checkpoints from [Zenodo](https://zenodo.org/record/8179396):
 ```
 scripts/download_checkpoints.sh
+```
+Then run the detect script. This repo contains an example file `example.wav` from our test dataset which contains 2 effects (chorus and delay) applied to a guitar.
+```
+scripts/remfx_detect.sh example.wav -o dry.wav
 ```
 ## Download the [General Purpose Audio Effect Removal evaluation datasets](https://zenodo.org/record/8187288)
 ```
 Also note that the training assumes you have a GPU. To train on CPU, set `accelerator=null` in the config or command-line.
+### Logging
+By default, a CSV logger is used.
+To use the WANDB logger, set:
+```
+export WANDB_PROJECT={desired_wandb_project}
+export WANDB_ENTITY={your_wandb_username}
+```
+## PANNs pretrained
+```
+wget https://zenodo.org/record/6332525/files/hear2021-panns_hear.pth
+```
 ## Evaluate models on the General Purpose Audio Effect Removal evaluation datasets (Table 4 from the paper)
 First download the General Purpose Audio Effect Removal evaluation datasets (see above).
 To use the pretrained RemFX model, download the checkpoints
 - `distortion`
 - `reverb`
 - `delay`

cfg/exp/5-5_full_cls.yaml CHANGED Viewed

@@ -1,11 +1,11 @@
 # @package _global_
 defaults:
-  - override /model: cls_panns_48k
   - override /effects: all
 seed: 12345
 sample_rate: 48000
 chunk_size: 262144 # 5.5s
-logs_dir: "/scratch/cjs-logs"
 render_files: True
 accelerator: "gpu"

 # @package _global_
 defaults:
+  - override /model: cls_panns_48k_specaugment
   - override /effects: all
 seed: 12345
 sample_rate: 48000
 chunk_size: 262144 # 5.5s
+logs_dir: "./logs"
 render_files: True
 accelerator: "gpu"

cfg/exp/5-5_full_cls_dynamic.yaml CHANGED Viewed

@@ -5,7 +5,7 @@ defaults:
 seed: 12345
 sample_rate: 48000
 chunk_size: 262144 # 5.5s
-logs_dir: "/scratch/cjs-logs"
 render_files: True
 accelerator: "gpu"

 seed: 12345
 sample_rate: 48000
 chunk_size: 262144 # 5.5s
+logs_dir: "./logs"
 render_files: True
 accelerator: "gpu"

remfx/classifier.py CHANGED Viewed

@@ -171,7 +171,6 @@ class Cnn14(nn.Module):
         self.fc1 = nn.Linear(2048, 2048, bias=True)
-        # self.fc_audioset = nn.Linear(2048, num_classes, bias=True)
         self.heads = torch.nn.ModuleList()
         for _ in range(num_classes):
             self.heads.append(nn.Linear(2048, 1, bias=True))
@@ -190,7 +189,6 @@ class Cnn14(nn.Module):
     def init_weight(self):
         init_bn(self.bn0)
         init_layer(self.fc1)
-        # init_layer(self.fc_audioset)
     def forward(self, x: torch.Tensor, train: bool = False):
         """
@@ -202,20 +200,11 @@ class Cnn14(nn.Module):
         x = self.melspec(x)
         if self.specaugment and train:
-            # import matplotlib.pyplot as plt
-            # fig, axs = plt.subplots(2, 1, sharex=True)
-            # axs[0].imshow(x[0, :, :, :].detach().squeeze().cpu().numpy())
             x = self.freq_mask(x)
             x = self.time_mask(x)
-            # axs[1].imshow(x[0, :, :, :].detach().squeeze().cpu().numpy())
-            # plt.savefig("spec_augment.png", dpi=300)
-        # x = x.permute(0, 2, 1, 3)
-        # x = self.bn0(x)
-        # x = x.permute(0, 2, 1, 3)
         # apply standardization
-        x = (x - x.mean(dim=0, keepdim=True)) / x.std(dim=0, keepdim=True)
         x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg")
         x = F.dropout(x, p=0.2, training=train)
@@ -241,8 +230,6 @@ class Cnn14(nn.Module):
         for head in self.heads:
             outputs.append(torch.sigmoid(head(x)))
-        # clipwise_output = self.fc_audioset(x)
         return outputs
@@ -294,4 +281,4 @@ class ConvBlock(nn.Module):
         else:
             raise Exception("Incorrect argument!")
-        return x

         self.fc1 = nn.Linear(2048, 2048, bias=True)
         self.heads = torch.nn.ModuleList()
         for _ in range(num_classes):
             self.heads.append(nn.Linear(2048, 1, bias=True))
     def init_weight(self):
         init_bn(self.bn0)
         init_layer(self.fc1)
     def forward(self, x: torch.Tensor, train: bool = False):
         """
         x = self.melspec(x)
         if self.specaugment and train:
             x = self.freq_mask(x)
             x = self.time_mask(x)
         # apply standardization
+        x = (x - x.mean(dim=(2, 3), keepdim=True)) / x.std(dim=(2, 3), keepdim=True)
         x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg")
         x = F.dropout(x, p=0.2, training=train)
         for head in self.heads:
             outputs.append(torch.sigmoid(head(x)))
         return outputs
         else:
             raise Exception("Incorrect argument!")
+        return x

remfx/datasets.py CHANGED Viewed

@@ -666,7 +666,7 @@ class EffectDatamodule(pl.LightningDataModule):
     def test_dataloader(self) -> DataLoader:
         return DataLoader(
             dataset=self.test_dataset,
-            batch_size=1,  # Use small, consistent batch size for testing
             num_workers=self.num_workers,
             pin_memory=self.pin_memory,
             shuffle=False,

     def test_dataloader(self) -> DataLoader:
         return DataLoader(
             dataset=self.test_dataset,
+            batch_size=self.test_batch_size,
             num_workers=self.num_workers,
             pin_memory=self.pin_memory,
             shuffle=False,

remfx/effects.py CHANGED Viewed

@@ -84,7 +84,6 @@ def biqaud(
         a2 = 1 - alpha / A
     else:
         pass
-        # raise ValueError(f"Invalid filter_type: {filter_type}.")
     b = np.array([b0, b1, b2]) / a0
     a = np.array([a0, a1, a2]) / a0
@@ -291,7 +290,6 @@ class RandomVolumeAutomation(torch.nn.Module):
             gain_db[samples_filled : samples_filled + segment_samples] = fade
             samples_filled = samples_filled + segment_samples
-        # print(gain_db)
         x *= 10 ** (gain_db / 20.0)
         return x

         a2 = 1 - alpha / A
     else:
         pass
     b = np.array([b0, b1, b2]) / a0
     a = np.array([a0, a1, a2]) / a0
             gain_db[samples_filled : samples_filled + segment_samples] = fade
             samples_filled = samples_filled + segment_samples
         x *= 10 ** (gain_db / 20.0)
         return x

remfx/models.py CHANGED Viewed

@@ -55,12 +55,11 @@ class RemFXChainInference(pl.LightningModule):
             effects_order = order
         else:
             effects_order = self.effect_order
         # Use classifier labels
         if self.classifier:
             threshold = 0.5
             with torch.no_grad():
-                labels = torch.sigmoid(self.classifier(x))
                 rem_fx_labels = torch.where(labels > threshold, 1.0, 0.0)
         if self.use_all_effect_models:
             effects_present = [
@@ -253,17 +252,8 @@ class RemFX(pl.LightningModule):
                     prog_bar=True,
                     sync_dist=True,
                 )
-                # print(f"Input_{metric}", negate * self.metrics[metric](x, y))
-                # print(f"test_{metric}", negate * self.metrics[metric](output, y))
-                # self.output_str += f"{negate * self.metrics[metric](x, y).item():.4f},{negate * self.metrics[metric](output, y).item():.4f},"
-            # self.output_str += "\n"
         return loss
-    def on_test_end(self) -> None:
-        pass
-        # with open("output.csv", "w") as f:
-        # f.write(self.output_str)
 class OpenUnmixModel(nn.Module):
     def __init__(
@@ -418,7 +408,6 @@ def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 1.0):
     else:
         lam = 1
-    print(lam)
     if np.random.rand() > 0.5:
         index = torch.randperm(batch_size).to(x.device)
         mixed_x = lam * x + (1 - lam) * x[index, :]
@@ -429,6 +418,7 @@ def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 1.0):
     return mixed_x, mixed_y, lam
 class FXClassifier(pl.LightningModule):
     def __init__(
         self,
@@ -533,4 +523,4 @@ class FXClassifier(pl.LightningModule):
             lr=self.lr,
             weight_decay=self.lr_weight_decay,
         )
-        return optimizer

             effects_order = order
         else:
             effects_order = self.effect_order
         # Use classifier labels
         if self.classifier:
             threshold = 0.5
             with torch.no_grad():
+                labels = torch.hstack(self.classifier(x))
                 rem_fx_labels = torch.where(labels > threshold, 1.0, 0.0)
         if self.use_all_effect_models:
             effects_present = [
                     prog_bar=True,
                     sync_dist=True,
                 )
         return loss
 class OpenUnmixModel(nn.Module):
     def __init__(
     else:
         lam = 1
     if np.random.rand() > 0.5:
         index = torch.randperm(batch_size).to(x.device)
         mixed_x = lam * x + (1 - lam) * x[index, :]
     return mixed_x, mixed_y, lam
 class FXClassifier(pl.LightningModule):
     def __init__(
         self,
             lr=self.lr,
             weight_decay=self.lr_weight_decay,
         )
+        return optimizer

remfx/tcn.py CHANGED Viewed

@@ -91,7 +91,6 @@ class TCN(nn.Module):
         self.causal = causal
         self.estimate_loudness = estimate_loudness
-        print(f"Causal: {self.causal}")
         if self.causal:
             self.crop_fn = causal_crop
         else:

         self.causal = causal
         self.estimate_loudness = estimate_loudness
         if self.causal:
             self.crop_fn = causal_crop
         else:

scripts/test.py CHANGED Viewed

@@ -16,7 +16,8 @@ def main(cfg: DictConfig):
     datamodule = hydra.utils.instantiate(cfg.datamodule, _convert_="partial")
     log.info(f"Instantiating model <{cfg.model._target_}>.")
     model = hydra.utils.instantiate(cfg.model, _convert_="partial")
-    state_dict = torch.load(cfg.ckpt_path, map_location=torch.device("cpu"))[
         "state_dict"
     ]
     model.load_state_dict(state_dict)

     datamodule = hydra.utils.instantiate(cfg.datamodule, _convert_="partial")
     log.info(f"Instantiating model <{cfg.model._target_}>.")
     model = hydra.utils.instantiate(cfg.model, _convert_="partial")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    state_dict = torch.load(cfg.ckpt_path, map_location=device)[
         "state_dict"
     ]
     model.load_state_dict(state_dict)

setup.py CHANGED Viewed

@@ -44,6 +44,15 @@ setup(
         "pyloudnorm",
         "pedalboard",
         "asteroid",
     ],
     include_package_data=True,
     license="Apache License 2.0",

         "pyloudnorm",
         "pedalboard",
         "asteroid",
+        "librosa",
+        "speechbrain",
+        "torchcrepe",
+        "torchopenl3",
+        "tensorflow",
+        "transformers",
+        "torchmetrics>=1.0",
+        "wav2clip_hear @ git+https://github.com/hohsiangwu/wav2clip-hear.git",
+        "panns_hear @ git+https://github.com/qiuqiangkong/HEAR2021_Challenge_PANNs",
     ],
     include_package_data=True,
     license="Apache License 2.0",