Spaces:

Flux9665
/

MassivelyMultilingualTTS

Running on T4

App Files Files

Flux9665 committed on Sep 22, 2024

Commit

07b3559

verified ·

1 Parent(s): b66b459

Update Modules/Vocoder/HiFiGAN_Generator.py

Browse files

Files changed (1) hide show

Modules/Vocoder/HiFiGAN_Generator.py +6 -12

Modules/Vocoder/HiFiGAN_Generator.py CHANGED Viewed

@@ -15,10 +15,10 @@ class HiFiGAN(torch.nn.Module):
     def __init__(self,
                  in_channels=128,
                  out_channels=1,
-                 channels=512,
                  kernel_size=7,
-                 upsample_scales=(8, 6, 4, 2),  # CAREFUL: Avocodo assumes that there are always 4 upsample scales, because it takes intermediate results.
-                 upsample_kernel_sizes=(16, 12, 8, 4),
                  resblock_kernel_sizes=(3, 7, 11),
                  resblock_dilations=((1, 3, 5), (1, 3, 5), (1, 3, 5)),
                  use_additional_convs=True,
@@ -87,9 +87,6 @@ class HiFiGAN(torch.nn.Module):
                             1,
                             padding=(kernel_size - 1) // 2, ), torch.nn.Tanh(), )
-        self.out_proj_x1 = torch.nn.Conv1d(channels // 4, 1, 7, 1, padding=3)
-        self.out_proj_x2 = torch.nn.Conv1d(channels // 8, 1, 7, 1, padding=3)
         # apply weight norm
         self.apply_weight_norm()
@@ -118,13 +115,9 @@ class HiFiGAN(torch.nn.Module):
             for j in range(self.num_blocks):
                 cs += self.blocks[i * self.num_blocks + j](c)
             c = cs / self.num_blocks
-            if i == 1:
-                x1 = self.out_proj_x1(c)
-            elif i == 2:
-                x2 = self.out_proj_x2(c)
         c = self.output_conv(c)
-        return c, x2, x1
     def reset_parameters(self):
         """
@@ -185,4 +178,5 @@ class HiFiGAN(torch.nn.Module):
 if __name__ == "__main__":
     hifi = HiFiGAN()
-    print(f"HiFiGAN parameter count: {sum(p.numel() for p in hifi.parameters() if p.requires_grad)}")

     def __init__(self,
                  in_channels=128,
                  out_channels=1,
+                 channels=768,
                  kernel_size=7,
+                 upsample_scales=(8, 6, 2, 2, 2),  # CAREFUL: Avocodo assumes that there are always 4 upsample scales, because it takes intermediate results.
+                 upsample_kernel_sizes=(16, 12, 4, 4, 4),
                  resblock_kernel_sizes=(3, 7, 11),
                  resblock_dilations=((1, 3, 5), (1, 3, 5), (1, 3, 5)),
                  use_additional_convs=True,
                             1,
                             padding=(kernel_size - 1) // 2, ), torch.nn.Tanh(), )
         # apply weight norm
         self.apply_weight_norm()
             for j in range(self.num_blocks):
                 cs += self.blocks[i * self.num_blocks + j](c)
             c = cs / self.num_blocks
         c = self.output_conv(c)
+        return c
     def reset_parameters(self):
         """
 if __name__ == "__main__":
     hifi = HiFiGAN()
+    print(f"HiFiGAN parameter count: {sum(p.numel() for p in hifi.parameters() if p.requires_grad)}")
+    print(hifi(torch.randn([1, 128, 100]))[0].shape)