Removed timm import
modeling_vmamba.py  CHANGED  (+34 −2)
@@ -36,7 +36,6 @@ import warnings
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.utils.checkpoint as checkpoint
-from timm.models.layers import DropPath, trunc_normal_
 from functools import partial
 from typing import Optional, Callable, Any, Union
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss

@@ -744,6 +743,39 @@ def selective_scan_fn(
 ############## HuggingFace modeling file #################
 ##########################################################
 
+# DropPath from https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py
+
+def drop_path(x, drop_prob: float = 0., training: bool = False, scale_by_keep: bool = True):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
+    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
+    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
+    'survival rate' as the argument.
+    """
+    if drop_prob == 0. or not training:
+        return x
+    keep_prob = 1 - drop_prob
+    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
+    random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
+    if keep_prob > 0.0 and scale_by_keep:
+        random_tensor.div_(keep_prob)
+    return x * random_tensor
+
+class DropPath(nn.Module):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+    """
+    def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+        self.scale_by_keep = scale_by_keep
+
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)
+
+    def extra_repr(self):
+        return f'drop_prob={round(self.drop_prob,3):0.3f}'
+
 class VMambaLinear2d(nn.Linear):
     def __init__(self, *args, groups=1, **kwargs):
         nn.Linear.__init__(self, *args, **kwargs)
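The hunk above inlines timm's stochastic-depth code, so the modeling file no longer depends on timm. As a quick illustration (a sketch, not part of the commit; the tensor shape, drop probability, and import path are arbitrary assumptions), the inlined layer drops whole samples and rescales the survivors during training, and acts as an identity in eval mode:

import torch
from modeling_vmamba import DropPath  # the class inlined above; assumes the file is importable locally

# Sketch only: exercises the inlined DropPath layer.
dp = DropPath(drop_prob=0.1)
x = torch.randn(4, 196, 96)        # (batch, tokens, channels) -- illustrative shape

dp.train()
y = dp(x)                          # zeroes whole samples, rescales the rest by 1 / (1 - drop_prob)

dp.eval()
assert torch.equal(dp(x), x)       # identity at inference time

print(dp)                          # DropPath(drop_prob=0.100), via extra_repr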
@@ -1118,7 +1150,7 @@ class VMambaPreTrainedModel(PreTrainedModel):
     def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
         """Initialize the weights"""
         if isinstance(module, nn.Linear):
-            trunc_normal_(module.weight, std=0.02)
+            nn.init.trunc_normal_(module.weight, std=0.02)
         if isinstance(module, nn.Linear) and module.bias is not None:
             nn.init.constant_(module.bias, 0)
         elif isinstance(module, nn.LayerNorm):
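With the timm import removed, weight initialization uses torch's built-in truncated-normal initializer instead. To my understanding, nn.init.trunc_normal_ (available in recent PyTorch releases) shares the defaults of timm's trunc_normal_ (mean=0., std=1., truncation bounds a=-2., b=2.), so the call in _init_weights is a drop-in replacement. A minimal sketch, not part of the commit:

import torch.nn as nn

# Sketch only: mirrors the initialization performed in _init_weights above.
layer = nn.Linear(96, 96)
nn.init.trunc_normal_(layer.weight, std=0.02)   # truncated normal, default bounds [-2., 2.]
nn.init.constant_(layer.bias, 0)

# with std=0.02 the default bounds sit far out in the tails, so values stay tiny
assert layer.weight.abs().max().item() <= 2.0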