Motif-Technologies
/

optimizer

Model card | Files and versions

ca1207 committed on Sep 24

Commit

35894d1

·

1 Parent(s): 6e9baad

misc

Files changed (2) hide show

test/test_muon/test.py +1 -1
torch-ext/optimizer/muon.py +4 -4

test/test_muon/test.py CHANGED Viewed

@@ -2,7 +2,7 @@ import logging
 import torch
 import torch.distributed as dist
-from muon import Muon, get_default_muon_param_groups
 from torch.distributed.fsdp import FSDPModule, fully_shard
 from torch.distributed.tensor import DTensor
 from torch.distributed.tensor.placement_types import Replicate

 import torch
 import torch.distributed as dist
+from optimizer.muon import Muon, get_default_muon_param_groups
 from torch.distributed.fsdp import FSDPModule, fully_shard
 from torch.distributed.tensor import DTensor
 from torch.distributed.tensor.placement_types import Replicate

torch-ext/optimizer/muon.py CHANGED Viewed

@@ -701,10 +701,10 @@ class Muon(torch.optim.Optimizer):
                 new_scale = math.sqrt(threshold / v_ele)
                 if new_scale < scales_full[head_idx]:
                     scales_full[head_idx] = new_scale
-                    #logger.info(
-                    #    f"[{kind}] Head {head_idx} exceeded threshold "
-                    #    f"(value={v_ele:.4f}, threshold={threshold:.4f}) -> applying scale={new_scale:.4f}"
-                    #)
                     scaling += 1
         return scales_full if scaling > 0 else None

                 new_scale = math.sqrt(threshold / v_ele)
                 if new_scale < scales_full[head_idx]:
                     scales_full[head_idx] = new_scale
+                    logger.info(
+                        f"[{kind}] Head {head_idx} exceeded threshold "
+                        f"(value={v_ele:.4f}, threshold={threshold:.4f}) -> applying scale={new_scale:.4f}"
+                    )
                     scaling += 1
         return scales_full if scaling > 0 else None