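Update modeling_glm4_moe.py: add the missing trailing comma after the `use_grouped_mm` argument (without it the following `load_balance_coeff=1e-3` keyword makes the call a syntax error)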
Browse files
- modeling_glm4_moe.py (+1 -1)
modeling_glm4_moe.py
CHANGED
|
@@ -356,7 +356,7 @@ class Glm4MoeDecoderLayer(GradientCheckpointingLayer):
|
|
| 356 |
route_scale=config.routed_scaling_factor,
|
| 357 |
score_before_experts=False,
|
| 358 |
top_k=config.num_experts_per_tok,
|
| 359 |
-
use_grouped_mm=torch.cuda.get_device_capability(0)[0] >= 9
|
| 360 |
load_balance_coeff=1e-3,
|
| 361 |
)
|
| 362 |
|
|
|
|
| 356 |
route_scale=config.routed_scaling_factor,
|
| 357 |
score_before_experts=False,
|
| 358 |
top_k=config.num_experts_per_tok,
|
| 359 |
+
use_grouped_mm=torch.cuda.get_device_capability(0)[0] >= 9,
|
| 360 |
load_balance_coeff=1e-3,
|
| 361 |
)
|
| 362 |
|