[BugFix] Fix Humming MoE deploy error (#43100)

Signed-off-by: Alireza Dadgarnia <dadgarnia@Alirezas-MacBook-Pro-2.local>
Signed-off-by: Alireza Dadgarnia <49554709+adotdad@users.noreply.github.com>
Co-authored-by: Alireza Dadgarnia <dadgarnia@Alirezas-MacBook-Pro-2.local>
Co-authored-by: Jinzhen Lin <linjinzhen@hotmail.com>
2026-06-06 00:16:14 +00:00 · 2026-06-02 20:02:50 +03:30
parent 3f3e2702c2
commit afcb580715
1 changed file with 3 additions and 2 deletions
@@ -810,8 +810,8 @@ class HummingMoEMethod(FusedMoEMethodBase):
                    param = torch.nn.Parameter(tensor, requires_grad=False)
                    setattr(layer, name, param)

-                layer.weight_schemas[sublayer_name] = weight_schema
-                layer.input_schemas[sublayer_name] = input_schema
+            layer.weight_schemas[sublayer_name] = weight_schema
+            layer.input_schemas[sublayer_name] = input_schema

            # force requant (origin quant setting -> fp16/bf16 -> new_quant setting)
            assert isinstance(weight_schema, HummingWeightSchema)
@@ -865,6 +865,7 @@ class HummingMoEMethod(FusedMoEMethodBase):

        # use moe modular
        experts: HummingIndexedExperts | HummingGroupedExperts
+        layer.ensure_moe_quant_config_init()
        assert self.moe_quant_config is not None
        if get_humming_moe_gemm_type() == "indexed":
            experts = HummingIndexedExperts(layer, self.moe, self.moe_quant_config)