[BugFix] Fix Humming MoE deploy error (#43100)

Signed-off-by: Alireza Dadgarnia <dadgarnia@Alirezas-MacBook-Pro-2.local>
Signed-off-by: Alireza Dadgarnia <49554709+adotdad@users.noreply.github.com>
Co-authored-by: Alireza Dadgarnia <dadgarnia@Alirezas-MacBook-Pro-2.local>
Co-authored-by: Jinzhen Lin <linjinzhen@hotmail.com>
This commit is contained in:
Alireza Dadgarnia
2026-06-02 20:02:50 +03:30
committed by GitHub
parent 3f3e2702c2
commit afcb580715
@@ -810,8 +810,8 @@ class HummingMoEMethod(FusedMoEMethodBase):
param = torch.nn.Parameter(tensor, requires_grad=False)
setattr(layer, name, param)
layer.weight_schemas[sublayer_name] = weight_schema
layer.input_schemas[sublayer_name] = input_schema
layer.weight_schemas[sublayer_name] = weight_schema
layer.input_schemas[sublayer_name] = input_schema
# force requant (origin quant setting -> fp16/bf16 -> new_quant setting)
assert isinstance(weight_schema, HummingWeightSchema)
@@ -865,6 +865,7 @@ class HummingMoEMethod(FusedMoEMethodBase):
# use moe modular
experts: HummingIndexedExperts | HummingGroupedExperts
layer.ensure_moe_quant_config_init()
assert self.moe_quant_config is not None
if get_humming_moe_gemm_type() == "indexed":
experts = HummingIndexedExperts(layer, self.moe, self.moe_quant_config)