From afcb5807159fdc0de6866a8a6c63c1d7597aa73a Mon Sep 17 00:00:00 2001 From: Alireza Dadgarnia <49554709+adotdad@users.noreply.github.com> Date: Tue, 2 Jun 2026 20:02:50 +0330 Subject: [PATCH] [BugFix] Fix Humming MoE deploy error (#43100) Signed-off-by: Alireza Dadgarnia Signed-off-by: Alireza Dadgarnia <49554709+adotdad@users.noreply.github.com> Co-authored-by: Alireza Dadgarnia Co-authored-by: Jinzhen Lin --- vllm/model_executor/layers/quantization/humming.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/quantization/humming.py b/vllm/model_executor/layers/quantization/humming.py index 12bb07a4022..e4d27efe370 100644 --- a/vllm/model_executor/layers/quantization/humming.py +++ b/vllm/model_executor/layers/quantization/humming.py @@ -810,8 +810,8 @@ class HummingMoEMethod(FusedMoEMethodBase): param = torch.nn.Parameter(tensor, requires_grad=False) setattr(layer, name, param) - layer.weight_schemas[sublayer_name] = weight_schema - layer.input_schemas[sublayer_name] = input_schema + layer.weight_schemas[sublayer_name] = weight_schema + layer.input_schemas[sublayer_name] = input_schema # force requant (origin quant setting -> fp16/bf16 -> new_quant setting) assert isinstance(weight_schema, HummingWeightSchema) @@ -865,6 +865,7 @@ class HummingMoEMethod(FusedMoEMethodBase): # use moe modular experts: HummingIndexedExperts | HummingGroupedExperts + layer.ensure_moe_quant_config_init() assert self.moe_quant_config is not None if get_humming_moe_gemm_type() == "indexed": experts = HummingIndexedExperts(layer, self.moe, self.moe_quant_config)