From afcb5807159fdc0de6866a8a6c63c1d7597aa73a Mon Sep 17 00:00:00 2001
From: Alireza Dadgarnia <49554709+adotdad@users.noreply.github.com>
Date: Tue, 2 Jun 2026 20:02:50 +0330
Subject: [PATCH] [BugFix] Fix Humming MoE deploy error (#43100)

Signed-off-by: Alireza Dadgarnia <dadgarnia@Alirezas-MacBook-Pro-2.local>
Signed-off-by: Alireza Dadgarnia <49554709+adotdad@users.noreply.github.com>
Co-authored-by: Alireza Dadgarnia <dadgarnia@Alirezas-MacBook-Pro-2.local>
Co-authored-by: Jinzhen Lin <linjinzhen@hotmail.com>
---
 vllm/model_executor/layers/quantization/humming.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/humming.py b/vllm/model_executor/layers/quantization/humming.py
index 12bb07a4022..e4d27efe370 100644
--- a/vllm/model_executor/layers/quantization/humming.py
+++ b/vllm/model_executor/layers/quantization/humming.py
@@ -810,8 +810,8 @@ class HummingMoEMethod(FusedMoEMethodBase):
                     param = torch.nn.Parameter(tensor, requires_grad=False)
                     setattr(layer, name, param)
 
-                layer.weight_schemas[sublayer_name] = weight_schema
-                layer.input_schemas[sublayer_name] = input_schema
+            layer.weight_schemas[sublayer_name] = weight_schema
+            layer.input_schemas[sublayer_name] = input_schema
 
             # force requant (origin quant setting -> fp16/bf16 -> new_quant setting)
             assert isinstance(weight_schema, HummingWeightSchema)
@@ -865,6 +865,7 @@ class HummingMoEMethod(FusedMoEMethodBase):
 
         # use moe modular
         experts: HummingIndexedExperts | HummingGroupedExperts
+        layer.ensure_moe_quant_config_init()
         assert self.moe_quant_config is not None
         if get_humming_moe_gemm_type() == "indexed":
             experts = HummingIndexedExperts(layer, self.moe, self.moe_quant_config)