mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[BugFix] Fix Humming MoE deploy error (#43100)
Signed-off-by: Alireza Dadgarnia <dadgarnia@Alirezas-MacBook-Pro-2.local>
Signed-off-by: Alireza Dadgarnia <49554709+adotdad@users.noreply.github.com>
Co-authored-by: Alireza Dadgarnia <dadgarnia@Alirezas-MacBook-Pro-2.local>
Co-authored-by: Jinzhen Lin <linjinzhen@hotmail.com>
This commit is contained in:
committed by
GitHub
parent
3f3e2702c2
commit
afcb580715
@@ -810,8 +810,8 @@ class HummingMoEMethod(FusedMoEMethodBase):
|
||||
param = torch.nn.Parameter(tensor, requires_grad=False)
|
||||
setattr(layer, name, param)
|
||||
|
||||
layer.weight_schemas[sublayer_name] = weight_schema
|
||||
layer.input_schemas[sublayer_name] = input_schema
|
||||
layer.weight_schemas[sublayer_name] = weight_schema
|
||||
layer.input_schemas[sublayer_name] = input_schema
|
||||
|
||||
# force requant (origin quant setting -> fp16/bf16 -> new_quant setting)
|
||||
assert isinstance(weight_schema, HummingWeightSchema)
|
||||
@@ -865,6 +865,7 @@ class HummingMoEMethod(FusedMoEMethodBase):
|
||||
|
||||
# use moe modular
|
||||
experts: HummingIndexedExperts | HummingGroupedExperts
|
||||
layer.ensure_moe_quant_config_init()
|
||||
assert self.moe_quant_config is not None
|
||||
if get_humming_moe_gemm_type() == "indexed":
|
||||
experts = HummingIndexedExperts(layer, self.moe, self.moe_quant_config)
|
||||
|
||||
Reference in New Issue
Block a user