[fix] Fix Llama4 min-latency import error (#5209)

Signed-off-by: Yilin Fan <206948969+nv-yilinf@users.noreply.github.com>
This commit is contained in:
Yilin Fan 2025-06-15 19:03:07 -07:00 committed by GitHub
parent c84e41fd9d
commit 7a5e0fd300
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4,6 +4,7 @@ from .fused_moe_trtllm_gen import TRTLLMGenFusedMoE
from .fused_moe_vanilla import VanillaMoE
from .interface import MoE, MoEWeightLoadingMode
from .moe_load_balancer import MoeLoadBalancer
from .quantization import FusedMoEQuantScalesFP8
from .routing import (BaseMoeRoutingMethod, DeepSeekV3MoeRoutingMethod,
DefaultMoeRoutingMethod,
Llama4RenormalizeMoeRoutingMethod,
@@ -12,12 +13,23 @@ from .routing import (BaseMoeRoutingMethod, DeepSeekV3MoeRoutingMethod,
SparseMixerMoeRoutingMethod, StaticMoeRoutingMethod)
# Names exported by the package, as shown by this diff hunk. The hunk header
# reports 12 old lines and 23 new lines, which matches the compact entries
# directly below being the removed form and the one-per-line entries after
# them being the added form; the +/- markers are not present in this view.
__all__ = [
# Removed form: several names per line, ending without a trailing comma.
"VanillaMoE", "CutlassFusedMoE", "TRTLLMGenFusedMoE",
"BaseMoeRoutingMethod", "MoeLoadBalancer",
"RenormalizeNaiveMoeRoutingMethod", "Llama4RenormalizeMoeRoutingMethod",
"SparseMixerMoeRoutingMethod", "LoadBalancedMoeRoutingMethod",
"StaticMoeRoutingMethod", "DefaultMoeRoutingMethod",
"DeepSeekV3MoeRoutingMethod", "RoutingMethodType",
"RenormalizeMoeRoutingMethod", "MoE", "MoEWeightLoadingMode", "get_moe_cls",
"create_moe"
# Added form: the same names, one per line with trailing commas, plus the new
# "FusedMoEQuantScalesFP8" entry (presumably paired with the `.quantization`
# import shown in the first hunk -- confirm against the real file).
# NOTE(review): read literally as a single list, the missing comma after
# "create_moe" above would concatenate it with the next string literal; in
# the actual post-commit file only the entries below should be present.
"VanillaMoE",
"CutlassFusedMoE",
"TRTLLMGenFusedMoE",
"BaseMoeRoutingMethod",
"MoeLoadBalancer",
"RenormalizeNaiveMoeRoutingMethod",
"Llama4RenormalizeMoeRoutingMethod",
"SparseMixerMoeRoutingMethod",
"LoadBalancedMoeRoutingMethod",
"StaticMoeRoutingMethod",
"DefaultMoeRoutingMethod",
"DeepSeekV3MoeRoutingMethod",
"RoutingMethodType",
"RenormalizeMoeRoutingMethod",
"MoE",
"MoEWeightLoadingMode",
"get_moe_cls",
"create_moe",
"FusedMoEQuantScalesFP8",
]