mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[fix] Fix Llama4 min-latency import error (#5209)
Signed-off-by: Yilin Fan <206948969+nv-yilinf@users.noreply.github.com>
This commit is contained in:
parent
c84e41fd9d
commit
7a5e0fd300
@@ -4,6 +4,7 @@ from .fused_moe_trtllm_gen import TRTLLMGenFusedMoE
|
||||
from .fused_moe_vanilla import VanillaMoE
|
||||
from .interface import MoE, MoEWeightLoadingMode
|
||||
from .moe_load_balancer import MoeLoadBalancer
|
||||
from .quantization import FusedMoEQuantScalesFP8
|
||||
from .routing import (BaseMoeRoutingMethod, DeepSeekV3MoeRoutingMethod,
|
||||
DefaultMoeRoutingMethod,
|
||||
Llama4RenormalizeMoeRoutingMethod,
|
||||
@@ -12,12 +13,23 @@ from .routing import (BaseMoeRoutingMethod, DeepSeekV3MoeRoutingMethod,
|
||||
SparseMixerMoeRoutingMethod, StaticMoeRoutingMethod)
|
||||
|
||||
__all__ = [
|
||||
"VanillaMoE", "CutlassFusedMoE", "TRTLLMGenFusedMoE",
|
||||
"BaseMoeRoutingMethod", "MoeLoadBalancer",
|
||||
"RenormalizeNaiveMoeRoutingMethod", "Llama4RenormalizeMoeRoutingMethod",
|
||||
"SparseMixerMoeRoutingMethod", "LoadBalancedMoeRoutingMethod",
|
||||
"StaticMoeRoutingMethod", "DefaultMoeRoutingMethod",
|
||||
"DeepSeekV3MoeRoutingMethod", "RoutingMethodType",
|
||||
"RenormalizeMoeRoutingMethod", "MoE", "MoEWeightLoadingMode", "get_moe_cls",
|
||||
"create_moe"
|
||||
"VanillaMoE",
|
||||
"CutlassFusedMoE",
|
||||
"TRTLLMGenFusedMoE",
|
||||
"BaseMoeRoutingMethod",
|
||||
"MoeLoadBalancer",
|
||||
"RenormalizeNaiveMoeRoutingMethod",
|
||||
"Llama4RenormalizeMoeRoutingMethod",
|
||||
"SparseMixerMoeRoutingMethod",
|
||||
"LoadBalancedMoeRoutingMethod",
|
||||
"StaticMoeRoutingMethod",
|
||||
"DefaultMoeRoutingMethod",
|
||||
"DeepSeekV3MoeRoutingMethod",
|
||||
"RoutingMethodType",
|
||||
"RenormalizeMoeRoutingMethod",
|
||||
"MoE",
|
||||
"MoEWeightLoadingMode",
|
||||
"get_moe_cls",
|
||||
"create_moe",
|
||||
"FusedMoEQuantScalesFP8",
|
||||
]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user