[fix] Fix Llama4 min-latency import error (#5209)

Signed-off-by: Yilin Fan <206948969+nv-yilinf@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-06-15 19:03:07 -07:00 · 2025-06-15 19:03:07 -07:00 · 7a5e0fd300
commit 7a5e0fd300
parent c84e41fd9d
1 changed files with 20 additions and 8 deletions
--- a/tensorrt_llm/_torch/modules/fused_moe/__init__.py
+++ b/tensorrt_llm/_torch/modules/fused_moe/__init__.py
@@ -4,6 +4,7 @@ from .fused_moe_trtllm_gen import TRTLLMGenFusedMoE
 from .fused_moe_vanilla import VanillaMoE
 from .interface import MoE, MoEWeightLoadingMode
 from .moe_load_balancer import MoeLoadBalancer
+from .quantization import FusedMoEQuantScalesFP8
 from .routing import (BaseMoeRoutingMethod, DeepSeekV3MoeRoutingMethod,
                      DefaultMoeRoutingMethod,
                      Llama4RenormalizeMoeRoutingMethod,
@@ -12,12 +13,23 @@ from .routing import (BaseMoeRoutingMethod, DeepSeekV3MoeRoutingMethod,
                      SparseMixerMoeRoutingMethod, StaticMoeRoutingMethod)

 __all__ = [
-    "VanillaMoE", "CutlassFusedMoE", "TRTLLMGenFusedMoE",
-    "BaseMoeRoutingMethod", "MoeLoadBalancer",
-    "RenormalizeNaiveMoeRoutingMethod", "Llama4RenormalizeMoeRoutingMethod",
-    "SparseMixerMoeRoutingMethod", "LoadBalancedMoeRoutingMethod",
-    "StaticMoeRoutingMethod", "DefaultMoeRoutingMethod",
-    "DeepSeekV3MoeRoutingMethod", "RoutingMethodType",
-    "RenormalizeMoeRoutingMethod", "MoE", "MoEWeightLoadingMode", "get_moe_cls",
-    "create_moe"
+    "VanillaMoE",
+    "CutlassFusedMoE",
+    "TRTLLMGenFusedMoE",
+    "BaseMoeRoutingMethod",
+    "MoeLoadBalancer",
+    "RenormalizeNaiveMoeRoutingMethod",
+    "Llama4RenormalizeMoeRoutingMethod",
+    "SparseMixerMoeRoutingMethod",
+    "LoadBalancedMoeRoutingMethod",
+    "StaticMoeRoutingMethod",
+    "DefaultMoeRoutingMethod",
+    "DeepSeekV3MoeRoutingMethod",
+    "RoutingMethodType",
+    "RenormalizeMoeRoutingMethod",
+    "MoE",
+    "MoEWeightLoadingMode",
+    "get_moe_cls",
+    "create_moe",
+    "FusedMoEQuantScalesFP8",
 ]