From 7a5e0fd300b9d5d675de660752ef5f54a125b4b3 Mon Sep 17 00:00:00 2001 From: Yilin Fan <206948969+nv-yilinf@users.noreply.github.com> Date: Sun, 15 Jun 2025 19:03:07 -0700 Subject: [PATCH] [fix] Fix Llama4 min-latency import error (#5209) Signed-off-by: Yilin Fan <206948969+nv-yilinf@users.noreply.github.com> --- .../_torch/modules/fused_moe/__init__.py | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tensorrt_llm/_torch/modules/fused_moe/__init__.py b/tensorrt_llm/_torch/modules/fused_moe/__init__.py index 2f741d4799..c1c699b556 100644 --- a/tensorrt_llm/_torch/modules/fused_moe/__init__.py +++ b/tensorrt_llm/_torch/modules/fused_moe/__init__.py @@ -4,6 +4,7 @@ from .fused_moe_trtllm_gen import TRTLLMGenFusedMoE from .fused_moe_vanilla import VanillaMoE from .interface import MoE, MoEWeightLoadingMode from .moe_load_balancer import MoeLoadBalancer +from .quantization import FusedMoEQuantScalesFP8 from .routing import (BaseMoeRoutingMethod, DeepSeekV3MoeRoutingMethod, DefaultMoeRoutingMethod, Llama4RenormalizeMoeRoutingMethod, @@ -12,12 +13,23 @@ from .routing import (BaseMoeRoutingMethod, DeepSeekV3MoeRoutingMethod, SparseMixerMoeRoutingMethod, StaticMoeRoutingMethod) __all__ = [ - "VanillaMoE", "CutlassFusedMoE", "TRTLLMGenFusedMoE", - "BaseMoeRoutingMethod", "MoeLoadBalancer", - "RenormalizeNaiveMoeRoutingMethod", "Llama4RenormalizeMoeRoutingMethod", - "SparseMixerMoeRoutingMethod", "LoadBalancedMoeRoutingMethod", - "StaticMoeRoutingMethod", "DefaultMoeRoutingMethod", - "DeepSeekV3MoeRoutingMethod", "RoutingMethodType", - "RenormalizeMoeRoutingMethod", "MoE", "MoEWeightLoadingMode", "get_moe_cls", - "create_moe" + "VanillaMoE", + "CutlassFusedMoE", + "TRTLLMGenFusedMoE", + "BaseMoeRoutingMethod", + "MoeLoadBalancer", + "RenormalizeNaiveMoeRoutingMethod", + "Llama4RenormalizeMoeRoutingMethod", + "SparseMixerMoeRoutingMethod", + "LoadBalancedMoeRoutingMethod", + "StaticMoeRoutingMethod", + "DefaultMoeRoutingMethod", + "DeepSeekV3MoeRoutingMethod", + "RoutingMethodType", + "RenormalizeMoeRoutingMethod", + "MoE", + "MoEWeightLoadingMode", + "get_moe_cls", + "create_moe", + "FusedMoEQuantScalesFP8", ]