From e9817461badb625aee8f16ed6471474db8891b32 Mon Sep 17 00:00:00 2001 From: xxi <95731198+xxi-nv@users.noreply.github.com> Date: Wed, 14 Jan 2026 18:33:45 +0800 Subject: [PATCH] =?UTF-8?q?[None][chore]=20improve=20the=20readability=20o?= =?UTF-8?q?f=20log=20for=20cutlass=20can=20only=20sup=E2=80=A6=20(#10630)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xxi --- .../_torch/custom_ops/torch_custom_ops.py | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py index 06e93eb3e5..9410efc067 100644 --- a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py +++ b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py @@ -252,14 +252,25 @@ def fused_moe( ) run_moe = moe_runner.fused_moe_runner.run_moe_min_latency if min_latency_mode else moe_runner.fused_moe_runner.run_moe - output = run_moe(input, token_selected_experts, token_final_scales, - fc1_expert_weights, fc1_expert_biases, fc2_expert_weights, - fc2_expert_biases, quant_scales, input_sf, - swizzled_input_sf, swiglu_alpha, swiglu_beta, swiglu_limit, - tp_size, tp_rank, ep_size, ep_rank, cluster_size, - cluster_rank, enable_alltoall, min_latency_mode, - [gemm_tactic_1, gemm_tactic_2], activation_type, - unpadded_hidden_size, tuner_num_tokens, out_tensor) + try: + output = run_moe(input, token_selected_experts, token_final_scales, + fc1_expert_weights, fc1_expert_biases, + fc2_expert_weights, fc2_expert_biases, quant_scales, + input_sf, swizzled_input_sf, swiglu_alpha, swiglu_beta, + swiglu_limit, tp_size, tp_rank, ep_size, ep_rank, + cluster_size, cluster_rank, enable_alltoall, + min_latency_mode, [gemm_tactic_1, gemm_tactic_2], + activation_type, unpadded_hidden_size, + tuner_num_tokens, out_tensor) + except RuntimeError as e: + error_msg = str(e) + if "DeepGEMM only supports Hopper" in error_msg: + raise RuntimeError( + f"{error_msg}" + "Note: This is the Cutlass backend with DeepGemm JIT path. " + "For Blackwell (SM100+) support, please use the DEEPGEMM backend instead." + ) from e + raise return output if min_latency_mode else [output]