From e9817461badb625aee8f16ed6471474db8891b32 Mon Sep 17 00:00:00 2001
From: xxi <95731198+xxi-nv@users.noreply.github.com>
Date: Wed, 14 Jan 2026 18:33:45 +0800
Subject: [PATCH] =?UTF-8?q?[None][chore]=20improve=20the=20readability=20o?=
 =?UTF-8?q?f=20log=20for=20cutlass=20can=20only=20sup=E2=80=A6=20(#10630)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: xxi <xxi@nvidia.com>
---
 .../_torch/custom_ops/torch_custom_ops.py     | 27 +++++++++++++------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
index 06e93eb3e5..9410efc067 100644
--- a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
+++ b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
@@ -252,14 +252,25 @@ def fused_moe(
     )
 
     run_moe = moe_runner.fused_moe_runner.run_moe_min_latency if min_latency_mode else moe_runner.fused_moe_runner.run_moe
-    output = run_moe(input, token_selected_experts, token_final_scales,
-                     fc1_expert_weights, fc1_expert_biases, fc2_expert_weights,
-                     fc2_expert_biases, quant_scales, input_sf,
-                     swizzled_input_sf, swiglu_alpha, swiglu_beta, swiglu_limit,
-                     tp_size, tp_rank, ep_size, ep_rank, cluster_size,
-                     cluster_rank, enable_alltoall, min_latency_mode,
-                     [gemm_tactic_1, gemm_tactic_2], activation_type,
-                     unpadded_hidden_size, tuner_num_tokens, out_tensor)
+    try:
+        output = run_moe(input, token_selected_experts, token_final_scales,
+                         fc1_expert_weights, fc1_expert_biases,
+                         fc2_expert_weights, fc2_expert_biases, quant_scales,
+                         input_sf, swizzled_input_sf, swiglu_alpha, swiglu_beta,
+                         swiglu_limit, tp_size, tp_rank, ep_size, ep_rank,
+                         cluster_size, cluster_rank, enable_alltoall,
+                         min_latency_mode, [gemm_tactic_1, gemm_tactic_2],
+                         activation_type, unpadded_hidden_size,
+                         tuner_num_tokens, out_tensor)
+    except RuntimeError as e:
+        error_msg = str(e)
+        if "DeepGEMM only supports Hopper" in error_msg:
+            raise RuntimeError(
+                f"{error_msg}"
+                "Note: This is the Cutlass backend with DeepGemm JIT path. "
+                "For Blackwell (SM100+) support, please use the DEEPGEMM backend instead."
+            ) from e
+        raise
 
     return output if min_latency_mode else [output]