From d60d6ff6fd8a2c348204b59aa6b48c58ff764710 Mon Sep 17 00:00:00 2001
From: Yukun He <23156053+hyukn@users.noreply.github.com>
Date: Wed, 21 Jan 2026 13:53:04 +0800
Subject: [PATCH] [None][fix] Cherry-pick #10715: Disable short profile for
 tunable ops with MERGE strategy (#10844)

Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com>
---
 tensorrt_llm/_torch/autotuner.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorrt_llm/_torch/autotuner.py b/tensorrt_llm/_torch/autotuner.py
index c78b1a0319..f68d9c7f6c 100644
--- a/tensorrt_llm/_torch/autotuner.py
+++ b/tensorrt_llm/_torch/autotuner.py
@@ -1105,7 +1105,10 @@ class AutoTuner:
         disable_short_profile = os.environ.get(
             "TLLM_AUTOTUNER_DISABLE_SHORT_PROFILE", "0") == "1"
-        if fewer_repeat_avg_time > short_profile_threshold_ms and not disable_short_profile:
+
+        # Disable this feature for merged tuning strategy to avoid potential hang due to asymmetric tuning.
+        if fewer_repeat_avg_time > short_profile_threshold_ms and not disable_short_profile \
+            and tuning_config.distributed_tuning_strategy != DistributedTuningStrategy.MERGE:
             # directly use the few repeat estimated time to avoid redundant profiling
             avg_time = fewer_repeat_avg_time
         else:
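
Note: for readers outside the diff context, below is a minimal standalone Python
sketch of the decision this patch changes. The names choose_avg_time,
run_full_profile, and the two-member enum are hypothetical stand-ins; only the
environment variable, the threshold comparison, and the MERGE guard come from
the patch itself. The rationale, as the added comment hints, is presumably that
under the MERGE strategy tuning results are combined across ranks, so if one
rank took the short-profile shortcut while another ran the full profiling loop
("asymmetric tuning"), the ranks could desynchronize and hang.

    import os
    from enum import Enum


    class DistributedTuningStrategy(Enum):
        # Stand-in for the real enum in tensorrt_llm/_torch/autotuner.py;
        # only the member referenced by this patch is reproduced here.
        MERGE = "merge"
        INDEPENDENT = "independent"


    def choose_avg_time(fewer_repeat_avg_time, short_profile_threshold_ms,
                        strategy, run_full_profile):
        # Mirrors the patched control flow: take the short-profile shortcut
        # only when it is enabled AND the strategy is not MERGE.
        disable_short_profile = os.environ.get(
            "TLLM_AUTOTUNER_DISABLE_SHORT_PROFILE", "0") == "1"
        if (fewer_repeat_avg_time > short_profile_threshold_ms
                and not disable_short_profile
                and strategy != DistributedTuningStrategy.MERGE):
            # Reuse the few-repeat estimate; skip redundant profiling.
            return fewer_repeat_avg_time
        # MERGE strategy (or shortcut disabled): always run the full
        # profile so every rank executes the same profiling path.
        return run_full_profile()


    # Under MERGE, every call falls through to the full profile, keeping
    # all ranks on the same code path regardless of the estimate.
    print(choose_avg_time(5.0, 1.0, DistributedTuningStrategy.MERGE,
                          lambda: 4.8))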