From 80dd6e70c68974ebb06dc48e21d7d2eb1dc41d9e Mon Sep 17 00:00:00 2001
From: WeiHaocheng <20514172+WeiHaocheng@users.noreply.github.com>
Date: Thu, 29 Jan 2026 20:43:34 +0800
Subject: [PATCH] =?UTF-8?q?[TRTLLM-10415][feat]=20Dump=20thread=20stacks?=
 =?UTF-8?q?=20for=20hanging=20tests=20before=20time=E2=80=A6=20(#10708)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Fred Wei <20514172+WeiHaocheng@users.noreply.github.com>
---
 tests/unittest/conftest.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/unittest/conftest.py b/tests/unittest/conftest.py
index 2c75d30023..80343bcd36 100644
--- a/tests/unittest/conftest.py
+++ b/tests/unittest/conftest.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 # # Force resource release after test
 import os
+import signal
 import sys
 import traceback
 import warnings
@@ -33,14 +34,23 @@ from mpi4py.futures import MPIPoolExecutor
 from utils.cpp_paths import llm_root  # noqa: F401
 from utils.util import get_current_process_gpu_memory
 
+from tensorrt_llm._utils import print_all_stacks
+
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from integration.defs import test_list_parser
 
 
+def dump_threads(signum, frame):
+    print_all_stacks()
+
+
 def pytest_configure(config):
     # avoid thread leak of tqdm's TMonitor
     tqdm.tqdm.monitor_interval = 0
 
+    # Dump all threads' stacks when SIGALRM is received
+    signal.signal(signal.SIGALRM, dump_threads)
+
 
 @pytest.hookimpl(wrapper=True)
 def pytest_runtest_protocol(item, nextitem):