[TRTLLM-10415][feat] Dump thread stacks for hanging tests before timeout (#10708)

Signed-off-by: Fred Wei <20514172+WeiHaocheng@users.noreply.github.com>
2026-02-04 18:21:52 +08:00 · 2026-01-29 20:43:34 +08:00 · 2026-01-29 20:43:34 +08:00 · 80dd6e70c6
commit 80dd6e70c6
parent c7a86f89de
1 changed files with 10 additions and 0 deletions
--- a/tests/unittest/conftest.py
+++ b/tests/unittest/conftest.py
@ -14,6 +14,7 @@
 # limitations under the License.
 # # Force resource release after test
 import os
+import signal
 import sys
 import traceback
 import warnings
@ -33,14 +34,23 @@ from mpi4py.futures import MPIPoolExecutor
 from utils.cpp_paths import llm_root  # noqa: F401
 from utils.util import get_current_process_gpu_memory

+from tensorrt_llm._utils import print_all_stacks
+
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from integration.defs import test_list_parser


+def dump_threads(signum, frame):  # signal-handler signature (signum, frame); neither argument is used
+    print_all_stacks()  # helper imported from tensorrt_llm._utils; presumably prints every thread's stack - confirm there
+
+
 def pytest_configure(config):
     # avoid thread leak of tqdm's TMonitor
     tqdm.tqdm.monitor_interval = 0

+    # Register dump_threads as the SIGALRM handler so receiving that signal dumps all threads' stacks
+    signal.signal(signal.SIGALRM, dump_threads)  # NOTE(review): signal.SIGALRM is Unix-only per the signal docs - confirm these tests never run on Windows
+

@pytest.hookimpl(wrapper=True)
 def pytest_runtest_protocol(item, nextitem):