mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
[TRTLLM-10415][feat] Dump thread stacks for hanging tests before timeout (#10708)
Signed-off-by: Fred Wei <20514172+WeiHaocheng@users.noreply.github.com>
This commit is contained in:
parent
c7a86f89de
commit
80dd6e70c6
@ -14,6 +14,7 @@
|
||||
# limitations under the License.
|
||||
# # Force resource release after test
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import traceback
|
||||
import warnings
|
||||
@ -33,14 +34,23 @@ from mpi4py.futures import MPIPoolExecutor
|
||||
from utils.cpp_paths import llm_root # noqa: F401
|
||||
from utils.util import get_current_process_gpu_memory
|
||||
|
||||
from tensorrt_llm._utils import print_all_stacks
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from integration.defs import test_list_parser
|
||||
|
||||
|
||||
def dump_threads(signum, frame):
    """Signal handler that prints stack traces via ``print_all_stacks``.

    Args:
        signum: Number of the signal that triggered the handler (unused).
        frame: Stack frame that was interrupted by the signal (unused).
    """
    # Both arguments are mandated by the signal-handler calling convention
    # but carry nothing this handler needs.
    del signum, frame
    print_all_stacks()
|
||||
|
||||
|
||||
def pytest_configure(config):
    """Pytest hook: apply process-wide test-session setup.

    Disables tqdm's monitor thread and, where the platform provides
    ``SIGALRM``, installs ``dump_threads`` as its handler so that stacks are
    printed when the alarm signal is delivered.

    Args:
        config: The pytest config object passed to the hook (not used here).
    """
    # avoid thread leak of tqdm's TMonitor
    tqdm.tqdm.monitor_interval = 0

    # Dump all threads' stacks when SIGALRM is received.
    # SIGALRM is only defined on Unix; looking it up with getattr keeps
    # configuration from failing with AttributeError on other platforms.
    alarm_signal = getattr(signal, "SIGALRM", None)
    if alarm_signal is not None:
        signal.signal(alarm_signal, dump_threads)
|
||||
|
||||
|
||||
@pytest.hookimpl(wrapper=True)
|
||||
def pytest_runtest_protocol(item, nextitem):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user