From 968ae74341d465eb3c1ed9e123464417aa85edc1 Mon Sep 17 00:00:00 2001 From: Abtin Keshavarzian Date: Mon, 9 Feb 2026 11:38:18 -0800 Subject: [PATCH] [tests] add timeout to cert suite execution (#12379) Currently, the `run_cert()` function in `run_cert_suite.py` invokes test scripts using `subprocess.check_call()` without a timeout. This can cause the test suite to hang indefinitely if a test script fails to terminate. This commit updates `run_cert_suite.py` to support a configurable `--timeout` argument. It passes this timeout to `check_call()` and handles `subprocess.TimeoutExpired` to log failure and print output upon timeout. Additionally, this commit updates `script/test` to pass the `TEST_TIMEOUT` environment variable to the test runner and updates GitHub workflow configurations to define specific timeout values for various test jobs. --- .github/workflows/otbr.yml | 2 ++ .github/workflows/simulation-1.1.yml | 5 +++++ .github/workflows/simulation-1.4.yml | 2 ++ script/test | 2 +- tests/scripts/thread-cert/run_cert_suite.py | 17 ++++++++++++----- 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/.github/workflows/otbr.yml b/.github/workflows/otbr.yml index cd5923cfe..866d1b039 100644 --- a/.github/workflows/otbr.yml +++ b/.github/workflows/otbr.yml @@ -50,6 +50,7 @@ jobs: env: REFERENCE_DEVICE: 1 VIRTUAL_TIME: 0 + TEST_TIMEOUT: 1800 PACKET_VERIFICATION: 1 THREAD_VERSION: 1.4 INTER_OP: 1 @@ -165,6 +166,7 @@ jobs: env: REFERENCE_DEVICE: 1 VIRTUAL_TIME: 0 + TEST_TIMEOUT: 1800 PACKET_VERIFICATION: ${{ matrix.packet_verification }} THREAD_VERSION: 1.4 INTER_OP: 1 diff --git a/.github/workflows/simulation-1.1.yml b/.github/workflows/simulation-1.1.yml index 4a8c4359a..78e2e4c37 100644 --- a/.github/workflows/simulation-1.1.yml +++ b/.github/workflows/simulation-1.1.yml @@ -52,6 +52,7 @@ jobs: REFERENCE_DEVICE: 1 THREAD_VERSION: 1.1 VIRTUAL_TIME: 1 + TEST_TIMEOUT: 300 OT_VT_USE_UNIX_SOCKET: 1 MULTIPLY: 3 steps: @@ -107,6 +108,7 @@ jobs: REFERENCE_DEVICE: 1 THREAD_VERSION: 1.1 VIRTUAL_TIME: 1 + TEST_TIMEOUT: 300 OT_VT_USE_UNIX_SOCKET: 1 steps: - name: Harden Runner @@ -162,6 +164,7 @@ jobs: THREAD_VERSION: 1.1 USE_MTD: 1 VIRTUAL_TIME: 1 + TEST_TIMEOUT: 300 OT_VT_USE_UNIX_SOCKET: 1 MESSAGE_USE_HEAP: ${{ matrix.message_use_heap }} steps: @@ -212,6 +215,7 @@ jobs: REFERENCE_DEVICE: 1 THREAD_VERSION: 1.1 VIRTUAL_TIME: 1 + TEST_TIMEOUT: 300 OT_VT_USE_UNIX_SOCKET: 1 steps: - name: Harden Runner @@ -361,6 +365,7 @@ jobs: COVERAGE: 1 THREAD_VERSION: 1.1 VIRTUAL_TIME: 1 + TEST_TIMEOUT: 300 OT_VT_USE_UNIX_SOCKET: 1 CXXFLAGS: "-DOPENTHREAD_CONFIG_LOG_PREPEND_UPTIME=0" steps: diff --git a/.github/workflows/simulation-1.4.yml b/.github/workflows/simulation-1.4.yml index 0c53f87d3..e0972b5b4 100644 --- a/.github/workflows/simulation-1.4.yml +++ b/.github/workflows/simulation-1.4.yml @@ -55,6 +55,7 @@ jobs: COVERAGE: 1 THREAD_VERSION: 1.4 VIRTUAL_TIME: 1 + TEST_TIMEOUT: 300 OT_VT_USE_UNIX_SOCKET: 1 INTER_OP: 1 INTER_OP_BBR: 1 @@ -125,6 +126,7 @@ jobs: env: REFERENCE_DEVICE: 1 VIRTUAL_TIME: 1 + TEST_TIMEOUT: 300 OT_VT_USE_UNIX_SOCKET: 1 COVERAGE: 1 PACKET_VERIFICATION: 1 diff --git a/script/test b/script/test index c03969620..5c0e09335 100755 --- a/script/test +++ b/script/test @@ -317,7 +317,7 @@ do_cert_suite() sudo modprobe ip6table_filter mkdir -p ot_testing - ./tests/scripts/thread-cert/run_cert_suite.py --run-directory ot_testing --multiply "${MULTIPLY:-1}" "$@" + ./tests/scripts/thread-cert/run_cert_suite.py --run-directory ot_testing --multiply "${MULTIPLY:-1}" --timeout "${TEST_TIMEOUT:-0}" "$@" exit 0 } diff --git a/tests/scripts/thread-cert/run_cert_suite.py b/tests/scripts/thread-cert/run_cert_suite.py index 57546b7ec..e8ba94d13 100755 --- a/tests/scripts/thread-cert/run_cert_suite.py +++ b/tests/scripts/thread-cert/run_cert_suite.py @@ -57,7 +57,7 @@ def bash(cmd: str, check=True, stdout=None): subprocess.run(cmd, shell=True, check=check, stdout=stdout) -def run_cert(iteration_id: int, port_offset: int, script: str, run_directory: str): +def run_cert(iteration_id: int, port_offset: int, script: str, run_directory: str, timeout: int): if not os.access(script, os.X_OK): logging.warning('Skip test %s, not executable', script) return @@ -79,7 +79,12 @@ def run_cert(iteration_id: int, port_offset: int, script: str, run_directory: st stderr=output, stdin=subprocess.DEVNULL, cwd=run_directory, - env=env) + env=env, + timeout=None if timeout == 0 else timeout) + except subprocess.TimeoutExpired: + bash(f'cat {logfile} 1>&2') + logging.error("Run test %s timed out, please check the log file: %s", test_name, logfile) + raise except subprocess.CalledProcessError: bash(f'cat {logfile} 1>&2') logging.error("Run test %s failed, please check the log file: %s", test_name, logfile) @@ -115,6 +120,7 @@ def parse_args(): import argparse parser = argparse.ArgumentParser(description='Process some integers.') parser.add_argument('--multiply', type=int, default=1, help='run each test for multiple times') + parser.add_argument('--timeout', type=int, default=0, help='timeout in seconds per test, zero means no timeout') parser.add_argument('--run-directory', type=str, default=None, help='run each test in the specified directory') parser.add_argument("scripts", nargs='+', type=str, help='specify Backbone test scripts') @@ -122,6 +128,7 @@ def parse_args(): logging.info("Max jobs: %d", MAX_JOBS) logging.info("Run directory: %s", args.run_directory or '.') logging.info("Multiply: %d", args.multiply) + logging.info("Timeout: %d", args.timeout) logging.info("Test scripts: %d", len(args.scripts)) return args @@ -165,7 +172,7 @@ def print_summary(scripts: List[str], script_successes: Dict[str, List[int]], sc print(message) -def run_tests(scripts: List[str], multiply: int = 1, run_directory: str = None): +def run_tests(scripts: List[str], multiply: int = 1, run_directory: str = None, timeout: int = 0): scripts = list(set(scripts)) # Run each script for multiple times @@ -182,7 +189,7 @@ def run_tests(scripts: List[str], multiply: int = 1, run_directory: str = None): for script, i in script_ids: port_offset = port_offset_pool.allocate() - pool.apply_async(run_cert, [i, port_offset, script, run_directory], + pool.apply_async(run_cert, [i, port_offset, script, run_directory, timeout], callback=lambda ret, id=i, script=script, port_offset=port_offset: result_callback( id, script, script_successes, port_offset), error_callback=lambda ret, id=i, script=script, port_offset=port_offset: result_callback( @@ -206,7 +213,7 @@ def main(): setup_backbone_env() try: - fail_count = run_tests(args.scripts, args.multiply, args.run_directory) + fail_count = run_tests(args.scripts, args.multiply, args.run_directory, args.timeout) exit(fail_count) finally: if has_backbone_tests: