[TRTLLM-5453][infra] Check all steps for test names and verify that every test in waives.txt also exists in the l0 or qa test list. (#6256)

Signed-off-by: qqiao <qqiao@nvidia.com> Signed-off-by: Emma Qiao <qqiao@nvidia.com>
2026-01-13 22:18:36 +08:00 · 2025-10-30 16:56:04 +08:00 · 2025-10-30 16:56:04 +08:00 · a5cc9fe0aa
commit a5cc9fe0aa
parent 13cfd70f57
4 changed files with 116 additions and 34 deletions
--- a/scripts/check_test_list.py
+++ b/scripts/check_test_list.py
@ -82,7 +82,7 @@ def verify_l0_test_lists(llm_src):

    subprocess.run(
        f"cd {llm_src}/tests/integration/defs && "
-        f"pytest --apply-test-list-correction --test-list={test_list} --co -q",
+        f"pytest --test-list={test_list} --output-dir={llm_src} -s --co -q",
        shell=True,
        check=True)

@ -96,19 +96,33 @@ def verify_qa_test_lists(llm_src):
    for test_def_file in test_def_files:
        subprocess.run(
            f"cd {llm_src}/tests/integration/defs && "
-            f"pytest --apply-test-list-correction --test-list={test_def_file} --co -q",
+            f"pytest --test-list={test_def_file} --output-dir={llm_src} -s --co -q",
            shell=True,
            check=True)
+        # Append the test names from each test_def_file to qa_test.txt
+        with open(f"{llm_src}/qa_test.txt", "a") as f:
+            with open(test_def_file, "r") as test_file:
+                lines = test_file.readlines()
+                for line in lines:
+                    # Remove 'TIMEOUT' marker and strip spaces
+                    cleaned_line = line.split(" TIMEOUT ", 1)[0].strip()
+                    if cleaned_line:
+                        f.write(f"{cleaned_line}\n")


-def verify_waive_list(llm_src):
+def verify_waive_list(llm_src, args):
    waives_list_path = f"{llm_src}/tests/integration/test_lists/waives.txt"
+    dup_cases_record = f"{llm_src}/dup_cases.txt"
+    non_existent_cases_record = f"{llm_src}/nonexits_cases.json"
    # Remove prefix and markers in wavies.txt
+    dedup_lines = {
+    }  # Track all occurrences: processed_line -> [(line_no, original_line), ...]
    processed_lines = set()
    with open(waives_list_path, "r") as f:
        lines = f.readlines()

-    for line in lines:
+    for line_no, line in enumerate(lines, 1):
+        original_line = line.strip()
        line = line.strip()

        if not line:
@ -119,14 +133,59 @@ def verify_waive_list(llm_src):
            continue

        # Check for SKIP marker in waives.txt and split by the first occurrence
-        line = line.split(" SKIP ", 1)[0].strip()
+        line = line.split(" SKIP", 1)[0].strip()
+
+        # Track all occurrences of each processed line
+        if line in dedup_lines:
+            dedup_lines[line].append((line_no, original_line))
+        else:
+            dedup_lines[line] = [(line_no, original_line)]

        # If the line starts with 'full:', process it
        if line.startswith("full:"):
            line = line.split("/", 1)[1].lstrip("/")

+        # Skip unittests because they don't need an entry in the test-db yml
+        if line.startswith("unittest/"):
+            continue
+
+        # Check that waived cases also exist in l0_test.txt or qa_test.txt
+        found_in_l0_qa = False
+        if args.l0:
+            with open(f"{llm_src}/l0_test.txt", "r") as f:
+                l0_lines = f.readlines()
+                for l0_line in l0_lines:
+                    if line == l0_line.strip():
+                        found_in_l0_qa = True
+                        break
+        if args.qa:
+            with open(f"{llm_src}/qa_test.txt", "r") as f:
+                qa_lines = f.readlines()
+                for qa_line in qa_lines:
+                    if line == qa_line.strip():
+                        found_in_l0_qa = True
+                        break
+        if not found_in_l0_qa:
+            with open(non_existent_cases_record, "a") as f:
+                f.write(
+                    f"Non-existent test name in l0 or qa list found in waives.txt: {line}\n"
+                )
+
        processed_lines.add(line)

+    # Write duplicate report after processing all lines
+    for processed_line, occurrences in dedup_lines.items():
+        if len(occurrences) > 1:
+            with open(dup_cases_record, "a") as f:
+                f.write(
+                    f"Duplicate waive records found for '{processed_line}' ({len(occurrences)} occurrences):\n"
+                )
+                for i, (line_no, original_line) in enumerate(occurrences, 1):
+                    f.write(
+                        f"  Occurrence {i} at line {line_no}: '{original_line}'\n"
+                    )
+                f.write(f"\n")
+
    # Write the processed lines to a tmp file
    tmp_waives_file = f"{llm_src}/processed_waive_list.txt"
    with open(tmp_waives_file, "w") as f:
@ -134,7 +193,7 @@ def verify_waive_list(llm_src):

    subprocess.run(
        f"cd {llm_src}/tests/integration/defs && "
-        f"pytest --apply-test-list-correction --test-list={tmp_waives_file} --co -q",
+        f"pytest --test-list={tmp_waives_file} --output-dir={llm_src} -s --co -q",
        shell=True,
        check=True)

@ -156,26 +215,67 @@ def main():
    llm_src = os.path.abspath(os.path.join(script_dir, "../"))

    install_python_dependencies(llm_src)
+    pass_flag = True
    # Verify L0 test lists
    if args.l0:
-        print("Starting L0 test list verification...")
+        print("-----------Starting L0 test list verification...-----------",
+              flush=True)
        verify_l0_test_lists(llm_src)
    else:
-        print("Skipping L0 test list verification.")
+        print("-----------Skipping L0 test list verification.-----------",
+              flush=True)

    # Verify QA test lists
    if args.qa:
-        print("Starting QA test list verification...")
+        print("-----------Starting QA test list verification...-----------",
+              flush=True)
        verify_qa_test_lists(llm_src)
    else:
-        print("Skipping QA test list verification.")
+        print("-----------Skipping QA test list verification.-----------",
+              flush=True)

    # Verify waive test lists
    if args.waive:
-        print("Starting waive list verification...")
-        verify_waive_list(llm_src)
+        print("-----------Starting waive list verification...-----------",
+              flush=True)
+        verify_waive_list(llm_src, args)
    else:
-        print("Skipping waive list verification.")
+        print("-----------Skipping waive list verification.-----------",
+              flush=True)
+
+    invalid_json_file = os.path.join(llm_src, "invalid_tests.json")
+    if os.path.isfile(invalid_json_file) and os.path.getsize(
+            invalid_json_file) > 0:
+        print("Invalid cases:")
+        with open(invalid_json_file, "r") as f:
+            print(f.read())
+        print("Invalid test names found, please correct them first!!!\n")
+        pass_flag = False
+
+    duplicate_cases_file = os.path.join(llm_src, "dup_cases.txt")
+    if os.path.isfile(duplicate_cases_file) and os.path.getsize(
+            duplicate_cases_file) > 0:
+        print("Duplicate cases found:")
+        with open(duplicate_cases_file, "r") as f:
+            print(f.read())
+        print(
+            "Duplicate test names found in waives.txt, please delete one or combine them first!!!\n"
+        )
+        pass_flag = False
+
+    non_existent_cases_file = os.path.join(llm_src, "nonexits_cases.json")
+    if os.path.isfile(non_existent_cases_file) and os.path.getsize(
+            non_existent_cases_file) > 0:
+        print("Non-existent cases found in waives.txt:")
+        with open(non_existent_cases_file, "r") as f:
+            print(f.read())
+        print(
+            "Non-unit test test name in waives.txt but not in l0 test list or qa list, please delete them first!!!\n"
+        )
+        pass_flag = False
+
+    if not pass_flag:
+        exit(1)


 if __name__ == "__main__":
--- a/tests/README.md
+++ b/tests/README.md
@ -254,11 +254,6 @@ Add `ISOLATION` to the test case line with proper spacing:
 - disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
 ```

-**For Local Testing (TXT files):**
-```
-disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
-```
-
 ## 7. Combining test markers

 Multiple markers can be combined for the same test case using commas. Both formats are valid:
--- a/tests/integration/defs/test_list_parser.py
+++ b/tests/integration/defs/test_list_parser.py
@ -575,9 +575,10 @@ def handle_corrections(corrections, test_prefix):

 def record_invalid_tests(output_file, corrections):
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
-    with open(output_file, "w") as f:
+    with open(output_file, "a") as f:
        invalid_tests = {"invalid": list(corrections.keys())}
        json.dump(invalid_tests, f)
+        f.write("\n")


 def parse_and_validate_test_list(
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@ -7,10 +7,6 @@ full:GH200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8]
 full:GH200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-int4_awq] SKIP (arm is not supported)
 full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-full_prec] SKIP (arm is not supported)
 full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-fp8] SKIP (arm is not supported)
-full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-int4_awq] SKIP (arm is not supported)
-full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-full_prec] SKIP (arm is not supported)
-full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-fp8] SKIP (arm is not supported)
-full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-int4_awq] SKIP (arm is not supported)
 full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-full_prec] SKIP (arm is not supported)
 full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-fp8] SKIP (arm is not supported)
 full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-int4_awq] SKIP (arm is not supported)
@ -18,7 +14,6 @@ perf/test_perf.py::test_perf[t5_base-plugin-float16-bs:8-input_output_len:60,20]
 perf/test_perf.py::test_perf[flan_t5_base-plugin-float16-bs:8-input_output_len:60,20] SKIP # (https://nvidia.slack.com/archives/C059LSY62BT/p1704525727177449)
 perf/test_perf.py::test_perf[bart_large_cnn-plugin-float16-bs:8-input_output_len:60,20] SKIP # (https://nvidia.slack.com/archives/C059LSY62BT/p1704525727177449)
 accuracy/test_llm_api.py::TestMixtral8x7B::test_smooth_quant_tp2pp2 SKIP (not supported yet)
-examples/test_llama.py::test_llm_llama_v3_1m_long_context_8gpus[Llama-3-70B-Instruct-Gradient-1048k] SKIP (test duration is too long)
 full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
 full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
 full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
@ -28,8 +23,6 @@ perf/test_perf.py::test_perf[llama_v3.1_70b-cppmanager-exe-plugin_ifb-float16-in
 cpp/test_e2e.py::test_model[-encoder-90] SKIP (waive Encoder-only test because it doesn't take batched input)
 full:L40S/examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[disable_weight_only] SKIP (skip on L40S commit f9a0fcb0)
 full:GH200/unittest/trt/model_api/test_model_quantization.py SKIP (https://nvbugspro.nvidia.com/bug/4979955)
-examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-7b-int8_sq-bfloat16-8] SKIP (https://nvbugs/4988782)
-examples/test_llama.py::test_llm_llama_v3_8b_1048k_long_context_ppl[SlimPajama-6B-Llama-3-8B-Instruct-Gradient-1048k] SKIP (https://nvbugs/4993898)
 examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5014327)
 examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-full_prec] SKIP (https://nvbugs/5000026)
 examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-int4_awq] SKIP (https://nvbugs/5000026)
@ -72,8 +65,6 @@ full:B200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] S
 full:B200/accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin SKIP (Disable for Blackwell OOM)
 full:B200/unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" SKIP (Disable for Blackwell OOM)
 full:B200/examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
-examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp4pp1-context_fmha] SKIP (https://nvbugs/5063469)
-examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp2pp2-context_fmha_fp32_acc] SKIP (https://nvbugs/5063469)
 examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
 test_e2e.py::test_openai_consistent_chat SKIP (https://nvbugs/5112075)
 examples/test_eagle.py::test_qwen_eagle_1gpu[qwen_7b_chat-eagle1] SKIP (https://nvbugs/5206383)
@ -137,7 +128,6 @@ examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle1] SKIP (https:/
 examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle1] SKIP (https://nvbugs/5219535)
 examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle2] SKIP (https://nvbugs/5219535)
 examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle2] SKIP (https://nvbugs/5219535)
-examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.1-8b] SKIP (https://nvbugs/5219535)
 examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-4-mini-instruct-fp8-bfloat16] SKIP (https://nvbugspro.nvidia.com/bug/5226339)
 perf/test_perf.py::test_perf[t5-bench-float16-input_output_len:128,20] SKIP # https://nvbugspro.nvidia.com/bug/5207477
 perf/test_perf.py::test_perf[flan_t5_base-bench-float16-input_output_len:128,20] SKIP
@ -217,8 +207,6 @@ unittest/llmapi/test_llm_multi_gpu.py -m "gpu4 and part0" SKIP (https://nvbugs/5
 accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar] SKIP (https://nvbugs/5346443)
 examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5354936)
 examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5354936)
-examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-build] SKIP (https://nvbugs/5247243)
-examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-infer] SKIP (https://nvbugs/5247243)
 examples/test_llama.py::test_llm_llama_1gpu_streaming_llm[ailab-deepseek-coder-6.7b-instruct] SKIP (https://nvbugs/5435714)
 test_e2e.py::test_openai_multinodes_chat_tp16pp1 SKIP (https://nvbugs/5112075)
 examples/test_qwen.py::test_llm_hf_qwen_quantization_1gpu[qwen2_vl_7b_instruct-fp8-bfloat16] SKIP (https://nvbugs/5322488)
@ -290,7 +278,7 @@ accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbug
 accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143, 5481206 WNF)
 accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=False] SKIP (https://nvbugs/5483534)
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687,https://nvbugs/5543035)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687)
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram SKIP (https://nvbugs/5488118)
 test_e2e.py::test_trtllm_bench_iteration_log[TRT-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B] SKIP (https://nvbugs/5448523)
@ -309,7 +297,6 @@ full:L20/accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantize
 full:L20/accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5542862)
 full:L40S/accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantized SKIP (https://nvbugs/5542862)
 full:L40S/accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5542862)
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5543035)
 unittest/_torch/multi_gpu_modeling/test_llama3.py::test_llama_3_3 SKIP (https://nvbugs/5536131)
 examples/test_eagle.py::test_llm_eagle_1gpu_modelopt_ckpt[llama3.1-eagle-8b-hf_v0.5-float16-bs8] SKIP (https://nvbugs/5546507)
 examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle1] SKIP (https://nvbugs/5546507)
@ -328,7 +315,6 @@ test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instru
 test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video] SKIP (https://nvbugs/5547434)
 cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5550689)
 cpp/test_e2e.py::test_benchmarks[bart-90] SKIP (https://nvbugs/5550689)
-examples/test_nemotron_nas.py::test_nemotron_nano_8b_lora_torch[Llama-3.1-Nemotron-Nano-8B-v1] SKIP (https://nvbugs/5563469)
 test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5547437)
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5546510)
 test_e2e.py::test_multi_nodes_eval[Kimi-K2-Instruct-tp16-mmlu] SKIP (https://nvbugs/5556998)