[TRTLLM-5453][infra] Check all steps for test name and also check the test in waives.txt also exists in l0 or qa test list. (#6256)

Signed-off-by: qqiao <qqiao@nvidia.com>
Signed-off-by: Emma Qiao <qqiao@nvidia.com>
This commit is contained in:
Emma Qiao 2025-10-30 16:56:04 +08:00 committed by GitHub
parent 13cfd70f57
commit a5cc9fe0aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 116 additions and 34 deletions

View File

@ -82,7 +82,7 @@ def verify_l0_test_lists(llm_src):
subprocess.run(
f"cd {llm_src}/tests/integration/defs && "
f"pytest --apply-test-list-correction --test-list={test_list} --co -q",
f"pytest --test-list={test_list} --output-dir={llm_src} -s --co -q",
shell=True,
check=True)
@ -96,19 +96,33 @@ def verify_qa_test_lists(llm_src):
for test_def_file in test_def_files:
subprocess.run(
f"cd {llm_src}/tests/integration/defs && "
f"pytest --apply-test-list-correction --test-list={test_def_file} --co -q",
f"pytest --test-list={test_def_file} --output-dir={llm_src} -s --co -q",
shell=True,
check=True)
# Append the contents of each test_def_file to qa_test.txt
with open(f"{llm_src}/qa_test.txt", "a") as f:
with open(test_def_file, "r") as test_file:
lines = test_file.readlines()
for line in lines:
# Remove 'TIMEOUT' marker and strip spaces
cleaned_line = line.split(" TIMEOUT ", 1)[0].strip()
if cleaned_line:
f.write(f"{cleaned_line}\n")
def verify_waive_list(llm_src):
def verify_waive_list(llm_src, args):
waives_list_path = f"{llm_src}/tests/integration/test_lists/waives.txt"
dup_cases_record = f"{llm_src}/dup_cases.txt"
non_existent_cases_record = f"{llm_src}/nonexits_cases.json"
# Remove prefix and markers in waives.txt
dedup_lines = {
} # Track all occurrences: processed_line -> [(line_no, original_line), ...]
processed_lines = set()
with open(waives_list_path, "r") as f:
lines = f.readlines()
for line in lines:
for line_no, line in enumerate(lines, 1):
original_line = line.strip()
line = line.strip()
if not line:
@ -119,14 +133,59 @@ def verify_waive_list(llm_src):
continue
# Check for SKIP marker in waives.txt and split by the first occurrence
line = line.split(" SKIP ", 1)[0].strip()
line = line.split(" SKIP", 1)[0].strip()
# Track all occurrences of each processed line
if line in dedup_lines:
dedup_lines[line].append((line_no, original_line))
else:
dedup_lines[line] = [(line_no, original_line)]
# If the line starts with 'full:', process it
if line.startswith("full:"):
line = line.split("/", 1)[1].lstrip("/")
# Skip unittests because they don't need an entry in the test-db yml
if line.startswith("unittest/"):
continue
# Check that waived cases also exist in l0_test.txt and qa_test.txt
found_in_l0_qa = False
if args.l0:
with open(f"{llm_src}/l0_test.txt", "r") as f:
l0_lines = f.readlines()
for l0_line in l0_lines:
if line == l0_line.strip():
found_in_l0_qa = True
break
if args.qa:
with open(f"{llm_src}/qa_test.txt", "r") as f:
qa_lines = f.readlines()
for qa_line in qa_lines:
if line == qa_line.strip():
found_in_l0_qa = True
break
if not found_in_l0_qa:
with open(non_existent_cases_record, "a") as f:
f.write(
f"Non-existent test name in l0 or qa list found in waives.txt: {line}\n"
)
processed_lines.add(line)
# Write duplicate report after processing all lines
for processed_line, occurrences in dedup_lines.items():
if len(occurrences) > 1:
with open(dup_cases_record, "a") as f:
f.write(
f"Duplicate waive records found for '{processed_line}' ({len(occurrences)} occurrences):\n"
)
for i, (line_no, original_line) in enumerate(occurrences, 1):
f.write(
f" Occurrence {i} at line {line_no}: '{original_line}'\n"
)
f.write(f"\n")
# Write the processed lines to a tmp file
tmp_waives_file = f"{llm_src}/processed_waive_list.txt"
with open(tmp_waives_file, "w") as f:
@ -134,7 +193,7 @@ def verify_waive_list(llm_src):
subprocess.run(
f"cd {llm_src}/tests/integration/defs && "
f"pytest --apply-test-list-correction --test-list={tmp_waives_file} --co -q",
f"pytest --test-list={tmp_waives_file} --output-dir={llm_src} -s --co -q",
shell=True,
check=True)
@ -156,26 +215,67 @@ def main():
llm_src = os.path.abspath(os.path.join(script_dir, "../"))
install_python_dependencies(llm_src)
pass_flag = True
# Verify L0 test lists
if args.l0:
print("Starting L0 test list verification...")
print("-----------Starting L0 test list verification...-----------",
flush=True)
verify_l0_test_lists(llm_src)
else:
print("Skipping L0 test list verification.")
print("-----------Skipping L0 test list verification.-----------",
flush=True)
# Verify QA test lists
if args.qa:
print("Starting QA test list verification...")
print("-----------Starting QA test list verification...-----------",
flush=True)
verify_qa_test_lists(llm_src)
else:
print("Skipping QA test list verification.")
print("-----------Skipping QA test list verification.-----------",
flush=True)
# Verify waive test lists
if args.waive:
print("Starting waive list verification...")
verify_waive_list(llm_src)
print("-----------Starting waive list verification...-----------",
flush=True)
verify_waive_list(llm_src, args)
else:
print("Skipping waive list verification.")
print("-----------Skipping waive list verification.-----------",
flush=True)
invalid_json_file = os.path.join(llm_src, "invalid_tests.json")
if os.path.isfile(invalid_json_file) and os.path.getsize(
invalid_json_file) > 0:
print("Invalid cases:")
with open(invalid_json_file, "r") as f:
print(f.read())
print("Invalid test names found, please correct them first!!!\n")
pass_flag = False
duplicate_cases_file = os.path.join(llm_src, "dup_cases.txt")
if os.path.isfile(duplicate_cases_file) and os.path.getsize(
duplicate_cases_file) > 0:
print("Duplicate cases found:")
with open(duplicate_cases_file, "r") as f:
print(f.read())
print(
"Duplicate test names found in waives.txt, please delete one or combine them first!!!\n"
)
pass_flag = False
non_existent_cases_file = os.path.join(llm_src, "nonexits_cases.json")
if os.path.isfile(non_existent_cases_file) and os.path.getsize(
non_existent_cases_file) > 0:
print("Non-existent cases found in waives.txt:")
with open(non_existent_cases_file, "r") as f:
print(f.read())
print(
"Non-unit test test name in waives.txt but not in l0 test list or qa list, please delete them first!!!\n"
)
pass_flag = False
if not pass_flag:
exit(1)
if __name__ == "__main__":

View File

@ -254,11 +254,6 @@ Add `ISOLATION` to the test case line with proper spacing:
- disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
```
**For Local Testing (TXT files):**
```
disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
```
## 7. Combining test markers
Multiple markers can be combined for the same test case using commas. Both formats are valid:

View File

@ -575,9 +575,10 @@ def handle_corrections(corrections, test_prefix):
def record_invalid_tests(output_file, corrections):
os.makedirs(os.path.dirname(output_file), exist_ok=True)
with open(output_file, "w") as f:
with open(output_file, "a") as f:
invalid_tests = {"invalid": list(corrections.keys())}
json.dump(invalid_tests, f)
f.write("\n")
def parse_and_validate_test_list(

View File

@ -7,10 +7,6 @@ full:GH200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8]
full:GH200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-int4_awq] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-full_prec] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-fp8] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-int4_awq] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-full_prec] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-fp8] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-int4_awq] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-full_prec] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-fp8] SKIP (arm is not supported)
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-int4_awq] SKIP (arm is not supported)
@ -18,7 +14,6 @@ perf/test_perf.py::test_perf[t5_base-plugin-float16-bs:8-input_output_len:60,20]
perf/test_perf.py::test_perf[flan_t5_base-plugin-float16-bs:8-input_output_len:60,20] SKIP # (https://nvidia.slack.com/archives/C059LSY62BT/p1704525727177449)
perf/test_perf.py::test_perf[bart_large_cnn-plugin-float16-bs:8-input_output_len:60,20] SKIP # (https://nvidia.slack.com/archives/C059LSY62BT/p1704525727177449)
accuracy/test_llm_api.py::TestMixtral8x7B::test_smooth_quant_tp2pp2 SKIP (not supported yet)
examples/test_llama.py::test_llm_llama_v3_1m_long_context_8gpus[Llama-3-70B-Instruct-Gradient-1048k] SKIP (test duration is too long)
full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
@ -28,8 +23,6 @@ perf/test_perf.py::test_perf[llama_v3.1_70b-cppmanager-exe-plugin_ifb-float16-in
cpp/test_e2e.py::test_model[-encoder-90] SKIP (waive Encoder-only test because it doesn't take batched input)
full:L40S/examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[disable_weight_only] SKIP (skip on L40S commit f9a0fcb0)
full:GH200/unittest/trt/model_api/test_model_quantization.py SKIP (https://nvbugspro.nvidia.com/bug/4979955)
examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-7b-int8_sq-bfloat16-8] SKIP (https://nvbugs/4988782)
examples/test_llama.py::test_llm_llama_v3_8b_1048k_long_context_ppl[SlimPajama-6B-Llama-3-8B-Instruct-Gradient-1048k] SKIP (https://nvbugs/4993898)
examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5014327)
examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-full_prec] SKIP (https://nvbugs/5000026)
examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-int4_awq] SKIP (https://nvbugs/5000026)
@ -72,8 +65,6 @@ full:B200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] S
full:B200/accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin SKIP (Disable for Blackwell OOM)
full:B200/unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" SKIP (Disable for Blackwell OOM)
full:B200/examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp4pp1-context_fmha] SKIP (https://nvbugs/5063469)
examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp2pp2-context_fmha_fp32_acc] SKIP (https://nvbugs/5063469)
examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
test_e2e.py::test_openai_consistent_chat SKIP (https://nvbugs/5112075)
examples/test_eagle.py::test_qwen_eagle_1gpu[qwen_7b_chat-eagle1] SKIP (https://nvbugs/5206383)
@ -137,7 +128,6 @@ examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle1] SKIP (https:/
examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle1] SKIP (https://nvbugs/5219535)
examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle2] SKIP (https://nvbugs/5219535)
examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle2] SKIP (https://nvbugs/5219535)
examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.1-8b] SKIP (https://nvbugs/5219535)
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-4-mini-instruct-fp8-bfloat16] SKIP (https://nvbugspro.nvidia.com/bug/5226339)
perf/test_perf.py::test_perf[t5-bench-float16-input_output_len:128,20] SKIP # https://nvbugspro.nvidia.com/bug/5207477
perf/test_perf.py::test_perf[flan_t5_base-bench-float16-input_output_len:128,20] SKIP
@ -217,8 +207,6 @@ unittest/llmapi/test_llm_multi_gpu.py -m "gpu4 and part0" SKIP (https://nvbugs/5
accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar] SKIP (https://nvbugs/5346443)
examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5354936)
examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5354936)
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-build] SKIP (https://nvbugs/5247243)
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-infer] SKIP (https://nvbugs/5247243)
examples/test_llama.py::test_llm_llama_1gpu_streaming_llm[ailab-deepseek-coder-6.7b-instruct] SKIP (https://nvbugs/5435714)
test_e2e.py::test_openai_multinodes_chat_tp16pp1 SKIP (https://nvbugs/5112075)
examples/test_qwen.py::test_llm_hf_qwen_quantization_1gpu[qwen2_vl_7b_instruct-fp8-bfloat16] SKIP (https://nvbugs/5322488)
@ -290,7 +278,7 @@ accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbug
accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143, 5481206 WNF)
accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=False] SKIP (https://nvbugs/5483534)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687,https://nvbugs/5543035)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687)
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram SKIP (https://nvbugs/5488118)
test_e2e.py::test_trtllm_bench_iteration_log[TRT-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B] SKIP (https://nvbugs/5448523)
@ -309,7 +297,6 @@ full:L20/accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantize
full:L20/accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5542862)
full:L40S/accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantized SKIP (https://nvbugs/5542862)
full:L40S/accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5542862)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5543035)
unittest/_torch/multi_gpu_modeling/test_llama3.py::test_llama_3_3 SKIP (https://nvbugs/5536131)
examples/test_eagle.py::test_llm_eagle_1gpu_modelopt_ckpt[llama3.1-eagle-8b-hf_v0.5-float16-bs8] SKIP (https://nvbugs/5546507)
examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle1] SKIP (https://nvbugs/5546507)
@ -328,7 +315,6 @@ test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instru
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video] SKIP (https://nvbugs/5547434)
cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5550689)
cpp/test_e2e.py::test_benchmarks[bart-90] SKIP (https://nvbugs/5550689)
examples/test_nemotron_nas.py::test_nemotron_nano_8b_lora_torch[Llama-3.1-Nemotron-Nano-8B-v1] SKIP (https://nvbugs/5563469)
test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5547437)
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5546510)
test_e2e.py::test_multi_nodes_eval[Kimi-K2-Instruct-tp16-mmlu] SKIP (https://nvbugs/5556998)