mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[TRTLLM-5453][infra] Check all steps for test name and also check the test in waives.txt also exists in l0 or qa test list. (#6256)
Signed-off-by: qqiao <qqiao@nvidia.com> Signed-off-by: Emma Qiao <qqiao@nvidia.com>
This commit is contained in:
parent
13cfd70f57
commit
a5cc9fe0aa
@ -82,7 +82,7 @@ def verify_l0_test_lists(llm_src):
|
||||
|
||||
subprocess.run(
|
||||
f"cd {llm_src}/tests/integration/defs && "
|
||||
f"pytest --apply-test-list-correction --test-list={test_list} --co -q",
|
||||
f"pytest --test-list={test_list} --output-dir={llm_src} -s --co -q",
|
||||
shell=True,
|
||||
check=True)
|
||||
|
||||
@ -96,19 +96,33 @@ def verify_qa_test_lists(llm_src):
|
||||
for test_def_file in test_def_files:
|
||||
subprocess.run(
|
||||
f"cd {llm_src}/tests/integration/defs && "
|
||||
f"pytest --apply-test-list-correction --test-list={test_def_file} --co -q",
|
||||
f"pytest --test-list={test_def_file} --output-dir={llm_src} -s --co -q",
|
||||
shell=True,
|
||||
check=True)
|
||||
# append all the test_def_file to qa_test.txt
|
||||
with open(f"{llm_src}/qa_test.txt", "a") as f:
|
||||
with open(test_def_file, "r") as test_file:
|
||||
lines = test_file.readlines()
|
||||
for line in lines:
|
||||
# Remove 'TIMEOUT' marker and strip spaces
|
||||
cleaned_line = line.split(" TIMEOUT ", 1)[0].strip()
|
||||
if cleaned_line:
|
||||
f.write(f"{cleaned_line}\n")
|
||||
|
||||
|
||||
def verify_waive_list(llm_src):
|
||||
def verify_waive_list(llm_src, args):
|
||||
waives_list_path = f"{llm_src}/tests/integration/test_lists/waives.txt"
|
||||
dup_cases_record = f"{llm_src}/dup_cases.txt"
|
||||
non_existent_cases_record = f"{llm_src}/nonexits_cases.json"
|
||||
# Remove prefix and markers in waives.txt
|
||||
dedup_lines = {
|
||||
} # Track all occurrences: processed_line -> [(line_no, original_line), ...]
|
||||
processed_lines = set()
|
||||
with open(waives_list_path, "r") as f:
|
||||
lines = f.readlines()
|
||||
|
||||
for line in lines:
|
||||
for line_no, line in enumerate(lines, 1):
|
||||
original_line = line.strip()
|
||||
line = line.strip()
|
||||
|
||||
if not line:
|
||||
@ -119,14 +133,59 @@ def verify_waive_list(llm_src):
|
||||
continue
|
||||
|
||||
# Check for SKIP marker in waives.txt and split by the first occurrence
|
||||
line = line.split(" SKIP ", 1)[0].strip()
|
||||
line = line.split(" SKIP", 1)[0].strip()
|
||||
|
||||
# Track all occurrences of each processed line
|
||||
if line in dedup_lines:
|
||||
dedup_lines[line].append((line_no, original_line))
|
||||
else:
|
||||
dedup_lines[line] = [(line_no, original_line)]
|
||||
|
||||
# If the line starts with 'full:', process it
|
||||
if line.startswith("full:"):
|
||||
line = line.split("/", 1)[1].lstrip("/")
|
||||
|
||||
# Skip unittests because they don't need an entry in the test-db yml
|
||||
if line.startswith("unittest/"):
|
||||
continue
|
||||
|
||||
# Check that waived cases also exist in l0_test.txt and qa_test.txt
|
||||
found_in_l0_qa = False
|
||||
if args.l0:
|
||||
with open(f"{llm_src}/l0_test.txt", "r") as f:
|
||||
l0_lines = f.readlines()
|
||||
for l0_line in l0_lines:
|
||||
if line == l0_line.strip():
|
||||
found_in_l0_qa = True
|
||||
break
|
||||
if args.qa:
|
||||
with open(f"{llm_src}/qa_test.txt", "r") as f:
|
||||
qa_lines = f.readlines()
|
||||
for qa_line in qa_lines:
|
||||
if line == qa_line.strip():
|
||||
found_in_l0_qa = True
|
||||
break
|
||||
if not found_in_l0_qa:
|
||||
with open(non_existent_cases_record, "a") as f:
|
||||
f.write(
|
||||
f"Non-existent test name in l0 or qa list found in waives.txt: {line}\n"
|
||||
)
|
||||
|
||||
processed_lines.add(line)
|
||||
|
||||
# Write duplicate report after processing all lines
|
||||
for processed_line, occurrences in dedup_lines.items():
|
||||
if len(occurrences) > 1:
|
||||
with open(dup_cases_record, "a") as f:
|
||||
f.write(
|
||||
f"Duplicate waive records found for '{processed_line}' ({len(occurrences)} occurrences):\n"
|
||||
)
|
||||
for i, (line_no, original_line) in enumerate(occurrences, 1):
|
||||
f.write(
|
||||
f" Occurrence {i} at line {line_no}: '{original_line}'\n"
|
||||
)
|
||||
f.write(f"\n")
|
||||
|
||||
# Write the processed lines to a tmp file
|
||||
tmp_waives_file = f"{llm_src}/processed_waive_list.txt"
|
||||
with open(tmp_waives_file, "w") as f:
|
||||
@ -134,7 +193,7 @@ def verify_waive_list(llm_src):
|
||||
|
||||
subprocess.run(
|
||||
f"cd {llm_src}/tests/integration/defs && "
|
||||
f"pytest --apply-test-list-correction --test-list={tmp_waives_file} --co -q",
|
||||
f"pytest --test-list={tmp_waives_file} --output-dir={llm_src} -s --co -q",
|
||||
shell=True,
|
||||
check=True)
|
||||
|
||||
@ -156,26 +215,67 @@ def main():
|
||||
llm_src = os.path.abspath(os.path.join(script_dir, "../"))
|
||||
|
||||
install_python_dependencies(llm_src)
|
||||
pass_flag = True
|
||||
# Verify L0 test lists
|
||||
if args.l0:
|
||||
print("Starting L0 test list verification...")
|
||||
print("-----------Starting L0 test list verification...-----------",
|
||||
flush=True)
|
||||
verify_l0_test_lists(llm_src)
|
||||
else:
|
||||
print("Skipping L0 test list verification.")
|
||||
print("-----------Skipping L0 test list verification.-----------",
|
||||
flush=True)
|
||||
|
||||
# Verify QA test lists
|
||||
if args.qa:
|
||||
print("Starting QA test list verification...")
|
||||
print("-----------Starting QA test list verification...-----------",
|
||||
flush=True)
|
||||
verify_qa_test_lists(llm_src)
|
||||
else:
|
||||
print("Skipping QA test list verification.")
|
||||
print("-----------Skipping QA test list verification.-----------",
|
||||
flush=True)
|
||||
|
||||
# Verify waive test lists
|
||||
if args.waive:
|
||||
print("Starting waive list verification...")
|
||||
verify_waive_list(llm_src)
|
||||
print("-----------Starting waive list verification...-----------",
|
||||
flush=True)
|
||||
verify_waive_list(llm_src, args)
|
||||
else:
|
||||
print("Skipping waive list verification.")
|
||||
print("-----------Skipping waive list verification.-----------",
|
||||
flush=True)
|
||||
|
||||
invalid_json_file = os.path.join(llm_src, "invalid_tests.json")
|
||||
if os.path.isfile(invalid_json_file) and os.path.getsize(
|
||||
invalid_json_file) > 0:
|
||||
print("Invalid cases:")
|
||||
with open(invalid_json_file, "r") as f:
|
||||
print(f.read())
|
||||
print("Invalid test names found, please correct them first!!!\n")
|
||||
pass_flag = False
|
||||
|
||||
duplicate_cases_file = os.path.join(llm_src, "dup_cases.txt")
|
||||
if os.path.isfile(duplicate_cases_file) and os.path.getsize(
|
||||
duplicate_cases_file) > 0:
|
||||
print("Duplicate cases found:")
|
||||
with open(duplicate_cases_file, "r") as f:
|
||||
print(f.read())
|
||||
print(
|
||||
"Duplicate test names found in waives.txt, please delete one or combine them first!!!\n"
|
||||
)
|
||||
pass_flag = False
|
||||
|
||||
non_existent_cases_file = os.path.join(llm_src, "nonexits_cases.json")
|
||||
if os.path.isfile(non_existent_cases_file) and os.path.getsize(
|
||||
non_existent_cases_file) > 0:
|
||||
print("Non-existent cases found in waives.txt:")
|
||||
with open(non_existent_cases_file, "r") as f:
|
||||
print(f.read())
|
||||
print(
|
||||
"Non-unit test test name in waives.txt but not in l0 test list or qa list, please delete them first!!!\n"
|
||||
)
|
||||
pass_flag = False
|
||||
|
||||
if not pass_flag:
|
||||
exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -254,11 +254,6 @@ Add `ISOLATION` to the test case line with proper spacing:
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
|
||||
```
|
||||
|
||||
**For Local Testing (TXT files):**
|
||||
```
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
|
||||
```
|
||||
|
||||
## 7. Combining test markers
|
||||
|
||||
Multiple markers can be combined for the same test case using commas. Both formats are valid:
|
||||
|
||||
@ -575,9 +575,10 @@ def handle_corrections(corrections, test_prefix):
|
||||
|
||||
def record_invalid_tests(output_file, corrections):
|
||||
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
||||
with open(output_file, "w") as f:
|
||||
with open(output_file, "a") as f:
|
||||
invalid_tests = {"invalid": list(corrections.keys())}
|
||||
json.dump(invalid_tests, f)
|
||||
f.write("\n")
|
||||
|
||||
|
||||
def parse_and_validate_test_list(
|
||||
|
||||
@ -7,10 +7,6 @@ full:GH200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8]
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-int4_awq] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-full_prec] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-fp8] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-int4_awq] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-full_prec] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-fp8] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[float16-int4_awq] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-full_prec] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-fp8] SKIP (arm is not supported)
|
||||
full:GH200/examples/test_nemotron.py::test_llm_nemotron_4_15b_2gpus[bfloat16-int4_awq] SKIP (arm is not supported)
|
||||
@ -18,7 +14,6 @@ perf/test_perf.py::test_perf[t5_base-plugin-float16-bs:8-input_output_len:60,20]
|
||||
perf/test_perf.py::test_perf[flan_t5_base-plugin-float16-bs:8-input_output_len:60,20] SKIP # (https://nvidia.slack.com/archives/C059LSY62BT/p1704525727177449)
|
||||
perf/test_perf.py::test_perf[bart_large_cnn-plugin-float16-bs:8-input_output_len:60,20] SKIP # (https://nvidia.slack.com/archives/C059LSY62BT/p1704525727177449)
|
||||
accuracy/test_llm_api.py::TestMixtral8x7B::test_smooth_quant_tp2pp2 SKIP (not supported yet)
|
||||
examples/test_llama.py::test_llm_llama_v3_1m_long_context_8gpus[Llama-3-70B-Instruct-Gradient-1048k] SKIP (test duration is too long)
|
||||
full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
|
||||
full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
|
||||
full:GH200/examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/4731514)
|
||||
@ -28,8 +23,6 @@ perf/test_perf.py::test_perf[llama_v3.1_70b-cppmanager-exe-plugin_ifb-float16-in
|
||||
cpp/test_e2e.py::test_model[-encoder-90] SKIP (waive Encoder-only test because it doesn't take batched input)
|
||||
full:L40S/examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[disable_weight_only] SKIP (skip on L40S commit f9a0fcb0)
|
||||
full:GH200/unittest/trt/model_api/test_model_quantization.py SKIP (https://nvbugspro.nvidia.com/bug/4979955)
|
||||
examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-7b-int8_sq-bfloat16-8] SKIP (https://nvbugs/4988782)
|
||||
examples/test_llama.py::test_llm_llama_v3_8b_1048k_long_context_ppl[SlimPajama-6B-Llama-3-8B-Instruct-Gradient-1048k] SKIP (https://nvbugs/4993898)
|
||||
examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5014327)
|
||||
examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-full_prec] SKIP (https://nvbugs/5000026)
|
||||
examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-int4_awq] SKIP (https://nvbugs/5000026)
|
||||
@ -72,8 +65,6 @@ full:B200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] S
|
||||
full:B200/accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin SKIP (Disable for Blackwell OOM)
|
||||
full:B200/unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" SKIP (Disable for Blackwell OOM)
|
||||
full:B200/examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
|
||||
examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp4pp1-context_fmha] SKIP (https://nvbugs/5063469)
|
||||
examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp2pp2-context_fmha_fp32_acc] SKIP (https://nvbugs/5063469)
|
||||
examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
|
||||
test_e2e.py::test_openai_consistent_chat SKIP (https://nvbugs/5112075)
|
||||
examples/test_eagle.py::test_qwen_eagle_1gpu[qwen_7b_chat-eagle1] SKIP (https://nvbugs/5206383)
|
||||
@ -137,7 +128,6 @@ examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle1] SKIP (https:/
|
||||
examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle1] SKIP (https://nvbugs/5219535)
|
||||
examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle2] SKIP (https://nvbugs/5219535)
|
||||
examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle2] SKIP (https://nvbugs/5219535)
|
||||
examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.1-8b] SKIP (https://nvbugs/5219535)
|
||||
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-4-mini-instruct-fp8-bfloat16] SKIP (https://nvbugspro.nvidia.com/bug/5226339)
|
||||
perf/test_perf.py::test_perf[t5-bench-float16-input_output_len:128,20] SKIP # https://nvbugspro.nvidia.com/bug/5207477
|
||||
perf/test_perf.py::test_perf[flan_t5_base-bench-float16-input_output_len:128,20] SKIP
|
||||
@ -217,8 +207,6 @@ unittest/llmapi/test_llm_multi_gpu.py -m "gpu4 and part0" SKIP (https://nvbugs/5
|
||||
accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar] SKIP (https://nvbugs/5346443)
|
||||
examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5354936)
|
||||
examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5354936)
|
||||
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-build] SKIP (https://nvbugs/5247243)
|
||||
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-infer] SKIP (https://nvbugs/5247243)
|
||||
examples/test_llama.py::test_llm_llama_1gpu_streaming_llm[ailab-deepseek-coder-6.7b-instruct] SKIP (https://nvbugs/5435714)
|
||||
test_e2e.py::test_openai_multinodes_chat_tp16pp1 SKIP (https://nvbugs/5112075)
|
||||
examples/test_qwen.py::test_llm_hf_qwen_quantization_1gpu[qwen2_vl_7b_instruct-fp8-bfloat16] SKIP (https://nvbugs/5322488)
|
||||
@ -290,7 +278,7 @@ accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbug
|
||||
accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143, 5481206 WNF)
|
||||
accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=False] SKIP (https://nvbugs/5483534)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687,https://nvbugs/5543035)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687)
|
||||
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram SKIP (https://nvbugs/5488118)
|
||||
test_e2e.py::test_trtllm_bench_iteration_log[TRT-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B] SKIP (https://nvbugs/5448523)
|
||||
@ -309,7 +297,6 @@ full:L20/accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantize
|
||||
full:L20/accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5542862)
|
||||
full:L40S/accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantized SKIP (https://nvbugs/5542862)
|
||||
full:L40S/accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5542862)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5543035)
|
||||
unittest/_torch/multi_gpu_modeling/test_llama3.py::test_llama_3_3 SKIP (https://nvbugs/5536131)
|
||||
examples/test_eagle.py::test_llm_eagle_1gpu_modelopt_ckpt[llama3.1-eagle-8b-hf_v0.5-float16-bs8] SKIP (https://nvbugs/5546507)
|
||||
examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle1] SKIP (https://nvbugs/5546507)
|
||||
@ -328,7 +315,6 @@ test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instru
|
||||
test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video] SKIP (https://nvbugs/5547434)
|
||||
cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5550689)
|
||||
cpp/test_e2e.py::test_benchmarks[bart-90] SKIP (https://nvbugs/5550689)
|
||||
examples/test_nemotron_nas.py::test_nemotron_nano_8b_lora_torch[Llama-3.1-Nemotron-Nano-8B-v1] SKIP (https://nvbugs/5563469)
|
||||
test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5547437)
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5546510)
|
||||
test_e2e.py::test_multi_nodes_eval[Kimi-K2-Instruct-tp16-mmlu] SKIP (https://nvbugs/5556998)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user