test: [CI] Add failed cases into waives.txt (#6333)

Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
This commit is contained in:
xinhe-nv 2025-07-25 00:18:06 -07:00 committed by GitHub
parent e07fff4f78
commit 470544cf17
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 18 additions and 4 deletions

View File

@ -211,6 +211,7 @@ class TestLlama3_3NemotronSuper49Bv1(CliFlowAccuracyTestHarness):
def test_auto_dtype_tp2(self):
self.run(tasks=[MMLU(self.MODEL_NAME)], tp_size=2, dtype='auto')
@skip_pre_hopper
@pytest.mark.skip(
reason="nemotron-nas scripts have to accommodate fp8 flags")
@pytest.mark.skip_less_device(2)
@ -811,14 +812,14 @@ class TestLlama3_1_8BInstruct(CliFlowAccuracyTestHarness):
def test_auto_dtype(self):
self.run(dtype='auto')
@skip_pre_ada
@skip_pre_hopper
def test_fp8_prequantized(self, mocker):
mocker.patch.object(
self.__class__, "MODEL_PATH",
f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct-FP8")
self.run(quant_algo=QuantAlgo.FP8, kv_cache_quant_algo=QuantAlgo.FP8)
@skip_pre_ada
@skip_pre_hopper
@skip_post_blackwell
def test_medusa_fp8_prequantized(self, mocker):
# nvidia/Llama-3.1-8B-Medusa-FP8
@ -958,6 +959,7 @@ class TestLlama3_3_70BInstruct(CliFlowAccuracyTestHarness):
def test_auto_dtype_tp8(self):
self.run(tasks=[MMLU(self.MODEL_NAME)], tp_size=8, dtype='auto')
@skip_pre_hopper
@pytest.mark.skip_less_device(4)
@pytest.mark.skip_device_not_contain(["H100", "H200", "B200"])
def test_fp8_prequantized_tp4(self, mocker):

View File

@ -307,6 +307,7 @@ class TestLlama3_2_1B(LlmapiAccuracyTestHarness):
task = CnnDailymail(self.MODEL_NAME)
task.evaluate(llm)
@skip_pre_hopper
def test_fp8_prequantized(self):
model_path = f"{llm_models_root()}/llama-3.2-models/Llama-3.2-1B-FP8"
with LLM(model_path) as llm:
@ -1478,6 +1479,7 @@ class TestLlama3_3NemotronSuper49Bv1(LlmapiAccuracyTestHarness):
task.evaluate(llm,
extra_evaluator_kwargs=dict(apply_chat_template=True))
@skip_pre_hopper
@pytest.mark.skip_less_device(2)
@pytest.mark.skip_device_not_contain(["H100", "B200"])
def test_fp8_prequantized_tp2(self):
@ -1507,6 +1509,7 @@ class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):
task.evaluate(llm,
extra_evaluator_kwargs=dict(apply_chat_template=True))
@skip_pre_hopper
@pytest.mark.skip_device_not_contain(["H100", "B200"])
def test_fp8_prequantized(self):
model_path = f"{llm_models_root()}/Llama-3.1-Nemotron-Nano-8B-v1-FP8"
@ -1547,6 +1550,7 @@ class TestNemotronUltra(LlmapiAccuracyTestHarness):
# task.evaluate(llm,
# extra_evaluator_kwargs=dict(apply_chat_template=True))
@skip_pre_hopper
@pytest.mark.skip_less_device(8)
@pytest.mark.skip_device_not_contain(["H100", "B200"])
@parametrize_with_ids("cuda_graph", [False, True])

View File

@ -1938,8 +1938,12 @@ def test_ptp_quickstart_advanced_mixed_precision(llm_root, llm_venv):
("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"),
("qwen2-vl-7b-instruct", "Qwen2-VL-7B-Instruct"),
("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"),
("mistral-small-3.1-24b-instruct", "Mistral-Small-3.1-24B-Instruct-2503"),
("gemma-3-27b-it", "gemma/gemma-3-27b-it"),
pytest.param("mistral-small-3.1-24b-instruct",
"Mistral-Small-3.1-24B-Instruct-2503",
marks=pytest.mark.skip_less_device_memory(80000)),
pytest.param("gemma-3-27b-it",
"gemma/gemma-3-27b-it",
marks=pytest.mark.skip_less_device_memory(80000)),
])
def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
modality, use_cuda_graph):

View File

@ -109,6 +109,8 @@ test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B
test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image-True]
test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-False]
test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True]
test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False]
test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True]
test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B]
test_e2e.py::test_ptp_star_attention_example[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]
test_e2e.py::test_qwen_e2e_cpprunner_large_new_tokens[DeepSeek-R1-Distill-Qwen-1.5B-DeepSeek-R1-Distill-Qwen-1.5B]

View File

@ -421,6 +421,7 @@ triton_server/test_triton_llm.py::test_llava_onevision[test_video-False-1---Fals
triton_server/test_triton.py::test_cpp_unit_tests[cpp-unit-tests] SKIP (https://nvbugs/5401088)
accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype SKIP (https://nvbugs/5401114)
test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True] SKIP (https://nvbugs/5401114)
test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False] SKIP (https://nvbugs/5401114)
examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int4_awq-float16-enable_attn_plugin-enable_gemm_plugin] SKIP (https://nvbugs/5401233)
examples/test_recurrentgemma.py::test_llm_recurrentgemma_2gpu[recurrentgemma-2b] SKIP (https://nvbugs/5401233)
examples/test_multimodal.py::test_llm_multimodal_general[VILA1.5-3b-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5401156)
@ -440,3 +441,4 @@ unittest/trt/attention/test_gpt_attention.py -k "partition0" SKIP (https://nvbug
unittest/trt/attention/test_gpt_attention.py -k "partition1" SKIP (https://nvbugs/5412456)
unittest/trt/attention/test_gpt_attention.py -k "partition2" SKIP (https://nvbugs/5412456)
unittest/trt/attention/test_gpt_attention.py -k "partition3" SKIP (https://nvbugs/5412456)
test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5414909)