mirror of https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00

test: [CI] Add failed cases into waives.txt (#6333)

Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>

parent e07fff4f78
commit 470544cf17
@@ -211,6 +211,7 @@ class TestLlama3_3NemotronSuper49Bv1(CliFlowAccuracyTestHarness):
     def test_auto_dtype_tp2(self):
         self.run(tasks=[MMLU(self.MODEL_NAME)], tp_size=2, dtype='auto')
 
+    @skip_pre_hopper
     @pytest.mark.skip(
         reason="nemotron-nas scripts have to accommodate fp8 flags")
     @pytest.mark.skip_less_device(2)
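
Note: skip_pre_hopper (and the sibling skip_pre_ada / skip_post_blackwell used below) are decorators defined inside the TensorRT-LLM test suite, not pytest built-ins. A minimal sketch of how such a guard can be built, assuming the check keys off CUDA compute capability (Hopper = SM 9.x); the repo's actual definition may differ:

import pytest
import torch

def _compute_capability():
    return torch.cuda.get_device_capability()  # e.g. (9, 0) on H100

# Skip on anything older than Hopper; short-circuits if no GPU is visible.
skip_pre_hopper = pytest.mark.skipif(
    not torch.cuda.is_available() or _compute_capability()[0] < 9,
    reason="requires a Hopper (SM90) or newer GPU",
)
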
@@ -811,14 +812,14 @@ class TestLlama3_1_8BInstruct(CliFlowAccuracyTestHarness):
     def test_auto_dtype(self):
         self.run(dtype='auto')
 
-    @skip_pre_ada
+    @skip_pre_hopper
     def test_fp8_prequantized(self, mocker):
         mocker.patch.object(
             self.__class__, "MODEL_PATH",
             f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct-FP8")
         self.run(quant_algo=QuantAlgo.FP8, kv_cache_quant_algo=QuantAlgo.FP8)
 
-    @skip_pre_ada
+    @skip_pre_hopper
     @skip_post_blackwell
     def test_medusa_fp8_prequantized(self, mocker):
         # nvidia/Llama-3.1-8B-Medusa-FP8
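
Note: mocker is the pytest-mock fixture; mocker.patch.object replaces the MODEL_PATH class attribute for the duration of one test and restores it automatically at teardown, so the FP8 checkpoint never leaks into other tests. A reduced illustration (paths hypothetical, requires pytest-mock):

class TestLlama3_1_8BInstruct:
    MODEL_PATH = "/models/Llama-3.1-8B-Instruct"  # default checkpoint

    def test_fp8_prequantized(self, mocker):
        # Point the harness at the FP8 checkpoint for this test only.
        mocker.patch.object(self.__class__, "MODEL_PATH",
                            "/models/Llama-3.1-8B-Instruct-FP8")
        assert self.MODEL_PATH.endswith("-FP8")
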
@@ -958,6 +959,7 @@ class TestLlama3_3_70BInstruct(CliFlowAccuracyTestHarness):
     def test_auto_dtype_tp8(self):
         self.run(tasks=[MMLU(self.MODEL_NAME)], tp_size=8, dtype='auto')
 
+    @skip_pre_hopper
     @pytest.mark.skip_less_device(4)
     @pytest.mark.skip_device_not_contain(["H100", "H200", "B200"])
     def test_fp8_prequantized_tp4(self, mocker):
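
Note: skip_less_device(N) and skip_device_not_contain([...]) are suite-defined marks rather than pytest built-ins. One plausible enforcement point, sketched as a hypothetical conftest.py hook (the repo's real implementation may differ):

import pytest
import torch

def pytest_collection_modifyitems(config, items):
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
    for item in items:
        mark = item.get_closest_marker("skip_less_device")
        if mark and n_gpus < mark.args[0]:
            item.add_marker(pytest.mark.skip(
                reason=f"needs {mark.args[0]} GPUs, found {n_gpus}"))
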
@@ -307,6 +307,7 @@ class TestLlama3_2_1B(LlmapiAccuracyTestHarness):
         task = CnnDailymail(self.MODEL_NAME)
         task.evaluate(llm)
 
+    @skip_pre_hopper
     def test_fp8_prequantized(self):
         model_path = f"{llm_models_root()}/llama-3.2-models/Llama-3.2-1B-FP8"
         with LLM(model_path) as llm:
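
Note: in the LlmapiAccuracyTestHarness tests the engine is built inside a with block, so GPU memory is released even if the accuracy check raises. A minimal sketch following the usage visible in this hunk (the task argument stands in for wrappers like CnnDailymail):

from tensorrt_llm import LLM

def run_prequantized_accuracy(model_path, task):
    # Executor resources are torn down when the block exits.
    with LLM(model_path) as llm:
        task.evaluate(llm)  # raises if scores fall below the reference
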
@@ -1478,6 +1479,7 @@ class TestLlama3_3NemotronSuper49Bv1(LlmapiAccuracyTestHarness):
         task.evaluate(llm,
                       extra_evaluator_kwargs=dict(apply_chat_template=True))
 
+    @skip_pre_hopper
     @pytest.mark.skip_less_device(2)
     @pytest.mark.skip_device_not_contain(["H100", "B200"])
     def test_fp8_prequantized_tp2(self):
@@ -1507,6 +1509,7 @@ class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):
         task.evaluate(llm,
                       extra_evaluator_kwargs=dict(apply_chat_template=True))
 
+    @skip_pre_hopper
     @pytest.mark.skip_device_not_contain(["H100", "B200"])
     def test_fp8_prequantized(self):
         model_path = f"{llm_models_root()}/Llama-3.1-Nemotron-Nano-8B-v1-FP8"
@@ -1547,6 +1550,7 @@ class TestNemotronUltra(LlmapiAccuracyTestHarness):
         # task.evaluate(llm,
         #               extra_evaluator_kwargs=dict(apply_chat_template=True))
 
+    @skip_pre_hopper
     @pytest.mark.skip_less_device(8)
     @pytest.mark.skip_device_not_contain(["H100", "B200"])
     @parametrize_with_ids("cuda_graph", [False, True])
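
Note: parametrize_with_ids appears to be a thin wrapper over pytest.mark.parametrize that derives readable test ids such as cuda_graph=True. A minimal stand-in under that assumption (not the repo's actual helper):

import pytest

def parametrize_with_ids(arg_name, values):
    # Produce ids like "cuda_graph=False" / "cuda_graph=True".
    return pytest.mark.parametrize(
        arg_name, values, ids=[f"{arg_name}={v}" for v in values])

@parametrize_with_ids("cuda_graph", [False, True])
def test_example(cuda_graph):
    assert isinstance(cuda_graph, bool)
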
@@ -1938,8 +1938,12 @@ def test_ptp_quickstart_advanced_mixed_precision(llm_root, llm_venv):
     ("llava-v1.6-mistral-7b", "llava-v1.6-mistral-7b-hf"),
     ("qwen2-vl-7b-instruct", "Qwen2-VL-7B-Instruct"),
     ("qwen2.5-vl-7b-instruct", "Qwen2.5-VL-7B-Instruct"),
-    ("mistral-small-3.1-24b-instruct", "Mistral-Small-3.1-24B-Instruct-2503"),
-    ("gemma-3-27b-it", "gemma/gemma-3-27b-it"),
+    pytest.param("mistral-small-3.1-24b-instruct",
+                 "Mistral-Small-3.1-24B-Instruct-2503",
+                 marks=pytest.mark.skip_less_device_memory(80000)),
+    pytest.param("gemma-3-27b-it",
+                 "gemma/gemma-3-27b-it",
+                 marks=pytest.mark.skip_less_device_memory(80000)),
 ])
 def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
                                    modality, use_cuda_graph):
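
Note: wrapping a parameter tuple in pytest.param is what allows a mark to attach to a single case: the smaller models keep running everywhere, while the two large checkpoints gain a per-case 80 GB device-memory gate. Reduced example (skip_less_device_memory is a suite-defined mark):

import pytest

@pytest.mark.parametrize("model_name, model_path", [
    ("qwen2-vl-7b-instruct", "Qwen2-VL-7B-Instruct"),
    pytest.param("gemma-3-27b-it", "gemma/gemma-3-27b-it",
                 marks=pytest.mark.skip_less_device_memory(80000)),
])
def test_example(model_name, model_path):
    ...
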
@@ -109,6 +109,8 @@ test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B
 test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image-True]
 test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-False]
 test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True]
+test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False]
+test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True]
 test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B]
 test_e2e.py::test_ptp_star_attention_example[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]
 test_e2e.py::test_qwen_e2e_cpprunner_large_new_tokens[DeepSeek-R1-Distill-Qwen-1.5B-DeepSeek-R1-Distill-Qwen-1.5B]
@@ -421,6 +421,7 @@ triton_server/test_triton_llm.py::test_llava_onevision[test_video-False-1---Fals
 triton_server/test_triton.py::test_cpp_unit_tests[cpp-unit-tests] SKIP (https://nvbugs/5401088)
 accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype SKIP (https://nvbugs/5401114)
 test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True] SKIP (https://nvbugs/5401114)
+test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False] SKIP (https://nvbugs/5401114)
 examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int4_awq-float16-enable_attn_plugin-enable_gemm_plugin] SKIP (https://nvbugs/5401233)
 examples/test_recurrentgemma.py::test_llm_recurrentgemma_2gpu[recurrentgemma-2b] SKIP (https://nvbugs/5401233)
 examples/test_multimodal.py::test_llm_multimodal_general[VILA1.5-3b-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5401156)
@@ -440,3 +441,4 @@ unittest/trt/attention/test_gpt_attention.py -k "partition0" SKIP (https://nvbug
 unittest/trt/attention/test_gpt_attention.py -k "partition1" SKIP (https://nvbugs/5412456)
 unittest/trt/attention/test_gpt_attention.py -k "partition2" SKIP (https://nvbugs/5412456)
 unittest/trt/attention/test_gpt_attention.py -k "partition3" SKIP (https://nvbugs/5412456)
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5414909)
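
Note: each waives.txt entry follows the convention "<test id> SKIP (<bug URL>)". A hypothetical parser, included only to document that format:

import re

WAIVE = re.compile(r"^(?P<test>\S.*?)\s+SKIP\s+\((?P<reason>[^)]*)\)\s*$")

def parse_waive(line):
    m = WAIVE.match(line)
    return (m.group("test"), m.group("reason")) if m else None

assert parse_waive(
    "test_e2e.py::test_demo SKIP (https://nvbugs/5401114)"
) == ("test_e2e.py::test_demo", "https://nvbugs/5401114")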