From 1d26a3fd7cc137aec5b58fc440e3fcdf101aa9d6 Mon Sep 17 00:00:00 2001 From: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> Date: Fri, 9 May 2025 14:51:55 +0800 Subject: [PATCH] test: skip tests on b200 (#3913) * skip tests on b200 Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> * skip phi-3-128k Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> --------- Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> --- tests/integration/defs/accuracy/test_cli_flow.py | 6 ++++++ tests/integration/defs/examples/test_gemma.py | 1 + tests/integration/defs/examples/test_llama.py | 1 + tests/integration/defs/examples/test_phi.py | 7 +++++-- tests/integration/defs/test_e2e.py | 7 ++++--- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/integration/defs/accuracy/test_cli_flow.py b/tests/integration/defs/accuracy/test_cli_flow.py index bcdfae95a7..563ac63d2f 100644 --- a/tests/integration/defs/accuracy/test_cli_flow.py +++ b/tests/integration/defs/accuracy/test_cli_flow.py @@ -428,14 +428,18 @@ class TestLlama2_7B(CliFlowAccuracyTestHarness): extra_build_args=["--gemm_plugin=fp8"]) @skip_pre_ada + @skip_post_blackwell def test_fp8_gemm_swiglu_plugin(self): + # gemm_swiglu_plugin=fp8 is not supported on SM 100. self.run( quant_algo=QuantAlgo.FP8, kv_cache_quant_algo=QuantAlgo.FP8, extra_build_args=["--gemm_plugin=fp8", "--gemm_swiglu_plugin=fp8"]) @skip_pre_ada + @skip_post_blackwell def test_fp8_low_latency_gemm_plugin(self): + # low_latency_gemm_plugin=fp8 is not supported on SM 100. self.run(quant_algo=QuantAlgo.FP8, kv_cache_quant_algo=QuantAlgo.FP8, extra_build_args=["--low_latency_gemm_plugin=fp8"]) @@ -635,6 +639,7 @@ class TestLlama3_1_8B(CliFlowAccuracyTestHarness): tp_size=4, extra_build_args=extra_build_args) + @skip_post_blackwell @skip_pre_ada def test_autoq(self): self.run(tasks=[CnnDailymail(self.MODEL_NAME), @@ -1064,6 +1069,7 @@ class TestGemma2_9BIt(CliFlowAccuracyTestHarness): MODEL_PATH = f"{llm_models_root()}/gemma/gemma-2-9b-it" EXAMPLE_FOLDER = "models/core/gemma" + @skip_post_blackwell def test_auto_dtype(self): self.run(tasks=[CnnDailymail(self.MODEL_NAME), MMLU(self.MODEL_NAME)], diff --git a/tests/integration/defs/examples/test_gemma.py b/tests/integration/defs/examples/test_gemma.py index e7be525236..5ae50e811c 100644 --- a/tests/integration/defs/examples/test_gemma.py +++ b/tests/integration/defs/examples/test_gemma.py @@ -517,6 +517,7 @@ def test_llm_gemma_1gpu_evaltool(gemma_model_root, llm_venv, cmodel_dir, @skip_pre_hopper +@skip_post_blackwell @pytest.mark.parametrize("gemma_model_root", ["gemma-2b", "gemma-7b", *GEMMA2_MODELS], indirect=True) diff --git a/tests/integration/defs/examples/test_llama.py b/tests/integration/defs/examples/test_llama.py index 487409775d..8b3ff6321a 100644 --- a/tests/integration/defs/examples/test_llama.py +++ b/tests/integration/defs/examples/test_llama.py @@ -4141,6 +4141,7 @@ def test_llm_llama_v3_2_smoothquant_1node_single_gpu( # TODO: remove skip after support fp8 rowwise gemm on B200 +@skip_post_blackwell @pytest.mark.skip_less_device_memory(80000) @pytest.mark.skip_less_device(4) @pytest.mark.parametrize("fp8_quant", diff --git a/tests/integration/defs/examples/test_phi.py b/tests/integration/defs/examples/test_phi.py index 94d0a9da0e..80b85083e6 100644 --- a/tests/integration/defs/examples/test_phi.py +++ b/tests/integration/defs/examples/test_phi.py @@ -510,8 +510,11 @@ def test_llm_phi_lora_1gpu(data_type, lora_data_type, phi_example_root, @pytest.mark.parametrize("data_type", ['float16', 'bfloat16']) @pytest.mark.parametrize("qformat", ['fp8']) @pytest.mark.parametrize("llm_phi_model_root", [ - "phi-2", "Phi-3-mini-128k-instruct", "Phi-3-small-128k-instruct", - "Phi-3.5-mini-instruct", "Phi-3.5-MoE-instruct", "Phi-4-mini-instruct" + pytest.param("phi-2", marks=skip_post_blackwell), + pytest.param("Phi-3-mini-128k-instruct", marks=skip_post_blackwell), + pytest.param("Phi-3-small-128k-instruct", marks=skip_post_blackwell), + pytest.param("Phi-3.5-mini-instruct", marks=skip_post_blackwell), + "Phi-3.5-MoE-instruct", "Phi-4-mini-instruct" ], indirect=True) def test_llm_phi_quantization_1gpu(data_type, llm_phi_model_root, llm_venv, diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 3524610a4f..7c54d38fab 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -30,9 +30,9 @@ from defs.trt_test_alternative import (check_call, check_call_negative_test, from .common import (PluginOptions, convert_weights, prune_checkpoint, quantize_data, refit_model, venv_check_call) -from .conftest import (llm_models_root, skip_nvlink_inactive, skip_pre_ada, - skip_pre_blackwell, skip_pre_hopper, tests_path, - unittest_path) +from .conftest import (llm_models_root, skip_nvlink_inactive, + skip_post_blackwell, skip_pre_ada, skip_pre_blackwell, + skip_pre_hopper, tests_path, unittest_path) sys.path.append(os.path.join(str(tests_path()), '/../examples/apps')) @@ -1310,6 +1310,7 @@ def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name, ]) +@skip_post_blackwell @pytest.mark.skip_less_device_memory(110000) @pytest.mark.skip_less_device(8) @pytest.mark.parametrize("model_name,model_path", [