From 1d26a3fd7cc137aec5b58fc440e3fcdf101aa9d6 Mon Sep 17 00:00:00 2001
From: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
Date: Fri, 9 May 2025 14:51:55 +0800
Subject: [PATCH] test: skip tests on b200 (#3913)

* skip unsupported tests on B200 by marking them with skip_post_blackwell

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>

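* skip fp8 quantization test for phi-2, Phi-3-mini-128k-instruct, Phi-3-small-128k-instruct and Phi-3.5-mini-instruct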

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>

---------

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
---
 tests/integration/defs/accuracy/test_cli_flow.py | 6 ++++++
 tests/integration/defs/examples/test_gemma.py    | 1 +
 tests/integration/defs/examples/test_llama.py    | 1 +
 tests/integration/defs/examples/test_phi.py      | 7 +++++--
 tests/integration/defs/test_e2e.py               | 7 ++++---
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tests/integration/defs/accuracy/test_cli_flow.py b/tests/integration/defs/accuracy/test_cli_flow.py
index bcdfae95a7..563ac63d2f 100644
--- a/tests/integration/defs/accuracy/test_cli_flow.py
+++ b/tests/integration/defs/accuracy/test_cli_flow.py
@@ -428,14 +428,18 @@ class TestLlama2_7B(CliFlowAccuracyTestHarness):
                  extra_build_args=["--gemm_plugin=fp8"])
 
     @skip_pre_ada
+    @skip_post_blackwell
     def test_fp8_gemm_swiglu_plugin(self):
+        # gemm_swiglu_plugin=fp8 is not supported on SM 100.
         self.run(
             quant_algo=QuantAlgo.FP8,
             kv_cache_quant_algo=QuantAlgo.FP8,
             extra_build_args=["--gemm_plugin=fp8", "--gemm_swiglu_plugin=fp8"])
 
     @skip_pre_ada
+    @skip_post_blackwell
     def test_fp8_low_latency_gemm_plugin(self):
+        # low_latency_gemm_plugin=fp8 is not supported on SM 100.
         self.run(quant_algo=QuantAlgo.FP8,
                  kv_cache_quant_algo=QuantAlgo.FP8,
                  extra_build_args=["--low_latency_gemm_plugin=fp8"])
@@ -635,6 +639,7 @@ class TestLlama3_1_8B(CliFlowAccuracyTestHarness):
             tp_size=4,
             extra_build_args=extra_build_args)
 
+    @skip_post_blackwell
     @skip_pre_ada
     def test_autoq(self):
         self.run(tasks=[CnnDailymail(self.MODEL_NAME),
@@ -1064,6 +1069,7 @@ class TestGemma2_9BIt(CliFlowAccuracyTestHarness):
     MODEL_PATH = f"{llm_models_root()}/gemma/gemma-2-9b-it"
     EXAMPLE_FOLDER = "models/core/gemma"
 
+    @skip_post_blackwell
     def test_auto_dtype(self):
         self.run(tasks=[CnnDailymail(self.MODEL_NAME),
                         MMLU(self.MODEL_NAME)],
diff --git a/tests/integration/defs/examples/test_gemma.py b/tests/integration/defs/examples/test_gemma.py
index e7be525236..5ae50e811c 100644
--- a/tests/integration/defs/examples/test_gemma.py
+++ b/tests/integration/defs/examples/test_gemma.py
@@ -517,6 +517,7 @@ def test_llm_gemma_1gpu_evaltool(gemma_model_root, llm_venv, cmodel_dir,
 
 
 @skip_pre_hopper
+@skip_post_blackwell
 @pytest.mark.parametrize("gemma_model_root",
                          ["gemma-2b", "gemma-7b", *GEMMA2_MODELS],
                          indirect=True)
diff --git a/tests/integration/defs/examples/test_llama.py b/tests/integration/defs/examples/test_llama.py
index 487409775d..8b3ff6321a 100644
--- a/tests/integration/defs/examples/test_llama.py
+++ b/tests/integration/defs/examples/test_llama.py
@@ -4141,6 +4141,7 @@ def test_llm_llama_v3_2_smoothquant_1node_single_gpu(
 
 
 # TODO: remove skip after support fp8 rowwise gemm on B200
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.skip_less_device(4)
 @pytest.mark.parametrize("fp8_quant",
diff --git a/tests/integration/defs/examples/test_phi.py b/tests/integration/defs/examples/test_phi.py
index 94d0a9da0e..80b85083e6 100644
--- a/tests/integration/defs/examples/test_phi.py
+++ b/tests/integration/defs/examples/test_phi.py
@@ -510,8 +510,11 @@ def test_llm_phi_lora_1gpu(data_type, lora_data_type, phi_example_root,
 @pytest.mark.parametrize("data_type", ['float16', 'bfloat16'])
 @pytest.mark.parametrize("qformat", ['fp8'])
 @pytest.mark.parametrize("llm_phi_model_root", [
-    "phi-2", "Phi-3-mini-128k-instruct", "Phi-3-small-128k-instruct",
-    "Phi-3.5-mini-instruct", "Phi-3.5-MoE-instruct", "Phi-4-mini-instruct"
+    pytest.param("phi-2", marks=skip_post_blackwell),
+    pytest.param("Phi-3-mini-128k-instruct", marks=skip_post_blackwell),
+    pytest.param("Phi-3-small-128k-instruct", marks=skip_post_blackwell),
+    pytest.param("Phi-3.5-mini-instruct", marks=skip_post_blackwell),
+    "Phi-3.5-MoE-instruct", "Phi-4-mini-instruct"
 ],
                          indirect=True)
 def test_llm_phi_quantization_1gpu(data_type, llm_phi_model_root, llm_venv,
diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 3524610a4f..7c54d38fab 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -30,9 +30,9 @@ from defs.trt_test_alternative import (check_call, check_call_negative_test,
 
 from .common import (PluginOptions, convert_weights, prune_checkpoint,
                      quantize_data, refit_model, venv_check_call)
-from .conftest import (llm_models_root, skip_nvlink_inactive, skip_pre_ada,
-                       skip_pre_blackwell, skip_pre_hopper, tests_path,
-                       unittest_path)
+from .conftest import (llm_models_root, skip_nvlink_inactive,
+                       skip_post_blackwell, skip_pre_ada, skip_pre_blackwell,
+                       skip_pre_hopper, tests_path, unittest_path)
 
 sys.path.append(os.path.join(str(tests_path()), '/../examples/apps'))
 
@@ -1310,6 +1310,7 @@ def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name,
     ])
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(110000)
 @pytest.mark.skip_less_device(8)
 @pytest.mark.parametrize("model_name,model_path", [