test: skip tests on b200 (#3913)

* skip tests on b200

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>

* skip phi-3-128k

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>

---------

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
xinhe-nv 2025-05-09 14:51:55 +08:00 committed by GitHub
parent 77f8e43592
commit 1d26a3fd7c
5 changed files with 17 additions and 5 deletions


@@ -428,14 +428,18 @@ class TestLlama2_7B(CliFlowAccuracyTestHarness):
                  extra_build_args=["--gemm_plugin=fp8"])
 
     @skip_pre_ada
+    @skip_post_blackwell
     def test_fp8_gemm_swiglu_plugin(self):
+        # gemm_swiglu_plugin=fp8 is not supported on SM 100.
         self.run(
             quant_algo=QuantAlgo.FP8,
             kv_cache_quant_algo=QuantAlgo.FP8,
             extra_build_args=["--gemm_plugin=fp8", "--gemm_swiglu_plugin=fp8"])
 
     @skip_pre_ada
+    @skip_post_blackwell
     def test_fp8_low_latency_gemm_plugin(self):
+        # low_latency_gemm_plugin=fp8 is not supported on SM 100.
         self.run(quant_algo=QuantAlgo.FP8,
                  kv_cache_quant_algo=QuantAlgo.FP8,
                  extra_build_args=["--low_latency_gemm_plugin=fp8"])
@@ -635,6 +639,7 @@ class TestLlama3_1_8B(CliFlowAccuracyTestHarness):
                  tp_size=4,
                  extra_build_args=extra_build_args)
 
+    @skip_post_blackwell
     @skip_pre_ada
     def test_autoq(self):
         self.run(tasks=[CnnDailymail(self.MODEL_NAME),
@@ -1064,6 +1069,7 @@ class TestGemma2_9BIt(CliFlowAccuracyTestHarness):
     MODEL_PATH = f"{llm_models_root()}/gemma/gemma-2-9b-it"
     EXAMPLE_FOLDER = "models/core/gemma"
 
+    @skip_post_blackwell
     def test_auto_dtype(self):
         self.run(tasks=[CnnDailymail(self.MODEL_NAME),
                         MMLU(self.MODEL_NAME)],


@@ -517,6 +517,7 @@ def test_llm_gemma_1gpu_evaltool(gemma_model_root, llm_venv, cmodel_dir,
 
 
 @skip_pre_hopper
+@skip_post_blackwell
 @pytest.mark.parametrize("gemma_model_root",
                          ["gemma-2b", "gemma-7b", *GEMMA2_MODELS],
                          indirect=True)


@@ -4141,6 +4141,7 @@ def test_llm_llama_v3_2_smoothquant_1node_single_gpu(
 
 
 # TODO: remove skip after support fp8 rowwise gemm on B200
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.skip_less_device(4)
 @pytest.mark.parametrize("fp8_quant",


@@ -510,8 +510,11 @@ def test_llm_phi_lora_1gpu(data_type, lora_data_type, phi_example_root,
 @pytest.mark.parametrize("data_type", ['float16', 'bfloat16'])
 @pytest.mark.parametrize("qformat", ['fp8'])
 @pytest.mark.parametrize("llm_phi_model_root", [
-    "phi-2", "Phi-3-mini-128k-instruct", "Phi-3-small-128k-instruct",
-    "Phi-3.5-mini-instruct", "Phi-3.5-MoE-instruct", "Phi-4-mini-instruct"
+    pytest.param("phi-2", marks=skip_post_blackwell),
+    pytest.param("Phi-3-mini-128k-instruct", marks=skip_post_blackwell),
+    pytest.param("Phi-3-small-128k-instruct", marks=skip_post_blackwell),
+    pytest.param("Phi-3.5-mini-instruct", marks=skip_post_blackwell),
+    "Phi-3.5-MoE-instruct", "Phi-4-mini-instruct"
 ],
                          indirect=True)
 def test_llm_phi_quantization_1gpu(data_type, llm_phi_model_root, llm_venv,
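
The Phi hunk above wraps individual parametrize values in pytest.param so the Blackwell skip applies per model instead of to the whole test. A self-contained illustration of that pattern, with a stand-in marker rather than the repository's real one:

import pytest

# Stand-in marker for illustration; the real skip_post_blackwell comes
# from the repository's conftest.
skip_post_blackwell = pytest.mark.skip(reason="example: skipped on Blackwell")

@pytest.mark.parametrize("model", [
    pytest.param("phi-2", marks=skip_post_blackwell),  # collected but skipped
    "Phi-3.5-MoE-instruct",                            # runs everywhere
])
def test_model_name(model):
    assert isinstance(model, str)
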


@@ -30,9 +30,9 @@ from defs.trt_test_alternative import (check_call, check_call_negative_test,
 from .common import (PluginOptions, convert_weights, prune_checkpoint,
                      quantize_data, refit_model, venv_check_call)
-from .conftest import (llm_models_root, skip_nvlink_inactive, skip_pre_ada,
-                       skip_pre_blackwell, skip_pre_hopper, tests_path,
-                       unittest_path)
+from .conftest import (llm_models_root, skip_nvlink_inactive,
+                       skip_post_blackwell, skip_pre_ada, skip_pre_blackwell,
+                       skip_pre_hopper, tests_path, unittest_path)
 
 sys.path.append(os.path.join(str(tests_path()), '/../examples/apps'))
@@ -1310,6 +1310,7 @@ def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name,
 ])
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(110000)
 @pytest.mark.skip_less_device(8)
 @pytest.mark.parametrize("model_name,model_path", [