mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
test: skip tests on b200 (#3913)
* skip tests on b200

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>

* skip phi-3-128k

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>

---------

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
This commit is contained in:
parent 77f8e43592
commit 1d26a3fd7c
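The markers applied throughout this commit (skip_post_blackwell and friends) gate tests on the GPU's SM version; B200 is SM 100 (Blackwell). As a minimal sketch of how such a marker could be defined, assuming a torch-based capability query and a _sm_version helper that are illustrative rather than the repo's actual conftest code:

import pytest

def _sm_version() -> int:
    # Illustrative only: derive the SM version (90 = Hopper, 100 = Blackwell)
    # from the active CUDA device via torch; the real conftest may use a
    # different detection mechanism.
    import torch
    major, minor = torch.cuda.get_device_capability()
    return major * 10 + minor

# Skip on SM 100 (Blackwell, e.g. B200) and any newer architecture.
skip_post_blackwell = pytest.mark.skipif(
    _sm_version() >= 100,
    reason="not supported on SM 100 (Blackwell) or newer")

The same pattern, with the comparison reversed, would yield markers like skip_pre_ada or skip_pre_hopper that appear alongside it in the hunks below.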
@@ -428,14 +428,18 @@ class TestLlama2_7B(CliFlowAccuracyTestHarness):
             extra_build_args=["--gemm_plugin=fp8"])
 
     @skip_pre_ada
+    @skip_post_blackwell
     def test_fp8_gemm_swiglu_plugin(self):
+        # gemm_swiglu_plugin=fp8 is not supported on SM 100.
         self.run(
             quant_algo=QuantAlgo.FP8,
             kv_cache_quant_algo=QuantAlgo.FP8,
             extra_build_args=["--gemm_plugin=fp8", "--gemm_swiglu_plugin=fp8"])
 
     @skip_pre_ada
+    @skip_post_blackwell
     def test_fp8_low_latency_gemm_plugin(self):
+        # low_latency_gemm_plugin=fp8 is not supported on SM 100.
         self.run(quant_algo=QuantAlgo.FP8,
                  kv_cache_quant_algo=QuantAlgo.FP8,
                  extra_build_args=["--low_latency_gemm_plugin=fp8"])
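Stacked skip markers like the pair above combine as a logical OR of their skip conditions, so test_fp8_gemm_swiglu_plugin now runs only on the window from Ada (SM 89) up to, but not including, Blackwell (SM 100). A minimal self-contained sketch of that composition, with a hard-coded SM value standing in for real device detection:

import pytest

SM = 90  # stand-in for a detected SM version (Hopper in this sketch)

skip_pre_ada = pytest.mark.skipif(SM < 89, reason="requires SM >= 89 (Ada)")
skip_post_blackwell = pytest.mark.skipif(
    SM >= 100, reason="not supported on SM 100 (Blackwell) or newer")

@skip_pre_ada
@skip_post_blackwell
def test_fp8_feature():
    # Runs only when both skip conditions are false, i.e. 89 <= SM < 100.
    assert 89 <= SM < 100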
@@ -635,6 +639,7 @@ class TestLlama3_1_8B(CliFlowAccuracyTestHarness):
             tp_size=4,
             extra_build_args=extra_build_args)
 
+    @skip_post_blackwell
     @skip_pre_ada
     def test_autoq(self):
         self.run(tasks=[CnnDailymail(self.MODEL_NAME),
@@ -1064,6 +1069,7 @@ class TestGemma2_9BIt(CliFlowAccuracyTestHarness):
     MODEL_PATH = f"{llm_models_root()}/gemma/gemma-2-9b-it"
     EXAMPLE_FOLDER = "models/core/gemma"
 
+    @skip_post_blackwell
     def test_auto_dtype(self):
         self.run(tasks=[CnnDailymail(self.MODEL_NAME),
                         MMLU(self.MODEL_NAME)],
@@ -517,6 +517,7 @@ def test_llm_gemma_1gpu_evaltool(gemma_model_root, llm_venv, cmodel_dir,
 
 
 @skip_pre_hopper
+@skip_post_blackwell
 @pytest.mark.parametrize("gemma_model_root",
                          ["gemma-2b", "gemma-7b", *GEMMA2_MODELS],
                          indirect=True)
@@ -4141,6 +4141,7 @@ def test_llm_llama_v3_2_smoothquant_1node_single_gpu(
 
 
 # TODO: remove skip after support fp8 rowwise gemm on B200
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.skip_less_device(4)
 @pytest.mark.parametrize("fp8_quant",
@@ -510,8 +510,11 @@ def test_llm_phi_lora_1gpu(data_type, lora_data_type, phi_example_root,
 @pytest.mark.parametrize("data_type", ['float16', 'bfloat16'])
 @pytest.mark.parametrize("qformat", ['fp8'])
 @pytest.mark.parametrize("llm_phi_model_root", [
-    "phi-2", "Phi-3-mini-128k-instruct", "Phi-3-small-128k-instruct",
-    "Phi-3.5-mini-instruct", "Phi-3.5-MoE-instruct", "Phi-4-mini-instruct"
+    pytest.param("phi-2", marks=skip_post_blackwell),
+    pytest.param("Phi-3-mini-128k-instruct", marks=skip_post_blackwell),
+    pytest.param("Phi-3-small-128k-instruct", marks=skip_post_blackwell),
+    pytest.param("Phi-3.5-mini-instruct", marks=skip_post_blackwell),
+    "Phi-3.5-MoE-instruct", "Phi-4-mini-instruct"
 ],
                          indirect=True)
 def test_llm_phi_quantization_1gpu(data_type, llm_phi_model_root, llm_venv,
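The hunk above narrows a blanket parametrize list: wrapping an argument in pytest.param lets a mark apply to one case at a time, so only the phi-2 and Phi-3 variants are skipped post-Blackwell while the Phi-3.5-MoE and Phi-4-mini cases keep running everywhere. A small self-contained illustration of the pattern (the mark and model names here are made up for the sketch):

import pytest

demo_skip = pytest.mark.skip(reason="demonstrates a per-case mark")

@pytest.mark.parametrize("model", [
    pytest.param("model-a", marks=demo_skip),  # this case alone is skipped
    "model-b",  # plain entries carry no mark and always run
])
def test_model(model):
    assert model == "model-b"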
@@ -30,9 +30,9 @@ from defs.trt_test_alternative import (check_call, check_call_negative_test,
 
 from .common import (PluginOptions, convert_weights, prune_checkpoint,
                      quantize_data, refit_model, venv_check_call)
-from .conftest import (llm_models_root, skip_nvlink_inactive, skip_pre_ada,
-                       skip_pre_blackwell, skip_pre_hopper, tests_path,
-                       unittest_path)
+from .conftest import (llm_models_root, skip_nvlink_inactive,
+                       skip_post_blackwell, skip_pre_ada, skip_pre_blackwell,
+                       skip_pre_hopper, tests_path, unittest_path)
 
 sys.path.append(os.path.join(str(tests_path()), '/../examples/apps'))
 
@@ -1310,6 +1310,7 @@ def test_ptp_quickstart_advanced_eagle3(llm_root, llm_venv, model_name,
     ])
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(110000)
 @pytest.mark.skip_less_device(8)
 @pytest.mark.parametrize("model_name,model_path", [