mirror of https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00

test: skip post blackwell (#6357)

Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>

commit 263c6c0ad0 (parent 5247df6ae2)
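This change replaces blanket waive-list entries for Blackwell GPUs with pytest skip logic keyed on the GPU's SM version. A minimal sketch of the module-level guard added across the TRT-flow test files below; get_sm_version() here is a hypothetical stand-in for the defs.conftest helper of the same name:

import pytest


def get_sm_version() -> int:
    # Hypothetical stand-in for defs.conftest.get_sm_version(); reports the
    # GPU's SM version as an integer (100 = Blackwell, 103 = Blackwell Ultra).
    return 103


# At module scope, pytest.skip(..., allow_module_level=True) aborts collection
# of the whole file, so every test in it is reported as skipped.
if get_sm_version() >= 103:
    pytest.skip(
        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
        allow_module_level=True)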
@@ -50,11 +50,13 @@ class TestGpt2(CliFlowAccuracyTestHarness):
     def test_context_fmha_fp32_acc(self):
         self.run(extra_summarize_args=["--enable_context_fmha_fp32_acc"])
 
+    @skip_post_blackwell
     @pytest.mark.parametrize("precision", ["int8", "int4"])
     def test_weight_only(self, precision: str):
         quant_algo = QuantAlgo.W8A16 if precision == "int8" else QuantAlgo.W4A16
         self.run(quant_algo=quant_algo)
 
+    @skip_post_blackwell
     def test_int8_kv_cache(self):
         self.run(kv_cache_quant_algo=QuantAlgo.INT8)
 
@@ -415,6 +417,7 @@ class TestVicuna7B(CliFlowAccuracyTestHarness):
     EAGLE_MODEL_NAME = "yuhuili/EAGLE-Vicuna-7B-v1.3"
     EAGLE_MODEL_PATH = f"{llm_models_root()}/EAGLE-Vicuna-7B-v1.3"
 
+    @skip_post_blackwell
     def test_lookahead(self, mocker):
         mocker.patch.object(CnnDailymail, "MAX_BATCH_SIZE", 8)
 
@@ -425,6 +428,7 @@ class TestVicuna7B(CliFlowAccuracyTestHarness):
             ],
             extra_summarize_args=["--lookahead_config=[7,7,7]"])
 
+    @skip_post_blackwell
     @parametrize_with_ids("cuda_graph", [False, True])
     def test_medusa(self, cuda_graph, mocker):
         mocker.patch.object(self.__class__, "EXAMPLE_FOLDER", "medusa")
@@ -1104,6 +1108,7 @@ class TestMixtral8x7B(CliFlowAccuracyTestHarness):
 
     @pytest.mark.skip_less_device(2)
     @pytest.mark.skip_less_device_memory(80000)
+    @skip_post_blackwell
     def test_weight_only_int4_tp2(self):
         self.run(quant_algo=QuantAlgo.W4A16,
                  tp_size=2,
@@ -1111,6 +1116,7 @@ class TestMixtral8x7B(CliFlowAccuracyTestHarness):
 
     @pytest.mark.skip_less_device(2)
     @pytest.mark.skip_less_device_memory(80000)
+    @skip_post_blackwell
     def test_weight_only_int8_tp2(self):
         self.run(quant_algo=QuantAlgo.W8A16,
                  tp_size=2,
@@ -1892,6 +1892,10 @@ skip_post_blackwell = pytest.mark.skipif(
     get_sm_version() >= 100,
     reason="This test is not supported in post-Blackwell architecture")
 
+skip_post_blackwell_ultra = pytest.mark.skipif(
+    get_sm_version() >= 103,
+    reason="This test is not supported in post-Blackwell-Ultra architecture")
+
 skip_device_contain_gb200 = pytest.mark.skipif(
     check_device_contain(["GB200"]),
     reason="This test is not supported on GB200 or GB100")
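The markers defined in the hunk above attach to individual tests as plain decorators; a minimal usage sketch, with an assumed get_sm_version() stand-in and an illustrative test body:

import pytest


def get_sm_version() -> int:
    return 100  # assumed stand-in for defs.conftest.get_sm_version()


skip_post_blackwell = pytest.mark.skipif(
    get_sm_version() >= 100,
    reason="This test is not supported in post-Blackwell architecture")


@skip_post_blackwell
def test_weight_only_int8():
    # Illustrative body; on SM 100 and newer the test is reported as skipped.
    assert True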
@@ -18,6 +18,12 @@ from defs.common import convert_weights, venv_check_call, venv_mpi_check_call
 from defs.conftest import get_device_count, get_sm_version
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 # # Build parameters
 @pytest.mark.parametrize(
@@ -18,9 +18,15 @@ import os
 
 import pytest
 from defs.common import convert_weights, venv_check_call, venv_mpi_check_call
-from defs.conftest import get_device_count
+from defs.conftest import get_device_count, get_sm_version
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.fixture(scope="module")
 def bindings_example_root(llm_root):
@@ -18,12 +18,20 @@ import shutil
 
 import pytest
 from defs.common import convert_weights, venv_check_call
+from defs.conftest import get_sm_version, skip_post_blackwell
 from defs.trt_test_alternative import check_call, exists
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 # TODO: add more test case for input_padding, paged_kv_cache, num_beams
 @pytest.mark.skip_less_device_memory(24000)
-@pytest.mark.parametrize("use_weight_only", [True, False],
+@pytest.mark.parametrize("use_weight_only",
+                         [pytest.param(True, marks=skip_post_blackwell), False],
                          ids=["enable_weight_only", "disable_weight_only"])
 @pytest.mark.parametrize("llm_glm_4_9b_model_root",
                          ["glm-4-9b", "glm-4-9b-chat"],
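Rather than skipping a whole test, the parametrize rewrites in this commit wrap individual parameter values in pytest.param so that only the affected variant carries the mark. A minimal sketch; the always-false skipif below stands in for defs.conftest.skip_post_blackwell:

import pytest

# Stand-in marker; in the real suite this is defs.conftest.skip_post_blackwell.
skip_post_blackwell = pytest.mark.skipif(False, reason="illustration only")


@pytest.mark.parametrize(
    "use_weight_only",
    [pytest.param(True, marks=skip_post_blackwell), False],
    ids=["enable_weight_only", "disable_weight_only"])
def test_summary(use_weight_only):
    # Only the use_weight_only=True case carries the skip mark.
    assert isinstance(use_weight_only, bool)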
@@ -18,11 +18,19 @@ import os
 import pytest
 from defs.common import (convert_weights, generate_summary_cmd, venv_check_call,
                          venv_mpi_check_call)
-from defs.conftest import get_gpu_device_list
+from defs.conftest import (get_gpu_device_list, get_sm_version,
+                           skip_post_blackwell)
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.mark.skip_less_device_memory(80000)
+@skip_post_blackwell
 @pytest.mark.parametrize("use_weight_only", [True, False],
                          ids=["enable_weight_only", "disable_weight_only"])
 def test_llm_commandr_v01_single_gpu_summary(commandr_example_root,
@@ -79,7 +87,8 @@ def test_llm_commandr_v01_single_gpu_summary(commandr_example_root,
 @pytest.mark.skip_less_device(4)
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.skip_less_host_memory(1000000)
-@pytest.mark.parametrize("use_weight_only", [True, False],
+@pytest.mark.parametrize("use_weight_only",
+                         [pytest.param(True, marks=skip_post_blackwell), False],
                          ids=["enable_weight_only", "disable_weight_only"])
 def test_llm_commandr_plus_4gpus_summary(commandr_example_root,
                                          llm_commandr_plus_model_root,
@@ -19,10 +19,16 @@ from copy import deepcopy
 
 import pytest
 from defs.common import convert_weights, venv_check_call, venv_mpi_check_call
-from defs.conftest import (get_device_memory, llm_models_root,
+from defs.conftest import (get_device_memory, get_sm_version, llm_models_root,
                            skip_post_blackwell, skip_pre_hopper)
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 # TODO: remove skip after enable Blackwell for Speculative Decoding
 @skip_post_blackwell
@@ -18,9 +18,15 @@ import os
 import pytest
 from defs.common import (convert_weights, get_dummy_spec_decoding_heads,
                          venv_check_call)
-from defs.conftest import skip_post_blackwell, skip_pre_ada
+from defs.conftest import get_sm_version, skip_post_blackwell, skip_pre_ada
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @skip_post_blackwell
 @pytest.mark.parametrize("use_dynamic_tree", [False, True],
@@ -16,10 +16,16 @@
 import pytest
 from defs.common import (convert_weights, quantize_data, venv_check_call,
                          venv_mpi_check_call)
-from defs.conftest import (get_device_count, skip_fp8_pre_ada,
+from defs.conftest import (get_device_count, get_sm_version, skip_fp8_pre_ada,
                            skip_post_blackwell)
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.mark.parametrize("use_fp8", [True, False],
                          ids=["enable_fp8", "disable_fp8"])
@@ -38,8 +44,11 @@ from defs.trt_test_alternative import check_call
                          ids=["enable_gemm_plugin", "disable_gemm_plugin"])
 @pytest.mark.parametrize("data_type", ['bfloat16', 'float16', 'float32'])
 @pytest.mark.parametrize("enc_dec_model_root", [
-    't5-small', 'flan-t5-small', 'byt5-small', 'bart-large-cnn',
-    'mbart-large-50-many-to-one-mmt', 'wmt14'
+    pytest.param('t5-small', marks=skip_post_blackwell),
+    pytest.param('flan-t5-small', marks=skip_post_blackwell),
+    pytest.param('byt5-small', marks=skip_post_blackwell), 'bart-large-cnn',
+    pytest.param('mbart-large-50-many-to-one-mmt', marks=skip_post_blackwell),
+    'wmt14'
 ],
                          indirect=True)
 @pytest.mark.parametrize("compare_hf_fp32", [True, False],
@@ -17,10 +17,17 @@
 import pytest
 from defs.common import (convert_weights, generate_summary_cmd, venv_check_call,
                          venv_mpi_check_call)
-from defs.conftest import skip_post_blackwell
+from defs.conftest import get_sm_version, skip_post_blackwell
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
+@skip_post_blackwell
 @pytest.mark.parametrize("num_beams", [1, 2, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
 @pytest.mark.parametrize("data_type", ['bfloat16', 'float16'])
@@ -24,9 +24,16 @@ from defs.common import (convert_weights, generate_summary_cmd, parse_mpi_cmd,
                          similarity_score, test_multi_lora_support,
                          venv_check_call, venv_check_output,
                          venv_mpi_check_call, venv_mpi_check_output)
-from defs.conftest import get_device_memory, skip_fp8_pre_ada, skip_pre_ada
+from defs.conftest import (get_device_memory, get_sm_version, skip_fp8_pre_ada,
+                           skip_post_blackwell, skip_pre_ada)
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 INPUT_TEXT_1 = "After Washington had returned to Williamsburg, " + \
     "Dinwiddie ordered him to lead a larger force to assist Trent in his work. " + \
     "While en route, Washington learned of Trent's retreat. " + \
@@ -688,6 +695,7 @@ def test_llm_gpt3_175b_1node_8gpus(gpt_example_root, llm_venv, engine_dir,
     ])
 
 
+@skip_post_blackwell
 @pytest.mark.parametrize("per_token_channel", [True, False],
                          ids=["enable_ptpc", "disable_ptpc"])
 def test_llm_gpt2_smooth_single_gpu_summary(gpt_example_root, llm_venv,
@@ -732,6 +740,7 @@ def test_llm_gpt2_smooth_single_gpu_summary(gpt_example_root, llm_venv,
     ])
 
 
+@skip_post_blackwell
 def test_llm_gpt2_int8_kv_1gpu(gpt_example_root, llm_venv, llm_gpt2_model_root,
                                llm_datasets_root, engine_dir, cmodel_dir):
     "gpt2 INT8 KV Cache test on 1 gpu"
@@ -1360,6 +1369,7 @@ def test_llm_gpt2_starcoder_1node_4gpus(gpt_example_root,
                    summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_host_memory(250000)
 def test_llm_gpt2_starcoder_1gpus(gpt_example_root,
                                   llm_gpt2_starcoder_model_root, llm_venv,
@@ -1401,6 +1411,7 @@ def test_llm_gpt2_starcoder_1gpus(gpt_example_root,
     venv_check_call(llm_venv, summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_host_memory(250000)
 @pytest.mark.parametrize("dtype", ["float16"])
 @pytest.mark.parametrize("precision", ["int8", "int4"])
@@ -1710,6 +1721,7 @@ def test_llm_gpt2_multi_lora_1gpu(gpt_example_root, llm_venv,
                    for item in expected_output[idx]]), f"output is {output}"
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(50000)
 @pytest.mark.parametrize("data_type", ['float16', 'fp8'],
                          ids=['base_fp16', 'base_fp8'])
@@ -15,9 +15,15 @@
 
 import pytest
 from defs.common import venv_check_call
-from defs.conftest import get_gpu_device_list
+from defs.conftest import get_gpu_device_list, get_sm_version
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 INPUT_TEXT = """
 Write a Python function `find_max(words)` to solve the following problem:\nWrite a function that accepts a list of strings.\nThe list contains different words. Return the word with maximum number\nof unique characters. If multiple strings have maximum number of unique\ncharacters, return the one which comes first in lexicographical order.\nfind_max(["name", "of", "string"]) == "string"\nfind_max(["name", "enam", "game"]) == "enam"\nfind_max(["aaaaaaa", "bb" ,"cc"]) == ""aaaaaaa"
 """
@@ -19,8 +19,15 @@ import time
 import pytest
 from defs.common import (convert_weights, test_multi_lora_support,
                          venv_mpi_check_call)
+from defs.conftest import get_sm_version
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.fixture(scope="module", autouse=True)
 def disable_unified_converter():
@@ -14,9 +14,15 @@
 # limitations under the License.
 import pytest
 from defs.common import convert_weights, parse_mpi_cmd, venv_mpi_check_call
-from defs.conftest import get_device_memory
+from defs.conftest import get_device_memory, get_sm_version
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 # @pytest.mark.skip_less_device(2)
 @pytest.mark.parametrize("num_beams", [1, 2, 4],
@@ -36,6 +36,12 @@ from defs.conftest import (get_device_count, get_device_memory,
 # yapf: enable
 from defs.trt_test_alternative import check_call, exists
 
+# skip trt flow cases on post-Blackwell-Ultra
+# if get_sm_version() >= 103:
+#     pytest.skip(
+#         "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+#         allow_module_level=True)
+
 INPUT_TEXT_1 = "After Washington had returned to Williamsburg, " + \
     "Dinwiddie ordered him to lead a larger force to assist Trent in his work. " + \
     "While en route, Washington learned of Trent's retreat. " + \
@@ -688,6 +694,7 @@ def test_llm_llama_v2_1gpu_sparsity(llama_example_root, llama_model_root,
     ])
 
 
+@skip_post_blackwell
 @pytest.mark.parametrize("num_beams", [1],
                          ids=lambda num_beams: f'nb:{num_beams}')
 @pytest.mark.parametrize("data_type", ['bfloat16', 'float16'])
@@ -886,6 +893,7 @@ def test_llm_llama_v2_gather_logits_2gpu_pp2(llama_example_root,
                    summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.parametrize("llama_model_root", ['llama-v2-7b-hf'], indirect=True)
 def test_llm_llama_v2_1gpu_auto_parallel(llama_example_root, llama_model_root,
                                          llm_venv, cmodel_dir, engine_dir):
@@ -911,6 +919,7 @@ def test_llm_llama_v2_1gpu_auto_parallel(llama_example_root, llama_model_root,
     check_call(" ".join(build_cmd), shell=True, env=llm_venv._new_env)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device(2)
 @pytest.mark.parametrize("num_beams", [1, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
@@ -1622,6 +1631,7 @@ def test_llm_llama_v2_1gpu_fp8_gemv(llama_example_root, llama_model_root,
     venv_check_call(llm_venv, summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(50000)
 @pytest.mark.parametrize("data_type", ['bfloat16', 'float16'])
 @pytest.mark.parametrize("gemm_swiglu_plugin", ["fp8"])
@@ -1697,7 +1707,12 @@ def test_llm_llama_v2_1gpu_gemm_swiglu(llama_example_root, llama_model_root,
 
 
 @pytest.mark.parametrize(
-    "data_type", ['float16', 'fp8', 'sq_ootb', 'awq', 'int8_wo'],
+    "data_type", [
+        'float16', 'fp8',
+        pytest.param('sq_ootb', marks=skip_post_blackwell),
+        pytest.param('awq', marks=skip_post_blackwell),
+        pytest.param('int8_wo', marks=skip_post_blackwell)
+    ],
     ids=['base_fp16', 'base_fp8', 'base_sq_ootb', 'base_awq', 'base_int8_wo'])
 @pytest.mark.parametrize("lora_data_type", ['float16'], ids=['lora_fp16'])
 @pytest.mark.parametrize("llama_model_root", ['llama-v2-13b-hf'], indirect=True)
@@ -2280,6 +2295,7 @@ def test_llm_llama_code_llama_multi_gpus_summary(llama_example_root,
                    summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(30000)
 @pytest.mark.parametrize("num_beams", [1, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
@@ -2336,6 +2352,7 @@ def test_llm_llama_smooth_quant_1gpu_summary(llama_example_root,
     venv_check_call(llm_venv, summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(30000)
 @pytest.mark.parametrize("num_beams", [1, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
@@ -2385,6 +2402,7 @@ def test_llm_llama_int8_kv_1gpu_summary(llama_example_root, llama_model_root,
     venv_check_call(llm_venv, summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(30000)
 @pytest.mark.parametrize("num_beams", [1, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
@@ -2429,6 +2447,7 @@ def test_llm_llama_int8_sq_ootb_1gpu_summary(
     venv_check_call(llm_venv, summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device(2)
 @pytest.mark.parametrize("num_beams", [1],
                          ids=lambda num_beams: f'nb:{num_beams}')
@@ -2488,6 +2507,7 @@ def test_llm_llama_v2_int8sq_2gpu_tp2(data_type, llama_example_root,
                    summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(30000)
 @pytest.mark.parametrize("num_beams", [1, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
@@ -2543,6 +2563,7 @@ def test_llm_llama_wo_1gpu_summary(llama_example_root, llama_model_root,
     venv_check_call(llm_venv, summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(30000)
 @pytest.mark.parametrize("num_beams", [1, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
@@ -2872,7 +2893,9 @@ def test_llm_llama_v2_lora_benchmark_2gpu(llama_example_root, llama_model_root,
 @pytest.mark.skip_less_device(4)
 @pytest.mark.parametrize("num_beams", [1, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
-@pytest.mark.parametrize("qformat", ["fp8", "int4_awq"])
+@pytest.mark.parametrize(
+    "qformat",
+    ["fp8", pytest.param("int4_awq", marks=skip_post_blackwell)])
 @pytest.mark.parametrize(
     "tp_pp_size", [(4, 1), (2, 2)],
     ids=lambda tp_pp_size: f'tp{tp_pp_size[0]}pp{tp_pp_size[1]}')
@@ -3268,8 +3291,11 @@ def test_llm_llama_1gpu_streaming_llm(llama_example_root, deepseek_model_root,
     assert "上海人工智能实验室" in output, output
 
 
-@pytest.mark.parametrize(
-    "fp8_quant", ['disable_fp8', 'enable_fp8', 'enable_fp8_meta_recipe'])
+@pytest.mark.parametrize("fp8_quant", [
+    'disable_fp8',
+    pytest.param('enable_fp8', marks=skip_post_blackwell),
+    pytest.param('enable_fp8_meta_recipe', marks=skip_post_blackwell)
+])
 @pytest.mark.parametrize("llama_model_root", ['llama-3.1-8b', 'llama-3.2-1b'],
                          indirect=True)
 def test_llm_llama_v3_1_1node_single_gpu(llama_example_root, llama_model_root,
@@ -3581,6 +3607,7 @@ def test_llm_llama_v3_1_2nodes_8gpus(test_type, llama_example_root,
     check_call(" ".join(mmlu_cmd), shell=True, env=llm_venv._new_env)
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(50000)
 @pytest.mark.parametrize("low_latency_gemm_plugin", ["fp8"])
 @pytest.mark.parametrize("llama_model_root", ['llama-v2-7b-hf'], indirect=True)
@@ -3813,6 +3840,7 @@ def test_llm_llama_v2_fp8_2gpu_cp2(data_type, llama_example_root,
 
 
 @skip_pre_ada
+@skip_post_blackwell
 @pytest.mark.parametrize("llama_model_root", ['llama-3.1-8b', 'llama-3.2-1b'],
                          indirect=True)
 def test_llm_llama_lookahead_xqa_fp8_1gpu(llama_example_root, llama_model_root,
@@ -4014,6 +4042,7 @@ def test_mistral_nemo_fp8_with_bf16_lora(
     )
 
 
+@skip_post_blackwell
 @pytest.mark.parametrize("llama_model_root", ['llama-3.1-8b'], indirect=True)
 def test_llm_llama_lookahead_single_gpu_summary(llama_example_root,
                                                 llama_model_root, llm_venv,
@@ -18,16 +18,24 @@ import os
 import pytest
 from defs.common import (convert_weights, generate_summary_cmd, venv_check_call,
                          venv_mpi_check_call)
+from defs.conftest import get_sm_version, skip_post_blackwell
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.mark.parametrize("gemm_plugin", [True, False],
                          ids=["enable_gemm_plugin", "disable_gemm_plugin"])
 @pytest.mark.parametrize("dtype", ['bfloat16', 'float16'])
 @pytest.mark.parametrize("mamba_model_root", [
-    'mamba-130m', 'mamba-2.8b', 'mamba-1.4b', 'mamba-790m', 'mamba-370m',
-    'mamba2-130m', 'mamba2-2.7b', 'mamba2-1.3b', 'mamba2-780m', 'mamba2-370m',
-    'mamba-codestral-7B-v0.1'
+    pytest.param('mamba-130m', marks=skip_post_blackwell), 'mamba-2.8b',
+    'mamba-1.4b', 'mamba-790m', 'mamba-370m', 'mamba2-130m', 'mamba2-2.7b',
+    'mamba2-1.3b', 'mamba2-780m', 'mamba2-370m',
+    pytest.param('mamba-codestral-7B-v0.1', marks=skip_post_blackwell)
 ],
                          indirect=True)
 def test_llm_mamba_1gpu(mamba_example_root, mamba_model_root,
@@ -18,10 +18,17 @@ import os
 import pytest
 from defs.common import (convert_weights, get_dummy_spec_decoding_heads,
                          venv_check_call)
-from defs.conftest import skip_fp8_pre_ada
+from defs.conftest import get_sm_version, skip_fp8_pre_ada, skip_post_blackwell
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
+@skip_post_blackwell
 @pytest.mark.parametrize("batch_size", [1, 8], ids=['bs1', 'bs8'])
 @pytest.mark.parametrize("data_type", ['bfloat16'])
 @pytest.mark.parametrize("num_medusa_heads", [4], ids=['4-heads'])
@@ -79,6 +86,7 @@ def test_llm_medusa_1gpu(batch_size, data_type, medusa_model_roots,
     venv_check_call(llm_venv, summary_cmd)
 
 
+@skip_post_blackwell
 @pytest.mark.parametrize("batch_size", [1, 8], ids=['bs1', 'bs8'])
 @pytest.mark.parametrize("data_type", ['bfloat16', 'float16'])
 @pytest.mark.parametrize("num_medusa_heads", [4], ids=['4-heads'])
@@ -20,9 +20,15 @@ import psutil
 import pytest
 from defs.common import (convert_weights, quantize_data,
                          test_multi_lora_support, venv_check_call)
-from defs.conftest import skip_post_blackwell, skip_pre_ada
+from defs.conftest import get_sm_version, skip_post_blackwell, skip_pre_ada
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 def get_optimal_jobs():
     cpu_count = multiprocessing.cpu_count()
@@ -19,9 +19,16 @@ import os
 import pytest
 from defs.common import (convert_weights, generate_summary_cmd, quantize_data,
                          venv_check_call, venv_mpi_check_call)
-from defs.conftest import llm_models_root, skip_post_blackwell, skip_pre_ada
+from defs.conftest import (get_sm_version, llm_models_root, skip_post_blackwell,
+                           skip_pre_ada)
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @skip_post_blackwell
 @pytest.mark.parametrize("model_name", ['mixtral-8x7b-v0.1-AWQ'])
@@ -18,9 +18,16 @@ import os
 import pytest
 import torch
 from defs.common import convert_weights, venv_check_call, venv_mpi_check_call
-from defs.conftest import get_device_memory, skip_post_blackwell, skip_pre_ada
+from defs.conftest import (get_device_memory, get_sm_version,
+                           skip_post_blackwell, skip_pre_ada)
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.fixture(scope="module")
 def multimodal_example_root(llm_root):
@@ -623,19 +630,19 @@ def _test_llm_multimodal_general(llm_venv,
                  reason="Skip due to low memory")),
     'llava-onevision-qwen2-7b-ov-hf',
     'llava-onevision-qwen2-7b-ov-hf-video',
-    'nougat-base',
+    pytest.param('nougat-base', marks=skip_post_blackwell),
     'VILA1.5-3b',
     'cogvlm-chat',
     'fuyu-8b',
-    'deplot',
+    pytest.param('deplot', marks=skip_post_blackwell),
     pytest.param('neva-22b',
                  marks=pytest.mark.skip(reason="RCCA https://nvbugs/5220761")),
     'kosmos-2',
-    'video-neva',
+    pytest.param('video-neva', marks=skip_post_blackwell),
     pytest.param('Phi-3-vision-128k-instruct', marks=skip_post_blackwell),
     pytest.param('Phi-3.5-vision-instruct', marks=skip_post_blackwell),
     pytest.param('Phi-4-multimodal-instruct', marks=skip_post_blackwell),
-    'Llama-3.2-11B-Vision',
+    pytest.param('Llama-3.2-11B-Vision', marks=skip_post_blackwell),
     'Qwen2-VL-7B-Instruct',
     'internlm-xcomposer2-vl-7b',
     'Mistral-Small-3.1-24B-Instruct-2503',
@@ -688,8 +695,8 @@ def test_llm_multimodal_general(llm_venv, llm_root, llm_datasets_root,
     'Phi-3-vision-128k-instruct',
     'Phi-3.5-vision-instruct',
     'Phi-4-multimodal-instruct',
-    'Llama-3.2-11B-Vision-Instruct',
-    'Llama-3.2-11B-Vision',
+    pytest.param('Llama-3.2-11B-Vision-Instruct', marks=skip_post_blackwell),
+    pytest.param('Llama-3.2-11B-Vision', marks=skip_post_blackwell),
     'Qwen2-VL-7B-Instruct',
 ],
                          indirect=True)
@@ -15,9 +15,15 @@
 
 import pytest
 from defs.common import venv_check_call, venv_mpi_check_call
-from defs.conftest import skip_fp8_pre_ada
+from defs.conftest import get_sm_version, skip_fp8_pre_ada
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.mark.skip_less_device_memory(50000)
 @pytest.mark.parametrize("qformat", ["full_prec", "fp8", "int4_awq"])
@@ -2,9 +2,15 @@ from pathlib import Path
 
 import pytest
 from defs.common import convert_weights, venv_check_call, venv_mpi_check_call
-from defs.conftest import get_device_memory
+from defs.conftest import get_device_memory, get_sm_version
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 ROUGE1_ACCURACY_THRESHOLD = 20
 
 
@@ -18,9 +18,15 @@ from copy import deepcopy
 
 import pytest
 from defs.common import convert_weights, venv_check_call
-from defs.conftest import skip_post_blackwell
+from defs.conftest import get_sm_version, skip_post_blackwell
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 # TODO: remove skip after support NGram on B200
 @skip_post_blackwell
@@ -23,6 +23,12 @@ from defs.conftest import (get_device_memory, get_sm_version, skip_fp8_pre_ada,
                            skip_post_blackwell, skip_pre_ada)
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.fixture(scope="module")
 def phi_example_root(llm_root, llm_venv):
@@ -36,6 +42,7 @@ def phi_example_root(llm_root, llm_venv):
     return example_root
 
 
+@skip_post_blackwell
 @pytest.mark.skip_less_device_memory(40000)
 @pytest.mark.parametrize("num_beams", [1, 2, 4],
                          ids=lambda num_beams: f'nb:{num_beams}')
@@ -20,10 +20,16 @@ import os
 import pytest
 from defs.common import (convert_weights, test_multi_lora_support,
                          venv_check_call, venv_mpi_check_call)
-from defs.conftest import (get_device_count, get_device_memory,
+from defs.conftest import (get_device_count, get_device_memory, get_sm_version,
                            skip_post_blackwell, skip_pre_ada)
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.mark.parametrize(
     "context_fmha_type",
@@ -20,8 +20,15 @@ import re
 
 import pytest
 from defs.common import venv_check_call, venv_check_output
+from defs.conftest import get_sm_version
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @pytest.fixture(scope="module")
 def qwenvl_example_root(llm_root, llm_venv):
@@ -19,10 +19,17 @@ from pathlib import Path
 import pytest
 from defs.common import (convert_weights, generate_summary_cmd, quantize_data,
                          venv_check_call, venv_mpi_check_call)
-from defs.conftest import skip_fp8_pre_ada
+from defs.conftest import get_sm_version, skip_fp8_pre_ada, skip_post_blackwell
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
+@skip_post_blackwell
 @pytest.mark.parametrize("gemm_plugin", [True, False],
                          ids=["enable_gemm_plugin", "disable_gemm_plugin"])
 @pytest.mark.parametrize("gpt_attention_plugin", [True, False],
@@ -15,9 +15,17 @@
 
 import pytest
 from defs.common import convert_weights, venv_check_call
+from defs.conftest import get_sm_version, skip_post_blackwell
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
+@skip_post_blackwell
 @pytest.mark.parametrize("batch_size", [8], ids=['bs8'])
 @pytest.mark.parametrize("redrafter_num_beams", [5, 8], ids=['nb5', 'nb8'])
 @pytest.mark.parametrize("redrafter_draft_len_per_beam", [5], ids=['dl5'])
@@ -15,9 +15,15 @@
 
 import pytest
 from defs.common import convert_weights, venv_check_call
-from defs.conftest import skip_post_blackwell
+from defs.conftest import get_sm_version, skip_post_blackwell
 from defs.trt_test_alternative import check_call
 
+# skip trt flow cases on post-Blackwell-Ultra
+if get_sm_version() >= 103:
+    pytest.skip(
+        "TRT workflow tests are not supported on post Blackwell-Ultra architecture",
+        allow_module_level=True)
+
 
 @skip_post_blackwell
 @pytest.mark.parametrize("use_cpp_runtime", [True, False],
@ -45,193 +45,41 @@ examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (https
|
||||
examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-chunked_summarization_long] SKIP (https://nvbugs/5321371)
|
||||
test_e2e.py::test_openai_chat_structural_tag_example SKIP (https://nvbugspro.nvidia.com/bug/5375594)
|
||||
cpp/test_e2e.py::test_model[fp8-chatglm-90] SKIP (https://nvbugs/5034830)
|
||||
full:B200_PCIe/examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-disable_gemm_plugin] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-disable_gemm_plugin] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v2_1gpu_gemm_swiglu[llama-v2-7b-hf-fp8-float16] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_int8_kv_1gpu_summary[llama-7b-enable_weight_only-nb:4] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_int8_sq_ootb_1gpu_summary[llama-7b-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_wo_1gpu_summary[llama-7b-int4-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_int8_kv_awq_1gpu_summary[llama-7b-nb:4] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v2_1gpu_low_latency_gemm[llama-v2-7b-hf-fp8] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8] SKIP (Disable for Blackwell spec decoding)
|
||||
full:B200_PCIe/examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8] SKIP (Disable for Blackwell spec decoding)
|
||||
full:B200_PCIe/examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8] SKIP (Disable for Blackwell spec decoding)
|
||||
full:B200_PCIe/examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8] SKIP (Disable for Blackwell spec decoding)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int8] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int4] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestGpt2::test_smooth_quant[per_token=False-per_channel=False] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestGpt2::test_smooth_quant[per_token=True-per_channel=True] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_exaone.py::test_llm_exaone_1gpu[enable_weight_only-exaone_3.0_7.8b_instruct-float16-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_gpt.py::test_llm_gpt_starcoder_lora_1gpu[peft-lora-starcoder2-15b-unity-copilot-starcoder2-lora_fp16-base_fp16] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_gpt.py::test_llm_gpt_starcoder_lora_1gpu[peft-lora-starcoder2-15b-unity-copilot-starcoder2-lora_fp16-base_fp8] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_awq] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_int8_wo] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-mini-128k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-small-8k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3.5-mini-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/trt/functional SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/trt/quantization SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestVicuna7B::test_medusa[cuda_graph=False] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestVicuna7B::test_medusa[cuda_graph=True] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestVicuna7B::test_lookahead SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/trt/attention/test_bert_attention.py SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/trt/model/test_mamba.py SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/bindings SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/trt/quantization/test_weight_only_quant_matmul.py SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_gpt.py::test_llm_gpt2_starcoder_weight_only[starcoder2-int8-float16] SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/trt/model/test_gpt.py -k "partition0" SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/unittest/test_model_runner_cpp.py SKIP (Disable for Blackwell)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_smooth_quant_1gpu_summary[float16-llama-7b-enable_ptpc-nb:4] SKIP (Disable for Blackwell for SQ)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_wo_1gpu_summary[llama-7b-int8-nb:1] SKIP (Disable for Blackwell for WO)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v3_int8_gptq_1gpu_summary[llama-v3-8b-instruct-hf-float16-nb:1] SKIP (Disable for Blackwell for weight only)
|
||||
full:B200_PCIe/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[enable_weight_only] SKIP (Disable for Blackwell for weight only)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v3_1_quantization_1gpu_manage_weights[llama-3.1-8b-int4_wo] SKIP (Disable for Blackwell for weight only)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v3_1_autoq_1gpu_mmlu[llama-3.1-8b] SKIP (Disable for Blackwell for weight only)
|
||||
full:B200_PCIe/examples/test_multimodal.py::test_llm_multimodal_general[nougat-base-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support custom mask)
|
||||
full:B200_PCIe/examples/test_multimodal.py::test_llm_multimodal_general[deplot-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support custom mask)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=False-chunked_context=False-typical_acceptance=False] SKIP (Disable for Blackwell for Speculative Dec)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=False-typical_acceptance=False] SKIP (Disable for Blackwell for Speculative Dec)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=True-typical_acceptance=False] SKIP (Disable for Blackwell for Speculative Dec)
|
||||
full:B200_PCIe/examples/test_medusa.py::test_llm_medusa_1gpu[use_py_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8] SKIP (Disable for Blackwell for Speculative Dec)
|
||||
full:B200_PCIe/unittest/llmapi/test_llm_models.py -m "part0" SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 80/96)
|
||||
full:B200_PCIe/examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 96)
|
||||
full:B200_PCIe/examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 96)
|
||||
full:B200_PCIe/examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support custom mask)
|
||||
full:B200_PCIe/examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support custom mask)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.1-8b-enable_fp8] SKIP (Disable for Blackwell for fp8 rowwise gemm)
|
||||
full:B200_PCIe/examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.1-8b-enable_fp8_meta_recipe] SKIP (Disable for Blackwell for fp8 rowwise gemm)
|
||||
full:B200_PCIe/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (megatron-core 0.8 is not supported in python 3.12)
|
||||
full:B200_PCIe/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (megatron-core 0.8 is not supported in python 3.12)
|
||||
full:B200_PCIe/accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin SKIP (Disable for Blackwell OOM)
|
||||
full:B200_PCIe/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only] SKIP (Disable for Blackwell OOM)
|
||||
full:B200_PCIe/unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" SKIP (Disable for Blackwell OOM)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v2_1gpu_auto_parallel[llama-v2-7b-hf] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-disable_gemm_plugin] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-disable_gemm_plugin] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v2_1gpu_gemm_swiglu[llama-v2-7b-hf-fp8-float16] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_int8_kv_1gpu_summary[llama-7b-enable_weight_only-nb:4] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_int8_sq_ootb_1gpu_summary[llama-7b-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_wo_1gpu_summary[llama-7b-int4-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_int8_kv_awq_1gpu_summary[llama-7b-nb:4] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v2_1gpu_low_latency_gemm[llama-v2-7b-hf-fp8] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8] SKIP (Disable for Blackwell spec decoding)
|
||||
full:B200/examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8] SKIP (Disable for Blackwell spec decoding)
|
||||
full:B200/examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8] SKIP (Disable for Blackwell spec decoding)
|
||||
full:B200/examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8] SKIP (Disable for Blackwell spec decoding)
|
||||
full:B200/accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int8] SKIP (Disable for Blackwell)
|
||||
full:B200/accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int4] SKIP (Disable for Blackwell)
|
||||
full:B200/accuracy/test_cli_flow.py::TestGpt2::test_smooth_quant[per_token=False-per_channel=False] SKIP (Disable for Blackwell)
|
||||
full:B200/accuracy/test_cli_flow.py::TestGpt2::test_smooth_quant[per_token=True-per_channel=True] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_exaone.py::test_llm_exaone_1gpu[enable_weight_only-exaone_3.0_7.8b_instruct-float16-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_gpt.py::test_llm_gpt_starcoder_lora_1gpu[peft-lora-starcoder2-15b-unity-copilot-starcoder2-lora_fp16-base_fp16] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_gpt.py::test_llm_gpt_starcoder_lora_1gpu[peft-lora-starcoder2-15b-unity-copilot-starcoder2-lora_fp16-base_fp8] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_awq] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_int8_wo] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-mini-128k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-small-8k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-small-128k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3.5-mini-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3-mini-128k-instruct-fp8-float16] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3.5-mini-instruct-fp8-float16] SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/trt/functional SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/trt/quantization SKIP (Disable for Blackwell)
|
||||
full:B200/accuracy/test_cli_flow.py::TestVicuna7B::test_medusa[cuda_graph=False] SKIP (Disable for Blackwell)
|
||||
full:B200/accuracy/test_cli_flow.py::TestVicuna7B::test_medusa[cuda_graph=True] SKIP (Disable for Blackwell)
|
||||
full:B200/accuracy/test_cli_flow.py::TestVicuna7B::test_lookahead SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/trt/attention/test_bert_attention.py SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/trt/model/test_mamba.py SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/bindings SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/trt/quantization/test_weight_only_quant_matmul.py SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_gpt.py::test_llm_gpt2_starcoder_weight_only[starcoder2-int8-float16] SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/trt/model/test_gpt.py -k "partition0" SKIP (Disable for Blackwell)
|
||||
full:B200/unittest/test_model_runner_cpp.py SKIP (Disable for Blackwell)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_smooth_quant_1gpu_summary[float16-llama-7b-enable_ptpc-nb:4] SKIP (Disable for Blackwell for SQ)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_wo_1gpu_summary[llama-7b-int8-nb:1] SKIP (Disable for Blackwell for WO)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v3_int8_gptq_1gpu_summary[llama-v3-8b-instruct-hf-float16-nb:1] SKIP (Disable for Blackwell for weight only)
|
||||
full:B200/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[enable_weight_only] SKIP (Disable for Blackwell for weight only)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v3_1_quantization_1gpu_manage_weights[llama-3.1-8b-int4_wo] SKIP (Disable for Blackwell for weight only)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v3_1_autoq_1gpu_mmlu[llama-3.1-8b] SKIP (Disable for Blackwell for weight only)
|
||||
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[nougat-base-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support custom mask)
|
||||
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[deplot-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support custom mask)
|
||||
full:B200/accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=False-chunked_context=False-typical_acceptance=False] SKIP (Disable for Blackwell for Speculative Dec)
|
||||
full:B200/accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=False-typical_acceptance=False] SKIP (Disable for Blackwell for Speculative Dec)
|
||||
full:B200/accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=True-typical_acceptance=False] SKIP (Disable for Blackwell for Speculative Dec)
|
||||
full:B200/examples/test_medusa.py::test_llm_medusa_1gpu[use_py_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8] SKIP (Disable for Blackwell for Speculative Dec)
|
||||
full:B200/unittest/llmapi/test_llm_models.py -m "part0" SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 80/96)
|
||||
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 96)
|
||||
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 96)
|
||||
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support custom mask)
|
||||
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (Disable for Blackwell for context fmha doesn't support custom mask)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.1-8b-enable_fp8] SKIP (Disable for Blackwell for fp8 rowwise gemm)
|
||||
full:B200/examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.1-8b-enable_fp8_meta_recipe] SKIP (Disable for Blackwell for fp8 rowwise gemm)
|
||||
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (megatron-core 0.8 is not supported in python 3.12)
|
||||
full:B200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (megatron-core 0.8 is not supported in python 3.12)
|
||||
full:B200/accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin SKIP (Disabled on Blackwell: OOM)
full:B200/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only] SKIP (Disabled on Blackwell: OOM)
full:B200/unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" SKIP (Disabled on Blackwell: OOM)
full:B200/examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-34b-Instruct-tp2pp2-int4_awq-nb:4] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-70b-hf-tp2pp2-int4_awq-nb:1] SKIP (not supported on B200)
full:B200/examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:1-nb:1-enable_fp8] SKIP (not supported on B200)
full:B200/examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:2-nb:1-disable_fp8] SKIP (not supported on B200)
full:B200/examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-mbart-large-50-many-to-one-mmt-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:2-nb:1-enable_fp8] SKIP (not supported on B200)
full:B200/examples/test_enc_dec.py::test_llm_enc_dec_general[no_compare_hf-byt5-small-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:1-nb:1-disable_fp8] SKIP (not supported on B200)
full:B200/examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-enable_weight_only] SKIP (not supported on B200)
full:B200/examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-chat-enable_weight_only] SKIP (not supported on B200)
full:B200/examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[enable_weight_only] SKIP (not supported on B200)
full:B200/examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2_7b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha_fp32_acc] SKIP (not supported on B200)
full:B200/examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2.5_1.5b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha_fp32_acc] SKIP (not supported on B200)
full:B200/examples/test_qwen.py::test_llm_qwen_7b_int8_kv_1node_1gpus[qwen2_7b_instruct-enable_gemm_plugin-enable_weight_only] SKIP (not supported on B200)
full:B200/examples/test_gpt.py::test_llm_gpt2_smooth_single_gpu_summary[enable_ptpc] SKIP (not supported on B200)
full:B200/examples/test_gpt.py::test_llm_gpt2_smooth_single_gpu_summary[disable_ptpc] SKIP (not supported on B200)
full:B200/examples/test_gpt.py::test_llm_gpt2_int8_kv_1gpu SKIP (not supported on B200)
full:B200/examples/test_gpt.py::test_llm_gpt2_starcoder_weight_only[starcoder-int8-float16] SKIP (not supported on B200)
full:B200/examples/test_gpt.py::test_llm_gpt2_starcoder_weight_only[starcoder-int4-float16] SKIP (not supported on B200)
full:B200/examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2b-int8_sq-bfloat16-8] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_v2_awq_2gpu_summary[llama-v2-7b-hf-nb:1] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_v2_awq_2gpu_summary[Llama-2-7B-AWQ-nb:1] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_v2_awq_2gpu_summary[Llama-2-7B-GPTQ-nb:4] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_sq_ootb] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_smooth_quant_1gpu_summary[float16-llama-7b-disable_ptpc-nb:1] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_int8_kv_1gpu_summary[llama-7b-enable_weight_only-nb:1] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_int8_kv_1gpu_summary[llama-7b-disable_weight_only-nb:1] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_v2_int8sq_2gpu_tp2[llama-v2-7b-hf-bfloat16-nb:1] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_int8_kv_awq_1gpu_summary[llama-7b-nb:1] SKIP (not supported on B200)
full:B200/accuracy/test_cli_flow.py::TestMixtral8x7B::test_weight_only_int4_tp2 SKIP (not supported on B200)
full:B200/accuracy/test_cli_flow.py::TestMixtral8x7B::test_weight_only_int8_tp2 SKIP (not supported on B200)
full:B200/examples/test_gpt.py::test_llm_gpt2_starcoder_weight_only[starcoderplus-int8-float16] SKIP (not supported on B200)
full:B200/examples/test_gpt.py::test_llm_gpt2_starcoder_weight_only[starcoderplus-int4-float16] SKIP (not supported on B200)
full:B200/examples/test_llama.py::test_llm_llama_lookahead_xqa_fp8_1gpu[llama-3.1-8b] SKIP (No XQA kernels available for speculative decoding mode)
full:B200/examples/test_llama.py::test_llm_llama_lookahead_xqa_fp8_1gpu[llama-3.2-1b] SKIP (No XQA kernels available for speculative decoding mode)
full:B200/examples/test_medusa.py::test_llm_medusa_1gpu[use_py_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs1] SKIP (No XQA kernels available for speculative decoding mode)
full:B200/examples/test_gpt.py::test_llm_gpt2_starcoder_weight_only[starcoder2-int4-float16] SKIP (not supported on B200)
full:B200/examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
full:B200/accuracy/test_cli_flow.py::TestGpt2::test_int8_kv_cache SKIP (not supported on B200)
full:B200/examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int8_sq-float16-enable_attn_plugin-enable_gemm_plugin] SKIP (not supported on B200)
full:B200/examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int4_awq-float16-enable_attn_plugin-enable_gemm_plugin] SKIP (not supported on B200)
full:B200/examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-fp8-float16-enable_attn_plugin-enable_gemm_plugin] SKIP (not supported on B200)
examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp4pp1-context_fmha] SKIP (https://nvbugs/5063469)
examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp2pp2-context_fmha_fp32_acc] SKIP (https://nvbugs/5063469)
examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
llmapi/test_llm_e2e.py::test_llmapi_build_command_parameters_align[llama-llama-models-v2/TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5061624)
test_e2e.py::test_openai_consistent_chat SKIP (https://nvbugs/5112075)
full:B200/examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-9b-it-fp8-bfloat16-8] SKIP (not supported on B200)
full:B200/examples/test_gpt.py::test_llm_gpt2_starcoder_1gpus SKIP (not supported on B200)
examples/test_eagle.py::test_qwen_eagle_1gpu[qwen_7b_chat-eagle1] SKIP (https://nvbugs/5206383)
examples/test_eagle.py::test_qwen_eagle_1gpu[qwen1.5_7b_chat-eagle1] SKIP (https://nvbugs/5206383)
examples/test_eagle.py::test_qwen_eagle_1gpu[qwen2_7b_instruct-eagle1] SKIP (https://nvbugs/5206383)
@ -250,12 +98,6 @@ examples/test_eagle.py::test_phi_eagle_1gpu[phi-2-eagle2] SKIP (https://nvbugs/5206383)
examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-mini-128k-instruct-eagle2] SKIP (https://nvbugs/5206383)
examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-small-128k-instruct-eagle2] SKIP (https://nvbugs/5206383)
examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3.5-mini-instruct-eagle2] SKIP (https://nvbugs/5206383)
full:B200/examples/test_llama.py::test_llm_llama_lookahead_single_gpu_summary[llama-3.1-8b] SKIP (not supported on B200)
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[nougat-base-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (TRTLLM-GEN does not support custom mask)
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[deplot-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (TRTLLM-GEN does not support custom mask)
full:B200/examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:2-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (TRTLLM-GEN does not support custom mask)
full:B200/examples/test_multimodal.py::test_llm_fp8_multimodal_general[fp8-fp8-scienceqa-Llama-3.2-11B-Vision-Instruct-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False] SKIP (TRTLLM-GEN does not support custom mask)
full:B200/examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-chat-disable_weight_only] SKIP (https://nvbugs/5114743)
examples/test_gpt.py::test_llm_gpt_starcoder_lora_1gpu[peft-lora-starcoder2-15b-unity-copilot-starcoder2-lora_fp16-base_fp16] SKIP (https://nvbugs/5114678)
examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-mbart-large-50-many-to-one-mmt-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:2-nb:1-enable_fp8] SKIP (https://nvbugs/5135328)
examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5141288)
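Each waive entry above follows the pattern "<optional stage/GPU prefix>/<pytest node id> [-m "<marker expression>"] SKIP (<reason>)". The sketch below shows one way such lines could be parsed; it is a minimal illustration under that assumed grammar, and WaiveEntry, parse_waive_line, and the regex are hypothetical names for this example, not TensorRT-LLM's actual waive-handling code.

import re
from typing import NamedTuple, Optional

# Hypothetical representation of one waive entry; field names are
# illustrative and do not mirror the repository's internal parser.
class WaiveEntry(NamedTuple):
    stage: Optional[str]   # e.g. "full:B200"; None when the entry has no prefix
    test_id: str           # pytest node id, possibly followed by a -m marker filter
    reason: str            # free text inside SKIP (...), e.g. a bug URL or short note

_WAIVE_RE = re.compile(
    r"^(?:(?P<stage>[\w.]+:\w+)/)?"          # optional "full:B200/"-style prefix
    r"(?P<test>\S+(?: -m \"[^\"]*\")?)"      # node id, optional -m "expr" filter
    r"\s+SKIP\s+\((?P<reason>.*)\)\s*$")

def parse_waive_line(line: str) -> Optional[WaiveEntry]:
    """Return a WaiveEntry for one waive line, or None for blanks/comments."""
    line = line.strip()
    if not line or line.startswith("#"):
        return None
    m = _WAIVE_RE.match(line)
    if m is None:
        raise ValueError(f"unrecognized waive entry: {line!r}")
    return WaiveEntry(m.group("stage"), m.group("test"), m.group("reason"))

For example, parse_waive_line('test_e2e.py::test_openai_consistent_chat SKIP (https://nvbugs/5112075)') yields stage=None, the bare node id, and the bug URL as the reason, while entries prefixed with full:B200/ populate the stage field.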