mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
tests: waive and unwaive QA test cases (#4644)
Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
This commit is contained in:
parent
10119412ef
commit
fbe48df361
@ -987,7 +987,8 @@ class TestMixtral8x7B(CliFlowAccuracyTestHarness):
|
||||
@pytest.mark.parametrize(
|
||||
"moe_tp_size", [1, 4, 8],
|
||||
ids=['expert_parallel', 'mixed_parallel', 'tensor_parallel'])
|
||||
def test_ootb_except_mha_tp8(self, moe_tp_size):
|
||||
def test_ootb_except_mha_tp8(self, moe_tp_size, mocker):
|
||||
mocker.patch.object(CnnDailymail, "MAX_BATCH_SIZE", 1)
|
||||
self.run(tp_size=8,
|
||||
extra_convert_args=[
|
||||
f"--moe_tp_size={moe_tp_size}",
|
||||
|
||||
@ -113,6 +113,7 @@ class TestMixtral8x7B(LlmapiAccuracyTestHarness):
|
||||
MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"
|
||||
MODEL_PATH = f"{llm_models_root()}/Mixtral-8x7B-v0.1"
|
||||
|
||||
@pytest.mark.skip_less_device_memory(80000)
|
||||
@pytest.mark.skip_less_device(2)
|
||||
def test_tp2(self):
|
||||
with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm:
|
||||
|
||||
@ -970,6 +970,7 @@ class TestLlama3_3NemotronSuper49Bv1(LlmapiAccuracyTestHarness):
|
||||
MODEL_PATH = f"{llm_models_root()}/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1"
|
||||
|
||||
@pytest.mark.skip_less_device(2)
|
||||
@pytest.mark.skip_less_device_memory(80000)
|
||||
def test_auto_dtype_tp2(self):
|
||||
with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm:
|
||||
task = MMLU(self.MODEL_NAME)
|
||||
|
||||
@ -304,6 +304,7 @@ def test_mistral_eagle_1gpu(llm_mistral_model_root,
|
||||
|
||||
|
||||
@skip_pre_ada
|
||||
@pytest.mark.skip_less_device_memory(80000)
|
||||
@pytest.mark.parametrize("use_dynamic_tree", [False, True],
|
||||
ids=['eagle1', 'eagle2'])
|
||||
@pytest.mark.parametrize("mistral_nemo_model_root", ['Mistral-Nemo-12b-Base'],
|
||||
|
||||
@ -81,6 +81,8 @@ def _test_llm_multimodal_general(llm_venv,
|
||||
|
||||
if "neva-22b" in tllm_model_name and get_device_memory() < 80000:
|
||||
pytest.skip("GPU memory is insufficient.")
|
||||
if "Mistral-Small" in tllm_model_name and get_device_memory() < 80000:
|
||||
pytest.skip("GPU memory is insufficient.")
|
||||
|
||||
print("Converting huggingface model into binary format...")
|
||||
# ckpt from llm_models/<model_name> --> cmodels/<model_name>/<dtype>
|
||||
|
||||
@ -1318,7 +1318,6 @@ def test_ptp_quickstart(llm_root, llm_venv):
|
||||
("Llama3.2-11B-BF16", "llama-3.2-models/Llama-3.2-11B-Vision"),
|
||||
("Nemotron4_4B-BF16", "nemotron/Minitron-4B-Base"),
|
||||
("Nemotron-H-8B", "Nemotron-H-8B-Base-8K"),
|
||||
("Qwen3-30B-A3B", "Qwen3/Qwen3-30B-A3B"),
|
||||
pytest.param('Llama3.1-8B-NVFP4',
|
||||
'nvfp4-quantized/Meta-Llama-3.1-8B',
|
||||
marks=skip_pre_blackwell),
|
||||
@ -1343,6 +1342,9 @@ def test_ptp_quickstart(llm_root, llm_venv):
|
||||
pytest.param('Mixtral-8x7B-FP8',
|
||||
'Mixtral-8x7B-Instruct-v0.1-fp8',
|
||||
marks=skip_pre_blackwell),
|
||||
pytest.param('Qwen3-30B-A3B',
|
||||
'Qwen3/Qwen3-30B-A3B',
|
||||
marks=pytest.mark.skip_less_device_memory(80000)),
|
||||
])
|
||||
def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path):
|
||||
print(f"Testing {model_name}.")
|
||||
|
||||
@ -420,7 +420,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-mt
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp4-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5294983)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[ep4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5234002)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-] SKIP (https://nvbugs/5234002)
|
||||
examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-27b-it-fp8-bfloat16-8] SKIP (https://nvbugs/5234164)
|
||||
full::GH200/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only] SKIP (https://nvbugs/5250460)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user