tests: waive and unwaive QA test cases (#4644)

Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
Ivy Zhang 2025-05-27 15:19:45 +08:00 committed by GitHub
parent 10119412ef
commit fbe48df361
7 changed files with 10 additions and 3 deletions

View File

@@ -987,7 +987,8 @@ class TestMixtral8x7B(CliFlowAccuracyTestHarness):
     @pytest.mark.parametrize(
         "moe_tp_size", [1, 4, 8],
         ids=['expert_parallel', 'mixed_parallel', 'tensor_parallel'])
-    def test_ootb_except_mha_tp8(self, moe_tp_size):
+    def test_ootb_except_mha_tp8(self, moe_tp_size, mocker):
+        mocker.patch.object(CnnDailymail, "MAX_BATCH_SIZE", 1)
         self.run(tp_size=8,
                  extra_convert_args=[
                      f"--moe_tp_size={moe_tp_size}",

View File

@@ -113,6 +113,7 @@ class TestMixtral8x7B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"
     MODEL_PATH = f"{llm_models_root()}/Mixtral-8x7B-v0.1"
+    @pytest.mark.skip_less_device_memory(80000)
     @pytest.mark.skip_less_device(2)
     def test_tp2(self):
         with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm:
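
`skip_less_device` and `skip_less_device_memory` are markers specific to this test suite; their real implementation lives in the repository's test infrastructure. Purely as an illustration of how such a marker can be honored at collection time, a conftest hook along these lines would work (the MiB unit and single-GPU check are assumptions, not the repo's code):

# conftest.py -- illustrative sketch only, not this repository's actual hook.
import pytest
import torch

def pytest_collection_modifyitems(config, items):
    for item in items:
        marker = item.get_closest_marker("skip_less_device_memory")
        if marker is None:
            continue
        required_mib = marker.args[0]
        if not torch.cuda.is_available():
            item.add_marker(pytest.mark.skip(reason="no CUDA device available"))
            continue
        # Assumption: the threshold is compared against GPU 0's total memory in MiB.
        total_mib = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
        if total_mib < required_mib:
            item.add_marker(
                pytest.mark.skip(reason=f"needs >= {required_mib} MiB of GPU memory"))

A real setup would also register the marker (for example in pytest.ini) so pytest does not warn about an unknown mark.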

View File

@@ -970,6 +970,7 @@ class TestLlama3_3NemotronSuper49Bv1(LlmapiAccuracyTestHarness):
     MODEL_PATH = f"{llm_models_root()}/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1"
     @pytest.mark.skip_less_device(2)
+    @pytest.mark.skip_less_device_memory(80000)
     def test_auto_dtype_tp2(self):
         with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm:
             task = MMLU(self.MODEL_NAME)

View File

@@ -304,6 +304,7 @@ def test_mistral_eagle_1gpu(llm_mistral_model_root,
 @skip_pre_ada
+@pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize("use_dynamic_tree", [False, True],
                          ids=['eagle1', 'eagle2'])
 @pytest.mark.parametrize("mistral_nemo_model_root", ['Mistral-Nemo-12b-Base'],

View File

@@ -81,6 +81,8 @@ def _test_llm_multimodal_general(llm_venv,
     if "neva-22b" in tllm_model_name and get_device_memory() < 80000:
         pytest.skip("GPU memory is insufficient.")
+    if "Mistral-Small" in tllm_model_name and get_device_memory() < 80000:
+        pytest.skip("GPU memory is insufficient.")
     print("Converting huggingface model into binary format...")
     # ckpt from llm_models/<model_name> --> cmodels/<model_name>/<dtype>
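
Unlike the marker-based waivers above, this change skips inside the test body via the suite's get_device_memory() helper, so the check happens at run time per model. A generic sketch of the same in-test skip pattern, with a torch-based stand-in for that helper (an assumption made only to keep the example self-contained):

import pytest
import torch

def _device_memory_mib() -> int:
    # Assumed stand-in for the suite's get_device_memory() helper.
    if not torch.cuda.is_available():
        return 0
    return torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)

def test_mistral_small_multimodal():
    if _device_memory_mib() < 80000:
        pytest.skip("GPU memory is insufficient.")
    # ... the rest of the test runs only on devices with roughly 80 GB or more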

View File

@@ -1318,7 +1318,6 @@ def test_ptp_quickstart(llm_root, llm_venv):
     ("Llama3.2-11B-BF16", "llama-3.2-models/Llama-3.2-11B-Vision"),
     ("Nemotron4_4B-BF16", "nemotron/Minitron-4B-Base"),
     ("Nemotron-H-8B", "Nemotron-H-8B-Base-8K"),
-    ("Qwen3-30B-A3B", "Qwen3/Qwen3-30B-A3B"),
     pytest.param('Llama3.1-8B-NVFP4',
                  'nvfp4-quantized/Meta-Llama-3.1-8B',
                  marks=skip_pre_blackwell),
@@ -1343,6 +1342,9 @@ def test_ptp_quickstart(llm_root, llm_venv):
     pytest.param('Mixtral-8x7B-FP8',
                  'Mixtral-8x7B-Instruct-v0.1-fp8',
                  marks=skip_pre_blackwell),
+    pytest.param('Qwen3-30B-A3B',
+                 'Qwen3/Qwen3-30B-A3B',
+                 marks=pytest.mark.skip_less_device_memory(80000)),
 ])
 def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path):
     print(f"Testing {model_name}.")

View File

@@ -420,7 +420,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-mt
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp4-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5294983)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[ep4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5234002)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-] SKIP (https://nvbugs/5234002)
examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-27b-it-fp8-bfloat16-8] SKIP (https://nvbugs/5234164)
full::GH200/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only] SKIP (https://nvbugs/5250460)
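
The final file is the QA waive list: one full test ID per line, followed by SKIP and the tracking nvbugs URL in parentheses. As an illustration of that format only (the repository's own tooling may parse it differently), such a list could be loaded like this:

# Illustrative parser for "<test_id> SKIP (<reason_url>)" waive lines; the real
# harness in this repository may consume the file differently.
import re

_WAIVE_LINE = re.compile(r"^(?P<test_id>\S+)\s+SKIP\s+\((?P<reason>[^)]+)\)\s*$")

def load_waives(path: str) -> dict[str, str]:
    waives: dict[str, str] = {}
    with open(path) as f:
        for raw in f:
            match = _WAIVE_LINE.match(raw.strip())
            if match:
                waives[match.group("test_id")] = match.group("reason")
    return waives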