tests: waive and unwaive QA test cases (#4644)

Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-05-27 15:19:45 +08:00 · 2025-05-27 15:19:45 +08:00 · fbe48df361
commit fbe48df361
parent 10119412ef
7 changed files with 10 additions and 3 deletions
--- a/tests/integration/defs/accuracy/test_cli_flow.py
+++ b/tests/integration/defs/accuracy/test_cli_flow.py
@ -987,7 +987,8 @@ class TestMixtral8x7B(CliFlowAccuracyTestHarness):
    @pytest.mark.parametrize(
        "moe_tp_size", [1, 4, 8],
        ids=['expert_parallel', 'mixed_parallel', 'tensor_parallel'])
-    def test_ootb_except_mha_tp8(self, moe_tp_size):
+    def test_ootb_except_mha_tp8(self, moe_tp_size, mocker):
+        mocker.patch.object(CnnDailymail, "MAX_BATCH_SIZE", 1)
        self.run(tp_size=8,
                 extra_convert_args=[
                     f"--moe_tp_size={moe_tp_size}",
--- a/tests/integration/defs/accuracy/test_llm_api.py
+++ b/tests/integration/defs/accuracy/test_llm_api.py
@ -113,6 +113,7 @@ class TestMixtral8x7B(LlmapiAccuracyTestHarness):
    MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"
    MODEL_PATH = f"{llm_models_root()}/Mixtral-8x7B-v0.1"

+    @pytest.mark.skip_less_device_memory(80000)
    @pytest.mark.skip_less_device(2)
    def test_tp2(self):
        with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm:
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@ -970,6 +970,7 @@ class TestLlama3_3NemotronSuper49Bv1(LlmapiAccuracyTestHarness):
    MODEL_PATH = f"{llm_models_root()}/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1"

    @pytest.mark.skip_less_device(2)
+    @pytest.mark.skip_less_device_memory(80000)
    def test_auto_dtype_tp2(self):
        with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm:
            task = MMLU(self.MODEL_NAME)
--- a/tests/integration/defs/examples/test_eagle.py
+++ b/tests/integration/defs/examples/test_eagle.py
@ -304,6 +304,7 @@ def test_mistral_eagle_1gpu(llm_mistral_model_root,


@skip_pre_ada
+@pytest.mark.skip_less_device_memory(80000)
@pytest.mark.parametrize("use_dynamic_tree", [False, True],
                         ids=['eagle1', 'eagle2'])
@pytest.mark.parametrize("mistral_nemo_model_root", ['Mistral-Nemo-12b-Base'],
--- a/tests/integration/defs/examples/test_multimodal.py
+++ b/tests/integration/defs/examples/test_multimodal.py
@ -81,6 +81,8 @@ def _test_llm_multimodal_general(llm_venv,

    if "neva-22b" in tllm_model_name and get_device_memory() < 80000:
        pytest.skip("GPU memory is insufficient.")
+    if "Mistral-Small" in tllm_model_name and get_device_memory() < 80000:
+        pytest.skip("GPU memory is insufficient.")

    print("Converting huggingface model into binary format...")
    # ckpt from llm_models/<model_name> --> cmodels/<model_name>/<dtype>
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@ -1318,7 +1318,6 @@ def test_ptp_quickstart(llm_root, llm_venv):
    ("Llama3.2-11B-BF16", "llama-3.2-models/Llama-3.2-11B-Vision"),
    ("Nemotron4_4B-BF16", "nemotron/Minitron-4B-Base"),
    ("Nemotron-H-8B", "Nemotron-H-8B-Base-8K"),
-    ("Qwen3-30B-A3B", "Qwen3/Qwen3-30B-A3B"),
    pytest.param('Llama3.1-8B-NVFP4',
                 'nvfp4-quantized/Meta-Llama-3.1-8B',
                 marks=skip_pre_blackwell),
@ -1343,6 +1342,9 @@ def test_ptp_quickstart(llm_root, llm_venv):
    pytest.param('Mixtral-8x7B-FP8',
                 'Mixtral-8x7B-Instruct-v0.1-fp8',
                 marks=skip_pre_blackwell),
+    pytest.param('Qwen3-30B-A3B',
+                 'Qwen3/Qwen3-30B-A3B',
+                 marks=pytest.mark.skip_less_device_memory(80000)),
 ])
 def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path):
    print(f"Testing {model_name}.")
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@ -420,7 +420,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-mt
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp4-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5294983)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[ep4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5234002)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-] SKIP (https://nvbugs/5234002)
 examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-27b-it-fp8-bfloat16-8] SKIP (https://nvbugs/5234164)
 full::GH200/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only] SKIP (https://nvbugs/5250460)