[AutoDeploy] deprecate CI post-merge tests and keep them for local testing (#4892)

Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>

parent 8e0d96fcc6
commit f9d45e03a4
@@ -79,8 +79,6 @@ l0_dgx_h200:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - unittest/_torch/auto_deploy/integration/test_ad_build.py
-  - unittest/_torch/auto_deploy/integration/test_lm_eval.py
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=FLASHINFER-torch_compile=False]
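With those two entries gone from the l0_dgx_h200 stage, the AutoDeploy integration tests no longer run post-merge. Per the commit title they stay in the tree for local use; a minimal invocation sketch (assuming you run from the integration-test root, so the paths match the list entries removed above):

```python
# Minimal sketch: run the deprecated AutoDeploy integration tests directly,
# since they are no longer registered in any CI test list. The working
# directory is assumed to be the integration-test root used by the list above.
import pytest

if __name__ == "__main__":
    raise SystemExit(pytest.main([
        "unittest/_torch/auto_deploy/integration/test_ad_build.py",
        "unittest/_torch/auto_deploy/integration/test_lm_eval.py",
        "-v",
    ]))
```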
@@ -289,7 +289,6 @@ full:B200/test_e2e.py::test_ptp_quickstart_advanced[Nemotron4_4B-BF16-nemotron/M
 full:B200/test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B] SKIP (https://nvbugs/5136994)
 full:B200/test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-hf-nvfp4-False-False] SKIP (https://nvbugs/5136994)
 examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5141288)
-unittest/_torch/auto_deploy/integration/test_lm_eval.py SKIP (https://nvbugs/5144854)
 examples/test_qwen.py::test_llm_qwen1_5_moe_plugin_single_gpu_lora[qwen1.5_moe_a2.7b_chat-Upcycled-Qwen1.5-MoE2.7B-LoRA] SKIP (https://nvbugs/5155141)
 full:L40S/accuracy/test_cli_flow.py::TestGemma2_9BIt::test_auto_dtype SKIP (https://nvbugs/5176851)
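This waive list pairs each test id with a SKIP marker and a tracking bug, one per line; the test_lm_eval.py entry is dropped because a test that no longer runs in CI needs no waiver. As an illustration of the line format (the parser below is an assumption made for this sketch, not project tooling):

```python
# Illustrative sketch only: parse waive-list lines of the form
#   <test-id> SKIP (<bug-url>)
# This assumes the format shown in the hunk above; it is not project code.
import re

WAIVE_RE = re.compile(r"^(?P<test>\S+)\s+SKIP\s+\((?P<bug>[^)]+)\)$")

def parse_waive_line(line: str):
    """Return (test_id, bug_url), or None if the line is not a waiver."""
    m = WAIVE_RE.match(line.strip())
    return (m.group("test"), m.group("bug")) if m else None

# Example taken from the entry removed above:
print(parse_waive_line(
    "unittest/_torch/auto_deploy/integration/test_lm_eval.py "
    "SKIP (https://nvbugs/5144854)"
))
```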
@@ -1,4 +1,7 @@
-"""Testing build_and_run_ad end2end."""
+"""Testing build_and_run_ad end2end.
+
+NOTE (lucaslie): this test is for local testing only. It is not registered to run as part of CI.
+"""
 
 from typing import Dict, Optional
 
@@ -64,9 +67,6 @@ from utils.llm_data import llm_models_root
             "attn_backend": "FlashInfer",
             "compile_backend": "torch-opt",
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5095416"),
-        ],
     ),
     # 2-layer llama3.1-8B model on 4 GPUs
     param_with_device_count(
@@ -92,9 +92,6 @@ from utils.llm_data import llm_models_root
             "benchmark": True,
             "attn_backend": "FlashInfer",
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5178508"),
-        ],
     ),
     # full NemotronNAS (Llama-3.1-Nemotron-51B) with torch-opt backend + simple runtime
     param_with_device_count(
@@ -105,9 +102,6 @@ from utils.llm_data import llm_models_root
                 "nvidia/Llama-3_1-Nemotron-51B-Instruct",
             )
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5121522"),
-        ],
     ),
     # Mixtral 8x7B with torch-simple backend + simple runtime
     param_with_device_count(
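All three hunks in this file delete a `marks_extra=[pytest.mark.skip(...)]` argument, so the parametrized cases previously disabled under nvbugs tickets now run unconditionally when the file is invoked locally. `param_with_device_count` itself is not shown in the diff; a minimal sketch of the pattern its call sites imply (the signature and skip condition are assumptions) combines a GPU-count guard with any extra marks:

```python
# Sketch of the parametrization pattern suggested by the diff. The real
# param_with_device_count helper is not shown here; this assumes it wraps
# pytest.param and skips when fewer GPUs are available than required.
from typing import Any, List, Optional

import pytest
import torch


def param_with_device_count(
    device_count: int,
    *args: Any,
    marks_extra: Optional[List[Any]] = None,
) -> Any:
    """Build a pytest.param that is skipped unless enough GPUs are present."""
    marks = [
        pytest.mark.skipif(
            torch.cuda.device_count() < device_count,
            reason=f"requires {device_count} GPUs",
        )
    ]
    marks.extend(marks_extra or [])
    return pytest.param(device_count, *args, marks=marks)
```

Under this reading, deleting `marks_extra` at a call site simply re-enables the case while the device-count guard stays in place.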
@@ -1,3 +1,8 @@
-"""Testing lm-eval with AutoDeploy for basic accuracy validation."""
+"""Testing lm-eval with AutoDeploy for basic accuracy validation.
+
+NOTE (lucaslie): this test is for local testing only. It is not registered to run as part of CI.
+"""
+
 import gc
 import glob
 import json
@@ -66,11 +71,6 @@ def _cli_evaluate_with_mocks(args):
         ["gsm8k", "mmlu"],
         ["exact_match,strict-match", "acc,none"],
         [0.75, 0.675],
-        marks_extra=[
-            pytest.mark.skip(
-                reason="https://nvbugspro.nvidia.com/bug/5123940; failed and timeout"
-            )
-        ],
     ),
     param_with_device_count(
         2,
@@ -97,9 +97,6 @@ def _cli_evaluate_with_mocks(args):
         [0.70, 0.64],
         marks_extra=[
             pytest.mark.skipif(not fp4_compatible(), reason="Requires fp4 support"),
-            pytest.mark.skip(
-                reason="https://nvbugspro.nvidia.com/bug/5095416; to add ckpt on llm-models"
-            ),
         ],
     ),
     param_with_device_count(
@@ -114,9 +111,6 @@ def _cli_evaluate_with_mocks(args):
         ["gsm8k", "mmlu"],
         ["exact_match,strict-match", "acc,none"],
         [0.583, 0.67],
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5095416; timeout")
-        ],
     ),
 ],
 )
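Each lm-eval parametrization above lists tasks (`gsm8k`, `mmlu`), the metric key to read (`exact_match,strict-match`, `acc,none`), and a per-task minimum score (e.g. `[0.75, 0.675]`). A hedged sketch of the threshold check this shape implies (the helper and results layout are assumptions; the `"metric,filter"` keys mirror how lm-eval reports metrics):

```python
# Sketch of the accuracy check implied by the parametrization above. The
# results layout mirrors lm-eval's "<metric>,<filter>" keys; the helper name
# and exact structure are assumptions, not the project's code.
from typing import Dict, List


def check_accuracy(
    results: Dict[str, Dict[str, float]],  # task -> {"metric,filter": score}
    tasks: List[str],
    metrics: List[str],
    thresholds: List[float],
) -> None:
    for task, metric, threshold in zip(tasks, metrics, thresholds):
        score = results[task][metric]
        assert score >= threshold, f"{task}: {metric}={score} < {threshold}"


# Values from the first parametrization in the diff:
check_accuracy(
    {"gsm8k": {"exact_match,strict-match": 0.80}, "mmlu": {"acc,none": 0.70}},
    ["gsm8k", "mmlu"],
    ["exact_match,strict-match", "acc,none"],
    [0.75, 0.675],
)
```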