[AutoDeploy] deprecate CI post-merge tests and keep them for local testing (#4892)

Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>
Author: Lucas Liebenwein · 2025-06-04 17:27:17 -07:00 · committed by GitHub
commit f9d45e03a4 (parent 8e0d96fcc6)
4 changed files with 9 additions and 24 deletions
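
The two AutoDeploy integration tests dropped from the CI lists below remain in the tree and can still be run by hand. A minimal sketch of a local invocation, assuming it is launched from the tests/ directory (the file paths come from the diff; everything else is illustrative):

    import pytest

    # Run the CI-deprecated AutoDeploy integration tests locally.
    pytest.main([
        "unittest/_torch/auto_deploy/integration/test_ad_build.py",
        "unittest/_torch/auto_deploy/integration/test_lm_eval.py",
    ])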

File: l0_dgx_h200 test list

@@ -79,8 +79,6 @@ l0_dgx_h200:
 - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
 - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=True]
 - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-- unittest/_torch/auto_deploy/integration/test_ad_build.py
-- unittest/_torch/auto_deploy/integration/test_lm_eval.py
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=False]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=True]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=FLASHINFER-torch_compile=False]

File: test waives list

@@ -289,7 +289,6 @@
 full:B200/test_e2e.py::test_ptp_quickstart_advanced[Nemotron4_4B-BF16-nemotron/M
 full:B200/test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B] SKIP (https://nvbugs/5136994)
 full:B200/test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-hf-nvfp4-False-False] SKIP (https://nvbugs/5136994)
 examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5141288)
-unittest/_torch/auto_deploy/integration/test_lm_eval.py SKIP (https://nvbugs/5144854)
 examples/test_qwen.py::test_llm_qwen1_5_moe_plugin_single_gpu_lora[qwen1.5_moe_a2.7b_chat-Upcycled-Qwen1.5-MoE2.7B-LoRA] SKIP (https://nvbugs/5155141)
 full:L40S/accuracy/test_cli_flow.py::TestGemma2_9BIt::test_auto_dtype SKIP (https://nvbugs/5176851)

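Each entry in this waives list pairs a test id with SKIP and a tracking bug, so dropping the test_lm_eval.py line is consistent with the test no longer being scheduled in CI at all. A tiny parsing sketch of that format (the regex is illustrative, not the repo's actual tooling):

    import re

    # One waive entry per line: "<test id> SKIP (<tracking bug URL>)".
    WAIVE_RE = re.compile(r"^(?P<test>\S+)\s+SKIP\s+\((?P<bug>[^)]+)\)$")

    entry = "unittest/_torch/auto_deploy/integration/test_lm_eval.py SKIP (https://nvbugs/5144854)"
    match = WAIVE_RE.match(entry)
    assert match is not None and match.group("bug") == "https://nvbugs/5144854"
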
File: unittest/_torch/auto_deploy/integration/test_ad_build.py

@@ -1,4 +1,7 @@
-"""Testing build_and_run_ad end2end."""
+"""Testing build_and_run_ad end2end.
+
+NOTE (lucaslie): this test is for local testing only. It is not registered to run as part of CI.
+"""
 
 from typing import Dict, Optional
@@ -64,9 +67,6 @@ from utils.llm_data import llm_models_root
                 "attn_backend": "FlashInfer",
                 "compile_backend": "torch-opt",
             },
-            marks_extra=[
-                pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5095416"),
-            ],
         ),
         # 2-layer llama3.1-8B model on 4 GPUs
         param_with_device_count(
@@ -92,9 +92,6 @@ from utils.llm_data import llm_models_root
                 "benchmark": True,
                 "attn_backend": "FlashInfer",
             },
-            marks_extra=[
-                pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5178508"),
-            ],
         ),
         # full NemotronNAS (Llama-3.1-Nemotron-51B) with torch-opt backend + simple runtime
         param_with_device_count(
@@ -105,9 +102,6 @@ from utils.llm_data import llm_models_root
                     "nvidia/Llama-3_1-Nemotron-51B-Instruct",
                 )
             },
-            marks_extra=[
-                pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5121522"),
-            ],
         ),
         # Mixtral 8x7B with torch-simple backend + simple runtime
         param_with_device_count(

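Each case above is built by param_with_device_count, whose marks_extra argument attaches additional pytest marks to one parametrized case; deleting the marks_extra=[pytest.mark.skip(...)] blocks therefore re-enables those cases for local runs. A minimal sketch of how such a helper could be shaped (hypothetical implementation; only the name and the marks_extra keyword appear in the diff):

    import pytest
    import torch

    def param_with_device_count(num_gpus, *args, marks_extra=None, **kwargs):
        """Wrap pytest.param in a GPU-count guard plus any extra marks."""
        marks = [
            pytest.mark.skipif(
                torch.cuda.device_count() < num_gpus,
                reason=f"requires {num_gpus} GPUs",
            )
        ]
        marks.extend(marks_extra or [])
        return pytest.param(num_gpus, *args, marks=marks, **kwargs)
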
File: unittest/_torch/auto_deploy/integration/test_lm_eval.py

@@ -1,3 +1,8 @@
+"""Testing lm-eval with AutoDeploy for basic accuracy validation.
+
+NOTE (lucaslie): this test is for local testing only. It is not registered to run as part of CI.
+"""
+
 import gc
 import glob
 import json
@@ -66,11 +71,6 @@ def _cli_evaluate_with_mocks(args):
             ["gsm8k", "mmlu"],
             ["exact_match,strict-match", "acc,none"],
             [0.75, 0.675],
-            marks_extra=[
-                pytest.mark.skip(
-                    reason="https://nvbugspro.nvidia.com/bug/5123940; failed and timeout"
-                )
-            ],
         ),
         param_with_device_count(
             2,
@@ -97,9 +97,6 @@ def _cli_evaluate_with_mocks(args):
             [0.70, 0.64],
             marks_extra=[
                 pytest.mark.skipif(not fp4_compatible(), reason="Requires fp4 support"),
-                pytest.mark.skip(
-                    reason="https://nvbugspro.nvidia.com/bug/5095416; to add ckpt on llm-models"
-                ),
             ],
         ),
         param_with_device_count(
@@ -114,9 +111,6 @@ def _cli_evaluate_with_mocks(args):
             ["gsm8k", "mmlu"],
             ["exact_match,strict-match", "acc,none"],
             [0.583, 0.67],
-            marks_extra=[
-                pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5095416; timeout")
-            ],
         ),
     ],
 )
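
The parametrization above pairs each lm-eval task with a metric key and a minimum expected score (e.g. gsm8k scored by exact_match,strict-match with a 0.75 floor). A sketch of the kind of threshold check such a test could perform on lm-eval results (hypothetical helper; the result layout is an assumption, not the file's actual code):

    from typing import Dict, List

    def check_accuracy(
        results: Dict[str, Dict[str, float]],
        tasks: List[str],
        metrics: List[str],
        thresholds: List[float],
    ) -> None:
        """Assert that each task's metric meets its minimum expected score."""
        for task, metric, threshold in zip(tasks, metrics, thresholds):
            score = results[task][metric]
            assert score >= threshold, f"{task}: {metric}={score} < {threshold}"

    # Example with made-up scores against the thresholds from the first case above.
    check_accuracy(
        {"gsm8k": {"exact_match,strict-match": 0.78}, "mmlu": {"acc,none": 0.69}},
        ["gsm8k", "mmlu"],
        ["exact_match,strict-match", "acc,none"],
        [0.75, 0.675],
    )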