[AutoDeploy] deprecate CI post-merge tests and keep them for local testing (#4892)

Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>

parent 8e0d96fcc6
commit f9d45e03a4
@@ -79,8 +79,6 @@ l0_dgx_h200:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - unittest/_torch/auto_deploy/integration/test_ad_build.py
-  - unittest/_torch/auto_deploy/integration/test_lm_eval.py
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=FLASHINFER-torch_compile=False]
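With those two entries gone from the l0_dgx_h200 stage, the AutoDeploy integration tests no longer run post-merge. Per the commit title they stay in the tree for local use; a minimal invocation sketch (assuming you run from the integration-test root, so the paths match the list entries removed above):

```python
# Minimal sketch: run the deprecated AutoDeploy integration tests directly,
# since they are no longer registered in any CI test list. The working
# directory is assumed to be the integration-test root used by the list above.
import pytest

if __name__ == "__main__":
    raise SystemExit(pytest.main([
        "unittest/_torch/auto_deploy/integration/test_ad_build.py",
        "unittest/_torch/auto_deploy/integration/test_lm_eval.py",
        "-v",
    ]))
```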
@@ -289,7 +289,6 @@ full:B200/test_e2e.py::test_ptp_quickstart_advanced[Nemotron4_4B-BF16-nemotron/M
 full:B200/test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B] SKIP (https://nvbugs/5136994)
 full:B200/test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-hf-nvfp4-False-False] SKIP (https://nvbugs/5136994)
 examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5141288)
-unittest/_torch/auto_deploy/integration/test_lm_eval.py SKIP (https://nvbugs/5144854)
 examples/test_qwen.py::test_llm_qwen1_5_moe_plugin_single_gpu_lora[qwen1.5_moe_a2.7b_chat-Upcycled-Qwen1.5-MoE2.7B-LoRA] SKIP (https://nvbugs/5155141)
 full:L40S/accuracy/test_cli_flow.py::TestGemma2_9BIt::test_auto_dtype SKIP (https://nvbugs/5176851)
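This waive list pairs each test id with a SKIP marker and a tracking bug, one per line; the test_lm_eval.py entry is dropped because a test that no longer runs in CI needs no waiver. As an illustration of the line format (the parser below is an assumption made for this sketch, not project tooling):

```python
# Illustrative sketch only: parse waive-list lines of the form
#   <test-id> SKIP (<bug-url>)
# This assumes the format shown in the hunk above; it is not project code.
import re

WAIVE_RE = re.compile(r"^(?P<test>\S+)\s+SKIP\s+\((?P<bug>[^)]+)\)$")

def parse_waive_line(line: str):
    """Return (test_id, bug_url), or None if the line is not a waiver."""
    m = WAIVE_RE.match(line.strip())
    return (m.group("test"), m.group("bug")) if m else None

# Example taken from the entry removed above:
print(parse_waive_line(
    "unittest/_torch/auto_deploy/integration/test_lm_eval.py "
    "SKIP (https://nvbugs/5144854)"
))
```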
@@ -1,4 +1,7 @@
-"""Testing build_and_run_ad end2end."""
+"""Testing build_and_run_ad end2end.
+
+NOTE (lucaslie): this test is for local testing only. It is not registered to run as part of CI.
+"""
 
 from typing import Dict, Optional
 
@@ -64,9 +67,6 @@ from utils.llm_data import llm_models_root
             "attn_backend": "FlashInfer",
             "compile_backend": "torch-opt",
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5095416"),
-        ],
     ),
     # 2-layer llama3.1-8B model on 4 GPUs
     param_with_device_count(
@@ -92,9 +92,6 @@ from utils.llm_data import llm_models_root
             "benchmark": True,
             "attn_backend": "FlashInfer",
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5178508"),
-        ],
     ),
     # full NemotronNAS (Llama-3.1-Nemotron-51B) with torch-opt backend + simple runtime
     param_with_device_count(
@@ -105,9 +102,6 @@ from utils.llm_data import llm_models_root
                 "nvidia/Llama-3_1-Nemotron-51B-Instruct",
             )
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5121522"),
-        ],
     ),
     # Mixtral 8x7B with torch-simple backend + simple runtime
     param_with_device_count(
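All three hunks in this file delete a `marks_extra=[pytest.mark.skip(...)]` argument, so the parametrized cases previously disabled under nvbugs tickets now run unconditionally when the file is invoked locally. `param_with_device_count` itself is not shown in the diff; a minimal sketch of the pattern its call sites imply (the signature and skip condition are assumptions) combines a GPU-count guard with any extra marks:

```python
# Sketch of the parametrization pattern suggested by the diff. The real
# param_with_device_count helper is not shown here; this assumes it wraps
# pytest.param and skips when fewer GPUs are available than required.
from typing import Any, List, Optional

import pytest
import torch


def param_with_device_count(
    device_count: int,
    *args: Any,
    marks_extra: Optional[List[Any]] = None,
) -> Any:
    """Build a pytest.param that is skipped unless enough GPUs are present."""
    marks = [
        pytest.mark.skipif(
            torch.cuda.device_count() < device_count,
            reason=f"requires {device_count} GPUs",
        )
    ]
    marks.extend(marks_extra or [])
    return pytest.param(device_count, *args, marks=marks)
```

Under this reading, deleting `marks_extra` at a call site simply re-enables the case while the device-count guard stays in place.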
@@ -1,3 +1,8 @@
-"""Testing lm-eval with AutoDeploy for basic accuracy validation."""
+"""Testing lm-eval with AutoDeploy for basic accuracy validation.
+
+NOTE (lucaslie): this test is for local testing only. It is not registered to run as part of CI.
+"""
+
 import gc
 import glob
 import json
@@ -66,11 +71,6 @@ def _cli_evaluate_with_mocks(args):
         ["gsm8k", "mmlu"],
         ["exact_match,strict-match", "acc,none"],
         [0.75, 0.675],
-        marks_extra=[
-            pytest.mark.skip(
-                reason="https://nvbugspro.nvidia.com/bug/5123940; failed and timeout"
-            )
-        ],
     ),
     param_with_device_count(
         2,
@@ -97,9 +97,6 @@ def _cli_evaluate_with_mocks(args):
         [0.70, 0.64],
         marks_extra=[
             pytest.mark.skipif(not fp4_compatible(), reason="Requires fp4 support"),
-            pytest.mark.skip(
-                reason="https://nvbugspro.nvidia.com/bug/5095416; to add ckpt on llm-models"
-            ),
         ],
     ),
     param_with_device_count(
@@ -114,9 +111,6 @@ def _cli_evaluate_with_mocks(args):
         ["gsm8k", "mmlu"],
         ["exact_match,strict-match", "acc,none"],
         [0.583, 0.67],
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5095416; timeout")
-        ],
     ),
 ],
 )
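Each lm-eval parametrization above lists tasks (`gsm8k`, `mmlu`), the metric key to read (`exact_match,strict-match`, `acc,none`), and a per-task minimum score (e.g. `[0.75, 0.675]`). A hedged sketch of the threshold check this shape implies (the helper and results layout are assumptions; the `"metric,filter"` keys mirror how lm-eval reports metrics):

```python
# Sketch of the accuracy check implied by the parametrization above. The
# results layout mirrors lm-eval's "<metric>,<filter>" keys; the helper name
# and exact structure are assumptions, not the project's code.
from typing import Dict, List


def check_accuracy(
    results: Dict[str, Dict[str, float]],  # task -> {"metric,filter": score}
    tasks: List[str],
    metrics: List[str],
    thresholds: List[float],
) -> None:
    for task, metric, threshold in zip(tasks, metrics, thresholds):
        score = results[task][metric]
        assert score >= threshold, f"{task}: {metric}={score} < {threshold}"


# Values from the first parametrization in the diff:
check_accuracy(
    {"gsm8k": {"exact_match,strict-match": 0.80}, "mmlu": {"acc,none": 0.70}},
    ["gsm8k", "mmlu"],
    ["exact_match,strict-match", "acc,none"],
    [0.75, 0.675],
)
```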