diff --git a/tests/integration/test_lists/test-db/l0_dgx_h200.yml b/tests/integration/test_lists/test-db/l0_dgx_h200.yml
index b5f946a70f..3ad7bd68a6 100644
--- a/tests/integration/test_lists/test-db/l0_dgx_h200.yml
+++ b/tests/integration/test_lists/test-db/l0_dgx_h200.yml
@@ -79,8 +79,6 @@ l0_dgx_h200:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - unittest/_torch/auto_deploy/integration/test_ad_build.py
-  - unittest/_torch/auto_deploy/integration/test_lm_eval.py
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=FLASHINFER-torch_compile=False]
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index edc55c0c00..36f0273bd0 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -289,7 +289,6 @@ full:B200/test_e2e.py::test_ptp_quickstart_advanced[Nemotron4_4B-BF16-nemotron/M
 full:B200/test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B] SKIP (https://nvbugs/5136994)
 full:B200/test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-hf-nvfp4-False-False] SKIP (https://nvbugs/5136994)
 examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5141288)
-unittest/_torch/auto_deploy/integration/test_lm_eval.py SKIP (https://nvbugs/5144854)
 examples/test_qwen.py::test_llm_qwen1_5_moe_plugin_single_gpu_lora[qwen1.5_moe_a2.7b_chat-Upcycled-Qwen1.5-MoE2.7B-LoRA] SKIP (https://nvbugs/5155141)
 full:L40S/accuracy/test_cli_flow.py::TestGemma2_9BIt::test_auto_dtype SKIP (https://nvbugs/5176851)
diff --git a/tests/unittest/_torch/auto_deploy/integration/test_ad_build.py b/tests/unittest/_torch/auto_deploy/integration/test_ad_build.py
index f865d05a91..2db454089f 100644
--- a/tests/unittest/_torch/auto_deploy/integration/test_ad_build.py
+++ b/tests/unittest/_torch/auto_deploy/integration/test_ad_build.py
@@ -1,4 +1,7 @@
-"""Testing build_and_run_ad end2end."""
+"""Testing build_and_run_ad end2end.
+
+NOTE (lucaslie): this test is for local testing only. It is not registered to run as part of CI.
+"""
 
 from typing import Dict, Optional
 
@@ -64,9 +67,6 @@ from utils.llm_data import llm_models_root
             "attn_backend": "FlashInfer",
             "compile_backend": "torch-opt",
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5095416"),
-        ],
     ),
     # 2-layer llama3.1-8B model on 4 GPUs
     param_with_device_count(
@@ -92,9 +92,6 @@ from utils.llm_data import llm_models_root
             "benchmark": True,
             "attn_backend": "FlashInfer",
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5178508"),
-        ],
     ),
     # full NemotronNAS (Llama-3.1-Nemotron-51B) with torch-opt backend + simple runtime
     param_with_device_count(
@@ -105,9 +102,6 @@ from utils.llm_data import llm_models_root
                 "nvidia/Llama-3_1-Nemotron-51B-Instruct",
             )
         },
-        marks_extra=[
-            pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5121522"),
-        ],
     ),
     # Mixtral 8x7B with torch-simple backend + simple runtime
     param_with_device_count(
diff --git a/tests/unittest/_torch/auto_deploy/integration/test_lm_eval.py b/tests/unittest/_torch/auto_deploy/integration/test_lm_eval.py
index f59e59264c..6cecdb6a07 100644
--- a/tests/unittest/_torch/auto_deploy/integration/test_lm_eval.py
+++ b/tests/unittest/_torch/auto_deploy/integration/test_lm_eval.py
@@ -1,3 +1,8 @@
+"""Testing lm-eval with AutoDeploy for basic accuracy validation.
+
+NOTE (lucaslie): this test is for local testing only. It is not registered to run as part of CI.
+"""
+
 import gc
 import glob
 import json
@@ -66,11 +71,6 @@ def _cli_evaluate_with_mocks(args):
             ["gsm8k", "mmlu"],
             ["exact_match,strict-match", "acc,none"],
             [0.75, 0.675],
-            marks_extra=[
-                pytest.mark.skip(
-                    reason="https://nvbugspro.nvidia.com/bug/5123940; failed and timeout"
-                )
-            ],
         ),
         param_with_device_count(
             2,
@@ -97,9 +97,6 @@ def _cli_evaluate_with_mocks(args):
             [0.70, 0.64],
             marks_extra=[
                 pytest.mark.skipif(not fp4_compatible(), reason="Requires fp4 support"),
-                pytest.mark.skip(
-                    reason="https://nvbugspro.nvidia.com/bug/5095416; to add ckpt on llm-models"
-                ),
             ],
         ),
         param_with_device_count(
@@ -114,9 +111,6 @@ def _cli_evaluate_with_mocks(args):
             ["gsm8k", "mmlu"],
             ["exact_match,strict-match", "acc,none"],
             [0.583, 0.67],
-            marks_extra=[
-                pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5095416; timeout")
-            ],
         ),
     ],
 )