[None][chore] AutoDeploy: clean up accuracy test configs (#8134)

Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>
This commit is contained in:
Lucas Liebenwein 2025-10-06 15:51:01 -04:00 committed by GitHub
parent 98b3af4d4e
commit 3492391feb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 9 additions and 7 deletions

View File

@@ -66,11 +66,13 @@ class TestLlama3_1_8B(LlmapiAccuracyTestHarness):
use_beam_search=beam_width > 1)
@pytest.mark.skip_less_device_memory(32000)
def test_auto_dtype(self):
@pytest.mark.parametrize("world_size", [1, 2, 4])
def test_auto_dtype(self, world_size):
kwargs = self.get_default_kwargs()
sampling_params = self.get_default_sampling_params()
with AutoDeployLLM(model=self.MODEL_PATH,
tokenizer=self.MODEL_PATH,
world_size=world_size,
**kwargs) as llm:
task = CnnDailymail(self.MODEL_NAME)
task.evaluate(llm)

View File

@@ -74,6 +74,8 @@ l0_b200:
- unittest/_torch/modeling -k "modeling_llama"
- unittest/_torch/modeling -k "modeling_mixtral"
- unittest/_torch/modeling -k "modeling_gpt_oss"
# ------------- AutoDeploy tests ---------------
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[1]
- unittest/_torch/auto_deploy/unit/singlegpu
- condition:
ranges:

View File

@@ -181,5 +181,3 @@ l0_dgx_b200:
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass-auto]
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton-auto]
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf]
# ------------- AutoDeploy tests ---------------
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype

View File

@@ -41,7 +41,7 @@ l0_dgx_h100:
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True-True-False]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True-True-True]
# ------------- AutoDeploy tests ---------------
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[2]
- condition:
ranges:
system_gpu_count:

View File

@@ -34,8 +34,6 @@ l0_dgx_h200:
- unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep1-disable_adp-enable_graph-tp8-trtllm-scout]
- unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep4-enable_adp-enable_graph-tp8-trtllm-scout]
- unittest/llmapi/test_llm_pytorch.py::test_nemotron_nas_lora
# ------------- AutoDeploy tests ---------------
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype
- condition:
ranges:
system_gpu_count:
@@ -121,6 +119,8 @@ l0_dgx_h200:
- test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b]
- test_e2e.py::test_trtllm_bench_mgmn
- unittest/_torch/multi_gpu -m "post_merge" TIMEOUT (90)
# ------------- AutoDeploy tests ---------------
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[4]
- condition:
ranges:
system_gpu_count:

View File

@@ -114,7 +114,7 @@ l0_h100:
- test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True] TIMEOUT (90)
- test_e2e.py::test_trtllm_benchmark_serving[llama-3.1-model/Meta-Llama-3.1-8B]
# ------------- AutoDeploy tests ---------------
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[1]
- accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype
- condition:
ranges: