diff --git a/tests/integration/defs/accuracy/test_llm_api_autodeploy.py b/tests/integration/defs/accuracy/test_llm_api_autodeploy.py index acbfdcc1d6..3fbb077843 100644 --- a/tests/integration/defs/accuracy/test_llm_api_autodeploy.py +++ b/tests/integration/defs/accuracy/test_llm_api_autodeploy.py @@ -66,11 +66,13 @@ class TestLlama3_1_8B(LlmapiAccuracyTestHarness): use_beam_search=beam_width > 1) @pytest.mark.skip_less_device_memory(32000) - def test_auto_dtype(self): + @pytest.mark.parametrize("world_size", [1, 2, 4]) + def test_auto_dtype(self, world_size): kwargs = self.get_default_kwargs() sampling_params = self.get_default_sampling_params() with AutoDeployLLM(model=self.MODEL_PATH, tokenizer=self.MODEL_PATH, + world_size=world_size, **kwargs) as llm: task = CnnDailymail(self.MODEL_NAME) task.evaluate(llm) diff --git a/tests/integration/test_lists/test-db/l0_b200.yml b/tests/integration/test_lists/test-db/l0_b200.yml index 1e052ea11e..8f2cf4abd0 100644 --- a/tests/integration/test_lists/test-db/l0_b200.yml +++ b/tests/integration/test_lists/test-db/l0_b200.yml @@ -74,6 +74,8 @@ l0_b200: - unittest/_torch/modeling -k "modeling_llama" - unittest/_torch/modeling -k "modeling_mixtral" - unittest/_torch/modeling -k "modeling_gpt_oss" + # ------------- AutoDeploy tests --------------- + - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[1] - unittest/_torch/auto_deploy/unit/singlegpu - condition: ranges: diff --git a/tests/integration/test_lists/test-db/l0_dgx_b200.yml b/tests/integration/test_lists/test-db/l0_dgx_b200.yml index f98f0c029d..5a8b3f35a6 100644 --- a/tests/integration/test_lists/test-db/l0_dgx_b200.yml +++ b/tests/integration/test_lists/test-db/l0_dgx_b200.yml @@ -181,5 +181,3 @@ l0_dgx_b200: - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass-auto] - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton-auto] - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] - # ------------- AutoDeploy tests --------------- - - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype diff --git a/tests/integration/test_lists/test-db/l0_dgx_h100.yml b/tests/integration/test_lists/test-db/l0_dgx_h100.yml index b4f8adc07c..b2cf63bf03 100644 --- a/tests/integration/test_lists/test-db/l0_dgx_h100.yml +++ b/tests/integration/test_lists/test-db/l0_dgx_h100.yml @@ -41,7 +41,7 @@ l0_dgx_h100: - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True-True-False] - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True-True-True] # ------------- AutoDeploy tests --------------- - - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype + - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[2] - condition: ranges: system_gpu_count: diff --git a/tests/integration/test_lists/test-db/l0_dgx_h200.yml b/tests/integration/test_lists/test-db/l0_dgx_h200.yml index 36cd9b9696..c31066f3d4 100644 --- a/tests/integration/test_lists/test-db/l0_dgx_h200.yml +++ b/tests/integration/test_lists/test-db/l0_dgx_h200.yml @@ -34,8 +34,6 @@ l0_dgx_h200: - unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep1-disable_adp-enable_graph-tp8-trtllm-scout] - unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep4-enable_adp-enable_graph-tp8-trtllm-scout] - unittest/llmapi/test_llm_pytorch.py::test_nemotron_nas_lora - # ------------- AutoDeploy tests --------------- - - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype - condition: ranges: system_gpu_count: @@ -121,6 +119,8 @@ l0_dgx_h200: - test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] - test_e2e.py::test_trtllm_bench_mgmn - unittest/_torch/multi_gpu -m "post_merge" TIMEOUT (90) + # ------------- AutoDeploy tests --------------- + - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[4] - condition: ranges: system_gpu_count: diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml index c6b4f446c5..8b4d3261be 100644 --- a/tests/integration/test_lists/test-db/l0_h100.yml +++ b/tests/integration/test_lists/test-db/l0_h100.yml @@ -114,7 +114,7 @@ l0_h100: - test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True] TIMEOUT (90) - test_e2e.py::test_trtllm_benchmark_serving[llama-3.1-model/Meta-Llama-3.1-8B] # ------------- AutoDeploy tests --------------- - - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype + - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[1] - accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype - condition: ranges: