From 73d30a23c75ed54d32ba9dd077b1c2aeaf136585 Mon Sep 17 00:00:00 2001 From: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com> Date: Tue, 1 Jul 2025 13:06:47 +0800 Subject: [PATCH] test: add more tests for GB200 with 8 GPUs/2 nodes in L0 tests (#5397) Signed-off-by: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com> --- jenkins/L0_Test.groovy | 5 ++++- .../integration/test_lists/test-db/l0_gb200_multi_nodes.yml | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index df15e9294c..615222231a 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -1824,7 +1824,10 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) fullSet += SBSASlurmTestConfigs.keySet() multiNodesSBSAConfigs = [ - "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-multi-node", "l0_gb200_multi_nodes", 1, 1, 8, 2], + // Each stage test 1 testcase with 8 GPUs and 2 nodes. + "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-multi-node", "l0_gb200_multi_nodes", 1, 3, 8, 2], + "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["gb200-multi-node", "l0_gb200_multi_nodes", 2, 3, 8, 2], + "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["gb200-multi-node", "l0_gb200_multi_nodes", 3, 3, 8, 2], ] fullSet += multiNodesSBSAConfigs.keySet() diff --git a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml index ddd0eb1ed6..bbe1c1b8a2 100644 --- a/tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml +++ b/tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml @@ -14,3 +14,6 @@ l0_gb200_multi_nodes: backend: pytorch tests: - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency] TIMEOUT (180) + - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] TIMEOUT (180) + - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] TIMEOUT (180) + - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] TIMEOUT (180)