mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
test: add more tests for GB200 with 8 GPUs/2 nodes in L0 tests (#5397)
Signed-off-by: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com>
This commit is contained in:
parent
cb9f596dbe
commit
73d30a23c7
@ -1824,7 +1824,10 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
fullSet += SBSASlurmTestConfigs.keySet()
|
||||
|
||||
multiNodesSBSAConfigs = [
|
||||
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-multi-node", "l0_gb200_multi_nodes", 1, 1, 8, 2],
|
||||
// Each stage runs 1 test case on 8 GPUs across 2 nodes.
|
||||
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-multi-node", "l0_gb200_multi_nodes", 1, 3, 8, 2],
|
||||
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["gb200-multi-node", "l0_gb200_multi_nodes", 2, 3, 8, 2],
|
||||
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["gb200-multi-node", "l0_gb200_multi_nodes", 3, 3, 8, 2],
|
||||
]
|
||||
fullSet += multiNodesSBSAConfigs.keySet()
|
||||
|
||||
|
||||
@ -14,3 +14,6 @@ l0_gb200_multi_nodes:
|
||||
backend: pytorch
|
||||
tests:
|
||||
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency] TIMEOUT (180)
|
||||
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] TIMEOUT (180)
|
||||
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] TIMEOUT (180)
|
||||
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] TIMEOUT (180)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user