[TRTLLM-7755][infra] Add DGX_B300 and GB300 tests in CI

Signed-off-by: Yiqing Yan <yiqingy@nvidia.com>
This commit is contained in:
Yiqing Yan 2025-08-28 22:45:00 -07:00
parent f14c7402c1
commit 3c06303542
3 changed files with 36 additions and 0 deletions

View File

@ -1930,6 +1930,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
// Maps each x86 Slurm stage name to its configuration list.
// The second list element is a test-list name (e.g. "l0_dgx_b300").
// NOTE(review): the remaining elements look like a platform label followed by
// split/count/GPU numbers — confirm against the code that consumes this map.
x86SlurmTestConfigs = [
"DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-x4", "l0_dgx_b200", 1, 1, 4],
"DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 1, 4],
]
// Add every x86 Slurm stage name to the overall stage set.
fullSet += x86SlurmTestConfigs.keySet()
@ -1962,6 +1963,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
// "GB200-PyTorch-1": ["gb200-single", "l0_gb200", 1, 3],
"GB200-4_GPUs-PyTorch-1": ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4],
"GB200-4_GPUs-PyTorch-Post-Merge-1": ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4],
"GB300-4_GPUs-PyTorch-Post-Merge-1": ["gb300-x4", "l0_gb300_multi_gpus", 1, 1, 4],
]
fullSet += SBSASlurmTestConfigs.keySet()

View File

@ -0,0 +1,17 @@
# Test list "l0_dgx_b300": one condition block plus the tests selected when it matches.
# NOTE(review): leading indentation is not visible in this view; the nesting of
# ranges/wildcards/terms under "condition" is assumed — confirm against the real file.
version: 0.0.1
l0_dgx_b300:
- condition:
ranges:
system_gpu_count:
# Lower and upper bound are both 4, so the range admits only a GPU count of 4.
gte: 4
lte: 4
wildcards:
gpu:
# NOTE(review): presumably the GPU name pattern reported by B300 hardware — confirm.
- '*gb110*'
linux_distribution_name: ubuntu*
# x86_64 host CPU; the otherwise identical l0_gb300_multi_gpus list uses aarch64.
cpu: x86_64
terms:
stage: post_merge
backend: pytorch
tests:
# NOTE(review): -k "deepseek" looks like a pytest keyword filter limiting the run to deepseek tests — confirm.
- unittest/_torch/multi_gpu_modeling -k "deepseek"

View File

@ -0,0 +1,17 @@
# Test list "l0_gb300_multi_gpus": one condition block plus the tests selected when it matches.
# NOTE(review): leading indentation is not visible in this view; the nesting of
# ranges/wildcards/terms under "condition" is assumed — confirm against the real file.
version: 0.0.1
l0_gb300_multi_gpus:
- condition:
ranges:
system_gpu_count:
# Lower and upper bound are both 4, so the range admits only a GPU count of 4.
gte: 4
lte: 4
wildcards:
gpu:
# NOTE(review): presumably the GPU name pattern reported by GB300 hardware — confirm.
- '*gb110*'
linux_distribution_name: ubuntu*
# aarch64 host CPU; the otherwise identical l0_dgx_b300 list uses x86_64.
cpu: aarch64
terms:
stage: post_merge
backend: pytorch
tests:
# NOTE(review): -k "deepseek" looks like a pytest keyword filter limiting the run to deepseek tests — confirm.
- unittest/_torch/multi_gpu_modeling -k "deepseek"