test: [TRTLLM-3994] Support running only PyTorch tests (#3013)

* [TRTLLM-3994] Support running only PyTorch tests

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Move perf test to TensorRT backend

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Address review comments

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

---------

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>
Zhanrui Sun 2025-04-03 13:46:09 +08:00 committed by GitHub
parent dcc0ebd273
commit 7f03125098
10 changed files with 282 additions and 94 deletions


@@ -112,6 +112,8 @@ def DISABLE_MULTI_GPU_TEST = "disable_multi_gpu_test"
def EXTRA_STAGE_LIST = "extra_stage"
@Field
def MULTI_GPU_FILE_CHANGED = "multi_gpu_file_changed"
@Field
def ONLY_PYTORCH_FILE_CHANGED = "only_pytorch_file_changed"
def testFilter = [
(REUSE_STAGE_LIST): null,
@@ -124,6 +126,7 @@ def testFilter = [
(DISABLE_MULTI_GPU_TEST): false,
(EXTRA_STAGE_LIST): null,
(MULTI_GPU_FILE_CHANGED): false,
(ONLY_PYTORCH_FILE_CHANGED): false,
]
String getShortenedJobName(String path)
@@ -478,7 +481,7 @@ def generateStageFailTestResultXml(stageName, subName, failureLog, resultPath) {
</failure></testcase></testsuite></testsuites>"""
}
def getMakoOpts(getMakoScript, makoArgs="") {
def getMakoOpts(getMakoScript, makoArgs=[]) {
// We want to save a map for the Mako opts
def makoOpts = [:]
def turtleOutput = ""
@@ -492,8 +495,9 @@ def getMakoOpts(getMakoScript, makoArgs="") {
getMakoScript,
"--device 0"].join(" ")
if (makoArgs != "") {
listMakoCmd = [listMakoCmd, "--mako-opt ${makoArgs}"].join(" ")
if (makoArgs) {
def makoOptArgs = makoArgs.collect { "--mako-opt " + it }
listMakoCmd += " " + makoOptArgs.join(" ")
}
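// --- Illustration only, not part of the change: assuming makoArgs holds
// --- stage/backend pairs like those built in renderTestDB below, the list
// --- expands into repeated --mako-opt flags on a hypothetical base command:
def exampleMakoArgs = ["stage=pre_merge", "backend=pytorch"]
def exampleCmd = "python3 get_sysinfo.py --device 0" // hypothetical base command
exampleCmd += " " + exampleMakoArgs.collect { "--mako-opt " + it }.join(" ")
assert exampleCmd.endsWith("--mako-opt stage=pre_merge --mako-opt backend=pytorch")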
// Add the withCredentials step to access gpu-chip-mapping file
withCredentials([file(credentialsId: 'gpu-chip-mapping', variable: 'GPU_CHIP_MAPPING')]) {
@@ -557,13 +561,29 @@ def getMakoOpts(getMakoScript, makoArgs="") {
}
def renderTestDB(testContext, llmSrc, stageName) {
def makoOpts = ""
def scriptPath = "${llmSrc}/tests/integration/defs/sysinfo/get_sysinfo.py"
if (stageName.contains("Post-Merge")) {
makoOpts = getMakoOpts(scriptPath, "stage=post_merge")
def makoArgs = []
def isPostMerge = stageName.contains("Post-Merge")
makoArgs += [isPostMerge ? "stage=post_merge" : "stage=pre_merge"]
// Determine the backend type based on keywords in stageName
if (stageName.contains("-PyTorch-")) {
// Only tests with backend=pytorch or with no backend specified will run
makoArgs += ["backend=pytorch"]
} else if (stageName.contains("-TensorRT-")) {
// Only tests with backend=tensorrt or with no backend specified will run
makoArgs += ["backend=tensorrt"]
} else if (stageName.contains("-CPP-")) {
// Only tests with backend=cpp or with no backend specified will run
makoArgs += ["backend=cpp"]
} else {
makoOpts = getMakoOpts(scriptPath)
// No backend keyword in stageName, so no backend term is added
// In this case the query matches tests for every backend as well as tests with no backend specified
}
def makoOpts = getMakoOpts(scriptPath, makoArgs)
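// --- Illustration only, not part of the change: resulting makoArgs for a few
// --- representative stage names, mirroring the branches above:
//   "A10-PyTorch-1"                     -> ["stage=pre_merge", "backend=pytorch"]
//   "A10-CPP-1"                         -> ["stage=pre_merge", "backend=cpp"]
//   "H100_PCIe-TensorRT-[Post-Merge]-1" -> ["stage=post_merge", "backend=tensorrt"]
//   any stage name without a backend keyword -> ["stage=pre_merge"] (matches all backends)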
sh "pip3 install --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-tensorrt-pypi/simple --ignore-installed trt-test-db==1.8.5+bc6df7"
def testDBPath = "${llmSrc}/tests/integration/test_lists/test-db"
@@ -577,43 +597,11 @@ def renderTestDB(testContext, llmSrc, stageName) {
"--test-names",
"--output",
testList,
"--match-exact",
"--match",
"'${makoOpts}'"
].join(" ")
sh(label: "Render test list from test-db", script: testDBQueryCmd)
if (stageName.contains("Post-Merge")){
// Using the "stage: post_merge" mako will contain pre-merge tests by default.
// But currently post-merge test stages only run post-merge tests for
// triaging failures efficiently. We need to remove pre-merge tests explicitly.
// This behavior may change in the future.
def jsonSlurper = new JsonSlurper()
def jsonMap = jsonSlurper.parseText(makoOpts)
if (jsonMap.containsKey('stage') && jsonMap.stage == 'post_merge') {
jsonMap.remove('stage')
}
def updatedMakoOptsJson = JsonOutput.toJson(jsonMap)
def defaultTestList = "${llmSrc}/default_test.txt"
def updatedTestDBQueryCmd = [
"trt-test-db",
"-d",
testDBPath,
"--context",
testContext,
"--test-names",
"--output",
defaultTestList,
"--match-exact",
"'${updatedMakoOptsJson}'"
].join(" ")
sh(label: "Render default test list from test-db", script: updatedTestDBQueryCmd)
def linesToRemove = readFile(defaultTestList).readLines().collect { it.trim() }.toSet()
def updatedLines = readFile(testList).readLines().findAll { line ->
!linesToRemove.contains(line.trim())
}
def contentToWrite = updatedLines.join('\n')
sh "echo \"${contentToWrite}\" > ${testList}"
}
sh(script: "cat ${testList}")
return testList
@@ -1013,59 +1001,63 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
{
def dockerArgs = "-v /mnt/scratch.trt_llm_data:/scratch.trt_llm_data:ro -v /tmp/ccache:${CCACHE_DIR}:rw -v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw --cap-add syslog"
turtleConfigs = [
"DGX_H100-4_GPUs-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 4, 4],
"DGX_H100-4_GPUs-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 4, 4],
"DGX_H100-4_GPUs-3": ["dgx-h100-x4", "l0_dgx_h100", 3, 4, 4],
"DGX_H100-4_GPUs-4": ["dgx-h100-x4", "l0_dgx_h100", 4, 4, 4],
"A10-1": ["a10", "l0_a10", 1, 8],
"A10-2": ["a10", "l0_a10", 2, 8],
"A10-3": ["a10", "l0_a10", 3, 8],
"A10-4": ["a10", "l0_a10", 4, 8],
"A10-5": ["a10", "l0_a10", 5, 8],
"A10-6": ["a10", "l0_a10", 6, 8],
"A10-7": ["a10", "l0_a10", 7, 8],
"A10-8": ["a10", "l0_a10", 8, 8],
"A30-1": ["a30", "l0_a30", 1, 8],
"A30-2": ["a30", "l0_a30", 2, 8],
"A30-3": ["a30", "l0_a30", 3, 8],
"A30-4": ["a30", "l0_a30", 4, 8],
"A30-5": ["a30", "l0_a30", 5, 8],
"A30-6": ["a30", "l0_a30", 6, 8],
"A30-7": ["a30", "l0_a30", 7, 8],
"A30-8": ["a30", "l0_a30", 8, 8],
"A100X-1": ["a100x", "l0_a100", 1, 4],
"A100X-2": ["a100x", "l0_a100", 2, 4],
"A100X-3": ["a100x", "l0_a100", 3, 4],
"A100X-4": ["a100x", "l0_a100", 4, 4],
"L40S-1": ["l40s", "l0_l40s", 1, 4],
"L40S-2": ["l40s", "l0_l40s", 2, 4],
"L40S-3": ["l40s", "l0_l40s", 3, 4],
"L40S-4": ["l40s", "l0_l40s", 4, 4],
"H100_PCIe-1": ["h100-cr", "l0_h100", 1, 7],
"H100_PCIe-2": ["h100-cr", "l0_h100", 2, 7],
"H100_PCIe-3": ["h100-cr", "l0_h100", 3, 7],
"H100_PCIe-4": ["h100-cr", "l0_h100", 4, 7],
"H100_PCIe-5": ["h100-cr", "l0_h100", 5, 7],
"H100_PCIe-6": ["h100-cr", "l0_h100", 6, 7],
"H100_PCIe-7": ["h100-cr", "l0_h100", 7, 7],
"B200_PCIe-1": ["b100-ts2", "l0_b200", 1, 2],
"B200_PCIe-2": ["b100-ts2", "l0_b200", 2, 2],
"DGX_H100-4_GPUs-PyTorch-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-TensorRT-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 2, 4],
"DGX_H100-4_GPUs-TensorRT-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4],
"A10-PyTorch-1": ["a10", "l0_a10", 1, 1],
"A10-CPP-1": ["a10", "l0_a10", 1, 1],
"A10-TensorRT-1": ["a10", "l0_a10", 1, 6],
"A10-TensorRT-2": ["a10", "l0_a10", 2, 6],
"A10-TensorRT-3": ["a10", "l0_a10", 3, 6],
"A10-TensorRT-4": ["a10", "l0_a10", 4, 6],
"A10-TensorRT-5": ["a10", "l0_a10", 5, 6],
"A10-TensorRT-6": ["a10", "l0_a10", 6, 6],
"A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
"A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
"A30-CPP-1": ["a30", "l0_a30", 1, 2],
"A30-CPP-2": ["a30", "l0_a30", 2, 2],
"A30-TensorRT-1": ["a30", "l0_a30", 1, 4],
"A30-TensorRT-2": ["a30", "l0_a30", 2, 4],
"A30-TensorRT-3": ["a30", "l0_a30", 3, 4],
"A30-TensorRT-4": ["a30", "l0_a30", 4, 4],
"A100X-TensorRT-1": ["a100x", "l0_a100", 1, 4],
"A100X-TensorRT-2": ["a100x", "l0_a100", 2, 4],
"A100X-TensorRT-3": ["a100x", "l0_a100", 3, 4],
"A100X-TensorRT-4": ["a100x", "l0_a100", 4, 4],
"L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 1],
"L40S-TensorRT-1": ["l40s", "l0_l40s", 1, 3],
"L40S-TensorRT-2": ["l40s", "l0_l40s", 2, 3],
"L40S-TensorRT-3": ["l40s", "l0_l40s", 3, 3],
"H100_PCIe-PyTorch-1": ["h100-cr", "l0_h100", 1, 2],
"H100_PCIe-PyTorch-2": ["h100-cr", "l0_h100", 2, 2],
"H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 1],
"H100_PCIe-TensorRT-1": ["h100-cr", "l0_h100", 1, 5],
"H100_PCIe-TensorRT-2": ["h100-cr", "l0_h100", 2, 5],
"H100_PCIe-TensorRT-3": ["h100-cr", "l0_h100", 3, 5],
"H100_PCIe-TensorRT-4": ["h100-cr", "l0_h100", 4, 5],
"H100_PCIe-TensorRT-5": ["h100-cr", "l0_h100", 5, 5],
"B200_PCIe-PyTorch-1": ["b100-ts2", "l0_b200", 1, 2],
"B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 2],
"B200_PCIe-TensorRT-1": ["b100-ts2", "l0_b200", 1, 2],
"B200_PCIe-TensorRT-2": ["b100-ts2", "l0_b200", 2, 2],
// Currently post-merge test stages only run tests with "stage: post_merge" mako
// in the test-db. This behavior may change in the future.
"A10-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
"A10-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
"A30-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
"A30-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
"A100X-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
"A100X-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
"L40S-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
"L40S-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
"H100_PCIe-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 3],
"H100_PCIe-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 3],
"H100_PCIe-[Post-Merge]-3": ["h100-cr", "l0_h100", 3, 3],
"DGX_H100-4_GPUs-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"A100_80GB_PCIE-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
"H100_PCIe-Perf": ["h100-cr", "l0_perf", 1, 1],
"A10-TensorRT-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
"A10-TensorRT-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
"A30-TensorRT-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
"A30-TensorRT-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
"A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
"A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
"L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
"L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
"H100_PCIe-CPP-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 1],
"H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
"H100_PCIe-TensorRT-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 2],
"DGX_H100-4_GPUs-PyTorch-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-TensorRT-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"A100_80GB_PCIE-TensorRT-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
"H100_PCIe-TensorRT-Perf": ["h100-cr", "l0_perf", 1, 1],
]
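// --- Illustration only, not part of the change: each entry appears to map a
// --- stage name to [nodeLabel, testDbList, shardId, shardCount, gpuCount?],
// --- with gpuCount defaulting to 1 via "values[4] ?: 1" below. For example,
// --- "A30-TensorRT-2" runs shard 2 of 4 of l0_a30 on one "a30" GPU, while
// --- "DGX_H100-4_GPUs-TensorRT-1" runs shard 1 of 2 of l0_dgx_h100 on 4 GPUs.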
parallelJobs = turtleConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
@@ -1119,7 +1111,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
}]]}
sanityCheckConfigs = [
"pytorch": [
"DLFW": [
LLM_DOCKER_IMAGE,
"B200_PCIe",
X86_64_TRIPLE,
@@ -1151,7 +1143,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
if (env.targetArch == AARCH64_TRIPLE) {
sanityCheckConfigs = [
"pytorch": [
"DLFW": [
LLM_DOCKER_IMAGE,
"GH200",
AARCH64_TRIPLE,
@@ -1163,7 +1155,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
]
}
fullSet += [toStageName("GH200", "pytorch")]
fullSet += [toStageName("GH200", "DLFW")]
sanityCheckJobs = sanityCheckConfigs.collectEntries {key, values -> [toStageName(values[1], key), {
cacheErrorAndUploadResult(toStageName(values[1], key), {
@@ -1319,6 +1311,12 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
println parallelJobsFiltered.keySet()
}
if (testFilter[(ONLY_PYTORCH_FILE_CHANGED)]) {
echo "ONLY_PYTORCH_FILE_CHANGED mode is true."
parallelJobsFiltered = parallelJobsFiltered.findAll { !it.key.contains("-CPP-") && !it.key.contains("-TensorRT-") }
println parallelJobsFiltered.keySet()
}
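// --- Illustration only, not part of the change: a minimal sketch of the
// --- backend filter above, with assumed stage names:
def exampleJobs = ["A10-PyTorch-1": 1, "A10-CPP-1": 2, "A10-TensorRT-3": 3]
def exampleKept = exampleJobs.findAll { !it.key.contains("-CPP-") && !it.key.contains("-TensorRT-") }
assert exampleKept.keySet() == ["A10-PyTorch-1"] as Set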
// Check --stage-list, only run the stages in stage-list.
if (testFilter[TEST_STAGE_LIST] != null) {
echo "Use TEST_STAGE_LIST for filtering."


@@ -9,18 +9,47 @@ l0_a10:
gpu:
- '*a10*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*a10*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_model[medusa-86]
- test_cpp.py::test_model[redrafter-86]
- test_cpp.py::test_model[mamba-86]
- test_cpp.py::test_model[recurrentgemma-86]
- test_cpp.py::test_model[eagle-86]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*a10*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -89,6 +118,7 @@ l0_a10:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- test_e2e.py::test_mistral_e2e[use_py_session]
- test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]


@@ -9,6 +9,9 @@ l0_a100:
gpu:
- '*a100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
- unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others
- unittest/llmapi/test_llm_models.py -m "part1"
@@ -40,6 +43,7 @@ l0_a100:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
- accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype # 1.5 mins


@@ -9,6 +9,9 @@ l0_a30:
gpu:
- '*a30*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
@@ -21,10 +24,36 @@ l0_a30:
- unittest/_torch/modeling -k "modeling_vila"
- unittest/_torch/modeling -k "modeling_nemotron"
- unittest/_torch/auto_deploy/unit/singlegpu
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*a30*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_unit_tests[80]
- test_cpp.py::test_model[gpt-80]
- test_cpp.py::test_benchmarks[gpt-80]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*a30*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- unittest/trt/model/test_nemotron_nas.py -k "not fp8"
- unittest/trt/model/test_gpt.py -k "partition0" # 10 mins
@@ -71,6 +100,7 @@ l0_a30:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]
- examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]


@@ -9,6 +9,9 @@ l0_b200:
gpu:
- '*b100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
@@ -26,6 +29,19 @@ l0_b200:
- unittest/_torch/multi_gpu_modeling -k "deepseek and tp1 and not nextn0"
- unittest/_torch/auto_deploy/unit/singlegpu
- unittest/_torch/speculative/test_eagle3.py
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*b100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4
- accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-disable_fused_quant]


@@ -9,6 +9,9 @@ l0_dgx_h100:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch/multi_gpu
@@ -26,12 +29,38 @@ l0_dgx_h100:
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one[DeepSeek-V3-Lite-fp8]
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp[DeepSeek-V3-Lite-fp8]
- disaggregated/test_disaggregated.py::test_disaggregated_overlap_dp[DeepSeek-V3-Lite-fp8]
- condition:
ranges:
system_gpu_count:
gte: 4
lte: 4
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_multi_gpu_simple[90]
- test_cpp.py::test_multi_gpu_t5[90]
- test_cpp.py::test_multi_gpu_llama_executor[90]
- test_cpp.py::test_multi_gpu_trt_gpt_real_decoder[90]
- test_cpp.py::test_multi_gpu_disagg[90]
- condition:
ranges:
system_gpu_count:
gte: 4
lte: 4
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-disable_fp8_context_fmha]
- accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-enable_fp8_context_fmha]
@@ -71,10 +100,24 @@ l0_dgx_h100:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch/auto_deploy/integration/test_ad_build.py
- unittest/_torch/auto_deploy/integration/test_lm_eval.py
- condition:
ranges:
system_gpu_count:
gte: 4
lte: 4
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
- examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]


@@ -9,6 +9,9 @@ l0_gh200:
gpu:
- '*h200*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -31,6 +34,7 @@ l0_gh200:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- unittest/test_model_runner_cpp.py
- accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins


@@ -9,6 +9,9 @@ l0_h100:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
# Only key models in H100: llama/mixtral/nemotron/deepseek
@@ -22,6 +25,19 @@ l0_h100:
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_unit_tests[90]
- test_cpp.py::test_model[fp8-llama-90]
@@ -29,6 +45,19 @@ l0_h100:
- test_cpp.py::test_benchmarks[t5-90]
- test_cpp.py::test_model[encoder-90]
- test_cpp.py::test_model[enc_dec_language_adapter-90]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"
- unittest/trt/functional/test_moe.py
@@ -92,10 +121,24 @@ l0_h100:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_model[bart-90]
- test_cpp.py::test_benchmarks[bart-90]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- examples/test_eagle.py::test_llm_eagle_1gpu[llama3.1-eagle-8b-hf_v0.5-float16-bs8] # 9 mins
- examples/test_mistral.py::test_llm_mistral_nemo_minitron_fp8_quantization[Mistral-NeMo-Minitron-8B-Instruct]


@@ -9,6 +9,9 @@ l0_l40s:
gpu:
- '*l40s*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
@@ -25,6 +28,19 @@ l0_l40s:
- test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image]
- test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video]
- test_e2e.py::test_ptp_quickstart_bert[BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*l40s*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -67,6 +83,7 @@ l0_l40s:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb
- accuracy/test_cli_flow.py::TestStarcoder2_3B::test_auto_dtype


@@ -10,6 +10,9 @@ l0_perf:
- '*a100*'
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
- perf/test_perf.py::test_perf[bert_base-plugin-float16-bs:32-input_len:32]
- perf/test_perf.py::test_perf[bert_base-cpp-plugin-float16-bs:32-input_len:32]