mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
test: [TRTLLM-3994] Support only run pytorch tests (#3013)
* [TRTLLM-3994] Support only run pytorch tests

  Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Move perf test to TensorRT backend

  Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Fix review

  Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

---------

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>
This commit is contained in:
parent
dcc0ebd273
commit
7f03125098
@ -112,6 +112,8 @@ def DISABLE_MULTI_GPU_TEST = "disable_multi_gpu_test"
|
||||
def EXTRA_STAGE_LIST = "extra_stage"
|
||||
@Field
|
||||
def MULTI_GPU_FILE_CHANGED = "multi_gpu_file_changed"
|
||||
@Field
|
||||
def ONLY_PYTORCH_FILE_CHANGED = "only_pytorch_file_changed"
|
||||
|
||||
def testFilter = [
|
||||
(REUSE_STAGE_LIST): null,
|
||||
@ -124,6 +126,7 @@ def testFilter = [
|
||||
(DISABLE_MULTI_GPU_TEST): false,
|
||||
(EXTRA_STAGE_LIST): null,
|
||||
(MULTI_GPU_FILE_CHANGED): false,
|
||||
(ONLY_PYTORCH_FILE_CHANGED): false,
|
||||
]
|
||||
|
||||
String getShortenedJobName(String path)
|
||||
@ -478,7 +481,7 @@ def generateStageFailTestResultXml(stageName, subName, failureLog, resultPath) {
|
||||
</failure></testcase></testsuite></testsuites>"""
|
||||
}
|
||||
|
||||
def getMakoOpts(getMakoScript, makoArgs="") {
|
||||
def getMakoOpts(getMakoScript, makoArgs=[]) {
|
||||
// We want to save a map for the Mako opts
|
||||
def makoOpts = [:]
|
||||
def turtleOutput = ""
|
||||
@ -492,8 +495,9 @@ def getMakoOpts(getMakoScript, makoArgs="") {
|
||||
getMakoScript,
|
||||
"--device 0"].join(" ")
|
||||
|
||||
if (makoArgs != "") {
|
||||
listMakoCmd = [listMakoCmd, "--mako-opt ${makoArgs}"].join(" ")
|
||||
if (makoArgs) {
|
||||
def makoOptArgs = makoArgs.collect { "--mako-opt " + it }
|
||||
listMakoCmd += " " + makoOptArgs.join(" ")
|
||||
}
|
||||
// Add the withCredentials step to access gpu-chip-mapping file
|
||||
withCredentials([file(credentialsId: 'gpu-chip-mapping', variable: 'GPU_CHIP_MAPPING')]) {
|
||||
@ -557,13 +561,29 @@ def getMakoOpts(getMakoScript, makoArgs="") {
|
||||
}
|
||||
|
||||
def renderTestDB(testContext, llmSrc, stageName) {
|
||||
def makoOpts = ""
|
||||
def scriptPath = "${llmSrc}/tests/integration/defs/sysinfo/get_sysinfo.py"
|
||||
if (stageName.contains("Post-Merge")) {
|
||||
makoOpts = getMakoOpts(scriptPath, "stage=post_merge")
|
||||
def makoArgs = []
|
||||
def isPostMerge = stageName.contains("Post-Merge")
|
||||
makoArgs += [isPostMerge ? "stage=post_merge" : "stage=pre_merge"]
|
||||
// Determine the backend type based on keywords in stageName
|
||||
if (stageName.contains("-PyTorch-")) {
|
||||
// If stageName contains "-PyTorch-", add "backend=pytorch" to makoArgs
|
||||
// At this point, only tests with backend=pytorch or unspecified backend will be run
|
||||
makoArgs += ["backend=pytorch"]
|
||||
} else if (stageName.contains("-TensorRT-")) {
|
||||
// If stageName contains "-TensorRT-", add "backend=tensorrt" to makoArgs
|
||||
// At this point, only tests with backend=tensorrt or unspecified backend will be run
|
||||
makoArgs += ["backend=tensorrt"]
|
||||
} else if (stageName.contains("-CPP-")) {
|
||||
// If stageName contains "-CPP-", add "backend=cpp" to makoArgs
|
||||
// At this point, only tests with backend=cpp or unspecified backend will be run
|
||||
makoArgs += ["backend=cpp"]
|
||||
} else {
|
||||
makoOpts = getMakoOpts(scriptPath)
|
||||
// If stageName does not contain "-PyTorch-", "-TensorRT-", or "-CPP-", do not add any backend
|
||||
// At this point, all tests will be run
|
||||
// For cases where backend is not specified in makoArgs, we will match all types of backends and tests without specified backend
|
||||
}
|
||||
def makoOpts = getMakoOpts(scriptPath, makoArgs)
|
||||
|
||||
sh "pip3 install --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-tensorrt-pypi/simple --ignore-installed trt-test-db==1.8.5+bc6df7"
|
||||
def testDBPath = "${llmSrc}/tests/integration/test_lists/test-db"
|
||||
@ -577,43 +597,11 @@ def renderTestDB(testContext, llmSrc, stageName) {
|
||||
"--test-names",
|
||||
"--output",
|
||||
testList,
|
||||
"--match-exact",
|
||||
"--match",
|
||||
"'${makoOpts}'"
|
||||
].join(" ")
|
||||
|
||||
sh(label: "Render test list from test-db", script: testDBQueryCmd)
|
||||
if (stageName.contains("Post-Merge")){
|
||||
// Using the "stage: post_merge" mako will contain pre-merge tests by default.
|
||||
// But currently post-merge test stages only run post-merge tests for
|
||||
// triaging failures efficiently. We need to remove pre-merge tests explicitly.
|
||||
// This behavior may change in the future.
|
||||
def jsonSlurper = new JsonSlurper()
|
||||
def jsonMap = jsonSlurper.parseText(makoOpts)
|
||||
if (jsonMap.containsKey('stage') && jsonMap.stage == 'post_merge') {
|
||||
jsonMap.remove('stage')
|
||||
}
|
||||
def updatedMakoOptsJson = JsonOutput.toJson(jsonMap)
|
||||
def defaultTestList = "${llmSrc}/default_test.txt"
|
||||
def updatedTestDBQueryCmd = [
|
||||
"trt-test-db",
|
||||
"-d",
|
||||
testDBPath,
|
||||
"--context",
|
||||
testContext,
|
||||
"--test-names",
|
||||
"--output",
|
||||
defaultTestList,
|
||||
"--match-exact",
|
||||
"'${updatedMakoOptsJson}'"
|
||||
].join(" ")
|
||||
sh(label: "Render default test list from test-db", script: updatedTestDBQueryCmd)
|
||||
def linesToRemove = readFile(defaultTestList).readLines().collect { it.trim() }.toSet()
|
||||
def updatedLines = readFile(testList).readLines().findAll { line ->
|
||||
!linesToRemove.contains(line.trim())
|
||||
}
|
||||
def contentToWrite = updatedLines.join('\n')
|
||||
sh "echo \"${contentToWrite}\" > ${testList}"
|
||||
}
|
||||
sh(script: "cat ${testList}")
|
||||
|
||||
return testList
|
||||
@ -1013,59 +1001,63 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
{
|
||||
def dockerArgs = "-v /mnt/scratch.trt_llm_data:/scratch.trt_llm_data:ro -v /tmp/ccache:${CCACHE_DIR}:rw -v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw --cap-add syslog"
|
||||
turtleConfigs = [
|
||||
"DGX_H100-4_GPUs-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 4, 4],
|
||||
"DGX_H100-4_GPUs-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 4, 4],
|
||||
"DGX_H100-4_GPUs-3": ["dgx-h100-x4", "l0_dgx_h100", 3, 4, 4],
|
||||
"DGX_H100-4_GPUs-4": ["dgx-h100-x4", "l0_dgx_h100", 4, 4, 4],
|
||||
"A10-1": ["a10", "l0_a10", 1, 8],
|
||||
"A10-2": ["a10", "l0_a10", 2, 8],
|
||||
"A10-3": ["a10", "l0_a10", 3, 8],
|
||||
"A10-4": ["a10", "l0_a10", 4, 8],
|
||||
"A10-5": ["a10", "l0_a10", 5, 8],
|
||||
"A10-6": ["a10", "l0_a10", 6, 8],
|
||||
"A10-7": ["a10", "l0_a10", 7, 8],
|
||||
"A10-8": ["a10", "l0_a10", 8, 8],
|
||||
"A30-1": ["a30", "l0_a30", 1, 8],
|
||||
"A30-2": ["a30", "l0_a30", 2, 8],
|
||||
"A30-3": ["a30", "l0_a30", 3, 8],
|
||||
"A30-4": ["a30", "l0_a30", 4, 8],
|
||||
"A30-5": ["a30", "l0_a30", 5, 8],
|
||||
"A30-6": ["a30", "l0_a30", 6, 8],
|
||||
"A30-7": ["a30", "l0_a30", 7, 8],
|
||||
"A30-8": ["a30", "l0_a30", 8, 8],
|
||||
"A100X-1": ["a100x", "l0_a100", 1, 4],
|
||||
"A100X-2": ["a100x", "l0_a100", 2, 4],
|
||||
"A100X-3": ["a100x", "l0_a100", 3, 4],
|
||||
"A100X-4": ["a100x", "l0_a100", 4, 4],
|
||||
"L40S-1": ["l40s", "l0_l40s", 1, 4],
|
||||
"L40S-2": ["l40s", "l0_l40s", 2, 4],
|
||||
"L40S-3": ["l40s", "l0_l40s", 3, 4],
|
||||
"L40S-4": ["l40s", "l0_l40s", 4, 4],
|
||||
"H100_PCIe-1": ["h100-cr", "l0_h100", 1, 7],
|
||||
"H100_PCIe-2": ["h100-cr", "l0_h100", 2, 7],
|
||||
"H100_PCIe-3": ["h100-cr", "l0_h100", 3, 7],
|
||||
"H100_PCIe-4": ["h100-cr", "l0_h100", 4, 7],
|
||||
"H100_PCIe-5": ["h100-cr", "l0_h100", 5, 7],
|
||||
"H100_PCIe-6": ["h100-cr", "l0_h100", 6, 7],
|
||||
"H100_PCIe-7": ["h100-cr", "l0_h100", 7, 7],
|
||||
"B200_PCIe-1": ["b100-ts2", "l0_b200", 1, 2],
|
||||
"B200_PCIe-2": ["b100-ts2", "l0_b200", 2, 2],
|
||||
"DGX_H100-4_GPUs-PyTorch-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
|
||||
"DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
|
||||
"DGX_H100-4_GPUs-TensorRT-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 2, 4],
|
||||
"DGX_H100-4_GPUs-TensorRT-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4],
|
||||
"A10-PyTorch-1": ["a10", "l0_a10", 1, 1],
|
||||
"A10-CPP-1": ["a10", "l0_a10", 1, 1],
|
||||
"A10-TensorRT-1": ["a10", "l0_a10", 1, 6],
|
||||
"A10-TensorRT-2": ["a10", "l0_a10", 2, 6],
|
||||
"A10-TensorRT-3": ["a10", "l0_a10", 3, 6],
|
||||
"A10-TensorRT-4": ["a10", "l0_a10", 4, 6],
|
||||
"A10-TensorRT-5": ["a10", "l0_a10", 5, 6],
|
||||
"A10-TensorRT-6": ["a10", "l0_a10", 6, 6],
|
||||
"A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
|
||||
"A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
|
||||
"A30-CPP-1": ["a30", "l0_a30", 1, 2],
|
||||
"A30-CPP-2": ["a30", "l0_a30", 2, 2],
|
||||
"A30-TensorRT-1": ["a30", "l0_a30", 1, 4],
|
||||
"A30-TensorRT-2": ["a30", "l0_a30", 2, 4],
|
||||
"A30-TensorRT-3": ["a30", "l0_a30", 3, 4],
|
||||
"A30-TensorRT-4": ["a30", "l0_a30", 4, 4],
|
||||
"A100X-TensorRT-1": ["a100x", "l0_a100", 1, 4],
|
||||
"A100X-TensorRT-2": ["a100x", "l0_a100", 2, 4],
|
||||
"A100X-TensorRT-3": ["a100x", "l0_a100", 3, 4],
|
||||
"A100X-TensorRT-4": ["a100x", "l0_a100", 4, 4],
|
||||
"L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 1],
|
||||
"L40S-TensorRT-1": ["l40s", "l0_l40s", 1, 3],
|
||||
"L40S-TensorRT-2": ["l40s", "l0_l40s", 2, 3],
|
||||
"L40S-TensorRT-3": ["l40s", "l0_l40s", 3, 3],
|
||||
"H100_PCIe-PyTorch-1": ["h100-cr", "l0_h100", 1, 2],
|
||||
"H100_PCIe-PyTorch-2": ["h100-cr", "l0_h100", 2, 2],
|
||||
"H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 1],
|
||||
"H100_PCIe-TensorRT-1": ["h100-cr", "l0_h100", 1, 5],
|
||||
"H100_PCIe-TensorRT-2": ["h100-cr", "l0_h100", 2, 5],
|
||||
"H100_PCIe-TensorRT-3": ["h100-cr", "l0_h100", 3, 5],
|
||||
"H100_PCIe-TensorRT-4": ["h100-cr", "l0_h100", 4, 5],
|
||||
"H100_PCIe-TensorRT-5": ["h100-cr", "l0_h100", 5, 5],
|
||||
"B200_PCIe-PyTorch-1": ["b100-ts2", "l0_b200", 1, 2],
|
||||
"B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 2],
|
||||
"B200_PCIe-TensorRT-1": ["b100-ts2", "l0_b200", 1, 2],
|
||||
"B200_PCIe-TensorRT-2": ["b100-ts2", "l0_b200", 2, 2],
|
||||
// Currently post-merge test stages only run tests with "stage: post_merge" mako
|
||||
// in the test-db. This behavior may change in the future.
|
||||
"A10-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
|
||||
"A10-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
|
||||
"A30-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
|
||||
"A30-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
|
||||
"A100X-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
|
||||
"A100X-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
|
||||
"L40S-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
|
||||
"L40S-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
|
||||
"H100_PCIe-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 3],
|
||||
"H100_PCIe-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 3],
|
||||
"H100_PCIe-[Post-Merge]-3": ["h100-cr", "l0_h100", 3, 3],
|
||||
"DGX_H100-4_GPUs-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
|
||||
"A100_80GB_PCIE-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
|
||||
"H100_PCIe-Perf": ["h100-cr", "l0_perf", 1, 1],
|
||||
"A10-TensorRT-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
|
||||
"A10-TensorRT-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
|
||||
"A30-TensorRT-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
|
||||
"A30-TensorRT-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
|
||||
"A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
|
||||
"A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
|
||||
"L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
|
||||
"L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
|
||||
"H100_PCIe-CPP-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 1],
|
||||
"H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
|
||||
"H100_PCIe-TensorRT-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 2],
|
||||
"DGX_H100-4_GPUs-PyTorch-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
|
||||
"DGX_H100-4_GPUs-TensorRT-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
|
||||
"A100_80GB_PCIE-TensorRT-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
|
||||
"H100_PCIe-TensorRT-Perf": ["h100-cr", "l0_perf", 1, 1],
|
||||
]
|
||||
|
||||
parallelJobs = turtleConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
|
||||
@ -1119,7 +1111,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
}]]}
|
||||
|
||||
sanityCheckConfigs = [
|
||||
"pytorch": [
|
||||
"DLFW": [
|
||||
LLM_DOCKER_IMAGE,
|
||||
"B200_PCIe",
|
||||
X86_64_TRIPLE,
|
||||
@ -1151,7 +1143,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
|
||||
if (env.targetArch == AARCH64_TRIPLE) {
|
||||
sanityCheckConfigs = [
|
||||
"pytorch": [
|
||||
"DLFW": [
|
||||
LLM_DOCKER_IMAGE,
|
||||
"GH200",
|
||||
AARCH64_TRIPLE,
|
||||
@ -1163,7 +1155,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
]
|
||||
}
|
||||
|
||||
fullSet += [toStageName("GH200", "pytorch")]
|
||||
fullSet += [toStageName("GH200", "DLFW")]
|
||||
|
||||
sanityCheckJobs = sanityCheckConfigs.collectEntries {key, values -> [toStageName(values[1], key), {
|
||||
cacheErrorAndUploadResult(toStageName(values[1], key), {
|
||||
@ -1319,6 +1311,12 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
println parallelJobsFiltered.keySet()
|
||||
}
|
||||
|
||||
if (testFilter[(ONLY_PYTORCH_FILE_CHANGED)]) {
|
||||
echo "ONLY_PYTORCH_FILE_CHANGED mode is true."
|
||||
parallelJobsFiltered = parallelJobsFiltered.findAll { !it.key.contains("-CPP-") && !it.key.contains("-TensorRT-") }
|
||||
println parallelJobsFiltered.keySet()
|
||||
}
|
||||
|
||||
// Check --stage-list, only run the stages in stage-list.
|
||||
if (testFilter[TEST_STAGE_LIST] != null) {
|
||||
echo "Use TEST_STAGE_LIST for filtering."
|
||||
|
||||
@ -9,18 +9,47 @@ l0_a10:
|
||||
gpu:
|
||||
- '*a10*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: pytorch
|
||||
tests:
|
||||
# ------------- PyTorch tests ---------------
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*a10*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: cpp
|
||||
tests:
|
||||
# ------------- CPP tests ---------------
|
||||
- test_cpp.py::test_model[medusa-86]
|
||||
- test_cpp.py::test_model[redrafter-86]
|
||||
- test_cpp.py::test_model[mamba-86]
|
||||
- test_cpp.py::test_model[recurrentgemma-86]
|
||||
- test_cpp.py::test_model[eagle-86]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*a10*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
# ------------- TRT tests ---------------
|
||||
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
|
||||
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
|
||||
@ -89,6 +118,7 @@ l0_a10:
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
- test_e2e.py::test_mistral_e2e[use_py_session]
|
||||
- test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]
|
||||
|
||||
@ -9,6 +9,9 @@ l0_a100:
|
||||
gpu:
|
||||
- '*a100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
- unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others
|
||||
- unittest/llmapi/test_llm_models.py -m "part1"
|
||||
@ -40,6 +43,7 @@ l0_a100:
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
- accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
|
||||
- accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype # 1.5 mins
|
||||
|
||||
@ -9,6 +9,9 @@ l0_a30:
|
||||
gpu:
|
||||
- '*a30*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: pytorch
|
||||
tests:
|
||||
# ------------- PyTorch tests ---------------
|
||||
- unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
|
||||
@ -21,10 +24,36 @@ l0_a30:
|
||||
- unittest/_torch/modeling -k "modeling_vila"
|
||||
- unittest/_torch/modeling -k "modeling_nemotron"
|
||||
- unittest/_torch/auto_deploy/unit/singlegpu
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*a30*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: cpp
|
||||
tests:
|
||||
# ------------- CPP tests ---------------
|
||||
- test_cpp.py::test_unit_tests[80]
|
||||
- test_cpp.py::test_model[gpt-80]
|
||||
- test_cpp.py::test_benchmarks[gpt-80]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*a30*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
# ------------- TRT tests ---------------
|
||||
- unittest/trt/model/test_nemotron_nas.py -k "not fp8"
|
||||
- unittest/trt/model/test_gpt.py -k "partition0" # 10 mins
|
||||
@ -71,6 +100,7 @@ l0_a30:
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
- examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]
|
||||
- examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]
|
||||
|
||||
@ -9,6 +9,9 @@ l0_b200:
|
||||
gpu:
|
||||
- '*b100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: pytorch
|
||||
tests:
|
||||
# ------------- PyTorch tests ---------------
|
||||
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
|
||||
@ -26,6 +29,19 @@ l0_b200:
|
||||
- unittest/_torch/multi_gpu_modeling -k "deepseek and tp1 and not nextn0"
|
||||
- unittest/_torch/auto_deploy/unit/singlegpu
|
||||
- unittest/_torch/speculative/test_eagle3.py
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*b100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
# ------------- TRT tests ---------------
|
||||
- accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4
|
||||
- accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-disable_fused_quant]
|
||||
|
||||
@ -9,6 +9,9 @@ l0_dgx_h100:
|
||||
gpu:
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: pytorch
|
||||
tests:
|
||||
# ------------- PyTorch tests ---------------
|
||||
- unittest/_torch/multi_gpu
|
||||
@ -26,12 +29,38 @@ l0_dgx_h100:
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one[DeepSeek-V3-Lite-fp8]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp[DeepSeek-V3-Lite-fp8]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_overlap_dp[DeepSeek-V3-Lite-fp8]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 4
|
||||
lte: 4
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: cpp
|
||||
tests:
|
||||
# ------------- CPP tests ---------------
|
||||
- test_cpp.py::test_multi_gpu_simple[90]
|
||||
- test_cpp.py::test_multi_gpu_t5[90]
|
||||
- test_cpp.py::test_multi_gpu_llama_executor[90]
|
||||
- test_cpp.py::test_multi_gpu_trt_gpt_real_decoder[90]
|
||||
- test_cpp.py::test_multi_gpu_disagg[90]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 4
|
||||
lte: 4
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
# ------------- TRT tests ---------------
|
||||
- accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-disable_fp8_context_fmha]
|
||||
- accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-enable_fp8_context_fmha]
|
||||
@ -71,10 +100,24 @@ l0_dgx_h100:
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: pytorch
|
||||
tests:
|
||||
# ------------- PyTorch tests ---------------
|
||||
- unittest/_torch/auto_deploy/integration/test_ad_build.py
|
||||
- unittest/_torch/auto_deploy/integration/test_lm_eval.py
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 4
|
||||
lte: 4
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
# ------------- TRT tests ---------------
|
||||
- examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
|
||||
- examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]
|
||||
|
||||
@ -9,6 +9,9 @@ l0_gh200:
|
||||
gpu:
|
||||
- '*h200*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
|
||||
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
|
||||
@ -31,6 +34,7 @@ l0_gh200:
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
- unittest/test_model_runner_cpp.py
|
||||
- accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
|
||||
|
||||
@ -9,6 +9,9 @@ l0_h100:
|
||||
gpu:
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: pytorch
|
||||
tests:
|
||||
# ------------- PyTorch tests ---------------
|
||||
# Only key models in H100: llama/mixtral/nemotron/deepseek
|
||||
@ -22,6 +25,19 @@ l0_h100:
|
||||
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales
|
||||
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]
|
||||
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: cpp
|
||||
tests:
|
||||
# ------------- CPP tests ---------------
|
||||
- test_cpp.py::test_unit_tests[90]
|
||||
- test_cpp.py::test_model[fp8-llama-90]
|
||||
@ -29,6 +45,19 @@ l0_h100:
|
||||
- test_cpp.py::test_benchmarks[t5-90]
|
||||
- test_cpp.py::test_model[encoder-90]
|
||||
- test_cpp.py::test_model[enc_dec_language_adapter-90]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
# ------------- TRT tests ---------------
|
||||
- unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"
|
||||
- unittest/trt/functional/test_moe.py
|
||||
@ -92,10 +121,24 @@ l0_h100:
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: cpp
|
||||
tests:
|
||||
# ------------- CPP tests ---------------
|
||||
- test_cpp.py::test_model[bart-90]
|
||||
- test_cpp.py::test_benchmarks[bart-90]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
# ------------- TRT tests ---------------
|
||||
- examples/test_eagle.py::test_llm_eagle_1gpu[llama3.1-eagle-8b-hf_v0.5-float16-bs8] # 9 mins
|
||||
- examples/test_mistral.py::test_llm_mistral_nemo_minitron_fp8_quantization[Mistral-NeMo-Minitron-8B-Instruct]
|
||||
|
||||
@ -9,6 +9,9 @@ l0_l40s:
|
||||
gpu:
|
||||
- '*l40s*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: pytorch
|
||||
tests:
|
||||
# ------------- PyTorch tests ---------------
|
||||
- unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
|
||||
@ -25,6 +28,19 @@ l0_l40s:
|
||||
- test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image]
|
||||
- test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video]
|
||||
- test_e2e.py::test_ptp_quickstart_bert[BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
lte: 1
|
||||
wildcards:
|
||||
gpu:
|
||||
- '*l40s*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
# ------------- TRT tests ---------------
|
||||
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
|
||||
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
|
||||
@ -67,6 +83,7 @@ l0_l40s:
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: post_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
- accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb
|
||||
- accuracy/test_cli_flow.py::TestStarcoder2_3B::test_auto_dtype
|
||||
|
||||
@ -10,6 +10,9 @@ l0_perf:
|
||||
- '*a100*'
|
||||
- '*h100*'
|
||||
linux_distribution_name: ubuntu*
|
||||
terms:
|
||||
stage: pre_merge
|
||||
backend: tensorrt
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[bert_base-plugin-float16-bs:32-input_len:32]
|
||||
- perf/test_perf.py::test_perf[bert_base-cpp-plugin-float16-bs:32-input_len:32]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user