test: [TRTLLM-3994] Support running only PyTorch tests (#3013)

* [TRTLLM-3994] Support running only PyTorch tests

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Move perf test to TensorRT backend

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Address review comments

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

---------

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>
Zhanrui Sun 2025-04-03 13:46:09 +08:00 committed by GitHub
parent dcc0ebd273
commit 7f03125098
10 changed files with 282 additions and 94 deletions


@@ -112,6 +112,8 @@ def DISABLE_MULTI_GPU_TEST = "disable_multi_gpu_test"
def EXTRA_STAGE_LIST = "extra_stage"
@Field
def MULTI_GPU_FILE_CHANGED = "multi_gpu_file_changed"
@Field
def ONLY_PYTORCH_FILE_CHANGED = "only_pytorch_file_changed"
def testFilter = [
(REUSE_STAGE_LIST): null,
@@ -124,6 +126,7 @@ def testFilter = [
(DISABLE_MULTI_GPU_TEST): false,
(EXTRA_STAGE_LIST): null,
(MULTI_GPU_FILE_CHANGED): false,
(ONLY_PYTORCH_FILE_CHANGED): false,
]
String getShortenedJobName(String path)
@@ -478,7 +481,7 @@ def generateStageFailTestResultXml(stageName, subName, failureLog, resultPath) {
</failure></testcase></testsuite></testsuites>"""
}
def getMakoOpts(getMakoScript, makoArgs="") {
def getMakoOpts(getMakoScript, makoArgs=[]) {
// We want to save a map for the Mako opts
def makoOpts = [:]
def turtleOutput = ""
@@ -492,8 +495,9 @@ def getMakoOpts(getMakoScript, makoArgs="") {
getMakoScript,
"--device 0"].join(" ")
if (makoArgs != "") {
listMakoCmd = [listMakoCmd, "--mako-opt ${makoArgs}"].join(" ")
if (makoArgs) {
def makoOptArgs = makoArgs.collect { "--mako-opt " + it }
listMakoCmd += " " + makoOptArgs.join(" ")
}
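// --- Illustration only, not part of the change: assuming makoArgs holds
// --- stage/backend pairs like those built in renderTestDB below, the list
// --- expands into repeated --mako-opt flags on a hypothetical base command:
def exampleMakoArgs = ["stage=pre_merge", "backend=pytorch"]
def exampleCmd = "python3 get_sysinfo.py --device 0" // hypothetical base command
exampleCmd += " " + exampleMakoArgs.collect { "--mako-opt " + it }.join(" ")
assert exampleCmd.endsWith("--mako-opt stage=pre_merge --mako-opt backend=pytorch")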
// Add the withCredentials step to access gpu-chip-mapping file
withCredentials([file(credentialsId: 'gpu-chip-mapping', variable: 'GPU_CHIP_MAPPING')]) {
@@ -557,13 +561,29 @@ def getMakoOpts(getMakoScript, makoArgs="") {
}
def renderTestDB(testContext, llmSrc, stageName) {
def makoOpts = ""
def scriptPath = "${llmSrc}/tests/integration/defs/sysinfo/get_sysinfo.py"
if (stageName.contains("Post-Merge")) {
makoOpts = getMakoOpts(scriptPath, "stage=post_merge")
def makoArgs = []
def isPostMerge = stageName.contains("Post-Merge")
makoArgs += [isPostMerge ? "stage=post_merge" : "stage=pre_merge"]
// Determine the backend type based on keywords in stageName
if (stageName.contains("-PyTorch-")) {
// Only tests with backend=pytorch or with no backend specified will run
makoArgs += ["backend=pytorch"]
} else if (stageName.contains("-TensorRT-")) {
// Only tests with backend=tensorrt or with no backend specified will run
makoArgs += ["backend=tensorrt"]
} else if (stageName.contains("-CPP-")) {
// Only tests with backend=cpp or with no backend specified will run
makoArgs += ["backend=cpp"]
} else {
makoOpts = getMakoOpts(scriptPath)
// No backend keyword in stageName, so no backend term is added
// In this case the query matches tests for every backend as well as tests with no backend specified
}
def makoOpts = getMakoOpts(scriptPath, makoArgs)
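// --- Illustration only, not part of the change: resulting makoArgs for a few
// --- representative stage names, mirroring the branches above:
//   "A10-PyTorch-1"                     -> ["stage=pre_merge", "backend=pytorch"]
//   "A10-CPP-1"                         -> ["stage=pre_merge", "backend=cpp"]
//   "H100_PCIe-TensorRT-[Post-Merge]-1" -> ["stage=post_merge", "backend=tensorrt"]
//   any stage name without a backend keyword -> ["stage=pre_merge"] (matches all backends)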
sh "pip3 install --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-tensorrt-pypi/simple --ignore-installed trt-test-db==1.8.5+bc6df7"
def testDBPath = "${llmSrc}/tests/integration/test_lists/test-db"
@@ -577,43 +597,11 @@ def renderTestDB(testContext, llmSrc, stageName) {
"--test-names",
"--output",
testList,
"--match-exact",
"--match",
"'${makoOpts}'"
].join(" ")
sh(label: "Render test list from test-db", script: testDBQueryCmd)
if (stageName.contains("Post-Merge")){
// Using the "stage: post_merge" mako will contain pre-merge tests by default.
// But currently post-merge test stages only run post-merge tests for
// triaging failures efficiently. We need to remove pre-merge tests explicitly.
// This behavior may change in the future.
def jsonSlurper = new JsonSlurper()
def jsonMap = jsonSlurper.parseText(makoOpts)
if (jsonMap.containsKey('stage') && jsonMap.stage == 'post_merge') {
jsonMap.remove('stage')
}
def updatedMakoOptsJson = JsonOutput.toJson(jsonMap)
def defaultTestList = "${llmSrc}/default_test.txt"
def updatedTestDBQueryCmd = [
"trt-test-db",
"-d",
testDBPath,
"--context",
testContext,
"--test-names",
"--output",
defaultTestList,
"--match-exact",
"'${updatedMakoOptsJson}'"
].join(" ")
sh(label: "Render default test list from test-db", script: updatedTestDBQueryCmd)
def linesToRemove = readFile(defaultTestList).readLines().collect { it.trim() }.toSet()
def updatedLines = readFile(testList).readLines().findAll { line ->
!linesToRemove.contains(line.trim())
}
def contentToWrite = updatedLines.join('\n')
sh "echo \"${contentToWrite}\" > ${testList}"
}
sh(script: "cat ${testList}")
return testList
@@ -1013,59 +1001,63 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
{
def dockerArgs = "-v /mnt/scratch.trt_llm_data:/scratch.trt_llm_data:ro -v /tmp/ccache:${CCACHE_DIR}:rw -v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw --cap-add syslog"
turtleConfigs = [
"DGX_H100-4_GPUs-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 4, 4],
"DGX_H100-4_GPUs-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 4, 4],
"DGX_H100-4_GPUs-3": ["dgx-h100-x4", "l0_dgx_h100", 3, 4, 4],
"DGX_H100-4_GPUs-4": ["dgx-h100-x4", "l0_dgx_h100", 4, 4, 4],
"A10-1": ["a10", "l0_a10", 1, 8],
"A10-2": ["a10", "l0_a10", 2, 8],
"A10-3": ["a10", "l0_a10", 3, 8],
"A10-4": ["a10", "l0_a10", 4, 8],
"A10-5": ["a10", "l0_a10", 5, 8],
"A10-6": ["a10", "l0_a10", 6, 8],
"A10-7": ["a10", "l0_a10", 7, 8],
"A10-8": ["a10", "l0_a10", 8, 8],
"A30-1": ["a30", "l0_a30", 1, 8],
"A30-2": ["a30", "l0_a30", 2, 8],
"A30-3": ["a30", "l0_a30", 3, 8],
"A30-4": ["a30", "l0_a30", 4, 8],
"A30-5": ["a30", "l0_a30", 5, 8],
"A30-6": ["a30", "l0_a30", 6, 8],
"A30-7": ["a30", "l0_a30", 7, 8],
"A30-8": ["a30", "l0_a30", 8, 8],
"A100X-1": ["a100x", "l0_a100", 1, 4],
"A100X-2": ["a100x", "l0_a100", 2, 4],
"A100X-3": ["a100x", "l0_a100", 3, 4],
"A100X-4": ["a100x", "l0_a100", 4, 4],
"L40S-1": ["l40s", "l0_l40s", 1, 4],
"L40S-2": ["l40s", "l0_l40s", 2, 4],
"L40S-3": ["l40s", "l0_l40s", 3, 4],
"L40S-4": ["l40s", "l0_l40s", 4, 4],
"H100_PCIe-1": ["h100-cr", "l0_h100", 1, 7],
"H100_PCIe-2": ["h100-cr", "l0_h100", 2, 7],
"H100_PCIe-3": ["h100-cr", "l0_h100", 3, 7],
"H100_PCIe-4": ["h100-cr", "l0_h100", 4, 7],
"H100_PCIe-5": ["h100-cr", "l0_h100", 5, 7],
"H100_PCIe-6": ["h100-cr", "l0_h100", 6, 7],
"H100_PCIe-7": ["h100-cr", "l0_h100", 7, 7],
"B200_PCIe-1": ["b100-ts2", "l0_b200", 1, 2],
"B200_PCIe-2": ["b100-ts2", "l0_b200", 2, 2],
"DGX_H100-4_GPUs-PyTorch-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-TensorRT-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 2, 4],
"DGX_H100-4_GPUs-TensorRT-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4],
"A10-PyTorch-1": ["a10", "l0_a10", 1, 1],
"A10-CPP-1": ["a10", "l0_a10", 1, 1],
"A10-TensorRT-1": ["a10", "l0_a10", 1, 6],
"A10-TensorRT-2": ["a10", "l0_a10", 2, 6],
"A10-TensorRT-3": ["a10", "l0_a10", 3, 6],
"A10-TensorRT-4": ["a10", "l0_a10", 4, 6],
"A10-TensorRT-5": ["a10", "l0_a10", 5, 6],
"A10-TensorRT-6": ["a10", "l0_a10", 6, 6],
"A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
"A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
"A30-CPP-1": ["a30", "l0_a30", 1, 2],
"A30-CPP-2": ["a30", "l0_a30", 2, 2],
"A30-TensorRT-1": ["a30", "l0_a30", 1, 4],
"A30-TensorRT-2": ["a30", "l0_a30", 2, 4],
"A30-TensorRT-3": ["a30", "l0_a30", 3, 4],
"A30-TensorRT-4": ["a30", "l0_a30", 4, 4],
"A100X-TensorRT-1": ["a100x", "l0_a100", 1, 4],
"A100X-TensorRT-2": ["a100x", "l0_a100", 2, 4],
"A100X-TensorRT-3": ["a100x", "l0_a100", 3, 4],
"A100X-TensorRT-4": ["a100x", "l0_a100", 4, 4],
"L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 1],
"L40S-TensorRT-1": ["l40s", "l0_l40s", 1, 3],
"L40S-TensorRT-2": ["l40s", "l0_l40s", 2, 3],
"L40S-TensorRT-3": ["l40s", "l0_l40s", 3, 3],
"H100_PCIe-PyTorch-1": ["h100-cr", "l0_h100", 1, 2],
"H100_PCIe-PyTorch-2": ["h100-cr", "l0_h100", 2, 2],
"H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 1],
"H100_PCIe-TensorRT-1": ["h100-cr", "l0_h100", 1, 5],
"H100_PCIe-TensorRT-2": ["h100-cr", "l0_h100", 2, 5],
"H100_PCIe-TensorRT-3": ["h100-cr", "l0_h100", 3, 5],
"H100_PCIe-TensorRT-4": ["h100-cr", "l0_h100", 4, 5],
"H100_PCIe-TensorRT-5": ["h100-cr", "l0_h100", 5, 5],
"B200_PCIe-PyTorch-1": ["b100-ts2", "l0_b200", 1, 2],
"B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 2],
"B200_PCIe-TensorRT-1": ["b100-ts2", "l0_b200", 1, 2],
"B200_PCIe-TensorRT-2": ["b100-ts2", "l0_b200", 2, 2],
// Currently post-merge test stages only run tests with "stage: post_merge" mako
// in the test-db. This behavior may change in the future.
"A10-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
"A10-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
"A30-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
"A30-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
"A100X-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
"A100X-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
"L40S-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
"L40S-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
"H100_PCIe-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 3],
"H100_PCIe-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 3],
"H100_PCIe-[Post-Merge]-3": ["h100-cr", "l0_h100", 3, 3],
"DGX_H100-4_GPUs-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"A100_80GB_PCIE-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
"H100_PCIe-Perf": ["h100-cr", "l0_perf", 1, 1],
"A10-TensorRT-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
"A10-TensorRT-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
"A30-TensorRT-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
"A30-TensorRT-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
"A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
"A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
"L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
"L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
"H100_PCIe-CPP-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 1],
"H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
"H100_PCIe-TensorRT-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 2],
"DGX_H100-4_GPUs-PyTorch-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-TensorRT-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"A100_80GB_PCIE-TensorRT-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
"H100_PCIe-TensorRT-Perf": ["h100-cr", "l0_perf", 1, 1],
]
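// --- Illustration only, not part of the change: each entry appears to map a
// --- stage name to [nodeLabel, testDbList, shardId, shardCount, gpuCount?],
// --- with gpuCount defaulting to 1 via "values[4] ?: 1" below. For example,
// --- "A30-TensorRT-2" runs shard 2 of 4 of l0_a30 on one "a30" GPU, while
// --- "DGX_H100-4_GPUs-TensorRT-1" runs shard 1 of 2 of l0_dgx_h100 on 4 GPUs.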
parallelJobs = turtleConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
@@ -1119,7 +1111,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
}]]}
sanityCheckConfigs = [
"pytorch": [
"DLFW": [
LLM_DOCKER_IMAGE,
"B200_PCIe",
X86_64_TRIPLE,
@@ -1151,7 +1143,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
if (env.targetArch == AARCH64_TRIPLE) {
sanityCheckConfigs = [
"pytorch": [
"DLFW": [
LLM_DOCKER_IMAGE,
"GH200",
AARCH64_TRIPLE,
@@ -1163,7 +1155,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
]
}
fullSet += [toStageName("GH200", "pytorch")]
fullSet += [toStageName("GH200", "DLFW")]
sanityCheckJobs = sanityCheckConfigs.collectEntries {key, values -> [toStageName(values[1], key), {
cacheErrorAndUploadResult(toStageName(values[1], key), {
@@ -1319,6 +1311,12 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
println parallelJobsFiltered.keySet()
}
if (testFilter[(ONLY_PYTORCH_FILE_CHANGED)]) {
echo "ONLY_PYTORCH_FILE_CHANGED mode is true."
parallelJobsFiltered = parallelJobsFiltered.findAll { !it.key.contains("-CPP-") && !it.key.contains("-TensorRT-") }
println parallelJobsFiltered.keySet()
}
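// --- Illustration only, not part of the change: a minimal sketch of the
// --- backend filter above, with assumed stage names:
def exampleJobs = ["A10-PyTorch-1": 1, "A10-CPP-1": 2, "A10-TensorRT-3": 3]
def exampleKept = exampleJobs.findAll { !it.key.contains("-CPP-") && !it.key.contains("-TensorRT-") }
assert exampleKept.keySet() == ["A10-PyTorch-1"] as Set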
// Check --stage-list, only run the stages in stage-list.
if (testFilter[TEST_STAGE_LIST] != null) {
echo "Use TEST_STAGE_LIST for filtering."


@@ -9,18 +9,47 @@ l0_a10:
gpu:
- '*a10*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*a10*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_model[medusa-86]
- test_cpp.py::test_model[redrafter-86]
- test_cpp.py::test_model[mamba-86]
- test_cpp.py::test_model[recurrentgemma-86]
- test_cpp.py::test_model[eagle-86]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*a10*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -89,6 +118,7 @@ l0_a10:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- test_e2e.py::test_mistral_e2e[use_py_session]
- test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]


@@ -9,6 +9,9 @@ l0_a100:
gpu:
- '*a100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
- unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others
- unittest/llmapi/test_llm_models.py -m "part1"
@@ -40,6 +43,7 @@ l0_a100:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
- accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype # 1.5 mins


@@ -9,6 +9,9 @@ l0_a30:
gpu:
- '*a30*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
@@ -21,10 +24,36 @@ l0_a30:
- unittest/_torch/modeling -k "modeling_vila"
- unittest/_torch/modeling -k "modeling_nemotron"
- unittest/_torch/auto_deploy/unit/singlegpu
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*a30*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_unit_tests[80]
- test_cpp.py::test_model[gpt-80]
- test_cpp.py::test_benchmarks[gpt-80]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*a30*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- unittest/trt/model/test_nemotron_nas.py -k "not fp8"
- unittest/trt/model/test_gpt.py -k "partition0" # 10 mins
@@ -71,6 +100,7 @@ l0_a30:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]
- examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]


@@ -9,6 +9,9 @@ l0_b200:
gpu:
- '*b100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
@@ -26,6 +29,19 @@ l0_b200:
- unittest/_torch/multi_gpu_modeling -k "deepseek and tp1 and not nextn0"
- unittest/_torch/auto_deploy/unit/singlegpu
- unittest/_torch/speculative/test_eagle3.py
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*b100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4
- accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-disable_fused_quant]


@@ -9,6 +9,9 @@ l0_dgx_h100:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch/multi_gpu
@@ -26,12 +29,38 @@ l0_dgx_h100:
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one[DeepSeek-V3-Lite-fp8]
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp[DeepSeek-V3-Lite-fp8]
- disaggregated/test_disaggregated.py::test_disaggregated_overlap_dp[DeepSeek-V3-Lite-fp8]
- condition:
ranges:
system_gpu_count:
gte: 4
lte: 4
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_multi_gpu_simple[90]
- test_cpp.py::test_multi_gpu_t5[90]
- test_cpp.py::test_multi_gpu_llama_executor[90]
- test_cpp.py::test_multi_gpu_trt_gpt_real_decoder[90]
- test_cpp.py::test_multi_gpu_disagg[90]
- condition:
ranges:
system_gpu_count:
gte: 4
lte: 4
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-disable_fp8_context_fmha]
- accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-enable_fp8_context_fmha]
@@ -71,10 +100,24 @@ l0_dgx_h100:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch/auto_deploy/integration/test_ad_build.py
- unittest/_torch/auto_deploy/integration/test_lm_eval.py
- condition:
ranges:
system_gpu_count:
gte: 4
lte: 4
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
- examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]


@@ -9,6 +9,9 @@ l0_gh200:
gpu:
- '*h200*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -31,6 +34,7 @@ l0_gh200:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- unittest/test_model_runner_cpp.py
- accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins


@@ -9,6 +9,9 @@ l0_h100:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
# Only key models in H100: llama/mixtral/nemotron/deepseek
@@ -22,6 +25,19 @@ l0_h100:
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_unit_tests[90]
- test_cpp.py::test_model[fp8-llama-90]
@@ -29,6 +45,19 @@ l0_h100:
- test_cpp.py::test_benchmarks[t5-90]
- test_cpp.py::test_model[encoder-90]
- test_cpp.py::test_model[enc_dec_language_adapter-90]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"
- unittest/trt/functional/test_moe.py
@@ -92,10 +121,24 @@ l0_h100:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: cpp
tests:
# ------------- CPP tests ---------------
- test_cpp.py::test_model[bart-90]
- test_cpp.py::test_benchmarks[bart-90]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- examples/test_eagle.py::test_llm_eagle_1gpu[llama3.1-eagle-8b-hf_v0.5-float16-bs8] # 9 mins
- examples/test_mistral.py::test_llm_mistral_nemo_minitron_fp8_quantization[Mistral-NeMo-Minitron-8B-Instruct]


@@ -9,6 +9,9 @@ l0_l40s:
gpu:
- '*l40s*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
@@ -25,6 +28,19 @@ l0_l40s:
- test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image]
- test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video]
- test_e2e.py::test_ptp_quickstart_bert[BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*l40s*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
# ------------- TRT tests ---------------
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
- unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -67,6 +83,7 @@ l0_l40s:
linux_distribution_name: ubuntu*
terms:
stage: post_merge
backend: tensorrt
tests:
- accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb
- accuracy/test_cli_flow.py::TestStarcoder2_3B::test_auto_dtype


@@ -10,6 +10,9 @@ l0_perf:
- '*a100*'
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: tensorrt
tests:
- perf/test_perf.py::test_perf[bert_base-plugin-float16-bs:32-input_len:32]
- perf/test_perf.py::test_perf[bert_base-cpp-plugin-float16-bs:32-input_len:32]