diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index 061262316f..c004551590 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -112,6 +112,8 @@ def DISABLE_MULTI_GPU_TEST = "disable_multi_gpu_test"
 def EXTRA_STAGE_LIST = "extra_stage"
 @Field
 def MULTI_GPU_FILE_CHANGED = "multi_gpu_file_changed"
+@Field
+def ONLY_PYTORCH_FILE_CHANGED = "only_pytorch_file_changed"

 def testFilter = [
     (REUSE_STAGE_LIST): null,
@@ -124,6 +126,7 @@ def testFilter = [
     (DISABLE_MULTI_GPU_TEST): false,
     (EXTRA_STAGE_LIST): null,
     (MULTI_GPU_FILE_CHANGED): false,
+    (ONLY_PYTORCH_FILE_CHANGED): false,
 ]

 String getShortenedJobName(String path)
@@ -478,7 +481,7 @@ def generateStageFailTestResultXml(stageName, subName, failureLog, resultPath) {
     """
 }

-def getMakoOpts(getMakoScript, makoArgs="") {
+def getMakoOpts(getMakoScript, makoArgs=[]) {
     // We want to save a map for the Mako opts
     def makoOpts = [:]
     def turtleOutput = ""
@@ -492,8 +495,9 @@ def getMakoOpts(getMakoScript, makoArgs="") {
         getMakoScript,
         "--device 0"].join(" ")

-    if (makoArgs != "") {
-        listMakoCmd = [listMakoCmd, "--mako-opt ${makoArgs}"].join(" ")
+    if (makoArgs) {
+        def makoOptArgs = makoArgs.collect { "--mako-opt " + it }
+        listMakoCmd += " " + makoOptArgs.join(" ")
     }
     // Add the withCredentials step to access gpu-chip-mapping file
     withCredentials([file(credentialsId: 'gpu-chip-mapping', variable: 'GPU_CHIP_MAPPING')]) {
@@ -557,13 +561,29 @@ def getMakoOpts(getMakoScript, makoArgs="") {
 }

 def renderTestDB(testContext, llmSrc, stageName) {
-    def makoOpts = ""
     def scriptPath = "${llmSrc}/tests/integration/defs/sysinfo/get_sysinfo.py"

-    if (stageName.contains("Post-Merge")) {
-        makoOpts = getMakoOpts(scriptPath, "stage=post_merge")
+    def makoArgs = []
+    def isPostMerge = stageName.contains("Post-Merge")
+    makoArgs += [isPostMerge ? "stage=post_merge" : "stage=pre_merge"]
+    // Determine the backend type based on keywords in stageName
+    if (stageName.contains("-PyTorch-")) {
+        // If stageName contains "-PyTorch-", add "backend=pytorch" to makoArgs
+        // At this point, only tests with backend=pytorch or unspecified backend will be run
+        makoArgs += ["backend=pytorch"]
+    } else if (stageName.contains("-TensorRT-")) {
+        // If stageName contains "-TensorRT-", add "backend=tensorrt" to makoArgs
+        // At this point, only tests with backend=tensorrt or unspecified backend will be run
+        makoArgs += ["backend=tensorrt"]
+    } else if (stageName.contains("-CPP-")) {
+        // If stageName contains "-CPP-", add "backend=cpp" to makoArgs
+        // At this point, only tests with backend=cpp or unspecified backend will be run
+        makoArgs += ["backend=cpp"]
     } else {
-        makoOpts = getMakoOpts(scriptPath)
+        // If stageName does not contain "-PyTorch-", "-TensorRT-", or "-CPP-", do not add any backend
+        // At this point, all tests will be run
+        // For cases where backend is not specified in makoArgs, we will match all types of backends and tests without specified backend
     }
+    def makoOpts = getMakoOpts(scriptPath, makoArgs)

     sh "pip3 install --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-tensorrt-pypi/simple --ignore-installed trt-test-db==1.8.5+bc6df7"
     def testDBPath = "${llmSrc}/tests/integration/test_lists/test-db"
@@ -577,43 +597,11 @@ def renderTestDB(testContext, llmSrc, stageName) {
         "--test-names",
         "--output",
         testList,
-        "--match-exact",
+        "--match",
         "'${makoOpts}'"
     ].join(" ")

     sh(label: "Render test list from test-db", script: testDBQueryCmd)
-    if (stageName.contains("Post-Merge")){
-        // Using the "stage: post_merge" mako will contain pre-merge tests by default.
-        // But currently post-merge test stages only run post-merge tests for
-        // triaging failures efficiently. We need to remove pre-merge tests explicitly.
-        // This behavior may change in the future.
-        def jsonSlurper = new JsonSlurper()
-        def jsonMap = jsonSlurper.parseText(makoOpts)
-        if (jsonMap.containsKey('stage') && jsonMap.stage == 'post_merge') {
-            jsonMap.remove('stage')
-        }
-        def updatedMakoOptsJson = JsonOutput.toJson(jsonMap)
-        def defaultTestList = "${llmSrc}/default_test.txt"
-        def updatedTestDBQueryCmd = [
-            "trt-test-db",
-            "-d",
-            testDBPath,
-            "--context",
-            testContext,
-            "--test-names",
-            "--output",
-            defaultTestList,
-            "--match-exact",
-            "'${updatedMakoOptsJson}'"
-        ].join(" ")
-        sh(label: "Render default test list from test-db", script: updatedTestDBQueryCmd)
-        def linesToRemove = readFile(defaultTestList).readLines().collect { it.trim() }.toSet()
-        def updatedLines = readFile(testList).readLines().findAll { line ->
-            !linesToRemove.contains(line.trim())
-        }
-        def contentToWrite = updatedLines.join('\n')
-        sh "echo \"${contentToWrite}\" > ${testList}"
-    }

     sh(script: "cat ${testList}")
     return testList
@@ -1013,59 +1001,63 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
 {
     def dockerArgs = "-v /mnt/scratch.trt_llm_data:/scratch.trt_llm_data:ro -v /tmp/ccache:${CCACHE_DIR}:rw -v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw --cap-add syslog"
     turtleConfigs = [
-        "DGX_H100-4_GPUs-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 4, 4],
-        "DGX_H100-4_GPUs-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 4, 4],
-        "DGX_H100-4_GPUs-3": ["dgx-h100-x4", "l0_dgx_h100", 3, 4, 4],
-        "DGX_H100-4_GPUs-4": ["dgx-h100-x4", "l0_dgx_h100", 4, 4, 4],
-        "A10-1": ["a10", "l0_a10", 1, 8],
-        "A10-2": ["a10", "l0_a10", 2, 8],
-        "A10-3": ["a10", "l0_a10", 3, 8],
-        "A10-4": ["a10", "l0_a10", 4, 8],
-        "A10-5": ["a10", "l0_a10", 5, 8],
-        "A10-6": ["a10", "l0_a10", 6, 8],
-        "A10-7": ["a10", "l0_a10", 7, 8],
-        "A10-8": ["a10", "l0_a10", 8, 8],
-        "A30-1": ["a30", "l0_a30", 1, 8],
-        "A30-2": ["a30", "l0_a30", 2, 8],
-        "A30-3": ["a30", "l0_a30", 3, 8],
-        "A30-4": ["a30", "l0_a30", 4, 8],
-        "A30-5": ["a30", "l0_a30", 5, 8],
-        "A30-6": ["a30", "l0_a30", 6, 8],
-        "A30-7": ["a30", "l0_a30", 7, 8],
-        "A30-8": ["a30", "l0_a30", 8, 8],
-        "A100X-1": ["a100x", "l0_a100", 1, 4],
-        "A100X-2": ["a100x", "l0_a100", 2, 4],
-        "A100X-3": ["a100x", "l0_a100", 3, 4],
-        "A100X-4": ["a100x", "l0_a100", 4, 4],
-        "L40S-1": ["l40s", "l0_l40s", 1, 4],
-        "L40S-2": ["l40s", "l0_l40s", 2, 4],
-        "L40S-3": ["l40s", "l0_l40s", 3, 4],
-        "L40S-4": ["l40s", "l0_l40s", 4, 4],
-        "H100_PCIe-1": ["h100-cr", "l0_h100", 1, 7],
-        "H100_PCIe-2": ["h100-cr", "l0_h100", 2, 7],
-        "H100_PCIe-3": ["h100-cr", "l0_h100", 3, 7],
-        "H100_PCIe-4": ["h100-cr", "l0_h100", 4, 7],
-        "H100_PCIe-5": ["h100-cr", "l0_h100", 5, 7],
-        "H100_PCIe-6": ["h100-cr", "l0_h100", 6, 7],
-        "H100_PCIe-7": ["h100-cr", "l0_h100", 7, 7],
-        "B200_PCIe-1": ["b100-ts2", "l0_b200", 1, 2],
-        "B200_PCIe-2": ["b100-ts2", "l0_b200", 2, 2],
+        "DGX_H100-4_GPUs-PyTorch-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
+        "DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
+        "DGX_H100-4_GPUs-TensorRT-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 2, 4],
+        "DGX_H100-4_GPUs-TensorRT-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4],
+        "A10-PyTorch-1": ["a10", "l0_a10", 1, 1],
+        "A10-CPP-1": ["a10", "l0_a10", 1, 1],
+        "A10-TensorRT-1": ["a10", "l0_a10", 1, 6],
+        "A10-TensorRT-2": ["a10", "l0_a10", 2, 6],
+        "A10-TensorRT-3": ["a10", "l0_a10", 3, 6],
+        "A10-TensorRT-4": ["a10", "l0_a10", 4, 6],
+        "A10-TensorRT-5": ["a10", "l0_a10", 5, 6],
+        "A10-TensorRT-6": ["a10", "l0_a10", 6, 6],
+        "A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
+        "A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
+        "A30-CPP-1": ["a30", "l0_a30", 1, 2],
+        "A30-CPP-2": ["a30", "l0_a30", 2, 2],
+        "A30-TensorRT-1": ["a30", "l0_a30", 1, 4],
+        "A30-TensorRT-2": ["a30", "l0_a30", 2, 4],
+        "A30-TensorRT-3": ["a30", "l0_a30", 3, 4],
+        "A30-TensorRT-4": ["a30", "l0_a30", 4, 4],
+        "A100X-TensorRT-1": ["a100x", "l0_a100", 1, 4],
+        "A100X-TensorRT-2": ["a100x", "l0_a100", 2, 4],
+        "A100X-TensorRT-3": ["a100x", "l0_a100", 3, 4],
+        "A100X-TensorRT-4": ["a100x", "l0_a100", 4, 4],
+        "L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 1],
+        "L40S-TensorRT-1": ["l40s", "l0_l40s", 1, 3],
+        "L40S-TensorRT-2": ["l40s", "l0_l40s", 2, 3],
+        "L40S-TensorRT-3": ["l40s", "l0_l40s", 3, 3],
+        "H100_PCIe-PyTorch-1": ["h100-cr", "l0_h100", 1, 2],
+        "H100_PCIe-PyTorch-2": ["h100-cr", "l0_h100", 2, 2],
+        "H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 1],
+        "H100_PCIe-TensorRT-1": ["h100-cr", "l0_h100", 1, 5],
+        "H100_PCIe-TensorRT-2": ["h100-cr", "l0_h100", 2, 5],
+        "H100_PCIe-TensorRT-3": ["h100-cr", "l0_h100", 3, 5],
+        "H100_PCIe-TensorRT-4": ["h100-cr", "l0_h100", 4, 5],
+        "H100_PCIe-TensorRT-5": ["h100-cr", "l0_h100", 5, 5],
+        "B200_PCIe-PyTorch-1": ["b100-ts2", "l0_b200", 1, 2],
+        "B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 2],
+        "B200_PCIe-TensorRT-1": ["b100-ts2", "l0_b200", 1, 2],
+        "B200_PCIe-TensorRT-2": ["b100-ts2", "l0_b200", 2, 2],
         // Currently post-merge test stages only run tests with "stage: post_merge" mako
         // in the test-db. This behavior may change in the future.
-        "A10-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
-        "A10-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
-        "A30-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
-        "A30-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
-        "A100X-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
-        "A100X-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
-        "L40S-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
-        "L40S-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
-        "H100_PCIe-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 3],
-        "H100_PCIe-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 3],
-        "H100_PCIe-[Post-Merge]-3": ["h100-cr", "l0_h100", 3, 3],
-        "DGX_H100-4_GPUs-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
-        "A100_80GB_PCIE-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
-        "H100_PCIe-Perf": ["h100-cr", "l0_perf", 1, 1],
+        "A10-TensorRT-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
+        "A10-TensorRT-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
+        "A30-TensorRT-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
+        "A30-TensorRT-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
+        "A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
+        "A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
+        "L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
+        "L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
+        "H100_PCIe-CPP-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 1],
+        "H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
+        "H100_PCIe-TensorRT-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 2],
+        "DGX_H100-4_GPUs-PyTorch-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
+        "DGX_H100-4_GPUs-TensorRT-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
+        "A100_80GB_PCIE-TensorRT-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
+        "H100_PCIe-TensorRT-Perf": ["h100-cr", "l0_perf", 1, 1],
     ]

     parallelJobs = turtleConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
@@ -1119,7 +1111,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     }]]}

     sanityCheckConfigs = [
-        "pytorch": [
+        "DLFW": [
            LLM_DOCKER_IMAGE,
            "B200_PCIe",
            X86_64_TRIPLE,
@@ -1151,7 +1143,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)

     if (env.targetArch == AARCH64_TRIPLE) {
         sanityCheckConfigs = [
-            "pytorch": [
+            "DLFW": [
                LLM_DOCKER_IMAGE,
                "GH200",
                AARCH64_TRIPLE,
@@ -1163,7 +1155,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         ]
     }

-    fullSet += [toStageName("GH200", "pytorch")]
+    fullSet += [toStageName("GH200", "DLFW")]

     sanityCheckJobs = sanityCheckConfigs.collectEntries {key, values -> [toStageName(values[1], key), {
         cacheErrorAndUploadResult(toStageName(values[1], key), {
@@ -1319,6 +1311,12 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         println parallelJobsFiltered.keySet()
     }

+    if (testFilter[(ONLY_PYTORCH_FILE_CHANGED)]) {
+        echo "ONLY_PYTORCH_FILE_CHANGED mode is true."
+        parallelJobsFiltered = parallelJobsFiltered.findAll { !it.key.contains("-CPP-") && !it.key.contains("-TensorRT-") }
+        println parallelJobsFiltered.keySet()
+    }
+
     // Check --stage-list, only run the stages in stage-list.
     if (testFilter[TEST_STAGE_LIST] != null) {
         echo "Use TEST_STAGE_LIST for filtering."
diff --git a/tests/integration/test_lists/test-db/l0_a10.yml b/tests/integration/test_lists/test-db/l0_a10.yml
index 4de92d0d06..d9b818f96c 100644
--- a/tests/integration/test_lists/test-db/l0_a10.yml
+++ b/tests/integration/test_lists/test-db/l0_a10.yml
@@ -9,18 +9,47 @@ l0_a10:
       gpu:
       - '*a10*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a10*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: cpp
+  tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_model[medusa-86]
   - test_cpp.py::test_model[redrafter-86]
   - test_cpp.py::test_model[mamba-86]
   - test_cpp.py::test_model[recurrentgemma-86]
   - test_cpp.py::test_model[eagle-86]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a10*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - unittest/trt/attention/test_gpt_attention.py -k "partition0"
   - unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -89,6 +118,7 @@ l0_a10:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - test_e2e.py::test_mistral_e2e[use_py_session]
   - test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]
diff --git a/tests/integration/test_lists/test-db/l0_a100.yml b/tests/integration/test_lists/test-db/l0_a100.yml
index f31b81f0d4..f4e87b7771 100644
--- a/tests/integration/test_lists/test-db/l0_a100.yml
+++ b/tests/integration/test_lists/test-db/l0_a100.yml
@@ -9,6 +9,9 @@ l0_a100:
       gpu:
       - '*a100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
   tests:
   - unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others
   - unittest/llmapi/test_llm_models.py -m "part1"
@@ -40,6 +43,7 @@ l0_a100:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
   - accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype # 1.5 mins
diff --git a/tests/integration/test_lists/test-db/l0_a30.yml b/tests/integration/test_lists/test-db/l0_a30.yml
index a8f4331206..6765e9ccd2 100644
--- a/tests/integration/test_lists/test-db/l0_a30.yml
+++ b/tests/integration/test_lists/test-db/l0_a30.yml
@@ -9,6 +9,9 @@ l0_a30:
       gpu:
       - '*a30*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
@@ -21,10 +24,36 @@ l0_a30:
   - unittest/_torch/modeling -k "modeling_vila"
   - unittest/_torch/modeling -k "modeling_nemotron"
   - unittest/_torch/auto_deploy/unit/singlegpu
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a30*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: cpp
+  tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_unit_tests[80]
   - test_cpp.py::test_model[gpt-80]
   - test_cpp.py::test_benchmarks[gpt-80]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a30*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - unittest/trt/model/test_nemotron_nas.py -k "not fp8"
   - unittest/trt/model/test_gpt.py -k "partition0" # 10 mins
@@ -71,6 +100,7 @@ l0_a30:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]
   - examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]
diff --git a/tests/integration/test_lists/test-db/l0_b200.yml b/tests/integration/test_lists/test-db/l0_b200.yml
index 1c644daaae..62a88d3016 100644
--- a/tests/integration/test_lists/test-db/l0_b200.yml
+++ b/tests/integration/test_lists/test-db/l0_b200.yml
@@ -9,6 +9,9 @@ l0_b200:
       gpu:
       - '*b100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
@@ -26,6 +29,19 @@ l0_b200:
   - unittest/_torch/multi_gpu_modeling -k "deepseek and tp1 and not nextn0"
   - unittest/_torch/auto_deploy/unit/singlegpu
   - unittest/_torch/speculative/test_eagle3.py
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*b100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4
   - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-disable_fused_quant]
diff --git a/tests/integration/test_lists/test-db/l0_dgx_h100.yml b/tests/integration/test_lists/test-db/l0_dgx_h100.yml
index 92ef60fdb5..666d293fc6 100644
--- a/tests/integration/test_lists/test-db/l0_dgx_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_dgx_h100.yml
@@ -9,6 +9,9 @@ l0_dgx_h100:
       gpu:
       - '*h100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch/multi_gpu
@@ -26,12 +29,38 @@ l0_dgx_h100:
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one[DeepSeek-V3-Lite-fp8]
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp[DeepSeek-V3-Lite-fp8]
   - disaggregated/test_disaggregated.py::test_disaggregated_overlap_dp[DeepSeek-V3-Lite-fp8]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 4
+        lte: 4
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: cpp
+  tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_multi_gpu_simple[90]
   - test_cpp.py::test_multi_gpu_t5[90]
   - test_cpp.py::test_multi_gpu_llama_executor[90]
   - test_cpp.py::test_multi_gpu_trt_gpt_real_decoder[90]
   - test_cpp.py::test_multi_gpu_disagg[90]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 4
+        lte: 4
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-disable_fp8_context_fmha]
   - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-enable_fp8_context_fmha]
@@ -71,10 +100,24 @@ l0_dgx_h100:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch/auto_deploy/integration/test_ad_build.py
   - unittest/_torch/auto_deploy/integration/test_lm_eval.py
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 4
+        lte: 4
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: post_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
   - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]
diff --git a/tests/integration/test_lists/test-db/l0_gh200.yml b/tests/integration/test_lists/test-db/l0_gh200.yml
index 79dcca3999..7e515d37bd 100644
--- a/tests/integration/test_lists/test-db/l0_gh200.yml
+++ b/tests/integration/test_lists/test-db/l0_gh200.yml
@@ -9,6 +9,9 @@ l0_gh200:
       gpu:
       - '*h200*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
   tests:
   - unittest/trt/attention/test_gpt_attention.py -k "partition0"
   - unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -31,6 +34,7 @@ l0_gh200:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - unittest/test_model_runner_cpp.py
   - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml
index d6244c9808..f48b27cc78 100644
--- a/tests/integration/test_lists/test-db/l0_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -9,6 +9,9 @@ l0_h100:
       gpu:
       - '*h100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   # Only key models in H100: llama/mixtral/nemotron/deepseek
@@ -22,6 +25,19 @@ l0_h100:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales
   - test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]
   - test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: cpp
+  tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_unit_tests[90]
   - test_cpp.py::test_model[fp8-llama-90]
@@ -29,6 +45,19 @@ l0_h100:
   - test_cpp.py::test_benchmarks[t5-90]
   - test_cpp.py::test_model[encoder-90]
   - test_cpp.py::test_model[enc_dec_language_adapter-90]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"
   - unittest/trt/functional/test_moe.py
@@ -92,10 +121,24 @@ l0_h100:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: cpp
   tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_model[bart-90]
   - test_cpp.py::test_benchmarks[bart-90]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: post_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - examples/test_eagle.py::test_llm_eagle_1gpu[llama3.1-eagle-8b-hf_v0.5-float16-bs8] # 9 mins
   - examples/test_mistral.py::test_llm_mistral_nemo_minitron_fp8_quantization[Mistral-NeMo-Minitron-8B-Instruct]
diff --git a/tests/integration/test_lists/test-db/l0_l40s.yml b/tests/integration/test_lists/test-db/l0_l40s.yml
index 4ab85a0f06..22cdba9f8b 100644
--- a/tests/integration/test_lists/test-db/l0_l40s.yml
+++ b/tests/integration/test_lists/test-db/l0_l40s.yml
@@ -9,6 +9,9 @@ l0_l40s:
       gpu:
       - '*l40s*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
@@ -25,6 +28,19 @@ l0_l40s:
   - test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image]
   - test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video]
   - test_e2e.py::test_ptp_quickstart_bert[BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*l40s*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - unittest/trt/attention/test_gpt_attention.py -k "partition0"
   - unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -67,6 +83,7 @@ l0_l40s:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb
   - accuracy/test_cli_flow.py::TestStarcoder2_3B::test_auto_dtype
diff --git a/tests/integration/test_lists/test-db/l0_perf.yml b/tests/integration/test_lists/test-db/l0_perf.yml
index 40c0ae4cee..6f6308fb96 100644
--- a/tests/integration/test_lists/test-db/l0_perf.yml
+++ b/tests/integration/test_lists/test-db/l0_perf.yml
@@ -10,6 +10,9 @@ l0_perf:
       - '*a100*'
      - '*h100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
   tests:
   - perf/test_perf.py::test_perf[bert_base-plugin-float16-bs:32-input_len:32]
   - perf/test_perf.py::test_perf[bert_base-cpp-plugin-float16-bs:32-input_len:32]
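
Reviewer note (not part of the patch): a minimal plain-Groovy sketch of what the new list-valued makoArgs produces once getMakoOpts() expands it into repeated --mako-opt flags. The short python3 get_sysinfo.py command here abbreviates the real listMakoCmd assembled in L0_Test.groovy; for a pre-merge PyTorch stage, renderTestDB() passes ["stage=pre_merge", "backend=pytorch"].

    // Sketch: how getMakoOpts() turns makoArgs into --mako-opt flags (abbreviated command).
    def makoArgs = ["stage=pre_merge", "backend=pytorch"]   // as built by renderTestDB()
    def listMakoCmd = ["python3", "get_sysinfo.py", "--device 0"].join(" ")
    if (makoArgs) {
        // One "--mako-opt key=value" flag per entry, mirroring the patched code above.
        def makoOptArgs = makoArgs.collect { "--mako-opt " + it }
        listMakoCmd += " " + makoOptArgs.join(" ")
    }
    assert listMakoCmd == "python3 get_sysinfo.py --device 0 --mako-opt stage=pre_merge --mako-opt backend=pytorch"

Combined with the switch from --match-exact to --match, a stage that sets backend=pytorch selects test-db entries whose terms specify backend: pytorch as well as entries with no backend term, per the comments in renderTestDB() above.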