diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index 061262316f..c004551590 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -112,6 +112,8 @@ def DISABLE_MULTI_GPU_TEST = "disable_multi_gpu_test"
 def EXTRA_STAGE_LIST = "extra_stage"
 @Field
 def MULTI_GPU_FILE_CHANGED = "multi_gpu_file_changed"
+@Field
+def ONLY_PYTORCH_FILE_CHANGED = "only_pytorch_file_changed"

 def testFilter = [
     (REUSE_STAGE_LIST): null,
@@ -124,6 +126,7 @@ def testFilter = [
     (DISABLE_MULTI_GPU_TEST): false,
     (EXTRA_STAGE_LIST): null,
     (MULTI_GPU_FILE_CHANGED): false,
+    (ONLY_PYTORCH_FILE_CHANGED): false,
 ]

 String getShortenedJobName(String path)
@@ -478,7 +481,7 @@ def generateStageFailTestResultXml(stageName, subName, failureLog, resultPath) {
     """
 }

-def getMakoOpts(getMakoScript, makoArgs="") {
+def getMakoOpts(getMakoScript, makoArgs=[]) {
     // We want to save a map for the Mako opts
     def makoOpts = [:]
     def turtleOutput = ""
@@ -492,8 +495,9 @@ def getMakoOpts(getMakoScript, makoArgs="") {
         getMakoScript,
         "--device 0"].join(" ")

-    if (makoArgs != "") {
-        listMakoCmd = [listMakoCmd, "--mako-opt ${makoArgs}"].join(" ")
+    if (makoArgs) {
+        def makoOptArgs = makoArgs.collect { "--mako-opt " + it }
+        listMakoCmd += " " + makoOptArgs.join(" ")
     }
     // Add the withCredentials step to access gpu-chip-mapping file
     withCredentials([file(credentialsId: 'gpu-chip-mapping', variable: 'GPU_CHIP_MAPPING')]) {
@@ -557,13 +561,29 @@ def getMakoOpts(getMakoScript, makoArgs="") {
 }

 def renderTestDB(testContext, llmSrc, stageName) {
-    def makoOpts = ""
     def scriptPath = "${llmSrc}/tests/integration/defs/sysinfo/get_sysinfo.py"

-    if (stageName.contains("Post-Merge")) {
-        makoOpts = getMakoOpts(scriptPath, "stage=post_merge")
+    def makoArgs = []
+    def isPostMerge = stageName.contains("Post-Merge")
+    makoArgs += [isPostMerge ? "stage=post_merge" : "stage=pre_merge"]
+    // Determine the backend type based on keywords in stageName
+    if (stageName.contains("-PyTorch-")) {
+        // If stageName contains "-PyTorch-", add "backend=pytorch" to makoArgs
+        // At this point, only tests with backend=pytorch or unspecified backend will be run
+        makoArgs += ["backend=pytorch"]
+    } else if (stageName.contains("-TensorRT-")) {
+        // If stageName contains "-TensorRT-", add "backend=tensorrt" to makoArgs
+        // At this point, only tests with backend=tensorrt or unspecified backend will be run
+        makoArgs += ["backend=tensorrt"]
+    } else if (stageName.contains("-CPP-")) {
+        // If stageName contains "-CPP-", add "backend=cpp" to makoArgs
+        // At this point, only tests with backend=cpp or unspecified backend will be run
+        makoArgs += ["backend=cpp"]
     } else {
-        makoOpts = getMakoOpts(scriptPath)
+        // If stageName does not contain "-PyTorch-", "-TensorRT-", or "-CPP-", do not add any backend
+        // At this point, all tests will be run
+        // For cases where backend is not specified in makoArgs, we will match all types of backends and tests without specified backend
     }
+    def makoOpts = getMakoOpts(scriptPath, makoArgs)

     sh "pip3 install --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-tensorrt-pypi/simple --ignore-installed trt-test-db==1.8.5+bc6df7"
     def testDBPath = "${llmSrc}/tests/integration/test_lists/test-db"
@@ -577,43 +597,11 @@ def renderTestDB(testContext, llmSrc, stageName) {
         "--test-names",
         "--output",
         testList,
-        "--match-exact",
+        "--match",
         "'${makoOpts}'"
     ].join(" ")

     sh(label: "Render test list from test-db", script: testDBQueryCmd)
-    if (stageName.contains("Post-Merge")){
-        // Using the "stage: post_merge" mako will contain pre-merge tests by default.
-        // But currently post-merge test stages only run post-merge tests for
-        // triaging failures efficiently. We need to remove pre-merge tests explicitly.
-        // This behavior may change in the future.
-        def jsonSlurper = new JsonSlurper()
-        def jsonMap = jsonSlurper.parseText(makoOpts)
-        if (jsonMap.containsKey('stage') && jsonMap.stage == 'post_merge') {
-            jsonMap.remove('stage')
-        }
-        def updatedMakoOptsJson = JsonOutput.toJson(jsonMap)
-        def defaultTestList = "${llmSrc}/default_test.txt"
-        def updatedTestDBQueryCmd = [
-            "trt-test-db",
-            "-d",
-            testDBPath,
-            "--context",
-            testContext,
-            "--test-names",
-            "--output",
-            defaultTestList,
-            "--match-exact",
-            "'${updatedMakoOptsJson}'"
-        ].join(" ")
-        sh(label: "Render default test list from test-db", script: updatedTestDBQueryCmd)
-        def linesToRemove = readFile(defaultTestList).readLines().collect { it.trim() }.toSet()
-        def updatedLines = readFile(testList).readLines().findAll { line ->
-            !linesToRemove.contains(line.trim())
-        }
-        def contentToWrite = updatedLines.join('\n')
-        sh "echo \"${contentToWrite}\" > ${testList}"
-    }

     sh(script: "cat ${testList}")
     return testList
@@ -1013,59 +1001,63 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
 {
     def dockerArgs = "-v /mnt/scratch.trt_llm_data:/scratch.trt_llm_data:ro -v /tmp/ccache:${CCACHE_DIR}:rw -v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw --cap-add syslog"
     turtleConfigs = [
-        "DGX_H100-4_GPUs-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 4, 4],
-        "DGX_H100-4_GPUs-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 4, 4],
-        "DGX_H100-4_GPUs-3": ["dgx-h100-x4", "l0_dgx_h100", 3, 4, 4],
-        "DGX_H100-4_GPUs-4": ["dgx-h100-x4", "l0_dgx_h100", 4, 4, 4],
-        "A10-1": ["a10", "l0_a10", 1, 8],
-        "A10-2": ["a10", "l0_a10", 2, 8],
-        "A10-3": ["a10", "l0_a10", 3, 8],
-        "A10-4": ["a10", "l0_a10", 4, 8],
-        "A10-5": ["a10", "l0_a10", 5, 8],
-        "A10-6": ["a10", "l0_a10", 6, 8],
-        "A10-7": ["a10", "l0_a10", 7, 8],
-        "A10-8": ["a10", "l0_a10", 8, 8],
-        "A30-1": ["a30", "l0_a30", 1, 8],
-        "A30-2": ["a30", "l0_a30", 2, 8],
-        "A30-3": ["a30", "l0_a30", 3, 8],
-        "A30-4": ["a30", "l0_a30", 4, 8],
-        "A30-5": ["a30", "l0_a30", 5, 8],
-        "A30-6": ["a30", "l0_a30", 6, 8],
-        "A30-7": ["a30", "l0_a30", 7, 8],
-        "A30-8": ["a30", "l0_a30", 8, 8],
-        "A100X-1": ["a100x", "l0_a100", 1, 4],
-        "A100X-2": ["a100x", "l0_a100", 2, 4],
-        "A100X-3": ["a100x", "l0_a100", 3, 4],
-        "A100X-4": ["a100x", "l0_a100", 4, 4],
-        "L40S-1": ["l40s", "l0_l40s", 1, 4],
-        "L40S-2": ["l40s", "l0_l40s", 2, 4],
-        "L40S-3": ["l40s", "l0_l40s", 3, 4],
-        "L40S-4": ["l40s", "l0_l40s", 4, 4],
-        "H100_PCIe-1": ["h100-cr", "l0_h100", 1, 7],
-        "H100_PCIe-2": ["h100-cr", "l0_h100", 2, 7],
-        "H100_PCIe-3": ["h100-cr", "l0_h100", 3, 7],
-        "H100_PCIe-4": ["h100-cr", "l0_h100", 4, 7],
-        "H100_PCIe-5": ["h100-cr", "l0_h100", 5, 7],
-        "H100_PCIe-6": ["h100-cr", "l0_h100", 6, 7],
-        "H100_PCIe-7": ["h100-cr", "l0_h100", 7, 7],
-        "B200_PCIe-1": ["b100-ts2", "l0_b200", 1, 2],
-        "B200_PCIe-2": ["b100-ts2", "l0_b200", 2, 2],
+        "DGX_H100-4_GPUs-PyTorch-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
+        "DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
+        "DGX_H100-4_GPUs-TensorRT-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 2, 4],
+        "DGX_H100-4_GPUs-TensorRT-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4],
+        "A10-PyTorch-1": ["a10", "l0_a10", 1, 1],
+        "A10-CPP-1": ["a10", "l0_a10", 1, 1],
+        "A10-TensorRT-1": ["a10", "l0_a10", 1, 6],
+        "A10-TensorRT-2": ["a10", "l0_a10", 2, 6],
+        "A10-TensorRT-3": ["a10", "l0_a10", 3, 6],
+        "A10-TensorRT-4": ["a10", "l0_a10", 4, 6],
+        "A10-TensorRT-5": ["a10", "l0_a10", 5, 6],
+        "A10-TensorRT-6": ["a10", "l0_a10", 6, 6],
+        "A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
+        "A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
+        "A30-CPP-1": ["a30", "l0_a30", 1, 2],
+        "A30-CPP-2": ["a30", "l0_a30", 2, 2],
+        "A30-TensorRT-1": ["a30", "l0_a30", 1, 4],
+        "A30-TensorRT-2": ["a30", "l0_a30", 2, 4],
+        "A30-TensorRT-3": ["a30", "l0_a30", 3, 4],
+        "A30-TensorRT-4": ["a30", "l0_a30", 4, 4],
+        "A100X-TensorRT-1": ["a100x", "l0_a100", 1, 4],
+        "A100X-TensorRT-2": ["a100x", "l0_a100", 2, 4],
+        "A100X-TensorRT-3": ["a100x", "l0_a100", 3, 4],
+        "A100X-TensorRT-4": ["a100x", "l0_a100", 4, 4],
+        "L40S-PyTorch-1": ["l40s", "l0_l40s", 1, 1],
+        "L40S-TensorRT-1": ["l40s", "l0_l40s", 1, 3],
+        "L40S-TensorRT-2": ["l40s", "l0_l40s", 2, 3],
+        "L40S-TensorRT-3": ["l40s", "l0_l40s", 3, 3],
+        "H100_PCIe-PyTorch-1": ["h100-cr", "l0_h100", 1, 2],
+        "H100_PCIe-PyTorch-2": ["h100-cr", "l0_h100", 2, 2],
+        "H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 1],
+        "H100_PCIe-TensorRT-1": ["h100-cr", "l0_h100", 1, 5],
+        "H100_PCIe-TensorRT-2": ["h100-cr", "l0_h100", 2, 5],
+        "H100_PCIe-TensorRT-3": ["h100-cr", "l0_h100", 3, 5],
+        "H100_PCIe-TensorRT-4": ["h100-cr", "l0_h100", 4, 5],
+        "H100_PCIe-TensorRT-5": ["h100-cr", "l0_h100", 5, 5],
+        "B200_PCIe-PyTorch-1": ["b100-ts2", "l0_b200", 1, 2],
+        "B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 2],
+        "B200_PCIe-TensorRT-1": ["b100-ts2", "l0_b200", 1, 2],
+        "B200_PCIe-TensorRT-2": ["b100-ts2", "l0_b200", 2, 2],
         // Currently post-merge test stages only run tests with "stage: post_merge" mako
         // in the test-db. This behavior may change in the future.
-        "A10-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
-        "A10-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
-        "A30-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
-        "A30-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
-        "A100X-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
-        "A100X-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
-        "L40S-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
-        "L40S-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
-        "H100_PCIe-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 3],
-        "H100_PCIe-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 3],
-        "H100_PCIe-[Post-Merge]-3": ["h100-cr", "l0_h100", 3, 3],
-        "DGX_H100-4_GPUs-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
-        "A100_80GB_PCIE-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
-        "H100_PCIe-Perf": ["h100-cr", "l0_perf", 1, 1],
+        "A10-TensorRT-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
+        "A10-TensorRT-[Post-Merge]-2": ["a10", "l0_a10", 2, 2],
+        "A30-TensorRT-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
+        "A30-TensorRT-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
+        "A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
+        "A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
+        "L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 2],
+        "L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 2],
+        "H100_PCIe-CPP-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 1],
+        "H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
+        "H100_PCIe-TensorRT-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 2],
+        "DGX_H100-4_GPUs-PyTorch-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
+        "DGX_H100-4_GPUs-TensorRT-[Post-Merge]": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
+        "A100_80GB_PCIE-TensorRT-Perf": ["a100-80gb-pcie", "l0_perf", 1, 1],
+        "H100_PCIe-TensorRT-Perf": ["h100-cr", "l0_perf", 1, 1],
     ]

     parallelJobs = turtleConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
@@ -1119,7 +1111,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     }]]}

     sanityCheckConfigs = [
-        "pytorch": [
+        "DLFW": [
            LLM_DOCKER_IMAGE,
            "B200_PCIe",
            X86_64_TRIPLE,
@@ -1151,7 +1143,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)

     if (env.targetArch == AARCH64_TRIPLE) {
         sanityCheckConfigs = [
-            "pytorch": [
+            "DLFW": [
                LLM_DOCKER_IMAGE,
                "GH200",
                AARCH64_TRIPLE,
@@ -1163,7 +1155,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         ]
     }

-    fullSet += [toStageName("GH200", "pytorch")]
+    fullSet += [toStageName("GH200", "DLFW")]

     sanityCheckJobs = sanityCheckConfigs.collectEntries {key, values -> [toStageName(values[1], key), {
         cacheErrorAndUploadResult(toStageName(values[1], key), {
@@ -1319,6 +1311,12 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         println parallelJobsFiltered.keySet()
     }

+    if (testFilter[(ONLY_PYTORCH_FILE_CHANGED)]) {
+        echo "ONLY_PYTORCH_FILE_CHANGED mode is true."
+        parallelJobsFiltered = parallelJobsFiltered.findAll { !it.key.contains("-CPP-") && !it.key.contains("-TensorRT-") }
+        println parallelJobsFiltered.keySet()
+    }
+
     // Check --stage-list, only run the stages in stage-list.
     if (testFilter[TEST_STAGE_LIST] != null) {
         echo "Use TEST_STAGE_LIST for filtering."
diff --git a/tests/integration/test_lists/test-db/l0_a10.yml b/tests/integration/test_lists/test-db/l0_a10.yml
index 4de92d0d06..d9b818f96c 100644
--- a/tests/integration/test_lists/test-db/l0_a10.yml
+++ b/tests/integration/test_lists/test-db/l0_a10.yml
@@ -9,18 +9,47 @@ l0_a10:
       gpu:
       - '*a10*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a10*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: cpp
+  tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_model[medusa-86]
   - test_cpp.py::test_model[redrafter-86]
   - test_cpp.py::test_model[mamba-86]
   - test_cpp.py::test_model[recurrentgemma-86]
   - test_cpp.py::test_model[eagle-86]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a10*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - unittest/trt/attention/test_gpt_attention.py -k "partition0"
   - unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -89,6 +118,7 @@ l0_a10:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - test_e2e.py::test_mistral_e2e[use_py_session]
   - test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]
diff --git a/tests/integration/test_lists/test-db/l0_a100.yml b/tests/integration/test_lists/test-db/l0_a100.yml
index f31b81f0d4..f4e87b7771 100644
--- a/tests/integration/test_lists/test-db/l0_a100.yml
+++ b/tests/integration/test_lists/test-db/l0_a100.yml
@@ -9,6 +9,9 @@ l0_a100:
       gpu:
       - '*a100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
   tests:
   - unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others
   - unittest/llmapi/test_llm_models.py -m "part1"
@@ -40,6 +43,7 @@ l0_a100:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
   - accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype # 1.5 mins
diff --git a/tests/integration/test_lists/test-db/l0_a30.yml b/tests/integration/test_lists/test-db/l0_a30.yml
index a8f4331206..6765e9ccd2 100644
--- a/tests/integration/test_lists/test-db/l0_a30.yml
+++ b/tests/integration/test_lists/test-db/l0_a30.yml
@@ -9,6 +9,9 @@ l0_a30:
       gpu:
       - '*a30*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
@@ -21,10 +24,36 @@ l0_a30:
   - unittest/_torch/modeling -k "modeling_vila"
   - unittest/_torch/modeling -k "modeling_nemotron"
   - unittest/_torch/auto_deploy/unit/singlegpu
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a30*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: cpp
+  tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_unit_tests[80]
   - test_cpp.py::test_model[gpt-80]
   - test_cpp.py::test_benchmarks[gpt-80]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a30*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - unittest/trt/model/test_nemotron_nas.py -k "not fp8"
   - unittest/trt/model/test_gpt.py -k "partition0" # 10 mins
@@ -71,6 +100,7 @@ l0_a30:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]
   - examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]
diff --git a/tests/integration/test_lists/test-db/l0_b200.yml b/tests/integration/test_lists/test-db/l0_b200.yml
index 1c644daaae..62a88d3016 100644
--- a/tests/integration/test_lists/test-db/l0_b200.yml
+++ b/tests/integration/test_lists/test-db/l0_b200.yml
@@ -9,6 +9,9 @@ l0_b200:
       gpu:
       - '*b100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
@@ -26,6 +29,19 @@ l0_b200:
   - unittest/_torch/multi_gpu_modeling -k "deepseek and tp1 and not nextn0"
   - unittest/_torch/auto_deploy/unit/singlegpu
   - unittest/_torch/speculative/test_eagle3.py
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*b100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4
   - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-disable_fused_quant]
diff --git a/tests/integration/test_lists/test-db/l0_dgx_h100.yml b/tests/integration/test_lists/test-db/l0_dgx_h100.yml
index 92ef60fdb5..666d293fc6 100644
--- a/tests/integration/test_lists/test-db/l0_dgx_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_dgx_h100.yml
@@ -9,6 +9,9 @@ l0_dgx_h100:
       gpu:
       - '*h100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch/multi_gpu
@@ -26,12 +29,38 @@ l0_dgx_h100:
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one[DeepSeek-V3-Lite-fp8]
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp[DeepSeek-V3-Lite-fp8]
   - disaggregated/test_disaggregated.py::test_disaggregated_overlap_dp[DeepSeek-V3-Lite-fp8]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 4
+        lte: 4
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: cpp
+  tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_multi_gpu_simple[90]
   - test_cpp.py::test_multi_gpu_t5[90]
   - test_cpp.py::test_multi_gpu_llama_executor[90]
   - test_cpp.py::test_multi_gpu_trt_gpt_real_decoder[90]
   - test_cpp.py::test_multi_gpu_disagg[90]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 4
+        lte: 4
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-disable_fp8_context_fmha]
   - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-enable_fp8_context_fmha]
@@ -71,10 +100,24 @@ l0_dgx_h100:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch/auto_deploy/integration/test_ad_build.py
   - unittest/_torch/auto_deploy/integration/test_lm_eval.py
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 4
+        lte: 4
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: post_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
   - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]
diff --git a/tests/integration/test_lists/test-db/l0_gh200.yml b/tests/integration/test_lists/test-db/l0_gh200.yml
index 79dcca3999..7e515d37bd 100644
--- a/tests/integration/test_lists/test-db/l0_gh200.yml
+++ b/tests/integration/test_lists/test-db/l0_gh200.yml
@@ -9,6 +9,9 @@ l0_gh200:
       gpu:
       - '*h200*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
   tests:
   - unittest/trt/attention/test_gpt_attention.py -k "partition0"
   - unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -31,6 +34,7 @@ l0_gh200:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - unittest/test_model_runner_cpp.py
   - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml
index d6244c9808..f48b27cc78 100644
--- a/tests/integration/test_lists/test-db/l0_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -9,6 +9,9 @@ l0_h100:
       gpu:
       - '*h100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   # Only key models in H100: llama/mixtral/nemotron/deepseek
@@ -22,6 +25,19 @@ l0_h100:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales
   - test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]
   - test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: cpp
+  tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_unit_tests[90]
   - test_cpp.py::test_model[fp8-llama-90]
@@ -29,6 +45,19 @@ l0_h100:
   - test_cpp.py::test_benchmarks[t5-90]
   - test_cpp.py::test_model[encoder-90]
   - test_cpp.py::test_model[enc_dec_language_adapter-90]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"
   - unittest/trt/functional/test_moe.py
@@ -92,10 +121,24 @@ l0_h100:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: cpp
   tests:
   # ------------- CPP tests ---------------
   - test_cpp.py::test_model[bart-90]
   - test_cpp.py::test_benchmarks[bart-90]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: post_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - examples/test_eagle.py::test_llm_eagle_1gpu[llama3.1-eagle-8b-hf_v0.5-float16-bs8] # 9 mins
   - examples/test_mistral.py::test_llm_mistral_nemo_minitron_fp8_quantization[Mistral-NeMo-Minitron-8B-Instruct]
diff --git a/tests/integration/test_lists/test-db/l0_l40s.yml b/tests/integration/test_lists/test-db/l0_l40s.yml
index 4ab85a0f06..22cdba9f8b 100644
--- a/tests/integration/test_lists/test-db/l0_l40s.yml
+++ b/tests/integration/test_lists/test-db/l0_l40s.yml
@@ -9,6 +9,9 @@ l0_l40s:
       gpu:
       - '*l40s*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
@@ -25,6 +28,19 @@ l0_l40s:
   - test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image]
   - test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video]
   - test_e2e.py::test_ptp_quickstart_bert[BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*l40s*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
   # ------------- TRT tests ---------------
   - unittest/trt/attention/test_gpt_attention.py -k "partition0"
   - unittest/trt/attention/test_gpt_attention.py -k "partition1"
@@ -67,6 +83,7 @@ l0_l40s:
       linux_distribution_name: ubuntu*
     terms:
       stage: post_merge
+      backend: tensorrt
   tests:
   - accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb
   - accuracy/test_cli_flow.py::TestStarcoder2_3B::test_auto_dtype
diff --git a/tests/integration/test_lists/test-db/l0_perf.yml b/tests/integration/test_lists/test-db/l0_perf.yml
index 40c0ae4cee..6f6308fb96 100644
--- a/tests/integration/test_lists/test-db/l0_perf.yml
+++ b/tests/integration/test_lists/test-db/l0_perf.yml
@@ -10,6 +10,9 @@ l0_perf:
       - '*a100*'
      - '*h100*'
       linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
   tests:
   - perf/test_perf.py::test_perf[bert_base-plugin-float16-bs:32-input_len:32]
   - perf/test_perf.py::test_perf[bert_base-cpp-plugin-float16-bs:32-input_len:32]
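
Reviewer note (not part of the patch): a minimal plain-Groovy sketch of what the new list-valued makoArgs produces once getMakoOpts() expands it into repeated --mako-opt flags. The short python3 get_sysinfo.py command here abbreviates the real listMakoCmd assembled in L0_Test.groovy; for a pre-merge PyTorch stage, renderTestDB() passes ["stage=pre_merge", "backend=pytorch"].

    // Sketch: how getMakoOpts() turns makoArgs into --mako-opt flags (abbreviated command).
    def makoArgs = ["stage=pre_merge", "backend=pytorch"]   // as built by renderTestDB()
    def listMakoCmd = ["python3", "get_sysinfo.py", "--device 0"].join(" ")
    if (makoArgs) {
        // One "--mako-opt key=value" flag per entry, mirroring the patched code above.
        def makoOptArgs = makoArgs.collect { "--mako-opt " + it }
        listMakoCmd += " " + makoOptArgs.join(" ")
    }
    assert listMakoCmd == "python3 get_sysinfo.py --device 0 --mako-opt stage=pre_merge --mako-opt backend=pytorch"

Combined with the switch from --match-exact to --match, a stage that sets backend=pytorch selects test-db entries whose terms specify backend: pytorch as well as entries with no backend term, per the comments in renderTestDB() above.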