From 5ca3376d6f2aa1d865d200003406869fbb3875ed Mon Sep 17 00:00:00 2001
From: Zhanrui Sun <zhanruis@nvidia.com>
Date: Fri, 5 Sep 2025 00:04:22 -0700
Subject: [PATCH] Support using the CU13 image for the DLFW sanity check

Signed-off-by: Zhanrui Sun <zhanruis@nvidia.com>
---
 jenkins/Build.groovy   | 12 ++++++++----
 jenkins/L0_Test.groovy | 37 ++++++++++++++++++++++++++++---------
 requirements.txt       |  2 +-
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/jenkins/Build.groovy b/jenkins/Build.groovy
index 1a0971099e..34be217550 100644
--- a/jenkins/Build.groovy
+++ b/jenkins/Build.groovy
@@ -34,6 +34,9 @@ def TARNAME = "tarName"
 @Field
 def WHEEL_ARCHS = "wheelArchs"
 
+@Field
+def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig"
+
 @Field
 def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"
 
@@ -109,6 +112,7 @@ def BUILD_CONFIGS = [
     (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
     (TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz",
     (WHEEL_ARCHS): "90-real;100-real;120-real",
+    (BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fixing the build OOM issue on SBSA
   ],
 ]
 
@@ -457,8 +461,10 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
         trtllm_utils.replaceWithAlternativeTRT(env.alternativeTRT, "cp312")
     }
 
+    def buildJobs = buildFlags[BUILD_JOBS_FOR_CONFIG] ?: BUILD_JOBS
+
     withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) {
-        sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${BUILD_JOBS} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks"
+        sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${buildJobs} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks"
     }
     if (is_linux_x86_64) {
         sh "cd ${LLM_ROOT} && python3 scripts/build_cpp_examples.py"
@@ -472,7 +478,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
     if (tarName.contains("CU12")) {
         tritonShortTag = "r25.06"
     }
-    sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"
+    sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"
 
     // Step 3: packaging wheels into tarfile
     sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/"
@@ -579,8 +585,6 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
         "Build TRT-LLM SingleDevice": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
             pipeline, CONFIG_LINUX_X86_64_SINGLE_DEVICE),
         ]
-    } else {
-        buildConfigs.remove("Build TRT-LLM LLVM") // TODO: Remove after LLVM is supported on AArch64
     }
 
     rtServer (
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index aa972b2afa..deae86ae08 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -904,7 +904,8 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
                               - key: "kubernetes.io/hostname"
                                 operator: In
                                 values:
-                                - "lego-cg1-qct-066.ipp3a2.colossus\""""
+                                - "lego-cg1-qct-066.ipp3a2.colossus"
+                                - "lego-cg1-qct-069.ipp3a2.colossus\""""
     }
 
     def podConfig = [
@@ -2160,13 +2161,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
 
     // Python version and OS for sanity check
     x86SanityCheckConfigs = [
-        "PY312-DLFW-CU12": [
-            LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9,
-            "B200_PCIe",
+        "PY312-DLFW": [
+            LLM_DOCKER_IMAGE,
+            "A10",
             X86_64_TRIPLE,
-            true,
+            false,
             "dlfw/",
-            DLFW_IMAGE_12_9,
+            DLFW_IMAGE,
             false,
         ],
         "PY310-UB2204-CU12": [
@@ -2199,13 +2200,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
             UBUNTU_24_04_IMAGE,
             true, // Extra PyTorch CUDA 12.8 install
         ],
-        "PY312-DLFW-CU12": [
-            LLM_SBSA_DOCKER_IMAGE_12_9,
+        "PY312-DLFW": [
+            LLM_DOCKER_IMAGE,
             "GH200",
             AARCH64_TRIPLE,
             false,
             "dlfw/",
-            DLFW_IMAGE_12_9,
+            DLFW_IMAGE,
             false,
         ],
     ]
@@ -2321,6 +2322,24 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
                             }
                         }
 
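+                        // TODO: Remove this after public triton supports CUDA 13. Workaround for the x86_64 DLFW sanity check: install the pytorch_triton 3.3.1 nightly wheel, then rename its dist-info directory and rewrite its METADATA/RECORD entries so it is registered under the package name "triton".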
+                        if (key == "PY312-DLFW" && values[2] == X86_64_TRIPLE) {
+                            trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install https://download.pytorch.org/whl/nightly/pytorch_triton-3.3.1%2Bgitc8757738-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl")
+                            sh """
+                                cd /usr/local/lib/python3.12/dist-packages/ && \
+                                ls -la | grep pytorch_triton && \
+                                mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
+                                cd triton-3.3.1+gitc8757738.dist-info && \
+                                echo "Current directory: \$(pwd)" && \
+                                echo "Files in directory:" && \
+                                ls -la && \
+                                sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
+                                sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
+                                echo "METADATA after update:" && \
+                                grep "^Name:" METADATA
+                            """
+                        }
+
                         def libEnv = []
                         if (env.alternativeTRT) {
                             stage("Replace TensorRT") {
diff --git a/requirements.txt b/requirements.txt
index 0a574d5333..e59d5abd09 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -70,7 +70,7 @@ ninja
 etcd3
 blake3
 soundfile
-triton>=3.3.1,<3.4.0; platform_machine == "x86_64"
+triton==3.3.1; platform_machine == "x86_64"
 tiktoken
 blobfile
 openai-harmony==0.0.4