diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi
index d91e53d9a7..6f19602efb 100644
--- a/docker/Dockerfile.multi
+++ b/docker/Dockerfile.multi
@@ -117,7 +117,7 @@ RUN mkdir -p /root/.cache/pip /root/.cache/ccache
 ENV CCACHE_DIR=/root/.cache/ccache
 # Build the TRT-LLM wheel
 ARG BUILD_WHEEL_ARGS="--clean --python_bindings --benchmarks"
-RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/ccache \
+RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=${CCACHE_DIR} \
     python3 scripts/build_wheel.py ${BUILD_WHEEL_ARGS}
 
 FROM ${DEVEL_IMAGE} AS release
diff --git a/docker/Makefile b/docker/Makefile
index 4265b114b0..658aebfbfc 100644
--- a/docker/Makefile
+++ b/docker/Makefile
@@ -161,24 +161,24 @@ release_run: WORK_DIR = /app/tensorrt_llm
 
 # For x86_64
 jenkins_%: IMAGE_WITH_TAG = $(shell grep 'LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
-jenkins_%: STAGE = devel
+jenkins_%: STAGE = tritondevel
 
 # For aarch64
 jenkins-aarch64_%: IMAGE_WITH_TAG = $(shell grep 'LLM_SBSA_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
-jenkins-aarch64_%: STAGE = devel
+jenkins-aarch64_%: STAGE = tritondevel
 
 # For x86_64
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell grep 'LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
-jenkins-rockylinux8_%: STAGE = devel
+jenkins-rockylinux8_%: STAGE = tritondevel
 jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
 jenkins-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
 
-rockylinux8_%: STAGE = devel
+rockylinux8_%: STAGE = tritondevel
 rockylinux8_%: BASE_IMAGE = nvidia/cuda
 rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
 
 # For x86_64 and aarch64
-ubuntu22_%: STAGE = devel
+ubuntu22_%: STAGE = tritondevel
 ubuntu22_%: BASE_IMAGE = nvidia/cuda
 ubuntu22_%: BASE_TAG = 12.9.0-devel-ubuntu22.04
diff --git a/jenkins/BuildDockerImage.groovy b/jenkins/BuildDockerImage.groovy
index 5cb227909b..defd17a8fb 100644
--- a/jenkins/BuildDockerImage.groovy
+++ b/jenkins/BuildDockerImage.groovy
@@ -12,17 +12,72 @@ withCredentials([string(credentialsId: 'default-llm-repo', variable: 'DEFAULT_LL
 }
 
 LLM_ROOT = "llm"
-LLM_BRANCH = env.gitlabBranch? env.gitlabBranch : params.branch
+LLM_BRANCH = env.gitlabBranch ?: params.branch
 LLM_BRANCH_TAG = LLM_BRANCH.replaceAll('/', '_')
-BUILD_JOBS = "32"
-BUILD_JOBS_RELEASE_X86_64 = "16"
-BUILD_JOBS_RELEASE_SBSA = "8"
+LLM_COMMIT_OR_BRANCH = env.gitlabCommit ?: LLM_BRANCH
 
-def createKubernetesPodConfig(type, arch = "amd64")
+LLM_SHORT_COMMIT = env.gitlabCommit ? env.gitlabCommit.substring(0, 7) : "undefined"
+
+LLM_DEFAULT_TAG = env.defaultTag ?: "${LLM_SHORT_COMMIT}-${LLM_BRANCH_TAG}-${BUILD_NUMBER}"
+
+BUILD_JOBS = "32"
+BUILD_JOBS_RELEASE_X86_64 = "32"
+BUILD_JOBS_RELEASE_SBSA = "32"
+
+CCACHE_DIR="/mnt/sw-tensorrt-pvc/scratch.trt_ccache/llm_ccache"
+
+@Field
+def GITHUB_PR_API_URL = "github_pr_api_url"
+@Field
+def CACHED_CHANGED_FILE_LIST = "cached_changed_file_list"
+@Field
+def ACTION_INFO = "action_info"
+def globalVars = [
+    (GITHUB_PR_API_URL): null,
+    (CACHED_CHANGED_FILE_LIST): null,
+    (ACTION_INFO): null,
+]
+
+@Field
+def imageKeyToTag = [:]
+
+def createKubernetesPodConfig(type, arch = "amd64", build_wheel = false)
 {
     def targetCould = "kubernetes-cpu"
     def containerConfig = ""
+    def selectors = """
+                nodeSelector:
+                  nvidia.com/node_type: builder
+                  kubernetes.io/os: linux
+                  kubernetes.io/arch: ${arch}"""
+
+    if (build_wheel && arch == "arm64") {
+        // For aarch64, pin the build to specific hosts to work around the ucxx issue when building wheels
+        selectors += """
+                affinity:
+                  nodeAffinity:
+                    requiredDuringSchedulingIgnoredDuringExecution:
+                      nodeSelectorTerms:
+                      - matchExpressions:
+                        - key: "kubernetes.io/hostname"
+                          operator: In
+                          values:
+                          - "rl300-0008.ipp2u1.colossus"
+                          - "rl300-0014.ipp2u1.colossus"
+                          - "rl300-0023.ipp2u1.colossus"
+                          - "rl300-0024.ipp2u1.colossus"
+                          - "rl300-0030.ipp2u1.colossus"
+                          - "rl300-0040.ipp2u1.colossus"
+                          - "rl300-0041.ipp2u1.colossus"
+                          - "rl300-0042.ipp2u1.colossus"
+                          - "rl300-0043.ipp2u1.colossus"
+                          - "rl300-0044.ipp2u1.colossus"
+                          - "rl300-0045.ipp2u1.colossus"
+                          - "rl300-0046.ipp2u1.colossus"
+                          - "rl300-0047.ipp2u1.colossus"
+        """
+    }
 
     switch(type)
     {
@@ -44,9 +99,10 @@ def createKubernetesPodConfig(type, arch = "amd64")
                 imagePullPolicy: Always"""
             break
         case "build":
+            // Use a customized docker:dind image with essential dependencies
             containerConfig = """
                 - name: docker
-                  image: urm.nvidia.com/docker/docker:dind
+                  image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:202505221445_docker_dind_withbash
                   tty: true
                   resources:
                     requests:
@@ -65,19 +121,34 @@ def createKubernetesPodConfig(type, arch = "amd64")
                     - SYS_ADMIN"""
             break
     }
-
+    def pvcVolume = """
+            - name: sw-tensorrt-pvc
+              persistentVolumeClaim:
+                claimName: sw-tensorrt-pvc
+    """
+    if (arch == "arm64") {
+        // PVC mounts aren't supported on the aarch64 platform. Use NFS as a workaround.
+        pvcVolume = """
+            - name: sw-tensorrt-pvc
+              nfs:
+                server: 10.117.145.13
+                path: /vol/scratch1/scratch.svc_tensorrt_blossom
+    """
+    }
+    def nodeLabelPrefix = "cpu"
+    def jobName = "llm-build-images"
+    def buildID = env.BUILD_ID
+    def nodeLabel = trtllm_utils.appendRandomPostfix("${nodeLabelPrefix}---tensorrt-${jobName}-${buildID}")
     def podConfig = [
         cloud: targetCould,
         namespace: "sw-tensorrt",
+        label: nodeLabel,
         yaml: """
             apiVersion: v1
             kind: Pod
             spec:
                 qosClass: Guaranteed
-                nodeSelector:
-                  nvidia.com/node_type: builder
-                  kubernetes.io/os: linux
-                  kubernetes.io/arch: ${arch}
+                ${selectors}
                 containers:
                   ${containerConfig}
                   - name: jnlp
@@ -92,6 +163,12 @@ def createKubernetesPodConfig(type, arch = "amd64")
                         cpu: '2'
                         memory: 10Gi
                         ephemeral-storage: 25Gi
+                    volumeMounts:
+                    - name: sw-tensorrt-pvc
+                      mountPath: "/mnt/sw-tensorrt-pvc"
+                      readOnly: false
+                volumes:
+                ${pvcVolume}
         """.stripIndent(),
     ]
 
@@ -99,94 +176,218 @@
 }
 
 
-def buildImage(target, action="build", torchInstallType="skip", args="", custom_tag="", post_tag="", is_sbsa=false)
+def buildImage(config, imageKeyToTag)
 {
-    def arch = is_sbsa ? "sbsa" : "x86_64"
"sbsa" : "x86_64" - def tag = "${arch}-${target}-torch_${torchInstallType}${post_tag}-${LLM_BRANCH_TAG}-${BUILD_NUMBER}" + def target = config.target + def action = config.action + def torchInstallType = config.torchInstallType + def args = config.args ?: "" + def customTag = config.customTag + def postTag = config.postTag + def dependentTarget = config.dependentTarget + def arch = config.arch == 'arm64' ? 'sbsa' : 'x86_64' - // Step 1: cloning tekit source code - // allow to checkout from forked repo, svc_tensorrt needs to have access to the repo, otherwise clone will fail - trtllm_utils.checkoutSource(LLM_REPO, LLM_BRANCH, LLM_ROOT, true, true) + def tag = "${arch}-${target}-torch_${torchInstallType}${postTag}-${LLM_DEFAULT_TAG}" - // Step 2: building wheels in container - container("docker") { - stage ("Install packages") { - sh "pwd && ls -alh" - sh "env" - sh "apk add make git" - sh "git config --global --add safe.directory '*'" + def dependentTargetTag = tag.replace("${arch}-${target}-", "${arch}-${dependentTarget}-") - withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) { - sh "docker login urm.nvidia.com -u ${USERNAME} -p ${PASSWORD}" - } + if (target == "ngc-release") { + imageKeyToTag["NGC Devel Image ${config.arch}"] = "${IMAGE_NAME}/${dependentTarget}:${dependentTargetTag}" + imageKeyToTag["NGC Release Image ${config.arch}"] = "${IMAGE_NAME}/${target}:${tag}" + } - withCredentials([ - usernamePassword( - credentialsId: "svc_tensorrt_gitlab_read_api_token", - usernameVariable: 'USERNAME', - passwordVariable: 'PASSWORD' - ), - string(credentialsId: 'default-git-url', variable: 'DEFAULT_GIT_URL') - ]) { - sh "docker login ${DEFAULT_GIT_URL}:5005 -u ${USERNAME} -p ${PASSWORD}" - } + args += " GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote" + + stage (config.stageName) { + // Step 1: Clone TRT-LLM source codes + // If using a forked repo, svc_tensorrt needs to have the access to the forked repo. 
+        trtllm_utils.checkoutSource(LLM_REPO, LLM_COMMIT_OR_BRANCH, LLM_ROOT, true, true)
+    }
+
+    // Step 2: Build the images
+    stage ("Install packages") {
+        sh "pwd && ls -alh"
+        sh "env"
+        sh "apk add make git"
+        sh "git config --global --add safe.directory '*'"
+
+        withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
+            sh "docker login urm.nvidia.com -u ${USERNAME} -p ${PASSWORD}"
         }
-        try {
-            // Fix the build OOM issue of release builds
-            def build_jobs = BUILD_JOBS
-            if (target == "trtllm") {
-                if (arch == "x86_64") {
-                    build_jobs = BUILD_JOBS_RELEASE_X86_64
-                } else {
-                    build_jobs = BUILD_JOBS_RELEASE_SBSA
-                }
-            }
-            containerGenFailure = null
-            stage ("make ${target}_${action}") {
-                retry(3)
-                {
-                    // Fix the triton image pull timeout issue
-                    def TRITON_IMAGE = sh(script: "cd ${LLM_ROOT} && grep 'ARG TRITON_IMAGE=' docker/Dockerfile.multi | grep -o '=.*' | tr -d '=\"'", returnStdout: true).trim()
-                    def TRITON_BASE_TAG = sh(script: "cd ${LLM_ROOT} && grep 'ARG TRITON_BASE_TAG=' docker/Dockerfile.multi | grep -o '=.*' | tr -d '=\"'", returnStdout: true).trim()
-                    retry(3) {
-                        sh "docker pull ${TRITON_IMAGE}:${TRITON_BASE_TAG}"
-                    }
-                    sh """
-                    cd ${LLM_ROOT} && make -C docker ${target}_${action} \
-                        TORCH_INSTALL_TYPE=${torchInstallType} \
-                        IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} \
-                        BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} \
-                        GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
-                    """
-                }
-            }
-
-            if (custom_tag) {
-                stage ("custom tag: ${custom_tag}") {
-                    sh """
-                    cd ${LLM_ROOT} && make -C docker ${target}_${action} \
-                        TORCH_INSTALL_TYPE=${torchInstallType} \
-                        IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${custom_tag} \
-                        BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} \
-                        GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
-                    """
-                }
-            }
-        } catch (Exception ex) {
-            containerGenFailure = ex
-        } finally {
-            stage ("Docker logout") {
-                withCredentials([string(credentialsId: 'default-git-url', variable: 'DEFAULT_GIT_URL')]) {
-                    sh "docker logout urm.nvidia.com"
-                    sh "docker logout ${DEFAULT_GIT_URL}:5005"
-                }
-            }
-            if (containerGenFailure != null) {
-                throw containerGenFailure
-            }
+        withCredentials([
+            usernamePassword(
+                credentialsId: "svc_tensorrt_gitlab_read_api_token",
+                usernameVariable: 'USERNAME',
+                passwordVariable: 'PASSWORD'
+            ),
+            string(credentialsId: 'default-git-url', variable: 'DEFAULT_GIT_URL')
+        ]) {
+            sh "docker login ${DEFAULT_GIT_URL}:5005 -u ${USERNAME} -p ${PASSWORD}"
         }
     }
+
+    try {
+        def build_jobs = BUILD_JOBS
+        // Fix the triton image pull timeout issue
+        def TRITON_IMAGE = sh(script: "cd ${LLM_ROOT} && grep 'ARG TRITON_IMAGE=' docker/Dockerfile.multi | grep -o '=.*' | tr -d '=\"'", returnStdout: true).trim()
+        def TRITON_BASE_TAG = sh(script: "cd ${LLM_ROOT} && grep 'ARG TRITON_BASE_TAG=' docker/Dockerfile.multi | grep -o '=.*' | tr -d '=\"'", returnStdout: true).trim()
+        containerGenFailure = null
+
+        if (dependentTarget) {
+            stage ("make ${dependentTarget}_${action} (${arch})") {
+                retry(3) {
+                    retry(3) {
+                        sh "docker pull ${TRITON_IMAGE}:${TRITON_BASE_TAG}"
+                    }
+                    sh """
+                    cd ${LLM_ROOT} && make -C docker ${dependentTarget}_${action} \
+                        TORCH_INSTALL_TYPE=${torchInstallType} \
+                        IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${dependentTargetTag} \
+                        BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
+                    """
+                }
+                args += " DEVEL_IMAGE=${IMAGE_NAME}/${dependentTarget}:${dependentTargetTag}"
+            }
+        }
+
+        // Reduce the chance of hitting OOM when building the wheel
+        if (target == "trtllm") {
+            if (arch == "x86_64") {
+                build_jobs = BUILD_JOBS_RELEASE_X86_64
+            } else {
+                build_jobs = BUILD_JOBS_RELEASE_SBSA
+            }
+        }
+        stage ("make ${target}_${action} (${arch})") {
+            retry(3) {
+                retry(3) {
+                    sh "docker pull ${TRITON_IMAGE}:${TRITON_BASE_TAG}"
+                }
+
+                sh """
+                cd ${LLM_ROOT} && make -C docker ${target}_${action} \
+                    TORCH_INSTALL_TYPE=${torchInstallType} \
+                    IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} \
+                    BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
+                """
+            }
+        }
+
+        if (customTag) {
+            stage ("custom tag: ${customTag} (${arch})") {
+                sh """
+                cd ${LLM_ROOT} && make -C docker ${target}_${action} \
+                    TORCH_INSTALL_TYPE=${torchInstallType} \
+                    IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${customTag} \
+                    BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
+                """
+            }
+        }
+    } catch (Exception ex) {
+        containerGenFailure = ex
+    } finally {
+        stage ("Docker logout") {
+            withCredentials([string(credentialsId: 'default-git-url', variable: 'DEFAULT_GIT_URL')]) {
+                sh "docker logout urm.nvidia.com"
+                sh "docker logout ${DEFAULT_GIT_URL}:5005"
+            }
+        }
+        if (containerGenFailure != null) {
+            throw containerGenFailure
+        }
+    }
+}
+
+
+def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
+    def defaultBuildConfig = [
+        target: "tritondevel",
+        action: params.action,
+        customTag: "",
+        postTag: "",
+        args: "",
+        torchInstallType: "skip",
+        arch: "amd64",
+        build_wheel: false,
+        dependentTarget: "",
+    ]
+
+    def release_action = env.JOB_NAME ==~ /.*PostMerge.*/ ? "push" : params.action
+    def buildConfigs = [
+        "Build trtllm release (x86_64)": [
+            target: "trtllm",
+            action: release_action,
+            customTag: LLM_BRANCH_TAG + "-x86_64",
+            build_wheel: true,
+        ],
+        "Build trtllm release (SBSA)": [
+            target: "trtllm",
+            action: release_action,
+            customTag: LLM_BRANCH_TAG + "-sbsa",
+            build_wheel: true,
+            arch: "arm64"
+        ],
+        "Build CI image (x86_64 tritondevel)": [:],
+        "Build CI image (SBSA tritondevel)": [
+            arch: "arm64",
+        ],
+        "Build CI image (RockyLinux8 Python310)": [
+            target: "rockylinux8",
+            args: "PYTHON_VERSION=3.10.12",
+            postTag: "-py310",
+        ],
+        "Build CI image (RockyLinux8 Python312)": [
+            target: "rockylinux8",
+            args: "PYTHON_VERSION=3.12.3 STAGE=tritondevel",
+            postTag: "-py312",
+        ],
+        "Build NGC devel and release (x86_64)": [
+            target: "ngc-release",
+            action: release_action,
+            customTag: "ngc-" + LLM_BRANCH_TAG + "-x86_64",
+            args: "DOCKER_BUILD_OPTS='--load --platform linux/amd64'",
+            build_wheel: true,
+            dependentTarget: "devel",
+        ],
+        "Build NGC devel and release (SBSA)": [
+            target: "ngc-release",
+            action: release_action,
+            customTag: "ngc-" + LLM_BRANCH_TAG + "-sbsa",
+            args: "DOCKER_BUILD_OPTS='--load --platform linux/arm64'",
+            arch: "arm64",
+            build_wheel: true,
+            dependentTarget: "devel",
+        ],
+    ]
+    // Fill in any missing fields of each build config with the default values
+    buildConfigs.each { key, config ->
+        defaultBuildConfig.each { defaultKey, defaultValue ->
+            if (!(defaultKey in config)) {
+                config[defaultKey] = defaultValue
+            }
+        }
+        config.podConfig = createKubernetesPodConfig("build", config.arch, config.build_wheel)
+    }
+    echo "Build configs:"
+    println buildConfigs
+
+    def buildJobs = buildConfigs.collectEntries { key, config ->
+        [key, {
+            script {
+                stage(key) {
+                    config.stageName = key
+                    trtllm_utils.launchKubernetesPod(pipeline, config.podConfig, "docker") {
+                        buildImage(config, imageKeyToTag)
+                    }
+                }
+            }
+        }]
+    }
+
+    echo "enableFailFast is: ${env.enableFailFast}, but we currently don't use it due to the random ucxx issue"
+    //pipeline.failFast = env.enableFailFast
+    pipeline.parallel buildJobs
+}
@@ -216,65 +417,40 @@ pipeline {
         timeout(time: 24, unit: 'HOURS')
     }
     environment {
+        CCACHE_DIR="${CCACHE_DIR}"
         PIP_INDEX_URL="https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple"
     }
     stages {
-        stage("Build")
-        {
-            parallel {
-                stage("Build trtllm release") {
-                    agent {
-                        kubernetes createKubernetesPodConfig("build")
-                    }
-                    steps
-                    {
-                        buildImage("trtllm", env.JOB_NAME ==~ /.*PostMerge.*/ ? "push" : params.action, "skip", "", LLM_BRANCH_TAG)
-                    }
+        stage("Setup environment") {
+            steps {
+                script {
+                    echo "branch is: ${LLM_BRANCH}"
+                    echo "env.gitlabBranch is: ${env.gitlabBranch}"
+                    echo "params.branch is: ${params.branch}"
+                    echo "params.action is: ${params.action}"
+                    echo "env.defaultTag is: ${env.defaultTag}"
+                    echo "env.gitlabCommit is: ${env.gitlabCommit}"
+                    echo "LLM_REPO is: ${LLM_REPO}"
+                    echo "env.globalVars is: ${env.globalVars}"
+                    globalVars = trtllm_utils.updateMapWithJson(this, globalVars, env.globalVars, "globalVars")
+                    globalVars[ACTION_INFO] = trtllm_utils.setupPipelineDescription(this, globalVars[ACTION_INFO])
                 }
-                stage("Build x86_64-skip") {
-                    agent {
-                        kubernetes createKubernetesPodConfig("build")
-                    }
-                    steps
-                    {
-                        buildImage("tritondevel", params.action, "skip")
-                    }
+            }
+        }
+        stage("Build") {
+            steps {
+                script {
+                    launchBuildJobs(this, globalVars, imageKeyToTag)
                 }
-                stage("Build trtllm release-sbsa") {
-                    agent {
-                        kubernetes createKubernetesPodConfig("build", "arm64")
-                    }
-                    steps
-                    {
-                        buildImage("trtllm", env.JOB_NAME ==~ /.*PostMerge.*/ ? "push" : params.action, "skip", "", LLM_BRANCH_TAG + "-sbsa", "", true)
-                    }
-                }
-                stage("Build rockylinux8 x86_64-skip-py3.10") {
-                    agent {
-                        kubernetes createKubernetesPodConfig("build")
-                    }
-                    steps
-                    {
-                        buildImage("rockylinux8", params.action, "skip", "PYTHON_VERSION=3.10.12 STAGE=tritondevel", "", "-py310")
-                    }
-                }
-                stage("Build rockylinux8 x86_64-skip-py3.12") {
-                    agent {
-                        kubernetes createKubernetesPodConfig("build")
-                    }
-                    steps
-                    {
-                        buildImage("rockylinux8", params.action, "skip", "PYTHON_VERSION=3.12.3 STAGE=tritondevel", "", "-py312")
-                    }
-                }
-                stage("Build SBSA-skip") {
-                    agent {
-                        kubernetes createKubernetesPodConfig("build", "arm64")
-                    }
-                    steps
-                    {
-                        buildImage("tritondevel", params.action, "skip", "", "", "", true)
-                    }
+            }
+        }
+        stage("Upload Artifacts") {
+            steps {
+                script {
+                    String imageKeyToTagJson = writeJSON returnText: true, json: imageKeyToTag
+                    echo "imageKeyToTag is: ${imageKeyToTagJson}"
+                    writeFile file: "imageKeyToTag.json", text: imageKeyToTagJson
+                    archiveArtifacts artifacts: 'imageKeyToTag.json', fingerprint: true
                 }
+            }
+        }
diff --git a/jenkins/L0_MergeRequest.groovy b/jenkins/L0_MergeRequest.groovy
index 4583772ab6..f90a4cc928 100644
--- a/jenkins/L0_MergeRequest.groovy
+++ b/jenkins/L0_MergeRequest.groovy
@@ -1035,6 +1035,43 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
             }
         },
     ]
+    def dockerBuildJob = [
+        "Build-Docker-Images": {
+            script {
+                stage("[Build-Docker-Images] Remote Run") {
+                    def parameters = getCommonParameters()
+                    String globalVarsJson = writeJSON returnText: true, json: globalVars
+                    def branch = env.gitlabBranch ?: "main"
env.gitlabBranch : "main" + if (globalVars[GITHUB_PR_API_URL]) { + branch = "github-pr-" + globalVars[GITHUB_PR_API_URL].split('/').last() + } + + parameters += [ + 'enableFailFast': enableFailFast, + 'branch': branch, + 'action': "push", + 'globalVars': globalVarsJson, + ] + + echo "trigger BuildDockerImages job, params: ${parameters}" + + def status = triggerJob("/LLM/helpers/BuildDockerImages", parameters) + if (status != "SUCCESS") { + error "Downstream job did not succeed" + } + } + } + } + ] + if (env.JOB_NAME ==~ /.*PostMerge.*/) { + stages += dockerBuildJob + } + if (testFilter[(TEST_STAGE_LIST)]?.contains("Build-Docker-Images") || testFilter[(EXTRA_STAGE_LIST)]?.contains("Build-Docker-Images")) { + stages += dockerBuildJob + testFilter[(TEST_STAGE_LIST)]?.remove("Build-Docker-Images") + testFilter[(EXTRA_STAGE_LIST)]?.remove("Build-Docker-Images") + echo "Will run Build-Docker-Images job" + } parallelJobs = stages.collectEntries{key, value -> [key, { script { diff --git a/jenkins/docker/Dockerfile.dind b/jenkins/docker/Dockerfile.dind new file mode 100644 index 0000000000..66fddc3291 --- /dev/null +++ b/jenkins/docker/Dockerfile.dind @@ -0,0 +1,15 @@ +# docker buildx build -t tensorrt-llm:{timestamp}_docker_dind_withbash -f jenkins/docker/Dockerfile.dind . --builder=multi-builder --platform linux/arm64,linux/amd64 + +FROM docker:dind + +RUN apk add --no-cache bash git make python3 py3-pip + +ENV PATH=/usr/local/cmake/bin:$PATH +ENV ENV=${ENV:-/etc/shinit_v2} +COPY docker/common/install_cmake.sh install_cmake.sh +RUN bash ./install_cmake.sh && rm install_cmake.sh + +RUN git clone https://github.com/rapidsai/rapids-cmake.git /tmp/rapids-cmake && \ + mkdir -p /usr/local/share/cmake/rapids && \ + cp -r /tmp/rapids-cmake/rapids-cmake/* /usr/local/share/cmake/rapids/ && \ + rm -rf /tmp/rapids-cmake