@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@main']) _

import groovy.transform.Field

// LLM repository configuration
withCredentials([string(credentialsId: 'default-llm-repo', variable: 'DEFAULT_LLM_REPO')]) {
    LLM_REPO = env.gitlabSourceRepoHttpUrl ? env.gitlabSourceRepoHttpUrl : "${DEFAULT_LLM_REPO}"
}
LLM_ROOT = "llm"

ARTIFACT_PATH = env.artifactPath ? env.artifactPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"

X86_64_TRIPLE = "x86_64-linux-gnu"
AARCH64_TRIPLE = "aarch64-linux-gnu"

LLM_DOCKER_IMAGE = env.dockerImage

// Always use the x86_64 image for the agent
AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")

POD_TIMEOUT_SECONDS_BUILD = env.podTimeoutSeconds ? env.podTimeoutSeconds : "43200"

// Literals for easier access.
@Field
def WHEEL_EXTRA_ARGS = "extraArgs"

@Field
def TARNAME = "tarName"

@Field
def WHEEL_ARCHS = "wheelArchs"

@Field
def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig"

@Field
def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"

@Field
def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"

@Field
def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM"

@Field
def CONFIG_LINUX_AARCH64 = "linux_aarch64"

@Field
def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"

@Field
def CONFIG_LINUX_X86_64_PYBIND = "linux_x86_64_Pybind"

@Field
def CONFIG_LINUX_AARCH64_PYBIND = "linux_aarch64_Pybind"

@Field
def BUILD_CONFIGS = [
    // The Vanilla TARNAME is used for packaging in runLLMPackage.
    // cmake-vars cannot be empty, so pass the (default) multi-device configuration.
    (CONFIG_LINUX_X86_64_VANILLA) : [
        (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --extra-cmake-vars MOONCAKE_ROOT=/usr/local/Mooncake --micro_benchmarks",
        (TARNAME) : "TensorRT-LLM.tar.gz",
        (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
    ],
    (CONFIG_LINUX_X86_64_PYBIND) : [
        (WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --extra-cmake-vars MOONCAKE_ROOT=/usr/local/Mooncake --micro_benchmarks",
        (TARNAME) : "pybind-TensorRT-LLM.tar.gz",
        (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
    ],
    (CONFIG_LINUX_X86_64_SINGLE_DEVICE) : [
        (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=0 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars ENABLE_UCX=0 --micro_benchmarks",
        (TARNAME) : "single-device-TensorRT-LLM.tar.gz",
        (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
    ],
    (CONFIG_LINUX_X86_64_LLVM) : [
        (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --micro_benchmarks -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
        (TARNAME) : "llvm-TensorRT-LLM.tar.gz",
        (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
    ],
    (CONFIG_LINUX_AARCH64): [
        (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --extra-cmake-vars MOONCAKE_ROOT=/usr/local/Mooncake",
        (TARNAME) : "TensorRT-LLM-GH200.tar.gz",
        (WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
        (BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fixing the build OOM issue on SBSA
    ],
    (CONFIG_LINUX_AARCH64_PYBIND): [
        (WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --extra-cmake-vars MOONCAKE_ROOT=/usr/local/Mooncake",
        (TARNAME) : "pybind-TensorRT-LLM-GH200.tar.gz",
        (WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
        (BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fixing the build OOM issue on SBSA
    ],
    (CONFIG_LINUX_AARCH64_LLVM) : [
        (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
        (TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz",
        (WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
        (BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fixing the build OOM issue on SBSA
    ],
]
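
// Illustrative sketch (not executed by this pipeline): a BUILD_CONFIGS entry
// is looked up by its config-name key, and the field literals above index
// into the per-config map, e.g.
//   def flags = BUILD_CONFIGS[CONFIG_LINUX_X86_64_VANILLA]
//   def tarName = flags[TARNAME]                            // "TensorRT-LLM.tar.gz"
//   def jobs = flags[BUILD_JOBS_FOR_CONFIG] ?: BUILD_JOBS   // per-config override, else global default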

@Field
def GITHUB_PR_API_URL = "github_pr_api_url"
@Field
def CACHED_CHANGED_FILE_LIST = "cached_changed_file_list"
@Field
def ACTION_INFO = "action_info"
def globalVars = [
    (GITHUB_PR_API_URL): null,
    (CACHED_CHANGED_FILE_LIST): null,
    (ACTION_INFO): null,
]

// TODO: Move common variables to a unified location

BUILD_CORES_REQUEST = "8"
BUILD_CORES_LIMIT = "8"
BUILD_MEMORY_REQUEST = "48Gi"
BUILD_MEMORY_LIMIT = "96Gi"
BUILD_JOBS = "8"

TESTER_CORES = "12"
TESTER_MEMORY = "96Gi"

CCACHE_DIR = "/mnt/sw-tensorrt-pvc/scratch.trt_ccache/llm_ccache"
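
// getShortenedJobName maps well-known job names to short aliases and
// truncates anything else to 7 characters, so the result fits in a
// Kubernetes node label. Illustrative example (hypothetical job path):
//   getShortenedJobName("LLM/L0_MergeRequest")  // => "llm-l0-mr"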
String getShortenedJobName(String path)
{
    static final nameMapping = [
        "L0_MergeRequest": "l0-mr",
        "L0_Custom": "l0-cus",
        "L0_PostMerge": "l0-pm",
        "L0_PostMergeDocker": "l0-pmd",
        "L1_Custom": "l1-cus",
        "L1_Nightly": "l1-nt",
        "L1_Stable": "l1-stb",
    ]
    def parts = path.split('/')
    // Apply nameMapping to the last part (jobName)
    def jobName = parts[-1]
    boolean replaced = false
    nameMapping.each { key, value ->
        if (jobName.contains(key)) {
            jobName = jobName.replace(key, value)
            replaced = true
        }
    }
    if (!replaced) {
        jobName = jobName.length() > 7 ? jobName.substring(0, 7) : jobName
    }
    // Replace the last part with the transformed jobName
    parts[-1] = jobName
    // Rejoin the parts with '-' and convert to lowercase
    return parts.join('-').toLowerCase()
}
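
// createKubernetesPodConfig renders the pod definition that trtllm_utils
// launches for a stage: a "build" pod gets the full CPU/memory requests and
// the ccache volume mount, while a "package" pod is a small 2-CPU helper.
// The arch argument selects the node architecture and the matching JNLP
// sidecar image.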
def createKubernetesPodConfig(image, type, arch = "amd64")
{
    def targetCloud = "kubernetes-cpu"
    def selectors = """
                  nvidia.com/node_type: builder
                  kubernetes.io/os: linux
                  kubernetes.io/arch: ${arch}"""
    def containerConfig = ""
    def nodeLabelPrefix = ""
    def jobName = getShortenedJobName(env.JOB_NAME)
    def buildID = env.BUILD_ID

    def archSuffix = arch == "arm64" ? "arm" : "amd"
    def jnlpImage = "urm.nvidia.com/sw-ipp-blossom-sre-docker-local/lambda/custom_jnlp_images_${archSuffix}_linux:jdk17"

    switch(type)
    {
    case "build":
        containerConfig = """
                  - name: trt-llm
                    image: ${image}
                    command: ['sleep', ${POD_TIMEOUT_SECONDS_BUILD}]
                    volumeMounts:
                    - name: sw-tensorrt-pvc
                      mountPath: "/mnt/sw-tensorrt-pvc"
                      readOnly: false
                    tty: true
                    resources:
                      requests:
                        cpu: ${BUILD_CORES_REQUEST}
                        memory: ${BUILD_MEMORY_REQUEST}
                        ephemeral-storage: 200Gi
                      limits:
                        cpu: ${BUILD_CORES_LIMIT}
                        memory: ${BUILD_MEMORY_LIMIT}
                        ephemeral-storage: 200Gi
                    imagePullPolicy: Always"""
        nodeLabelPrefix = "cpu"
        break
    case "package":
        containerConfig = """
                  - name: trt-llm
                    image: ${image}
                    command: ['cat']
                    tty: true
                    resources:
                      requests:
                        cpu: '2'
                        memory: 10Gi
                        ephemeral-storage: 25Gi
                      limits:
                        cpu: '2'
                        memory: 10Gi
                        ephemeral-storage: 25Gi
                    imagePullPolicy: Always"""
        nodeLabelPrefix = "cpu"
        break
    }
    def nodeLabel = trtllm_utils.appendRandomPostfix("${nodeLabelPrefix}---tensorrt-${jobName}-${buildID}")
    def pvcVolume = """
                - name: sw-tensorrt-pvc
                  persistentVolumeClaim:
                    claimName: sw-tensorrt-pvc
    """
    if (arch == "arm64") {
        // PVC mount isn't supported on the aarch64 platform. Use NFS as a workaround.
        pvcVolume = """
                - name: sw-tensorrt-pvc
                  nfs:
                    server: 10.117.145.13
                    path: /vol/scratch1/scratch.svc_tensorrt_blossom
        """
    }
    def podConfig = [
        cloud: targetCloud,
        namespace: "sw-tensorrt",
        label: nodeLabel,
        yaml: """
            apiVersion: v1
            kind: Pod
            spec:
                qosClass: Guaranteed
                affinity:
                    nodeAffinity:
                        requiredDuringSchedulingIgnoredDuringExecution:
                            nodeSelectorTerms:
                            - matchExpressions:
                              - key: "tensorrt/taints"
                                operator: DoesNotExist
                              - key: "tensorrt/affinity"
                                operator: NotIn
                                values:
                                - "core"
                nodeSelector: ${selectors}
                containers:
                  ${containerConfig}
                    env:
                    - name: HOST_NODE_NAME
                      valueFrom:
                        fieldRef:
                          fieldPath: spec.nodeName
                  - name: jnlp
                    image: ${jnlpImage}
                    args: ['\$(JENKINS_SECRET)', '\$(JENKINS_NAME)']
                    resources:
                      requests:
                        cpu: '2'
                        memory: 10Gi
                        ephemeral-storage: 25Gi
                      limits:
                        cpu: '2'
                        memory: 10Gi
                        ephemeral-storage: 25Gi
                    qosClass: Guaranteed
                volumes:
                ${pvcVolume}
        """.stripIndent(),
    ]

    return podConfig
}

def echoNodeAndGpuInfo(pipeline, stageName)
{
    String hostNodeName = sh(script: 'echo $HOST_NODE_NAME', returnStdout: true)
    String gpuUuids = pipeline.sh(script: "nvidia-smi -q | grep \"GPU UUID\" | awk '{print \$4}' | tr '\n' ',' || true", returnStdout: true)
    pipeline.echo "HOST_NODE_NAME = ${hostNodeName} ; GPU_UUIDS = ${gpuUuids} ; STAGE_NAME = ${stageName}"
}
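
// downloadArtifacts tries to pull every artifact in the map from
// reuseArtifactPath on Artifactory. If any download fails, it returns null so
// that the caller falls back to a full rebuild; otherwise it returns a new
// map pointing at the reused (already downloaded) artifact paths.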
def downloadArtifacts(stageName, reuseArtifactPath, artifacts, serverId = 'Artifactory')
{
    def reused = true
    stage(stageName) {
        for (downit in artifacts) {
            def uploadpath = downit.key
            try {
                rtDownload(
                    failNoOp: true,
                    serverId: serverId,
                    spec: """{
                        "files": [
                            {
                                "pattern": "${reuseArtifactPath}/${uploadpath}"
                            }
                        ]
                    }""",
                )
            } catch (Exception e) {
                echo "Failed downloading ${reuseArtifactPath}/${uploadpath}; a rebuild is needed."
                reused = false
                catchError(buildResult: 'SUCCESS', stageResult: 'UNSTABLE') { throw e }
            }
        }

        if (!reused) {
            return null
        }

        // Drop the leading repository segment, since downloaded files land in
        // the workspace without it.
        reuseArtifactPath = reuseArtifactPath.substring(reuseArtifactPath.indexOf('/') + 1)
        def newArtifacts = [:]
        for (reuseit in artifacts) {
            def uploadpath = reuseit.key
            newArtifacts[reuseit.key] = "${reuseArtifactPath}/${uploadpath}"
        }

        return newArtifacts
    }
}
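
// uploadArtifacts mirrors downloadArtifacts: each map entry is uploaded with
// an Artifactory file spec, retried via trtllm_utils.llmRetry on transient
// failures. The map key is the remote path under the prefix and the value is
// the local file path.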
def uploadArtifacts(artifacts, prefix = UPLOAD_PATH, retryTimes = 2, serverId = 'Artifactory')
{
    for (it in artifacts) {
        def uploadpath = it.key
        def filepath = it.value
        def spec = """{
            "files": [
                {
                    "pattern": "${filepath}",
                    "target": "${prefix}/${uploadpath}"
                }
            ]
        }"""
        echo "Uploading ${filepath} as ${uploadpath}. Spec: ${spec}"
        trtllm_utils.llmRetry(retryTimes, "uploadArtifacts", {
            rtUpload (
                serverId: serverId,
                spec: spec,
            )
        })
    }
}
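
// buildOrCache first tries to reuse previously built artifacts from
// reuseArtifactPath; only when that fails does it launch a build pod, run the
// provided runner closure, and upload the freshly built artifacts.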
def buildOrCache(pipeline, key, reuseArtifactPath, artifacts, image, k8s_cpu, runner)
{
    if (reuseArtifactPath) {
        stage(key) {
            def newArtifacts = downloadArtifacts("[${key}] Reuse", reuseArtifactPath, artifacts)
            if (newArtifacts != null) {
                uploadArtifacts(newArtifacts)
            } else {
                reuseArtifactPath = null
            }
        }
    }
    if (reuseArtifactPath) {
        return
    }

    trtllm_utils.launchKubernetesPod(pipeline, createKubernetesPodConfig(image, "build", k8s_cpu), "trt-llm", {
        stage(key) {
            stage("[${key}] Run") {
                echoNodeAndGpuInfo(pipeline, key)
                runner()
            }
            stage("Upload") {
                rtServer (
                    id: 'Artifactory',
                    url: 'https://urm.nvidia.com/artifactory',
                    credentialsId: 'urm-artifactory-creds',
                    // If Jenkins is configured to use an HTTP proxy, you can bypass the proxy when using this Artifactory server:
                    bypassProxy: true,
                    // Configure the connection timeout (in seconds).
                    // The default value (if not configured) is 300 seconds:
                    timeout: 300
                )
                uploadArtifacts(artifacts)
            }
        }
    })
}
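
// prepareLLMBuild resolves a config name from BUILD_CONFIGS into the pair
// consumed by buildOrCache: the artifact map (tarball name -> local path)
// and a runner closure that performs the actual build.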
def prepareLLMBuild(pipeline, config)
{
    def buildFlags = BUILD_CONFIGS[config]
    def tarName = buildFlags[TARNAME]

    def is_linux_x86_64 = config.contains("linux_x86_64")
    def artifacts = ["${tarName}": tarName]
    def runner = {
        runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
    }

    return [artifacts, runner]
}

def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
{
    // Step 1: cloning tekit source code
    sh "pwd && ls -alh"
    sh "env | sort"
    sh "ccache -sv"
    sh "rm -rf **/*.xml *.tar.gz"

    trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
    if (env.alternativeTRT) {
        sh "cd ${LLM_ROOT} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
    }

    sh "mkdir TensorRT-LLM"
    sh "cp -r ${LLM_ROOT}/ TensorRT-LLM/src/"

    // Step 2: building wheels in container
    // Random sleep to avoid resource contention
    sleep(10 * Math.random())
    sh "curl ifconfig.me || true"
    sh "nproc && free -g && hostname"
    sh "cat ${CCACHE_DIR}/ccache.conf"

    sh "env | sort"
    sh "ldconfig --print-cache || true"
    sh "ls -lh /"
    sh "id || true"
    sh "whoami || true"
    echo "Building TensorRT-LLM Python package ..."
    sh "git config --global --add safe.directory \"*\""
    def pipArgs = "--no-cache-dir"
    if (is_linux_x86_64) {
        pipArgs = ""
    }

    // Install the Python dev requirements.
    trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && pip3 install -r requirements-dev.txt ${pipArgs}")

    if (env.alternativeTRT) {
        trtllm_utils.replaceWithAlternativeTRT(env.alternativeTRT, "cp312")
    }

    def buildJobs = buildFlags[BUILD_JOBS_FOR_CONFIG] ?: BUILD_JOBS

    withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) {
        sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${buildJobs} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks"
    }
    if (is_linux_x86_64) {
        sh "cd ${LLM_ROOT} && python3 scripts/build_cpp_examples.py"
    }

    // Build tritonserver artifacts
    def llmPath = sh(script: "realpath ${LLM_ROOT}", returnStdout: true).trim()
    // TODO: Remove after the cmake version is upgraded to 3.31.8
    // Get the triton tag from docker/Dockerfile.multi
    def tritonShortTag = "r25.10"
    sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"

    // Step 3: packaging wheels into the tarfile
    sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/"

    // Step 4: packaging tritonserver artifacts into the tarfile
    sh "mkdir -p TensorRT-LLM/triton_backend/inflight_batcher_llm/"
    sh "cp ${LLM_ROOT}/triton_backend/inflight_batcher_llm/build/libtriton_tensorrtllm.so TensorRT-LLM/triton_backend/inflight_batcher_llm/"
    sh "cp ${LLM_ROOT}/triton_backend/inflight_batcher_llm/build/trtllmExecutorWorker TensorRT-LLM/triton_backend/inflight_batcher_llm/"

    // Step 5: packaging benchmarks and required cpp dependencies into the tarfile
    sh "mkdir -p TensorRT-LLM/benchmarks/cpp"
    sh "cp ${LLM_ROOT}/cpp/build/benchmarks/bertBenchmark TensorRT-LLM/benchmarks/cpp"
    sh "cp ${LLM_ROOT}/cpp/build/benchmarks/gptManagerBenchmark TensorRT-LLM/benchmarks/cpp"
    sh "cp ${LLM_ROOT}/cpp/build/benchmarks/disaggServerBenchmark TensorRT-LLM/benchmarks/cpp"
    sh "cp ${LLM_ROOT}/cpp/build/tensorrt_llm/libtensorrt_llm.so TensorRT-LLM/benchmarks/cpp"
    sh "cp ${LLM_ROOT}/cpp/build/tensorrt_llm/plugins/libnvinfer_plugin_tensorrt_llm.so TensorRT-LLM/benchmarks/cpp"

    if (is_linux_x86_64) {
        // Use pigz for parallel compression of the larger x86_64 tarball.
        sh "rm -rf ${tarName}"
        sh "pigz --version || true"
        sh "bash -c 'tar --use-compress-program=\"pigz -k\" -cf ${tarName} TensorRT-LLM/'"
    } else {
        sh "tar -czvf ${tarName} TensorRT-LLM/"
    }
}
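
// buildWheelInContainer is the lighter-weight variant used by the extra build
// flavors (e.g. the Debug build below): it clones the source and invokes
// scripts/build_wheel.py directly in the current pod, without packaging a
// tarball.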
def buildWheelInContainer(pipeline, libraries = [], triple = X86_64_TRIPLE, clean = false, pre_cxx11abi = false, cpver = "312", extra_args = "")
{
    // Random sleep to avoid resource contention
    sleep(10 * Math.random())
    sh "curl ifconfig.me || true"
    sh "nproc && free -g && hostname"
    sh "ccache -sv"
    sh "cat ${CCACHE_DIR}/ccache.conf"

    // Step 1: cloning tekit source code
    trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
    if (env.alternativeTRT) {
        trtllm_utils.replaceWithAlternativeTRT(env.alternativeTRT, cpver)
        sh "cd ${LLM_ROOT} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
    }
    // Step 2: building libs in container
    sh "bash -c 'pip3 show tensorrt || true'"

    if (extra_args == "") {
        if (triple == AARCH64_TRIPLE) {
            extra_args = "-a '90-real;100-real;103-real;120-real'"
        } else {
            extra_args = "-a '80-real;86-real;89-real;90-real;100-real;103-real;120-real'"
        }
    }
    if (pre_cxx11abi) {
        extra_args = extra_args + " -l -D 'USE_CXX11_ABI=0'"
    } else {
        if (libraries.size() != 0) {
            extra_args = extra_args + " -l -D 'USE_CXX11_ABI=1'"
        }
    }
    if (clean) {
        extra_args = extra_args + " --clean"
    }
    sh "bash -c 'git config --global --add safe.directory \"*\"'"
    // Because different architectures involve different macros, a comprehensive test is conducted here.
    withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) {
        trtllm_utils.llmExecStepWithRetry(pipeline, script: "bash -c \"cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${BUILD_JOBS} -D 'WARNING_IS_ERROR=ON' ${extra_args}\"")
    }
}
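
// launchStages drives the whole build: it resolves the per-arch build matrix
// from BUILD_CONFIGS and runs each flavor in parallel via buildOrCache. For
// x86_64 it additionally schedules a SingleDevice flavor and a Debug wheel
// build.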
def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
{
    stage("Show Environment") {
        sh "env | sort"
        echo "dockerImage: ${env.dockerImage}"
        echo "gitlabSourceRepoHttpUrl: ${env.gitlabSourceRepoHttpUrl}"
        echo "gitlabCommit: ${env.gitlabCommit}"
        echo "alternativeTRT: ${env.alternativeTRT}"
        echo "Using GitLab repo: ${LLM_REPO}. Commit: ${env.gitlabCommit}"

        echo "env.globalVars is: ${env.globalVars}"
        globalVars = trtllm_utils.updateMapWithJson(pipeline, globalVars, env.globalVars, "globalVars")
        globalVars[ACTION_INFO] = trtllm_utils.setupPipelineDescription(pipeline, globalVars[ACTION_INFO])
    }

    def wheelDockerImage = env.wheelDockerImagePy310
    if (!wheelDockerImage && cpu_arch == AARCH64_TRIPLE) {
        wheelDockerImage = env.dockerImage
    }

    buildConfigs = [
        "Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
        "Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
        "Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_PYBIND : CONFIG_LINUX_X86_64_PYBIND),
    ]

    if (cpu_arch == X86_64_TRIPLE) {
        buildConfigs += [
            "Build TRT-LLM SingleDevice": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
                pipeline, CONFIG_LINUX_X86_64_SINGLE_DEVICE),
        ]
    }

    rtServer (
        id: 'Artifactory',
        url: 'https://urm.nvidia.com/artifactory',
        credentialsId: 'urm-artifactory-creds',
        // If Jenkins is configured to use an HTTP proxy, you can bypass the proxy when using this Artifactory server:
        bypassProxy: true,
        // Configure the connection timeout (in seconds).
        // The default value (if not configured) is 300 seconds:
        timeout: 300
    )
    def reuseArtifactPath = env.reuseArtifactPath

    def k8s_cpu = "amd64"
    if (cpu_arch == AARCH64_TRIPLE) {
        k8s_cpu = "arm64"
    }

    parallelJobs = buildConfigs.collectEntries{ key, values -> [key, {
        script {
            buildOrCache(pipeline, key, reuseArtifactPath, values[1], values[0], k8s_cpu, values[2])
        }
    }]}
    parallelJobs.failFast = enableFailFast

    if (cpu_arch == X86_64_TRIPLE && !reuseArtifactPath) {
        def key = "Build With Build Type Debug"
        parallelJobs += [
            (key): {
                script {
                    trtllm_utils.launchKubernetesPod(pipeline, createKubernetesPodConfig(LLM_DOCKER_IMAGE, "build", k8s_cpu), "trt-llm", {
                        stage(key) {
                            stage("[${key}] Run") {
                                echoNodeAndGpuInfo(pipeline, key)
                                buildWheelInContainer(pipeline, [], X86_64_TRIPLE, false, false, "cp312", "-a '90-real' -b Debug --benchmarks --micro_benchmarks")
                            }
                        }
                    })
                }
            }]
    }

    stage("Build") {
        pipeline.parallel parallelJobs
    } // Build stage
}
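
// Top-level declarative pipeline: it runs on a small x86_64 "package" pod and
// delegates all real work to launchStages, parameterized by the target arch.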
pipeline {
    agent {
        kubernetes createKubernetesPodConfig(AGENT_IMAGE, "package", "amd64")
    }
    options {
        // Check the valid options at: https://www.jenkins.io/doc/book/pipeline/syntax/
        // Some steps, such as the results analysis stage, do not need to check out source code.
        skipDefaultCheckout()
        // To better analyze the time spent in each step/test.
        timestamps()
        timeout(time: 24, unit: 'HOURS')
    }
    environment {
        // The workspace is normally /home/jenkins/agent/workspace/LLM/L0_MergeRequest@tmp/
        HF_HOME = "${env.WORKSPACE_TMP}/.cache/huggingface"
        CCACHE_DIR = "${CCACHE_DIR}"
        GITHUB_MIRROR = "https://urm.nvidia.com/artifactory/github-go-remote"
        PIP_INDEX_URL = "https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple"
        // Force datasets into offline mode, to prevent CI jobs from downloading HF datasets and causing test failures.
        HF_DATASETS_OFFLINE = 1
    }
    stages {
        stage("Build Job") {
            steps {
                launchStages(this, params.targetArch, params.enableFailFast, globalVars)
            }
        }
    } // stages
} // pipeline