TensorRT-LLMs/jenkins/Build.groovy
tburt-nv 7053d0ad5a
infra: add conan (#3744)
This MR integrates Conan into the build system, so that it can be used to fetch dependencies in future changes.

Also installs all requirements-dev.txt packages inside a virtualenv instead of into the system environment, since some of Conan's dependencies may conflict with the system packages. Virtualenv is used instead of venv because the Triton server backend container has only virtualenv installed. This also allows developers to cache the requirements-dev.txt packages between container launches.


Signed-off-by: Tyler Burt <195370667+tburt-nv@users.noreply.github.com>
2025-04-30 11:53:14 -07:00

@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@main']) _
import groovy.transform.Field
// LLM repository configuration
withCredentials([string(credentialsId: 'default-llm-repo', variable: 'DEFAULT_LLM_REPO')]) {
LLM_REPO = env.gitlabSourceRepoHttpUrl ? env.gitlabSourceRepoHttpUrl : "${DEFAULT_LLM_REPO}"
}
LLM_ROOT = "llm"
ARTIFACT_PATH = env.artifactPath ? env.artifactPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
X86_64_TRIPLE = "x86_64-linux-gnu"
AARCH64_TRIPLE = "aarch64-linux-gnu"
LLM_DOCKER_IMAGE = env.dockerImage
AGENT_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"
// Literals for easier access.
@Field
def WHEEL_EXTRA_ARGS = "extraArgs"
@Field
def TARNAME = "tarName"
@Field
def WHEEL_ARCHS = "wheelArchs"
@Field
def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"
@Field
def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"
@Field
def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM"
@Field
def CONFIG_LINUX_AARCH64 = "linux_aarch64"
@Field
def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"
@Field
def BUILD_CONFIGS = [
// Vanilla TARNAME is used for packaging in runLLMPackage
// cmake-vars cannot be empty, so pass the (default) multi-device configuration.
(CONFIG_LINUX_X86_64_VANILLA) : [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --micro_benchmarks",
(TARNAME) : "TensorRT-LLM.tar.gz",
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
],
(CONFIG_LINUX_X86_64_SINGLE_DEVICE) : [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=0 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars ENABLE_UCX=0 --micro_benchmarks",
(TARNAME) : "single-device-TensorRT-LLM.tar.gz",
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
],
(CONFIG_LINUX_X86_64_LLVM) : [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --micro_benchmarks -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
(TARNAME) : "llvm-TensorRT-LLM.tar.gz",
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
],
(CONFIG_LINUX_AARCH64): [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON",
(TARNAME) : "TensorRT-LLM-GH200.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;120-real",
],
(CONFIG_LINUX_AARCH64_LLVM) : [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
(TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;120-real",
],
]
// TODO: Move common variables to a unified location
BUILD_CORES_REQUEST = "8"
BUILD_CORES_LIMIT = "8"
BUILD_MEMORY_REQUEST = "48Gi"
BUILD_MEMORY_LIMIT = "64Gi"
BUILD_JOBS = "8"
TESTER_CORES = "12"
TESTER_MEMORY = "96Gi"
CCACHE_DIR="/mnt/sw-tensorrt-pvc/scratch.trt_ccache/llm_ccache"
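// Shorten a Jenkins job path (e.g. "LLM/L0_MergeRequest" -> "llm-l0-mr") into a compact,
// lowercase name that is safe to embed in pod/node labels.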
String getShortenedJobName(String path)
{
static final nameMapping = [
"L0_MergeRequest": "l0-mr",
"L0_Custom": "l0-cus",
"L0_PostMerge": "l0-pm",
"L0_PostMergeDocker": "l0-pmd",
"L1_Custom": "l1-cus",
"L1_Nightly": "l1-nt",
"L1_Stable": "l1-stb",
]
def parts = path.split('/')
// Apply nameMapping to the last part (jobName)
def jobName = parts[-1]
boolean replaced = false
nameMapping.each { key, value ->
if (jobName.contains(key)) {
jobName = jobName.replace(key, value)
replaced = true
}
}
if (!replaced) {
jobName = jobName.length() > 7 ? jobName.substring(0, 7) : jobName
}
// Replace the last part with the transformed jobName
parts[-1] = jobName
// Rejoin the parts with '-', convert to lowercase
return parts.join('-').toLowerCase()
}
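// Build the Kubernetes pod definition for a CPU-only agent. type selects the container shape:
// "build" (full build resources, PVC-backed ccache mount) or "package" (small helper container
// used for packaging/upload steps).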
def createKubernetesPodConfig(image, type, arch = "amd64")
{
def targetCloud = "kubernetes-cpu"
def selectors = """
                nvidia.com/node_type: builder
                kubernetes.io/os: linux
                kubernetes.io/arch: ${arch}"""
def containerConfig = ""
def nodeLabelPrefix = ""
def jobName = getShortenedJobName(env.JOB_NAME)
def buildID = env.BUILD_ID
switch(type)
{
case "build":
containerConfig = """
              - name: trt-llm
                image: ${image}
                command: ['sleep', ${POD_TIMEOUT_SECONDS}]
                volumeMounts:
                - name: sw-tensorrt-pvc
                  mountPath: "/mnt/sw-tensorrt-pvc"
                  readOnly: false
                tty: true
                resources:
                  requests:
                    cpu: ${BUILD_CORES_REQUEST}
                    memory: ${BUILD_MEMORY_REQUEST}
                    ephemeral-storage: 200Gi
                  limits:
                    cpu: ${BUILD_CORES_LIMIT}
                    memory: ${BUILD_MEMORY_LIMIT}
                    ephemeral-storage: 200Gi
                imagePullPolicy: Always"""
nodeLabelPrefix = "cpu"
break
case "package":
containerConfig = """
              - name: trt-llm
                image: ${image}
                command: ['cat']
                tty: true
                resources:
                  requests:
                    cpu: '2'
                    memory: 10Gi
                    ephemeral-storage: 25Gi
                  limits:
                    cpu: '2'
                    memory: 10Gi
                    ephemeral-storage: 25Gi
                imagePullPolicy: Always"""
nodeLabelPrefix = "cpu"
break
}
def nodeLabel = trtllm_utils.appendRandomPostfix("${nodeLabelPrefix}---tensorrt-${jobName}-${buildID}")
def pvcVolume = """
              - name: sw-tensorrt-pvc
                persistentVolumeClaim:
                  claimName: sw-tensorrt-pvc
"""
if (arch == "arm64") {
// WAR: the PVC mount is not set up on GH200 machines, so use a small NFS scratch volume as a workaround
pvcVolume = """
              - name: sw-tensorrt-pvc
                nfs:
                  server: 10.117.145.13
                  path: /vol/scratch1/scratch.svc_tensorrt_blossom
"""
}
def podConfig = [
cloud: targetCloud,
namespace: "sw-tensorrt",
label: nodeLabel,
yaml: """
            apiVersion: v1
            kind: Pod
            spec:
              qosClass: Guaranteed
              affinity:
                nodeAffinity:
                  requiredDuringSchedulingIgnoredDuringExecution:
                    nodeSelectorTerms:
                    - matchExpressions:
                      - key: "tensorrt/taints"
                        operator: DoesNotExist
                      - key: "tensorrt/affinity"
                        operator: NotIn
                        values:
                        - "core"
              nodeSelector: ${selectors}
              containers:
                ${containerConfig}
                env:
                - name: HOST_NODE_NAME
                  valueFrom:
                    fieldRef:
                      fieldPath: spec.nodeName
              - name: jnlp
                image: urm.nvidia.com/docker/jenkins/inbound-agent:4.11-1-jdk11
                args: ['\$(JENKINS_SECRET)', '\$(JENKINS_NAME)']
                resources:
                  requests:
                    cpu: '2'
                    memory: 10Gi
                    ephemeral-storage: 25Gi
                  limits:
                    cpu: '2'
                    memory: 10Gi
                    ephemeral-storage: 25Gi
                qosClass: Guaranteed
              volumes:
                ${pvcVolume}
""".stripIndent(),
]
return podConfig
}
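// Log the host node name and any visible GPU UUIDs for the given stage, so failures can be
// traced back to a specific machine.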
def echoNodeAndGpuInfo(pipeline, stageName)
{
String hostNodeName = sh(script: 'echo $HOST_NODE_NAME', returnStdout: true)
String gpuUuids = pipeline.sh(script: "nvidia-smi -q | grep \"GPU UUID\" | awk '{print \$4}' | tr '\n' ',' || true", returnStdout: true)
pipeline.echo "HOST_NODE_NAME = ${hostNodeName} ; GPU_UUIDS = ${gpuUuids} ; STAGE_NAME = ${stageName}"
}
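// Try to download previously built artifacts from reuseArtifactPath.
// Returns a map of upload path -> downloaded file path on success, or null if any artifact is
// missing, in which case callers fall back to a full rebuild.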
def downloadArtifacts(stageName, reuseArtifactPath, artifacts, serverId = 'Artifactory')
{
def reused = true
stage(stageName) {
for (downit in artifacts) {
def uploadpath = downit.key
try {
rtDownload(
failNoOp: true,
serverId: serverId,
spec: """{
"files": [
{
"pattern": "${reuseArtifactPath}/${uploadpath}"
}
]
}""",
)
} catch (Exception e) {
echo "failed downloading ${reuseArtifactPath}/${uploadpath}, need rebuild."
reused = false
catchError(buildResult: 'SUCCESS', stageResult: 'UNSTABLE') { throw e }
}
}
if (!reused) {
return null
}
reuseArtifactPath = reuseArtifactPath.substring(reuseArtifactPath.indexOf('/')+1)
def newArtifacts = [:]
for (reuseit in artifacts) {
def uploadpath = reuseit.key
newArtifacts[reuseit.key] = "${reuseArtifactPath}/${uploadpath}"
}
return newArtifacts
}
}
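// Upload each artifact (upload path -> local file path) to Artifactory under the given prefix,
// retrying failed uploads.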
def uploadArtifacts(artifacts, prefix = UPLOAD_PATH, retryTimes = 2, serverId = 'Artifactory')
{
for (it in artifacts) {
def uploadpath = it.key
def filepath = it.value
echo "uploading ${filepath} as ${uploadpath}"
trtllm_utils.llmRetry(retryTimes, "uploadArtifacts", {
rtUpload (
serverId: serverId,
spec: """{
"files": [
{
"pattern": "${filepath}",
"target": "${prefix}/${uploadpath}"
}
]
}""",
)
})
}
}
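// Either reuse prebuilt artifacts from reuseArtifactPath (re-uploading them under UPLOAD_PATH)
// or launch a build pod, execute the runner closure, and upload the freshly built artifacts.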
def buildOrCache(pipeline, key, reuseArtifactPath, artifacts, image, k8s_cpu, runner)
{
if (reuseArtifactPath) {
stage(key) {
def newArtifacts = downloadArtifacts("[${key}] Reuse", reuseArtifactPath, artifacts)
if (newArtifacts != null) {
uploadArtifacts(newArtifacts)
} else {
reuseArtifactPath = null
}
}
}
if (reuseArtifactPath) {
return
}
trtllm_utils.launchKubernetesPod(pipeline, createKubernetesPodConfig(image, "build", k8s_cpu), "trt-llm", {
stage(key) {
stage("[${key}] Run") {
echoNodeAndGpuInfo(pipeline, key)
runner()
}
stage("Upload") {
rtServer (
id: 'Artifactory',
url: 'https://urm.nvidia.com/artifactory',
credentialsId: 'urm-artifactory-creds',
// If Jenkins is configured to use an http proxy, you can bypass the proxy when using this Artifactory server:
bypassProxy: true,
// Configure the connection timeout (in seconds).
// The default value (if not configured) is 300 seconds:
timeout: 300
)
uploadArtifacts(artifacts)
}
}
})
}
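// Prepare the artifact map and runner closure for a full TensorRT-LLM build using the given
// entry of BUILD_CONFIGS.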
def prepareLLMBuild(pipeline, config)
{
def buildFlags = BUILD_CONFIGS[config]
def tarName = buildFlags[TARNAME]
def is_linux_x86_64 = config.contains("linux_x86_64")
def artifacts = ["${tarName}": tarName]
def runner = {
runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
}
return [artifacts, runner]
}
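// Check out the sources, build the TensorRT-LLM wheel and C++ benchmarks in the current
// container, and package everything into tarName for downstream jobs.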
def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
{
// Step 1: cloning tekit source code
sh "pwd && ls -alh"
sh "env | sort"
sh "ccache -sv"
sh "rm -rf **/*.xml *.tar.gz"
trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
if (env.alternativeTRT) {
sh "cd ${LLM_ROOT} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
}
sh "mkdir TensorRT-LLM"
sh "cp -r ${LLM_ROOT}/ TensorRT-LLM/src/"
// Step 2: building wheels in container
// Random sleep to avoid resource contention
sleep(10 * Math.random())
sh "curl ifconfig.me || true"
sh "nproc && free -g && hostname"
sh "cat ${CCACHE_DIR}/ccache.conf"
sh "env | sort"
sh "ldconfig --print-cache || true"
sh "ls -lh /"
sh "id || true"
sh "whoami || true"
echo "Building TensorRT-LLM Python package ..."
sh "git config --global --add safe.directory \"*\""
def pipArgs = "--no-cache-dir"
if (is_linux_x86_64) {
pipArgs = ""
}
// install python package
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && pip3 install -r requirements-dev.txt ${pipArgs}")
if (env.alternativeTRT) {
trtllm_utils.replaceWithAlternativeTRT(env.alternativeTRT, "cp312")
}
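// Provide Artifactory credentials as Conan environment variables so build_wheel.py can
// authenticate when fetching Conan-managed dependencies.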
withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) {
sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -j ${BUILD_JOBS} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks --extra-make-targets modelSpec"
}
if (is_linux_x86_64) {
sh "cd ${LLM_ROOT} && python3 scripts/build_cpp_examples.py"
}
// Step 3: packaging wheels into tarfile
sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/"
// Step 4: packaging benchmark and required cpp dependencies into tarfile
sh "mkdir -p TensorRT-LLM/benchmarks/cpp"
sh "cp ${LLM_ROOT}/cpp/build/benchmarks/bertBenchmark TensorRT-LLM/benchmarks/cpp"
sh "cp ${LLM_ROOT}/cpp/build/benchmarks/gptSessionBenchmark TensorRT-LLM/benchmarks/cpp"
sh "cp ${LLM_ROOT}/cpp/build/benchmarks/gptManagerBenchmark TensorRT-LLM/benchmarks/cpp"
sh "cp ${LLM_ROOT}/cpp/build/tensorrt_llm/libtensorrt_llm.so TensorRT-LLM/benchmarks/cpp"
sh "cp ${LLM_ROOT}/cpp/build/tensorrt_llm/plugins/libnvinfer_plugin_tensorrt_llm.so TensorRT-LLM/benchmarks/cpp"
// Copy model_spec.so (pybinding for ModelSpec) since various tests will need it.
sh "mkdir -p TensorRT-LLM/src/cpp/tests/batch_manager"
sh "cp ${LLM_ROOT}/cpp/build/tests/batch_manager/model_spec.so TensorRT-LLM/src/cpp/tests/batch_manager/"
if (is_linux_x86_64) {
sh "rm -rf ${tarName}"
sh "pigz --version || true"
sh "bash -c 'tar --use-compress-program=\"pigz -k\" -cf ${tarName} TensorRT-LLM/'"
} else {
sh "tar -czvf ${tarName} TensorRT-LLM/"
}
}
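// Build the wheel (and, when libraries are requested, the standalone libs) inside the current
// container, picking GPU architectures and the CXX11 ABI setting based on the target triple.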
def buildWheelInContainer(pipeline, libraries=[], triple=X86_64_TRIPLE, clean=false, pre_cxx11abi=false, cpver="312", extra_args="")
{
// Random sleep to avoid resource contention
sleep(10 * Math.random())
sh "curl ifconfig.me || true"
sh "nproc && free -g && hostname"
sh "ccache -sv"
sh "cat ${CCACHE_DIR}/ccache.conf"
// Step 1: cloning tekit source code
trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
if (env.alternativeTRT) {
trtllm_utils.replaceWithAlternativeTRT(env.alternativeTRT, cpver)
sh "cd ${LLM_ROOT} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
}
// Step 2: building libs in container
sh "bash -c 'pip3 show tensorrt || true'"
if (extra_args == "") {
if (triple == AARCH64_TRIPLE) {
extra_args = "-a '90-real;100-real;120-real'"
} else {
extra_args = "-a '80-real;86-real;89-real;90-real;100-real;120-real'"
}
}
if (pre_cxx11abi) {
extra_args = extra_args + " -l -D 'USE_CXX11_ABI=0'"
} else {
if (libraries.size() != 0) {
extra_args = extra_args + " -l -D 'USE_CXX11_ABI=1'"
}
}
if (clean) {
extra_args = extra_args + " --clean"
}
sh "bash -c 'git config --global --add safe.directory \"*\"'"
// Different architectures enable different macros, so building here with WARNING_IS_ERROR=ON serves as a comprehensive compile check.
withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) {
trtllm_utils.llmExecStepWithRetry(pipeline, script: "bash -c \"cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -j ${BUILD_JOBS} -D 'WARNING_IS_ERROR=ON' ${extra_args}\"")
}
}
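// Prepare artifacts and runner for building the prebuilt TensorRT-LLM libraries
// (batch_manager, executor, internal_cutlass_kernels, plus ucx_wrapper where applicable)
// for the given triple and ABI flavor.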
def prepareBuildLib(pipeline, triple, pre_cxx11abi)
{
def libraries = [
"batch_manager",
"executor",
"internal_cutlass_kernels",
]
if ((triple == X86_64_TRIPLE && pre_cxx11abi) || (triple == AARCH64_TRIPLE && !pre_cxx11abi)) {
libraries += [
"ucx_wrapper",
]
}
def artifacts = [:]
for (library_name in libraries) {
def libdir
def is_static
if (library_name == "batch_manager") {
libdir = "tensorrt_llm/batch_manager"
is_static = true
} else if (library_name == "executor") {
libdir = "tensorrt_llm/executor"
is_static = true
} else if (library_name == "internal_cutlass_kernels"){
libdir = "tensorrt_llm/kernels/internal_cutlass_kernels"
is_static = true
} else if (library_name == "ucx_wrapper") {
libdir = "tensorrt_llm/executor/cache_transmission/ucx_utils"
}
def libname = "libtensorrt_llm_" + library_name
def ext = ".so"
if (is_static) {
libname += "_static"
ext = ".a"
}
def filepath = "${LLM_ROOT}/cpp/build/" + libdir + "/" + libname + ext
def uploadname = libname + ext
if (is_static && pre_cxx11abi) {
uploadname = libname + ".pre_cxx11" + ext
}
def uploadpath = "${triple}/${uploadname}"
artifacts[uploadpath] = filepath
}
def cpver = "cp312"
if (triple == X86_64_TRIPLE) {
cpver = "cp310"
}
return [artifacts, {
buildWheelInContainer(pipeline, libraries, triple, false, pre_cxx11abi, cpver)
}]
}
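// Prepare artifacts and runner for assembling the release source package for the given
// architecture triple.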
def prepareLLMPackage(pipeline, archTriple=X86_64_TRIPLE)
{
def tarFileName = "TensorRT-LLM.tar.gz"
def linuxPkgName = "tensorrt-llm-release-src-${env.gitlabCommit}.tar.gz"
if (archTriple == AARCH64_TRIPLE) {
tarFileName = "TensorRT-LLM-GH200.tar.gz"
linuxPkgName = "tensorrt-llm-sbsa-release-src-${env.gitlabCommit}.tar.gz"
}
def artifacts = ["${linuxPkgName}": "${LLM_ROOT}/${linuxPkgName}"]
return [artifacts, { runLLMPackage(pipeline, archTriple, tarFileName, linuxPkgName) }]
}
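// Download the build tarball, lay the sources out under pkg/tensorrt_llm, fetch the prebuilt
// libraries via package_trt_llm.py, then clean internal files and produce the final package.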
def runLLMPackage(pipeline, archTriple, tarFileName, linuxPkgName)
{
// Random sleep to avoid resource contention
sleep(10 * Math.random())
sh "curl ifconfig.me || true"
sh "nproc && free -g && hostname"
// Step 1: create LLM_ROOT dir and download code
sh "pwd && ls -alh"
sh "mkdir ${LLM_ROOT}"
def llmPath = sh(script: "realpath ${LLM_ROOT}", returnStdout: true).trim()
// Download tar generated from build jobs
def llmTarfile = "https://urm.nvidia.com/artifactory/${ARTIFACT_PATH}/${tarFileName}"
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmPath} && wget -nv ${llmTarfile}")
// The path TensorRT-LLM/src is defined in the build job
sh "cd ${llmPath} && tar -zxf ${tarFileName} TensorRT-LLM/src"
// create an additional `pkg/tensorrt_llm` folder to make sure the generated tar.gz contains only one tensorrt_llm folder
def llmPackage = "${llmPath}/TensorRT-LLM/pkg/"
sh "rm -rf ${llmPackage}"
sh "mkdir -p ${llmPackage}"
sh "mv ${llmPath}/TensorRT-LLM/src ${llmPackage}/tensorrt_llm"
// download libs
trtllm_utils.llmExecStepWithRetry(pipeline, script: """
pip3 install gitignore_parser && \
python3 ${llmPackage}/tensorrt_llm/scripts/package_trt_llm.py \
--lib_list oss \
--arch ${archTriple} \
--download ${env.gitlabCommit} \
--addr https://urm.nvidia.com/artifactory/${ARTIFACT_PATH} \
-v \
${llmPackage}/tensorrt_llm
""")
// clean the internal files and create one tar package
sh """cd ${llmPackage}/tensorrt_llm && \
python3 ${llmPackage}/tensorrt_llm/scripts/package_trt_llm.py \
--lib_list oss \
--clean \
--package ${llmPath}/${linuxPkgName} \
${llmPackage}/tensorrt_llm
"""
sh "cd ${llmPath} && ls -alh"
}
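// Top-level driver: print the environment, assemble the per-configuration build jobs, run them
// in parallel, then package and upload the results.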
def launchStages(pipeline, cpu_arch, enableFailFast)
{
stage("Show Environment") {
sh "env | sort"
echo "dockerImage: ${env.dockerImage}"
echo "gitlabSourceRepoHttpUrl: ${env.gitlabSourceRepoHttpUrl}"
echo "gitlabCommit: ${env.gitlabCommit}"
echo "alternativeTRT: ${env.alternativeTRT}"
echo "Using GitLab repo: ${LLM_REPO}. Commit: ${env.gitlabCommit}"
}
def wheelDockerImage = env.wheelDockerImage
if (!wheelDockerImage && cpu_arch == AARCH64_TRIPLE) {
wheelDockerImage = env.dockerImage
}
buildConfigs = [
"Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
"Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
] + [true, false].collectEntries{ cxx11 -> [
"Build libs (cxx11=${cxx11})".toString(), [wheelDockerImage] + prepareBuildLib(
pipeline, cpu_arch, !cxx11),
]}
if (cpu_arch == X86_64_TRIPLE) {
buildConfigs += [
"Build TRT-LLM SingleDevice": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, CONFIG_LINUX_X86_64_SINGLE_DEVICE),
]
}
def packageConf = prepareLLMPackage(pipeline, cpu_arch)
def artifacts = packageConf[0]
def runner = packageConf[1]
rtServer (
id: 'Artifactory',
url: 'https://urm.nvidia.com/artifactory',
credentialsId: 'urm-artifactory-creds',
// If Jenkins is configured to use an http proxy, you can bypass the proxy when using this Artifactory server:
bypassProxy: true,
// Configure the connection timeout (in seconds).
// The default value (if not configured) is 300 seconds:
timeout: 300
)
def reuseArtifactPath = env.reuseArtifactPath
if (reuseArtifactPath) {
def stageName = "Reuse Check"
newArtifacts = downloadArtifacts(stageName, reuseArtifactPath, artifacts)
if (!newArtifacts) {
echo "previous package does not exist, rebuild all the artifacts"
reuseArtifactPath = null
} else {
artifacts = newArtifacts
runner = null
}
}
def k8s_cpu = "amd64"
if (cpu_arch == AARCH64_TRIPLE) {
k8s_cpu = "arm64"
}
parallelJobs = buildConfigs.collectEntries{key, values -> [key, {
script {
buildOrCache(pipeline, key, reuseArtifactPath, values[1], values[0], k8s_cpu, values[2])
}
}]}
parallelJobs.failFast = enableFailFast
if (cpu_arch == X86_64_TRIPLE && !reuseArtifactPath) {
def key = "Build with build type Debug"
parallelJobs += [
(key): {
script {
trtllm_utils.launchKubernetesPod(pipeline, createKubernetesPodConfig(LLM_DOCKER_IMAGE, "build", k8s_cpu), "trt-llm", {
stage(key) {
stage("[${key}] Run") {
echoNodeAndGpuInfo(pipeline, key)
buildWheelInContainer(pipeline, [], X86_64_TRIPLE, false, false, "cp312", "-a '90-real' -b Debug --benchmarks --micro_benchmarks")
}
}
})
}
}]
}
stage("Build") {
pipeline.parallel parallelJobs
} // Build stage
stage("Package") {
container("trt-llm") {
if (!reuseArtifactPath) {
runner()
}
uploadArtifacts(artifacts)
}
}
}
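// Declarative pipeline entry point: a small "package" pod hosts the BuildJob stage, which
// delegates the actual work to launchStages().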
pipeline {
agent {
kubernetes createKubernetesPodConfig(AGENT_IMAGE, "package", "amd64")
}
options {
// Check the valid options at: https://www.jenkins.io/doc/book/pipeline/syntax/
// Some stages, like the results analysis stage, do not need to check out source code
skipDefaultCheckout()
// to better analyze the time for each step/test
timestamps()
timeout(time: 24, unit: 'HOURS')
}
environment {
// Workspace normally is: /home/jenkins/agent/workspace/LLM/L0_MergeRequest@tmp/
HF_HOME="${env.WORKSPACE_TMP}/.cache/huggingface"
CCACHE_DIR="${CCACHE_DIR}"
PIP_INDEX_URL="https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple"
// Force datasets into offline mode to prevent CI jobs from downloading HF datasets and causing test failures
HF_DATASETS_OFFLINE=1
}
stages {
stage("BuildJob") {
steps {
launchStages(this, params.targetArch, params.enableFailFast)
}
}
} // stages
} // pipeline