mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
Merge e4d605f45b into 38296a472b
This commit is contained in:
commit
fce0539d55
@ -1,4 +1,4 @@
|
||||
@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@main']) _
|
||||
@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@user/zhanruis/0105_test_mirror']) _
|
||||
|
||||
import java.lang.Exception
|
||||
import groovy.transform.Field
|
||||
@ -31,6 +31,12 @@ TRIGGER_TYPE = env.triggerType ?: "manual"
|
||||
|
||||
ENABLE_USE_WHEEL_FROM_BUILD_STAGE = params.useWheelFromBuildStage ?: false
|
||||
|
||||
GITHUB_CREDENTIALS_ID = env.GithubCredencialId ?: 'github-cred-trtllm-ci'
|
||||
|
||||
// "normal": will build and run all stages
|
||||
// "build_for_ci": will only build the images for CI
|
||||
MODE = params.mode ?: "normal"
|
||||
|
||||
WAIT_TIME_FOR_BUILD_STAGE = 60 // minutes
|
||||
|
||||
BUILD_JOBS = "32"
|
||||
@ -47,11 +53,14 @@ def CACHED_CHANGED_FILE_LIST = "cached_changed_file_list"
|
||||
def ACTION_INFO = "action_info"
|
||||
@Field
|
||||
def IMAGE_KEY_TO_TAG = "image_key_to_tag"
|
||||
@Field
|
||||
def GITHUB_SOURCE_REPO_AND_BRANCH = "github_source_repo_and_branch"
|
||||
def globalVars = [
|
||||
(GITHUB_PR_API_URL): null,
|
||||
(CACHED_CHANGED_FILE_LIST): null,
|
||||
(ACTION_INFO): null,
|
||||
(IMAGE_KEY_TO_TAG): [:],
|
||||
(GITHUB_SOURCE_REPO_AND_BRANCH): null,
|
||||
]
|
||||
|
||||
@Field
|
||||
@ -59,7 +68,7 @@ def imageKeyToTag = [:]
|
||||
|
||||
def createKubernetesPodConfig(type, arch = "amd64", build_wheel = false)
|
||||
{
|
||||
def targetCould = "kubernetes-cpu"
|
||||
def targetCloud = "kubernetes-cpu"
|
||||
def containerConfig = ""
|
||||
def selectors = """
|
||||
nodeSelector:
|
||||
@ -178,7 +187,7 @@ def createKubernetesPodConfig(type, arch = "amd64", build_wheel = false)
|
||||
def buildID = env.BUILD_ID
|
||||
def nodeLabel = trtllm_utils.appendRandomPostfix("${nodeLabelPrefix}---tensorrt-${jobName}-${buildID}")
|
||||
def podConfig = [
|
||||
cloud: targetCould,
|
||||
cloud: targetCloud,
|
||||
namespace: "sw-tensorrt",
|
||||
label: nodeLabel,
|
||||
yaml: """
|
||||
@ -356,6 +365,9 @@ def buildImage(config, imageKeyToTag)
|
||||
STAGE=${dockerfileStage} \
|
||||
BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} ${buildWheelArgs}
|
||||
""", sleepInSecs: randomSleep, numRetries: 6, shortCommondRunTimeMax: 7200)
|
||||
if (MODE == "build_for_ci") {
|
||||
imageKeyToTag[config.stageName] = imageWithTag
|
||||
}
|
||||
} catch (InterruptedException ex) {
|
||||
throw ex
|
||||
} catch (Exception ex) {
|
||||
@ -373,6 +385,9 @@ def buildImage(config, imageKeyToTag)
|
||||
STAGE=${dockerfileStage} \
|
||||
BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} ${buildWheelArgs}
|
||||
""", sleepInSecs: randomSleep, numRetries: 6, shortCommondRunTimeMax: 7200)
|
||||
if (MODE == "build_for_ci") {
|
||||
imageKeyToTag[config.stageName] = imageWithTag
|
||||
}
|
||||
}
|
||||
if (target == "ngc-release") {
|
||||
imageKeyToTag["NGC Release Image ${config.arch}"] = imageWithTag
|
||||
@ -477,6 +492,28 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
|
||||
dockerfileStage: "release",
|
||||
],
|
||||
]
|
||||
if (MODE == "build_for_ci") {
|
||||
buildConfigs = buildConfigs.findAll { key, config ->
|
||||
key.contains("Build CI Image")
|
||||
}
|
||||
// Add NGC devel build configs for CI
|
||||
buildConfigs += [
|
||||
"Build NGC devel (x86_64)": [
|
||||
target: "ngc-devel",
|
||||
action: release_action,
|
||||
args: "DOCKER_BUILD_OPTS='--load --platform linux/amd64'",
|
||||
dockerfileStage: "devel",
|
||||
],
|
||||
"Build NGC devel (SBSA)": [
|
||||
target: "ngc-devel",
|
||||
action: release_action,
|
||||
args: "DOCKER_BUILD_OPTS='--load --platform linux/arm64'",
|
||||
arch: "arm64",
|
||||
dockerfileStage: "devel",
|
||||
],
|
||||
]
|
||||
echo "Build configs only for CI"
|
||||
}
|
||||
// Override all fields in build config with default values
|
||||
buildConfigs.each { key, config ->
|
||||
defaultBuildConfig.each { defaultKey, defaultValue ->
|
||||
@ -484,6 +521,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
|
||||
config[defaultKey] = defaultValue
|
||||
}
|
||||
}
|
||||
config.stageName = key
|
||||
config.podConfig = createKubernetesPodConfig("build", config.arch, config.build_wheel)
|
||||
}
|
||||
echo "Build configs:"
|
||||
@ -518,6 +556,130 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Rewrites jenkins/current_image_tags.properties on the PR branch with the image
 * tags recorded in the script-level imageKeyToTag map, then commits and pushes
 * the change to the contributor's fork.
 *
 * @param globalVars map that must hold, under GITHUB_SOURCE_REPO_AND_BRANCH, a
 *                   string of the form "owner/repo:branch". If the entry is
 *                   missing or has no ':', the update is skipped with a warning.
 *
 * Fails the build (via the error step) when any of the four CI image tags is
 * missing/blank, or when the repo or branch part of the source string is empty.
 */
def updateCIImageTag(globalVars) {
    echo "Update CI Image Tag"
    // Update jenkins/current_image_tags.properties with newly built image tags and push to PR branch

    def imageTagKeys = [
        "LLM_DOCKER_IMAGE",
        "LLM_SBSA_DOCKER_IMAGE",
        "LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE",
        "LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"
    ]

    // Property key -> tag of the image built by the stage with the given name.
    def newImageTags = [
        "LLM_DOCKER_IMAGE" : imageKeyToTag["Build CI Image (x86_64 tritondevel)"],
        "LLM_SBSA_DOCKER_IMAGE" : imageKeyToTag["Build CI Image (SBSA tritondevel)"],
        "LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE" : imageKeyToTag["Build CI Image (RockyLinux8 Python310)"],
        "LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE" : imageKeyToTag["Build CI Image (RockyLinux8 Python312)"],
    ]

    // Refuse to write a partial update: every CI image must have produced a tag.
    def emptyKeys = newImageTags.findAll { k, v -> v == null || v.trim().isEmpty() }.keySet()
    if (!emptyKeys.isEmpty()) {
        error "Not found image tags for CI: ${emptyKeys.join(', ')}"
    }

    def filePath = "jenkins/current_image_tags.properties"

    withCredentials([usernamePassword(credentialsId: GITHUB_CREDENTIALS_ID, usernameVariable: 'GITHUB_USERNAME', passwordVariable: 'GITHUB_PASSWORD')]) {
        // 1. Validate and parse source repo and branch
        def srcRepoAndBranch = globalVars[GITHUB_SOURCE_REPO_AND_BRANCH]
        if (!srcRepoAndBranch || !srcRepoAndBranch.contains(":")) {
            echo "WARNING: No GitHub source repo and branch found. Skipping update."
            return
        }

        def parts = srcRepoAndBranch.split(":", 2)
        // split(":", 2) always yields two pieces once a ':' is present, so an empty
        // repo ("​:branch") or empty branch ("owner/repo:") must be rejected explicitly.
        if (parts.size() != 2 || !parts[0].trim() || !parts[1].trim()) {
            error "Invalid GITHUB_SOURCE_REPO_AND_BRANCH format: '${srcRepoAndBranch}'. Expected 'owner/repo:branch'"
        }
        def repoPart = parts[0]    // e.g., "ZhanruiSunCh/TensorRT-LLM"
        def branchName = parts[1]  // e.g., "user/zhanruis/feature_branch"
        echo "Target fork repo: ${repoPart}, branch: ${branchName}"

        // 2. Setup workspace with upstream repo
        def workDir = "update_ci_image_tag_workspace"

        // Upstream repository path on github.com (hard-coded, not derived from LLM_REPO).
        def upstreamRepoPath = 'NVIDIA/TensorRT-LLM'

        echo "Setting up workspace with upstream repo: ${upstreamRepoPath}"
        echo "Fork repo: ${repoPart}"

        // The fork and branch names come from the pull request and the token is a
        // secret, so none of them is interpolated into the shell scripts by Groovy.
        // They are exported as environment variables and expanded, quoted, by the
        // shell itself (all sh scripts below are single-quoted Groovy strings).
        withEnv([
            "TAG_UPDATE_WORKDIR=${workDir}",
            "TAG_UPDATE_UPSTREAM_REPO=${upstreamRepoPath}",
            "TAG_UPDATE_FORK_REPO=${repoPart}",
            "TAG_UPDATE_BRANCH=${branchName}",
            "TAG_UPDATE_FILE=${filePath}",
        ]) {
            try {
                // Clean up and prepare workspace
                sh 'rm -rf "$TAG_UPDATE_WORKDIR"'
                sh 'mkdir -p "$TAG_UPDATE_WORKDIR"'

                // Disable git-lfs globally to avoid lock verification
                sh 'git lfs uninstall || true'

                // Clone upstream repository without LFS
                sh '''
                    export GIT_LFS_SKIP_SMUDGE=1
                    cd "$TAG_UPDATE_WORKDIR"
                    git clone --depth 20 "https://${GITHUB_PASSWORD}@github.com/${TAG_UPDATE_UPSTREAM_REPO}.git" repo
                '''

                // Disable LFS in the cloned repo
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git lfs uninstall --local || true'
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git config --local lfs.locksverify false'

                // Add contributor's fork as remote
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git remote add contributor "https://${GITHUB_PASSWORD}@github.com/${TAG_UPDATE_FORK_REPO}.git"'

                // Fetch PR branch from contributor's fork
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git fetch contributor "$TAG_UPDATE_BRANCH"'

                // Checkout the PR branch
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git checkout -b pr-branch "contributor/$TAG_UPDATE_BRANCH"'

                // Configure Git user
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git config user.name "tensorrt-cicd"'
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git config user.email "90828364+tensorrt-cicd@users.noreply.github.com"'

                // 3. Read current file and update content
                echo "Reading and updating ${filePath}"
                def currentContent = readFile("${workDir}/repo/${filePath}")
                def lines = currentContent.split("\n") as List
                // Replace the whole "KEY=value" line for each known key; keep every other line as-is.
                def updatedLines = lines.collect { line ->
                    def matchedKey = imageTagKeys.find { key -> line.startsWith(key + "=") }
                    matchedKey ? "${matchedKey}=${newImageTags[matchedKey]}" : line
                }
                def updatedContent = updatedLines.join("\n") + "\n"

                // Write updated content
                writeFile file: "${workDir}/repo/${filePath}", text: updatedContent

                // 4. Commit and push back to contributor's fork
                echo "Committing and pushing changes"

                // Ensure LFS is still disabled (prevent lock verification)
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git lfs uninstall --local || true'
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git config --local lfs.locksverify false'

                // Stage changes
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git add "$TAG_UPDATE_FILE"'

                // Commit with sign-off
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git commit -s -m "[auto] Update CI image tags with newly built images"'

                // Push to contributor's fork branch (maintainer permission allows this)
                sh '''
                    export GIT_LFS_SKIP_SMUDGE=1
                    cd "$TAG_UPDATE_WORKDIR/repo"
                    git push contributor "HEAD:${TAG_UPDATE_BRANCH}"
                '''

                echo "✅ Successfully updated ${filePath} and pushed to ${repoPart}/${branchName}"
            } finally {
                // Cleanup, also on failure: the clone's .git/config holds remote URLs
                // that embed the access token.
                sh 'rm -rf "$TAG_UPDATE_WORKDIR"'
            }
        }
    }
}
|
||||
|
||||
|
||||
def getCommonParameters()
|
||||
{
|
||||
return [
|
||||
@ -583,7 +745,24 @@ pipeline {
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Update CI Image Tag") {
|
||||
when {
|
||||
expression {
|
||||
MODE == "build_for_ci"
|
||||
}
|
||||
}
|
||||
steps {
|
||||
script {
|
||||
updateCIImageTag(globalVars)
|
||||
}
|
||||
}
|
||||
}
|
||||
stage("Upload Artifact") {
|
||||
when {
|
||||
expression {
|
||||
MODE != "build_for_ci"
|
||||
}
|
||||
}
|
||||
steps {
|
||||
script {
|
||||
String imageKeyToTagJson = writeJSON returnText: true, json: imageKeyToTag
|
||||
@ -597,7 +776,7 @@ pipeline {
|
||||
stage("Wait For Build Job Complete") {
|
||||
when {
|
||||
expression {
|
||||
RUN_SANITY_CHECK
|
||||
RUN_SANITY_CHECK && MODE != "build_for_ci"
|
||||
}
|
||||
}
|
||||
steps {
|
||||
@ -658,7 +837,7 @@ pipeline {
|
||||
stage("Sanity Check For NGC Image") {
|
||||
when {
|
||||
expression {
|
||||
RUN_SANITY_CHECK
|
||||
RUN_SANITY_CHECK && MODE != "build_for_ci"
|
||||
}
|
||||
}
|
||||
steps {
|
||||
@ -694,7 +873,7 @@ pipeline {
|
||||
stage("Register NGC Image For Security Check") {
|
||||
when {
|
||||
expression {
|
||||
return params.nspect_id && params.action == "push"
|
||||
return params.nspect_id && params.action == "push" && MODE != "build_for_ci"
|
||||
}
|
||||
}
|
||||
steps {
|
||||
|
||||
@ -25,6 +25,9 @@ SCAN_ROOT = "scan"
|
||||
ARTIFACT_PATH = env.artifactPath ? env.artifactPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
|
||||
UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
|
||||
|
||||
// GitHub credentials configuration
|
||||
GITHUB_CREDENTIALS_ID = env.GithubCredencialId ?: 'github-cred-trtllm-ci'
|
||||
|
||||
// Container configuration
|
||||
def getContainerURIs()
|
||||
{
|
||||
@ -65,6 +68,7 @@ if (env.gitlabTriggerPhrase)
|
||||
boolean enableFailFast = !(env.JOB_NAME ==~ /.*PostMerge.*/ || env.JOB_NAME ==~ /.*Dependency_Testing_TRT.*/) && !gitlabParamsFromBot.get("disable_fail_fast", false)
|
||||
|
||||
boolean isReleaseCheckMode = (gitlabParamsFromBot.get("run_mode", "full") == "release_check")
|
||||
boolean isRetagImageMode = gitlabParamsFromBot.get("retag_image", false)
|
||||
|
||||
BUILD_STATUS_NAME = isReleaseCheckMode ? "Jenkins Release Check" : "Jenkins Full Build"
|
||||
|
||||
@ -114,6 +118,8 @@ def AUTO_TRIGGER_TAG_LIST = "auto_trigger_tag_list"
|
||||
def DEBUG_MODE = "debug"
|
||||
@Field
|
||||
def DETAILED_LOG = "detailed_log"
|
||||
@Field
|
||||
def BUILD_DOCKER_IMAGE = "build_docker_image"
|
||||
|
||||
def testFilter = [
|
||||
(REUSE_TEST): gitlabParamsFromBot.get(REUSE_TEST, null),
|
||||
@ -132,6 +138,7 @@ def testFilter = [
|
||||
(DEBUG_MODE): gitlabParamsFromBot.get(DEBUG_MODE, false),
|
||||
(AUTO_TRIGGER_TAG_LIST): [],
|
||||
(DETAILED_LOG): gitlabParamsFromBot.get(DETAILED_LOG, false),
|
||||
(BUILD_DOCKER_IMAGE): gitlabParamsFromBot.get(BUILD_DOCKER_IMAGE, false),
|
||||
]
|
||||
|
||||
String reuseBuild = gitlabParamsFromBot.get('reuse_build', null)
|
||||
@ -146,12 +153,15 @@ def ACTION_INFO = "action_info"
|
||||
def IMAGE_KEY_TO_TAG = "image_key_to_tag"
|
||||
@Field
|
||||
def TARGET_BRANCH = "target_branch"
|
||||
@Field
|
||||
def GITHUB_SOURCE_REPO_AND_BRANCH = "github_source_repo_and_branch"
|
||||
def globalVars = [
|
||||
(GITHUB_PR_API_URL): gitlabParamsFromBot.get('github_pr_api_url', null),
|
||||
(CACHED_CHANGED_FILE_LIST): null,
|
||||
(ACTION_INFO): gitlabParamsFromBot.get('action_info', null),
|
||||
(IMAGE_KEY_TO_TAG): [:],
|
||||
(TARGET_BRANCH): gitlabParamsFromBot.get('target_branch', null),
|
||||
(GITHUB_SOURCE_REPO_AND_BRANCH): env.githubSourceRepoAndBranch ? env.githubSourceRepoAndBranch : null,
|
||||
]
|
||||
|
||||
// If not running all test stages in the L0 pre-merge, we will not update the GitLab status at the end.
|
||||
@ -228,6 +238,29 @@ def createKubernetesPodConfig(image, type, arch = "amd64")
|
||||
imagePullPolicy: Always"""
|
||||
nodeLabelPrefix = "cpu"
|
||||
break
|
||||
case "build":
|
||||
// Use a customized docker:dind image with essential dependencies
|
||||
containerConfig = """
|
||||
- name: docker
|
||||
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:202505221445_docker_dind_withbash
|
||||
tty: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: 16
|
||||
memory: 72Gi
|
||||
ephemeral-storage: 200Gi
|
||||
limits:
|
||||
cpu: 16
|
||||
memory: 256Gi
|
||||
ephemeral-storage: 200Gi
|
||||
imagePullPolicy: Always
|
||||
securityContext:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_ADMIN"""
|
||||
nodeLabelPrefix = "cpu"
|
||||
break
|
||||
case "package":
|
||||
containerConfig = """
|
||||
- name: trt-llm
|
||||
@ -1255,6 +1288,9 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
|
||||
'triggerType': env.JOB_NAME ==~ /.*PostMerge.*/ ? "post-merge" : "pre-merge",
|
||||
'runSanityCheck': env.JOB_NAME ==~ /.*PostMerge.*/ ? true : false,
|
||||
]
|
||||
if (testFilter[(BUILD_DOCKER_IMAGE)]) {
|
||||
additionalParameters['mode'] = "build_for_ci"
|
||||
}
|
||||
|
||||
launchJob("/LLM/helpers/BuildDockerImages", false, enableFailFast, globalVars, "x86_64", additionalParameters)
|
||||
}
|
||||
@ -1274,6 +1310,10 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
|
||||
stages.remove("SBSA-Linux")
|
||||
echo "Build-Docker-Images job is set explicitly. Both x86_64-Linux and SBSA-Linux sub-pipelines will be disabled."
|
||||
}
|
||||
if (testFilter[(BUILD_DOCKER_IMAGE)]) {
|
||||
stages = stages.findAll { key, value -> key.contains("Release Check") } + dockerBuildJob
|
||||
echo "Only execute Build-Docker-Images and Release Check stages, build and update docker images and tags"
|
||||
}
|
||||
|
||||
parallelJobs = stages.collectEntries{key, value -> [key, {
|
||||
script {
|
||||
@ -1287,6 +1327,217 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
|
||||
pipeline.parallel parallelJobs
|
||||
}
|
||||
|
||||
/**
 * Builds the map "current image tag -> new staging image tag" used for retagging.
 *
 * Checks out the source at env.gitlabCommit, derives the version components of
 * the new tags from files in the checkout (BASE_TAG in docker/Dockerfile.multi,
 * CUDA_VER in docker/common/install_cuda_toolkit.sh, TRT_VER in
 * docker/common/install_tensorrt.sh), appends a UTC timestamp and the PR id, and
 * pairs each new tag with the value currently stored under the same key in
 * jenkins/current_image_tags.properties.
 *
 * @param pipeline   object whose echo() is used for logging
 * @param globalVars map that must hold the PR API URL under GITHUB_PR_API_URL;
 *                   its last path segment is used as the PR id
 * @return map from old image tag to new image tag, for the keys present in both
 *         the properties file and the generated tag set
 *
 * Fails the build (via the error step) if any version string cannot be found or
 * if no PR API URL is available.
 */
def getImageTags(pipeline, globalVars) {
    trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT)

    def pr_id = ""
    def pytorch_version = ""
    def cuda_version = ""
    def trt_version = ""
    def timestamp = ""

    // 1. Get pytorch_version from docker/Dockerfile.multi
    def dockerfilePath = "${LLM_ROOT}/docker/Dockerfile.multi"
    def dockerfileContent = readFile(file: dockerfilePath)
    def baseTagMatch = dockerfileContent =~ /ARG\s+BASE_TAG\s*=\s*([^\s]*)/
    if (baseTagMatch) {
        // Keep only the part of BASE_TAG before the first '-'.
        pytorch_version = baseTagMatch[0][1].split('-')[0]
    } else {
        error "Failed to find ARG BASE_TAG in Dockerfile.multi"
    }
    // java.util.regex.Matcher is not Serializable. Drop the reference before the
    // next pipeline step so a state save cannot trip over it.
    baseTagMatch = null

    // Extract cuda_version from docker/common/install_cuda_toolkit.sh
    def cudaToolkitPath = "${LLM_ROOT}/docker/common/install_cuda_toolkit.sh"
    def cudaToolkitContent = readFile(file: cudaToolkitPath)
    def cudaVerMatch = cudaToolkitContent =~ /CUDA_VER\s*=\s*"([^"]+)"/
    if (cudaVerMatch) {
        cuda_version = cudaVerMatch[0][1].split('_')[0] // e.g. "13.0.2"
    } else {
        error "Failed to find CUDA_VER in install_cuda_toolkit.sh"
    }
    cudaVerMatch = null  // see the Matcher note above

    // Extract trt_version from docker/common/install_tensorrt.sh
    def tensorrtInstallPath = "${LLM_ROOT}/docker/common/install_tensorrt.sh"
    def tensorrtInstallContent = readFile(file: tensorrtInstallPath)
    def trtVerMatch = tensorrtInstallContent =~ /TRT_VER\s*=\s*"([^"]+)"/
    if (trtVerMatch) {
        trt_version = trtVerMatch[0][1]
    } else {
        error "Failed to find TRT_VER in install_tensorrt.sh"
    }
    trtVerMatch = null  // see the Matcher note above

    // Generate timestamp in the format yyyyMMddHHmm
    def now = new Date()
    def ts = now.format("yyyyMMddHHmm", TimeZone.getTimeZone('UTC'))
    timestamp = ts

    if (globalVars[GITHUB_PR_API_URL]) {
        // The PR id is the last path segment of the PR API URL.
        pr_id = globalVars[GITHUB_PR_API_URL].split('/').last()
    } else {
        error "No GitHub PR API URL found"
    }

    // Declared with 'def' so the map stays local instead of becoming a
    // script-global binding variable.
    def newImageTags = [
        "LLM_DOCKER_IMAGE" : "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging:pytorch-${pytorch_version}-py3-x86_64-ubuntu24.04-trt${trt_version}-skip-tritondevel-${timestamp}-${pr_id}",
        "LLM_SBSA_DOCKER_IMAGE" : "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging:pytorch-${pytorch_version}-py3-aarch64-ubuntu24.04-trt${trt_version}-skip-tritondevel-${timestamp}-${pr_id}",
        "LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE" : "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging:cuda-${cuda_version}-devel-rocky8-x86_64-rocky8-py310-trt${trt_version}-skip-tritondevel-${timestamp}-${pr_id}",
        "LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE" : "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging:cuda-${cuda_version}-devel-rocky8-x86_64-rocky8-py312-trt${trt_version}-skip-tritondevel-${timestamp}-${pr_id}",
    ]
    pipeline.echo("Generated new image tags:\n" + newImageTags.collect { key, value -> "${key} = ${value}" }.join('\n'))

    // Current tags as stored in the checked-out properties file.
    def oldTagProps = readProperties file: "${LLM_ROOT}/jenkins/current_image_tags.properties", interpolate: true
    def oldImageTags = [:]
    oldTagProps.each { key, value ->
        oldImageTags[key] = value
    }
    def oldTagToNewTagMap = [:]
    oldImageTags.each { key, value ->
        // Only include mappings where the key exists in newImageTags
        if (newImageTags.containsKey(key)) {
            oldTagToNewTagMap[value] = newImageTags[key]
        }
    }
    pipeline.echo("Old to new image tag map:\n" + oldTagToNewTagMap.collect { k, v -> "${k} => ${v}" }.join('\n'))
    return oldTagToNewTagMap
}
|
||||
|
||||
/**
 * Publishes every image under its new tag.
 *
 * For each (old tag, new tag) pair the old image is pulled, tagged with the new
 * name and pushed; the three commands together are attempted up to 3 times.
 *
 * @param oldTagToNewTagMap map from existing image reference to the new one
 */
def renameDockerImages(oldTagToNewTagMap) {
    oldTagToNewTagMap.each { sourceImage, targetImage ->
        // Retry the whole pull/tag/push sequence as one unit.
        retry(3) {
            sh "docker pull ${sourceImage}"
            sh "docker tag ${sourceImage} ${targetImage}"
            sh "docker push ${targetImage}"
        }
    }
}
|
||||
|
||||
/**
 * Replaces old image tags with their new tags in
 * jenkins/current_image_tags.properties on the PR branch, then commits and
 * pushes the change to the contributor's fork.
 *
 * @param oldTagToNewTagMap map from the tag text currently in the file to the
 *                          tag text that should replace it
 * @param globalVars        map that must hold, under
 *                          GITHUB_SOURCE_REPO_AND_BRANCH, a string of the form
 *                          "owner/repo:branch". If the entry is missing or has
 *                          no ':', the update is skipped with a warning.
 *
 * Fails the build (via the error step) when the repo or branch part is empty, or
 * when the updated file would contain a line that terminates the heredoc used to
 * write it.
 */
def updateImageTag(oldTagToNewTagMap, globalVars) {
    withCredentials([usernamePassword(credentialsId: GITHUB_CREDENTIALS_ID, usernameVariable: 'GITHUB_USERNAME', passwordVariable: 'GITHUB_PASSWORD')]) {
        // 1. Validate and parse source repo and branch
        def srcRepoAndBranch = globalVars[GITHUB_SOURCE_REPO_AND_BRANCH]
        if (!srcRepoAndBranch || !srcRepoAndBranch.contains(":")) {
            echo "WARNING: No GitHub source repo and branch found. Skipping update."
            return
        }

        def parts = srcRepoAndBranch.split(":", 2)
        // split(":", 2) always yields two pieces once a ':' is present, so an empty
        // repo ("​:branch") or empty branch ("owner/repo:") must be rejected explicitly.
        if (parts.size() != 2 || !parts[0].trim() || !parts[1].trim()) {
            error "Invalid GITHUB_SOURCE_REPO_AND_BRANCH format: '${srcRepoAndBranch}'. Expected 'owner/repo:branch'"
        }
        def repoPart = parts[0]    // e.g., "ZhanruiSunCh/TensorRT-LLM"
        def branchName = parts[1]  // e.g., "user/zhanruis/feature_branch"
        echo "Target fork repo: ${repoPart}, branch: ${branchName}"

        // 2. Setup workspace with upstream repo
        def workDir = "update_ci_image_tag_workspace"
        def filePath = "jenkins/current_image_tags.properties"

        // Upstream repository path on github.com (hard-coded, not derived from LLM_REPO).
        def upstreamRepoPath = 'NVIDIA/TensorRT-LLM'

        echo "Setting up workspace with upstream repo: ${upstreamRepoPath}"
        echo "Fork repo: ${repoPart}"

        // The fork and branch names come from the pull request and the token is a
        // secret, so none of them is interpolated into the shell scripts by Groovy.
        // They are exported as environment variables and expanded, quoted, by the
        // shell itself (the sh scripts below are single-quoted Groovy strings).
        withEnv([
            "TAG_UPDATE_WORKDIR=${workDir}",
            "TAG_UPDATE_UPSTREAM_REPO=${upstreamRepoPath}",
            "TAG_UPDATE_FORK_REPO=${repoPart}",
            "TAG_UPDATE_BRANCH=${branchName}",
            "TAG_UPDATE_FILE=${filePath}",
        ]) {
            try {
                // Clean up and prepare workspace
                sh 'rm -rf "$TAG_UPDATE_WORKDIR"'
                sh 'mkdir -p "$TAG_UPDATE_WORKDIR"'

                // Disable git-lfs globally to avoid lock verification
                sh 'git lfs uninstall || true'

                // Clone upstream repository without LFS
                sh '''
                    export GIT_LFS_SKIP_SMUDGE=1
                    cd "$TAG_UPDATE_WORKDIR"
                    git clone --depth 20 "https://${GITHUB_PASSWORD}@github.com/${TAG_UPDATE_UPSTREAM_REPO}.git" repo
                '''

                // Disable LFS in the cloned repo
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git lfs uninstall --local || true'
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git config --local lfs.locksverify false'

                // Add contributor's fork as remote
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git remote add contributor "https://${GITHUB_PASSWORD}@github.com/${TAG_UPDATE_FORK_REPO}.git"'

                // Fetch PR branch from contributor's fork
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git fetch contributor "$TAG_UPDATE_BRANCH"'

                // Checkout the PR branch
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git checkout -b pr-branch "contributor/$TAG_UPDATE_BRANCH"'

                // Configure Git user
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git config user.name "tensorrt-cicd"'
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git config user.email "90828364+tensorrt-cicd@users.noreply.github.com"'

                // 3. Read current file and update content
                echo "Reading and updating ${filePath}"

                // Read file content
                def currentContent = sh(script: 'cat "$TAG_UPDATE_WORKDIR/repo/$TAG_UPDATE_FILE"', returnStdout: true)
                def lines = currentContent.split("\n") as List

                // Apply tag replacements line by line
                def updatedLines = lines.collect { line ->
                    // Accumulate replacements on updatedLine so that a second match
                    // on the same line does not discard the first replacement.
                    def updatedLine = line
                    oldTagToNewTagMap.each { oldTag, newTag ->
                        if (updatedLine.contains(oldTag)) {
                            updatedLine = updatedLine.replace(oldTag, newTag)
                            echo "Replaced in line: ${oldTag} -> ${newTag}"
                        }
                    }
                    return updatedLine
                }
                def updatedContent = updatedLines.join("\n") + "\n"

                // The file is written through a quoted heredoc. A content line equal
                // to the terminator would end the heredoc early and the remaining
                // lines would be executed as shell commands, so refuse such content.
                if (updatedLines.contains("EOF")) {
                    error "Refusing to write ${filePath}: it contains a line equal to the heredoc terminator 'EOF'"
                }

                // Write updated content using shell command to avoid permission issues.
                // The script is assembled by concatenation so that no source
                // indentation ends up in front of the content or the terminator.
                sh('cd "$TAG_UPDATE_WORKDIR/repo"\n' +
                   'cat > "$TAG_UPDATE_FILE" << \'EOF\'\n' +
                   updatedContent +
                   'EOF\n')

                // 4. Commit and push back to contributor's fork
                echo "Committing and pushing changes"

                // Ensure LFS is still disabled (prevent lock verification)
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git lfs uninstall --local || true'
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git config --local lfs.locksverify false'

                // Stage changes
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git add "$TAG_UPDATE_FILE"'

                // Commit with sign-off
                sh 'cd "$TAG_UPDATE_WORKDIR/repo" && git commit -s -m "[auto] Retag Docker image tags"'

                // Push to contributor's fork branch (maintainer permission allows this)
                sh '''
                    export GIT_LFS_SKIP_SMUDGE=1
                    cd "$TAG_UPDATE_WORKDIR/repo"
                    git push contributor "HEAD:${TAG_UPDATE_BRANCH}"
                '''

                echo "✅ Successfully updated ${filePath} and pushed to ${repoPart}/${branchName}"
            } finally {
                // Cleanup, also on failure: the clone's .git/config holds remote URLs
                // that embed the access token.
                sh 'rm -rf "$TAG_UPDATE_WORKDIR"'
            }
        }
    }
}
|
||||
|
||||
/**
 * Runs the retag flow inside the "docker" container of a "build" pod: logs in to
 * urm.nvidia.com, computes the old->new tag map, retags and pushes the images,
 * then updates the tag file on the PR branch.
 *
 * @param pipeline   pipeline object handed to the trtllm_utils helpers
 * @param globalVars shared pipeline variables, forwarded to getImageTags and
 *                   updateImageTag
 */
def runRetagImage(pipeline, globalVars)
{
    // Declared with 'def' so the pod spec stays local instead of becoming a
    // script-global binding variable.
    def retagPodSpec = createKubernetesPodConfig("", "build")
    trtllm_utils.launchKubernetesPod(pipeline, retagPodSpec, "docker", {
        withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
            // NOTE(review): the credentials are interpolated by Groovy into the command
            // string; confirm whether llmExecStepWithRetry can take a single-quoted
            // script so the shell expands them instead.
            trtllm_utils.llmExecStepWithRetry(pipeline, script: "docker login urm.nvidia.com -u ${USERNAME} -p ${PASSWORD}")
        }
        // Local to this closure; previously leaked into the script binding.
        def oldTagToNewTagMap = getImageTags(pipeline, globalVars)
        renameDockerImages(oldTagToNewTagMap)
        updateImageTag(oldTagToNewTagMap, globalVars)
    })
}
|
||||
|
||||
pipeline {
|
||||
agent {
|
||||
kubernetes createKubernetesPodConfig("", "agent")
|
||||
@ -1318,7 +1569,7 @@ pipeline {
|
||||
}
|
||||
always {
|
||||
script {
|
||||
if (!isReleaseCheckMode) {
|
||||
if (!isReleaseCheckMode && !isRetagImageMode) {
|
||||
collectTestResults(this, testFilter)
|
||||
}
|
||||
}
|
||||
@ -1349,6 +1600,12 @@ pipeline {
|
||||
launchReleaseCheck(this)
|
||||
}
|
||||
}
|
||||
} else if (isRetagImageMode) {
|
||||
stage("Retag Image") {
|
||||
script {
|
||||
runRetagImage(this, globalVars)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// globalVars[CACHED_CHANGED_FILE_LIST] is only used in setupPipelineEnvironment
|
||||
// Reset it to null to workaround the "Argument list too long" error
|
||||
|
||||
@ -1436,6 +1436,8 @@ def DEBUG_MODE = "debug"
|
||||
@Field
|
||||
def DETAILED_LOG = "detailed_log"
|
||||
@Field
|
||||
def BUILD_DOCKER_IMAGE = "build_docker_image"
|
||||
@Field
|
||||
def testFilter = [
|
||||
(REUSE_TEST): null,
|
||||
(REUSE_STAGE_LIST): null,
|
||||
@ -1453,6 +1455,7 @@ def testFilter = [
|
||||
(DEBUG_MODE): false,
|
||||
(AUTO_TRIGGER_TAG_LIST): [],
|
||||
(DETAILED_LOG): false,
|
||||
(BUILD_DOCKER_IMAGE): false,
|
||||
]
|
||||
|
||||
@Field
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
|
||||
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
|
||||
|
||||
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202512241744-10055
|
||||
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202512241744-10055
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202512241744-10055
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202512241744-10055
|
||||
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202601131147-9403
|
||||
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202601131147-9403
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202601131147-9403
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202601131147-9403
|
||||
|
||||
Loading…
Reference in New Issue
Block a user