Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00
infra: [TRTLLM-5072] Add SBSA release images (#4231)
* infra: [TRTLLM-5072] Add SBSA release images and move SBSA to blossom

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Fix review

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Easy to review

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Fix BUILD_JOBS

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Use gitlab mirror for nixl and ucx

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>

* Update BuildDockerImage.groovy

Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>

---------

Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>
Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
Co-authored-by: Yanchao Lu <yanchaol@nvidia.com>
parent fb663b637a
commit 17d48e0009
@@ -29,6 +29,11 @@ DOCKER_BUILD_OPTS ?= --pull --load
 DOCKER_BUILD_ARGS ?=
 DOCKER_PROGRESS ?= auto
 CUDA_ARCHS ?=
+PLATFORM ?= $(shell uname -m | grep -q 'aarch64' && echo "arm64" || echo "amd64")
+ifeq ($(PLATFORM), arm64)
+CUDA_ARCHS = '90-real;100-real;120-real'
+endif
+
 BUILD_WHEEL_OPTS ?=
 BUILD_WHEEL_ARGS ?= $(shell grep 'ARG BUILD_WHEEL_ARGS=' Dockerfile.multi | grep -o '=.*' | tr -d '="')$(if $(CUDA_ARCHS), --cuda_architectures $(CUDA_ARCHS))$(if $(BUILD_WHEEL_OPTS), $(BUILD_WHEEL_OPTS))
 TORCH_INSTALL_TYPE ?= skip
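
For reference, here is a minimal shell sketch of what the new PLATFORM default and the arm64-only CUDA_ARCHS override evaluate to; the logic mirrors the Makefile lines above, but the snippet itself is illustrative and not part of the change:

    # Resolve the platform the same way the Makefile default does:
    # "arm64" on aarch64 (SBSA) hosts, "amd64" everywhere else.
    PLATFORM=$(uname -m | grep -q 'aarch64' && echo "arm64" || echo "amd64")
    if [ "${PLATFORM}" = "arm64" ]; then
        # SBSA builds pin these CUDA architectures by default.
        CUDA_ARCHS='90-real;100-real;120-real'
    fi
    echo "PLATFORM=${PLATFORM} CUDA_ARCHS=${CUDA_ARCHS:-<unset>}"
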
@@ -42,7 +47,6 @@ TRT_LLM_VERSION ?= $(shell grep '^__version__' ../tensorrt_llm/version.py | g
 GITHUB_MIRROR ?=
 PYTHON_VERSION ?=
 NGC_STAGING_REPO ?= nvcr.io/nvstaging/tensorrt-llm
-PLATFORM ?= $(shell uname -m | grep -q 'aarch64' && echo "arm64" || echo "amd64")
 
 define add_local_user
 	docker build \
@@ -178,9 +182,14 @@ ubuntu22_%: BASE_TAG = 12.9.0-devel-ubuntu22.04
 
 trtllm_%: STAGE = release
 trtllm_%: PUSH_TO_STAGING := 0
-trtllm_%: DEVEL_IMAGE = $(shell grep 'LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
+trtllm_%: DEVEL_IMAGE = $(shell \
+	if [ "$(PLATFORM)" = "amd64" ]; then \
+		grep 'LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'; \
+	elif [ "$(PLATFORM)" = "arm64" ]; then \
+		grep 'LLM_SBSA_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'; \
+	fi)
 trtllm_%: IMAGE_NAME = $(shell grep 'IMAGE_NAME = ' ../jenkins/BuildDockerImage.groovy | grep -o '".*"' | tr -d '"')
-trtllm_%: IMAGE_TAG = $(shell git rev-parse --abbrev-ref HEAD | tr '/' '_')
+trtllm_%: IMAGE_TAG = $(shell git rev-parse --abbrev-ref HEAD | tr '/' '_')-$(PLATFORM)
 trtllm_run: WORK_DIR = /app/tensorrt_llm
 
 # This requires a docker installation with multi-platform support
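
To illustrate the per-platform DEVEL_IMAGE lookup above, here is a hedged shell sketch of the grep/tr pipeline it relies on; the sample Groovy line in the comment is a placeholder, not the real value in L0_MergeRequest.groovy:

    # Given a line in ../jenkins/L0_MergeRequest.groovy of the form
    #   LLM_SBSA_DOCKER_IMAGE = "some-registry/tensorrt-llm:devel-tag"   (placeholder)
    # the pipeline keeps the quoted value and strips the quotes, which is
    # what the DEVEL_IMAGE assignment does when PLATFORM=arm64.
    grep 'LLM_SBSA_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'
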
@@ -6,8 +6,21 @@ GITHUB_URL="https://github.com"
 UCX_VERSION="v1.18.1"
 UCX_INSTALL_PATH="/usr/local/ucx/"
 
+NIXL_VERSION="0.2.0"
+
+UCX_REPO="https://github.com/openucx/ucx.git"
+NIXL_REPO="https://github.com/ai-dynamo/nixl.git"
+
+UCX_MIRROR="https://gitlab-master.nvidia.com/ftp/GitHubSync/ucx.git"
+NIXL_MIRROR="https://gitlab-master.nvidia.com/ftp/GitHubSync/nixl.git"
+
+if [ -n "${GITHUB_MIRROR}" ]; then
+  UCX_REPO=${UCX_MIRROR}
+  NIXL_REPO=${NIXL_MIRROR}
+fi
+
 if [ ! -d ${UCX_INSTALL_PATH} ]; then
-  git clone --depth 1 -b ${UCX_VERSION} https://github.com/openucx/ucx.git
+  git clone --depth 1 -b ${UCX_VERSION} ${UCX_REPO}
   cd ucx
   ./autogen.sh
   ./contrib/configure-release --prefix=${UCX_INSTALL_PATH}
@@ -17,9 +30,6 @@ if [ ! -d ${UCX_INSTALL_PATH} ]; then
   echo "export LD_LIBRARY_PATH=${UCX_INSTALL_PATH}/lib:\$LD_LIBRARY_PATH" >> "${ENV}"
 fi
 
-NIXL_VERSION="0.2.0"
-NIXL_REPO="${GITHUB_URL}/ai-dynamo/nixl.git"
-
 ARCH_NAME="x86_64-linux-gnu"
 if [ "$(uname -m)" != "amd64" ] && [ "$(uname -m)" != "x86_64" ]; then
   ARCH_NAME="aarch64-linux-gnu"
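
As a quick sanity check of the mirror fallback introduced above, the following standalone sketch reproduces the toggle under the same variable names and only prints which remotes would be cloned; it is illustrative, not part of the script:

    # Any non-empty GITHUB_MIRROR switches both clones to the internal GitLab mirrors.
    UCX_REPO="https://github.com/openucx/ucx.git"
    NIXL_REPO="https://github.com/ai-dynamo/nixl.git"
    if [ -n "${GITHUB_MIRROR}" ]; then
        UCX_REPO="https://gitlab-master.nvidia.com/ftp/GitHubSync/ucx.git"
        NIXL_REPO="https://gitlab-master.nvidia.com/ftp/GitHubSync/nixl.git"
    fi
    echo "UCX:  ${UCX_REPO}"
    echo "NIXL: ${NIXL_REPO}"
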
@@ -16,8 +16,10 @@ LLM_BRANCH = env.gitlabBranch? env.gitlabBranch : params.branch
 LLM_BRANCH_TAG = LLM_BRANCH.replaceAll('/', '_')
 
 BUILD_JOBS = "32"
+BUILD_JOBS_RELEASE_X86_64 = "16"
+BUILD_JOBS_RELEASE_SBSA = "8"
 
-def createKubernetesPodConfig(type)
+def createKubernetesPodConfig(type, arch = "amd64")
 {
     def targetCould = "kubernetes-cpu"
     def containerConfig = ""
@@ -75,6 +77,7 @@ def createKubernetesPodConfig(type)
                 nodeSelector:
                   nvidia.com/node_type: builder
                   kubernetes.io/os: linux
+                  kubernetes.io/arch: ${arch}
                 containers:
                   ${containerConfig}
                   - name: jnlp
@@ -96,9 +99,10 @@ def createKubernetesPodConfig(type)
 }
 
 
-def buildImage(target, action="build", torchInstallType="skip", args="", custom_tag="", post_tag="")
+def buildImage(target, action="build", torchInstallType="skip", args="", custom_tag="", post_tag="", is_sbsa=false)
 {
-    def tag = "x86_64-${target}-torch_${torchInstallType}${post_tag}-${LLM_BRANCH_TAG}-${BUILD_NUMBER}"
+    def arch = is_sbsa ? "sbsa" : "x86_64"
+    def tag = "${arch}-${target}-torch_${torchInstallType}${post_tag}-${LLM_BRANCH_TAG}-${BUILD_NUMBER}"
 
     // Step 1: cloning tekit source code
     // allow to checkout from forked repo, svc_tensorrt needs to have access to the repo, otherwise clone will fail
@@ -128,15 +132,31 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
         }
     }
     try {
+        // Fix the build OOM issue of release builds
+        def build_jobs = BUILD_JOBS
+        if (target == "trtllm") {
+            if (arch == "x86_64") {
+                build_jobs = BUILD_JOBS_RELEASE_X86_64
+            } else {
+                build_jobs = BUILD_JOBS_RELEASE_SBSA
+            }
+        }
         containerGenFailure = null
         stage ("make ${target}_${action}") {
             retry(3)
             {
+                // Fix the triton image pull timeout issue
+                def TRITON_IMAGE = sh(script: "cd ${LLM_ROOT} && grep 'ARG TRITON_IMAGE=' docker/Dockerfile.multi | grep -o '=.*' | tr -d '=\"'", returnStdout: true).trim()
+                def TRITON_BASE_TAG = sh(script: "cd ${LLM_ROOT} && grep 'ARG TRITON_BASE_TAG=' docker/Dockerfile.multi | grep -o '=.*' | tr -d '=\"'", returnStdout: true).trim()
+                retry(3) {
+                    sh "docker pull ${TRITON_IMAGE}:${TRITON_BASE_TAG}"
+                }
+
                 sh """
                 cd ${LLM_ROOT} && make -C docker ${target}_${action} \
                 TORCH_INSTALL_TYPE=${torchInstallType} \
                 IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${tag} \
-                BUILD_WHEEL_OPTS='-j ${BUILD_JOBS}' ${args} \
+                BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} \
                 GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
                 """
             }
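
The Triton pre-pull added above can be exercised outside Jenkins with a small shell sketch; the grep commands and Dockerfile path are taken from the diff, while the loop is a plain-shell stand-in for the pipeline's retry(3):

    # Read the Triton base image and tag from docker/Dockerfile.multi,
    # then pull it up to three times, mirroring the pipeline's retry(3).
    TRITON_IMAGE=$(grep 'ARG TRITON_IMAGE=' docker/Dockerfile.multi | grep -o '=.*' | tr -d '="')
    TRITON_BASE_TAG=$(grep 'ARG TRITON_BASE_TAG=' docker/Dockerfile.multi | grep -o '=.*' | tr -d '="')
    for attempt in 1 2 3; do
        docker pull "${TRITON_IMAGE}:${TRITON_BASE_TAG}" && break
        echo "pull attempt ${attempt} failed, retrying..."
    done
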
@@ -148,7 +168,7 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
                 cd ${LLM_ROOT} && make -C docker ${target}_${action} \
                 TORCH_INSTALL_TYPE=${torchInstallType} \
                 IMAGE_NAME=${IMAGE_NAME} IMAGE_TAG=${custom_tag} \
-                BUILD_WHEEL_OPTS='-j ${BUILD_JOBS}' ${args} \
+                BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} \
                 GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
                 """
             }
@@ -170,38 +190,6 @@ def buildImage(target, action="build", torchInstallType="skip", args="", custom_
 }
 
 
-def triggerSBSARemoteJob(action, type)
-{
-    script
-    {
-        def parameters = """
-            token=L1_Nightly_Token
-            hostJobName=${JOB_NAME}
-            hostBuildNumber=${BUILD_NUMBER}
-            gitlabBranch=${LLM_BRANCH}
-            action=${action}
-            type=${type}
-        """.stripIndent()
-
-        catchError(buildResult: 'FAILURE', stageResult: 'FAILURE')
-        {
-            def handle = triggerRemoteJob(
-                job: "https://prod.blsm.nvidia.com/sw-tensorrt-static-1/job/LLM/job/helpers/job/gh200-BuildImage/",
-                auth: CredentialsAuth(credentials: "STATIC_1_TOKEN"),
-                parameters: parameters,
-                pollInterval: 60,
-                abortTriggeredJob: true,
-            )
-            def status = handle.getBuildResult().toString()
-
-            if (status != "SUCCESS") {
-                error "Downstream job did not succeed"
-            }
-        }
-    }
-}
-
-
 pipeline {
     agent {
         kubernetes createKubernetesPodConfig("agent")
@@ -240,7 +228,7 @@ pipeline {
             }
             steps
             {
-                buildImage("trtllm", "push", "skip", "", LLM_BRANCH_TAG)
+                buildImage("trtllm", params.action, "skip", "", LLM_BRANCH_TAG)
             }
         }
         stage("Build x86_64-skip") {
@@ -252,22 +240,13 @@ pipeline {
                 buildImage("tritondevel", params.action, "skip")
             }
         }
-        stage("Build x86_64-pre_cxx11_abi") {
+        stage("Build trtllm release-sbsa") {
             agent {
-                kubernetes createKubernetesPodConfig("build")
+                kubernetes createKubernetesPodConfig("build", "arm64")
             }
             steps
             {
-                buildImage("devel", params.action, "src_non_cxx11_abi")
-            }
-        }
-        stage("Build x86_64-cxx11_abi") {
-            agent {
-                kubernetes createKubernetesPodConfig("build")
-            }
-            steps
-            {
-                buildImage("devel", params.action, "src_cxx11_abi")
+                buildImage("trtllm", params.action, "skip", "", LLM_BRANCH_TAG + "-sbsa", "", true)
             }
         }
         stage("Build rockylinux8 x86_64-skip-py3.10") {
@@ -290,29 +269,11 @@ pipeline {
         }
         stage("Build SBSA-skip") {
             agent {
-                kubernetes createKubernetesPodConfig("agent")
+                kubernetes createKubernetesPodConfig("build", "arm64")
             }
             steps
             {
-                triggerSBSARemoteJob(params.action, "skip")
-            }
-        }
-        stage("Build SBSA-pre_cxx11_abi") {
-            agent {
-                kubernetes createKubernetesPodConfig("agent")
-            }
-            steps
-            {
-                triggerSBSARemoteJob(params.action, "src_non_cxx11_abi")
-            }
-        }
-        stage("Build SBSA-cxx11_abi") {
-            agent {
-                kubernetes createKubernetesPodConfig("agent")
-            }
-            steps
-            {
-                triggerSBSARemoteJob(params.action, "src_cxx11_abi")
+                buildImage("tritondevel", params.action, "skip", "", "", "", true)
             }
         }
     }
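
Putting the Jenkins changes together, the make invocation the new "Build trtllm release-sbsa" stage ends up issuing looks roughly like the sketch below; the image name and tag are placeholders, the action is assumed to be "push", and the -j 8 value comes from the new BUILD_JOBS_RELEASE_SBSA constant:

    # Approximate command issued by the SBSA release stage
    # (placeholder IMAGE_NAME/IMAGE_TAG; real values come from the pipeline).
    cd llm && make -C docker trtllm_push \
        TORCH_INSTALL_TYPE=skip \
        IMAGE_NAME=urm.example.com/tensorrt-llm-staging \
        IMAGE_TAG=sbsa-trtllm-torch_skip-main-1234 \
        BUILD_WHEEL_OPTS='-j 8' \
        GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote
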
@@ -1,123 +0,0 @@
-@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@main']) _
-
-import java.lang.Exception
-import groovy.transform.Field
-
-// Docker image registry
-DOCKER_IMAGE = "docker:dind"
-IMAGE_NAME = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging"
-
-// LLM repository configuration
-withCredentials([string(credentialsId: 'default-llm-repo', variable: 'DEFAULT_LLM_REPO')]) {
-    LLM_REPO = env.gitlabSourceRepoHttpUrl ? env.gitlabSourceRepoHttpUrl : "${DEFAULT_LLM_REPO}"
-}
-LLM_ROOT = "llm"
-
-def buildImage(action, type)
-{
-    def branch = env.gitlabBranch
-    def branchTag = branch.replaceAll('/', '_')
-    def buildNumber = env.hostBuildNumber ? env.hostBuildNumber : BUILD_NUMBER
-    def stage_docker = "tritondevel"
-    def tag = "sbsa-${stage_docker}-torch_${type}-${branchTag}-${buildNumber}"
-
-    // Step 1: cloning tekit source code
-    // allow to checkout from forked repo, svc_tensorrt needs to have access to the repo, otherwise clone will fail
-    stage('Prepare') {
-        echo "hostJobName: ${env.hostJobName}"
-        echo "hostBuildNumber: ${env.hostBuildNumber}"
-        echo "gitlabBranch: ${env.gitlabBranch}"
-        echo "action: ${env.action}"
-        echo "type: ${env.type}"
-        sh 'pwd'
-        sh 'ls -lah'
-        sh 'rm -rf ./*'
-        sh 'ls -lah'
-    }
-
-    trtllm_utils.checkoutSource(LLM_REPO, branch, LLM_ROOT, true, true)
-
-    // Step 2: building wheels in container
-    docker.image(DOCKER_IMAGE).inside('-v /var/run/docker.sock:/var/run/docker.sock --privileged') {
-        stage ("Install packages") {
-            sh "pwd && ls -alh"
-            sh "env"
-            sh "apk add make git"
-            sh "git config --global --add safe.directory '*'"
-
-            withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
-                sh "docker login urm.nvidia.com -u ${USERNAME} -p ${PASSWORD}"
-            }
-
-            withCredentials([
-                usernamePassword(
-                    credentialsId: "svc_tensorrt_gitlab_read_api_token",
-                    usernameVariable: 'USERNAME',
-                    passwordVariable: 'PASSWORD'
-                ),
-                string(credentialsId: 'default-git-url', variable: 'DEFAULT_GIT_URL')
-            ]) {
-                sh "docker login ${DEFAULT_GIT_URL}:5005 -u ${USERNAME} -p ${PASSWORD}"
-            }
-        }
-        try {
-            containerGenFailure = null
-            // stage ("Generate Image") {
-            //     retry(3)
-            //     {
-            //         sh "cd ${LLM_ROOT} && make -C docker release_build TORCH_INSTALL_TYPE=${type}" +
-            //            " GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote"
-            //     }
-            // }
-            stage ("Perform '${action}' action on image") {
-                retry(3)
-                {
-                    sh """cd ${LLM_ROOT} && make -C docker ${stage_docker}_${action} \
-                        IMAGE_NAME=${IMAGE_NAME} \
-                        IMAGE_TAG=${tag} \
-                        TORCH_INSTALL_TYPE=${type} \
-                        GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote"""
-                }
-            }
-        } catch (Exception ex) {
-            containerGenFailure = ex
-        } finally {
-            stage ("Docker logout") {
-                withCredentials([string(credentialsId: 'default-git-url', variable: 'DEFAULT_GIT_URL')]) {
-                    sh "docker logout urm.nvidia.com"
-                    sh "docker logout ${DEFAULT_GIT_URL}:5005"
-                }
-            }
-            if (containerGenFailure != null) {
-                throw containerGenFailure
-            }
-        }
-    }
-}
-
-
-pipeline {
-    agent {
-        label 'sbsa-a100-80gb-pcie-x4||sbsa-gh200-480gb'
-    }
-    options {
-        // Check the valid options at: https://www.jenkins.io/doc/book/pipeline/syntax/
-        // some step like results analysis stage, does not need to check out source code
-        skipDefaultCheckout()
-        // to better analyze the time for each step/test
-        timestamps()
-        timeout(time: 24, unit: 'HOURS')
-    }
-    environment {
-        PIP_INDEX_URL="https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple"
-    }
-    stages {
-        stage("Build")
-        {
-            steps
-            {
-                buildImage(env.action, env.type)
-            }
-        }
-    } // stages
-} // pipeline