mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
infra: [TRTLLM-5873] Use build stage wheels to speed up docker release image build (#4939)
Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>
This commit is contained in:
parent
7231134996
commit
c3729dbd7d
@ -128,8 +128,9 @@ ENV CCACHE_DIR=/root/.cache/ccache
|
||||
# Build the TRT-LLM wheel
|
||||
ARG GITHUB_MIRROR=""
|
||||
ARG BUILD_WHEEL_ARGS="--clean --benchmarks"
|
||||
ARG BUILD_WHEEL_SCRIPT="scripts/build_wheel.py"
|
||||
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=${CCACHE_DIR} \
|
||||
GITHUB_MIRROR=$GITHUB_MIRROR python3 scripts/build_wheel.py ${BUILD_WHEEL_ARGS}
|
||||
GITHUB_MIRROR=$GITHUB_MIRROR python3 ${BUILD_WHEEL_SCRIPT} ${BUILD_WHEEL_ARGS}
|
||||
|
||||
FROM ${DEVEL_IMAGE} AS release
|
||||
|
||||
|
||||
@ -39,6 +39,7 @@ PLATFORM ?= $(shell uname -m | grep -q 'aarch64' && echo "arm64" || ec
|
||||
CUDA_ARCHS ?= $(if $(filter arm64,$(PLATFORM)),'90-real;100-real;120-real',)
|
||||
BUILD_WHEEL_OPTS ?=
|
||||
BUILD_WHEEL_ARGS ?= $(shell grep '^ARG BUILD_WHEEL_ARGS=' Dockerfile.multi | grep -o '=.*' | tr -d '="')$(if $(CUDA_ARCHS), --cuda_architectures $(CUDA_ARCHS))$(if $(BUILD_WHEEL_OPTS), $(BUILD_WHEEL_OPTS))
|
||||
BUILD_WHEEL_SCRIPT ?=
|
||||
TORCH_INSTALL_TYPE ?= skip
|
||||
CUDA_VERSION ?=
|
||||
CUDNN_VERSION ?=
|
||||
@ -80,6 +81,7 @@ endef
|
||||
$(if $(BASE_IMAGE), --build-arg BASE_IMAGE=$(BASE_IMAGE)) \
|
||||
$(if $(BASE_TAG), --build-arg BASE_TAG=$(BASE_TAG)) \
|
||||
$(if $(BUILD_WHEEL_ARGS), --build-arg BUILD_WHEEL_ARGS="$(BUILD_WHEEL_ARGS)") \
|
||||
$(if $(BUILD_WHEEL_SCRIPT), --build-arg BUILD_WHEEL_SCRIPT="$(BUILD_WHEEL_SCRIPT)") \
|
||||
$(if $(TORCH_INSTALL_TYPE), --build-arg TORCH_INSTALL_TYPE="$(TORCH_INSTALL_TYPE)") \
|
||||
$(if $(CUDA_VERSION), --build-arg CUDA_VER="$(CUDA_VERSION)") \
|
||||
$(if $(CUDNN_VERSION), --build-arg CUDNN_VER="$(CUDNN_VERSION)") \
|
||||
|
||||
@ -460,6 +460,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
|
||||
sh "mkdir -p TensorRT-LLM/benchmarks/cpp"
|
||||
sh "cp ${LLM_ROOT}/cpp/build/benchmarks/bertBenchmark TensorRT-LLM/benchmarks/cpp"
|
||||
sh "cp ${LLM_ROOT}/cpp/build/benchmarks/gptManagerBenchmark TensorRT-LLM/benchmarks/cpp"
|
||||
sh "cp ${LLM_ROOT}/cpp/build/benchmarks/disaggServerBenchmark TensorRT-LLM/benchmarks/cpp"
|
||||
sh "cp ${LLM_ROOT}/cpp/build/tensorrt_llm/libtensorrt_llm.so TensorRT-LLM/benchmarks/cpp"
|
||||
sh "cp ${LLM_ROOT}/cpp/build/tensorrt_llm/plugins/libnvinfer_plugin_tensorrt_llm.so TensorRT-LLM/benchmarks/cpp"
|
||||
|
||||
|
||||
@ -27,6 +27,9 @@ LLM_SHORT_COMMIT = env.gitlabCommit ? env.gitlabCommit.substring(0, 7) : "undefi
|
||||
LLM_DEFAULT_TAG = env.defaultTag ?: "${LLM_SHORT_COMMIT}-${LLM_BRANCH_TAG}-${BUILD_NUMBER}"
|
||||
|
||||
RUN_SANITY_CHECK = params.runSanityCheck ?: false
|
||||
TRIGGER_TYPE = env.triggerType ?: "manual"
|
||||
|
||||
WAIT_TIME_FOR_BUILD_STAGE = 60 // minutes
|
||||
|
||||
BUILD_JOBS = "32"
|
||||
BUILD_JOBS_RELEASE_X86_64 = "32"
|
||||
@ -189,6 +192,27 @@ def createKubernetesPodConfig(type, arch = "amd64", build_wheel = false)
|
||||
}
|
||||
|
||||
|
||||
// Compute extra `make` variable overrides that switch the release image build
// from compiling the TRT-LLM wheel to downloading it from the build stage's
// uploaded artifacts. Returns "" (no override) unless this is a post-merge
// build of the "release" Dockerfile stage; otherwise returns
// BUILD_WHEEL_SCRIPT/BUILD_WHEEL_ARGS overrides pointing at
// scripts/get_wheel_from_package.py.
def prepareWheelFromBuildStage(dockerfileStage, arch) {
    // Only post-merge pipelines upload a build-stage wheel that can be reused.
    if (TRIGGER_TYPE != "post-merge") {
        echo "Trigger type is not post-merge, skip preparing wheel from build stage"
        return ""
    }

    // Defensive check: both parameters are required to build the args string.
    if (!dockerfileStage || !arch) {
        echo "Error: dockerfileStage and arch are required parameters"
        return ""
    }

    // Only the "release" stage consumes the prebuilt wheel.
    if (dockerfileStage != "release") {
        echo "prepareWheelFromBuildStage: ${dockerfileStage} is not release"
        return ""
    }

    def wheelScript = 'scripts/get_wheel_from_package.py'
    // WAIT_TIME_FOR_BUILD_STAGE is in minutes; env.uploadPath locates the
    // uploaded tarball on Artifactory.
    def wheelArgs = "--arch ${arch} --timeout ${WAIT_TIME_FOR_BUILD_STAGE} --artifact_path " + env.uploadPath
    // Leading space so the caller can append this directly to the make args.
    return " BUILD_WHEEL_SCRIPT=${wheelScript} BUILD_WHEEL_ARGS='${wheelArgs}'"
}
|
||||
|
||||
def buildImage(config, imageKeyToTag)
|
||||
{
|
||||
def target = config.target
|
||||
@ -209,11 +233,15 @@ def buildImage(config, imageKeyToTag)
|
||||
def dependentImageWithTag = "${IMAGE_NAME}/${dependent.dockerfileStage}:${dependentTag}"
|
||||
def customImageWithTag = "${IMAGE_NAME}/${dockerfileStage}:${customTag}"
|
||||
|
||||
if (target == "ngc-release" && params.triggerType == "post-merge") {
|
||||
echo "Use NGC artifacts for post merge build"
|
||||
dependentImageWithTag = "${NGC_IMAGE_NAME}:${dependentTag}"
|
||||
imageWithTag = "${NGC_IMAGE_NAME}:${tag}"
|
||||
customImageWithTag = "${NGC_IMAGE_NAME}:${customTag}"
|
||||
if (target == "ngc-release") {
|
||||
if (TRIGGER_TYPE == "post-merge") {
|
||||
echo "Use NGC artifacts for post merge build"
|
||||
dependentImageWithTag = "${NGC_IMAGE_NAME}:${dependentTag}"
|
||||
imageWithTag = "${NGC_IMAGE_NAME}:${tag}"
|
||||
customImageWithTag = "${NGC_IMAGE_NAME}:${customTag}"
|
||||
}
|
||||
imageKeyToTag["NGC Devel Image ${config.arch}"] = dependentImageWithTag
|
||||
imageKeyToTag["NGC Release Image ${config.arch}"] = imageWithTag
|
||||
}
|
||||
|
||||
args += " GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote"
|
||||
@ -274,6 +302,7 @@ def buildImage(config, imageKeyToTag)
|
||||
}
|
||||
}
|
||||
|
||||
args += prepareWheelFromBuildStage(dockerfileStage, arch)
|
||||
// Avoid the frequency of OOM issue when building the wheel
|
||||
if (target == "trtllm") {
|
||||
if (arch == "x86_64") {
|
||||
@ -420,8 +449,8 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
|
||||
} catch (InterruptedException e) {
|
||||
throw e
|
||||
} catch (Exception e) {
|
||||
echo "Build ${key} failed."
|
||||
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
|
||||
echo "Build ${key} failed."
|
||||
throw e
|
||||
}
|
||||
}
|
||||
|
||||
112
scripts/get_wheel_from_package.py
Normal file
112
scripts/get_wheel_from_package.py
Normal file
@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import glob
import os
import shutil
import subprocess
import tarfile
import time
import urllib.request
from argparse import ArgumentParser
from pathlib import Path
|
||||
|
||||
|
||||
def get_project_dir():
    """Return the repository root (the parent of the scripts/ directory)."""
    script_dir = Path(__file__).parent
    return script_dir.resolve().parent
|
||||
|
||||
|
||||
def add_arguments(parser: ArgumentParser):
    """Register the CLI options consumed by get_wheel_from_package."""
    # (flags, keyword options) for each supported argument; flag order is
    # kept as (long, short) so argparse derives the same dest names.
    specs = [
        (("--arch", "-a"),
         dict(required=True, help="Architecture of the built package")),
        (("--artifact_path", "-u"),
         dict(required=True, help="the path of the built package")),
        (("--timeout", "-t"),
         dict(type=int, default=60, help="Timeout in minutes")),
    ]
    for flags, options in specs:
        parser.add_argument(*flags, **options)
|
||||
|
||||
|
||||
def get_wheel_from_package(arch, artifact_path, timeout):
    """Fetch the prebuilt TensorRT-LLM tarball from Artifactory and stage its
    contents where the Docker release build expects them.

    The wheel goes to ``<root>/build`` and the C++ benchmark binaries to
    ``<root>/cpp/build/benchmarks``.

    Args:
        arch: Target architecture; ``"x86_64"`` selects the x86 tarball,
            anything else the GH200 one.
        artifact_path: Artifactory path component of the tarball URL.
        timeout: Maximum wait in minutes; one download attempt is made per
            minute.

    Raises:
        TimeoutError: If the tarball is still unavailable after ``timeout``
            attempts.
    """
    # The CI upload uses a different tarball name per architecture.
    if arch == "x86_64":
        tarfile_name = "TensorRT-LLM.tar.gz"
    else:
        tarfile_name = "TensorRT-LLM-GH200.tar.gz"

    tarfile_link = f"https://urm.nvidia.com/artifactory/{artifact_path}/{tarfile_name}"
    _download_with_retry(tarfile_link, tarfile_name, timeout)

    llm_root = get_project_dir()
    extract_root = llm_root / "tmp"
    extract_root.mkdir(parents=True, exist_ok=True)

    # Extract with the stdlib instead of shelling out to `tar`, so the
    # script does not depend on an external binary being installed.
    with tarfile.open(tarfile_name, "r:gz") as tar:
        tar.extractall(path=str(extract_root))

    package_dir = extract_root / "TensorRT-LLM"
    _stage_wheel(package_dir, llm_root / "build")
    _stage_benchmarks(package_dir, llm_root / "cpp" / "build" / "benchmarks")

    # Remove the whole scratch directory (the previous code removed only
    # tmp/TensorRT-LLM and left an empty tmp/ behind) and the tarball.
    shutil.rmtree(extract_root, ignore_errors=True)
    if os.path.exists(tarfile_name):
        os.remove(tarfile_name)


def _download_with_retry(url, dest, attempts):
    """Download ``url`` to ``dest``, retrying once per minute.

    Raises:
        TimeoutError: When all ``attempts`` download attempts fail.
    """
    for attempt in range(attempts):
        try:
            # stdlib download instead of spawning `wget`.
            urllib.request.urlretrieve(url, dest)
            print(f"Tarfile is available at {url}")
            return
        except Exception:
            if attempt == attempts - 1:
                raise TimeoutError(
                    f"Failed to download file after {attempts} attempts: {url}"
                )
            print(
                f"Tarfile not ready yet, waiting 60 seconds... (attempt {attempt + 1}/{attempts})"
            )
            time.sleep(60)


def _stage_wheel(package_dir, build_dir):
    """Move every tensorrt_llm*.whl from the unpacked package into build_dir."""
    build_dir.mkdir(parents=True, exist_ok=True)
    wheel_files = glob.glob(str(package_dir / "tensorrt_llm*.whl"))
    if not wheel_files:
        # The release build needs the wheel; make its absence visible.
        print(f"Warning: no wheel file found under {package_dir}")
    for wheel_file in wheel_files:
        shutil.move(wheel_file, str(build_dir))
        print(f"Moved wheel file: {wheel_file} -> {build_dir}")


def _stage_benchmarks(package_dir, benchmarks_dir):
    """Copy the C++ benchmark binaries shipped in the package, if present."""
    benchmarks_dir.mkdir(parents=True, exist_ok=True)
    for benchmark in ("bertBenchmark", "gptManagerBenchmark",
                      "disaggServerBenchmark"):
        src_path = package_dir / "benchmarks" / "cpp" / benchmark
        if src_path.exists():
            dst_path = benchmarks_dir / benchmark
            shutil.copy2(src_path, dst_path)
            print(f"Copied benchmark file: {src_path} -> {dst_path}")
        else:
            print(f"Warning: Benchmark file not found: {src_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Build the CLI, parse it, and hand the options to the worker as kwargs.
    cli_parser = ArgumentParser()
    add_arguments(cli_parser)
    cli_args = cli_parser.parse_args()
    get_wheel_from_package(**vars(cli_args))
|
||||
Loading…
Reference in New Issue
Block a user