mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[ROCm][CI] Optimize ROCm Docker build: registry cache, DeepEP, and ci-bake script (#36949)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -3,12 +3,16 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
# Script to install TorchCodec from source (required for ROCm compatibility)
|
||||
# The PyPI wheel is built against upstream PyTorch and has ABI mismatches with
|
||||
# ROCm's custom torch build, so we must compile from source.
|
||||
|
||||
set -e
|
||||
|
||||
TORCHCODEC_REPO="${TORCHCODEC_REPO:-https://github.com/pytorch/torchcodec.git}"
|
||||
# Pin to a specific release for reproducibility; update as needed.
|
||||
TORCHCODEC_BRANCH="${TORCHCODEC_BRANCH:-v0.10.0}"
|
||||
# Cache directory for pre-built wheels to avoid redundant recompilation.
|
||||
TORCHCODEC_WHEEL_CACHE="${TORCHCODEC_WHEEL_CACHE:-/root/.cache/torchcodec-wheels}"
|
||||
|
||||
echo "=== TorchCodec Installation Script ==="
|
||||
|
||||
@@ -18,9 +22,26 @@ if python3 -c "from torchcodec.decoders import VideoDecoder" 2>/dev/null; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
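# Try to install from a previously built wheel in TORCHCODEC_WHEEL_CACHE first.
# NOTE(review): the cache filename built below carries no python/abi/platform
# tags, so pip may reject it as an invalid wheel filename and always fall back
# to the source build — confirm that this fast path is ever taken.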
|
||||
ARCH_TAG="${PYTORCH_ROCM_ARCH:-all}"
|
||||
# Normalize arch tag (replace ; with _) for use in filename
|
||||
ARCH_TAG="${ARCH_TAG//;/_}"
|
||||
CACHED_WHEEL="${TORCHCODEC_WHEEL_CACHE}/torchcodec-${TORCHCODEC_BRANCH}-${ARCH_TAG}.whl"
|
||||
|
||||
if [ -f "$CACHED_WHEEL" ]; then
|
||||
echo "Found cached wheel: $CACHED_WHEEL"
|
||||
pip install "$CACHED_WHEEL" && {
|
||||
echo "Installed from cached wheel."
|
||||
echo "=== TorchCodec installation complete ==="
|
||||
exit 0
|
||||
}
|
||||
echo "Cached wheel installation failed, rebuilding from source..."
|
||||
fi
|
||||
|
||||
echo "TorchCodec not found. Installing from source..."
|
||||
|
||||
# Install system dependencies (FFmpeg + pkg-config)
|
||||
# Install system dependencies (FFmpeg + pkg-config) if not already present.
|
||||
# The Docker test image pre-installs these, so this is a fallback for other envs.
|
||||
install_system_deps() {
|
||||
if command -v apt-get &> /dev/null; then
|
||||
echo "Installing system dependencies..."
|
||||
@@ -56,6 +77,12 @@ export pybind11_DIR=$(python3 -c "import pybind11; print(pybind11.get_cmake_dir(
|
||||
export CMAKE_PREFIX_PATH="${pybind11_DIR}:${CMAKE_PREFIX_PATH}"
|
||||
echo "pybind11_DIR set to: $pybind11_DIR"
|
||||
|
||||
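# Limit GPU architectures to only what this image targets. This block only logs
# PYTORCH_ROCM_ARCH; the build itself is presumably restricted by that variable
# already being set in the environment.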
|
||||
# By default the build targets all supported archs, which is very slow.
|
||||
if [ -n "$PYTORCH_ROCM_ARCH" ]; then
|
||||
echo "Building for PYTORCH_ROCM_ARCH=$PYTORCH_ROCM_ARCH"
|
||||
fi
|
||||
|
||||
# Create temp directory for build
|
||||
BUILD_DIR=$(mktemp -d -t torchcodec-XXXXXX)
|
||||
echo "Building in temporary directory: $BUILD_DIR"
|
||||
@@ -77,9 +104,31 @@ cd torchcodec
|
||||
export TORCHCODEC_CMAKE_BUILD_DIR="${PWD}/build"
|
||||
export TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR=1
|
||||
export I_CONFIRM_THIS_IS_NOT_A_LICENSE_VIOLATION=1
|
||||
# Use Ninja as the CMake generator for faster builds, and parallelize
# compilation via MAX_JOBS (defaults to the core count reported by nproc).
|
||||
export CMAKE_GENERATOR=Ninja
|
||||
export MAX_JOBS="${MAX_JOBS:-$(nproc)}"
|
||||
# Use ccache if available to speed up recompilation
|
||||
if command -v ccache &> /dev/null; then
|
||||
export CMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
fi
|
||||
|
||||
echo "Building TorchCodec..."
|
||||
pip install . --no-build-isolation
|
||||
echo "Building TorchCodec (MAX_JOBS=$MAX_JOBS)..."
|
||||
pip wheel . --no-build-isolation --no-deps -w "$BUILD_DIR/dist"
|
||||
|
||||
# Install the built wheel
|
||||
BUILT_WHEEL=$(ls "$BUILD_DIR/dist"/torchcodec-*.whl 2>/dev/null | head -1)
|
||||
if [ -z "$BUILT_WHEEL" ]; then
|
||||
echo "Error: No wheel produced"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
pip install "$BUILT_WHEEL"
|
||||
|
||||
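# Cache the wheel for future runs. The copy is stored under CACHED_WHEEL, a
# name keyed by TORCHCODEC_BRANCH and the arch tag, not the built wheel's own
# filename.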
|
||||
mkdir -p "$TORCHCODEC_WHEEL_CACHE"
|
||||
cp "$BUILT_WHEEL" "$CACHED_WHEEL"
|
||||
echo "Cached wheel to: $CACHED_WHEEL"
|
||||
|
||||
# Verify installation
|
||||
echo "Verifying installation..."
|
||||
@@ -88,4 +137,4 @@ if python3 -c "from torchcodec.decoders import VideoDecoder; print('TorchCodec i
|
||||
else
|
||||
echo "Error: TorchCodec installation failed verification"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user