mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[ROCm][CI] Optimize ROCm Docker build: registry cache, DeepEP, and ci-bake script (#36949)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
# CI configuration for the vLLM ROCm pipeline.
# NOTE(review): the meaning of each key is inferred from its name; confirm
# against the pipeline generator that consumes this file.
name: vllm_rocm_ci

# Directories that hold the job definitions for this pipeline (presumably).
job_dirs:
  - '.buildkite/hardware_tests'

# Paths that presumably trigger the complete test set when touched: the ROCm
# Docker build inputs, the AMD CI scripts, dependency pins, and native sources.
run_all_patterns:
  - 'docker/Dockerfile.rocm'
  - 'docker/Dockerfile.rocm_base'
  - 'docker/ci-rocm.hcl'
  - 'docker/docker-bake-rocm.hcl'
  - '.buildkite/hardware_tests/amd.yaml'
  - '.buildkite/scripts/ci-bake-rocm.sh'
  - '.buildkite/scripts/hardware_ci/run-amd-test.py'
  - '.buildkite/scripts/hardware_ci/run-amd-test.sh'
  - 'CMakeLists.txt'
  - 'requirements/common.txt'
  - 'requirements/rocm.txt'
  - 'requirements/build/rocm.txt'
  - 'requirements/test/rocm.txt'
  - 'setup.py'
  - 'csrc/'
  - 'cmake/'

# Exceptions to the patterns above: CPU-only native sources.
run_all_exclude_patterns:
  - 'csrc/cpu/'
  - 'cmake/cpu_extension.cmake'
|
||||
@@ -1,42 +1,73 @@
|
||||
group: Hardware - AMD Build
|
||||
group: Hardware - AMD Build
|
||||
steps:
|
||||
- label: "AMD: :docker: build image"
|
||||
key: image-build-amd
|
||||
# Ensure ci_base is up-to-date before building the test image.
|
||||
# Compares a content hash of ci_base-affecting files against the remote
|
||||
# image label. If hashes match the build is skipped (< 30 s); if they
|
||||
# differ ci_base is rebuilt and pushed automatically.
|
||||
- label: "AMD: :docker: ensure ci_base"
|
||||
key: ensure-ci-base-amd
|
||||
depends_on: []
|
||||
device: amd_cpu
|
||||
no_plugin: true
|
||||
commands:
|
||||
- >
|
||||
docker build
|
||||
--build-arg max_jobs=16
|
||||
--build-arg REMOTE_VLLM=1
|
||||
--build-arg ARG_PYTORCH_ROCM_ARCH='gfx90a;gfx942;gfx950'
|
||||
--build-arg VLLM_BRANCH=$BUILDKITE_COMMIT
|
||||
--tag "rocm/vllm-ci:${BUILDKITE_COMMIT}"
|
||||
-f docker/Dockerfile.rocm
|
||||
--target test
|
||||
--no-cache
|
||||
--progress plain .
|
||||
- |
|
||||
docker run --rm --network=none --entrypoint /bin/bash "rocm/vllm-ci:${BUILDKITE_COMMIT}" -ec '
|
||||
if [ ! -d /vllm-workspace ]; then echo Missing directory: /vllm-workspace >&2; exit 1; fi
|
||||
if [ ! -d /vllm-workspace/tests ]; then echo Missing directory: /vllm-workspace/tests >&2; exit 1; fi
|
||||
if [ ! -d /vllm-workspace/src/vllm ]; then echo Missing directory: /vllm-workspace/src/vllm >&2; exit 1; fi
|
||||
if [ ! -x /vllm-workspace/src/vllm/vllm-rs ]; then echo Missing executable: /vllm-workspace/src/vllm/vllm-rs >&2; exit 1; fi
|
||||
command -v python3
|
||||
command -v uv
|
||||
command -v pytest
|
||||
if ! command -v amd-smi >/dev/null 2>&1 && ! command -v rocminfo >/dev/null 2>&1; then
|
||||
echo No ROCm CLI found in image >&2
|
||||
exit 1
|
||||
fi
|
||||
python3 - <<PY
|
||||
import torch, vllm
|
||||
print(torch.__version__)
|
||||
print(vllm.__version__)
|
||||
PY
|
||||
echo AMD image smoke OK
|
||||
'
|
||||
- docker push "rocm/vllm-ci:${BUILDKITE_COMMIT}"
|
||||
- bash .buildkite/scripts/ci-bake-rocm.sh ci-base-rocm-ci-with-deps
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
VLLM_BAKE_FILE: "docker/docker-bake-rocm.hcl"
|
||||
PYTORCH_ROCM_ARCH: "gfx90a;gfx942;gfx950"
|
||||
REMOTE_VLLM: "1"
|
||||
VLLM_BRANCH: "$BUILDKITE_COMMIT"
|
||||
retry:
|
||||
automatic:
|
||||
- exit_status: -1 # Agent was lost
|
||||
limit: 1
|
||||
- exit_status: -10 # Agent was lost
|
||||
limit: 1
|
||||
|
||||
- label: "AMD: :docker: build test image and artifacts"
|
||||
key: image-build-amd
|
||||
depends_on:
|
||||
- ensure-ci-base-amd
|
||||
device: amd_cpu
|
||||
no_plugin: true
|
||||
commands:
|
||||
- |
|
||||
if [[ "${ROCM_CI_ARTIFACT_ONLY:-0}" == "1" ]]; then
|
||||
echo "ROCM_CI_ARTIFACT_ONLY=1; building ROCm wheel artifact only"
|
||||
IMAGE_TAG="" bash .buildkite/scripts/ci-bake-rocm.sh test-rocm-ci-with-artifacts
|
||||
else
|
||||
bash .buildkite/scripts/ci-bake-rocm.sh test-rocm-ci-with-wheel
|
||||
fi
|
||||
- |
|
||||
docker run --rm --network=none --entrypoint /bin/bash "rocm/vllm-ci:${BUILDKITE_COMMIT}" -ec '
|
||||
if [ ! -d /vllm-workspace ]; then echo Missing directory: /vllm-workspace >&2; exit 1; fi
|
||||
if [ ! -d /vllm-workspace/tests ]; then echo Missing directory: /vllm-workspace/tests >&2; exit 1; fi
|
||||
if [ ! -d /vllm-workspace/src/vllm ]; then echo Missing directory: /vllm-workspace/src/vllm >&2; exit 1; fi
|
||||
if [ ! -x /vllm-workspace/src/vllm/vllm-rs ]; then echo Missing executable: /vllm-workspace/src/vllm/vllm-rs >&2; exit 1; fi
|
||||
command -v python3
|
||||
command -v uv
|
||||
command -v pytest
|
||||
if ! command -v amd-smi >/dev/null 2>&1 && ! command -v rocminfo >/dev/null 2>&1; then
|
||||
echo No ROCm CLI found in image >&2
|
||||
exit 1
|
||||
fi
|
||||
python3 - <<PY
|
||||
import torch, vllm
|
||||
print(torch.__version__)
|
||||
print(vllm.__version__)
|
||||
PY
|
||||
echo AMD image smoke OK
|
||||
'
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
VLLM_BAKE_FILE: "docker/docker-bake-rocm.hcl"
|
||||
PYTORCH_ROCM_ARCH: "gfx90a;gfx942;gfx950"
|
||||
IMAGE_TAG: "rocm/vllm-ci:$BUILDKITE_COMMIT"
|
||||
REMOTE_VLLM: "1"
|
||||
VLLM_BRANCH: "$BUILDKITE_COMMIT"
|
||||
retry:
|
||||
automatic:
|
||||
- exit_status: -1 # Agent was lost
|
||||
limit: 1
|
||||
- exit_status: -10 # Agent was lost
|
||||
limit: 1
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -52,6 +52,108 @@ cleanup_network() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Build (or reuse) a local ROCm test image from a wheel artifact fetched with
# buildkite-agent, layered on top of a CI base image.
#
# Environment read:
#   VLLM_CI_USE_ARTIFACTS  must be "1", otherwise the function returns 1
#   VLLM_CI_ARTIFACT_GLOB  artifact to download
#                          (default: artifacts/vllm-rocm-install/vllm-rocm-install.tar.gz)
#   VLLM_CI_BASE_IMAGE     base image used when the artifact carries no
#                          ci-base-image.txt (default: rocm/vllm-dev:ci_base)
# Globals written:
#   artifact_work_dir      scratch directory created here (not removed here)
#   image_name             set to the artifact image tag on success
# Returns:
#   0 if image_name now names a local artifact image, 1 on any failure.
prepare_artifact_image() {
  if [[ "${VLLM_CI_USE_ARTIFACTS:-0}" != "1" ]]; then
    return 1
  fi
  if ! command -v buildkite-agent >/dev/null 2>&1; then
    echo "buildkite-agent not found; cannot download ROCm wheel artifact"
    return 1
  fi

  local artifact_glob="${VLLM_CI_ARTIFACT_GLOB:-artifacts/vllm-rocm-install/vllm-rocm-install.tar.gz}"
  local archive=""
  local metadata_file=""
  local base_image="${VLLM_CI_BASE_IMAGE:-rocm/vllm-dev:ci_base}"
  local artifact_image=""
  local artifact_key=""
  local base_digest=""
  local wheel_dir=""
  local context_dir=""
  local workspace_dir=""

  # Stop if no scratch directory can be created: with an empty
  # artifact_work_dir the paths below would resolve to /wheels and /context.
  if ! artifact_work_dir=$(mktemp -d -t vllm-rocm-artifact.XXXXXX); then
    artifact_work_dir=""
    echo "Failed to create a temporary directory for the ROCm wheel artifact"
    return 1
  fi
  wheel_dir="${artifact_work_dir}/wheels"
  context_dir="${artifact_work_dir}/context"
  workspace_dir="${context_dir}/workspace"
  mkdir -p "${wheel_dir}" "${context_dir}/wheels" "${workspace_dir}" || return 1

  echo "--- Downloading ROCm wheel artifact"
  if ! buildkite-agent artifact download "${artifact_glob}" "${artifact_work_dir}"; then
    echo "Failed to download ${artifact_glob}"
    return 1
  fi
  # The base-image metadata file is optional; a failed download is ignored and
  # the default base image is used instead.
  buildkite-agent artifact download \
    "artifacts/vllm-rocm-install/ci-base-image.txt" \
    "${artifact_work_dir}" >/dev/null 2>&1 || true

  archive=$(find "${artifact_work_dir}" -name "vllm-rocm-install.tar.gz" -type f | head -1)
  if [[ -z "${archive}" || ! -f "${archive}" ]]; then
    echo "ROCm wheel artifact archive was not found"
    return 1
  fi

  metadata_file=$(find "${artifact_work_dir}" -name "ci-base-image.txt" -type f | head -1)
  if [[ -n "${metadata_file}" && -s "${metadata_file}" ]]; then
    # A non-empty metadata file overrides the base image; whitespace is stripped.
    base_image=$(tr -d '[:space:]' < "${metadata_file}")
  fi

  echo "--- Preparing local ROCm test image"
  echo "Base image: ${base_image}"
  docker pull "${base_image}" || return 1
  # Prefer the repo digest, then the image ID, then the plain reference.
  base_digest=$(
    docker image inspect \
      --format='{{if .RepoDigests}}{{index .RepoDigests 0}}{{else}}{{.Id}}{{end}}' \
      "${base_image}" 2>/dev/null || printf '%s' "${base_image}"
  )

  # Cache key = hash(base image identity + archive contents). The archive is
  # hashed from stdin so the random mktemp path does not enter the key;
  # hashing by file name would make the key differ on every run and the
  # reuse check below could never succeed.
  artifact_key=$(
    {
      printf 'base-image:%s\n' "${base_digest}"
      sha256sum < "${archive}"
    } | sha256sum | cut -c1-24
  )
  artifact_image="rocm/vllm-ci-artifact:${artifact_key}"

  if docker image inspect "${artifact_image}" >/dev/null 2>&1; then
    echo "Using existing local ROCm artifact image: ${artifact_image}"
    image_name="${artifact_image}"
    return 0
  fi

  tar -xzf "${archive}" -C "${wheel_dir}" || return 1
  if ! ls "${wheel_dir}"/*.whl >/dev/null 2>&1; then
    echo "ROCm wheel artifact did not contain a wheel"
    return 1
  fi
  if [[ ! -d "${wheel_dir}/tests" ]]; then
    echo "ROCm wheel artifact did not contain the test workspace"
    return 1
  fi

  # Split the extracted archive: wheels go to the build context's wheels/
  # directory, everything else becomes the workspace copied into the image.
  cp "${wheel_dir}"/*.whl "${context_dir}/wheels/" || return 1
  tar -C "${wheel_dir}" --exclude='*.whl' -cf - . \
    | tar -C "${workspace_dir}" -xf - || return 1
  cat > "${context_dir}/Dockerfile" <<'EOF'
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
COPY wheels/ /tmp/vllm-wheels/
COPY workspace/ /vllm-workspace/
RUN python3 -m pip install --no-deps --force-reinstall /tmp/vllm-wheels/*.whl \
    && rm -rf /tmp/vllm-wheels
WORKDIR /vllm-workspace
EOF

  echo "--- Building local ROCm test image"
  # --pull=false: the base image was pulled explicitly above.
  docker build \
    --pull=false \
    --build-arg "BASE_IMAGE=${base_image}" \
    -t "${artifact_image}" \
    "${context_dir}" || return 1
  image_name="${artifact_image}"
  return 0
}
|
||||
|
||||
is_multi_node() {
|
||||
local cmds="$1"
|
||||
# Primary signal: NUM_NODES environment variable set by the pipeline
|
||||
@@ -243,22 +345,30 @@ report_docker_usage
|
||||
|
||||
# --- Pull test image ---
|
||||
echo "--- Pulling container"
|
||||
image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}"
|
||||
image_name="${VLLM_CI_FALLBACK_IMAGE:-rocm/vllm-ci:${BUILDKITE_COMMIT:-local}}"
|
||||
artifact_work_dir=""
|
||||
container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
|
||||
docker pull "${image_name}"
|
||||
|
||||
remove_docker_container() {
|
||||
# docker run uses --rm, so the container is normally already gone when the
|
||||
# EXIT trap runs. Cleanup is best-effort and must not affect the test result.
|
||||
docker rm -f "${container_name}" >/dev/null 2>&1 || true
|
||||
if docker container inspect "${container_name}" >/dev/null 2>&1; then
|
||||
docker rm -f "${container_name}" || true
|
||||
fi
|
||||
if [[ "${VLLM_CI_REMOVE_TEST_IMAGE:-0}" == "1" ]]; then
|
||||
docker image rm -f "${image_name}" || true
|
||||
else
|
||||
# Keep images by default so later jobs on the same AMD node can reuse layers.
|
||||
echo "Keeping ROCm test image locally: ${image_name}"
|
||||
fi
|
||||
if [[ -n "${artifact_work_dir}" ]]; then
|
||||
rm -rf "${artifact_work_dir}"
|
||||
fi
|
||||
}
|
||||
trap remove_docker_container EXIT
|
||||
|
||||
# EXIT-trap handler: run the container/image cleanup, then terminate with the
# status the script was already exiting with.
on_exit() {
  # Capture $? before anything else runs; any later command would overwrite it.
  local saved_status=$?
  remove_docker_container
  exit "${saved_status}"
}
|
||||
trap on_exit EXIT
|
||||
if ! prepare_artifact_image; then
|
||||
echo "Using full ROCm CI image: ${image_name}"
|
||||
docker pull "${image_name}" || exit 1
|
||||
fi
|
||||
|
||||
# --- Prepare commands ---
|
||||
echo "--- Running container"
|
||||
|
||||
@@ -33,3 +33,10 @@ share/python-wheels/
|
||||
*.egg
|
||||
MANIFEST
|
||||
rust/target/
|
||||
# Not needed in Docker builds
|
||||
docs/
|
||||
.github/
|
||||
.pre-commit-config.yaml
|
||||
.clang-format
|
||||
.gitattributes
|
||||
format.sh
|
||||
|
||||
@@ -81,6 +81,14 @@ function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
|
||||
set_property(GLOBAL APPEND PROPERTY VLLM_HIPIFY_ALL_SRCS ${SRCS})
|
||||
set_property(GLOBAL APPEND PROPERTY VLLM_HIPIFY_ALL_BYPRODUCTS ${HIP_SRCS})
|
||||
|
||||
# Chain hipify targets so they run sequentially. Parallel hipify
|
||||
# invocations race on shutil.copytree, overwriting .hip files
|
||||
# produced by another target back to .cu originals.
|
||||
if (DEFINED _VLLM_LAST_HIPIFY_TARGET)
|
||||
add_dependencies(hipify${NAME} ${_VLLM_LAST_HIPIFY_TARGET})
|
||||
endif()
|
||||
set(_VLLM_LAST_HIPIFY_TARGET "hipify${NAME}" PARENT_SCOPE)
|
||||
|
||||
# Swap out original extension sources with hipified sources.
|
||||
list(APPEND HIP_SRCS ${CXX_SRCS})
|
||||
set(${OUT_SRCS} ${HIP_SRCS} PARENT_SCOPE)
|
||||
|
||||
+200
-108
@@ -2,6 +2,7 @@
|
||||
ARG REMOTE_VLLM="0"
|
||||
ARG COMMON_WORKDIR=/app
|
||||
ARG BASE_IMAGE=rocm/vllm-dev:base
|
||||
ARG CI_BASE_IMAGE=rocm/vllm-dev:ci_base
|
||||
# NIC backend for MoRI RDMA support.
|
||||
# By default (all), drivers and userspace libraries for all supported NIC types
|
||||
# (ainic and bnxt) are installed; MoRI selects the appropriate one at runtime.
|
||||
@@ -16,7 +17,8 @@ ARG NIC_BACKEND=all
|
||||
ARG AINIC_VERSION=1.117.3-hydra
|
||||
ARG UBUNTU_CODENAME=jammy
|
||||
|
||||
# Sccache configuration (only used in release pipeline)
|
||||
# Sccache configuration. Release builds use this today; CI can opt in when a
|
||||
# shared S3-compatible cache backend is available.
|
||||
ARG USE_SCCACHE
|
||||
ARG SCCACHE_DOWNLOAD_URL
|
||||
ARG SCCACHE_ENDPOINT
|
||||
@@ -29,12 +31,16 @@ FROM ${BASE_IMAGE} AS base
|
||||
ARG ARG_PYTORCH_ROCM_ARCH
|
||||
ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
|
||||
|
||||
# Install some basic utilities
|
||||
# Install build dependencies and utilities
|
||||
RUN apt-get update -q -y && apt-get install -q -y \
|
||||
sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
|
||||
apt-transport-https ca-certificates wget curl \
|
||||
libnuma-dev
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
libnuma-dev ccache mold
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
python3 -m pip install --upgrade pip
|
||||
# Note: mold is installed but not set as the system default linker because
|
||||
# some packages use JIT compilation at runtime with flags mold does not support.
|
||||
# Build stages opt in via LDFLAGS="-fuse-ld=mold".
|
||||
# Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
|
||||
ARG USE_SCCACHE
|
||||
RUN if [ "$USE_SCCACHE" != "1" ]; then \
|
||||
@@ -55,6 +61,12 @@ ENV UV_HTTP_TIMEOUT=500
|
||||
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
||||
ENV UV_LINK_MODE=copy
|
||||
# ccache directory - persisted across layer rebuilds via cache mounts.
|
||||
ENV CCACHE_DIR=/root/.cache/ccache
|
||||
ENV CCACHE_COMPILERCHECK=content
|
||||
# Empty by default so build steps fall back to $(nproc); CI can override.
|
||||
ARG max_jobs
|
||||
ENV MAX_JOBS=${max_jobs}
|
||||
|
||||
# Install sccache if USE_SCCACHE is enabled (for release builds)
|
||||
ARG USE_SCCACHE
|
||||
@@ -86,6 +98,7 @@ RUN if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
ARG USE_SCCACHE
|
||||
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}}
|
||||
ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
|
||||
ENV SCCACHE_ENDPOINT=${USE_SCCACHE:+${SCCACHE_ENDPOINT}}
|
||||
ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
|
||||
ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}
|
||||
|
||||
@@ -114,8 +127,7 @@ FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm
|
||||
# -----------------------
|
||||
# Rust build stage
|
||||
# Builds the `vllm-rs` frontend in a dedicated stage so the wheel build stages
|
||||
# don't need the rust toolchain or protoc. Runs in parallel with the main wheel
|
||||
# build for faster end-to-end builds.
|
||||
# don't need the rust toolchain or protoc.
|
||||
FROM fetch_vllm AS rust-build
|
||||
ARG COMMON_WORKDIR
|
||||
|
||||
@@ -144,24 +156,74 @@ ENV RUSTUP_MAX_RETRIES=10
|
||||
# layer for later COPY --from=rust-build.
|
||||
RUN --mount=type=cache,id=vllm-rocm-cargo-registry,target=/root/.cargo/registry,sharing=locked \
|
||||
--mount=type=cache,id=vllm-rocm-cargo-git,target=/root/.cargo/git,sharing=locked \
|
||||
--mount=type=cache,id=vllm-rocm-cargo-target,target=${COMMON_WORKDIR}/vllm/rust/target,sharing=locked \
|
||||
cd ${COMMON_WORKDIR}/vllm \
|
||||
&& VLLM_RS_TARGET_PATH=/tmp/vllm-rs bash build_rust.sh \
|
||||
&& test -x /tmp/vllm-rs
|
||||
|
||||
# -----------------------
|
||||
# vLLM build stages
|
||||
# vLLM native build stages
|
||||
#
|
||||
# csrc-build intentionally copies only files that affect ROCm native extension
|
||||
# compilation. That keeps unrelated CI/test/docs edits from invalidating the
|
||||
# expensive HIP/C++ build layer.
|
||||
FROM base AS csrc-build
|
||||
ARG COMMON_WORKDIR
|
||||
WORKDIR ${COMMON_WORKDIR}/vllm
|
||||
|
||||
COPY requirements/rocm.txt requirements/rocm.txt
|
||||
COPY requirements/common.txt requirements/common.txt
|
||||
RUN --mount=type=cache,id=vllm-rocm-uv,target=/root/.cache/uv \
|
||||
uv pip install --system -r requirements/rocm.txt
|
||||
|
||||
# pyproject.toml is bind-mounted in the RUN step so metadata-only changes do
|
||||
# not invalidate the expensive native build layer.
|
||||
COPY setup.py CMakeLists.txt ./
|
||||
COPY cmake cmake/
|
||||
COPY csrc csrc/
|
||||
COPY vllm/envs.py vllm/envs.py
|
||||
COPY vllm/__init__.py vllm/__init__.py
|
||||
|
||||
ENV VLLM_TARGET_DEVICE=rocm
|
||||
ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+rocm.csrc.build"
|
||||
|
||||
RUN --mount=type=bind,source=pyproject.toml,target=${COMMON_WORKDIR}/vllm/pyproject.toml \
|
||||
--mount=type=cache,id=vllm-rocm-ccache,target=/root/.cache/ccache \
|
||||
export CCACHE_BASEDIR="$PWD" \
|
||||
&& echo "=== ccache stats before ROCm native build ===" \
|
||||
&& (ccache --show-stats || true) \
|
||||
&& (ccache --zero-stats || true) \
|
||||
&& EFFECTIVE_MAX_JOBS="${MAX_JOBS:-$(nproc)}" \
|
||||
&& echo "Building ROCm native extension wheel with MAX_JOBS=${EFFECTIVE_MAX_JOBS}" \
|
||||
&& LDFLAGS="-fuse-ld=mold" MAX_JOBS="${EFFECTIVE_MAX_JOBS}" python3 setup.py bdist_wheel --dist-dir=dist \
|
||||
&& test -d dist \
|
||||
&& ls dist/*.whl >/dev/null \
|
||||
&& echo "=== ccache stats after ROCm native build ===" \
|
||||
&& (ccache --show-stats || true)
|
||||
|
||||
# Build the full vLLM ROCm wheel by reusing the native extension wheel from
|
||||
# csrc-build. This stage still rebuilds for Python/package changes, but skips
|
||||
# the expensive HIP/C++ compile when native inputs are unchanged.
|
||||
FROM fetch_vllm AS build_vllm
|
||||
ARG COMMON_WORKDIR
|
||||
ENV VLLM_TARGET_DEVICE=rocm
|
||||
|
||||
COPY --from=csrc-build ${COMMON_WORKDIR}/vllm/dist /precompiled-wheels
|
||||
|
||||
# Drop the pre-built rust frontend binary into the source tree. setup.py
|
||||
# detects it and ships it as-is, skipping the local cargo build.
|
||||
COPY --from=rust-build /tmp/vllm-rs ${COMMON_WORKDIR}/vllm/vllm/vllm-rs
|
||||
|
||||
# Build vLLM (setup.py auto-detects sccache in PATH)
|
||||
RUN cd vllm \
|
||||
&& python3 -m pip install -r requirements/rocm.txt \
|
||||
&& python3 setup.py clean --all \
|
||||
&& python3 setup.py bdist_wheel --dist-dir=dist
|
||||
RUN --mount=type=cache,id=vllm-rocm-uv,target=/root/.cache/uv \
|
||||
cd vllm \
|
||||
&& uv pip install --system -r requirements/rocm.txt \
|
||||
&& export VLLM_USE_PRECOMPILED=1 \
|
||||
&& export VLLM_PRECOMPILED_WHEEL_LOCATION="$(ls /precompiled-wheels/*.whl)" \
|
||||
&& export VLLM_DOCKER_BUILD_CONTEXT=1 \
|
||||
&& echo "Packaging vLLM ROCm wheel using precompiled extensions from ${VLLM_PRECOMPILED_WHEEL_LOCATION}" \
|
||||
&& python3 setup.py bdist_wheel --dist-dir=dist \
|
||||
&& test -d dist \
|
||||
&& ls dist/*.whl >/dev/null
|
||||
FROM scratch AS export_vllm
|
||||
ARG COMMON_WORKDIR
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/dist/*.whl /
|
||||
@@ -171,6 +233,7 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/pyproject.toml /pyproject.toml
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
|
||||
|
||||
# RIXL/UCX build stages
|
||||
@@ -201,14 +264,17 @@ RUN apt-get -y update && apt-get -y install autoconf libtool pkg-config \
|
||||
ibverbs-providers \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN uv pip install --system meson auditwheel patchelf tomlkit
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system meson auditwheel patchelf tomlkit
|
||||
|
||||
RUN cd /usr/local/src && \
|
||||
RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||
cd /usr/local/src && \
|
||||
git clone ${UCX_REPO} && \
|
||||
cd ucx && \
|
||||
git checkout ${UCX_BRANCH} && \
|
||||
./autogen.sh && \
|
||||
mkdir build && cd build && \
|
||||
CC="ccache gcc" CXX="ccache g++" \
|
||||
../configure \
|
||||
--prefix=/usr/local/ucx \
|
||||
--enable-shared \
|
||||
@@ -220,20 +286,22 @@ RUN cd /usr/local/src && \
|
||||
--with-verbs \
|
||||
--with-dm \
|
||||
--enable-mt && \
|
||||
make -j && \
|
||||
make -j$(nproc) && \
|
||||
make install
|
||||
|
||||
ENV PATH=/usr/local/ucx/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=${UCX_HOME}/lib:${LD_LIBRARY_PATH}
|
||||
|
||||
RUN git clone ${RIXL_REPO} /opt/rixl && \
|
||||
RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||
git clone ${RIXL_REPO} /opt/rixl && \
|
||||
cd /opt/rixl && \
|
||||
git checkout ${RIXL_BRANCH} && \
|
||||
CC="ccache gcc" CXX="ccache g++" \
|
||||
meson setup build --prefix=${RIXL_HOME} \
|
||||
-Ducx_path=${UCX_HOME} \
|
||||
-Drocm_path=${ROCM_PATH} && \
|
||||
cd build && \
|
||||
ninja && \
|
||||
ninja -j$(nproc) && \
|
||||
ninja install
|
||||
|
||||
# Generate RIXL wheel
|
||||
@@ -250,30 +318,44 @@ RUN cd /opt/rixl && \
|
||||
--ucx-plugins-dir ${UCX_HOME}/lib/ucx \
|
||||
--nixl-plugins-dir ${RIXL_HOME}/lib/x86_64-linux-gnu/plugins
|
||||
|
||||
# DeepEP build stage
|
||||
FROM base AS build_deep
|
||||
# ROCShmem build stage - split from DeepEP so changing DEEPEP_BRANCH does not
|
||||
# invalidate the slow ROCShmem build.
|
||||
FROM base AS build_rocshmem
|
||||
ARG ROCSHMEM_BRANCH="f0acb0c6"
|
||||
ARG ROCSHMEM_REPO="https://github.com/ROCm/rocm-systems.git"
|
||||
ARG DEEPEP_BRANCH="a9ea9774"
|
||||
ARG DEEPEP_REPO="https://github.com/ROCm/DeepEP.git"
|
||||
ARG DEEPEP_NIC="cx7"
|
||||
# DeepEP only supports gfx942 and gfx950; build ROCShmem for the same set so
|
||||
# it can be linked against DeepEP without arch mismatches.
|
||||
ARG DEEPEP_ROCM_ARCH="gfx942;gfx950"
|
||||
ENV ROCM_PATH=/opt/rocm
|
||||
ENV ROCSHMEM_DIR=/opt/rocshmem
|
||||
|
||||
RUN git clone ${ROCSHMEM_REPO} \
|
||||
RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||
git clone --no-checkout --filter=blob:none ${ROCSHMEM_REPO} \
|
||||
&& cd rocm-systems \
|
||||
&& git sparse-checkout set --cone projects/rocshmem \
|
||||
&& git checkout ${ROCSHMEM_BRANCH} \
|
||||
&& mkdir -p projects/rocshmem/build \
|
||||
&& cd projects/rocshmem/build \
|
||||
&& INSTALL_PREFIX=${ROCSHMEM_DIR} \
|
||||
../scripts/build_configs/all_backends -DUSE_EXTERNAL_MPI=OFF
|
||||
&& CC="ccache gcc" CXX="ccache g++" INSTALL_PREFIX=${ROCSHMEM_DIR} \
|
||||
bash ../scripts/build_configs/all_backends \
|
||||
-DROCM_PATH=${ROCM_PATH} \
|
||||
-DGPU_TARGETS="${DEEPEP_ROCM_ARCH}" \
|
||||
-DUSE_EXTERNAL_MPI=OFF
|
||||
|
||||
# Build DeepEP wheel.
|
||||
# DeepEP looks for rocshmem at ROCSHMEM_DIR.
|
||||
RUN git clone ${DEEPEP_REPO} \
|
||||
# DeepEP build stage - depends on ROCShmem, builds the HIP kernel wheel.
|
||||
FROM build_rocshmem AS build_deepep
|
||||
ARG DEEPEP_BRANCH="a9ea9774"
|
||||
ARG DEEPEP_REPO="https://github.com/ROCm/DeepEP.git"
|
||||
ARG DEEPEP_NIC="cx7"
|
||||
|
||||
# Build DeepEP wheel. DeepEP looks for rocshmem at ROCSHMEM_DIR.
|
||||
# DeepEP only supports gfx942 and gfx950, so avoid gfx90a in the default list.
|
||||
RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||
export PYTORCH_ROCM_ARCH="gfx942;gfx950" \
|
||||
&& git clone ${DEEPEP_REPO} \
|
||||
&& cd DeepEP \
|
||||
&& git checkout ${DEEPEP_BRANCH} \
|
||||
&& python3 setup.py --variant rocm --rocm-explicit-ctx --nic ${DEEPEP_NIC} bdist_wheel --dist-dir=/app/deep_install
|
||||
&& LDFLAGS="-fuse-ld=mold" MAX_JOBS="${MAX_JOBS:-$(nproc)}" python3 setup.py --variant rocm --rocm-explicit-ctx --nic ${DEEPEP_NIC} bdist_wheel --dist-dir=/app/deep_install
|
||||
|
||||
# MoRI runtime dependencies live in Dockerfile.rocm so NIC backend changes do
|
||||
# not force users to rebuild the long-lived Dockerfile.rocm_base image.
|
||||
@@ -372,8 +454,9 @@ RUN if [ "$GIT_REPO_CHECK" != "0" ]; then \
|
||||
# Extract version from git BEFORE any modifications (pin_rocm_dependencies.py modifies requirements/rocm.txt)
|
||||
# This ensures setuptools_scm sees clean repo state for version detection
|
||||
RUN --mount=type=bind,source=.git,target=vllm/.git \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
cd vllm \
|
||||
&& pip install setuptools_scm regex \
|
||||
&& uv pip install --system setuptools_scm regex \
|
||||
&& VLLM_VERSION=$(python3 -c "import setuptools_scm; print(setuptools_scm.get_version())") \
|
||||
&& echo "Detected vLLM version: ${VLLM_VERSION}" \
|
||||
&& echo "${VLLM_VERSION}" > /tmp/vllm_version.txt
|
||||
@@ -409,18 +492,20 @@ RUN echo "Pinning vLLM dependencies to custom wheel versions..." \
|
||||
&& python3 /tmp/pin_rocm_dependencies.py /install ${COMMON_WORKDIR}/vllm/requirements/rocm.txt
|
||||
|
||||
# Install dependencies using custom wheels from /install
|
||||
RUN cd vllm \
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
cd vllm \
|
||||
&& echo "Building vLLM with custom wheels from /install" \
|
||||
&& python3 -m pip install --find-links /install -r requirements/rocm.txt \
|
||||
&& python3 setup.py clean --all
|
||||
&& uv pip install --system --find-links /install -r requirements/rocm.txt
|
||||
|
||||
# Build wheel using pre-extracted version to avoid dirty state from modified requirements/rocm.txt
|
||||
# (setup.py auto-detects sccache in PATH)
|
||||
# (setup.py auto-detects ccache/sccache in PATH)
|
||||
RUN --mount=type=bind,source=.git,target=vllm/.git \
|
||||
--mount=type=cache,id=vllm-rocm-ccache,target=/root/.cache/ccache \
|
||||
cd vllm \
|
||||
&& export CCACHE_BASEDIR="$PWD" \
|
||||
&& export SETUPTOOLS_SCM_PRETEND_VERSION=$(cat /tmp/vllm_version.txt) \
|
||||
&& echo "Building wheel with version: ${SETUPTOOLS_SCM_PRETEND_VERSION}" \
|
||||
&& python3 setup.py bdist_wheel --dist-dir=dist
|
||||
&& MAX_JOBS="${MAX_JOBS:-$(nproc)}" python3 setup.py bdist_wheel --dist-dir=dist
|
||||
|
||||
FROM scratch AS export_vllm_wheel_release
|
||||
ARG COMMON_WORKDIR
|
||||
@@ -431,112 +516,118 @@ COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/tests /tests
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/examples /examples
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/pyproject.toml /pyproject.toml
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
|
||||
|
||||
# -----------------------
|
||||
# Test vLLM image
|
||||
FROM mori_base AS test
|
||||
# CI base image (Tier 1) - stable, rarely changing CI dependencies.
|
||||
# Per-PR test builds pull this as CI_BASE_IMAGE so the test stage only layers
|
||||
# in the vLLM artifacts for the current commit.
|
||||
FROM mori_base AS ci_base
|
||||
ARG COMMON_WORKDIR
|
||||
|
||||
RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install vLLM using uv (inherited from base stage)
|
||||
# Note: No -U flag to avoid upgrading PyTorch ROCm to CUDA version
|
||||
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
cd /install \
|
||||
&& uv pip install --system -r requirements/rocm.txt \
|
||||
&& uv pip install --system -r requirements/test/rocm.txt \
|
||||
&& pip uninstall -y vllm \
|
||||
&& uv pip install --system *.whl
|
||||
|
||||
# Persist the built wheel in the image so python_only_compile_rocm.sh can
|
||||
# reinstall it after removing compilers. The bind-mounted /install contents
|
||||
# above are not available once that RUN step completes.
|
||||
COPY --from=export_vllm /*.whl /opt/vllm-wheels/
|
||||
|
||||
# Update rdma-core to support latest rocshmem
|
||||
# Update rdma-core to support latest rocshmem.
|
||||
ARG DEEPEP_NIC
|
||||
RUN if [ "${DEEPEP_NIC}" = "cx7" ] || [ "${DEEPEP_NIC}" = "io" ]; then \
|
||||
git clone --branch v62.0 --depth 1 https://github.com/linux-rdma/rdma-core.git /tmp/rdma-core && \
|
||||
cd /tmp/rdma-core && \
|
||||
mkdir -p build && cd build && \
|
||||
cmake -GNinja -DCMAKE_INSTALL_PREFIX=/usr -DNO_MAN_PAGES=1 .. && \
|
||||
ninja && ninja install && ldconfig && rm -rf /tmp/rdma-core; \
|
||||
ninja && ninja install && ldconfig && rm -rf /tmp/rdma-core; \
|
||||
fi
|
||||
|
||||
# Install RIXL wheel
|
||||
# Install RIXL + DeepEP wheels.
|
||||
RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
|
||||
uv pip install --system /rixl_install/*.whl
|
||||
--mount=type=bind,from=build_deepep,src=/app/deep_install,target=/deep_install \
|
||||
uv pip install --system /rixl_install/*.whl /deep_install/*.whl
|
||||
|
||||
# Install DeepEP wheel
|
||||
RUN --mount=type=bind,from=build_deep,src=/app/deep_install,target=/deep_install \
|
||||
uv pip install --system /deep_install/*.whl
|
||||
COPY --from=build_deep /opt/rocshmem /opt/rocshmem
|
||||
# Copy ROCShmem runtime libraries.
|
||||
COPY --from=build_rocshmem /opt/rocshmem /opt/rocshmem
|
||||
|
||||
# RIXL/MoRIIO runtime dependencies (RDMA userspace libraries)
|
||||
RUN apt-get update -q -y && apt-get install -q -y \
|
||||
# RDMA userspace libraries plus FFmpeg dev libs needed by torchcodec.
|
||||
RUN apt-get update -q -y && apt-get install -q -y --no-install-recommends \
|
||||
librdmacm1 \
|
||||
libibverbs1 \
|
||||
ibverbs-providers \
|
||||
ibverbs-utils \
|
||||
pkg-config ffmpeg libavcodec-dev libavformat-dev libavutil-dev \
|
||||
libswscale-dev libavdevice-dev libavfilter-dev libswresample-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /vllm-workspace
|
||||
ARG COMMON_WORKDIR
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace
|
||||
|
||||
# install development dependencies (for testing)
|
||||
RUN cd /vllm-workspace \
|
||||
&& python3 -m pip install -e tests/vllm_test_utils \
|
||||
&& python3 -m pip install pytest-shard
|
||||
|
||||
# enable fast downloads from hf (for testing)
|
||||
ENV HF_XET_HIGH_PERFORMANCE=1
|
||||
|
||||
# increase timeout for hf downloads (for testing)
|
||||
ENV HF_HUB_DOWNLOAD_TIMEOUT 60
|
||||
|
||||
# install audio decode package `torchcodec` from source (required due to
|
||||
# ROCm and torch version mismatch) for tests with datasets package
|
||||
# Install torchcodec from source for ROCm/torch ABI compatibility.
|
||||
COPY tools/install_torchcodec_rocm.sh /tmp/install_torchcodec.sh
|
||||
RUN bash /tmp/install_torchcodec.sh \
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
--mount=type=cache,target=/root/.cache/pip \
|
||||
--mount=type=cache,target=/root/.cache/torchcodec-wheels \
|
||||
bash /tmp/install_torchcodec.sh \
|
||||
&& rm /tmp/install_torchcodec.sh \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy in the v1 package (for python-only install test group)
|
||||
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
|
||||
# Pre-install shared ROCm runtime dependencies.
|
||||
COPY requirements/common.txt requirements/rocm.txt /tmp/ci-base-requirements/
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system -r /tmp/ci-base-requirements/rocm.txt \
|
||||
&& rm -rf /tmp/ci-base-requirements
|
||||
|
||||
# Set MIOPEN ENVS to resolve performance regressions in MIOpen 3D convolution kernel
|
||||
# Enable fast and less brittle model downloads in tests.
|
||||
ENV HF_XET_HIGH_PERFORMANCE=1
|
||||
ENV HF_HUB_DOWNLOAD_TIMEOUT=60
|
||||
|
||||
# Pre-install vLLM test dependencies.
|
||||
COPY requirements/test/rocm.txt /tmp/rocm-test-reqs.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system -r /tmp/rocm-test-reqs.txt
|
||||
|
||||
# Rebuild fastsafetensors from source so its C++ extension is compiled with
|
||||
# USE_ROCM and can detect libamdhip64.so at runtime.
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
FASTSAFETENSORS_REQ="$(grep -E '^fastsafetensors(==| @ )' /tmp/rocm-test-reqs.txt | head -1)" \
|
||||
&& test -n "${FASTSAFETENSORS_REQ}" \
|
||||
&& python3 -m pip install --force-reinstall --no-deps \
|
||||
--no-binary fastsafetensors "${FASTSAFETENSORS_REQ}" \
|
||||
&& rm /tmp/rocm-test-reqs.txt
|
||||
|
||||
# Set MIOPEN ENVS to resolve performance regressions in MIOpen 3D convolution kernel.
|
||||
# See: https://github.com/pytorch/pytorch/issues/169857
|
||||
ENV MIOPEN_DEBUG_CONV_DIRECT=0
|
||||
ENV MIOPEN_DEBUG_CONV_GEMM=0
|
||||
|
||||
# Use legacy IPC mode for HSA to avoid GPU memory pinning issues with UCX rocm_ipc
|
||||
# Use legacy IPC mode for HSA to avoid GPU memory pinning issues with UCX rocm_ipc.
|
||||
# See: https://github.com/ROCm/rocm-libraries/issues/6266
|
||||
ENV HSA_ENABLE_IPC_MODE_LEGACY=1
|
||||
|
||||
# Source code is used in the `python_only_compile.sh` test
|
||||
# We hide it inside `src/` so that this source code
|
||||
# will not be imported by other tests
|
||||
RUN mkdir src && mv vllm src/vllm
|
||||
# ROCm profiler limits workaround.
|
||||
RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf
|
||||
ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf"
|
||||
|
||||
# This is a workaround to ensure pytest exits with the correct status code in CI tests.
|
||||
RUN printf '%s\n' \
|
||||
'import os' \
|
||||
'' \
|
||||
'_exit_code = 1' \
|
||||
'' \
|
||||
'def pytest_sessionfinish(session, exitstatus):' \
|
||||
' global _exit_code' \
|
||||
' _exit_code = int(exitstatus)' \
|
||||
'' \
|
||||
'def pytest_unconfigure(config):' \
|
||||
' import sys' \
|
||||
' sys.stdout.flush()' \
|
||||
' sys.stderr.flush()' \
|
||||
' os._exit(_exit_code)' \
|
||||
> /vllm-workspace/conftest.py
|
||||
# Install vllm_test_utils in ci_base for ci_base + wheel parity.
|
||||
COPY tests/vllm_test_utils /tmp/vllm_test_utils
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system /tmp/vllm_test_utils \
|
||||
&& rm -rf /tmp/vllm_test_utils
|
||||
|
||||
# -----------------------
|
||||
# Test vLLM image (Tier 2) - vLLM-only layer on top of ci_base.
|
||||
FROM ${CI_BASE_IMAGE} AS test
|
||||
ARG COMMON_WORKDIR
|
||||
|
||||
# Install the vLLM wheel (--no-deps: all deps already in ci_base).
|
||||
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
cd /install \
|
||||
&& uv pip install --system --no-deps *.whl
|
||||
|
||||
# Store the vLLM wheel in the image for python-only install tests.
|
||||
COPY --from=export_vllm /*.whl /opt/vllm-wheels/
|
||||
|
||||
WORKDIR /vllm-workspace
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace
|
||||
|
||||
# Copy in the v1 package (for python-only install test group).
|
||||
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
|
||||
|
||||
# Hide source under src/ so it won't shadow the installed package in tests.
|
||||
RUN mkdir src && mv vllm src/vllm
|
||||
|
||||
# -----------------------
|
||||
# Final vLLM image
|
||||
@@ -553,6 +644,7 @@ RUN rm -f /usr/bin/sccache || true \
|
||||
# This prevents S3 bucket config from leaking into production images
|
||||
ENV SCCACHE_BUCKET=
|
||||
ENV SCCACHE_REGION=
|
||||
ENV SCCACHE_ENDPOINT=
|
||||
ENV SCCACHE_S3_NO_CREDENTIALS=
|
||||
ENV SCCACHE_IDLE_TIMEOUT=
|
||||
|
||||
|
||||
@@ -0,0 +1,376 @@
|
||||
# ci-rocm.hcl - CI-specific configuration for vLLM ROCm Docker builds
|
||||
#
|
||||
# This file lives in the vLLM repo at docker/ci-rocm.hcl so ROCm Docker
|
||||
# build mechanics can evolve with Dockerfile.rocm and docker-bake-rocm.hcl.
|
||||
# Used with: docker buildx bake -f docker/docker-bake-rocm.hcl -f docker/ci-rocm.hcl test-rocm-ci
|
||||
#
|
||||
# Registry cache: Docker Hub (rocm/vllm-ci-cache) is used exclusively.
|
||||
# AMD build agents already have Docker Hub credentials (they push the test
|
||||
# image to rocm/vllm-ci), so no additional credential setup is required.
|
||||
# ROCm CI uses Docker Hub for BuildKit layer cache by default. A separate
|
||||
# compiler cache can be enabled with USE_SCCACHE=1 when AMD provides a shared
|
||||
# S3-compatible cache endpoint.
|
||||
|
||||
# CI metadata
|
||||
|
||||
# CI metadata inputs. Every one defaults to the empty string, which the cache
# functions in this file treat as "not provided".
variable "BUILDKITE_COMMIT"       { default = "" }
variable "BUILDKITE_BUILD_NUMBER" { default = "" }
variable "BUILDKITE_BUILD_ID"     { default = "" }
variable "PARENT_COMMIT"          { default = "" }
|
||||
|
||||
# Merge-base of HEAD with main - provides a more stable cache fallback than
|
||||
# parent commit for long-lived PRs. Mirrors the VLLM_MERGE_BASE_COMMIT
|
||||
# pattern used in the shared ci.hcl file. Auto-computed by ci-bake-rocm.sh
|
||||
# when unset.
|
||||
variable "VLLM_MERGE_BASE_COMMIT" { default = "" }

# COMMIT feeds the OCI revision label/annotation; in CI it mirrors the
# Buildkite commit.
variable "COMMIT" { default = BUILDKITE_COMMIT }
|
||||
|
||||
# Image tags (set by CI)
|
||||
|
||||
# Both tags are optional; empty values are dropped by compact() in test-rocm-ci.
variable "IMAGE_TAG"        { default = "" }
variable "IMAGE_TAG_LATEST" { default = "" }

# ROCm-specific GPU architecture targets (semicolon-separated list).
variable "PYTORCH_ROCM_ARCH" { default = "gfx90a;gfx942;gfx950" }
|
||||
|
||||
# Pre-built CI base image (Tier 1). Per-PR builds pull this instead of
|
||||
# rebuilding RIXL/DeepEP/torchcodec from scratch. The ci_base stage in
|
||||
# Dockerfile.rocm inherits from base, so CI_BASE_IMAGE only affects the test
|
||||
# stage and is irrelevant when building --target ci_base itself.
|
||||
variable "CI_BASE_IMAGE" { default = "rocm/vllm-dev:ci_base" }

# An empty CI_MAX_JOBS is passed through as an empty max_jobs build arg, so the
# Dockerfile's $(nproc) fallback applies and the builder's full parallelism is
# used. Set it explicitly to cap parallelism for a given build.
variable "CI_MAX_JOBS" { default = "" }
|
||||
|
||||
# Upstream dependency commit pins -- extracted from Dockerfile.rocm by
|
||||
# ci-bake-rocm.sh at build time. Empty defaults are safe: the cache
|
||||
# functions produce no entries when the variable is empty.
|
||||
# Upstream pins (used as fallback cache keys by the *_deps functions).
variable "RIXL_BRANCH"     { default = "" }
variable "UCX_BRANCH"      { default = "" }
variable "ROCSHMEM_BRANCH" { default = "" }
variable "DEEPEP_BRANCH"   { default = "" }

# Explicit cache keys; when non-empty they take precedence over the pins above.
variable "RIXL_CACHE_KEY"     { default = "" }
variable "ROCSHMEM_CACHE_KEY" { default = "" }
variable "DEEPEP_CACHE_KEY"   { default = "" }
|
||||
|
||||
# Docker Hub registry cache for AMD builds.
|
||||
#
|
||||
# A separate repo (rocm/vllm-ci-cache) is used for BuildKit layer cache.
|
||||
# Final-image cache exports use mode=min to reduce the volume of data pushed.
|
||||
# Source-scoped csrc cache exports default to mode=max so fresh workers can
|
||||
# recover more of the native build graph when ROCm extension inputs change.
|
||||
# NOTE: mode=min still includes all layers referenced by the final image
|
||||
# manifest, including inherited base layers (~7.25GB ROCm runtime).
|
||||
# Docker Hub auto-creates the repo on first push.
|
||||
#
|
||||
# Final-image cache stays commit-scoped. Branch-to-branch reuse for the test
|
||||
# image comes from importing the parent and merge-base commit cache refs.
|
||||
#
|
||||
# The source-scoped native cache is exported both per-commit and per-branch so
|
||||
# ROCm extension rebuilds are shareable within the same commit reruns and across
|
||||
# consecutive commits on the same branch without depending on a single global
|
||||
# latest tag.
|
||||
|
||||
# Repository that holds every registry cache ref built by the functions below.
variable "DOCKERHUB_CACHE_REPO" { default = "rocm/vllm-ci-cache" }

# NOTE(review): not referenced anywhere else in this file — presumably consumed
# by ci-bake-rocm.sh; confirm before removing.
variable "DOCKERHUB_CACHE_TO" { default = "" }

# Branch-derived tag suffixes for the branch-scoped cache refs.
variable "ROCM_CACHE_BRANCH_TAG"          { default = "" }
variable "ROCM_CACHE_UPSTREAM_BRANCH_TAG" { default = "" }

# BuildKit cache export modes: csrc cache exports use "max", final-image cache
# exports use "min".
variable "ROCM_CSRC_CACHE_TO_MODE"  { default = "max" }
variable "ROCM_FINAL_CACHE_TO_MODE" { default = "min" }
|
||||
|
||||
# Functions
|
||||
|
||||
# Cache import list for the final test image.
# Each entry evaluates to "" when its key variable is unset; compact() then
# drops those placeholders. Entry order is significant and kept as-is.
function "get_cache_from_rocm" {
  params = []
  result = compact([
    # --- Final-image cache, commit-scoped ---
    # Same commit: best hit when a build of this commit is re-run.
    (BUILDKITE_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocm-${BUILDKITE_COMMIT}"),
    # Parent commit: covers incremental changes.
    (PARENT_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocm-${PARENT_COMMIT}"),
    # Merge-base with main: stable fallback for long-lived or rebased PRs.
    (VLLM_MERGE_BASE_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocm-${VLLM_MERGE_BASE_COMMIT}"),

    # --- Source-scoped native build cache ---
    # Imported too so compiled ROCm objects remain reusable when only the
    # Python/package layers changed.
    (BUILDKITE_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-${BUILDKITE_COMMIT}"),
    (PARENT_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-${PARENT_COMMIT}"),
    (VLLM_MERGE_BASE_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-${VLLM_MERGE_BASE_COMMIT}"),
    (ROCM_CACHE_BRANCH_TAG == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-branch-${ROCM_CACHE_BRANCH_TAG}"),
    (ROCM_CACHE_UPSTREAM_BRANCH_TAG == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-branch-${ROCM_CACHE_UPSTREAM_BRANCH_TAG}"),

    # --- Final-image cache, branch-scoped ---
    # Fallback for when the parent-commit cache has been evicted.
    (ROCM_CACHE_BRANCH_TAG == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocm-branch-${ROCM_CACHE_BRANCH_TAG}"),
    (ROCM_CACHE_UPSTREAM_BRANCH_TAG == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocm-branch-${ROCM_CACHE_UPSTREAM_BRANCH_TAG}"),
  ])
}
|
||||
|
||||
# Cache export list for the final test image. Both refs use
# ROCM_FINAL_CACHE_TO_MODE; refs whose key variable is empty are omitted.
function "get_cache_to_rocm" {
  params = []
  result = compact([
    # Per-commit ref, for exact re-runs of the same commit.
    (BUILDKITE_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocm-${BUILDKITE_COMMIT},mode=${ROCM_FINAL_CACHE_TO_MODE}"),
    # Per-branch ref, so later commits on the branch can reuse the full image
    # layers once the parent-commit cache is gone. This replaces the former
    # global rocm-latest tag, which caused duplicate-exporter 400 errors.
    (ROCM_CACHE_BRANCH_TAG == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocm-branch-${ROCM_CACHE_BRANCH_TAG},mode=${ROCM_FINAL_CACHE_TO_MODE}"),
  ])
}
|
||||
|
||||
# Cache import list for the source-scoped native (csrc) build stage:
# commit, parent, merge-base, then the two branch-scoped refs.
# Unset keys yield "" and are removed by compact().
function "get_cache_from_rocm_csrc" {
  params = []
  result = compact([
    (BUILDKITE_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-${BUILDKITE_COMMIT}"),
    (PARENT_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-${PARENT_COMMIT}"),
    (VLLM_MERGE_BASE_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-${VLLM_MERGE_BASE_COMMIT}"),
    (ROCM_CACHE_BRANCH_TAG == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-branch-${ROCM_CACHE_BRANCH_TAG}"),
    (ROCM_CACHE_UPSTREAM_BRANCH_TAG == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-branch-${ROCM_CACHE_UPSTREAM_BRANCH_TAG}"),
  ])
}
|
||||
|
||||
# Cache export list for the csrc build stage. Both refs use
# ROCM_CSRC_CACHE_TO_MODE; refs whose key variable is empty are omitted.
function "get_cache_to_rocm_csrc" {
  params = []
  result = compact([
    # Exact-commit native cache, for re-runs of the same commit.
    (BUILDKITE_COMMIT == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-${BUILDKITE_COMMIT},mode=${ROCM_CSRC_CACHE_TO_MODE}"),
    # Branch-scoped native cache, so later commits on the branch can reuse
    # compiled ROCm objects even without the exact parent cache.
    (ROCM_CACHE_BRANCH_TAG == ""
      ? ""
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:csrc-rocm-branch-${ROCM_CACHE_BRANCH_TAG},mode=${ROCM_CSRC_CACHE_TO_MODE}"),
  ])
}
|
||||
|
||||
# Cache functions for upstream dependency stages (RIXL/UCX, ROCShmem, DeepEP).
|
||||
# These stages are pinned to specific upstream commit hashes, so cache keys use
|
||||
# those hashes rather than the Buildkite commit. This means the cache persists
|
||||
# across all vLLM commits as long as the upstream dependency pins don't change.
|
||||
|
||||
# Cache import list for the upstream dependency stages.
# For each dependency: use the explicit *_CACHE_KEY when set; otherwise derive
# the ref from the *_BRANCH pin(s); otherwise contribute nothing.
function "get_cache_from_rocm_deps" {
  params = []
  result = compact([
    # RIXL — fallback key combines the RIXL and UCX pins.
    (RIXL_CACHE_KEY == ""
      ? (RIXL_BRANCH == ""
          ? ""
          : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rixl-rocm-${RIXL_BRANCH}-ucx-${UCX_BRANCH}")
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rixl-rocm-${RIXL_CACHE_KEY}"),
    # ROCShmem — fallback key is the ROCShmem pin alone.
    (ROCSHMEM_CACHE_KEY == ""
      ? (ROCSHMEM_BRANCH == ""
          ? ""
          : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocshmem-rocm-${ROCSHMEM_BRANCH}")
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocshmem-rocm-${ROCSHMEM_CACHE_KEY}"),
    # DeepEP — fallback key combines the DeepEP and ROCShmem pins.
    (DEEPEP_CACHE_KEY == ""
      ? (DEEPEP_BRANCH == ""
          ? ""
          : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:deepep-rocm-${DEEPEP_BRANCH}-rocshmem-${ROCSHMEM_BRANCH}")
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:deepep-rocm-${DEEPEP_CACHE_KEY}"),
  ])
}
|
||||
|
||||
# Cache export ref for the RIXL stage (mode=min). Prefers RIXL_CACHE_KEY, falls
# back to the RIXL+UCX pins, and exports nothing when neither is set.
function "get_cache_to_rocm_rixl" {
  params = []
  result = compact([
    (RIXL_CACHE_KEY == ""
      ? (RIXL_BRANCH == ""
          ? ""
          : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rixl-rocm-${RIXL_BRANCH}-ucx-${UCX_BRANCH},mode=min")
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rixl-rocm-${RIXL_CACHE_KEY},mode=min"),
  ])
}
|
||||
|
||||
# Cache export ref for the ROCShmem stage (mode=min). Prefers
# ROCSHMEM_CACHE_KEY, falls back to the ROCShmem pin, and exports nothing when
# neither is set.
function "get_cache_to_rocm_rocshmem" {
  params = []
  result = compact([
    (ROCSHMEM_CACHE_KEY == ""
      ? (ROCSHMEM_BRANCH == ""
          ? ""
          : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocshmem-rocm-${ROCSHMEM_BRANCH},mode=min")
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:rocshmem-rocm-${ROCSHMEM_CACHE_KEY},mode=min"),
  ])
}
|
||||
|
||||
# Cache export ref for the DeepEP stage (mode=min). Prefers DEEPEP_CACHE_KEY,
# falls back to the DeepEP+ROCShmem pins, and exports nothing when neither is
# set.
function "get_cache_to_rocm_deepep" {
  params = []
  result = compact([
    (DEEPEP_CACHE_KEY == ""
      ? (DEEPEP_BRANCH == ""
          ? ""
          : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:deepep-rocm-${DEEPEP_BRANCH}-rocshmem-${ROCSHMEM_BRANCH},mode=min")
      : "type=registry,ref=${DOCKERHUB_CACHE_REPO}:deepep-rocm-${DEEPEP_CACHE_KEY},mode=min"),
  ])
}
|
||||
|
||||
# CI targets
|
||||
|
||||
# Shared CI mixin: build args plus Buildkite build annotations. Intended to be
# listed after "_common-rocm" in `inherits` so these args take precedence.
target "_ci-rocm" {
  args = {
    max_jobs              = CI_MAX_JOBS
    CI_BASE_IMAGE         = CI_BASE_IMAGE
    ARG_PYTORCH_ROCM_ARCH = PYTORCH_ROCM_ARCH
  }
  annotations = [
    "manifest:vllm.buildkite.build_number=${BUILDKITE_BUILD_NUMBER}",
    "manifest:vllm.buildkite.build_id=${BUILDKITE_BUILD_ID}",
  ]
}
|
||||
|
||||
# Builds the "test" stage and pushes it to the registry under whichever of
# IMAGE_TAG / IMAGE_TAG_LATEST are non-empty.
target "test-rocm-ci" {
  target     = "test"
  inherits   = ["_common-rocm", "_ci-rocm", "_labels"]
  output     = ["type=registry"]
  tags       = compact([IMAGE_TAG, IMAGE_TAG_LATEST])
  cache-from = get_cache_from_rocm()
  cache-to   = get_cache_to_rocm()
}
|
||||
|
||||
# Cache-only target for the source-scoped ROCm native build stage.
|
||||
# This persists the csrc-build stage in the registry cache even though the
|
||||
# final test image only consumes it indirectly while packaging the wheel.
|
||||
# Builds the "csrc-build" stage purely to populate the registry cache; no image
# or files are produced (type=cacheonly).
target "csrc-rocm-ci" {
  target     = "csrc-build"
  inherits   = ["_common-rocm", "_ci-rocm"]
  output     = ["type=cacheonly"]
  cache-from = get_cache_from_rocm_csrc()
  cache-to   = get_cache_to_rocm_csrc()
}
|
||||
|
||||
# Keep wheel export on the same CI graph as the test image build so the
|
||||
# shared build_vllm/export_vllm stages resolve identically within one bake
|
||||
# invocation. Without this, export-wheel-rocm uses the plain local target
|
||||
# args while test-rocm-ci uses CI-only args, which can lead to separate
|
||||
# cache lineages and inconsistent export_vllm results.
|
||||
# CI variant of the wheel export: writes the "export_vllm" stage to
# ./wheel-export using the same CI args and final-image cache refs as
# test-rocm-ci.
# NOTE(review): this target and test-rocm-ci export to the same cache refs;
# confirm that is safe when both run in one bake invocation.
target "export-wheel-rocm" {
  target     = "export_vllm"
  inherits   = ["_common-rocm", "_ci-rocm"]
  output     = ["type=local,dest=./wheel-export"]
  cache-from = get_cache_from_rocm()
  cache-to   = get_cache_to_rocm()
}
|
||||
|
||||
# Artifact-only vLLM build. GPU test jobs consume this artifact on top of
|
||||
# ci_base, avoiding a per-commit multi-GB image push/pull.
|
||||
group "test-rocm-ci-with-artifacts" {
  targets = [
    "csrc-rocm-ci",
    "export-wheel-rocm",
  ]
}

# Superset of the group above that also builds and pushes the full test image.
# Retained as a fallback / debugging aid for when a per-commit image is wanted.
group "test-rocm-ci-with-wheel" {
  targets = [
    "csrc-rocm-ci",
    "test-rocm-ci",
    "export-wheel-rocm",
  ]
}
|
||||
|
||||
# Image tags for the ci_base build. ci-bake-rocm.sh rewrites CI_BASE_IMAGE_TAG
|
||||
# to the primary tag for this build. Non-nightly builds use a commit-scoped tag
|
||||
# and also publish a content tag for reuse. NIGHTLY=1 builds on the stable branch
|
||||
# can additionally set CI_BASE_IMAGE_TAG_STABLE to refresh rocm/vllm-dev:ci_base.
|
||||
# Primary tag; the content and stable tags are optional and skipped when empty.
variable "CI_BASE_IMAGE_TAG"         { default = "rocm/vllm-dev:ci_base" }
variable "CI_BASE_IMAGE_TAG_CONTENT" { default = "" }
variable "CI_BASE_IMAGE_TAG_STABLE"  { default = "" }
|
||||
|
||||
# Cache-only targets for upstream dependency stages. These persist each stage
|
||||
# in the registry cache keyed by its upstream commit hash. When ci_base rebuilds
|
||||
# (e.g., requirements change), these stages are cache hits if their upstream
|
||||
# pins haven't changed -- saving ~35min of compilation.
|
||||
# Cache-only build of the "build_rixl" stage.
target "rixl-rocm-ci" {
  target     = "build_rixl"
  inherits   = ["_common-rocm", "_ci-rocm"]
  output     = ["type=cacheonly"]
  cache-from = get_cache_from_rocm_deps()
  cache-to   = get_cache_to_rocm_rixl()
}
|
||||
|
||||
# Cache-only build of the "build_rocshmem" stage.
target "rocshmem-rocm-ci" {
  target     = "build_rocshmem"
  inherits   = ["_common-rocm", "_ci-rocm"]
  output     = ["type=cacheonly"]
  cache-from = get_cache_from_rocm_deps()
  cache-to   = get_cache_to_rocm_rocshmem()
}
|
||||
|
||||
# Cache-only build of the "build_deepep" stage.
target "deepep-rocm-ci" {
  target     = "build_deepep"
  inherits   = ["_common-rocm", "_ci-rocm"]
  output     = ["type=cacheonly"]
  cache-from = get_cache_from_rocm_deps()
  cache-to   = get_cache_to_rocm_deepep()
}
|
||||
|
||||
# Builds only the ci_base stage (RIXL, DeepEP, torchcodec, etc.)
|
||||
# Invoked by the ensure-ci-base step when the content hash of ci_base-affecting
|
||||
# files drifts from the remote image label. Per-PR builds then pull the result
|
||||
# as CI_BASE_IMAGE instead of rebuilding those slow layers on every commit.
|
||||
# Uses inline cache metadata on the ci_base image itself instead of exporting a
|
||||
# separate registry cache artifact.
|
||||
# Builds the "ci_base" stage and pushes it under every non-empty ci_base tag,
# embedding inline cache metadata in the pushed image.
# NOTE(review): unlike "ci-base-rocm" in docker-bake-rocm.hcl, this target does
# not set the "vllm.ci_base.content_hash" label itself — confirm that
# ci-bake-rocm.sh injects it, otherwise the remote label comparison cannot match.
target "ci-base-rocm-ci" {
  target   = "ci_base"
  inherits = ["_common-rocm", "_ci-rocm", "_labels"]
  output   = ["type=registry"]
  tags = compact([
    CI_BASE_IMAGE_TAG,
    CI_BASE_IMAGE_TAG_CONTENT,
    CI_BASE_IMAGE_TAG_STABLE,
  ])
  cache-to = ["type=inline"]
  cache-from = concat(
    # Previously pushed ci_base images (primary, content, stable tags).
    compact([
      (CI_BASE_IMAGE_TAG == ""
        ? ""
        : "type=registry,ref=${CI_BASE_IMAGE_TAG}"),
      (CI_BASE_IMAGE_TAG_CONTENT == ""
        ? ""
        : "type=registry,ref=${CI_BASE_IMAGE_TAG_CONTENT}"),
      (CI_BASE_IMAGE_TAG_STABLE == ""
        ? ""
        : "type=registry,ref=${CI_BASE_IMAGE_TAG_STABLE}"),
    ]),
    # Upstream dependency caches, so the RIXL/ROCShmem/DeepEP stages can still
    # hit cache when ci_base itself has to be rebuilt.
    get_cache_from_rocm_deps()
  )
}
|
||||
|
||||
# Group for ci_base builds -- exports dependency stage caches alongside the
|
||||
# ci_base image so future rebuilds can reuse them independently.
|
||||
group "ci-base-rocm-ci-with-deps" {
  targets = [
    "rixl-rocm-ci",
    "rocshmem-rocm-ci",
    "deepep-rocm-ci",
    "ci-base-rocm-ci",
  ]
}
|
||||
@@ -0,0 +1,143 @@
|
||||
# docker-bake-rocm.hcl - vLLM ROCm Docker build configuration
|
||||
#
|
||||
# This file lives in the vLLM repo at docker/docker-bake-rocm.hcl
|
||||
# Equivalent of docker-bake.hcl for ROCm builds.
|
||||
#
|
||||
# Usage:
|
||||
# docker buildx bake -f docker/docker-bake-rocm.hcl # Build test (default)
|
||||
# docker buildx bake -f docker/docker-bake-rocm.hcl final-rocm # Build final image
|
||||
# docker buildx bake -f docker/docker-bake-rocm.hcl --print # Show resolved config
|
||||
#
|
||||
# CI usage (with the vLLM-owned CI overlay):
|
||||
# docker buildx bake -f docker/docker-bake-rocm.hcl -f docker/ci-rocm.hcl test-rocm-ci
|
||||
|
||||
# Build parallelism. The empty default defers to the Dockerfile, where each RUN
# step applies MAX_JOBS="${MAX_JOBS:-$(nproc)}" and therefore uses every core of
# the build machine. On small machines, cap it for local builds with
# --set '*.args.max_jobs=8'.
variable "MAX_JOBS" { default = "" }
|
||||
|
||||
# GPU architectures to build for (semicolon-separated).
variable "PYTORCH_ROCM_ARCH" { default = "gfx90a;gfx942;gfx950" }

# Commit recorded in the OCI revision label/annotation; empty when unset.
variable "COMMIT" { default = "" }
|
||||
|
||||
# Content hash of ci_base-affecting files. Computed by ci-bake-rocm.sh and
|
||||
# embedded as a label so future builds can compare without rebuilding.
|
||||
# Written to the "vllm.ci_base.content_hash" label by the ci-base-rocm target.
variable "CI_BASE_CONTENT_HASH" { default = "" }
|
||||
|
||||
# REMOTE_VLLM=0: use local source via Docker build context (ONBUILD COPY ./ vllm/)
|
||||
# REMOTE_VLLM=1: clone from GitHub at VLLM_BRANCH (standalone builds without local source)
|
||||
# "0" selects the local build context as the vLLM source.
variable "REMOTE_VLLM" { default = "0" }

# Branch or ref cloned when REMOTE_VLLM=1.
variable "VLLM_BRANCH" { default = "main" }
|
||||
|
||||
# CI_BASE_IMAGE: pre-built ci_base image for per-PR test builds.
|
||||
# Defaults to the published "rocm/vllm-dev:ci_base" registry image.
|
||||
# Set the CI_BASE_IMAGE environment variable to use a different base (presumably the local "ci_base" stage name also works - confirm against Dockerfile.rocm).
|
||||
# Passed to the Dockerfile as the CI_BASE_IMAGE build arg by "_common-rocm".
variable "CI_BASE_IMAGE" { default = "rocm/vllm-dev:ci_base" }
|
||||
|
||||
# Upstream dependency commit pins. Plain local bake builds use the Dockerfile
|
||||
# ARG defaults. ci-bake-rocm.sh resolves those defaults (plus any env
|
||||
# overrides) and writes a small HCL override before invoking CI targets.
|
||||
# All four pins default to "" in this file.
variable "RIXL_BRANCH"     { default = "" }
variable "UCX_BRANCH"      { default = "" }
variable "ROCSHMEM_BRANCH" { default = "" }
variable "DEEPEP_BRANCH"   { default = "" }
|
||||
|
||||
# Plain `docker buildx bake -f docker/docker-bake-rocm.hcl` builds the local
# test image.
group "default" {
  targets = [
    "test-rocm",
  ]
}
|
||||
|
||||
# Base mixin inherited by every ROCm target: Dockerfile, build context, and the
# build args wired from the variables above.
target "_common-rocm" {
  context    = "."
  dockerfile = "docker/Dockerfile.rocm"
  args = {
    CI_BASE_IMAGE         = CI_BASE_IMAGE
    VLLM_BRANCH           = VLLM_BRANCH
    REMOTE_VLLM           = REMOTE_VLLM
    ARG_PYTORCH_ROCM_ARCH = PYTORCH_ROCM_ARCH
    max_jobs              = MAX_JOBS
  }
}
|
||||
|
||||
# OCI metadata mixin: static image labels plus the source revision, which is
# recorded both as a label and as a manifest annotation.
target "_labels" {
  annotations = [
    "manifest:org.opencontainers.image.revision=${COMMIT}",
  ]
  labels = {
    "org.opencontainers.image.title"       = "vLLM ROCm"
    "org.opencontainers.image.description" = "vLLM: A high-throughput and memory-efficient inference and serving engine for LLMs (ROCm)"
    "org.opencontainers.image.vendor"      = "vLLM"
    "org.opencontainers.image.source"      = "https://github.com/vllm-project/vllm"
    "org.opencontainers.image.licenses"    = "Apache-2.0"
    "org.opencontainers.image.revision"    = COMMIT
  }
}
|
||||
|
||||
# Local test image: builds the "test" stage and loads it into the local Docker
# image store as rocm/vllm:test.
target "test-rocm" {
  target   = "test"
  inherits = ["_common-rocm", "_labels"]
  output   = ["type=docker"]
  tags     = ["rocm/vllm:test"]
}
|
||||
|
||||
# CI base image target - builds only the ci_base stage (RIXL, DeepEP,
|
||||
# torchcodec, requirements, etc.). Used by the weekly scheduled build and
|
||||
# the auto-rebuild trigger when requirements change in a PR.
|
||||
# Local ci_base image: builds the "ci_base" stage, stamps it with the content
# hash label, and loads it into the local Docker image store.
target "ci-base-rocm" {
  target   = "ci_base"
  inherits = ["_common-rocm", "_labels"]
  output   = ["type=docker"]
  tags     = ["rocm/vllm-dev:ci_base"]
  labels = {
    "vllm.ci_base.content_hash" = CI_BASE_CONTENT_HASH
  }
}
|
||||
|
||||
# Wheel export target - extracts the built vLLM wheel + test workspace
|
||||
# to local disk. Used by CI to upload the wheel as a Buildkite artifact
|
||||
# so test jobs can assemble images locally from ci_base + wheel instead
|
||||
# of pulling the full large image from Docker Hub.
|
||||
#
|
||||
# Usage:
|
||||
# docker buildx bake -f docker/docker-bake-rocm.hcl export-wheel-rocm
|
||||
# # Creates ./wheel-export/*.whl, ./wheel-export/requirements/, etc.
|
||||
#
|
||||
# After a full bake build, BuildKit cache makes this nearly instant.
|
||||
# Writes the contents of the "export_vllm" stage to ./wheel-export on the host.
target "export-wheel-rocm" {
  target   = "export_vllm"
  inherits = ["_common-rocm"]
  output   = ["type=local,dest=./wheel-export"]
}
|
||||
|
||||
# Final image: builds the "final" stage and loads it into the local Docker
# image store as rocm/vllm:latest.
target "final-rocm" {
  target   = "final"
  inherits = ["_common-rocm", "_labels"]
  output   = ["type=docker"]
  tags     = ["rocm/vllm:latest"]
}
|
||||
@@ -3,12 +3,16 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
# Script to install TorchCodec from source (required for ROCm compatibility)
|
||||
# The PyPI wheel is built against upstream PyTorch and has ABI mismatches with
|
||||
# ROCm's custom torch build, so we must compile from source.
|
||||
|
||||
set -e
|
||||
|
||||
TORCHCODEC_REPO="${TORCHCODEC_REPO:-https://github.com/pytorch/torchcodec.git}"
|
||||
# Pin to a specific release for reproducibility; update as needed.
|
||||
TORCHCODEC_BRANCH="${TORCHCODEC_BRANCH:-v0.10.0}"
|
||||
# Cache directory for pre-built wheels to avoid redundant recompilation.
|
||||
TORCHCODEC_WHEEL_CACHE="${TORCHCODEC_WHEEL_CACHE:-/root/.cache/torchcodec-wheels}"
|
||||
|
||||
echo "=== TorchCodec Installation Script ==="
|
||||
|
||||
@@ -18,9 +22,26 @@ if python3 -c "from torchcodec.decoders import VideoDecoder" 2>/dev/null; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Try to install from a cached wheel first.
ARCH_TAG="${PYTORCH_ROCM_ARCH:-all}"
# Normalize arch tag (replace ; with _) for use in a path component.
ARCH_TAG="${ARCH_TAG//;/_}"

# CACHED_WHEEL is a per-(release, arch) *directory*, not a renamed .whl file.
# pip only accepts wheels named {name}-{version}-{python}-{abi}-{platform}.whl,
# so a cache entry called torchcodec-<branch>-<arch>.whl is rejected as an
# invalid wheel filename and the cache could never be used. Storing the built
# wheel under its original filename inside a keyed directory fixes that.
# The `cp "$BUILT_WHEEL" "$CACHED_WHEEL"` step later in this script copies the
# wheel into this directory (keeping its name) because the directory exists.
CACHED_WHEEL="${TORCHCODEC_WHEEL_CACHE}/torchcodec-${TORCHCODEC_BRANCH}-${ARCH_TAG}"
# Best effort: an unwritable cache location must not abort the script here
# under `set -e`; the source build below still runs.
mkdir -p "$CACHED_WHEEL" 2>/dev/null || true

# First wheel in the cache directory, or empty when there is none. The
# pipeline's status is that of `head`, so a non-matching glob is not fatal.
CACHED_WHEEL_FILE=$(ls "$CACHED_WHEEL"/torchcodec-*.whl 2>/dev/null | head -1)

if [ -n "$CACHED_WHEEL_FILE" ] && [ -f "$CACHED_WHEEL_FILE" ]; then
    echo "Found cached wheel: $CACHED_WHEEL_FILE"
    # `&&` keeps a failed install from tripping `set -e`; we fall through to
    # the source build instead.
    pip install "$CACHED_WHEEL_FILE" && {
        echo "Installed from cached wheel."
        echo "=== TorchCodec installation complete ==="
        exit 0
    }
    echo "Cached wheel installation failed, rebuilding from source..."
fi
|
||||
|
||||
echo "TorchCodec not found. Installing from source..."
|
||||
|
||||
# Install system dependencies (FFmpeg + pkg-config)
|
||||
# Install system dependencies (FFmpeg + pkg-config) if not already present.
|
||||
# The Docker test image pre-installs these, so this is a fallback for other envs.
|
||||
install_system_deps() {
|
||||
if command -v apt-get &> /dev/null; then
|
||||
echo "Installing system dependencies..."
|
||||
@@ -56,6 +77,12 @@ export pybind11_DIR=$(python3 -c "import pybind11; print(pybind11.get_cmake_dir(
|
||||
export CMAKE_PREFIX_PATH="${pybind11_DIR}:${CMAKE_PREFIX_PATH}"
|
||||
echo "pybind11_DIR set to: $pybind11_DIR"
|
||||
|
||||
# Limit GPU architectures to only what this image targets.
|
||||
# The default builds for all supported archs which is very slow.
|
||||
if [ -n "$PYTORCH_ROCM_ARCH" ]; then
|
||||
echo "Building for PYTORCH_ROCM_ARCH=$PYTORCH_ROCM_ARCH"
|
||||
fi
|
||||
|
||||
# Create temp directory for build
|
||||
BUILD_DIR=$(mktemp -d -t torchcodec-XXXXXX)
|
||||
echo "Building in temporary directory: $BUILD_DIR"
|
||||
@@ -77,9 +104,31 @@ cd torchcodec
|
||||
export TORCHCODEC_CMAKE_BUILD_DIR="${PWD}/build"
|
||||
export TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR=1
|
||||
export I_CONFIRM_THIS_IS_NOT_A_LICENSE_VIOLATION=1
|
||||
# Use ninja for faster builds and parallelize compilation
|
||||
export CMAKE_GENERATOR=Ninja
|
||||
export MAX_JOBS="${MAX_JOBS:-$(nproc)}"
|
||||
# Use ccache if available to speed up recompilation
|
||||
if command -v ccache &> /dev/null; then
|
||||
export CMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
fi
|
||||
|
||||
echo "Building TorchCodec..."
|
||||
pip install . --no-build-isolation
|
||||
echo "Building TorchCodec (MAX_JOBS=$MAX_JOBS)..."
|
||||
pip wheel . --no-build-isolation --no-deps -w "$BUILD_DIR/dist"
|
||||
|
||||
# Install the built wheel
|
||||
BUILT_WHEEL=$(ls "$BUILD_DIR/dist"/torchcodec-*.whl 2>/dev/null | head -1)
|
||||
if [ -z "$BUILT_WHEEL" ]; then
|
||||
echo "Error: No wheel produced"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
pip install "$BUILT_WHEEL"
|
||||
|
||||
# Cache the wheel for future runs
|
||||
mkdir -p "$TORCHCODEC_WHEEL_CACHE"
|
||||
cp "$BUILT_WHEEL" "$CACHED_WHEEL"
|
||||
echo "Cached wheel to: $CACHED_WHEEL"
|
||||
|
||||
# Verify installation
|
||||
echo "Verifying installation..."
|
||||
@@ -88,4 +137,4 @@ if python3 -c "from torchcodec.decoders import VideoDecoder; print('TorchCodec i
|
||||
else
|
||||
echo "Error: TorchCodec installation failed verification"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user