# Global build arguments. They are visible to FROM lines and must be
# re-declared (without a value) inside every stage that reads them.
ARG REMOTE_VLLM="0"
ARG COMMON_WORKDIR=/app
ARG BASE_IMAGE=rocm/vllm-dev:base
ARG CI_BASE_IMAGE=rocm/vllm-dev:ci_base

# NIC backend selection for MoRI RDMA support.
# The default (all) installs drivers and userspace libraries for every
# supported NIC type (ainic and bnxt); MoRI picks the right one at runtime.
# Restrict the install to one NIC type by setting NIC_BACKEND explicitly:
#   --build-arg NIC_BACKEND=ainic   # AMD AINIC (Pensando) only
#   --build-arg NIC_BACKEND=bnxt    # Broadcom Thor-2 only
#   --build-arg NIC_BACKEND=none    # Install nothing.
ARG NIC_BACKEND=all

# AMD AINIC apt repository settings.
# Override the version to match the drivers installed on the host.
# The default has been tested with ioinic-dkms=25.11.1.001.
ARG AINIC_VERSION=1.117.3-hydra
ARG UBUNTU_CODENAME=jammy

# sccache settings. Release builds use sccache today; CI may opt in once a
# shared S3-compatible cache backend is available.
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0

FROM ${BASE_IMAGE} AS base

# An explicit ARG_PYTORCH_ROCM_ARCH wins; otherwise keep the value the base
# image already exports.
ARG ARG_PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}

# Build dependencies and general utilities.
RUN apt-get update -q -y && apt-get install -q -y \
        apt-transport-https \
        ca-certificates \
        ccache \
        curl \
        libfmt-dev \
        libmsgpack-dev \
        libnuma-dev \
        libsqlite3-dev \
        libsuitesparse-dev \
        mold \
        sqlite3 \
        wget

RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade pip

# Note: mold is installed but deliberately not made the system default linker,
# because some packages JIT-compile at runtime with flags mold does not support.
# Build stages opt in individually via LDFLAGS="-fuse-ld=mold".
# Remove sccache unless it was requested (the base image built from
# Dockerfile.rocm_base ships it). Every step is best-effort: the package may
# have been installed via apt, via pip, or as a bare binary.
ARG USE_SCCACHE
RUN if [ "$USE_SCCACHE" != "1" ]; then \
        apt-get purge -y sccache || true; \
        python3 -m pip uninstall -y sccache || true; \
        sccache_bin="$(command -v sccache || true)"; \
        if [ -n "${sccache_bin}" ]; then rm -f "${sccache_bin}" || true; fi; \
    fi

# Install UV — download first, then run, so a curl failure is not masked by the pipe
RUN curl -LsSf --retry 3 --retry-delay 5 https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && env UV_INSTALL_DIR="/usr/local/bin" sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh \
    && uv --version

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy

# ccache directory - persisted across layer rebuilds via cache mounts.
ENV CCACHE_DIR=/root/.cache/ccache
ENV CCACHE_COMPILERCHECK=content

# Empty by default so build steps fall back to $(nproc); CI can override.
ARG max_jobs
ENV MAX_JOBS=${max_jobs}

# Install sccache if USE_SCCACHE is enabled (for release builds).
# The ARGs below pick up the defaults declared in the global scope.
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME
ARG SCCACHE_REGION_NAME
ARG SCCACHE_S3_NO_CREDENTIALS
# curl runs with --fail and retries (matching the uv download above) so that an
# HTTP error page is never saved as the tarball and handed to tar.
RUN if [ "$USE_SCCACHE" = "1" ]; then \
        if command -v sccache >/dev/null 2>&1; then \
            echo "sccache already installed, skipping installation"; \
            sccache --version; \
        else \
            echo "Installing sccache..." \
            && SCCACHE_ARCH="x86_64" \
            && SCCACHE_VERSION="v0.8.1" \
            && SCCACHE_DL_URL="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl.tar.gz}" \
            && curl -fL --retry 3 --retry-delay 5 -o /tmp/sccache.tar.gz "${SCCACHE_DL_URL}" \
            && tar -xzf /tmp/sccache.tar.gz -C /tmp \
            && mv "/tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl/sccache" /usr/bin/sccache \
            && chmod +x /usr/bin/sccache \
            && rm -rf /tmp/sccache.tar.gz "/tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl" \
            && sccache --version; \
        fi; \
    fi

# Export the sccache settings only when USE_SCCACHE is set, so S3 config does
# not leak into images that do not use sccache.
# NOTE: ${USE_SCCACHE:+...} expands for ANY non-empty value (including "0"),
# not just "1"; leave USE_SCCACHE unset to keep these variables empty.
ARG USE_SCCACHE
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}}
ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}

ARG COMMON_WORKDIR
WORKDIR ${COMMON_WORKDIR}

# -----------------------
# vLLM fetch stages
# fetch_vllm_0 copies the local build context; fetch_vllm_1 clones a remote
# repository. REMOTE_VLLM selects which of the two becomes fetch_vllm.
FROM base AS fetch_vllm_0
ONBUILD COPY ./ vllm/

FROM base AS fetch_vllm_1
ARG VLLM_REPO="https://github.com/vllm-project/vllm.git"
ARG VLLM_BRANCH="main"
ENV VLLM_REPO=${VLLM_REPO}
ENV VLLM_BRANCH=${VLLM_BRANCH}
# VLLM_REPO is quoted so an empty or unusual value cannot turn the `[` test
# into a syntax error. VLLM_BRANCH is left unquoted so that an empty value does
# not become an empty refspec argument. When cloning a fork, the upstream
# project is added as an extra remote.
ONBUILD RUN git clone "${VLLM_REPO}" \
        && cd vllm \
        && git fetch -v --prune -- origin ${VLLM_BRANCH} \
        && git checkout FETCH_HEAD \
        && if [ "${VLLM_REPO}" != "https://github.com/vllm-project/vllm.git" ] ; then \
               git remote add upstream "https://github.com/vllm-project/vllm.git" \
               && git fetch upstream ; fi

FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm

# -----------------------
# Rust build stage
# Builds the `vllm-rs` frontend in a dedicated stage so the wheel build stages
# don't need the rust toolchain or protoc.
FROM fetch_vllm AS rust-build
ARG COMMON_WORKDIR

# protoc is used by tonic-build/prost-build.
# Tools needed to fetch and unpack protoc and rustup.
RUN apt-get update -q -y && apt-get install -q -y --no-install-recommends \
        ca-certificates curl unzip \
    && rm -rf /var/lib/apt/lists/*
COPY tools/install_protoc.sh /tmp/install_protoc.sh
RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh

# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
# Download first, then run: with `curl | sh` under /bin/sh a curl failure is
# masked by the pipe (sh exits 0 on empty input) and only surfaces later as a
# missing cargo.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y --profile minimal --default-toolchain none \
    && rm -f /tmp/rustup-init.sh
ENV PATH="/root/.cargo/bin:${PATH}"

# Cap cargo parallelism to avoid exhausting the AMD CI host's open-file limit
# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
ENV CARGO_BUILD_JOBS=4
ENV CARGO_NET_RETRY=10
ENV RUSTUP_MAX_RETRIES=10

# Build the release binary. Cargo's registry/git caches can be written by
# concurrent BuildKit jobs on shared workers, so lock those cache mounts while
# keeping the cache benefit. The binary is written to /tmp/vllm-rs (outside the
# cache mounts) so it persists into the image layer for later
# COPY --from=rust-build; `test -x` fails the build if it was not produced.
RUN --mount=type=cache,id=vllm-rocm-cargo-registry,target=/root/.cargo/registry,sharing=locked \
    --mount=type=cache,id=vllm-rocm-cargo-git,target=/root/.cargo/git,sharing=locked \
    --mount=type=cache,id=vllm-rocm-cargo-target,target=${COMMON_WORKDIR}/vllm/rust/target,sharing=locked \
    cd ${COMMON_WORKDIR}/vllm \
    && VLLM_RS_TARGET_PATH=/tmp/vllm-rs bash build_rust.sh \
    && test -x /tmp/vllm-rs

# -----------------------
# vLLM native build stages
#
# csrc-build intentionally copies only files that affect ROCm native extension
# compilation. That keeps unrelated CI/test/docs edits from invalidating the
# expensive HIP/C++ build layer.
FROM base AS csrc-build
ARG COMMON_WORKDIR
WORKDIR ${COMMON_WORKDIR}/vllm

# Python build requirements first, so this layer is reused while only native
# sources change.
COPY requirements/rocm.txt requirements/rocm.txt
COPY requirements/common.txt requirements/common.txt
RUN --mount=type=cache,id=vllm-rocm-uv,target=/root/.cache/uv \
    uv pip install --system -r requirements/rocm.txt

# Native build inputs. pyproject.toml is bind-mounted into the RUN step below
# instead of copied, so metadata-only changes do not invalidate the expensive
# native build layer.
COPY setup.py CMakeLists.txt ./
COPY cmake cmake/
COPY csrc csrc/
COPY vllm/envs.py vllm/envs.py
COPY vllm/__init__.py vllm/__init__.py

ENV VLLM_TARGET_DEVICE=rocm
ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+rocm.csrc.build"

# Build the native extension wheel into dist/. ccache statistics are printed
# before and after (best-effort) and the step fails if no wheel was produced.
RUN --mount=type=bind,source=pyproject.toml,target=${COMMON_WORKDIR}/vllm/pyproject.toml \
    --mount=type=cache,id=vllm-rocm-ccache,target=/root/.cache/ccache \
    export CCACHE_BASEDIR="$PWD" \
    && echo "=== ccache stats before ROCm native build ===" \
    && { ccache --show-stats || true; } \
    && { ccache --zero-stats || true; } \
    && build_jobs="${MAX_JOBS:-$(nproc)}" \
    && echo "Building ROCm native extension wheel with MAX_JOBS=${build_jobs}" \
    && LDFLAGS="-fuse-ld=mold" MAX_JOBS="${build_jobs}" \
       python3 setup.py bdist_wheel --dist-dir=dist \
    && test -d dist \
    && ls dist/*.whl >/dev/null \
    && echo "=== ccache stats after ROCm native build ===" \
    && { ccache --show-stats || true; }

# Build the full vLLM ROCm wheel by reusing the native extension wheel from
# csrc-build. This stage still rebuilds for Python/package changes, but skips
# the expensive HIP/C++ compile when native inputs are unchanged.
FROM fetch_vllm AS build_vllm
ARG COMMON_WORKDIR
ENV VLLM_TARGET_DEVICE=rocm

COPY --from=csrc-build ${COMMON_WORKDIR}/vllm/dist /precompiled-wheels

# Drop the pre-built rust frontend binary into the source tree. setup.py
# detects it and ships it as-is, skipping the local cargo build.
COPY --from=rust-build /tmp/vllm-rs ${COMMON_WORKDIR}/vllm/vllm/vllm-rs

# Package the wheel around the precompiled extensions copied from csrc-build;
# the step fails if dist/ ends up without a wheel.
RUN --mount=type=cache,id=vllm-rocm-uv,target=/root/.cache/uv \
    cd vllm \
    && uv pip install --system -r requirements/rocm.txt \
    && export VLLM_USE_PRECOMPILED=1 \
    && export VLLM_PRECOMPILED_WHEEL_LOCATION="$(ls /precompiled-wheels/*.whl)" \
    && export VLLM_DOCKER_BUILD_CONTEXT=1 \
    && echo "Packaging vLLM ROCm wheel using precompiled extensions from ${VLLM_PRECOMPILED_WHEEL_LOCATION}" \
    && python3 setup.py bdist_wheel --dist-dir=dist \
    && test -d dist \
    && ls dist/*.whl >/dev/null

# Artifact-only stage: the wheel plus the source directories that later stages
# copy or bind-mount.
FROM scratch AS export_vllm
ARG COMMON_WORKDIR
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/dist/*.whl /
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/requirements /requirements
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/pyproject.toml /pyproject.toml
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1

# RIXL/UCX build stages
FROM base AS build_rixl
ARG RIXL_BRANCH="39be1de8"
ARG RIXL_REPO="https://github.com/ROCm/RIXL.git"
ARG UCX_BRANCH="bfb51733"
ARG UCX_REPO="https://github.com/openucx/ucx.git"
ENV ROCM_PATH=/opt/rocm
ENV UCX_HOME=/usr/local/ucx
ENV RIXL_HOME=/usr/local/rixl
ENV RIXL_BENCH_HOME=/usr/local/rixl_bench

# RIXL build system dependencies and RDMA support
RUN apt-get -y update && apt-get -y install \
        autoconf libtool pkg-config \
        libgrpc-dev libgrpc++-dev \
        libprotobuf-dev protobuf-compiler-grpc \
        libcpprest-dev libaio-dev \
        librdmacm1 librdmacm-dev \
        libibverbs1 libibverbs-dev \
        ibverbs-utils rdmacm-utils ibverbs-providers \
    && rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system meson auditwheel patchelf tomlkit

# Build UCX at the pinned commit with ROCm and verbs support and install it
# under /usr/local/ucx.
RUN --mount=type=cache,target=/root/.cache/ccache \
    cd /usr/local/src \
    && git clone ${UCX_REPO} \
    && cd ucx \
    && git checkout ${UCX_BRANCH} \
    && ./autogen.sh \
    && mkdir build \
    && cd build \
    && CC="ccache gcc" CXX="ccache g++" \
       ../configure \
           --prefix=/usr/local/ucx \
           --enable-shared \
           --disable-static \
           --disable-doxygen-doc \
           --enable-optimizations \
           --enable-devel-headers \
           --with-rocm=${ROCM_PATH} \
           --with-verbs \
           --with-dm \
           --enable-mt \
    && make -j$(nproc) \
    && make install

ENV PATH=/usr/local/ucx/bin:$PATH
ENV LD_LIBRARY_PATH=${UCX_HOME}/lib:${LD_LIBRARY_PATH}

# Build RIXL at the pinned commit against the UCX and ROCm installs above.
RUN --mount=type=cache,target=/root/.cache/ccache \
    git clone ${RIXL_REPO} /opt/rixl \
    && cd /opt/rixl \
    && git checkout ${RIXL_BRANCH} \
    && CC="ccache gcc" CXX="ccache g++" \
       meson setup build --prefix=${RIXL_HOME} \
           -Ducx_path=${UCX_HOME} \
           -Drocm_path=${ROCM_PATH} \
    && cd build \
    && ninja -j$(nproc) \
    && ninja install

# Generate RIXL wheel
# Exclude libcore and libpull from auditwheel: transitive dependencies
# that are not shipped in the wheel and vary across base images.
# The sed call patches the extra --exclude flags into contrib/build-wheel.sh.
RUN cd /opt/rixl \
    && sed -i "s/--exclude 'libamdhip64\*'/--exclude 'libamdhip64*' --exclude 'libcore*' --exclude 'libpull*'/" \
           contrib/build-wheel.sh \
    && mkdir -p /app/install \
    && _ucx_install_dir=${UCX_HOME} \
       ./contrib/build-wheel.sh \
           --output-dir /app/install \
           --rocm-dir ${ROCM_PATH} \
           --ucx-plugins-dir ${UCX_HOME}/lib/ucx \
           --nixl-plugins-dir ${RIXL_HOME}/lib/x86_64-linux-gnu/plugins

# ROCShmem build stage - split from DeepEP so changing DEEPEP_BRANCH does not
# invalidate the slow ROCShmem build.
FROM base AS build_rocshmem
ARG ROCSHMEM_BRANCH="f0acb0c6"
ARG ROCSHMEM_REPO="https://github.com/ROCm/rocm-systems.git"
# DeepEP only supports gfx942 and gfx950; build ROCShmem for the same set so
# it can be linked against DeepEP without arch mismatches.
ARG DEEPEP_ROCM_ARCH="gfx942;gfx950"
ENV ROCM_PATH=/opt/rocm
ENV ROCSHMEM_DIR=/opt/rocshmem

# Partial, sparse clone: only projects/rocshmem is checked out from the
# rocm-systems repository, then built with its all_backends config script.
RUN --mount=type=cache,target=/root/.cache/ccache \
    git clone --no-checkout --filter=blob:none ${ROCSHMEM_REPO} \
    && cd rocm-systems \
    && git sparse-checkout set --cone projects/rocshmem \
    && git checkout ${ROCSHMEM_BRANCH} \
    && mkdir -p projects/rocshmem/build \
    && cd projects/rocshmem/build \
    && CC="ccache gcc" CXX="ccache g++" INSTALL_PREFIX=${ROCSHMEM_DIR} \
       bash ../scripts/build_configs/all_backends \
           -DROCM_PATH=${ROCM_PATH} \
           -DGPU_TARGETS="${DEEPEP_ROCM_ARCH}" \
           -DUSE_EXTERNAL_MPI=OFF

# DeepEP build stage - depends on ROCShmem, builds the HIP kernel wheel.
FROM build_rocshmem AS build_deepep
ARG DEEPEP_BRANCH="a9ea9774"
ARG DEEPEP_REPO="https://github.com/ROCm/DeepEP.git"
ARG DEEPEP_NIC="cx7"

# Build DeepEP wheel. DeepEP looks for rocshmem at ROCSHMEM_DIR.
# DeepEP only supports gfx942 and gfx950, so avoid gfx90a in the default list.
# NOTE(review): the arch list below is hard-coded and duplicates the default of
# DEEPEP_ROCM_ARCH; overriding that ARG does not change this step.
RUN --mount=type=cache,target=/root/.cache/ccache \
    export PYTORCH_ROCM_ARCH="gfx942;gfx950" \
    && git clone ${DEEPEP_REPO} \
    && cd DeepEP \
    && git checkout ${DEEPEP_BRANCH} \
    && LDFLAGS="-fuse-ld=mold" MAX_JOBS="${MAX_JOBS:-$(nproc)}" \
       python3 setup.py \
           --variant rocm \
           --rocm-explicit-ctx \
           --nic ${DEEPEP_NIC} \
           bdist_wheel --dist-dir=/app/deep_install

# MoRI runtime dependencies live in Dockerfile.rocm so NIC backend changes do
# not force users to rebuild the long-lived Dockerfile.rocm_base image.
FROM base AS mori_base
ARG NIC_BACKEND
ARG AINIC_VERSION
ARG UBUNTU_CODENAME

# Install the MoRI proxy Python deps and the vendor NIC userspace packages
# selected by NIC_BACKEND (none | ainic | bnxt | all; anything else fails the
# build with exit code 2).
#
# MoRI auto-detects the NIC at runtime (MORI_DEVICE_NIC env var override).
# Only vendor packages are installed here for dlopen; no compile-time flags
# are needed.
#
# bnxt NOTE: requires FW 235.2.86.0 and kernel drivers on the host:
#   bnxt-en-dkms=1.10.3.235.2.86.0 bnxt-re-dkms=235.2.86.0
#   (from the packages.broadcom.com PPA)
#
# Every command is its own statement rather than part of an `a && b` list:
# under `set -e`, bash ignores the failure of any non-final command in an
# &&-list, so `apt-get update && apt-get install ...` would let a failed
# update (e.g. an unreachable vendor repo) skip the install without failing
# the build.
RUN /bin/bash -lc 'set -euo pipefail; \
    install_ainic() { \
        apt-get update; \
        apt-get install -y --no-install-recommends ca-certificates curl gnupg apt-transport-https; \
        rm -rf /var/lib/apt/lists/*; \
        mkdir -p /etc/apt/keyrings; \
        curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/amdainic.gpg; \
        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/amdainic.gpg] https://repo.radeon.com/amdainic/pensando/ubuntu/${AINIC_VERSION} ${UBUNTU_CODENAME} main" \
            > /etc/apt/sources.list.d/amdainic.list; \
        apt-get update; \
        apt-get install -y --no-install-recommends \
            libionic-dev \
            ionic-common; \
        rm -rf /var/lib/apt/lists/*; \
    }; \
    install_bnxt() { \
        install -m 0755 -d /etc/apt/keyrings; \
        curl -fsSL https://packages.broadcom.com/artifactory/api/security/keypair/PackagesKey/public \
            -o /etc/apt/keyrings/broadcom-nic.asc; \
        chmod a+r /etc/apt/keyrings/broadcom-nic.asc; \
        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/broadcom-nic.asc] https://packages.broadcom.com/artifactory/ethernet-nic-debian-public jammy main" \
            > /etc/apt/sources.list.d/broadcom-nic.list; \
        apt-get update; \
        apt-get install -y --no-install-recommends \
            bnxt-rocelib=235.2.86.0; \
        cp -a /usr/local/lib/x86_64-linux-gnu/libbnxt_re* /usr/local/lib/; \
        ldconfig; \
        rm -rf /var/lib/apt/lists/*; \
    }; \
    echo "[MORI] Install MoRI proxy deps"; \
    pip install --quiet --ignore-installed blinker; \
    pip install --quiet quart msgpack aiohttp pyzmq; \
    echo "[MORI] NIC_BACKEND=${NIC_BACKEND}"; \
    case "${NIC_BACKEND}" in \
      none) ;; \
      all) install_ainic; install_bnxt ;; \
      ainic) install_ainic ;; \
      bnxt) install_bnxt ;; \
      *) echo "ERROR: unknown NIC_BACKEND=${NIC_BACKEND}. Use one of: none, ainic, bnxt, all"; exit 2 ;; \
    esac'

# -----------------------
# vLLM wheel release build stage (for building distributable wheels)
# This stage pins dependencies to custom ROCm wheel versions and handles version detection
FROM fetch_vllm AS build_vllm_wheel_release
ARG COMMON_WORKDIR

# Drop the pre-built rust frontend binary into the source tree. setup.py
# detects it and ships it as-is, skipping the local cargo build.
COPY --from=rust-build /tmp/vllm-rs ${COMMON_WORKDIR}/vllm/vllm/vllm-rs

# Create /install directory for custom wheels
RUN mkdir -p /install

# Copy custom ROCm wheels from docker/context if they exist
# COPY ensures Docker cache is invalidated when wheels change
# .keep file ensures directory always exists for COPY to work
COPY docker/context/base-wheels/ /tmp/base-wheels/

# The presence of wheels here is how a wheel-release build is recognised.
# If no wheels are found, this is not a wheel-release build, so the next step
# exits with an error and the stage is not built any further.
# Copy the custom wheels into /install, or fail when none are present.
# The success branch is one &&-chain so that a failed `cp` fails the build
# instead of being masked by the exit status of the trailing `ls`.
RUN if [ -n "$(ls /tmp/base-wheels/*.whl 2>/dev/null)" ]; then \
        echo "Found custom wheels - copying to /install" \
        && cp /tmp/base-wheels/*.whl /install/ \
        && echo "Copied custom wheels:" \
        && ls -lh /install/; \
    else \
        echo "ERROR: No custom wheels found in docker/context/base-wheels/"; \
        echo "Wheel releases require pre-built ROCm wheels."; \
        exit 1; \
    fi

# GIT_REPO_CHECK: Verify repo is clean and tags are available (for release builds)
# This matches CUDA's Dockerfile behavior for proper version detection via setuptools_scm
ARG GIT_REPO_CHECK=0
RUN if [ "$GIT_REPO_CHECK" != "0" ]; then \
        echo "Running repository checks..."; \
        cd vllm && bash tools/check_repo.sh; \
    fi

# Extract version from git BEFORE any modifications (pin_rocm_dependencies.py modifies requirements/rocm.txt)
# This ensures setuptools_scm sees clean repo state for version detection.
# The detected version is stored in /tmp/vllm_version.txt for the build step.
RUN --mount=type=bind,source=.git,target=vllm/.git \
    --mount=type=cache,target=/root/.cache/uv \
    cd vllm \
    && uv pip install --system setuptools_scm regex \
    && VLLM_VERSION=$(python3 -c "import setuptools_scm; print(setuptools_scm.get_version())") \
    && echo "Detected vLLM version: ${VLLM_VERSION}" \
    && echo "${VLLM_VERSION}" > /tmp/vllm_version.txt

# Fail if git-based package dependencies are found in requirements files
# (uv doesn't handle git+ URLs well, and packages should be distributed on PyPI)
# Extra notes: pip install is able to handle git+ URLs, but uv doesn't.
# common.txt and rocm.txt are checked by the same loop body.
RUN echo "Checking for git-based packages in requirements files..." \
    && for req in common.txt rocm.txt; do \
           echo "Checking ${req} for git-based packages:"; \
           if grep -q 'git+' "${COMMON_WORKDIR}/vllm/requirements/${req}"; then \
               echo "ERROR: Git-based packages found in ${req}:"; \
               grep 'git+' "${COMMON_WORKDIR}/vllm/requirements/${req}"; \
               echo "Please publish these packages to PyPI instead of using git dependencies."; \
               exit 1; \
           else \
               echo " ✓ No git-based packages found in ${req}"; \
           fi; \
       done \
    && echo "All requirements files are clean - no git-based packages found"

# Pin vLLM dependencies to exact versions of custom ROCm wheels
# This ensures 'pip install vllm' automatically installs correct torch/triton/torchvision/amdsmi
COPY tools/vllm-rocm/pin_rocm_dependencies.py /tmp/pin_rocm_dependencies.py
RUN echo "Pinning vLLM dependencies to custom wheel versions..." \
    && python3 /tmp/pin_rocm_dependencies.py /install ${COMMON_WORKDIR}/vllm/requirements/rocm.txt

# Install dependencies using custom wheels from /install
RUN --mount=type=cache,target=/root/.cache/uv \
    cd vllm \
    && echo "Building vLLM with custom wheels from /install" \
    && uv pip install --system --find-links /install -r requirements/rocm.txt

# Build wheel using pre-extracted version to avoid dirty state from modified requirements/rocm.txt
# (setup.py auto-detects ccache/sccache in PATH)
# The version is assigned first and exported separately: `export VAR=$(cmd)`
# would hide a failing `cat` behind the exit status of `export`.
RUN --mount=type=bind,source=.git,target=vllm/.git \
    --mount=type=cache,id=vllm-rocm-ccache,target=/root/.cache/ccache \
    cd vllm \
    && export CCACHE_BASEDIR="$PWD" \
    && SETUPTOOLS_SCM_PRETEND_VERSION="$(cat /tmp/vllm_version.txt)" \
    && export SETUPTOOLS_SCM_PRETEND_VERSION \
    && echo "Building wheel with version: ${SETUPTOOLS_SCM_PRETEND_VERSION}" \
    && MAX_JOBS="${MAX_JOBS:-$(nproc)}" python3 setup.py bdist_wheel --dist-dir=dist

# Artifact-only stage for the release wheel; same layout as export_vllm.
FROM scratch AS export_vllm_wheel_release
ARG COMMON_WORKDIR
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/dist/*.whl /
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/requirements /requirements
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/tests /tests
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/examples /examples
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/pyproject.toml /pyproject.toml
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1

# -----------------------
# CI base image (Tier 1) - stable, rarely changing CI dependencies.
# Per-PR test builds pull this as CI_BASE_IMAGE so the test stage only layers
# in the vLLM artifacts for the current commit.
FROM mori_base AS ci_base
ARG COMMON_WORKDIR

# Update rdma-core to support latest rocshmem.
ARG DEEPEP_NIC
# rdma-core v62.0 is built from source only for the "cx7" and "io" NIC values.
# NOTE(review): DEEPEP_NIC has no default in this stage, so this step is a
# no-op unless the build arg is passed explicitly — confirm that is intended.
RUN case "${DEEPEP_NIC}" in \
        cx7|io) \
            git clone --branch v62.0 --depth 1 https://github.com/linux-rdma/rdma-core.git /tmp/rdma-core \
            && cd /tmp/rdma-core \
            && mkdir -p build \
            && cd build \
            && cmake -GNinja -DCMAKE_INSTALL_PREFIX=/usr -DNO_MAN_PAGES=1 .. \
            && ninja \
            && ninja install \
            && ldconfig \
            && rm -rf /tmp/rdma-core ;; \
    esac

# Install RIXL + DeepEP wheels.
RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
    --mount=type=bind,from=build_deepep,src=/app/deep_install,target=/deep_install \
    uv pip install --system /rixl_install/*.whl /deep_install/*.whl

# Copy ROCShmem runtime libraries.
COPY --from=build_rocshmem /opt/rocshmem /opt/rocshmem

# RDMA userspace libraries plus FFmpeg dev libs needed by torchcodec.
RUN apt-get update -q -y && apt-get install -q -y --no-install-recommends \
        librdmacm1 \
        libibverbs1 \
        ibverbs-providers \
        ibverbs-utils \
        pkg-config \
        ffmpeg \
        libavcodec-dev \
        libavformat-dev \
        libavutil-dev \
        libswscale-dev \
        libavdevice-dev \
        libavfilter-dev \
        libswresample-dev \
    && rm -rf /var/lib/apt/lists/*

# Install torchcodec from source for ROCm/torch ABI compatibility.
COPY tools/install_torchcodec_rocm.sh /tmp/install_torchcodec.sh
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/torchcodec-wheels \
    bash /tmp/install_torchcodec.sh \
    && rm /tmp/install_torchcodec.sh \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Pre-install shared ROCm runtime dependencies.
COPY requirements/common.txt requirements/rocm.txt /tmp/ci-base-requirements/
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system -r /tmp/ci-base-requirements/rocm.txt \
    && rm -rf /tmp/ci-base-requirements

# Enable fast and less brittle model downloads in tests.
ENV HF_XET_HIGH_PERFORMANCE=1 \
    HF_HUB_DOWNLOAD_TIMEOUT=60

# Pre-install vLLM test dependencies.
COPY requirements/test/rocm.txt /tmp/rocm-test-reqs.txt
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system -r /tmp/rocm-test-reqs.txt

# Rebuild fastsafetensors from source so its C++ extension is compiled with
# USE_ROCM and can detect libamdhip64.so at runtime.
# The requirement is taken from the first matching line of the test
# requirements file; the step fails if no such line exists.
RUN --mount=type=cache,target=/root/.cache/pip \
    FASTSAFETENSORS_REQ="$(grep -E '^fastsafetensors(==| @ )' /tmp/rocm-test-reqs.txt | head -n 1)" \
    && [ -n "${FASTSAFETENSORS_REQ}" ] \
    && python3 -m pip install \
           --force-reinstall \
           --no-deps \
           --no-binary fastsafetensors \
           "${FASTSAFETENSORS_REQ}" \
    && rm /tmp/rocm-test-reqs.txt

# Set MIOPEN ENVS to resolve performance regressions in MIOpen 3D convolution kernel.
# See: https://github.com/pytorch/pytorch/issues/169857
ENV MIOPEN_DEBUG_CONV_DIRECT=0 \
    MIOPEN_DEBUG_CONV_GEMM=0

# Use legacy IPC mode for HSA to avoid GPU memory pinning issues with UCX rocm_ipc.
# See: https://github.com/ROCm/rocm-libraries/issues/6266
ENV HSA_ENABLE_IPC_MODE_LEGACY=1

# ROCm profiler limits workaround.
RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf
ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf"

# Install vllm_test_utils in ci_base for ci_base + wheel parity.
COPY tests/vllm_test_utils /tmp/vllm_test_utils
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system /tmp/vllm_test_utils \
    && rm -rf /tmp/vllm_test_utils

# -----------------------
# Test vLLM image (Tier 2) - vLLM-only layer on top of ci_base.
FROM ${CI_BASE_IMAGE} AS test
ARG COMMON_WORKDIR

# Install the vLLM wheel (--no-deps: all deps already in ci_base).
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
    --mount=type=cache,target=/root/.cache/uv \
    cd /install \
    && uv pip install --system --no-deps *.whl

# Store the vLLM wheel in the image for python-only install tests.
COPY --from=export_vllm /*.whl /opt/vllm-wheels/

WORKDIR /vllm-workspace
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace

# Copy in the v1 package (for python-only install test group).
# NOTE(review): PYTHON_VERSION is not declared in the visible part of this
# stage; presumably CI_BASE_IMAGE provides it as ENV — confirm.
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1

# Hide source under src/ so it won't shadow the installed package in tests.
RUN mkdir src && mv vllm src/vllm

# -----------------------
# Final vLLM image
FROM mori_base AS final

RUN python3 -m pip install --upgrade pip \
    && rm -rf /var/lib/apt/lists/*

# Clean up sccache from release image (not needed at runtime)
# This removes the binary and wrappers that may have been installed during build.
# Both removals are best-effort and never fail the build.
RUN { rm -f /usr/bin/sccache || true; } \
    && { rm -rf /opt/sccache-wrappers || true; }

# Unset sccache environment variables for the release image
# This prevents S3 bucket config from leaking into production images
ENV SCCACHE_BUCKET=
ENV SCCACHE_REGION=
ENV SCCACHE_ENDPOINT=
ENV SCCACHE_S3_NO_CREDENTIALS=
ENV SCCACHE_IDLE_TIMEOUT=

# Error related to odd state for numpy 1.20.3 where there is no METADATA etc, but an extra LICENSES_bundled.txt.
# Manually remove the stale numpy 1.20.3 dist-info so that later steps of the
# numpy upgrade can continue. Only applies when python3 resolves into the
# /opt/conda/envs/py_3.9 environment.
RUN case "$(command -v python3)" in \
        *"/opt/conda/envs/py_3.9"*) \
            rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/;; \
        *) ;; \
    esac

# The requirement is quoted because `[cli]` is a shell glob pattern; unquoted,
# it would be rewritten if a matching file name existed in the working dir.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --upgrade "huggingface-hub[cli]"

# Install vLLM using uv (inherited from base stage)
# Note: No -U flag to avoid upgrading PyTorch ROCm to CUDA version
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
    --mount=type=cache,target=/root/.cache/uv \
    cd /install \
    && uv pip install --system -r requirements/rocm.txt \
    && pip uninstall -y vllm \
    && uv pip install --system *.whl

# Install RIXL wheel
RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
    uv pip install --system /rixl_install/*.whl

ARG COMMON_WORKDIR
ARG BASE_IMAGE
ARG NIC_BACKEND
ARG AINIC_VERSION

# Copy over the benchmark scripts as well
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
COPY --from=export_vllm /docker ${COMMON_WORKDIR}/vllm/docker

# Use legacy IPC mode for HSA to avoid GPU memory pinning issues with UCX rocm_ipc
# See: https://github.com/ROCm/rocm-libraries/issues/6266
ENV HSA_ENABLE_IPC_MODE_LEGACY=1

ENV TOKENIZERS_PARALLELISM=false

# ENV that can improve safe tensor loading, and end-to-end time
ENV SAFETENSORS_FAST_GPU=1

# Performance environment variable.
ENV HIP_FORCE_DEV_KERNARG=1

# Workaround for ROCm profiler limits
RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf
ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf"

# Append the build provenance (base image, MoRI NIC backend, AINIC repo
# version) to versions.txt.
RUN { \
        echo "VLLM_BASE_IMAGE=${BASE_IMAGE}"; \
        echo "MORI_NIC_BACKEND=${NIC_BACKEND}"; \
        echo "AINIC_VERSION=${AINIC_VERSION}"; \
    } >> ${COMMON_WORKDIR}/versions.txt

CMD ["/bin/bash"]

# Set entrypoint for vllm-openai official images
FROM final AS vllm-openai
ENTRYPOINT ["vllm", "serve"]