[Docker] Non-root support for vllm-openai; add opt-in vllm-openai-nonroot target (#40275)

Signed-off-by: TheDuyIT <nduy250299@gmail.com> Signed-off-by: dtnguyen <dtnguyen@nvidia.com> Co-authored-by: Claude <noreply@anthropic.com>
2026-06-06 00:16:14 +00:00 · 2026-05-25 12:45:31 +07:00
parent 1b26fa361e
commit 3df1c7c43e
7 changed files with 577 additions and 30 deletions
@@ -6,6 +6,48 @@ steps:
    timeout_in_minutes: 600
    commands:
    - if [[ "$BUILDKITE_BRANCH" == "main" ]]; then .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG $IMAGE_TAG_LATEST; else .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG; fi
    # Non-root smoke 1: the default (root) image must still be importable
    # under a non-root UID via `--user 2000:0`. Validates the `vllm` passwd
    # entry + group-0-writable /home/vllm + uv path cleanup from #31959.
    # Uses `import vllm` rather than `vllm serve --help` because the latter
    # instantiates `VllmConfig` which requires a GPU attached to the
    # container.
    - docker run --rm --user 2000:0 --entrypoint python3 "$IMAGE_TAG" -c "import vllm; print(vllm.__version__)"
    # Non-root smoke 2: assert the non-root enabling invariants are baked
    # into the image. Runs as UID 2000:0 via a shell so we can verify
    # filesystem perms + passwd/group file state + wrapper presence without
    # triggering vLLM's GPU-requiring config-init path. The opt-in
    # `vllm-openai-nonroot` target adds only `USER vllm`, `WORKDIR
    # /home/vllm`, and an `ENTRYPOINT` override on top of these invariants;
    # its build correctness is reviewed at the Dockerfile level. Wrapper
    # logic is covered separately by the pre-commit hook
    # `test-nonroot-entrypoint` (see .pre-commit-config.yaml).
    - |
      docker run --rm --user 2000:0 --entrypoint /bin/sh "$IMAGE_TAG" -ec '
        # Every invariant is an explicit if/exit. Under "sh -e" a failing
        # non-final command of an && list (e.g. "touch f && rm f") does NOT
        # abort the script, so a chained writability probe would fall
        # through to the final OK message even when touch fails.
        if ! getent passwd 2000 | grep -q ^vllm:; then
          echo FAIL: UID 2000 != vllm
          exit 1
        fi
        if ! id -gn 2>/dev/null | grep -qx root; then
          echo FAIL: GID 0 not root group
          exit 1
        fi
        if ! touch /home/vllm/.smoke; then
          echo FAIL: /home/vllm not writable
          exit 1
        fi
        rm /home/vllm/.smoke
        if ! touch /opt/uv/cache/.smoke; then
          echo FAIL: /opt/uv/cache not writable
          exit 1
        fi
        rm /opt/uv/cache/.smoke
        if ! test -x /usr/local/bin/vllm-nonroot-entrypoint.sh; then
          echo FAIL: wrapper missing
          exit 1
        fi
        if ! test -w /etc/passwd; then
          echo FAIL: /etc/passwd not group-writable
          exit 1
        fi
        if ! test -w /etc/group; then
          echo FAIL: /etc/group not group-writable
          exit 1
        fi
        echo non-root invariants OK
      '
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
@@ -222,6 +222,12 @@ repos:
    name: Update Dockerfile dependency graph
    entry: tools/pre_commit/update-dockerfile-graph.sh
    language: script
  # Runs the shell-level unit test for the non-root entrypoint wrapper.
  # `pass_filenames: false` because the test script is invoked without file
  # arguments; `files` restricts the hook to commits that touch the wrapper
  # or its test script.
  - id: test-nonroot-entrypoint
    name: Test non-root entrypoint wrapper
    entry: bash docker/entrypoints/test_vllm_nonroot_entrypoint.sh
    language: system
    pass_filenames: false
    files: ^docker/entrypoints/(vllm-nonroot-entrypoint|test_vllm_nonroot_entrypoint)\.sh$
  - id: check-forbidden-imports
    name: Check for forbidden imports
    entry: python tools/pre_commit/check_forbidden_imports.py
@@ -105,6 +105,23 @@ ARG BUILD_OS
 ENV DEBIAN_FRONTEND=noninteractive
 # Environment for uv
 # Declared BEFORE the installer + `uv venv` invocations below so the uv
 # binary, managed Python, download cache, and /opt/venv all land under
 # /opt/uv instead of /root/.local/. Without this, the venv created at
 # build time hardlinks back to /root/.local/share/uv/python and
 # descendants of this stage (`build`, `dev`, `csrc-build`,
 # `extensions-build`) inherit a root-owned, non-root-unreadable layout.
 # See #15174, #15359, #31959. Child stages inherit these via Dockerfile
 # `ENV` unless they override them explicitly.
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
 ENV UV_CACHE_DIR=/opt/uv/cache
 ENV UV_INSTALL_DIR=/opt/uv/bin
 ENV PATH="/opt/venv/bin:/opt/uv/bin:$PATH"
 ENV VIRTUAL_ENV="/opt/venv"
 # Install system dependencies including build tools.
 # The Ubuntu path uses apt + deadsnakes-via-uv for Python; the manylinux path
 # (AlmaLinux 8, e.g. pytorch/manylinux2_28-builder) uses dnf and the Python
@@ -145,15 +162,21 @@ RUN if [ "${BUILD_OS}" = "manylinux" ]; then \
 # Install uv and bootstrap /opt/venv. Both paths converge on /opt/venv so all
 # downstream stages stay distro-agnostic.
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
+RUN mkdir -p "${UV_PYTHON_INSTALL_DIR}" "${UV_CACHE_DIR}" "${UV_INSTALL_DIR}" \
    && chmod -R a+rX /opt/uv \
    && curl -LsSf https://astral.sh/uv/install.sh | sh \
    # `--seed` installs pip/setuptools/wheel into the venv so `python3 -m
    # pip` works regardless of how uv happens to link the venv back to the
    # managed Python install (which, at a non-default UV_PYTHON_INSTALL_DIR,
    # doesn't always expose ensurepip via the default venv layout).
    && if [ "${BUILD_OS}" = "manylinux" ]; then \
           # manylinux images ship Python at /opt/python/cpXY-cpXY/; point uv
           # at the matching interpreter rather than letting it fetch one.
           PYV_NODOT=$(echo ${PYTHON_VERSION} | tr -d '.') \
           && MANYLINUX_PY=/opt/python/cp${PYV_NODOT}-cp${PYV_NODOT}/bin/python${PYTHON_VERSION} \
-           && $HOME/.local/bin/uv venv /opt/venv --python "$MANYLINUX_PY"; \
+           && uv venv --seed /opt/venv --python "$MANYLINUX_PY"; \
       else \
-           $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION}; \
+           uv venv --seed /opt/venv --python ${PYTHON_VERSION}; \
       fi \
    && rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
    && ln -sf /opt/venv/bin/python3 /usr/bin/python3 \
@@ -161,13 +184,10 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
    && ln -sf /opt/venv/bin/pip /usr/bin/pip \
    && python3 --version && python3 -m pip --version
-# Activate virtual environment and add uv to PATH
+# UV_LINK_MODE=copy applies to subsequent `uv pip install` RUNs (avoids
-ENV PATH="/opt/venv/bin:/root/.local/bin:$PATH"
+# hardlink failures with BuildKit cache mounts); it must not be set during
-ENV VIRTUAL_ENV="/opt/venv"
+# `uv venv` above, which relies on hardlinking /opt/venv back to the
-
+# managed Python source so ensurepip / `python3 -m pip` still resolve.
 # Environment for uv
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_LINK_MODE=copy
 # Verify GCC version
@@ -198,7 +218,7 @@ COPY requirements/common.txt requirements/common.txt
 COPY requirements/cuda.txt requirements/cuda.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY pyproject.toml pyproject.toml
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
        sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' requirements/cuda.txt; \
    fi \
@@ -218,7 +238,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Track PyTorch lib versions used during build and match in downstream instances.
 # We do this for both nightly and release so we can strip dependencies/*.txt as needed.
 # Otherwise library dependencies can upgrade/downgrade torch incorrectly.
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    uv pip freeze | grep -i "^torch=\|^torchvision=\|^torchaudio=" > torch_lib_versions.txt \
    && TORCH_LIB_VERSIONS=$(cat torch_lib_versions.txt | xargs) \
    && echo "Installed torch libs: ${TORCH_LIB_VERSIONS}"
@@ -304,7 +324,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
        echo "Installing build requirements without torch..." \
        && python3 use_existing_torch.py --prefix \
@@ -349,7 +369,7 @@ ARG VLLM_MAIN_CUDA_VERSION=""
 ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build"
 # Use existing torch for nightly builds
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
        python3 use_existing_torch.py --prefix; \
    fi
@@ -365,7 +385,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Build the vLLM wheel
 # if USE_SCCACHE is set, use sccache to speed up compilation
 # AWS credentials mounted at ~/.aws/credentials for sccache S3 auth (optional)
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    --mount=type=secret,id=aws-credentials,target=/root/.aws/credentials,required=false \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
@@ -399,7 +419,7 @@ ARG vllm_target_device="cuda"
 ENV VLLM_TARGET_DEVICE=${vllm_target_device}
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
    if [ "$USE_SCCACHE" != "1" ]; then \
        # Clean any existing CMake artifacts
        rm -rf .deps && \
@@ -431,7 +451,7 @@ COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.
 # Defaults moved here from tools/ep_kernels/install_python_libraries.sh for centralized version management
 ARG DEEPEP_COMMIT_HASH=73b6ea4
 ARG NVSHMEM_VER
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    mkdir -p /tmp/ep_kernels_workspace/dist && \
    export TORCH_CUDA_ARCH_LIST='9.0a 10.0a' && \
    /tmp/install_python_libraries.sh \
@@ -465,7 +485,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
        echo "Installing build requirements without torch..." \
        && python3 use_existing_torch.py --prefix \
@@ -500,13 +520,13 @@ ENV VLLM_TARGET_DEVICE=${vllm_target_device}
 ENV VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1
 # Use existing torch for nightly builds
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
        python3 use_existing_torch.py --prefix; \
    fi
 # Build the vLLM wheel
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    --mount=type=bind,source=.git,target=.git \
    if [ "${vllm_target_device}" = "cuda" ]; then \
        export VLLM_USE_PRECOMPILED=1; \
@@ -564,7 +584,7 @@ COPY requirements/test/cuda.txt requirements/test/cuda.txt
 COPY requirements/dev.txt requirements/dev.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
        echo "Installing dev requirements plus torch nightly..." \
        && python3 use_existing_torch.py --prefix \
@@ -664,9 +684,50 @@ RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
 RUN python3 -m pip install uv
 # Environment for uv
 # Redirect uv's managed Python and download cache out of /root/ so downstream
 # images (`FROM vllm/vllm-openai` + `USER <uid>`) and direct non-root runs
 # (`docker run --user <uid>:<gid>`) can read and execute them. See #15174,
 # #15359, #31959.
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_LINK_MODE=copy
 ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
 ENV UV_CACHE_DIR=/opt/uv/cache
 RUN mkdir -p "${UV_PYTHON_INSTALL_DIR}" "${UV_CACHE_DIR}" \
    && chgrp -R 0 /opt/uv \
    && chmod -R g+rwX,a+rX /opt/uv
 # ----------------------------------------------------------------------
 # Non-root support (opt-in)
 # ----------------------------------------------------------------------
 # Create a conventional `vllm` user (UID 2000, GID 0) so the image can be
 # run under `--user 2000:0` or the opt-in `vllm-openai-nonroot` target.
 #
 # Design notes:
 #   * GID 0 + group-writable cache dirs follow the OpenShift arbitrary-UID
 #     pattern, so any UID that is a member of group 0 at runtime can write
 #     to /home/vllm and /opt/uv without additional chown work.
 #   * The default `vllm-openai` image keeps `USER root`, so every existing
 #     `docker run vllm/vllm-openai ...` / K8s manifest / `FROM vllm/vllm-openai`
 #     + `RUN uv pip install --system ...` flow is unchanged.
 #   * The entrypoint wrapper below is only used by `vllm-openai-nonroot`; it
 #     handles the OpenShift arbitrary-UID case (UID not in /etc/passwd).
 # See #31959 and docs/deployment/docker.md.
 # Create the `vllm` account (UID 2000, primary GID 0), pre-create its cache
 # and config directories, and make the whole home tree group-0 writable.
 RUN useradd --uid 2000 --gid 0 --create-home --home-dir /home/vllm \
        --shell /bin/bash vllm \
    && mkdir -p /home/vllm/.cache /home/vllm/.config \
    && chown -R 2000:0 /home/vllm \
    && chmod -R g+rwX /home/vllm \
    # Allow the entrypoint wrapper to append a /etc/passwd entry for an
    # arbitrary runtime UID that shares GID 0. Without this, `whoami`, bash's
    # `\u` prompt, `id -un`, and anything else that calls `getpwuid()`
    # directly return "I have no name!" for OpenShift-style arbitrary UIDs.
    # This matches the convention used by Red Hat UBI base images.
    # NOTE(review): `chmod g=u` copies the owner's permission bits to the
    # group, so any process running with GID 0 can rewrite /etc/passwd and
    # /etc/group (including adding a UID-0 entry). Confirm this trade-off is
    # acceptable for every image built on top of this layer.
    && chgrp 0 /etc/passwd /etc/group \
    && chmod g=u /etc/passwd /etc/group
 COPY docker/entrypoints/vllm-nonroot-entrypoint.sh \
    /usr/local/bin/vllm-nonroot-entrypoint.sh
 RUN chmod 0755 /usr/local/bin/vllm-nonroot-entrypoint.sh
 # Enable CUDA forward compatibility by setting '-e VLLM_ENABLE_CUDA_COMPATIBILITY=1'
 # Only needed for datacenter/professional GPUs with older drivers.
@@ -683,7 +744,7 @@ ENV VLLM_ENABLE_CUDA_COMPATIBILITY=0
 ARG PYTORCH_CUDA_INDEX_BASE_URL
 COPY requirements/common.txt /tmp/common.txt
 COPY requirements/cuda.txt /tmp/requirements-cuda.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
        sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' /tmp/requirements-cuda.txt; \
    fi && \
@@ -695,7 +756,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # https://docs.flashinfer.ai/installation.html
 # From versions.json: .flashinfer.version
 ARG FLASHINFER_VERSION=0.6.11.post2
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
        --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
@@ -727,7 +788,7 @@ ARG BITSANDBYTES_VERSION_X86=0.46.1
 ARG BITSANDBYTES_VERSION_ARM64=0.42.0
 ARG TIMM_VERSION=">=1.0.17"
 ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7"
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
        BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
    else \
@@ -752,7 +813,7 @@ ARG PYTORCH_NIGHTLY
 # Check whether to install torch nightly instead of release for this build.
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
    if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
        echo "Installing torch nightly..." \
        && uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
@@ -766,7 +827,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
        --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
    fi
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
 . /etc/environment && \
 uv pip list
@@ -775,7 +836,7 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 # Install EP kernels wheels (DeepEP) that have been built in the `build` stage
 RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
    uv pip install --system ep_kernels/dist/*.whl --verbose \
        --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
@@ -830,7 +891,7 @@ COPY requirements/test/cuda.txt requirements/test/cuda.txt
 COPY requirements/dev.txt requirements/dev.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
    if [ "$CUDA_MAJOR" -ge 12 ]; then \
        if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
@@ -850,7 +911,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    fi
 # install development dependencies (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    uv pip install --system -e tests/vllm_test_utils
 # enable fast downloads from hf (for testing)
@@ -890,7 +951,7 @@ ENV UV_HTTP_TIMEOUT=500
 # install kv_connectors if requested
 ARG torch_cuda_arch_list='7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
    --mount=type=bind,source=requirements/kv_connectors.txt,target=/tmp/kv_connectors.txt,ro \
    CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
    CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-'); \
@@ -958,5 +1019,32 @@ ENTRYPOINT ["./sagemaker-entrypoint.sh"]
 FROM vllm-openai-base AS vllm-openai
 # To run the image as non-root, either build the `vllm-openai-nonroot` target
 # below, or in a derived Dockerfile uncomment the following line and ensure
 # any additional layers chgrp-0 / chmod-g+rwX paths they write to. The `vllm`
 # user (UID 2000, GID 0) is already created in the `vllm-base` stage.
 # See docs/deployment/docker.md.
 # USER vllm
 ENTRYPOINT ["vllm", "serve"]
 #################### OPENAI API SERVER ####################
 #################### OPENAI API SERVER (NON-ROOT, OPT-IN) ####################
 # Non-root-ready variant of `vllm-openai`. Built via:
 #   docker build --target vllm-openai-nonroot -t vllm:openai-nonroot \
 #       -f docker/Dockerfile .
 #
 # Runtime behavior:
 #   * Default USER is `vllm` (UID 2000, GID 0) created in `vllm-base`.
 #   * HOME is /home/vllm, pre-created group-0-writable so arbitrary UIDs in
 #     group 0 (OpenShift / `--user <uid>:0`) can also use the image.
 #   * Entrypoint wrapper handles the "UID not in /etc/passwd" case for truly
 #     arbitrary UIDs by falling back HOME/USER to sane writable defaults.
 #   * All cache/config envs (HF_HOME, VLLM_CACHE_ROOT, TRITON_CACHE_DIR, ...)
 #     remain unset so their library defaults resolve to $HOME/.cache/... ,
 #     which is writable.
 FROM vllm-openai AS vllm-openai-nonroot
 USER vllm
 WORKDIR /home/vllm
 ENTRYPOINT ["/usr/local/bin/vllm-nonroot-entrypoint.sh"]
 #################### OPENAI API SERVER (NON-ROOT, OPT-IN) ####################
@@ -0,0 +1,266 @@
 #!/bin/sh
 # Shell-level unit test for vllm-nonroot-entrypoint.sh.
 #
 # Runs on the host (no Docker, no GPU) by stubbing `vllm` with a shim that
 # dumps its env + argv instead of actually serving. Exercises the wrapper's
 # HOME/USER fallback behavior that can't be easily tested from buildkite
 # (which would need a GPU to run `vllm serve --help`).
 #
 # Usage:
 #   bash docker/entrypoints/test_vllm_nonroot_entrypoint.sh
 # Exits non-zero on the first failed assertion.
 set -eu
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 WRAPPER="${SCRIPT_DIR}/vllm-nonroot-entrypoint.sh"
 if [ ! -x "$WRAPPER" ]; then
    echo "FAIL: wrapper not found or not executable: $WRAPPER" >&2
    exit 1
 fi
 # Scratch directory for stubs, fake passwd files and captured output. The
 # trap is installed immediately so the directory is removed on any exit.
 WORKDIR="$(mktemp -d)"
 trap 'rm -rf "$WORKDIR"' EXIT
 # Resolve symlinks in the scratch path. The wrapper is launched via `env -i`
 # (no inherited PWD), so the stub's `pwd` reports the physical directory;
 # without canonicalization the PWD= assertions fail wherever the temp dir
 # sits behind a symlink (e.g. /var -> /private/var).
 _workdir_real="$(cd "$WORKDIR" && pwd -P)"
 WORKDIR="$_workdir_real"
 unset _workdir_real
 # Stub `vllm` on PATH. It dumps env + argv + cwd to stdout so we can assert.
 # The `${VAR-__unset__}` expansions print the sentinel only when VAR is
 # unset (a set-but-empty value prints as empty). The quoted 'EOF' delimiter
 # keeps the stub body unexpanded when it is written out.
 mkdir -p "$WORKDIR/bin"
 cat > "$WORKDIR/bin/vllm" <<'EOF'
 #!/bin/sh
 echo "ARGV=$*"
 echo "HOME=${HOME-__unset__}"
 echo "USER=${USER-__unset__}"
 echo "LOGNAME=${LOGNAME-__unset__}"
 echo "PWD=$(pwd)"
 EOF
 chmod +x "$WORKDIR/bin/vllm"
 # Run the wrapper in a scrubbed environment and capture its stdout.
 #
 # Usage: run_wrapper <output_file> <env_kv>... -- <wrapper_arg>...
 #   <output_file>  file that receives the wrapper's stdout
 #   <env_kv>       zero or more NAME=value assignments for the child env
 #   <wrapper_arg>  arguments forwarded verbatim to the wrapper
 # Reads the globals WORKDIR (stub bin dir parent) and WRAPPER (script path).
 # Returns the exit status of the wrapper, or 2 when the `--` separator is
 # missing.
 run_wrapper() {
    _out="$1"; shift
    # Rebuild "$@" in place as: <env_kv>... "$WRAPPER" <wrapper_arg>...
    # New words are appended behind the originals, which are then shifted
    # away. Keeping everything in "$@" (instead of a flat string) preserves
    # assignments that contain whitespace or glob characters.
    _orig_argc=$#
    _past_sep=0
    for _arg in "$@"; do
        if [ "$_past_sep" = 0 ] && [ "$_arg" = "--" ]; then
            _past_sep=1
            set -- "$@" "$WRAPPER"
        else
            set -- "$@" "$_arg"
        fi
    done
    if [ "$_past_sep" != 1 ]; then
        echo "run_wrapper: missing -- separator" >&2
        return 2
    fi
    shift "$_orig_argc"
    env -i PATH="$WORKDIR/bin:/usr/bin:/bin" "$@" > "$_out"
 }
 # Abort the test run with a diagnostic.
 #
 # Usage: fail <output_file> <message>...
 # Prints "FAIL: <message>" followed by the captured wrapper stdout to
 # stderr, then exits 1. The output-file path is shifted off first so it is
 # not echoed as part of the message.
 fail() {
    _fail_out="$1"; shift
    echo "FAIL: $*" >&2
    echo "--- stdout ---" >&2
    cat "$_fail_out" >&2
    exit 1
 }
 # Assert that the captured output shows the wrapper's default HOME.
 #
 # Usage: expect_default_home <output_file> <case_label>
 # When /home/vllm is writable on this host the expected HOME is /home/vllm;
 # otherwise it is a private /tmp/vllm-home.* directory. Sets the global
 # `expected_home` so callers can mention it in their PASS message.
 expect_default_home() {
    _capture="$1"
    _label="$2"
    if [ ! -w /home/vllm ]; then
        expected_home="/tmp/vllm-home.XXXXXX"
        if ! grep -Eq '^HOME=/tmp/vllm-home\.[^/]+$' "$_capture"; then
            fail "$_capture" "$_label: HOME not set to $expected_home"
        fi
    else
        expected_home="/home/vllm"
        if ! grep -q "^HOME=$expected_home\$" "$_capture"; then
            fail "$_capture" "$_label: HOME not set to $expected_home"
        fi
    fi
 }
 # -----------------------------------------------------------------------------
 # Case 1: writable HOME and USER both set -> wrapper must leave them alone.
 # -----------------------------------------------------------------------------
 # Assertions use `grep -Fx` (fixed string, whole line) so regex
 # metacharacters in the mktemp-generated path (e.g. the `.` in `tmp.AbC123`)
 # are compared literally rather than as patterns.
 case1_home="$WORKDIR/case1-home"
 mkdir -p "$case1_home"
 out="$WORKDIR/case1.out"
 run_wrapper "$out" "HOME=$case1_home" "USER=alice" "LOGNAME=alice" -- --model foo
 grep -Fxq "HOME=$case1_home" "$out" || fail "$out" "case1: HOME not preserved"
 grep -Fxq "USER=alice" "$out" || fail "$out" "case1: USER not preserved"
 grep -Fxq "LOGNAME=alice" "$out" || fail "$out" "case1: LOGNAME not preserved"
 grep -Fxq "ARGV=serve --model foo" "$out" || fail "$out" "case1: ARGV wrong"
 echo "PASS: case1 (writable HOME + USER preserved)"
 # -----------------------------------------------------------------------------
 # Case 2: HOME unset -> falls back to /home/vllm if writable, else
 # /tmp/vllm-home.XXXXXX.
 # -----------------------------------------------------------------------------
 # Which branch is expected depends on the host: the wrapper probes the real
 # /home/vllm, which usually does not exist on a developer machine, so the
 # /tmp/vllm-home.XXXXXX fallback is the common outcome there.
 out="$WORKDIR/case2.out"
 run_wrapper "$out" -- --model bar
 expect_default_home "$out" "case2"
 if ! grep -qx "USER=vllm" "$out"; then
    fail "$out" "case2: USER not defaulted to vllm"
 fi
 if ! grep -qx "LOGNAME=vllm" "$out"; then
    fail "$out" "case2: LOGNAME not defaulted to vllm"
 fi
 if ! grep -qx "ARGV=serve --model bar" "$out"; then
    fail "$out" "case2: ARGV wrong"
 fi
 echo "PASS: case2 (unset HOME falls back to $expected_home, USER defaulted)"
 # -----------------------------------------------------------------------------
 # Case 3: HOME set but unwritable -> must also fall back.
 # Skipped when running as root: root bypasses directory mode bits, so
 # `[ -w ... ]` reports the 0500 directory as writable and the wrapper's
 # fallback path cannot be exercised.
 # -----------------------------------------------------------------------------
 if [ "$(id -u)" = "0" ]; then
    echo "SKIP: case3 (running as root; DAC override makes unwritable HOME untestable)"
 else
    ro_home="$WORKDIR/ro-home"
    mkdir -p "$ro_home"
    chmod 0500 "$ro_home"
    out="$WORKDIR/case3.out"
    run_wrapper "$out" "HOME=$ro_home" -- --model baz
    # Restore write permission before asserting so a failed assertion does
    # not leave a read-only directory behind for the cleanup trap.
    chmod 0700 "$ro_home"
    expect_default_home "$out" "case3"
    grep -q "^USER=vllm\$" "$out" || fail "$out" "case3: USER not defaulted"
    echo "PASS: case3 (unwritable HOME overridden)"
 fi
 # -----------------------------------------------------------------------------
 # Case 4: USER set but LOGNAME unset -> LOGNAME mirrors USER.
 # -----------------------------------------------------------------------------
 out="$WORKDIR/case4.out"
 case4_home="$WORKDIR/case4-home"
 mkdir -p "$case4_home"
 run_wrapper "$out" "HOME=$case4_home" "USER=carol" -- --model qux
 if ! grep -qx "USER=carol" "$out"; then
    fail "$out" "case4: USER not preserved"
 fi
 if ! grep -qx "LOGNAME=carol" "$out"; then
    fail "$out" "case4: LOGNAME not mirrored from USER"
 fi
 echo "PASS: case4 (LOGNAME mirrors USER when unset)"
 # -----------------------------------------------------------------------------
 # Case 5: the passwd file is writable AND has no entry for the current UID ->
 # the wrapper appends a synthetic entry. The VLLM_PASSWD_FILE test hook points
 # the wrapper at a scratch file so the real /etc/passwd is never touched.
 # -----------------------------------------------------------------------------
 current_uid="$(id -u)"
 current_gid="$(id -g)"
 case5_home="$WORKDIR/case5-home"
 mkdir -p "$case5_home"
 fake_passwd="$WORKDIR/fake-passwd"
 # Start from an empty file so the current UID is guaranteed to be absent.
 : > "$fake_passwd"
 out="$WORKDIR/case5.out"
 run_wrapper "$out" "HOME=$case5_home" "VLLM_PASSWD_FILE=$fake_passwd" -- --model foo
 expected_line="vllm:x:${current_uid}:${current_gid}:vllm:${case5_home}:/bin/bash"
 if ! grep -Fx "$expected_line" "$fake_passwd" > /dev/null; then
    echo "FAIL: case5: expected line not found in fake passwd:"
    echo "  expected: $expected_line"
    echo "  file contents:"
    cat "$fake_passwd"
    exit 1
 fi
 echo "PASS: case5 (passwd entry appended for arbitrary UID)"
 # -----------------------------------------------------------------------------
 # Case 6: /etc/passwd is writable but current UID already has an entry ->
 # wrapper must NOT duplicate the entry.
 # -----------------------------------------------------------------------------
 fake_passwd="$WORKDIR/fake-passwd-prepopulated"
 printf 'vllm:x:%s:%s:vllm:/home/vllm:/bin/bash\n' "$current_uid" "$current_gid" > "$fake_passwd"
 out="$WORKDIR/case6.out"
 run_wrapper "$out" "HOME=$case5_home" "VLLM_PASSWD_FILE=$fake_passwd" -- --model foo
 # Count entries whose UID field (3rd colon-separated column) equals ours.
 # Comparing the field exactly avoids also counting lines where only the GID
 # column happens to match; `n + 0` prints 0 when nothing matched.
 uid_lines="$(awk -F: -v uid="$current_uid" '$3 == uid { n++ } END { print n + 0 }' "$fake_passwd")"
 [ "$uid_lines" = "1" ] \
    || { echo "FAIL: case6: expected exactly one entry for UID $current_uid, got $uid_lines"; cat "$fake_passwd"; exit 1; }
 echo "PASS: case6 (existing passwd entry not duplicated)"
 # -----------------------------------------------------------------------------
 # Case 7: the passwd file is NOT writable -> wrapper must skip the append
 # instead of crashing. Only meaningful for a non-root caller: root's DAC
 # override makes `[ -w ... ]` true regardless of mode bits, so the situation
 # cannot be simulated as root. Inside the container the wrapper runs as a
 # non-root UID, which is exactly the situation this case covers.
 # -----------------------------------------------------------------------------
 if [ "$(id -u)" != "0" ]; then
    fake_passwd="$WORKDIR/ro-passwd"
    : > "$fake_passwd"
    chmod 0444 "$fake_passwd"
    out="$WORKDIR/case7.out"
    run_wrapper "$out" "HOME=$case5_home" "VLLM_PASSWD_FILE=$fake_passwd" -- --model foo
    # A non-empty file means the wrapper wrote to it despite the mode bits.
    if [ -s "$fake_passwd" ]; then
        echo "FAIL: case7: RO passwd file was modified"
        cat "$fake_passwd"
        exit 1
    fi
    # The ARGV line proves the wrapper still reached `vllm serve`.
    if ! grep -q "^ARGV=serve --model foo\$" "$out"; then
        fail "$out" "case7: wrapper didn't exec vllm"
    fi
    chmod 0600 "$fake_passwd"
    echo "PASS: case7 (unwritable passwd file tolerated)"
 else
    echo "SKIP: case7 (running as root; DAC override makes unwritable check meaningless)"
 fi
 # -----------------------------------------------------------------------------
 # Case 8: caller's writable CWD is preserved — wrapper must NOT chdir to HOME
 # when cwd is usable. Protects relative-path workflows like
 # `docker run -w /models ... --model ./llama.gguf`.
 # -----------------------------------------------------------------------------
 # Assertions use `grep -Fx` (fixed string, whole line) so the temp path and
 # the `./` in the argv are compared literally, not as regex patterns.
 case8_home="$WORKDIR/case8-home"
 mkdir -p "$case8_home"
 case8_cwd="$WORKDIR/case8-cwd"
 mkdir -p "$case8_cwd"
 out="$WORKDIR/case8.out"
 (cd "$case8_cwd" && run_wrapper "$out" "HOME=$case8_home" "USER=alice" "LOGNAME=alice" -- --model ./relpath)
 grep -Fxq "PWD=$case8_cwd" "$out" \
    || fail "$out" "case8: writable cwd not preserved (got $(grep '^PWD=' "$out"))"
 grep -Fxq "ARGV=serve --model ./relpath" "$out" \
    || fail "$out" "case8: relative argv not preserved"
 echo "PASS: case8 (writable cwd preserved; relative argv still resolves from caller's cwd)"
 # -----------------------------------------------------------------------------
 # Case 9: read-only cwd is ALSO preserved. A caller who mounts a read-only
 # model directory at the container's cwd (e.g. `docker run -w /models` with
 # /models bind-mounted ro) expects relative argv like `--model ./foo.gguf`
 # to resolve against /models. An earlier version of this wrapper rewrote
 # read-only cwd to $HOME and broke that workflow; this case guards against
 # the regression returning.
 # -----------------------------------------------------------------------------
 # Assertions use `grep -Fx` (fixed string, whole line) so the temp path and
 # the `./` in the argv are compared literally, not as regex patterns.
 case9_home="$WORKDIR/case9-home"
 mkdir -p "$case9_home"
 case9_ro="$WORKDIR/case9-ro"
 mkdir -p "$case9_ro"
 chmod 0555 "$case9_ro"
 out="$WORKDIR/case9.out"
 (cd "$case9_ro" && run_wrapper "$out" "HOME=$case9_home" "USER=alice" "LOGNAME=alice" -- --model ./foo)
 grep -Fxq "PWD=$case9_ro" "$out" \
    || fail "$out" "case9: read-only cwd was rewritten (got $(grep '^PWD=' "$out"))"
 grep -Fxq "ARGV=serve --model ./foo" "$out" \
    || fail "$out" "case9: relative argv not preserved"
 chmod 0700 "$case9_ro"
 echo "PASS: case9 (read-only cwd preserved; relative argv still resolves from caller's cwd)"
 # -----------------------------------------------------------------------------
 # Case 10: truly inaccessible cwd (no search bit) DOES fall back to $HOME.
 # Skipped as root because DAC override lets root cd into 0000 directories.
 # -----------------------------------------------------------------------------
 if [ "$(id -u)" = "0" ]; then
    echo "SKIP: case10 (running as root; DAC override makes inaccessible cwd untestable)"
 else
    case10_home="$WORKDIR/case10-home"
    mkdir -p "$case10_home"
    case10_cwd="$WORKDIR/case10-cwd"
    mkdir -p "$case10_cwd"
    out="$WORKDIR/case10.out"
    # Make cwd genuinely inaccessible (mode 0000 = no search bit -> cd .
    # fails with EACCES). Use absolute paths for chmod so our own test
    # cleanup still works without needing search perm on the dir.
    (
        cd "$case10_cwd"
        chmod 0000 "$case10_cwd"
        run_wrapper "$out" "HOME=$case10_home" "USER=alice" "LOGNAME=alice" -- --model foo
    )
    chmod 0700 "$case10_cwd"
    # `grep -Fx` compares the temp path literally (fixed string, whole
    # line) instead of interpreting it as a regex.
    grep -Fxq "PWD=$case10_home" "$out" \
        || fail "$out" "case10: inaccessible cwd not overridden to HOME (got $(grep '^PWD=' "$out"))"
    echo "PASS: case10 (inaccessible cwd falls back to \$HOME)"
 fi
 # -----------------------------------------------------------------------------
 # Case 11: when no private fallback dir can be created under /tmp, the
 # wrapper settles on /tmp itself as HOME rather than leaving HOME empty
 # under set -eu. Only reachable when /home/vllm is not writable on this host.
 # -----------------------------------------------------------------------------
 if [ ! -w /home/vllm ]; then
    # Shadow mktemp with a stub that always fails; the stub directory is
    # first on the PATH that run_wrapper hands to the wrapper.
    cat > "$WORKDIR/bin/mktemp" <<'EOF'
 #!/bin/sh
 exit 1
 EOF
    chmod +x "$WORKDIR/bin/mktemp"
    out="$WORKDIR/case11.out"
    run_wrapper "$out" -- --model no-mktemp
    rm -f "$WORKDIR/bin/mktemp"
    if ! grep -q "^HOME=/tmp\$" "$out"; then
        fail "$out" "case11: mktemp failure did not fall back to /tmp"
    fi
    if ! grep -q "^USER=vllm\$" "$out"; then
        fail "$out" "case11: USER not defaulted"
    fi
    if ! grep -q "^LOGNAME=vllm\$" "$out"; then
        fail "$out" "case11: LOGNAME not defaulted"
    fi
    if ! grep -q "^ARGV=serve --model no-mktemp\$" "$out"; then
        fail "$out" "case11: ARGV wrong"
    fi
    echo "PASS: case11 (mktemp failure falls back to /tmp)"
 else
    echo "SKIP: case11 (/home/vllm is writable; mktemp fallback path is not used)"
 fi
 echo ""
 echo "ALL CASES PASSED."
@@ -0,0 +1,87 @@
 #!/bin/sh
 # Entrypoint wrapper for the opt-in `vllm-openai-nonroot` image.
 #
 # The image also ships a `vllm` user (UID 2000, GID 0) with HOME /home/vllm
 # and a group-0-writable home directory. When the container is launched with
 # `--user 2000:0` (or any other UID in group 0) the passwd entry is enough on
 # its own: Docker picks up HOME=/home/vllm, getpass.getuser() resolves to
 # "vllm", and every cache dir (HF, Triton, Inductor, vLLM, Numba, Outlines)
 # that defaults to `$HOME/.cache/...` lands in a writable location.
 #
 # This wrapper exists for the *arbitrary-UID* case (e.g. OpenShift's
 # `runAsUser: 1000540000` Restricted Pod Security Standard) where the caller
 # UID is not in /etc/passwd at all. In that case:
 #   * $HOME may be unset or resolve to "/" (unwritable).
 #   * getpass.getuser() falls back to pwd.getpwuid() -> KeyError.
 #
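 # When $HOME is unset, empty, or not writable, the wrapper re-points it to
 # /home/vllm if that is writable, otherwise to a private
 # /tmp/vllm-home.XXXXXX directory (or /tmp itself as a last resort). It also
 # defaults $USER to "vllm" (and $LOGNAME to $USER) so the pwd-lookup path is
 # never taken. Everything else is forwarded to `vllm serve`.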
 #
 # Non-empty caller-set env vars (HOME, USER, LOGNAME) are preserved, so
 # existing K8s manifests and `docker run -e ...` keep working unchanged.
 # Unset or empty values fall through to the wrapper's defaults, matching
 # what shell code typically expects from "unset".
 # Abort on any unhandled command failure (-e) and on expansion of an unset
 # variable (-u); optional variables below are therefore read as ${VAR:-}.
 set -eu
 # Pick a usable HOME. A non-empty, writable caller-supplied HOME is kept.
 # Otherwise try, in order: /home/vllm if writable, a fresh private directory
 # from mktemp, and finally /tmp itself.
 if [ -n "${HOME:-}" ] && [ -w "${HOME}" ]; then
    :
 elif [ -w /home/vllm ]; then
    export HOME=/home/vllm
 elif _h="$(mktemp -d /tmp/vllm-home.XXXXXX 2>/dev/null)"; then
    export HOME="$_h"
    # Best effort only: a chmod failure is ignored.
    chmod 0700 "$HOME" 2>/dev/null || true
    unset _h
 else
    export HOME=/tmp
    unset _h
 fi
 # Keep the caller's working directory as long as it can still be entered.
 # A read-only mount (e.g. `docker run -w /models ... --model ./llama.gguf`
 # where /models is the user's model share) is a perfectly usable cwd: vllm
 # only needs to *read* relative paths from it. $HOME becomes the cwd only
 # when the current one is truly inaccessible (no search bit, deleted inode,
 # mount gone, etc.), i.e. exactly when `cd .` fails.
 #
 # The probe tests accessibility, not writability. A writability test would
 # silently move the cwd for every read-only workflow and break relative
 # arguments such as `--model ./llama.gguf`, `--chat-template ./t.jinja`,
 # relative TLS cert paths, etc.
 cd . 2>/dev/null || cd "$HOME"
 # getpass.getuser() looks at $USER/$LOGNAME/etc. before it ever reaches
 # getpwuid(); exporting them here turns the "UID not in passwd" path into a
 # no-op for the whole process tree. Non-empty caller values are kept as-is;
 # LOGNAME defaults to whatever USER ends up being.
 [ -n "${USER:-}" ] || export USER=vllm
 [ -n "${LOGNAME:-}" ] || export LOGNAME="$USER"
 # Shell-level tooling (`whoami`, bash's `\u` prompt, `id -un`, `sudo`) does
 # NOT consult $USER; it calls getpwuid(geteuid()) directly. For arbitrary
 # runtime UIDs in OpenShift-style deploys this returns "I have no name!".
 # If /etc/passwd is group-0 writable (set at build time) and doesn't yet
 # have an entry for this UID, append a synthetic one so every downstream
 # consumer sees a consistent "vllm" identity.
 #
 # We parse the passwd file directly instead of calling `getent` because
 # the container's NSS is typically just files anyway, and this lets us
 # unit-test via the VLLM_PASSWD_FILE hook (undocumented; production uses
 # /etc/passwd).
 #
 # The entry is purely cosmetic, so a failed append must never stop the
 # server. Under `set -e` an unguarded failing printf/redirect would abort
 # this wrapper before it reaches `exec vllm serve`; instead the failure is
 # reported on stderr and startup continues.
 _passwd_file="${VLLM_PASSWD_FILE:-/etc/passwd}"
 _uid="$(id -u)"
 # awk exits 0 when some record's third field (the UID) equals ours.
 if [ -w "$_passwd_file" ] \
    && ! awk -F: -v u="$_uid" '$3==u {found=1; exit} END {exit !found}' "$_passwd_file" 2>/dev/null; then
    # Record layout: name:password:UID:GID:GECOS:home:shell
    printf 'vllm:x:%s:%s:vllm:%s:/bin/bash\n' \
        "$_uid" "$(id -g)" "$HOME" >> "$_passwd_file" \
        || echo "vllm entrypoint: warning: could not add UID $_uid to $_passwd_file; continuing" >&2 \
        || true
 fi
 unset _uid _passwd_file
 # Replace this shell with `vllm serve`, handing the wrapper's own arguments
 # through unchanged ("$@" keeps each argument as a separate word).
 exec vllm serve "$@"
@@ -8,6 +8,64 @@ toc_depth: 2
 --8<-- "docs/getting_started/installation/gpu.md:pre-built-images"
 ## Run as a non-root user
 The CUDA `vllm/vllm-openai` image runs as root by default for backward
 compatibility. It is also prepared to run as the built-in `vllm` user
 (UID 2000, GID 0):
 ```bash
 docker run --rm --gpus all \
    --user 2000:0 \
    -p 8000:8000 \
    vllm/vllm-openai:latest \
    meta-llama/Llama-3.1-8B-Instruct
 ```
 When mounting model or cache volumes for a non-root container, mount writable
 paths under `/home/vllm` instead of `/root`. For example, mount the Hugging
 Face cache at `/home/vllm/.cache/huggingface` and make the mounted directory
 writable by group 0.
 ```bash
 docker run --rm --gpus all \
    --user 2000:0 \
    -v ~/.cache/huggingface:/home/vllm/.cache/huggingface \
    -p 8000:8000 \
    vllm/vllm-openai:latest \
    meta-llama/Llama-3.1-8B-Instruct
 ```
 To build an image that defaults to the non-root `vllm` user, use the opt-in
 `vllm-openai-nonroot` target:
 ```bash
 docker build --target vllm-openai-nonroot \
    -t vllm-openai-nonroot:local \
    -f docker/Dockerfile .
 docker run --rm --gpus all \
    -p 8000:8000 \
    vllm-openai-nonroot:local \
    meta-llama/Llama-3.1-8B-Instruct
 ```
 The `vllm-openai-nonroot` target also supports OpenShift-style arbitrary UIDs
 when the runtime UID is a member of group 0. In Kubernetes manifests, set the
 pod-level security context (`spec.securityContext`; `fsGroup` is not valid on
 a container-level `securityContext`) accordingly and keep mounted cache/model
 paths writable by group 0:
 ```yaml
 securityContext:
  runAsNonRoot: true
  runAsUser: 1000540000
  runAsGroup: 0
  fsGroup: 0
 ```
 Runtime UIDs outside group 0 are not part of the documented support matrix
 because they may be unable to write to `/home/vllm` or `/opt/uv/cache`.
 ## Build image from source
 --8<-- "docs/getting_started/installation/gpu.md:build-image-from-source"