[CPU][IBM Z][Dockerfile][Docs] Fix s390x builds for torch 2.11 and update docs for s390x (#39910)

Signed-off-by: Rehan Khan <Rehan.Khan7@ibm.com>
2026-06-06 00:16:14 +00:00 · 2026-04-16 10:56:21 +05:30
parent 445b7093fd
commit 4b7ca37bd4
5 changed files with 72 additions and 47 deletions
@@ -147,6 +147,9 @@ struct AttentionMetadata {
      case ISA::NEON:
        ss << "NEON, ";
        break;
+      case ISA::VXE:
+        ss << "VXE, ";
+        break;
    }
    ss << "workitem_group_num: " << workitem_group_num
       << ", reduction_item_num: " << reduction_item_num
@@ -54,12 +54,34 @@ struct Counter {
 };

 inline int64_t get_available_l2_size() {  // returns half of the detected L2 cache size; computed once and cached in a function-local static
+#if defined(__s390x__)  // s390x path: tolerate a missing or zero "l2_cache_size" capability via fallbacks
+  static int64_t size = []() {
+    uint32_t l2_cache_size = 0;  // 0 means "not determined yet"
+    auto caps = at::cpu::get_cpu_capabilities();
+    auto it = caps.find("l2_cache_size");  // find() rather than at(): an absent key is handled below
+    if (it != caps.end()) {
+      l2_cache_size = static_cast<uint32_t>(it->second.toInt());
+    }
+    if (l2_cache_size == 0) {  // first fallback: query the C library
+      long sys_l2 = sysconf(_SC_LEVEL2_CACHE_SIZE);
+      if (sys_l2 > 0) {  // non-positive results are ignored
+        l2_cache_size = static_cast<uint32_t>(sys_l2);
+      }
+    }
+    if (l2_cache_size == 0) {  // last resort: hard-coded default
+      l2_cache_size = 256 * 1024;
+    }
+    return static_cast<int64_t>(l2_cache_size) >> 1;  // use 50% of L2 cache
+  }();
+  return size;
+#else  // all other architectures: no fallback, the capability entry is read directly
   static int64_t size = []() {
     auto caps = at::cpu::get_cpu_capabilities();
     const uint32_t l2_cache_size = caps.at("l2_cache_size").toInt();  // NOTE(review): presumably fails if the key is absent - confirm against the caps container type
     return l2_cache_size >> 1;  // use 50% of L2 cache
   }();
   return size;
+#endif
 }

 template <int32_t alignment_v, typename T>
@@ -42,7 +42,7 @@ FROM python-install AS pyarrow
 # Build Apache Arrow
 WORKDIR /tmp
 RUN --mount=type=cache,target=/root/.cache/uv \
-    git clone https://github.com/apache/arrow.git && \
+    git clone https://github.com/apache/arrow.git  -b maint-19.0.1 && \
    cd arrow/cpp && \
    mkdir release && cd release && \
    cmake -DCMAKE_BUILD_TYPE=Release \
@@ -68,19 +68,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements-build.txt && \
    python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel

-FROM python-install AS numa-build
-# Install numactl (needed for numa.h dependency)
-WORKDIR /tmp
-RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.16.tar.gz && \
-    tar -xvzf v2.0.16.tar.gz && \
-    cd numactl-2.0.16 && \
-    ./autogen.sh && \
-    ./configure && \
-    make
-
-# Set include path
-ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
-
 FROM python-install AS rust
 ENV CARGO_HOME=/root/.cargo
 ENV RUSTUP_HOME=/root/.rustup
@@ -91,6 +78,18 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && \
    rustup default stable && \
    rustup show

+FROM python-install AS numa-build
+# Build numactl from source (needed for the numa.h dependency).
+# Only `make` runs here; `make install` is executed later in the vllm-cpu stage.
+WORKDIR /tmp
+RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.19.tar.gz && \
+    tar -xvzf v2.0.19.tar.gz && \
+    cd numactl-2.0.19 && \
+    ./autogen.sh && \
+    ./configure && \
+    make
+
+# Add /usr/local/include to the C compiler's header search path
+ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
+
 FROM python-install AS torch-vision
 # Install torchvision
 ARG TORCH_VISION_VERSION=v0.26.0
@@ -133,7 +132,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    git clone --recursive https://github.com/numba/llvmlite.git -b v0.44.0 && \
    git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \
    cd llvm-project && mkdir build && cd  build && \
-    uv pip install 'cmake<4' setuptools numpy && \
+    uv pip install 'cmake<4' 'setuptools<70' numpy && \
    export PREFIX=/usr/local && CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" \
    CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" \
    CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" \
@@ -193,27 +192,22 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    cd opencv-python && \
    python -m build --wheel --installer=uv --outdir /tmp/opencv-python/dist

-# Build Outlines Core
-FROM python-install AS outlines-core-builder
+# TODO(r3hankhan123): Remove the guidance-builder stage once vLLM upgrades to an llguidance release that fixes the s390x issues. See https://github.com/guidance-ai/llguidance/issues/330
+FROM python-install AS guidance-builder
 WORKDIR /tmp
 ENV CARGO_HOME=/root/.cargo
 ENV RUSTUP_HOME=/root/.rustup
 ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
-COPY requirements/common.txt /tmp/requirements/common.txt
-ARG OUTLINES_CORE_VERSION
 RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
    --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
-    OUTLINES_CORE_VERSION=${OUTLINES_CORE_VERSION:-$(grep -E '^outlines_core\s*==\s*[0-9.]+' /tmp/requirements/common.txt | grep -Eo '[0-9.]+')} && \
-    if [ -z "${OUTLINES_CORE_VERSION}" ]; then echo "ERROR: Could not determine outlines_core version"; exit 1; fi && \
-    git clone https://github.com/dottxt-ai/outlines-core.git && \
-    cd outlines-core && \
-    git checkout tags/${OUTLINES_CORE_VERSION} && \
-    sed -i "s/version = \"0.0.0\"/version = \"${OUTLINES_CORE_VERSION}\"/" Cargo.toml && \
+    git clone https://github.com/guidance-ai/llguidance.git && \
+    cd llguidance && \
+    git checkout s390x-fix-v2  && \
    uv pip install maturin && \
-    python -m maturin build --release --out dist
+    python -m maturin build --release --out dist --compatibility linux

 # Final build stage
 FROM python-install AS vllm-cpu
 ARG PYTHON_VERSION
 ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
@@ -229,10 +223,12 @@ ENV PKG_CONFIG_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:/usr/local/
 ENV PATH="${VIRTUAL_ENV:+${VIRTUAL_ENV}/bin}:/opt/rh/gcc-toolset-14/root/usr/bin:/usr/local/bin:$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
 ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
+# Force pure Python protobuf to avoid s390x C++ extension crashes
+ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 COPY . /workspace/vllm
 WORKDIR /workspace/vllm

-RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.16,target=/numactl \
+RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.19,target=/numactl \
    make -C /numactl install

 # Install dependencies, including PyTorch and Apache Arrow
@@ -245,22 +241,22 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=numba-builder,source=/tmp/llvmlite/dist,target=/tmp/llvmlite-wheels/ \
    --mount=type=bind,from=numba-builder,source=/tmp/numba/dist,target=/tmp/numba-wheels/ \
    --mount=type=bind,from=opencv-builder,source=/tmp/opencv-python/dist,target=/tmp/opencv-wheels/ \
-    --mount=type=bind,from=outlines-core-builder,source=/tmp/outlines-core/dist,target=/tmp/outlines-core/dist/ \
-     ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl) && \
+    --mount=type=bind,from=guidance-builder,source=/tmp/llguidance/dist,target=/tmp/guidance-wheels/ \
+     ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/*.whl) && \
     VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl) && \
     HF_XET_WHL_FILE=$(ls /tmp/hf-xet-wheels/*.whl) && \
     LLVM_WHL_FILE=$(ls /tmp/llvmlite-wheels/*.whl) && \
     NUMBA_WHL_FILE=$(ls /tmp/numba-wheels/*.whl) && \
     OPENCV_WHL_FILE=$(ls /tmp/opencv-wheels/*.whl) && \
-     OUTLINES_CORE_WHL_FILE=$(ls /tmp/outlines-core/dist/*.whl) && \
-     uv pip install -v \
-        $ARROW_WHL_FILE  \
+     GUIDANCE_WHL_FILE=$(ls /tmp/guidance-wheels/*.whl) && \
+     uv pip install -v \    
+        $ARROW_WHL_FILE \
        $VISION_WHL_FILE \
        $HF_XET_WHL_FILE \
        $LLVM_WHL_FILE \
        $NUMBA_WHL_FILE \
        $OPENCV_WHL_FILE \
-        $OUTLINES_CORE_WHL_FILE \
+        $GUIDANCE_WHL_FILE \
        --index-strategy unsafe-best-match \
        -r requirements/build/cpu.txt \
        -r requirements/cpu.txt
@@ -271,6 +267,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    VLLM_TARGET_DEVICE=cpu VLLM_CPU_MOE_PREPACK=0 python setup.py bdist_wheel && \
    uv pip install "$(echo dist/*.whl)[tensorizer]"

+# Remove protobuf C++ extension that crashes on s390x
+RUN rm -rf /opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/google/_upb/*.so \
+           /opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/google/protobuf/pyext/*.so 2>/dev/null || true
+
 # setup non-root user for vllm
 RUN umask 002 && \
    /usr/sbin/useradd --uid 2000 --gid 0 vllm && \
@@ -3,15 +3,15 @@

 vLLM has experimental support for the s390x architecture on the IBM Z platform. For now, users must build from source to run natively on IBM Z.

-Currently, the CPU implementation for s390x architecture supports FP32 datatype only.
+Currently, the CPU implementation for the s390x architecture supports the FP32, BF16, and FP16 datatypes.

 --8<-- [end:installation]
 --8<-- [start:requirements]

 - OS: `Linux`
- SDK: `gcc/g++ >= 12.3.0` or later with Command Line Tools
+- SDK: `gcc/g++ >= 14.0.0` with Command Line Tools
 - Instruction Set Architecture (ISA): VXE support is required. Works with Z14 and above.
- Build install python packages: `pyarrow`, `torch` and `torchvision`
+- Python packages to build and install: `torchvision`, `llvmlite`, `numba`, `pyarrow` (for testing), `opencv-headless`

 --8<-- [end:requirements]
 --8<-- [start:set-up-using-python]
@@ -24,13 +24,14 @@ Currently, there are no pre-built IBM Z CPU wheels.
 --8<-- [end:pre-built-wheels]
 --8<-- [start:build-wheel-from-source]

-Install the following packages from the package manager before building the vLLM. For example on RHEL 9.4:
+Install the following packages from the package manager before building vLLM. For example, on RHEL 9.6:

 ```bash
 dnf install -y \
-    which procps findutils tar vim git gcc g++ make patch make cython zlib-devel \
+    which procps findutils tar vim git gcc-toolset-14 gcc-toolset-14-binutils gcc-toolset-14-libatomic-devel zlib-devel \
    libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \
-    openssl-devel openblas openblas-devel wget autoconf automake libtool cmake numactl-devel
+    openssl-devel openblas openblas-devel autoconf automake libtool cmake numpy libsndfile \
+    clang llvm-devel llvm-static clang-devel
 ```

 Install Rust >= 1.80, which is needed to install the `outlines-core` and `uvloop` Python packages.
@@ -43,13 +44,13 @@ curl https://sh.rustup.rs -sSf | sh -s -- -y && \
 Execute the following commands to build and install vLLM from source.

 !!! tip
-    Please build the following dependencies, `torchvision`, `pyarrow` from source before building vLLM.
+    Please build the following dependencies from source before building vLLM: `torchvision`, `llvmlite`, `numba`, `llguidance`, `pyarrow`, and `opencv-headless`.

 ```bash
-    sed -i '/^torch/d' requirements/build/cuda.txt    # remove torch from requirements/build/cuda.txt since we use nightly builds
    uv pip install -v \
+        --extra-index-url https://download.pytorch.org/whl/cpu \
        --torch-backend auto \
-        -r requirements/build/cuda.txt \
+        -r requirements/build/cpu.txt \
        -r requirements/cpu.txt && \
    VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
        uv pip install dist/*.whl
@@ -57,10 +58,9 @@ Execute the following commands to build and install vLLM from source.

 ??? console "pip"
    ```bash
-        sed -i '/^torch/d' requirements/build/cuda.txt    # remove torch from requirements/build/cuda.txt since we use nightly builds
        pip install -v \
-            --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
-            -r requirements/build/cuda.txt \
+            --extra-index-url https://download.pytorch.org/whl/cpu \
+            -r requirements/build/cpu.txt \
            -r requirements/cpu.txt && \
        VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
            pip install dist/*.whl
@@ -19,7 +19,7 @@ pillow  # Required for image processing
 prometheus-fastapi-instrumentator >= 7.0.0
 tiktoken >= 0.6.0  # Required for DBRX tokenizer
 lm-format-enforcer == 0.11.3
-llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" or platform_machine == "ppc64le"
+llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "ppc64le"
 outlines_core == 0.2.11
 # required for outlines backend disk cache
 diskcache == 5.6.3