[CPU][IBM Z][Dockerfile][Docs] Fix s390x builds for torch 2.11 and update docs for s390x (#39910)

Signed-off-by: Rehan Khan <Rehan.Khan7@ibm.com>
This commit is contained in:
R3hankhan
2026-04-16 10:56:21 +05:30
committed by GitHub
parent 445b7093fd
commit 4b7ca37bd4
5 changed files with 72 additions and 47 deletions
+3
View File
@@ -147,6 +147,9 @@ struct AttentionMetadata {
case ISA::NEON:
ss << "NEON, ";
break;
case ISA::VXE:
ss << "VXE, ";
break;
}
ss << "workitem_group_num: " << workitem_group_num
<< ", reduction_item_num: " << reduction_item_num
+22
View File
@@ -54,12 +54,34 @@ struct Counter {
};
// Returns the L2 cache budget in bytes: half of the detected L2 cache size.
// The value is computed on the first call and cached in a function-local
// static, so later calls only read the cached number.
//
// On s390x the cache size is resolved by trying, in order:
//   1. the "l2_cache_size" entry of at::cpu::get_cpu_capabilities(), if present;
//   2. sysconf(_SC_LEVEL2_CACHE_SIZE);
//   3. a fixed 256 KiB default.
// A source is skipped whenever it reports a non-positive size.
//
// On every other architecture the "l2_cache_size" capability is mandatory;
// presumably caps.at() throws when the key is missing (map-like `at`) — confirm
// against the at::cpu API.
inline int64_t get_available_l2_size() {
#if defined(__s390x__)
  static int64_t size = []() -> int64_t {
    // Last-resort size used when neither the capability map nor sysconf
    // reports a usable value.
    constexpr int64_t kFallbackL2Bytes = 256 * 1024;

    // The size stays in a signed 64-bit integer on purpose: narrowing to
    // uint32_t would turn a negative "unknown" marker into a huge positive
    // size and would truncate values of 4 GiB or more.
    int64_t l2_bytes = 0;

    auto caps = at::cpu::get_cpu_capabilities();
    auto it = caps.find("l2_cache_size");
    if (it != caps.end()) {
      l2_bytes = static_cast<int64_t>(it->second.toInt());
    }

    if (l2_bytes <= 0) {
      // sysconf returns -1 on error; some systems report 0 when the size is
      // unknown. Both cases fall through to the default below.
      l2_bytes = static_cast<int64_t>(sysconf(_SC_LEVEL2_CACHE_SIZE));
    }

    if (l2_bytes <= 0) {
      l2_bytes = kFallbackL2Bytes;
    }

    return l2_bytes >> 1;  // use 50% of L2 cache
  }();
  return size;
#else
  static int64_t size = []() {
    auto caps = at::cpu::get_cpu_capabilities();
    // Narrowing to uint32_t is kept so the result matches the established
    // behaviour on these architectures; the cast only makes it explicit.
    const uint32_t l2_cache_size =
        static_cast<uint32_t>(caps.at("l2_cache_size").toInt());
    return l2_cache_size >> 1;  // use 50% of L2 cache
  }();
  return size;
#endif
}
template <int32_t alignment_v, typename T>
+34 -34
View File
@@ -42,7 +42,7 @@ FROM python-install AS pyarrow
# Build Apache Arrow
WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \
git clone https://github.com/apache/arrow.git && \
git clone https://github.com/apache/arrow.git -b maint-19.0.1 && \
cd arrow/cpp && \
mkdir release && cd release && \
cmake -DCMAKE_BUILD_TYPE=Release \
@@ -68,19 +68,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -r requirements-build.txt && \
python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel
FROM python-install AS numa-build
# Install numactl (needed for numa.h dependency)
WORKDIR /tmp
RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.16.tar.gz && \
tar -xvzf v2.0.16.tar.gz && \
cd numactl-2.0.16 && \
./autogen.sh && \
./configure && \
make
# Set include path
ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
FROM python-install AS rust
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
@@ -91,6 +78,18 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && \
rustup default stable && \
rustup show
FROM python-install AS numa-build
WORKDIR /tmp
RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.19.tar.gz && \
tar -xvzf v2.0.19.tar.gz && \
cd numactl-2.0.19 && \
./autogen.sh && \
./configure && \
make
# Set include path
ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
FROM python-install AS torch-vision
# Install torchvision
ARG TORCH_VISION_VERSION=v0.26.0
@@ -133,7 +132,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
git clone --recursive https://github.com/numba/llvmlite.git -b v0.44.0 && \
git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \
cd llvm-project && mkdir build && cd build && \
uv pip install 'cmake<4' setuptools numpy && \
uv pip install 'cmake<4' 'setuptools<70' numpy && \
export PREFIX=/usr/local && CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" \
CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" \
CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" \
@@ -193,27 +192,22 @@ RUN --mount=type=cache,target=/root/.cache/uv \
cd opencv-python && \
python -m build --wheel --installer=uv --outdir /tmp/opencv-python/dist
# Build Outlines Core
FROM python-install AS outlines-core-builder
## Todo(r3hankhan123): Remove guidance-builder stage once vLLM upgrades to new version of llguidance that fixes s390x issues. See https://github.com/guidance-ai/llguidance/issues/330
FROM python-install AS guidance-builder
WORKDIR /tmp
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
COPY requirements/common.txt /tmp/requirements/common.txt
ARG OUTLINES_CORE_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
--mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
OUTLINES_CORE_VERSION=${OUTLINES_CORE_VERSION:-$(grep -E '^outlines_core\s*==\s*[0-9.]+' /tmp/requirements/common.txt | grep -Eo '[0-9.]+')} && \
if [ -z "${OUTLINES_CORE_VERSION}" ]; then echo "ERROR: Could not determine outlines_core version"; exit 1; fi && \
git clone https://github.com/dottxt-ai/outlines-core.git && \
cd outlines-core && \
git checkout tags/${OUTLINES_CORE_VERSION} && \
sed -i "s/version = \"0.0.0\"/version = \"${OUTLINES_CORE_VERSION}\"/" Cargo.toml && \
git clone https://github.com/guidance-ai/llguidance.git && \
cd llguidance && \
git checkout s390x-fix-v2 && \
uv pip install maturin && \
python -m maturin build --release --out dist
python -m maturin build --release --out dist --compatibility linux
# Final build stage
# # Final build stage
FROM python-install AS vllm-cpu
ARG PYTHON_VERSION
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
@@ -229,10 +223,12 @@ ENV PKG_CONFIG_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:/usr/local/
ENV PATH="${VIRTUAL_ENV:+${VIRTUAL_ENV}/bin}:/opt/rh/gcc-toolset-14/root/usr/bin:/usr/local/bin:$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
# Force pure Python protobuf to avoid s390x C++ extension crashes
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
COPY . /workspace/vllm
WORKDIR /workspace/vllm
RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.16,target=/numactl \
RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.19,target=/numactl \
make -C /numactl install
# Install dependencies, including PyTorch and Apache Arrow
@@ -245,22 +241,22 @@ RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=numba-builder,source=/tmp/llvmlite/dist,target=/tmp/llvmlite-wheels/ \
--mount=type=bind,from=numba-builder,source=/tmp/numba/dist,target=/tmp/numba-wheels/ \
--mount=type=bind,from=opencv-builder,source=/tmp/opencv-python/dist,target=/tmp/opencv-wheels/ \
--mount=type=bind,from=outlines-core-builder,source=/tmp/outlines-core/dist,target=/tmp/outlines-core/dist/ \
ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl) && \
--mount=type=bind,from=guidance-builder,source=/tmp/llguidance/dist,target=/tmp/guidance-wheels/ \
ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/*.whl) && \
VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl) && \
HF_XET_WHL_FILE=$(ls /tmp/hf-xet-wheels/*.whl) && \
LLVM_WHL_FILE=$(ls /tmp/llvmlite-wheels/*.whl) && \
NUMBA_WHL_FILE=$(ls /tmp/numba-wheels/*.whl) && \
OPENCV_WHL_FILE=$(ls /tmp/opencv-wheels/*.whl) && \
OUTLINES_CORE_WHL_FILE=$(ls /tmp/outlines-core/dist/*.whl) && \
uv pip install -v \
$ARROW_WHL_FILE \
GUIDANCE_WHL_FILE=$(ls /tmp/guidance-wheels/*.whl) && \
uv pip install -v \
$ARROW_WHL_FILE \
$VISION_WHL_FILE \
$HF_XET_WHL_FILE \
$LLVM_WHL_FILE \
$NUMBA_WHL_FILE \
$OPENCV_WHL_FILE \
$OUTLINES_CORE_WHL_FILE \
$GUIDANCE_WHL_FILE \
--index-strategy unsafe-best-match \
-r requirements/build/cpu.txt \
-r requirements/cpu.txt
@@ -271,6 +267,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
VLLM_TARGET_DEVICE=cpu VLLM_CPU_MOE_PREPACK=0 python setup.py bdist_wheel && \
uv pip install "$(echo dist/*.whl)[tensorizer]"
# Remove protobuf C++ extension that crashes on s390x
RUN rm -rf /opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/google/_upb/*.so \
/opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/google/protobuf/pyext/*.so 2>/dev/null || true
# setup non-root user for vllm
RUN umask 002 && \
/usr/sbin/useradd --uid 2000 --gid 0 vllm && \
@@ -3,15 +3,15 @@
vLLM has experimental support for the s390x architecture on the IBM Z platform. For now, users must build from source to run natively on the IBM Z platform.
Currently, the CPU implementation for s390x architecture supports FP32 datatype only.
Currently, the CPU implementation for the s390x architecture supports the FP32, BF16 and FP16 datatypes.
--8<-- [end:installation]
--8<-- [start:requirements]
- OS: `Linux`
- SDK: `gcc/g++ >= 12.3.0` or later with Command Line Tools
- SDK: `gcc/g++ >= 14.0.0` with Command Line Tools
- Instruction Set Architecture (ISA): VXE support is required. Works with Z14 and above.
- Build install python packages: `pyarrow`, `torch` and `torchvision`
- Python packages to build and install: `torchvision`, `llvmlite`, `numba`, `pyarrow` (for testing), `opencv-headless`
--8<-- [end:requirements]
--8<-- [start:set-up-using-python]
@@ -24,13 +24,14 @@ Currently, there are no pre-built IBM Z CPU wheels.
--8<-- [end:pre-built-wheels]
--8<-- [start:build-wheel-from-source]
Install the following packages from the package manager before building the vLLM. For example on RHEL 9.4:
Install the following packages from the package manager before building vLLM. For example, on RHEL 9.6:
```bash
dnf install -y \
which procps findutils tar vim git gcc g++ make patch make cython zlib-devel \
which procps findutils tar vim git gcc-toolset-14 gcc-toolset-14-binutils gcc-toolset-14-libatomic-devel zlib-devel \
libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \
openssl-devel openblas openblas-devel wget autoconf automake libtool cmake numactl-devel
openssl-devel openblas openblas-devel autoconf automake libtool cmake numpy libsndfile \
clang llvm-devel llvm-static clang-devel
```
Install Rust >= 1.80, which is needed to install the `outlines-core` and `uvloop` Python packages.
@@ -43,13 +44,13 @@ curl https://sh.rustup.rs -sSf | sh -s -- -y && \
Execute the following commands to build and install vLLM from source.
!!! tip
Please build the following dependencies, `torchvision`, `pyarrow` from source before building vLLM.
Please build the following dependencies from source before building vLLM: `torchvision`, `llvmlite`, `numba`, `llguidance`, `pyarrow`, `opencv-headless`.
```bash
sed -i '/^torch/d' requirements/build/cuda.txt # remove torch from requirements/build/cuda.txt since we use nightly builds
uv pip install -v \
--extra-index-url https://download.pytorch.org/whl/cpu \
--torch-backend auto \
-r requirements/build/cuda.txt \
-r requirements/build/cpu.txt \
-r requirements/cpu.txt \
VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
uv pip install dist/*.whl
@@ -57,10 +58,9 @@ Execute the following commands to build and install vLLM from source.
??? console "pip"
```bash
sed -i '/^torch/d' requirements/build/cuda.txt # remove torch from requirements/build/cuda.txt since we use nightly builds
pip install -v \
--extra-index-url https://download.pytorch.org/whl/nightly/cpu \
-r requirements/build/cuda.txt \
--extra-index-url https://download.pytorch.org/whl/cpu \
-r requirements/build/cpu.txt \
-r requirements/cpu.txt \
VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
pip install dist/*.whl
+1 -1
View File
@@ -19,7 +19,7 @@ pillow # Required for image processing
prometheus-fastapi-instrumentator >= 7.0.0
tiktoken >= 0.6.0 # Required for DBRX tokenizer
lm-format-enforcer == 0.11.3
llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" or platform_machine == "ppc64le"
llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "ppc64le"
outlines_core == 0.2.11
# required for outlines backend disk cache
diskcache == 5.6.3