mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Build] Build bundled DeepGEMM _C per-Python so the wheel imports on every CPython (#41516)
Signed-off-by: mgoin <mgoin64@gmail.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -105,7 +105,11 @@ steps:
|
||||
device: h100
|
||||
num_devices: 1
|
||||
source_file_dependencies:
|
||||
- cmake/external_projects/deepgemm.cmake
|
||||
- tools/install_deepgemm.sh
|
||||
- tools/build_deepgemm_C.py
|
||||
- tools/setup_deepgemm_pythons.sh
|
||||
- tools/check_wheel_deepgemm.py
|
||||
- vllm/utils/deep_gemm.py
|
||||
- vllm/model_executor/layers/fused_moe
|
||||
- vllm/model_executor/layers/quantization
|
||||
@@ -115,6 +119,7 @@ steps:
|
||||
- tests/kernels/attention/test_deepgemm_attention.py
|
||||
- tests/quantization/test_cutlass_w4a16.py
|
||||
commands:
|
||||
- python3 ../tools/check_wheel_deepgemm.py
|
||||
- pytest -v -s kernels/quantization/test_block_fp8.py
|
||||
- pytest -v -s kernels/moe/test_deepgemm.py
|
||||
- pytest -v -s kernels/moe/test_batched_deepgemm.py
|
||||
|
||||
@@ -53,48 +53,67 @@ cuda_archs_loose_intersection(DEEPGEMM_ARCHS
|
||||
if(DEEPGEMM_ARCHS)
|
||||
message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")
|
||||
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
# Build _C once per interpreter in DEEPGEMM_PYTHON_INTERPRETERS (":"-
# separated paths) so the wheel imports cleanly on every supported Python.
# Unset → fall back to the build interpreter (editable / source builds).
# The compile is delegated to tools/build_deepgemm_C.py and always runs
# against the build interpreter's torch — target Pythons don't need torch.
#
# $ENV{...} expands to "" both when the variable is unset and when it is set
# but empty, so both cases take the fallback below. Empty list entries (from
# a leading, trailing or doubled ":") are dropped too: foreach(IN LISTS)
# keeps empty elements, and an empty interpreter path would make the later
# execute_process() abort the configure step with an unhelpful error.
string(REPLACE ":" ";" _dg_pythons "$ENV{DEEPGEMM_PYTHON_INTERPRETERS}")
list(REMOVE_ITEM _dg_pythons "")
if("${_dg_pythons}" STREQUAL "")
  # Nothing usable was requested: build for the interpreter running the build
  # rather than silently skipping the per-Python build.
  set(_dg_pythons "${Python_EXECUTABLE}")
endif()
message(STATUS "DeepGEMM _C will be built for: ${_dg_pythons}")
|
||||
|
||||
#
|
||||
# Build the _C pybind11 extension from DeepGEMM's C++ source.
|
||||
# This is a CXX-only module — CUDA kernels are JIT-compiled at runtime.
|
||||
# The only source compiled into the module is csrc/python_api.cpp.
# NOTE(review): WITH_SOABI presumably tags the file name with the build
# interpreter's SOABI only, i.e. one module for one CPython — confirm against
# the FindPython documentation.
|
||||
#
|
||||
Python_add_library(_deep_gemm_C MODULE WITH_SOABI
|
||||
"${deepgemm_SOURCE_DIR}/csrc/python_api.cpp")
|
||||
# Header set fed to add_custom_command's DEPENDS so a header-only edit in
# DeepGEMM's own tree (csrc/ and deep_gemm/include/) re-triggers the rebuild.
# add_custom_command does no implicit header scanning, unlike add_library.
# The vendored third-party trees (cutlass, fmt) are NOT globbed here, so an
# edit there does not trigger a rebuild on its own.
# CONFIGURE_DEPENDS makes the build re-check the glob, so headers added or
# removed after the last configure run are picked up as well.
file(GLOB_RECURSE _dg_headers CONFIGURE_DEPENDS
  "${deepgemm_SOURCE_DIR}/csrc/*.h"
  "${deepgemm_SOURCE_DIR}/csrc/*.hpp"
  "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.h"
  "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.hpp"
  "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.cuh")
|
||||
|
||||
# DeepGEMM's Python code imports the extension as `_C`, so the built file
# must carry that name regardless of the CMake target name.
set_property(TARGET _deep_gemm_C PROPERTY OUTPUT_NAME "_C")

# Name handed to the extension source through TORCH_EXTENSION_NAME
# (CMake strips a leading -D, so the bare form defines the same macro).
target_compile_definitions(_deep_gemm_C PRIVATE
  TORCH_EXTENSION_NAME=_C)

# Include roots inside the DeepGEMM checkout, in search order.
foreach(_dg_inc IN ITEMS
    "csrc"
    "deep_gemm/include"
    "third-party/cutlass/include"
    "third-party/cutlass/tools/util/include"
    "third-party/fmt/include")
  target_include_directories(_deep_gemm_C PRIVATE
    "${deepgemm_SOURCE_DIR}/${_dg_inc}")
endforeach()

# C++-only flags: optimise, and silence the ABI / deprecation warnings.
target_compile_options(_deep_gemm_C PRIVATE
  "$<$<COMPILE_LANGUAGE:CXX>:-O3;-Wno-psabi;-Wno-deprecated-declarations>")

# DeepGEMM binds at::Tensor through pybind11 type casters (PYBIND11_MODULE),
# which live in torch_python; vLLM's own extensions use torch::Library custom
# ops instead and therefore do not need it.
find_library(TORCH_PYTHON_LIBRARY
  NAMES torch_python
  PATHS "${TORCH_INSTALL_PREFIX}/lib"
  REQUIRED)

target_link_libraries(_deep_gemm_C PRIVATE
  torch ${TORCH_LIBRARIES} "${TORCH_PYTHON_LIBRARY}")
target_link_libraries(_deep_gemm_C PRIVATE
  CUDA::cudart CUDA::nvrtc)

# Place the built module inside the vendored deep_gemm package.
install(TARGETS _deep_gemm_C
  LIBRARY
    COMPONENT _deep_gemm_C
    DESTINATION vllm/third_party/deep_gemm)
|
||||
# Register one build rule per distinct target ABI and collect the stamp files
# so a single aggregate target can depend on all of them.
unset(_dg_markers)
unset(_dg_abi_tags_seen)
foreach(_dg_target_py IN LISTS _dg_pythons)
  # Ask the target interpreter for its SOABI tag; a failure to run it is fatal.
  execute_process(
    COMMAND "${_dg_target_py}" -c
      "import sysconfig; print(sysconfig.get_config_var('SOABI'))"
    OUTPUT_VARIABLE _dg_abi_tag
    OUTPUT_STRIP_TRAILING_WHITESPACE
    COMMAND_ERROR_IS_FATAL ANY)

  # Only the first interpreter seen for a given ABI tag gets a rule, so
  # duplicate paths (or two paths resolving to the same CPython) don't
  # register conflicting build rules.
  if(NOT _dg_abi_tag IN_LIST _dg_abi_tags_seen)
    list(APPEND _dg_abi_tags_seen "${_dg_abi_tag}")

    set(_dg_out_dir "${CMAKE_CURRENT_BINARY_DIR}/deepgemm_C_${_dg_abi_tag}")
    set(_dg_stamp "${_dg_out_dir}/.built")

    # The helper script is run by the build interpreter; the target
    # interpreter is only passed to it as an argument. The stamp file is
    # touched after the script succeeds and is what the rule declares as
    # its output.
    add_custom_command(
      OUTPUT "${_dg_stamp}"
      COMMAND "${Python_EXECUTABLE}"
              "${CMAKE_SOURCE_DIR}/tools/build_deepgemm_C.py"
              "${deepgemm_SOURCE_DIR}" "${_dg_out_dir}" "${_dg_target_py}"
      COMMAND "${CMAKE_COMMAND}" -E touch "${_dg_stamp}"
      DEPENDS
        "${CMAKE_SOURCE_DIR}/tools/build_deepgemm_C.py"
        "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp"
        ${_dg_headers}
      COMMENT "Building DeepGEMM _C for ${_dg_target_py}"
      VERBATIM)
    list(APPEND _dg_markers "${_dg_stamp}")

    # Ship only the extension module from the per-ABI directory; the stamp
    # file does not match the pattern and stays behind.
    install(DIRECTORY "${_dg_out_dir}/"
      DESTINATION vllm/third_party/deep_gemm
      COMPONENT _deep_gemm_C
      FILES_MATCHING PATTERN "_C.cpython-*.so")
  endif()
endforeach()

# Aggregate target: part of the default build, up to date once every stamp is.
add_custom_target(_deep_gemm_C ALL DEPENDS ${_dg_markers})
|
||||
|
||||
#
|
||||
# Vendor DeepGEMM Python package files
|
||||
|
||||
@@ -301,6 +301,15 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
python3 use_existing_torch.py --prefix; \
|
||||
fi
|
||||
|
||||
# Provision a bare interpreter for each CPython covered by `requires-python`
|
||||
# so DeepGEMM `_C` is built once per Python and bundled side-by-side in the
|
||||
# wheel; cmake reads DEEPGEMM_PYTHON_INTERPRETERS in deepgemm.cmake's
|
||||
# foreach loop. The matrix is derived from pyproject.toml.
# The script prints a ":"-separated list of interpreter paths on stdout; it is
# captured in /tmp/dg_pythons.txt and exported as DEEPGEMM_PYTHON_INTERPRETERS
# by the wheel-build steps further down.
|
||||
COPY tools/setup_deepgemm_pythons.sh tools/build_deepgemm_C.py tools/
|
||||
# Directory the per-version venvs are created under (script default: /tmp/dgenv).
ENV DEEPGEMM_VENV_PREFIX=/opt/dgenv
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
tools/setup_deepgemm_pythons.sh > /tmp/dg_pythons.txt
|
||||
|
||||
# Build the vLLM wheel
|
||||
# if USE_SCCACHE is set, use sccache to speed up compilation
|
||||
# AWS credentials mounted at ~/.aws/credentials for sccache S3 auth (optional)
|
||||
@@ -328,6 +337,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
&& export VLLM_PRECOMPILED_WHEEL_COMMIT="${VLLM_MERGE_BASE_COMMIT}" \
|
||||
&& export VLLM_MAIN_CUDA_VERSION="${VLLM_MAIN_CUDA_VERSION}" \
|
||||
&& export VLLM_DOCKER_BUILD_CONTEXT=1 \
|
||||
&& export DEEPGEMM_PYTHON_INTERPRETERS=$(cat /tmp/dg_pythons.txt) \
|
||||
&& sccache --show-stats \
|
||||
&& python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
|
||||
&& sccache --show-stats; \
|
||||
@@ -345,6 +355,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||
export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
|
||||
export VLLM_PRECOMPILED_WHEEL_COMMIT="${VLLM_MERGE_BASE_COMMIT}" && \
|
||||
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
|
||||
export DEEPGEMM_PYTHON_INTERPRETERS=$(cat /tmp/dg_pythons.txt) && \
|
||||
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
|
||||
fi
|
||||
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Build DeepGEMM's `_C` pybind11 extension for a target Python.

Driven from `cmake/external_projects/deepgemm.cmake`. The driver is the
build interpreter (which has torch); the *target* Python is only used for
its header path and SOABI. This avoids needing torch installed in N venvs
to produce N matching `.so` files.

Usage: python build_deepgemm_C.py <DEEPGEMM_SRC_DIR> <OUTPUT_DIR> <TARGET_PY>
"""

import json
import os
import shlex
import subprocess
import sys
from pathlib import Path

import torch
from torch.utils import cpp_extension

# sysconfig variables read from the target interpreter.
_SYSCONFIG_KEYS = ("EXT_SUFFIX", "INCLUDEPY")


def _query_target(target_py: str) -> dict:
    """Return the target interpreter's EXT_SUFFIX and INCLUDEPY.

    Exits with an error message when either value is missing, instead of
    letting a literal "None" end up in the compiler command line.
    Raises subprocess.CalledProcessError if the interpreter cannot be run.
    """
    probe = (
        "import sysconfig, json; "
        "print(json.dumps({k: sysconfig.get_config_var(k) "
        "for k in ('EXT_SUFFIX', 'INCLUDEPY')}))"
    )
    info = json.loads(subprocess.check_output([target_py, "-c", probe]).decode())
    missing = [key for key in _SYSCONFIG_KEYS if not info.get(key)]
    if missing:
        sys.exit(
            f"{target_py}: sysconfig reports no value for {', '.join(missing)}; "
            "cannot build DeepGEMM _C"
        )
    return info


def _compile_command(src: Path, out: Path, info: dict, cuda_home: str) -> list:
    """Assemble the single compile-and-link command for `_C`."""
    # CCCL lives outside the standard CUDAToolkit search, mirroring DeepGEMM's
    # own setup.py.
    includes = [
        info["INCLUDEPY"],
        f"{cuda_home}/include",
        f"{cuda_home}/include/cccl",
        str(src / "csrc"),
        str(src / "deep_gemm/include"),
        str(src / "third-party/cutlass/include"),
        str(src / "third-party/cutlass/tools/util/include"),
        str(src / "third-party/fmt/include"),
        *cpp_extension.include_paths(device_type="cuda"),
    ]
    return [
        os.environ.get("CXX", "g++"),
        "-shared",
        "-fPIC",
        "-std=c++20",
        "-O3",
        "-g0",
        "-Wno-psabi",
        "-Wno-deprecated-declarations",
        "-DTORCH_API_INCLUDE_EXTENSION_H",
        "-DTORCH_EXTENSION_NAME=_C",
        # Must match the ABI the build interpreter's torch was compiled with.
        f"-D_GLIBCXX_USE_CXX11_ABI={int(torch.compiled_with_cxx11_abi())}",
        *(f"-I{p}" for p in includes),
        str(src / "csrc/python_api.cpp"),
        *(f"-L{p}" for p in cpp_extension.library_paths(device_type="cuda")),
        f"-L{cuda_home}/lib64",
        "-ltorch",
        "-ltorch_python",
        "-ltorch_cpu",
        "-ltorch_cuda",
        "-lc10",
        "-lc10_cuda",
        "-lcudart",
        "-lnvrtc",
        "-o",
        # The file name carries the *target* interpreter's extension suffix.
        str(out / f"_C{info['EXT_SUFFIX']}"),
    ]


def main(argv: list) -> None:
    """Build `_C` for the target Python named on the command line.

    `argv` has the shape of `sys.argv`: program name, DeepGEMM source dir,
    output dir, target interpreter. Exits with a usage message on a wrong
    argument count and with an error when CUDA_HOME is not found; a compiler
    failure surfaces as subprocess.CalledProcessError.
    """
    if len(argv) != 4:
        prog = argv[0] if argv else "build_deepgemm_C.py"
        sys.exit(f"usage: {prog} <SRC> <OUT> <TARGET_PY>")

    src = Path(argv[1]).resolve()
    out = Path(argv[2]).resolve()
    target_py = argv[3]
    out.mkdir(parents=True, exist_ok=True)

    info = _query_target(target_py)

    cuda_home = cpp_extension.CUDA_HOME
    if cuda_home is None:
        sys.exit("CUDA_HOME not found; cannot build DeepGEMM _C")

    cmd = _compile_command(src, out, info, cuda_home)
    # shlex.join quotes arguments, so the logged line can be pasted into a
    # shell even when a path contains spaces.
    print("[build_deepgemm_C] " + shlex.join(cmd), flush=True)
    subprocess.check_call(cmd)


# Guarded so that importing this module does not start a build.
if __name__ == "__main__":
    main(sys.argv)
|
||||
@@ -0,0 +1,41 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
"""Assert the installed vLLM has a `_C.cpython-X.Y-*.so` for every CPython
|
||||
covered by `requires-python`. Fails closed if a Python's `.so` is missing
|
||||
from the wheel — i.e. the regression that surfaced in #41476/#41512.
|
||||
|
||||
Run from a CI test job after vLLM is installed, e.g. the H100 deepgemm
|
||||
kernel tests in .buildkite/test_areas/kernels.yaml.
|
||||
"""
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import regex as re
|
||||
import tomllib
|
||||
|
||||
SO_RE = re.compile(r"^_C\.cpython-(\d)(\d+)-")
|
||||
|
||||
|
||||
def required_pythons() -> list[str]:
|
||||
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
|
||||
spec = tomllib.loads(pyproject.read_text())["project"]["requires-python"]
|
||||
m = re.match(r">=3\.(\d+),<3\.(\d+)", spec)
|
||||
if not m:
|
||||
sys.exit(f"unexpected requires-python format: {spec!r}")
|
||||
return [f"3.{v}" for v in range(int(m[1]), int(m[2]))]
|
||||
|
||||
|
||||
# Locate the installed deep_gemm package. For a dotted name find_spec()
# imports the parent packages first and raises ImportError (typically
# ModuleNotFoundError) when one of them is missing, so that case is reported
# with the same message instead of a traceback.
try:
    spec = importlib.util.find_spec("vllm.third_party.deep_gemm")
except ImportError as exc:
    sys.exit(
        f"vllm.third_party.deep_gemm not importable; is vllm installed? ({exc})"
    )
if spec is None or spec.origin is None:
    sys.exit("vllm.third_party.deep_gemm not importable; is vllm installed?")
pkg_dir = Path(spec.origin).parent

# "X.Y" for every `_C.cpython-XY-*` file sitting next to the package.
found = {f"{m[1]}.{m[2]}" for f in os.listdir(pkg_dir) if (m := SO_RE.match(f))}
required = required_pythons()
missing = [v for v in required if v not in found]
print(f"deepgemm _C: found {sorted(found)}, required {required}, missing {missing}")
# Fail closed: a non-zero exit as soon as one required version has no module.
sys.exit(1 if missing else 0)
|
||||
Executable
+49
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash
# Provision bare Python interpreters for the DeepGEMM `_C` per-Python build
# and print a colon-separated list of their paths to stdout.
#
# Each target Python only needs a working interpreter — torch is not
# installed since `tools/build_deepgemm_C.py` runs from the build interpreter.
# Interpreters are created with `uv venv --python-preference only-managed`
# (see the loop below for why managed builds are forced).
#
# Usage:
#   export DEEPGEMM_PYTHON_INTERPRETERS=$(tools/setup_deepgemm_pythons.sh)
#   python setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
#
# With no args, expands to every CPython covered by `requires-python` in
# pyproject.toml. Pass explicit versions (e.g. `3.10 3.11`) to override.
#
# Skip this script if you don't have uv: set DEEPGEMM_PYTHON_INTERPRETERS
# directly to existing interpreter paths. Editable / single-Python builds
# don't need the env var at all (cmake falls back to the build interpreter).
#
# Optional: DEEPGEMM_VENV_PREFIX (default: /tmp/dgenv).
#
# stdout carries ONLY the resulting path list; diagnostics go to stderr so
# they never end up inside DEEPGEMM_PYTHON_INTERPRETERS.
set -euo pipefail

# Print an error to stderr and abort.
die() {
    echo "setup_deepgemm_pythons.sh: $*" >&2
    exit 1
}

if [ "$#" -eq 0 ]; then
    # Derive the matrix from `requires-python = ">=3.X,<3.Y"` in pyproject.toml.
    pyproject="$(dirname "$0")/../pyproject.toml"
    [ -r "$pyproject" ] || die "cannot read $pyproject"
    # `|| true`: a non-matching grep would otherwise end the script through
    # `set -e`/pipefail without any message; the empty result is reported below.
    spec=$(grep -E '^requires-python' "$pyproject" \
        | grep -oE '>=3\.[0-9]+,[[:space:]]*<3\.[0-9]+' || true)
    [ -n "$spec" ] || die "no requires-python of the form '>=3.X,<3.Y' in $pyproject"
    lo=${spec#>=3.}; lo=${lo%%,*}
    hi=${spec##*<3.}
    # An empty range would print an empty list, which cmake treats as "unset"
    # and quietly builds for a single interpreter.
    [ "$lo" -lt "$hi" ] || die "requires-python covers no Python version: $spec"
    # Word splitting is intended here: one positional parameter per version.
    set -- $(seq "$lo" $((hi - 1)) | sed 's/^/3./')
fi

prefix="${DEEPGEMM_VENV_PREFIX:-/tmp/dgenv}"
mkdir -p "$prefix"

paths=""
for V in "$@"; do
    venv="$prefix/$V"
    # Force a managed (uv-downloaded) Python so dev headers are bundled.
    # System Pythons on the build base may lack headers (manylinux's
    # /opt/python/cpXY-cpXY are off PATH; an apt-installed python3.X often
    # has no -dev), and the per-Python build needs Python.h.
    if [ ! -x "$venv/bin/python" ]; then
        command -v uv >/dev/null 2>&1 \
            || die "uv not found; install it or set DEEPGEMM_PYTHON_INTERPRETERS by hand"
        # uv's stdout is discarded so it cannot pollute the path list.
        uv venv --python "$V" "$venv" --python-preference only-managed --seed \
            >/dev/null
    fi
    paths="$paths:$venv/bin/python"
done
# Drop the leading ":" left by the accumulation above.
echo "${paths#:}"
|
||||
Reference in New Issue
Block a user