mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Build] DeepGEMM: trim comments, add integration notes + TODOs (#42429)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -53,14 +53,30 @@ cuda_archs_loose_intersection(DEEPGEMM_ARCHS
|
||||
if(DEEPGEMM_ARCHS)
|
||||
message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")
|
||||
|
||||
# Build _C once per interpreter in DEEPGEMM_PYTHON_INTERPRETERS (":"-
|
||||
# separated paths) so the wheel imports cleanly on every supported Python.
|
||||
# Unset → fall back to the build interpreter (editable / source builds).
|
||||
# The compile is delegated to tools/build_deepgemm_C.py and always runs
|
||||
# against the build interpreter's torch — target Pythons don't need torch.
|
||||
# Note: empty-but-set env vars are still DEFINED in cmake; treat empty as
|
||||
# unset so an empty interpreter list falls back to the build interpreter
|
||||
# rather than silently skipping the per-Python build.
|
||||
#
|
||||
# DeepGEMM integration notes
|
||||
# --------------------------
|
||||
# We vendor DeepGEMM into vllm/third_party/deep_gemm/ and bundle a
|
||||
# `_C.cpython-X.Y-*.so` for every CPython in `requires-python`. The
|
||||
# per-Python build is delegated to tools/build_deepgemm_C.py.
|
||||
#
|
||||
# Why per-Python: DeepGEMM's binding uses PYBIND11_MODULE, which links
|
||||
# private CPython symbols — a single `_C.abi3.so` is not viable today
|
||||
# (see #41476 / #41512 for the failed attempt).
|
||||
#
|
||||
# TODOs (tracked in vllm-project/vllm#42431):
|
||||
# - Replace DeepGEMM's pybind11 binding with a TORCH_LIBRARY + shim
|
||||
# binding (cf. vllm-flash-attention/csrc/common/pytorch_shim.h) to
|
||||
# collapse to one `_C.abi3.so`. Needs either an upstream change or
|
||||
# a maintained binding fork in vLLM.
|
||||
# - AOT-compile DeepGEMM's CUDA kernels instead of runtime JIT to drop
|
||||
# the vendored CUTLASS/CCCL headers and the CUDA-toolkit-at-runtime
|
||||
# requirement.
|
||||
#
|
||||
|
||||
# DEEPGEMM_PYTHON_INTERPRETERS: ":"-separated target Python paths.
|
||||
# Empty/unset → fall back to the build interpreter (editable installs).
|
||||
# (Empty-but-set env vars test as DEFINED in cmake — treat as unset.)
|
||||
if(NOT "$ENV{DEEPGEMM_PYTHON_INTERPRETERS}" STREQUAL "")
|
||||
string(REPLACE ":" ";" _dg_pythons "$ENV{DEEPGEMM_PYTHON_INTERPRETERS}")
|
||||
else()
|
||||
@@ -68,10 +84,8 @@ if(DEEPGEMM_ARCHS)
|
||||
endif()
|
||||
message(STATUS "DeepGEMM _C will be built for: ${_dg_pythons}")
|
||||
|
||||
# Header set fed to add_custom_command's DEPENDS so a header-only edit
|
||||
# (in upstream DeepGEMM or its vendored cutlass/fmt) re-triggers the
|
||||
# rebuild. add_custom_command does no implicit header scanning, unlike
|
||||
# add_library.
|
||||
# add_custom_command does no implicit header scanning; glob explicitly so
|
||||
# header-only edits in DeepGEMM/cutlass/fmt re-trigger the rebuild.
|
||||
file(GLOB_RECURSE _dg_headers
|
||||
"${deepgemm_SOURCE_DIR}/csrc/*.h"
|
||||
"${deepgemm_SOURCE_DIR}/csrc/*.hpp"
|
||||
@@ -88,8 +102,7 @@ if(DEEPGEMM_ARCHS)
|
||||
OUTPUT_VARIABLE _dg_soabi
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
COMMAND_ERROR_IS_FATAL ANY)
|
||||
# Dedup so duplicate paths (or two paths resolving to the same CPython)
|
||||
# don't register conflicting build rules.
|
||||
# Dedup interpreters that resolve to the same CPython.
|
||||
if(_dg_soabi IN_LIST _dg_seen_soabis)
|
||||
continue()
|
||||
endif()
|
||||
|
||||
+3
-4
@@ -301,10 +301,9 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
python3 use_existing_torch.py --prefix; \
|
||||
fi
|
||||
|
||||
# Provision a bare interpreter for each CPython covered by `requires-python`
|
||||
# so DeepGEMM `_C` is built once per Python and bundled side-by-side in the
|
||||
# wheel; cmake reads DEEPGEMM_PYTHON_INTERPRETERS in deepgemm.cmake's
|
||||
# foreach loop. The matrix is derived from pyproject.toml.
|
||||
# Provision one bare Python per CPython version covered by `requires-python`; cmake reads
|
||||
# DEEPGEMM_PYTHON_INTERPRETERS to build DeepGEMM `_C` for each. See
|
||||
# cmake/external_projects/deepgemm.cmake for the full picture.
|
||||
COPY tools/setup_deepgemm_pythons.sh tools/build_deepgemm_C.py tools/
|
||||
ENV DEEPGEMM_VENV_PREFIX=/opt/dgenv
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Build DeepGEMM's `_C` pybind11 extension for a target Python.
|
||||
"""Build DeepGEMM's `_C` pybind11 extension for <TARGET_PY>.
|
||||
|
||||
Driven from `cmake/external_projects/deepgemm.cmake`. The driver is the
|
||||
build interpreter (which has torch); the *target* Python is only used for
|
||||
its header path and SOABI. This avoids needing torch installed in N venvs
|
||||
to produce N matching `.so` files.
|
||||
Driven from cmake/external_projects/deepgemm.cmake. The driver runs against
|
||||
the build interpreter's torch; <TARGET_PY> is only consulted for INCLUDEPY
|
||||
and SOABI, so target venvs don't need torch installed.
|
||||
|
||||
Usage: python build_deepgemm_C.py <DEEPGEMM_SRC_DIR> <OUTPUT_DIR> <TARGET_PY>
|
||||
"""
|
||||
@@ -42,8 +41,7 @@ info = json.loads(
|
||||
cuda_home = cpp_extension.CUDA_HOME
|
||||
if cuda_home is None:
|
||||
sys.exit("CUDA_HOME not found; cannot build DeepGEMM _C")
|
||||
# CCCL lives outside the standard CUDAToolkit search, mirroring DeepGEMM's
|
||||
# own setup.py.
|
||||
# CCCL lives outside the standard CUDAToolkit search (mirrors DeepGEMM's setup.py).
|
||||
includes = [
|
||||
info["INCLUDEPY"],
|
||||
f"{cuda_home}/include",
|
||||
|
||||
@@ -1,23 +1,12 @@
|
||||
#!/usr/bin/env bash
|
||||
# Provision bare Python interpreters for the DeepGEMM `_C` per-Python build
|
||||
# and print a colon-separated list of their paths to stdout.
|
||||
#
|
||||
# Each target Python only needs a working interpreter — torch is not
|
||||
# installed since `tools/build_deepgemm_C.py` runs from the build interpreter.
|
||||
# uv re-uses any matching system Python and downloads a managed build
|
||||
# otherwise.
|
||||
# Provision one bare Python per CPython version covered by `requires-python` (or per version argument) and
|
||||
# print their paths as a ":"-separated list for DEEPGEMM_PYTHON_INTERPRETERS. Skip this
|
||||
# entirely if you already have interpreter paths.
|
||||
#
|
||||
# Usage:
|
||||
# export DEEPGEMM_PYTHON_INTERPRETERS=$(tools/setup_deepgemm_pythons.sh)
|
||||
# python setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
|
||||
#
|
||||
# With no args, expands to every CPython covered by `requires-python` in
|
||||
# pyproject.toml. Pass explicit versions (e.g. `3.10 3.11`) to override.
|
||||
#
|
||||
# Skip this script if you don't have uv: set DEEPGEMM_PYTHON_INTERPRETERS
|
||||
# directly to existing interpreter paths. Editable / single-Python builds
|
||||
# don't need the env var at all (cmake falls back to the build interpreter).
|
||||
#
|
||||
# Optional: DEEPGEMM_VENV_PREFIX (default: /tmp/dgenv).
|
||||
set -euo pipefail
|
||||
|
||||
@@ -37,10 +26,8 @@ mkdir -p "$prefix"
|
||||
paths=""
|
||||
for V in "$@"; do
|
||||
venv="$prefix/$V"
|
||||
# Force a managed (uv-downloaded) Python so dev headers are bundled.
|
||||
# System Pythons on the build base may lack headers (manylinux's
|
||||
# /opt/python/cpXY-cpXY are off PATH; an apt-installed python3.X often
|
||||
# has no -dev), and the per-Python build needs Python.h.
|
||||
# A uv-managed Python ensures Python.h is present; system python3.X-dev packages
|
||||
# on the manylinux / Ubuntu build bases are not always installed.
|
||||
[ -x "$venv/bin/python" ] || \
|
||||
uv venv --python "$V" "$venv" --python-preference only-managed --seed \
|
||||
>/dev/null
|
||||
|
||||
Reference in New Issue
Block a user