[Bugfix] Install nvidia-cutlass-dsl[cu13] extra on CUDA 13 platforms (#42438)

Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
This commit is contained in:
Jiangyun Zhu
2026-05-13 16:57:21 +08:00
committed by GitHub
parent 9ce74042d3
commit 140dc2ec30
4 changed files with 14 additions and 2 deletions
+3
View File
@@ -9,6 +9,9 @@ PATH=${cuda_home}/bin:$PATH
LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
# Install requirements
# If the major component of $2 is 12 (presumably $2 is the CUDA version
# passed to this script -- confirm against the caller), rewrite a leading
# "nvidia-cutlass-dsl[cu13]>=" in requirements/cuda.txt to
# "nvidia-cutlass-dsl>=" in place, so the requirement is installed without
# the [cu13] extra.
if [ "$(echo $2 | cut -d. -f1)" = "12" ]; then
sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' requirements/cuda.txt
fi
$python_executable -m pip install -r requirements/build/cuda.txt -r requirements/cuda.txt
# Limit the number of parallel jobs to avoid OOM
+7 -1
View File
@@ -199,7 +199,10 @@ COPY requirements/cuda.txt requirements/cuda.txt
COPY use_existing_torch.py use_existing_torch.py
COPY pyproject.toml pyproject.toml
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' requirements/cuda.txt; \
fi \
&& if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing torch nightly..." \
&& uv pip install --python /opt/venv/bin/python3 torch torchaudio torchvision --pre \
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
@@ -626,6 +629,9 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
COPY requirements/common.txt /tmp/common.txt
COPY requirements/cuda.txt /tmp/requirements-cuda.txt
# Install the CUDA requirements into the system Python, then delete the
# temporary requirement files.
# When the major component of CUDA_VERSION is 12, the [cu13] extra is first
# stripped from the nvidia-cutlass-dsl line of /tmp/requirements-cuda.txt.
# The extra index URL suffix is "cu" followed by the major and minor version
# digits with the dot removed.
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' /tmp/requirements-cuda.txt; \
fi && \
uv pip install --system -r /tmp/requirements-cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
rm /tmp/requirements-cuda.txt /tmp/common.txt
+1 -1
View File
@@ -21,5 +21,5 @@ nvidia-cudnn-frontend>=1.13.0,<1.19.0
fastsafetensors >= 0.2.2
# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
nvidia-cutlass-dsl>=4.4.2
nvidia-cutlass-dsl[cu13]>=4.4.2
quack-kernels>=0.3.3
+3
View File
@@ -970,6 +970,9 @@ def get_requirements() -> list[str]:
# vllm-flash-attn is built only for CUDA 12.x.
# Skip for other versions.
continue
if "nvidia-cutlass-dsl[cu13]" in req and cuda_major == "12":
# [cu13] extra is the default; strip it on CUDA 12 builds.
req = req.replace("nvidia-cutlass-dsl[cu13]", "nvidia-cutlass-dsl")
modified_requirements.append(req)
requirements = modified_requirements
elif _is_hip():