mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
Update Flashinfer to 0.6.6 (#36768)
Signed-off-by: Dimitrios Bariamis <12195802+dbari@users.noreply.github.com> Co-authored-by: Dimitrios Bariamis <12195802+dbari@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
bdc2343454
commit
cc16b24b17
+1
-1
@@ -586,7 +586,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
# This is ~1.1GB and only changes when FlashInfer version bumps
|
||||
# https://docs.flashinfer.ai/installation.html
|
||||
# From versions.json: .flashinfer.version
|
||||
ARG FLASHINFER_VERSION=0.6.4
|
||||
ARG FLASHINFER_VERSION=0.6.6
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system flashinfer-cubin==${FLASHINFER_VERSION} \
|
||||
&& uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
|
||||
|
||||
@@ -217,13 +217,13 @@ RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.
|
||||
|
||||
|
||||
# build flashinfer for torch nightly from source around 10 mins
|
||||
# release version: v0.6.4
|
||||
# release version: v0.6.6
|
||||
# todo(elainewy): cache flashinfer build result for faster build
|
||||
ENV CCACHE_DIR=/root/.cache/ccache
|
||||
RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
echo "git clone flashinfer..." \
|
||||
&& git clone --depth 1 --branch v0.6.4 --recursive https://github.com/flashinfer-ai/flashinfer.git \
|
||||
&& git clone --depth 1 --branch v0.6.6 --recursive https://github.com/flashinfer-ai/flashinfer.git \
|
||||
&& cd flashinfer \
|
||||
&& git submodule update --init --recursive \
|
||||
&& echo "finish git clone flashinfer..." \
|
||||
|
||||
@@ -65,7 +65,7 @@
|
||||
"default": "true"
|
||||
},
|
||||
"FLASHINFER_VERSION": {
|
||||
"default": "0.6.4"
|
||||
"default": "0.6.6"
|
||||
},
|
||||
"GDRCOPY_CUDA_VERSION": {
|
||||
"default": "12.8"
|
||||
|
||||
@@ -9,7 +9,7 @@ torchaudio==2.10.0
|
||||
# These must be updated alongside torch
|
||||
torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
|
||||
# FlashInfer should be updated together with the Dockerfile
|
||||
flashinfer-python==0.6.4
|
||||
flashinfer-python==0.6.6
|
||||
# Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to
|
||||
# breaking changes in 1.19.0
|
||||
nvidia-cudnn-frontend>=1.13.0,<1.19.0
|
||||
|
||||
@@ -41,7 +41,7 @@ if printf '%s\n' "${FILES[@]}" | grep -q "^docker/Dockerfile$"; then
|
||||
--rm \
|
||||
--user "$(id -u):$(id -g)" \
|
||||
--workdir /workspace \
|
||||
--volume "$(pwd)":/workspace \
|
||||
--volume "$(pwd -P)":/workspace \
|
||||
ghcr.io/patrickhoefler/dockerfilegraph:alpine \
|
||||
--output png \
|
||||
--dpi 200 \
|
||||
|
||||
Reference in New Issue
Block a user