fix deep_gemm & CUDA13

Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
This commit is contained in:
Xiwen Yu 2025-08-05 13:18:33 +08:00
parent 78a55b8b46
commit 271916d196
3 changed files with 10 additions and 3 deletions

2
.gitmodules vendored
View File

@ -25,4 +25,4 @@
url = https://github.com/wjakob/nanobind
[submodule "3rdparty/cppzmq"]
path = 3rdparty/cppzmq
url = https://github.com/zeromq/cppzmq.github
url = https://github.com/zeromq/cppzmq.git

View File

@ -15,6 +15,7 @@ apt install -y libstdc++-14-dev
elif [ $ARCH == "aarch64" ]; then
# to be moved to docker/common/ scripts
wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-sbsa/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \
rm cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb
@ -23,15 +24,21 @@ wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux
bash cmake-4.0.3-linux-aarch64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir
apt update
apt remove -y ibverbs-providers libibverbs1
# fix NIXL
apt remove -y ibverbs-providers libibverbs1 # package version conflict with libibverbs-dev
apt install -y libibverbs-dev
# fix LLVM build
apt install -y libstdc++-14-dev
# temporary workaround; waiting for https://github.com/NVIDIA/TensorRT-LLM/pull/6588
pip install deep_gemm@git+https://github.com/VALLIS-NERIA/DeepGEMM.git@97d97a20c2ecd53a248ab64242219d780cf822b8 --no-build-isolation
else
echo "Unsupported architecture: $ARCH"
exit 1
fi
# temporary workaround; waiting for a new triton release to be published
cd /usr/local/lib/python3.12/dist-packages/ && \
ls -la | grep pytorch_triton && \
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \

View File

@ -61,6 +61,6 @@ etcd3
blake3
llguidance==0.7.29
soundfile
deep_gemm @ git+https://github.com/zongfeijing/DeepGEMM.git@a9d538ef4dff0326fe521c6ca0bfde115703b56a
deep_gemm@git+https://github.com/VALLIS-NERIA/DeepGEMM.git@97d97a20c2ecd53a248ab64242219d780cf822b8
triton==3.3.1; platform_machine == "x86_64"
blobfile