mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
upgrade to base image and new TRT, fix many dependency issues
Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
This commit is contained in:
parent
3036d49071
commit
303604f82d
2
3rdparty/json
vendored
2
3rdparty/json
vendored
@ -1 +1 @@
|
||||
Subproject commit bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d
|
||||
Subproject commit 55f93686c01528224f448c19128836e7df245f72
|
||||
47
bringup_fix.sh
Normal file
47
bringup_fix.sh
Normal file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env bash
#
# bringup_fix.sh - image fix-ups applied on top of the base image.
#
# Steps, in order:
#   1. Install the CUDA NVRTC development package from the .deb that matches
#      the host architecture.
#   2. Install CMake into /usr/local/cmake from the upstream self-extracting
#      installer.
#   3. Remove ibverbs-providers/libibverbs1, then install libibverbs-dev and
#      libstdc++-14-dev.
#   4. Rename the installed pytorch_triton dist-info directory to triton and
#      patch its METADATA/RECORD so the distribution is named "triton".
#
# Exit status: non-zero on an unsupported architecture or as soon as any
# required step fails, so a broken download or install fails the image build
# instead of being silently skipped.

set -euo pipefail

readonly CUDA_RELEASE="r13.0"
readonly NVRTC_PACKAGE="cuda-nvrtc-dev-13-0"
readonly NVRTC_VERSION="13.0.48"
readonly NVRTC_REPO="https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging"
readonly CMAKE_VERSION="4.0.3"
readonly CMAKE_PREFIX="/usr/local/cmake"
readonly SITE_PACKAGES="/usr/local/lib/python3.12/dist-packages"
readonly TRITON_VERSION="3.3.1+gitc8757738"

ARCH=$(uname -m)

# The two supported architectures differ only in the CUDA repository
# directory and the Debian architecture suffix of the NVRTC package; the
# CMake installer name uses the `uname -m` value directly.
case "${ARCH}" in
    x86_64)
        cuda_platform="linux-x86_64"
        deb_arch="amd64"
        ;;
    aarch64)
        cuda_platform="linux-sbsa"
        deb_arch="arm64"
        ;;
    *)
        echo "Unsupported architecture: $ARCH"
        exit 1
        ;;
esac

# Download into a scratch directory that is removed on every exit path, so
# neither the .deb nor the CMake installer is left behind in the image.
download_dir=$(mktemp -d)
trap 'rm -rf "${download_dir}"' EXIT

nvrtc_deb="${NVRTC_PACKAGE}_${NVRTC_VERSION}-1_${deb_arch}.deb"
wget -O "${download_dir}/${nvrtc_deb}" \
    "${NVRTC_REPO}/${CUDA_RELEASE}/cuda_nvrtc/${cuda_platform}/${NVRTC_VERSION}/${nvrtc_deb}"
dpkg -i "${download_dir}/${nvrtc_deb}"

cmake_installer="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh"
wget -O "${download_dir}/${cmake_installer}" \
    "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${cmake_installer}"
bash "${download_dir}/${cmake_installer}" --skip-license --prefix="${CMAKE_PREFIX}" --exclude-subdir

# apt-get rather than apt: apt warns that its CLI is not stable for scripts.
apt-get update
# Best-effort, as in the original script: a failed removal is reported but
# does not abort; the install that follows is the step that must succeed.
apt-get remove -y ibverbs-providers libibverbs1 \
    || echo "Warning: could not remove ibverbs-providers/libibverbs1; continuing" >&2
apt-get install -y libibverbs-dev libstdc++-14-dev

# Rename the pytorch_triton distribution metadata to triton.
# NOTE(review): presumably so that packages requiring "triton" see it as
# installed - confirm against the consumers of this image.
old_dist_info="pytorch_triton-${TRITON_VERSION}.dist-info"
new_dist_info="triton-${TRITON_VERSION}.dist-info"

cd "${SITE_PACKAGES}"
# Logs the matching entries and aborts if pytorch_triton is not installed.
ls -la | grep pytorch_triton
mv "${old_dist_info}" "${new_dist_info}"
cd "${new_dist_info}"
echo "Current directory: ${PWD}"
echo "Files in directory:"
ls -la
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA
# RECORD lists the files of the distribution by path, including the
# dist-info directory itself, so those paths must follow the rename.
sed -i "s|${old_dist_info}/|${new_dist_info}/|g" RECORD
echo "METADATA after update:"
grep "^Name:" METADATA

# Alternative left disabled by the original author:
# pip install git+https://github.com/triton-lang/triton.git@main
|
||||
@ -472,7 +472,6 @@ print(os.path.dirname(torch.__file__),end='');"
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
else()
|
||||
if(NOT WIN32)
|
||||
if(NOT USE_CXX11_ABI)
|
||||
|
||||
@ -127,6 +127,8 @@ ExternalProject_Add(
|
||||
${DEEP_EP_SOURCE_DIR}/third-party/nvshmem.patch
|
||||
COMMAND sed "s/TRANSPORT_VERSION_MAJOR 3/TRANSPORT_VERSION_MAJOR 103/" -i
|
||||
src/CMakeLists.txt
|
||||
COMMAND sed "s/_STANDARD 11/_STANDARD 17/" -i src/device/CMakeLists.txt
|
||||
COMMAND sed "s/_STANDARD 11/_STANDARD 17/" -i src/CMakeLists.txt
|
||||
COMMAND patch -p1 --forward --batch -i
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nvshmem_fast_build.patch
|
||||
CMAKE_CACHE_ARGS
|
||||
|
||||
@ -63,9 +63,9 @@ DataType Tensor::getDataType() const
|
||||
case nvinfer1::DataType::kBF16: return DataType::kBF16;
|
||||
case nvinfer1::DataType::kINT64: return DataType::kINT64;
|
||||
case nvinfer1::DataType::kINT4: [[fallthrough]] /* do nothing */;
|
||||
case nvinfer1::DataType::kFP4: /* do nothing */;
|
||||
case nvinfer1::DataType::kFP4: [[fallthrough]] /* do nothing */;
|
||||
default: TLLM_THROW("Unsupported data type");
|
||||
}
|
||||
TLLM_THROW("Unsupported data type");
|
||||
}
|
||||
|
||||
MemoryType Tensor::getMemoryType() const
|
||||
|
||||
@ -101,9 +101,9 @@ char const* IBuffer::getDataTypeName(DataType dataType)
|
||||
case nvinfer1::DataType::kINT8: return DataTypeTraits<nvinfer1::DataType::kINT8>::name;
|
||||
case nvinfer1::DataType::kFP8: return DataTypeTraits<nvinfer1::DataType::kFP8>::name;
|
||||
case nvinfer1::DataType::kINT4: [[fallthrough]] /* do nothing */;
|
||||
case nvinfer1::DataType::kFP4: /* do nothing */;
|
||||
case nvinfer1::DataType::kFP4: [[fallthrough]] /* do nothing */;
|
||||
default: TLLM_THROW("Unknown data type");
|
||||
}
|
||||
TLLM_THROW("Unknown data type");
|
||||
}
|
||||
|
||||
char const* IBuffer::getDataTypeName() const
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Multi-stage Dockerfile
|
||||
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
|
||||
ARG BASE_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/pytorch
|
||||
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
|
||||
ARG BASE_TAG=25.06-py3
|
||||
ARG BASE_TAG=25.08-py3.32224057-base
|
||||
ARG TRITON_BASE_TAG=25.06-py3
|
||||
ARG DEVEL_IMAGE=devel
|
||||
|
||||
@ -74,8 +74,10 @@ ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
|
||||
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
|
||||
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
|
||||
|
||||
COPY bringup_fix.sh bringup_fix.sh
|
||||
RUN bash ./bringup_fix.sh && rm bringup_fix.sh
|
||||
|
||||
# WARs against security issues inherited from pytorch:25.06
|
||||
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
|
||||
RUN pip3 install --upgrade --no-cache-dir \
|
||||
"protobuf>=4.25.8"
|
||||
|
||||
@ -103,7 +105,6 @@ RUN bash ./install_nixl.sh && rm install_nixl.sh
|
||||
COPY docker/common/install_etcd.sh install_etcd.sh
|
||||
RUN bash ./install_etcd.sh && rm install_etcd.sh
|
||||
|
||||
|
||||
FROM ${DEVEL_IMAGE} AS wheel
|
||||
WORKDIR /src/tensorrt_llm
|
||||
COPY benchmarks benchmarks
|
||||
|
||||
@ -49,31 +49,31 @@ install_ubuntu_requirements() {
|
||||
rm cuda-keyring_1.1-1_all.deb
|
||||
|
||||
apt-get update
|
||||
if [[ $(apt list --installed | grep libcudnn9) ]]; then
|
||||
apt-get remove --purge -y libcudnn9*
|
||||
fi
|
||||
if [[ $(apt list --installed | grep libnccl) ]]; then
|
||||
apt-get remove --purge -y --allow-change-held-packages libnccl*
|
||||
fi
|
||||
if [[ $(apt list --installed | grep libcublas) ]]; then
|
||||
apt-get remove --purge -y --allow-change-held-packages libcublas*
|
||||
fi
|
||||
if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
|
||||
apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
|
||||
fi
|
||||
# if [[ $(apt list --installed | grep libcudnn9) ]]; then
|
||||
# apt-get remove --purge -y libcudnn9*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep libnccl) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages libnccl*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep libcublas) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages libcublas*
|
||||
# fi
|
||||
# if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
|
||||
# apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
|
||||
# fi
|
||||
|
||||
CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
|
||||
NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
|
||||
# CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
|
||||
# NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
|
||||
|
||||
apt-get install -y --no-install-recommends \
|
||||
libcudnn9-cuda-12=${CUDNN_VER} \
|
||||
libcudnn9-dev-cuda-12=${CUDNN_VER} \
|
||||
libcudnn9-headers-cuda-12=${CUDNN_VER} \
|
||||
libnccl2=${NCCL_VER} \
|
||||
libnccl-dev=${NCCL_VER} \
|
||||
libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
|
||||
libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
|
||||
cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
|
||||
# apt-get install -y --no-install-recommends \
|
||||
# libcudnn9-cuda-12=${CUDNN_VER} \
|
||||
# libcudnn9-dev-cuda-12=${CUDNN_VER} \
|
||||
# libcudnn9-headers-cuda-12=${CUDNN_VER} \
|
||||
# libnccl2=${NCCL_VER} \
|
||||
# libnccl-dev=${NCCL_VER} \
|
||||
# libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
|
||||
# libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
|
||||
# cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
|
||||
|
||||
apt-get clean
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
@ -130,12 +130,17 @@ install_tensorrt() {
|
||||
if [ -z "$ARCH" ];then ARCH=$(uname -m);fi
|
||||
if [ "$ARCH" = "arm64" ];then ARCH="aarch64";fi
|
||||
if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
|
||||
RELEASE_URL_TRT="https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${TRT_VER_SHORT}/tars/TensorRT-${TRT_VER}.Linux.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz"
|
||||
|
||||
if [ "$ARCH" = "x86_64" ]; then
|
||||
RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-x64-manylinux_2_28/tar/TensorRT-10.14.0.19.Linux.x86_64-gnu.cuda-13.0.tar.gz"
|
||||
else
|
||||
RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-aarch64-manylinux_2_35/tar/TensorRT-10.14.0.19.Ubuntu-22.04.aarch64-gnu.cuda-13.0.tar.gz"
|
||||
fi
|
||||
fi
|
||||
|
||||
wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar
|
||||
tar -xf /tmp/TensorRT.tar -C /usr/local/
|
||||
mv /usr/local/TensorRT-${TRT_VER} /usr/local/tensorrt
|
||||
mv /usr/local/TensorRT-* /usr/local/tensorrt
|
||||
pip3 install --no-cache-dir /usr/local/tensorrt/python/tensorrt-*-cp${PARSED_PY_VERSION}-*.whl
|
||||
rm -rf /tmp/TensorRT.tar
|
||||
echo 'export LD_LIBRARY_PATH=/usr/local/tensorrt/lib:$LD_LIBRARY_PATH' >> "${ENV}"
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
#
|
||||
# NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that
|
||||
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
|
||||
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
|
||||
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
|
||||
LLM_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_x86_64
|
||||
LLM_SBSA_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_sbsa
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090
|
||||
|
||||
@ -21,7 +21,7 @@ pandas
|
||||
h5py==3.12.1
|
||||
StrEnum
|
||||
sentencepiece>=0.1.99
|
||||
tensorrt~=10.11.0
|
||||
tensorrt
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 uses 2.8.0a0.
|
||||
torch>=2.7.1,<=2.8.0a0
|
||||
torchvision
|
||||
|
||||
@ -289,7 +289,7 @@ target_link_libraries(
|
||||
FetchContent_Declare(
|
||||
json
|
||||
GIT_REPOSITORY https://github.com/nlohmann/json.git
|
||||
GIT_TAG v3.11.2)
|
||||
GIT_TAG v3.12.0)
|
||||
|
||||
FetchContent_MakeAvailable(json)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user