diff --git a/3rdparty/json b/3rdparty/json index bc889afb4c..55f93686c0 160000 --- a/3rdparty/json +++ b/3rdparty/json @@ -1 +1 @@ -Subproject commit bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d +Subproject commit 55f93686c01528224f448c19128836e7df245f72 diff --git a/bringup_fix.sh b/bringup_fix.sh new file mode 100644 index 0000000000..9d52d15748 --- /dev/null +++ b/bringup_fix.sh @@ -0,0 +1,47 @@ +ARCH=$(uname -m) +if [ $ARCH == "x86_64" ]; then + +wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-x86_64/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \ + dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb && \ + rm cuda-nvrtc-dev-13-0_13.0.48-1_amd64.deb + +wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-x86_64.sh && \ + bash cmake-4.0.3-linux-x86_64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir + +apt update +apt remove -y ibverbs-providers libibverbs1 +apt install -y libibverbs-dev +apt install -y libstdc++-14-dev + +elif [ $ARCH == "aarch64" ]; then + +wget https://urm.nvidia.com/artifactory/sw-gpu-cuda-installer-generic-local/packaging/r13.0/cuda_nvrtc/linux-sbsa/13.0.48/cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \ + dpkg -i cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb && \ + rm cuda-nvrtc-dev-13-0_13.0.48-1_arm64.deb + +wget https://github.com/Kitware/CMake/releases/download/v4.0.3/cmake-4.0.3-linux-aarch64.sh && \ + bash cmake-4.0.3-linux-aarch64.sh --skip-license --prefix=/usr/local/cmake --exclude-subdir + +apt update +apt remove -y ibverbs-providers libibverbs1 +apt install -y libibverbs-dev +apt install -y libstdc++-14-dev + +else + echo "Unsupported architecture: $ARCH" + exit 1 +fi + +cd /usr/local/lib/python3.12/dist-packages/ && \ + ls -la | grep pytorch_triton && \ + mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \ + cd triton-3.3.1+gitc8757738.dist-info && \ + echo "Current directory: $(pwd)" && \ + echo "Files in directory:" && \ + ls -la && \ + sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \ + sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \ + echo "METADATA after update:" && \ + grep "^Name:" METADATA + +# pip install git+https://github.com/triton-lang/triton.git@main diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6732db6eaa..dd3f5423fd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -472,7 +472,6 @@ print(os.path.dirname(torch.__file__),end='');" endif() endif() endif() - else() if(NOT WIN32) if(NOT USE_CXX11_ABI) diff --git a/cpp/tensorrt_llm/deep_ep/CMakeLists.txt b/cpp/tensorrt_llm/deep_ep/CMakeLists.txt index 088391aef4..68d74a8858 100644 --- a/cpp/tensorrt_llm/deep_ep/CMakeLists.txt +++ b/cpp/tensorrt_llm/deep_ep/CMakeLists.txt @@ -127,6 +127,8 @@ ExternalProject_Add( ${DEEP_EP_SOURCE_DIR}/third-party/nvshmem.patch COMMAND sed "s/TRANSPORT_VERSION_MAJOR 3/TRANSPORT_VERSION_MAJOR 103/" -i src/CMakeLists.txt + COMMAND sed "s/_STANDARD 11/_STANDARD 17/" -i src/device/CMakeLists.txt + COMMAND sed "s/_STANDARD 11/_STANDARD 17/" -i src/CMakeLists.txt COMMAND patch -p1 --forward --batch -i ${CMAKE_CURRENT_SOURCE_DIR}/nvshmem_fast_build.patch CMAKE_CACHE_ARGS diff --git a/cpp/tensorrt_llm/executor/tensor.cpp b/cpp/tensorrt_llm/executor/tensor.cpp index 8b7e15ac17..c38feb0e34 100644 --- a/cpp/tensorrt_llm/executor/tensor.cpp +++ b/cpp/tensorrt_llm/executor/tensor.cpp @@ -63,9 +63,9 @@ DataType Tensor::getDataType() const case nvinfer1::DataType::kBF16: return DataType::kBF16; case nvinfer1::DataType::kINT64: return DataType::kINT64; case nvinfer1::DataType::kINT4: [[fallthrough]] /* do nothing */; - case nvinfer1::DataType::kFP4: /* do nothing */; + case nvinfer1::DataType::kFP4: [[fallthrough]] /* do nothing */; + default: TLLM_THROW("Unsupported data type"); } - TLLM_THROW("Unsupported data type"); } MemoryType Tensor::getMemoryType() const diff --git a/cpp/tensorrt_llm/runtime/iBuffer.cpp b/cpp/tensorrt_llm/runtime/iBuffer.cpp index f676fee088..77707a0e4c 100644 --- a/cpp/tensorrt_llm/runtime/iBuffer.cpp +++ b/cpp/tensorrt_llm/runtime/iBuffer.cpp @@ -101,9 +101,9 @@ char const* IBuffer::getDataTypeName(DataType dataType) case nvinfer1::DataType::kINT8: return DataTypeTraits::name; case nvinfer1::DataType::kFP8: return DataTypeTraits::name; case nvinfer1::DataType::kINT4: [[fallthrough]] /* do nothing */; - case nvinfer1::DataType::kFP4: /* do nothing */; + case nvinfer1::DataType::kFP4: [[fallthrough]] /* do nothing */; + default: TLLM_THROW("Unknown data type"); } - TLLM_THROW("Unknown data type"); } char const* IBuffer::getDataTypeName() const diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi index c832481da9..c41b600d1a 100644 --- a/docker/Dockerfile.multi +++ b/docker/Dockerfile.multi @@ -1,7 +1,7 @@ # Multi-stage Dockerfile -ARG BASE_IMAGE=nvcr.io/nvidia/pytorch +ARG BASE_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/pytorch ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver -ARG BASE_TAG=25.06-py3 +ARG BASE_TAG=25.08-py3.32224057-base ARG TRITON_BASE_TAG=25.06-py3 ARG DEVEL_IMAGE=devel @@ -74,8 +74,10 @@ ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999" RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/ RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir +COPY bringup_fix.sh bringup_fix.sh +RUN bash ./bringup_fix.sh && rm bringup_fix.sh + # WARs against security issues inherited from pytorch:25.06 -# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7 RUN pip3 install --upgrade --no-cache-dir \ "protobuf>=4.25.8" @@ -103,7 +105,6 @@ RUN bash ./install_nixl.sh && rm install_nixl.sh COPY docker/common/install_etcd.sh install_etcd.sh RUN bash ./install_etcd.sh && rm install_etcd.sh - FROM ${DEVEL_IMAGE} AS wheel WORKDIR /src/tensorrt_llm COPY benchmarks benchmarks diff --git a/docker/common/install_tensorrt.sh b/docker/common/install_tensorrt.sh index 6d118b62c4..e2e3c6218d 100644 --- a/docker/common/install_tensorrt.sh +++ b/docker/common/install_tensorrt.sh @@ -49,31 +49,31 @@ install_ubuntu_requirements() { rm cuda-keyring_1.1-1_all.deb apt-get update - if [[ $(apt list --installed | grep libcudnn9) ]]; then - apt-get remove --purge -y libcudnn9* - fi - if [[ $(apt list --installed | grep libnccl) ]]; then - apt-get remove --purge -y --allow-change-held-packages libnccl* - fi - if [[ $(apt list --installed | grep libcublas) ]]; then - apt-get remove --purge -y --allow-change-held-packages libcublas* - fi - if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then - apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* - fi + # if [[ $(apt list --installed | grep libcudnn9) ]]; then + # apt-get remove --purge -y libcudnn9* + # fi + # if [[ $(apt list --installed | grep libnccl) ]]; then + # apt-get remove --purge -y --allow-change-held-packages libnccl* + # fi + # if [[ $(apt list --installed | grep libcublas) ]]; then + # apt-get remove --purge -y --allow-change-held-packages libcublas* + # fi + # if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then + # apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* + # fi - CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g') - NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g') + # CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g') + # NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g') - apt-get install -y --no-install-recommends \ - libcudnn9-cuda-12=${CUDNN_VER} \ - libcudnn9-dev-cuda-12=${CUDNN_VER} \ - libcudnn9-headers-cuda-12=${CUDNN_VER} \ - libnccl2=${NCCL_VER} \ - libnccl-dev=${NCCL_VER} \ - libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \ - libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \ - cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} + # apt-get install -y --no-install-recommends \ + # libcudnn9-cuda-12=${CUDNN_VER} \ + # libcudnn9-dev-cuda-12=${CUDNN_VER} \ + # libcudnn9-headers-cuda-12=${CUDNN_VER} \ + # libnccl2=${NCCL_VER} \ + # libnccl-dev=${NCCL_VER} \ + # libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \ + # libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \ + # cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} apt-get clean rm -rf /var/lib/apt/lists/* @@ -130,12 +130,17 @@ install_tensorrt() { if [ -z "$ARCH" ];then ARCH=$(uname -m);fi if [ "$ARCH" = "arm64" ];then ARCH="aarch64";fi if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi - RELEASE_URL_TRT="https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${TRT_VER_SHORT}/tars/TensorRT-${TRT_VER}.Linux.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz" + + if [ "$ARCH" = "x86_64" ]; then + RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-x64-manylinux_2_28/tar/TensorRT-10.14.0.19.Linux.x86_64-gnu.cuda-13.0.tar.gz" + else + RELEASE_URL_TRT="http://cuda-repo/release-candidates/Libraries/TensorRT/v10.14/10.14.0.19-6374d0f7/13.0-r580/Linux-aarch64-manylinux_2_35/tar/TensorRT-10.14.0.19.Ubuntu-22.04.aarch64-gnu.cuda-13.0.tar.gz" + fi fi wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar tar -xf /tmp/TensorRT.tar -C /usr/local/ - mv /usr/local/TensorRT-${TRT_VER} /usr/local/tensorrt + mv /usr/local/TensorRT-* /usr/local/tensorrt pip3 install --no-cache-dir /usr/local/tensorrt/python/tensorrt-*-cp${PARSED_PY_VERSION}-*.whl rm -rf /tmp/TensorRT.tar echo 'export LD_LIBRARY_PATH=/usr/local/tensorrt/lib:$LD_LIBRARY_PATH' >> "${ENV}" diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index dee2ee7218..87a2808834 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -11,7 +11,7 @@ # # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090 +LLM_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_x86_64 +LLM_SBSA_DOCKER_IMAGE=gitlab-master.nvidia.com:5005/xiweny/images:gb110_bringup_sbsa LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090 diff --git a/requirements.txt b/requirements.txt index d5a3a8ecb9..9612f7d1f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ pandas h5py==3.12.1 StrEnum sentencepiece>=0.1.99 -tensorrt~=10.11.0 +tensorrt # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 uses 2.8.0a0. torch>=2.7.1,<=2.8.0a0 torchvision diff --git a/triton_backend/inflight_batcher_llm/CMakeLists.txt b/triton_backend/inflight_batcher_llm/CMakeLists.txt index 62da21994e..0f26015922 100644 --- a/triton_backend/inflight_batcher_llm/CMakeLists.txt +++ b/triton_backend/inflight_batcher_llm/CMakeLists.txt @@ -289,7 +289,7 @@ target_link_libraries( FetchContent_Declare( json GIT_REPOSITORY https://github.com/nlohmann/json.git - GIT_TAG v3.11.2) + GIT_TAG v3.12.0) FetchContent_MakeAvailable(json)