chore: [TRTLLM-325][infra] Prepare for NGC release - reduce size of the docker images (#3990)

* chore: reduce size of the docker images

Signed-off-by: Martin Marciniszyn Mehringer <11665257+martinmarciniszyn@users.noreply.github.com>

* Finish the renaming script and run with new images.

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>

* Fix installation of GCC toolset for Rocky Linux

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>

* Upgrade to new docker images

Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>

---------

Signed-off-by: Martin Marciniszyn Mehringer <11665257+martinmarciniszyn@users.noreply.github.com>
Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>
This commit is contained in:
Martin Marciniszyn Mehringer 2025-05-09 13:31:29 +02:00 committed by GitHub
parent bf5b2a2e0a
commit d0e672f96d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 319 additions and 68 deletions

View File

@ -9,6 +9,10 @@ FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
FROM ${BASE_IMAGE}:${BASE_TAG} AS base
# Add NVIDIA EULA and AI Terms labels
LABEL com.nvidia.eula="https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/"
LABEL com.nvidia.ai-terms="https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/"
# https://www.gnu.org/software/bash/manual/html_node/Bash-Startup-Files.html
# The default values come from `nvcr.io/nvidia/pytorch`
ENV BASH_ENV=${BASH_ENV:-/etc/bash.bashrc}
@ -85,7 +89,7 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
# Install OpenCV with FFMPEG support
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
RUN pip3 install opencv-python-headless --force-reinstall --no-deps
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
FROM ${DEVEL_IMAGE} AS wheel
WORKDIR /src/tensorrt_llm
@ -114,18 +118,20 @@ WORKDIR /app/tensorrt_llm
COPY --from=wheel /src/tensorrt_llm/build/tensorrt_llm*.whl .
RUN --mount=type=cache,target=/root/.cache/pip \
pip install tensorrt_llm*.whl && \
rm tensorrt_llm*.whl
rm tensorrt_llm*.whl && \
pip cache purge
COPY README.md ./
COPY docs docs
COPY cpp/include include
RUN ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/bin")') bin && \
test -f bin/executorWorker && \
ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/libs")') lib && \
test -f lib/libnvinfer_plugin_tensorrt_llm.so && \
echo "/app/tensorrt_llm/lib" > /etc/ld.so.conf.d/tensorrt_llm.conf && \
ldconfig
# Test LD configuration
RUN ! ( ldd -v bin/executorWorker | grep tensorrt_llm | grep -q "not found" )
ldconfig && \
! ( ldd -v bin/executorWorker | grep tensorrt_llm | grep -q "not found" )
ARG SRC_DIR=/src/tensorrt_llm
COPY --from=wheel ${SRC_DIR}/benchmarks benchmarks
@ -136,6 +142,7 @@ COPY --from=wheel \
${CPP_BUILD_DIR}/benchmarks/gptSessionBenchmark \
${CPP_BUILD_DIR}/benchmarks/disaggServerBenchmark \
benchmarks/cpp/
COPY examples examples
RUN chmod -R a+w examples && \
rm -v \
@ -143,7 +150,9 @@ RUN chmod -R a+w examples && \
benchmarks/cpp/gptManagerBenchmark.cpp \
benchmarks/cpp/gptSessionBenchmark.cpp \
benchmarks/cpp/disaggServerBenchmark.cpp \
benchmarks/cpp/CMakeLists.txt
benchmarks/cpp/CMakeLists.txt && \
rm -rf /root/.cache/pip
ARG GIT_COMMIT
ARG TRT_LLM_VER
ENV TRT_LLM_GIT_COMMIT=${GIT_COMMIT} \

View File

@ -19,6 +19,29 @@ set_bash_env() {
fi
}
cleanup() {
# Clean up apt/dnf cache
if [ -f /etc/debian_version ]; then
apt-get clean
rm -rf /var/lib/apt/lists/*
elif [ -f /etc/redhat-release ]; then
dnf clean all
rm -rf /var/cache/dnf
fi
# Clean up temporary files
rm -rf /tmp/* /var/tmp/*
# Clean up pip cache
pip3 cache purge || true
# Clean up documentation
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/*
# Clean up locale files
find /usr/share/locale -maxdepth 1 -mindepth 1 -type d ! -name 'en*' -exec rm -rf {} +
}
init_ubuntu() {
apt-get update
apt-get install -y --no-install-recommends \
@ -38,8 +61,6 @@ init_ubuntu() {
if ! command -v mpirun &> /dev/null; then
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends openmpi-bin libopenmpi-dev
fi
apt-get clean
rm -rf /var/lib/apt/lists/*
echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> "${ENV}"
# Remove previous TRT installation
if [[ $(apt list --installed | grep libnvinfer) ]]; then
@ -55,7 +76,6 @@ install_python_rockylinux() {
PYTHON_VERSION=$1
PYTHON_MAJOR="3"
PYTHON_URL="https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz"
PYTHON_ENV_FILE="/tmp/python${PYTHON_VERSION}_env"
dnf makecache --refresh
dnf install \
epel-release \
@ -72,14 +92,13 @@ install_python_rockylinux() {
xz-devel \
sqlite-devel \
-y
echo "Installing Python ${PYTHON_VERSION}..."
curl -L ${PYTHON_URL} | tar -zx -C /tmp
cd /tmp/Python-${PYTHON_VERSION}
bash -c "./configure --enable-shared --prefix=/opt/python/${PYTHON_VERSION} --enable-ipv6 \
LDFLAGS=-Wl,-rpath=/opt/python/${PYTHON_VERSION}/lib,--disable-new-dtags && make -j$(nproc) && make install"
ln -s /opt/python/${PYTHON_VERSION}/bin/python3 /usr/local/bin/python
echo "export PATH=/opt/python/${PYTHON_VERSION}/bin:\$PATH" >> "${PYTHON_ENV_FILE}"
echo "source ${PYTHON_ENV_FILE}" >> "${ENV}"
dnf clean all
echo "export PATH=/opt/python/${PYTHON_VERSION}/bin:\$PATH" >> "${ENV}"
cd .. && rm -rf /tmp/Python-${PYTHON_VERSION}
}
@ -89,8 +108,6 @@ install_pyp_rockylinux() {
install_gcctoolset_rockylinux() {
dnf install -y gcc gcc-c++ file libtool make wget bzip2 bison flex
dnf clean all
DEVTOOLSET_ENV_FILE="/tmp/gcctoolset_env"
# https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda
echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> "${ENV}"
dnf install \
@ -105,10 +122,8 @@ install_gcctoolset_rockylinux() {
openmpi-devel \
pigz \
-y
echo "source scl_source enable gcc-toolset-13" >> "${DEVTOOLSET_ENV_FILE}"
echo "source ${DEVTOOLSET_ENV_FILE}" >> "${ENV}"
echo "source scl_source enable gcc-toolset-13" >> "${ENV}"
echo 'export PATH=/usr/lib64/openmpi/bin:$PATH' >> "${ENV}"
dnf clean all
}
# Install base packages depending on the base OS
@ -128,3 +143,6 @@ case "$ID" in
exit 1
;;
esac
# Final cleanup
cleanup

View File

@ -19,4 +19,7 @@ if [[ $ARCH == *"x86_64"* ]] && [[ $SYSTEM_ID == *"rocky"* ]]; then
-y
curl -L ${GITHUB_URL}/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}-linux-${ARCH}.tar.xz | xz -d | tar -x -C /tmp/
cp /tmp/ccache-${CCACHE_VERSION}-linux-x86_64/ccache /usr/bin/ccache
# Clean up temporary files
rm -rf /tmp/ccache-${CCACHE_VERSION}-linux-x86_64
dnf clean all
fi

View File

@ -16,4 +16,13 @@ wget --no-verbose ${RELEASE_URL_CMAKE} -P /tmp
tar -xf /tmp/${CMAKE_FILE_NAME}.tar.gz -C /usr/local/
ln -s /usr/local/${CMAKE_FILE_NAME} /usr/local/cmake
# Clean up temporary files
rm -rf /tmp/${CMAKE_FILE_NAME}.tar.gz
rm -rf /usr/local/${CMAKE_FILE_NAME}/doc
rm -rf /usr/local/${CMAKE_FILE_NAME}/man
rm -rf /usr/local/${CMAKE_FILE_NAME}/share/aclocal
rm -rf /usr/local/${CMAKE_FILE_NAME}/share/bash-completion
rm -rf /usr/local/${CMAKE_FILE_NAME}/share/emacs
rm -rf /usr/local/${CMAKE_FILE_NAME}/share/vim
echo 'export PATH=/usr/local/cmake/bin:$PATH' >> "${ENV}"

View File

@ -9,11 +9,19 @@ fi
MPI4PY_VERSION="3.1.5"
RELEASE_URL="${GITHUB_URL}/mpi4py/mpi4py/archive/refs/tags/${MPI4PY_VERSION}.tar.gz"
curl -L ${RELEASE_URL} | tar -zx -C /tmp
# Create and use a temporary directory
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT
# Download and extract in one step
curl -L ${RELEASE_URL} | tar -zx -C "$TMP_DIR"
# Bypassing compatibility issues with higher versions (>= 69) of setuptools.
sed -i 's/>= 40\.9\.0/>= 40.9.0, < 69/g' /tmp/mpi4py-${MPI4PY_VERSION}/pyproject.toml
OLDPWD=$(pwd)
cd /tmp/mpi4py-${MPI4PY_VERSION}
sed -i 's/>= 40\.9\.0/>= 40.9.0, < 69/g' "$TMP_DIR/mpi4py-${MPI4PY_VERSION}/pyproject.toml"
# Apply the patch
cd "$TMP_DIR/mpi4py-${MPI4PY_VERSION}"
git apply <<EOF
diff --git a/src/mpi4py/futures/_lib.py b/src/mpi4py/futures/_lib.py
index f14934d1..eebfb8fc 100644
@ -62,6 +70,9 @@ index f14934d1..eebfb8fc 100644
EOF
cd ${OLDPWD}
pip3 install /tmp/mpi4py-${MPI4PY_VERSION}
rm -rf /tmp/mpi4py*
# Install with pip and clean up cache
pip3 install --no-cache-dir "$TMP_DIR/mpi4py-${MPI4PY_VERSION}"
# Clean up
rm -rf "$TMP_DIR"
rm -rf ~/.cache/pip

View File

@ -3,3 +3,8 @@
set -ex
pip3 install polygraphy==0.49.9
# Clean up pip cache and temporary files
pip3 cache purge
rm -rf ~/.cache/pip
rm -rf /tmp/*

View File

@ -39,9 +39,10 @@ install_ubuntu_requirements() {
ARCH=$(uname -m)
if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
if [ "$ARCH" = "aarch64" ];then ARCH="sbsa";fi
# TODO: Replace with ubuntu2404 rather than using ubuntu2204.
curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${ARCH}/cuda-keyring_1.1-1_all.deb
dpkg -i cuda-keyring_1.1-1_all.deb
rm cuda-keyring_1.1-1_all.deb
apt-get update
if [[ $(apt list --installed | grep libcudnn9) ]]; then
@ -56,13 +57,19 @@ install_ubuntu_requirements() {
if [[ $(apt list --installed | grep cuda-nvrtc-dev) ]]; then
apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev*
fi
CUBLAS_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
apt-get install -y --no-install-recommends libcudnn9-cuda-12=${CUDNN_VER} libcudnn9-dev-cuda-12=${CUDNN_VER}
apt-get install -y --no-install-recommends libnccl2=${NCCL_VER} libnccl-dev=${NCCL_VER}
apt-get install -y --no-install-recommends libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER}
# NVRTC static library doesn't exist in NGC PyTorch container.
NVRTC_CUDA_VERSION=$(echo $CUDA_VER | sed 's/\./-/g')
apt-get install -y --no-install-recommends cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
apt-get install -y --no-install-recommends \
libcudnn9-cuda-12=${CUDNN_VER} \
libcudnn9-dev-cuda-12=${CUDNN_VER} \
libnccl2=${NCCL_VER} \
libnccl-dev=${NCCL_VER} \
libcublas-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
libcublas-dev-${CUBLAS_CUDA_VERSION}=${CUBLAS_VER} \
cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER}
apt-get clean
rm -rf /var/lib/apt/lists/*
}
@ -74,34 +81,35 @@ install_rockylinux_requirements() {
if [ "$ARCH" = "x86_64" ];then ARCH1="x86_64" && ARCH2="x64" && ARCH3=$ARCH1;fi
if [ "$ARCH" = "aarch64" ];then ARCH1="aarch64" && ARCH2="aarch64sbsa" && ARCH3="sbsa";fi
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/libnccl-${NCCL_VER}.${ARCH1}.rpm"
dnf remove -y "libnccl*"
dnf -y install libnccl-${NCCL_VER}.${ARCH1}.rpm
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/libnccl-devel-${NCCL_VER}.${ARCH1}.rpm"
dnf -y install libnccl-devel-${NCCL_VER}.${ARCH1}.rpm
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}.rpm"
dnf remove -y "cuda-compat*"
dnf -y install cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}.rpm
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/cuda-toolkit-12-8-config-common-${CUDA_RUNTIME}.noarch.rpm"
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch.rpm"
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch.rpm"
dnf remove -y "cuda-toolkit*"
dnf -y install cuda-toolkit-12-8-config-common-${CUDA_RUNTIME}.noarch.rpm
dnf -y install cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch.rpm
dnf -y install cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch.rpm
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm"
dnf remove -y "libcublas*"
dnf -y install libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm"
dnf -y install libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm
dnf makecache --refresh
# Download and install packages
for pkg in \
"libnccl-${NCCL_VER}.${ARCH1}" \
"libnccl-devel-${NCCL_VER}.${ARCH1}" \
"cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}" \
"cuda-toolkit-12-8-config-common-${CUDA_RUNTIME}.noarch" \
"cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch" \
"cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch" \
"libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}" \
"libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}"; do
wget -q "https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/${ARCH3}/${pkg}.rpm"
done
# Remove old packages
dnf remove -y "libnccl*" "cuda-compat*" "cuda-toolkit*" "libcublas*"
# Install new packages
dnf -y install \
epel-release \
# libnccl-${NCCL_VER} \
# libnccl-devel-${NCCL_VER} \
# libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER} \
# libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER} \
# cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}
libnccl-${NCCL_VER}.${ARCH1}.rpm \
libnccl-devel-${NCCL_VER}.${ARCH1}.rpm \
cuda-compat-${CUBLAS_CUDA_VERSION}-${CUDA_DRIVER_VERSION}.${ARCH1}.rpm \
cuda-toolkit-12-8-config-common-${CUDA_RUNTIME}.noarch.rpm \
cuda-toolkit-12-config-common-${CUDA_RUNTIME}.noarch.rpm \
cuda-toolkit-config-common-${CUDA_RUNTIME}.noarch.rpm \
libcublas-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm \
libcublas-devel-${CUBLAS_CUDA_VERSION}-${CUBLAS_VER}.${ARCH1}.rpm
# Clean up
rm -f *.rpm
dnf clean all
nvcc --version
}
@ -118,10 +126,11 @@ install_tensorrt() {
if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
RELEASE_URL_TRT="https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.9.0/tars/TensorRT-${TRT_VER}.Linux.${ARCH}-gnu.cuda-${TRT_CUDA_VERSION}.tar.gz"
fi
wget --no-verbose ${RELEASE_URL_TRT} -O /tmp/TensorRT.tar
tar -xf /tmp/TensorRT.tar -C /usr/local/
mv /usr/local/TensorRT-${TRT_VER} /usr/local/tensorrt
pip3 install /usr/local/tensorrt/python/tensorrt-*-cp${PARSED_PY_VERSION}-*.whl
pip3 install --no-cache-dir /usr/local/tensorrt/python/tensorrt-*-cp${PARSED_PY_VERSION}-*.whl
rm -rf /tmp/TensorRT.tar
echo 'export LD_LIBRARY_PATH=/usr/local/tensorrt/lib:$LD_LIBRARY_PATH' >> "${ENV}"
}

View File

@ -16,7 +16,7 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
LLM_DOCKER_IMAGE = env.dockerImage
AGENT_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
AGENT_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202505081324-9379"
POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"

View File

@ -98,7 +98,7 @@ def createKubernetesPodConfig(type)
def buildImage(target, action="build", torchInstallType="skip", args="", custom_tag="", post_tag="")
{
def tag = "x86_64-${target}-torch_${torchInstallType}-${LLM_BRANCH_TAG}-${BUILD_NUMBER}${post_tag}"
def tag = "x86_64-${target}-torch_${torchInstallType}${post_tag}-${LLM_BRANCH_TAG}-${BUILD_NUMBER}"
// Step 1: cloning tekit source code
// allow to checkout from forked repo, svc_tensorrt needs to have access to the repo, otherwise clone will fail

View File

@ -17,7 +17,8 @@ def buildImage(action, type)
{
def branch = env.gitlabBranch
def branchTag = branch.replaceAll('/', '_')
def tag = "sbsa-devel-torch_${type}-${branchTag}-${BUILD_NUMBER}"
def buildNumber = env.hostBuildNumber ? env.hostBuildNumber : BUILD_NUMBER
def tag = "sbsa-devel-torch_${type}-${branchTag}-${buildNumber}"
// Step 1: cloning tekit source code
// allow to checkout from forked repo, svc_tensorrt needs to have access to the repo, otherwise clone will fail

View File

@ -21,10 +21,10 @@ UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifac
// Container configuration
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-aarch64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504250100-3759"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504250100-3759"
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202505081324-9379"
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-aarch64-ubuntu24.04-trt10.9.0.34-skip-devel-202505081324-9379"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202505081324-9379"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202505081324-9379"
LLM_ROCKYLINUX8_DOCKER_IMAGE = LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE

View File

@ -35,8 +35,8 @@ linuxPkgName = ( env.targetArch == AARCH64_TRIPLE ? "tensorrt-llm-sbsa-release-s
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
LLM_DOCKER_IMAGE = env.dockerImage
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202504250100-3759"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202504250100-3759"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py310-trt10.9.0.34-skip-devel-202505081324-9379"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.8.1-devel-rocky8-x86_64-rocky8-py312-trt10.9.0.34-skip-devel-202505081324-9379"
// DLFW torch image
DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.03-py3"

View File

@ -1,7 +1,7 @@
import java.lang.InterruptedException
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202504250100-3759"
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.03-py3-x86_64-ubuntu24.04-trt10.9.0.34-skip-devel-202505081324-9379"
def createKubernetesPodConfig(image)
{

186
scripts/rename_docker_images.py Executable file
View File

@ -0,0 +1,186 @@
#!/usr/bin/env python3
import argparse as _ap
import datetime as _dt
import os
import pathlib as _pl
import subprocess as _sp
# Groovy file that declares the canonical image variables this script rewrites.
MERGE_REQUEST_GROOVY = "L0_MergeRequest.groovy"
# Maps each Groovy image-variable name to the staging-repository image prefix
# it is released from; "-<branch>-<build_id>" is appended to form the source
# tag, and the destination tag is derived from the current value in the
# Groovy file (see rename_images below).
IMAGE_MAPPING = {
    "LLM_DOCKER_IMAGE":
    "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/devel:x86_64-devel-torch_skip",
    "LLM_SBSA_DOCKER_IMAGE":
    "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/devel:sbsa-devel-torch_skip",
    "LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE":
    "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/devel:x86_64-rockylinux8-torch_skip-py310",
    "LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE":
    "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/devel:x86_64-rockylinux8-torch_skip-py312",
}
# NOTE(review): BUILD_GROOVY, SRC_PATTERN and DST_IMAGE appear unused in this
# script as shown — presumably kept for a follow-up or external tooling; verify.
BUILD_GROOVY = "Build.groovy"
SRC_PATTERN = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/devel:x86_64-devel-torch_skip"
# [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
DST_IMAGE = "LLM_DOCKER_IMAGE"
def parse_arguments() -> _ap.Namespace:
parser = _ap.ArgumentParser(
description="Rename Docker images based on the given instructions.")
parser.add_argument(
'src_branch',
type=str,
help="The name of the source branch releasing the Docker image.")
parser.add_argument(
'src_build_id',
type=int,
help="The name of the source build id release the Docker image.")
parser.add_argument(
'dst_mr',
type=int,
help="The number of the merge request for the destination image.")
parser.add_argument(
'--dry-run',
action='store_true',
help="Simulate the rename process without making any changes.")
parser.add_argument(
"--timestamp",
type=str,
required=False,
help="The timestamp to use for the destination image name.")
return parser.parse_args()
def get_current_timestamp() -> str:
"""Get the current timestamp in YYYYMMDDhhmm format."""
return _dt.datetime.now(_dt.UTC).strftime("%Y%m%d%H%M")
def run_shell_command(command: str, dry_run: bool) -> None:
    """Print a shell command and execute it unless in dry-run mode.

    Args:
        command: The shell command to execute.
        dry_run: If True, only print the command without running it.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status (because ``check=True``).
    """
    # Always echo the command so dry runs show exactly what would execute.
    print(command)
    if not dry_run:
        _sp.run(command, shell=True, check=True, text=True)
def find_script_directory() -> _pl.Path:
    """Return the absolute path of the directory containing this script."""
    this_file = _pl.Path(__file__)
    return this_file.resolve().parent
def extract_line_after_prefix(file_path: _pl.Path, prefix: str) -> str or None:
"""
Extracts the line starting with a certain prefix from the given file.
Args:
file_path (Path): Path to the file.
prefix (str): Prefix to search for.
Returns:
str: The line starting with the prefix, or None if no such line exists.
"""
with open(file_path, 'r') as file:
for line in file:
if line.startswith(prefix):
return line[len(prefix):].strip()
return None
def image_prefix(full_image_name: str) -> str:
    """
    Strip the last two dash-separated fields from a full image name.

    Args:
        full_image_name (str): Full image name (ends in ``-<date>-<mr_id>``).

    Returns:
        str: The image name with its last two dash-separated fields removed.

    Raises:
        ValueError: If the name contains fewer than two dashes.
    """
    pieces = full_image_name.rsplit('-', 2)
    # rsplit yields 3 pieces only when at least two dashes are present.
    if len(pieces) != 3:
        raise ValueError("Invalid image name format")
    return pieces[0]
def find_and_replace_in_files(directory, file_extension: str,
                              search_string: str, replace_string: str,
                              dry_run: bool) -> None:
    """
    Replace a string in every file with a given extension under a directory.

    Args:
        directory (str or PathLike): Root directory of the search.
        file_extension (str): File extension to filter (e.g., ".txt").
        search_string (str): String to search for.
        replace_string (str): Replacement string.
        dry_run (bool): If True, report which files would change but do not
            write anything.
    """
    for root, _, filenames in os.walk(directory):
        candidates = (name for name in filenames
                      if name.endswith(file_extension))
        for name in candidates:
            path = os.path.join(root, name)
            with open(path, "r", encoding="utf-8") as handle:
                original = handle.read()
            rewritten = original.replace(search_string, replace_string)
            if rewritten == original:
                continue
            print(f"Updating {path}")
            if dry_run:
                continue
            with open(path, "w", encoding="utf-8") as handle:
                handle.write(rewritten)
def rename_images(*,
                  src_branch: str,
                  src_build_id: int,
                  dst_mr: int,
                  timestamp: str | None = None,
                  dry_run: bool = False) -> None:
    """Retag released staging Docker images and update repo references.

    For each entry in IMAGE_MAPPING: pull the staging image built from
    ``src_branch``/``src_build_id``, retag it with a timestamp and the
    destination MR number (derived from the current value in the merge
    request Groovy file), push it, and rewrite the image references in
    the jenkins ``.groovy`` and ``.devcontainer`` ``.yaml`` files.

    Args:
        src_branch: Name of the source branch that released the image.
        src_build_id: Build id of the source image.
        dst_mr: Merge request number used in the destination tag.
        timestamp: Optional ``YYYYMMDDhhmm`` stamp; current UTC time if None.
        dry_run: If True, print commands and intended edits without
            executing or writing anything.
    """
    print(
        f"Renaming images for branch {src_branch} and build id {src_build_id} to {dst_mr}"
    )
    if dry_run:
        print("Dry-run mode enabled. No actual changes will be made.")
    else:
        print("Renaming images...")
    stamp = timestamp or get_current_timestamp()
    branch_tag = src_branch.replace("/", "_")
    base_dir = find_script_directory().parent
    mr_groovy = base_dir / "jenkins" / MERGE_REQUEST_GROOVY
    for dst_key, src_pattern in IMAGE_MAPPING.items():
        print(f"Processing {dst_key} ...")
        src_image = f"{src_pattern}-{branch_tag}-{src_build_id}"
        current_value = extract_line_after_prefix(mr_groovy, dst_key + " = ")
        dst_image_old = current_value.strip('"')
        dst_image = f"{image_prefix(dst_image_old)}-{stamp}-{dst_mr}"
        for command in (f"docker pull {src_image}",
                        f"docker tag {src_image} {dst_image}",
                        f"docker push {dst_image}"):
            run_shell_command(command, dry_run)
        find_and_replace_in_files(base_dir / "jenkins", ".groovy",
                                  dst_image_old, dst_image, dry_run)
        find_and_replace_in_files(base_dir / ".devcontainer", ".yaml",
                                  dst_image_old, dst_image, dry_run)
def main() -> None:
    """Entry point: parse CLI arguments and run the rename workflow."""
    arguments = parse_arguments()
    rename_images(**vars(arguments))


if __name__ == "__main__":
    main()