From db45ae9c3ddd08f28bd89f177c522007b172a840 Mon Sep 17 00:00:00 2001 From: xlliu-scitix Date: Sun, 21 Dec 2025 15:40:24 +0000 Subject: [PATCH] docker: add package stage and print build logs on failure --- .github/workflows/pre-check.yml | 2 +- docker/Dockerfile.cuda12.x.ubuntu20.04 | 57 ++++++++++++++++---------- docker/Dockerfile.cuda13.x.ubuntu22.04 | 18 +++++++- 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/.github/workflows/pre-check.yml b/.github/workflows/pre-check.yml index f8f75d0..0b8d414 100644 --- a/.github/workflows/pre-check.yml +++ b/.github/workflows/pre-check.yml @@ -21,7 +21,7 @@ jobs: docker buildx build \ -f docker/Dockerfile.cuda13.x.ubuntu22.04 \ --platform linux/amd64 \ - --target build \ + --target package \ --output type=local,dest=dist \ . - name: List artifacts diff --git a/docker/Dockerfile.cuda12.x.ubuntu20.04 b/docker/Dockerfile.cuda12.x.ubuntu20.04 index 540eff0..b358aa5 100644 --- a/docker/Dockerfile.cuda12.x.ubuntu20.04 +++ b/docker/Dockerfile.cuda12.x.ubuntu20.04 @@ -43,30 +43,30 @@ WORKDIR /workspace # 1. Base build dependencies # ------------------------- RUN mv /etc/apt/sources.list.d/cuda*.list /tmp/disabled-cuda.list || true && \ - apt-get -o Acquire::http::No-Cache=true update && \ + { apt-get -o Acquire::http::No-Cache=true update > build.log 2>&1 && \ apt-get install -y --no-install-recommends \ build-essential gcc g++ curl git wget ca-certificates \ make automake autoconf libtool pkg-config \ - python3 python3-pip gzip xz-utils makeself > /dev/null 2>&1 && \ - rm -rf /var/lib/apt/lists/* + python3 python3-pip gzip xz-utils >> build.log 2>&1 && \ + rm -rf /var/lib/apt/lists/* && rm -f build.log; } || (cat build.log && false) # ------------------------- # 2. 
Install CUDA keyring and restore NVIDIA repository # ------------------------- -RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb && \ - dpkg -i cuda-keyring_1.1-1_all.deb && \ - apt-get update > /dev/null 2>&1 +RUN { wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb > build.log 2>&1 && \ + dpkg -i cuda-keyring_1.1-1_all.deb >> build.log 2>&1 && \ + apt-get update >> build.log 2>&1 && rm -f build.log cuda-keyring_1.1-1_all.deb; } || (cat build.log && false) # ------------------------- # 3. Install NCCL (pinned version) # ------------------------- RUN apt-mark unhold libnccl2 libnccl-dev || true && \ - apt-get install -y --no-install-recommends \ + { apt-get install -y --no-install-recommends \ libnccl2=${NCCL_PACKAGE_VERSION} \ - libnccl-dev=${NCCL_PACKAGE_VERSION} > /dev/null 2>&1 && \ - apt-mark hold libnccl2 libnccl-dev && \ - ldconfig && \ - rm -rf /var/lib/apt/lists/* + libnccl-dev=${NCCL_PACKAGE_VERSION} > build.log 2>&1 && \ + apt-mark hold libnccl2 libnccl-dev >> build.log 2>&1 && \ + ldconfig >> build.log 2>&1 && \ + rm -rf /var/lib/apt/lists/* && rm -f build.log; } || (cat build.log && false) # ------------------------- # 4. Build OpenMPI from source @@ -75,15 +75,15 @@ RUN wget https://download.open-mpi.org/release/open-mpi/v${MPI_SERIES}/openmpi-$ tar zxvf openmpi-${MPI_VERSION}.tar.gz > /dev/null 2>&1 && \ cd openmpi-${MPI_VERSION} && \ ./configure --prefix=/usr/local/sihpc --with-cuda=/usr/local/cuda > /dev/null 2>&1 && \ - make -j$(nproc) > /dev/null 2>&1 && make install && \ + { make -j$(nproc) > build.log 2>&1 && make install >> build.log 2>&1 || (cat build.log && false); } && \ rm -rf /workspace/openmpi-${MPI_VERSION} /workspace/openmpi-${MPI_VERSION}.tar.gz # ------------------------- # 5. 
Build nccl-tests # ------------------------- -RUN git clone https://github.com/scitix/nccl-tests.git -b sicl && \ +RUN { git clone --depth 1 --single-branch -b sicl https://github.com/scitix/nccl-tests.git > /workspace/build.log 2>&1 && \ cd nccl-tests && \ - make MPI=1 MPI_HOME=/usr/local/sihpc && \ + make MPI=1 MPI_HOME=/usr/local/sihpc >> /workspace/build.log 2>&1 && rm -f /workspace/build.log || (cat /workspace/build.log && false); } && \ mkdir -p /usr/local/sihpc/libexec/nccl-tests && \ cp -rf build/*_perf /usr/local/sihpc/libexec/nccl-tests/ && \ mkdir -p /usr/local/sihpc/bin && \ @@ -97,10 +97,11 @@ RUN git clone https://github.com/scitix/nccl-tests.git -b sicl && \ # ------------------------- # 6. Collect runtime libraries (strict selection) # ------------------------- -RUN set -eux && \ - mkdir -p /usr/local/sihpc/lib && \ - cp /usr/local/cuda/lib64/libcudart* /usr/local/sihpc/lib/ && \ - cp /usr/lib/x86_64-linux-gnu/libnccl.so* /usr/local/sihpc/lib/ +RUN { set -e && \ + mkdir -p /usr/local/sihpc/lib > build.log 2>&1 && \ + cp /usr/local/cuda/lib64/libcudart* /usr/local/sihpc/lib/ >> build.log 2>&1 && \ + cp /usr/lib/x86_64-linux-gnu/libnccl.so* /usr/local/sihpc/lib/ >> build.log 2>&1 && \ + rm -f build.log; } || (cat build.log && false) # cp /lib/x86_64-linux-gnu/libltdl.so.7.3.1 /usr/local/sihpc/lib/ && \ # cp /usr/lib/x86_64-linux-gnu/libhwloc.so* /usr/local/sihpc/lib/ && \ # cp /usr/lib/x86_64-linux-gnu/libevent_core* /usr/local/sihpc/lib/ && \ @@ -125,15 +126,29 @@ RUN cd /usr/local/sihpc/lib && \ # ln -sf libltdl.so.7.3.1 libltdl.so.7 && \ # ln -sf libltdl.so.7 libltdl.so +########################### +# Package Stage +########################### +FROM ubuntu:20.04 AS package + +# Re-declare args for this stage (defaults carry over only from global ARGs declared before the first FROM) +ARG UBUNTU_VERSION +ARG NCCL_PACKAGE_VERSION +ARG MPI_VERSION +ARG BUILD_DATE + # Expose versions/date as environment variables for runtime shell expansion ENV NCCL_PACKAGE_VERSION=${NCCL_PACKAGE_VERSION} \ MPI_VERSION=${MPI_VERSION} \ BUILD_DATE=${BUILD_DATE} 
+COPY --from=build /usr/local/sihpc /usr/local/sihpc + WORKDIR / -RUN SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \ +RUN apt-get update && apt-get install -y --no-install-recommends makeself && rm -rf /var/lib/apt/lists/* && \ + SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \ PACKAGE_FILENAME="sicl-nccl${SAFE_NCCL_PKG}-ompi${MPI_VERSION}-ubuntu${UBUNTU_VERSION}-${BUILD_DATE}.run" && \ - makeself --gzip /usr/local/sihpc \ + { makeself --gzip /usr/local/sihpc \ "${PACKAGE_FILENAME}" \ "SiHPC MPI + NCCL + NCCL-tests Portable Installer" \ - ./bin/install_sihpc > /dev/null 2>&1 \ No newline at end of file + ./bin/install_sihpc > build.log 2>&1 && rm -f build.log; } || (cat build.log && false) \ No newline at end of file diff --git a/docker/Dockerfile.cuda13.x.ubuntu22.04 b/docker/Dockerfile.cuda13.x.ubuntu22.04 index ae389ea..82cb17d 100644 --- a/docker/Dockerfile.cuda13.x.ubuntu22.04 +++ b/docker/Dockerfile.cuda13.x.ubuntu22.04 @@ -46,7 +46,7 @@ RUN { apt-get -o Acquire::http::No-Cache=true update > build.log 2>&1 && \ apt-get install -y --no-install-recommends \ build-essential gcc g++ curl git wget ca-certificates \ make automake autoconf libtool pkg-config \ - python3 python3-pip gzip xz-utils makeself >> build.log 2>&1 && \ + python3 python3-pip gzip xz-utils >> build.log 2>&1 && \ rm -rf /var/lib/apt/lists/* && rm -f build.log; } || (cat build.log && false) # ------------------------- @@ -123,13 +123,27 @@ RUN cd /usr/local/sihpc/lib && \ # ln -sf libltdl.so.7.3.1 libltdl.so.7 && \ # ln -sf libltdl.so.7 libltdl.so +########################### +# Package Stage +########################### +FROM ubuntu:22.04 AS package + +# Re-declare args for this stage (defaults carry over only from global ARGs declared before the first FROM) +ARG UBUNTU_VERSION +ARG NCCL_PACKAGE_VERSION +ARG MPI_VERSION +ARG BUILD_DATE + # Expose versions/date as environment variables for runtime shell expansion ENV NCCL_PACKAGE_VERSION=${NCCL_PACKAGE_VERSION} \ MPI_VERSION=${MPI_VERSION} \ BUILD_DATE=${BUILD_DATE} +COPY 
--from=build /usr/local/sihpc /usr/local/sihpc + WORKDIR / -RUN SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \ +RUN apt-get update && apt-get install -y --no-install-recommends makeself && rm -rf /var/lib/apt/lists/* && \ + SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \ PACKAGE_FILENAME="sicl-nccl${SAFE_NCCL_PKG}-ompi${MPI_VERSION}-ubuntu${UBUNTU_VERSION}-${BUILD_DATE}.run" && \ { makeself --gzip /usr/local/sihpc \ "${PACKAGE_FILENAME}" \