update dockerfile

This commit is contained in:
xlliu-scitix 2025-12-21 15:40:24 +00:00
parent 9b37f54680
commit db45ae9c3d
3 changed files with 53 additions and 24 deletions

View File

@ -21,7 +21,7 @@ jobs:
docker buildx build \
-f docker/Dockerfile.cuda13.x.ubuntu22.04 \
--platform linux/amd64 \
--target build \
--target package \
--output type=local,dest=dist \
.
- name: List artifacts

View File

@ -43,30 +43,30 @@ WORKDIR /workspace
# 1. Base build dependencies
# -------------------------
# NOTE(review): this excerpt is a diff rendered without +/- markers, so the
# pre-change and post-change forms of several lines below sit back to back.
#
# Purpose: move any cuda*.list apt source to /tmp/disabled-cuda.list (a failed
# mv is ignored via `|| true`), run apt-get update with the
# Acquire::http::No-Cache=true option, install the build toolchain with
# --no-install-recommends, then delete /var/lib/apt/lists/*.
#
# Output handling differs between the two forms:
#   - one form sends the install output to /dev/null;
#   - the other collects output in build.log, removes the log on success, and
#     on failure runs `cat build.log && false` so the log is printed and the
#     RUN step still fails.
# The form that writes to build.log does not list makeself among the packages.
RUN mv /etc/apt/sources.list.d/cuda*.list /tmp/disabled-cuda.list || true && \
apt-get -o Acquire::http::No-Cache=true update && \
{ apt-get -o Acquire::http::No-Cache=true update > build.log 2>&1 && \
apt-get install -y --no-install-recommends \
build-essential gcc g++ curl git wget ca-certificates \
make automake autoconf libtool pkg-config \
python3 python3-pip gzip xz-utils makeself > /dev/null 2>&1 && \
rm -rf /var/lib/apt/lists/*
python3 python3-pip gzip xz-utils >> build.log 2>&1 && \
rm -rf /var/lib/apt/lists/* && rm -f build.log; } || (cat build.log && false)
# -------------------------
# 2. Install CUDA keyring and restore NVIDIA repository
# -------------------------
# NOTE(review): diff markers were stripped from this excerpt; the two RUN
# instructions below are the pre-change and post-change forms of one step.
#
# Both forms download cuda-keyring_1.1-1_all.deb with wget, install it with
# `dpkg -i`, and then run apt-get update. Neither form deletes the downloaded
# .deb, and neither clears /var/lib/apt/lists, so the refreshed package index
# is left in place after this step.
#
# The first form only silences apt-get update. The second form captures the
# output of all three commands in build.log, deletes the log on success, and
# prints it before failing the step otherwise.
#
# NOTE(review): the URL is hard-coded to the ubuntu2004/x86_64 repository -
# confirm that this matches the base image this Dockerfile builds on.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \
apt-get update > /dev/null 2>&1
RUN { wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb > build.log 2>&1 && \
dpkg -i cuda-keyring_1.1-1_all.deb >> build.log 2>&1 && \
apt-get update >> build.log 2>&1 && rm -f build.log; } || (cat build.log && false)
# -------------------------
# 3. Install NCCL (pinned version)
# -------------------------
# NOTE(review): diff markers were stripped from this excerpt; pre-change and
# post-change forms of the continuation lines below sit back to back.
#
# Purpose: release any apt hold on libnccl2/libnccl-dev (failure ignored via
# `|| true`), install both packages at exactly ${NCCL_PACKAGE_VERSION}, put
# them back on hold, run ldconfig, and delete /var/lib/apt/lists/*.
#
# This step contains no apt-get update of its own, so the install uses
# whatever package index is present when it runs.
#
# One form discards the install output to /dev/null. The other appends the
# output of each command to build.log, removes the log on success, and runs
# `cat build.log && false` on failure so the log is shown and the step fails.
RUN apt-mark unhold libnccl2 libnccl-dev || true && \
apt-get install -y --no-install-recommends \
{ apt-get install -y --no-install-recommends \
libnccl2=${NCCL_PACKAGE_VERSION} \
libnccl-dev=${NCCL_PACKAGE_VERSION} > /dev/null 2>&1 && \
apt-mark hold libnccl2 libnccl-dev && \
ldconfig && \
rm -rf /var/lib/apt/lists/*
libnccl-dev=${NCCL_PACKAGE_VERSION} > build.log 2>&1 && \
apt-mark hold libnccl2 libnccl-dev >> build.log 2>&1 && \
ldconfig >> build.log 2>&1 && \
rm -rf /var/lib/apt/lists/* && rm -f build.log; } || (cat build.log && false)
# -------------------------
# 4. Build OpenMPI from source
@ -75,15 +75,15 @@ RUN wget https://download.open-mpi.org/release/open-mpi/v${MPI_SERIES}/openmpi-$
tar zxvf openmpi-${MPI_VERSION}.tar.gz > /dev/null 2>&1 && \
cd openmpi-${MPI_VERSION} && \
./configure --prefix=/usr/local/sihpc --with-cuda=/usr/local/cuda > /dev/null 2>&1 && \
make -j$(nproc) > /dev/null 2>&1 && make install && \
make -j$(nproc) > /dev/null 2>&1 && make install > /dev/null 2>&1 && \
rm -rf /workspace/openmpi-${MPI_VERSION} /workspace/openmpi-${MPI_VERSION}.tar.gz
# -------------------------
# 5. Build nccl-tests
# -------------------------
# NOTE(review): diff markers were stripped from this excerpt; the clone and
# make lines each appear in a pre-change and a post-change form, and the RUN
# continues past the last line shown here.
#
# Visible steps: clone the `sicl` branch of scitix/nccl-tests, build it with
# MPI=1 and MPI_HOME=/usr/local/sihpc, create
# /usr/local/sihpc/libexec/nccl-tests, copy build/*_perf into it, and create
# /usr/local/sihpc/bin.
#
# The form using `--depth 1 --single-branch` redirects clone and make output
# to build.log and prints the log if the braced group fails.
# NOTE(review): in that form the make redirection comes after
# `cd nccl-tests`, so its relative build.log is a different file from the one
# the clone wrote; `rm -f build.log` then removes only the one inside
# nccl-tests and the clone log stays behind. It also uses `>` rather than
# `>>` for make. Confirm whether both are intended.
RUN git clone https://github.com/scitix/nccl-tests.git -b sicl && \
RUN { git clone --depth 1 --single-branch -b sicl https://github.com/scitix/nccl-tests.git > build.log 2>&1 && \
cd nccl-tests && \
make MPI=1 MPI_HOME=/usr/local/sihpc && \
make MPI=1 MPI_HOME=/usr/local/sihpc > build.log 2>&1 && rm -f build.log || (cat build.log && false); } && \
mkdir -p /usr/local/sihpc/libexec/nccl-tests && \
cp -rf build/*_perf /usr/local/sihpc/libexec/nccl-tests/ && \
mkdir -p /usr/local/sihpc/bin && \
@ -97,10 +97,11 @@ RUN git clone https://github.com/scitix/nccl-tests.git -b sicl && \
# -------------------------
# 6. Collect runtime libraries (strict selection)
# -------------------------
# NOTE(review): diff markers were stripped from this excerpt; the two RUN
# instructions below are the pre-change and post-change forms of one step.
#
# Both forms create /usr/local/sihpc/lib and copy into it every file matching
# /usr/local/cuda/lib64/libcudart* and /usr/lib/x86_64-linux-gnu/libnccl.so*.
#
# The first form runs under `set -eux`. The second runs under `set -e`,
# sends each command's output to build.log, deletes the log on success, and
# otherwise prints it and fails the step via `cat build.log && false`.
RUN set -eux && \
mkdir -p /usr/local/sihpc/lib && \
cp /usr/local/cuda/lib64/libcudart* /usr/local/sihpc/lib/ && \
cp /usr/lib/x86_64-linux-gnu/libnccl.so* /usr/local/sihpc/lib/
RUN { set -e && \
mkdir -p /usr/local/sihpc/lib > build.log 2>&1 && \
cp /usr/local/cuda/lib64/libcudart* /usr/local/sihpc/lib/ >> build.log 2>&1 && \
cp /usr/lib/x86_64-linux-gnu/libnccl.so* /usr/local/sihpc/lib/ >> build.log 2>&1 && \
rm -f build.log; } || (cat build.log && false)
# cp /lib/x86_64-linux-gnu/libltdl.so.7.3.1 /usr/local/sihpc/lib/ && \
# cp /usr/lib/x86_64-linux-gnu/libhwloc.so* /usr/local/sihpc/lib/ && \
# cp /usr/lib/x86_64-linux-gnu/libevent_core* /usr/local/sihpc/lib/ && \
@ -125,15 +126,29 @@ RUN cd /usr/local/sihpc/lib && \
# ln -sf libltdl.so.7.3.1 libltdl.so.7 && \
# ln -sf libltdl.so.7 libltdl.so
###########################
# Package Stage
###########################
# Starts from a plain ubuntu:20.04 image, copies /usr/local/sihpc out of the
# `build` stage, and wraps that tree into a makeself self-extracting installer.
# NOTE(review): this excerpt is a diff rendered without +/- markers, so the
# pre-change and post-change forms of several RUN lines sit back to back.
FROM ubuntu:20.04 AS package
# Re-declare the build args so they are in scope inside this stage.
# NOTE(review): a bare ARG only receives a value here from a global ARG
# declared before the first FROM or from --build-arg; it is not carried over
# from the previous stage. Those declarations are not visible in this
# excerpt - confirm they exist.
ARG UBUNTU_VERSION
ARG NCCL_PACKAGE_VERSION
ARG MPI_VERSION
ARG BUILD_DATE
# Expose versions/date as environment variables for runtime shell expansion.
# UBUNTU_VERSION is not exported here; the RUN below reads it from the ARG.
ENV NCCL_PACKAGE_VERSION=${NCCL_PACKAGE_VERSION} \
MPI_VERSION=${MPI_VERSION} \
BUILD_DATE=${BUILD_DATE}
COPY --from=build /usr/local/sihpc /usr/local/sihpc
WORKDIR /
# Build the installer: replace every '+' in NCCL_PACKAGE_VERSION with '-',
# compose the .run file name from the NCCL, OpenMPI, Ubuntu and build-date
# values, then run makeself with gzip compression on /usr/local/sihpc, using
# ./bin/install_sihpc as the startup script. The file name has no directory
# part of its own, so it is resolved against the working directory.
#
# The form that begins with apt-get installs makeself in this stage and
# captures only the makeself output in build.log.
# NOTE(review): in that form a failing apt-get also reaches
# `cat build.log`, before any build.log has been written; apt-get output is
# not captured and /var/lib/apt/lists is not cleaned afterwards. The CI
# workflow in this commit builds with `--target package --output type=local`;
# confirm that exporting this whole stage is intended.
RUN SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \
RUN apt-get update && apt-get install -y --no-install-recommends makeself && \
SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \
PACKAGE_FILENAME="sicl-nccl${SAFE_NCCL_PKG}-ompi${MPI_VERSION}-ubuntu${UBUNTU_VERSION}-${BUILD_DATE}.run" && \
makeself --gzip /usr/local/sihpc \
{ makeself --gzip /usr/local/sihpc \
"${PACKAGE_FILENAME}" \
"SiHPC MPI + NCCL + NCCL-tests Portable Installer" \
./bin/install_sihpc > /dev/null 2>&1
./bin/install_sihpc > build.log 2>&1 && rm -f build.log; } || (cat build.log && false)

View File

@ -46,7 +46,7 @@ RUN { apt-get -o Acquire::http::No-Cache=true update > build.log 2>&1 && \
apt-get install -y --no-install-recommends \
build-essential gcc g++ curl git wget ca-certificates \
make automake autoconf libtool pkg-config \
python3 python3-pip gzip xz-utils makeself >> build.log 2>&1 && \
python3 python3-pip gzip xz-utils >> build.log 2>&1 && \
rm -rf /var/lib/apt/lists/* && rm -f build.log; } || (cat build.log && false)
# -------------------------
@ -123,13 +123,27 @@ RUN cd /usr/local/sihpc/lib && \
# ln -sf libltdl.so.7.3.1 libltdl.so.7 && \
# ln -sf libltdl.so.7 libltdl.so
###########################
# Package Stage
###########################
FROM ubuntu:20.04 AS package
# Re-declare args for this stage (values are inherited)
ARG UBUNTU_VERSION
ARG NCCL_PACKAGE_VERSION
ARG MPI_VERSION
ARG BUILD_DATE
# Expose versions/date as environment variables for runtime shell expansion
ENV NCCL_PACKAGE_VERSION=${NCCL_PACKAGE_VERSION} \
MPI_VERSION=${MPI_VERSION} \
BUILD_DATE=${BUILD_DATE}
COPY --from=build /usr/local/sihpc /usr/local/sihpc
WORKDIR /
RUN SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \
RUN apt-get update && apt-get install -y --no-install-recommends makeself && \
SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \
PACKAGE_FILENAME="sicl-nccl${SAFE_NCCL_PKG}-ompi${MPI_VERSION}-ubuntu${UBUNTU_VERSION}-${BUILD_DATE}.run" && \
{ makeself --gzip /usr/local/sihpc \
"${PACKAGE_FILENAME}" \