From a7444d2ed89d8271fa182f67d25c8e414b7a25a7 Mon Sep 17 00:00:00 2001
From: xlliu-scitix
Date: Sun, 21 Dec 2025 03:12:56 +0000
Subject: [PATCH 1/3] chore(scripts): add install scripts

---
 scripts/env.sh          | 13 ++++++
 scripts/install_sihpc   | 92 +++++++++++++++++++++++++++++++++++++++++
 scripts/uninstall_sihpc | 41 ++++++++++++++++++
 3 files changed, 146 insertions(+)
 create mode 100644 scripts/env.sh
 create mode 100644 scripts/install_sihpc
 create mode 100644 scripts/uninstall_sihpc

diff --git a/scripts/env.sh b/scripts/env.sh
new file mode 100644
--- /dev/null
+++ b/scripts/env.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SiHPC runtime environment. Source this file (do not execute it) so the
+# exports land in the calling shell. Uses only POSIX sh syntax because the
+# installer also sources it from /etc/profile.d.
+export SIHPC_HOME=/usr/local/sihpc
+export PATH="$SIHPC_HOME/bin:$PATH"
+# Append the previous LD_LIBRARY_PATH only when it is non-empty: an empty
+# trailing element makes the loader search the current directory, and a bare
+# $LD_LIBRARY_PATH aborts callers that run under 'set -u'.
+export LD_LIBRARY_PATH="$SIHPC_HOME/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+# Tell Open MPI where this (relocatable) installation lives.
+export OMPI_MCA_opal_prefix="$SIHPC_HOME"
+export OPAL_PREFIX="$SIHPC_HOME"
diff --git a/scripts/install_sihpc b/scripts/install_sihpc
new file mode 100644
--- /dev/null
+++ b/scripts/install_sihpc
@@ -0,0 +1,92 @@
+#!/bin/bash
+# Install the SiHPC runtime system-wide.
+#
+# Copies the payload shipped next to this script into $PREFIX, registers its
+# library directories with the dynamic linker, and hooks env.sh into login
+# and interactive bash shells. Needs write access to /usr/local and /etc.
+# Re-running is harmless: entries that already exist are not duplicated.
+set -euo pipefail
+
+PREFIX="/usr/local/sihpc"
+LIBCONF="/etc/ld.so.conf.d/sihpc.conf"
+PROFILE_SH="/etc/profile.d/sihpc.sh"
+
+# The payload root is the parent of the bin/ directory holding this script.
+# Resolving it from the script path instead of trusting the current directory
+# keeps a manual run from copying unrelated files into $PREFIX.
+SRC_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+# Append directory $1 to $LIBCONF unless that exact line is already present.
+# A whole-line fixed-string match is required: a substring match for
+# ".../lib" would also be satisfied by an existing ".../lib64" entry.
+# grep's complaint about a not-yet-existing file is silenced on purpose; the
+# append below then creates the file.
+add_lib_dir() {
+  local dir=$1
+  if ! grep -qxF -- "$dir" "$LIBCONF" 2>/dev/null; then
+    echo "$dir" >> "$LIBCONF"
+  fi
+}
+
+echo "Installing SiHPC runtime to: $PREFIX"
+
+mkdir -p "$PREFIX"
+if [ "$SRC_DIR" != "$PREFIX" ]; then
+  cp -r "$SRC_DIR"/* "$PREFIX/"
+fi
+echo "Files installed to $PREFIX"
+
+if [ -f "$LIBCONF" ]; then
+  libconf_msg="Updated existing $LIBCONF"
+else
+  libconf_msg="Added $LIBCONF"
+fi
+add_lib_dir "$PREFIX/lib"
+if [ -d "$PREFIX/lib64" ]; then
+  add_lib_dir "$PREFIX/lib64"
+fi
+echo "$libconf_msg"
+
+ldconfig
+echo "ldconfig updated"
+
+if [ ! -f "$PROFILE_SH" ]; then
+  # Unquoted delimiter: $PREFIX is expanded now, so the generated file holds
+  # the literal install path. '.' is used instead of 'source' because
+  # /etc/profile.d may be read by a POSIX sh that has no 'source' builtin.
+  cat <<EOF > "$PROFILE_SH"
+# Auto-generated by SiHPC installer
+if [ -f $PREFIX/env.sh ]; then
+  . $PREFIX/env.sh
+fi
+EOF
+  # Sourced, never executed; it only has to be readable by every user.
+  chmod 0644 "$PROFILE_SH"
+  echo "Added $PROFILE_SH"
+else
+  echo "$PROFILE_SH already exists, skipping."
+fi
+
+# Interactive non-login bash shells do not read /etc/profile.d, so hook the
+# system-wide bashrc as well (whichever of the two locations exists).
+for rc in /etc/bash.bashrc /etc/bashrc; do
+  [ -f "$rc" ] || continue
+  if grep -qF -- "$PREFIX/env.sh" "$rc"; then
+    continue
+  fi
+  {
+    echo ""
+    echo "# SiHPC environment"
+    echo "if [ -f $PREFIX/env.sh ]; then"
+    echo "  . $PREFIX/env.sh"
+    echo "fi"
+  } >> "$rc"
+  echo "Added SiHPC source to $rc"
+done
+
+echo
+echo "SiHPC installation completed successfully!"
+echo "Installed to: $PREFIX"
+echo "Library config: $LIBCONF"
+echo "Auto env setup: $PROFILE_SH"
+echo "Run 'source $PREFIX/env.sh' now to activate current shell."
diff --git a/scripts/uninstall_sihpc b/scripts/uninstall_sihpc
new file mode 100644
--- /dev/null
+++ b/scripts/uninstall_sihpc
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Remove the SiHPC runtime together with the system files install_sihpc
+# created for it (ld.so.conf.d entry and profile.d snippet).
+set -euo pipefail
+
+SIHPC_ROOT="/usr/local/sihpc"
+LIBCONF="/etc/ld.so.conf.d/sihpc.conf"
+PROFILE_SH="/etc/profile.d/sihpc.sh"
+
+if [ ! -d "$SIHPC_ROOT" ]; then
+  echo "sihpc install dir $SIHPC_ROOT not exist." >&2
+  exit 1
+fi
+
+echo "=============================="
+echo " uninstall sihpc"
+echo " install dir: $SIHPC_ROOT"
+echo "=============================="
+
+echo "deleting $SIHPC_ROOT ..."
+rm -rf -- "$SIHPC_ROOT"
+
+# Drop the linker and login-shell hooks, then rebuild the linker cache so it
+# stops listing libraries from the directory that was just deleted.
+echo "deleting $LIBCONF and $PROFILE_SH ..."
+rm -f -- "$LIBCONF" "$PROFILE_SH"
+ldconfig
+
+# The block appended to the system bashrc is guarded by a file-existence
+# check, so it is inert once env.sh is gone; report it instead of editing a
+# shared config file automatically.
+for rc in /etc/bash.bashrc /etc/bashrc; do
+  if [ -f "$rc" ] && grep -qF -- "$SIHPC_ROOT/env.sh" "$rc"; then
+    echo "note: $rc still references $SIHPC_ROOT/env.sh; remove that block by hand"
+  fi
+done
+
+echo "please check shell config(~/.bashrc, ~/.zshrc etc.),"
+echo "remove $SIHPC_ROOT/bin from PATH"
+
+echo "sihpc uninstall done!"

From 5eec99dc086d31ff99e62c2a88ea52dbe05cbbd5 Mon Sep 17 00:00:00 2001
From: xlliu-scitix
Date: Sun, 21 Dec 2025 15:12:18 +0800
Subject: [PATCH 2/3] add dockerfile and ci workflow (#1)

---
 .github/workflows/pre-check.yml        |  37 +++++
 .github/workflows/release.yml          |  37 +++++
 docker/Dockerfile.cuda12.x.ubuntu20.04 | 182 +++++++++++++++++++++++++
 3 files changed, 256 insertions(+)
 create mode 100644 .github/workflows/pre-check.yml
 create mode 100644 .github/workflows/release.yml
 create mode 100644 docker/Dockerfile.cuda12.x.ubuntu20.04

diff --git a/.github/workflows/pre-check.yml b/.github/workflows/pre-check.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/pre-check.yml
@@ -0,0 +1,37 @@
+name: Pre-check build
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+# The job only reads the repository; it never publishes anything.
+permissions:
+  contents: read
+
+jobs:
+  build-only:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: docker/setup-buildx-action@v3
+      # Reclaim runner disk space before the large CUDA image build.
+      - name: Free disk space
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache
+          sudo docker system prune -af || true
+          df -h
+      # Build every stage, but export only the .run installer into dist/.
+      - name: Build run package
+        run: |
+          docker buildx build \
+            -f docker/Dockerfile.cuda12.x.ubuntu20.04 \
+            --platform linux/amd64 \
+            --target export \
+            --output type=local,dest=dist \
+            .
+      - name: List artifacts
+        run: |
+          ls -lh dist
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..86b42f8
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,37 @@
+name: Release run installer
+
+on:
+  push:
+    tags:
+      - "v*"
+
+permissions:
+  contents: write
+
+jobs:
+  build-release:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build .run installer (Docker)
+        run: |
+          docker buildx build \
+            -f docker/Dockerfile.cuda12.x.ubuntu20.04 \
+            --platform linux/amd64 \
+            --build-arg BUILD_DATE=$(date +%Y%m%d) \
+            --output type=local,dest=dist \
+            .
+
+      - name: List artifacts
+        run: ls -lh dist
+
+      - name: Upload to GitHub Release
+        uses: softprops/action-gh-release@v2
+        with:
+          files: |
+            dist/*.run
diff --git a/docker/Dockerfile.cuda12.x.ubuntu20.04 b/docker/Dockerfile.cuda12.x.ubuntu20.04
new file mode 100644
--- /dev/null
+++ b/docker/Dockerfile.cuda12.x.ubuntu20.04
@@ -0,0 +1,182 @@
+###########################
+# Build-time configuration
+###########################
+
+# Base OS and CUDA versions
+ARG UBUNTU_VERSION=20.04
+ARG CUDA_VERSION=12.8.1
+ARG CUDART_VERSION=12.8.90
+ARG CUDART_MAJOR_VERSION=12
+
+# NCCL versions
+ARG NCCL_PACKAGE_VERSION=2.27.7-1+cuda12.4
+ARG NCCL_SO_VERSION=2.27.7
+
+# OpenMPI versions
+# - MPI_VERSION: full OpenMPI version
+# - MPI_SERIES: major.minor series used in download URL
+ARG MPI_VERSION=4.1.8
+ARG MPI_SERIES=4.1
+
+# Build date (override at build time)
+ARG BUILD_DATE=20251221
+
+###########################
+# Build Stage
+###########################
+# UBUNTU_VERSION selects the base image so the OS named in the package file
+# name is always the OS the binaries were actually built on.
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS build
+
+# Re-declare build args for this stage (values are inherited)
+ARG UBUNTU_VERSION
+ARG CUDA_VERSION
+ARG CUDART_VERSION
+ARG CUDART_MAJOR_VERSION
+ARG NCCL_PACKAGE_VERSION
+ARG NCCL_SO_VERSION
+ARG MPI_VERSION
+ARG MPI_SERIES
+ARG BUILD_DATE
+
+ENV DEBIAN_FRONTEND=noninteractive
+WORKDIR /workspace
+
+# -------------------------
+# 1. Base build dependencies
+# -------------------------
+# The image's CUDA apt source is moved aside first; step 2 re-adds the
+# repository through the cuda-keyring package.
+RUN mv /etc/apt/sources.list.d/cuda*.list /tmp/disabled-cuda.list || true && \
+    apt-get -o Acquire::http::No-Cache=true update && \
+    apt-get install -y --no-install-recommends \
+      build-essential gcc g++ curl git wget ca-certificates \
+      make automake autoconf libtool pkg-config \
+      python3 python3-pip gzip xz-utils && \
+    rm -rf /var/lib/apt/lists/*
+
+# -------------------------
+# 2. Install CUDA keyring and restore NVIDIA repository
+# -------------------------
+# The repository directory name is the Ubuntu version without the dot
+# (20.04 -> ubuntu2004). The apt lists are kept for step 3.
+RUN UBUNTU_REPO="ubuntu$(printf '%s' "${UBUNTU_VERSION}" | tr -d '.')" && \
+    wget "https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_REPO}/x86_64/cuda-keyring_1.1-1_all.deb" && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    rm -f cuda-keyring_1.1-1_all.deb && \
+    apt-get update
+
+# -------------------------
+# 3. Install NCCL (pinned version)
+# -------------------------
+RUN apt-mark unhold libnccl2 libnccl-dev || true && \
+    apt-get install -y --no-install-recommends \
+      libnccl2=${NCCL_PACKAGE_VERSION} \
+      libnccl-dev=${NCCL_PACKAGE_VERSION} && \
+    apt-mark hold libnccl2 libnccl-dev && \
+    ldconfig && \
+    rm -rf /var/lib/apt/lists/*
+
+# -------------------------
+# 4. Build OpenMPI from source
+# -------------------------
+RUN wget https://download.open-mpi.org/release/open-mpi/v${MPI_SERIES}/openmpi-${MPI_VERSION}.tar.gz && \
+    tar zxvf openmpi-${MPI_VERSION}.tar.gz && \
+    cd openmpi-${MPI_VERSION} && \
+    ./configure --prefix=/usr/local/sihpc --with-cuda=/usr/local/cuda && \
+    make -j$(nproc) && make install && \
+    rm -rf /workspace/openmpi-${MPI_VERSION} /workspace/openmpi-${MPI_VERSION}.tar.gz
+
+# -------------------------
+# 5. Build nccl-tests
+# -------------------------
+# Build from the build context (the checked-out repository) rather than a
+# fresh clone of the sicl branch, so a pull-request or tag build packages
+# exactly the sources and scripts of the commit being built.
+COPY . /workspace/nccl-tests
+RUN cd nccl-tests && \
+    make MPI=1 MPI_HOME=/usr/local/sihpc && \
+    mkdir -p /usr/local/sihpc/libexec/nccl-tests && \
+    cp -rf build/*_perf /usr/local/sihpc/libexec/nccl-tests/ && \
+    cp scripts/nccl_perf /usr/local/sihpc/bin/nccl_perf && \
+    cp scripts/nccl_test /usr/local/sihpc/libexec/nccl-tests/nccl_test && \
+    cp scripts/env.sh /usr/local/sihpc/env.sh && \
+    cp scripts/install_sihpc /usr/local/sihpc/bin/install_sihpc && \
+    cp scripts/uninstall_sihpc /usr/local/sihpc/bin/uninstall_sihpc && \
+    rm -rf /workspace/nccl-tests
+
+# -------------------------
+# 6. Collect runtime libraries (strict selection)
+# -------------------------
+RUN set -eux && \
+    mkdir -p /usr/local/sihpc/lib && \
+    cp /usr/local/cuda/lib64/libcudart* /usr/local/sihpc/lib/ && \
+    cp /usr/lib/x86_64-linux-gnu/libnccl.so* /usr/local/sihpc/lib/
+    # cp /lib/x86_64-linux-gnu/libltdl.so.7.3.1 /usr/local/sihpc/lib/ && \
+    # cp /usr/lib/x86_64-linux-gnu/libhwloc.so* /usr/local/sihpc/lib/ && \
+    # cp /usr/lib/x86_64-linux-gnu/libevent_core* /usr/local/sihpc/lib/ && \
+    # cp /usr/lib/x86_64-linux-gnu/libevent_pthreads* /usr/local/sihpc/lib/
+
+# -------------------------
+# 7. Fix library symlinks
+# -------------------------
+# Plain cp in step 6 follows symlinks, so every soname alias arrived as a
+# full copy; re-create the aliases as links to the versioned file. Names are
+# derived from the build args, and the versioned files must exist, so a
+# version bump cannot silently leave dangling links behind.
+RUN cd /usr/local/sihpc/lib && \
+    test -f "libnccl.so.${NCCL_SO_VERSION}" && \
+    test -f "libcudart.so.${CUDART_VERSION}" && \
+    rm -f libcudart.so "libcudart.so.${CUDART_MAJOR_VERSION}" && \
+    ln -sf "libnccl.so.${NCCL_SO_VERSION}" "libnccl.so.${NCCL_SO_VERSION%%.*}" && \
+    ln -sf "libnccl.so.${NCCL_SO_VERSION%%.*}" libnccl.so && \
+    ln -sf "libcudart.so.${CUDART_VERSION}" "libcudart.so.${CUDART_MAJOR_VERSION}" && \
+    ln -sf "libcudart.so.${CUDART_MAJOR_VERSION}" libcudart.so
+    # rm -f libevent_core-2.1.so.7 && \
+    # ln -sf libhwloc.so.15.1.0 libhwloc.so.15 && \
+    # ln -sf libhwloc.so.15.1.0 libhwloc.so && \
+    # ln -sf libevent_core-2.1.so.7.0.0 libevent_core-2.1.so.7 && \
+    # ln -sf libevent_core-2.1.so.7 libevent_core-2.1.so && \
+    # ln -sf libevent_pthreads-2.1.so.7.0.0 libevent_pthreads-2.1.so.7 && \
+    # ln -sf libevent_pthreads-2.1.so.7 libevent_pthreads-2.1.so && \
+    # ln -sf libltdl.so.7.3.1 libltdl.so.7 && \
+    # ln -sf libltdl.so.7 libltdl.so
+
+###########################
+# Package Stage
+###########################
+FROM ubuntu:${UBUNTU_VERSION} AS package
+
+# Re-declare args for this stage (values are inherited)
+ARG UBUNTU_VERSION
+ARG NCCL_PACKAGE_VERSION
+ARG MPI_VERSION
+ARG BUILD_DATE
+
+# Expose versions/date as environment variables for runtime shell expansion
+ENV NCCL_PACKAGE_VERSION=${NCCL_PACKAGE_VERSION} \
+    MPI_VERSION=${MPI_VERSION} \
+    BUILD_DATE=${BUILD_DATE}
+
+COPY --from=build /usr/local/sihpc /usr/local/sihpc
+
+WORKDIR /
+# '+' in the NCCL package version is replaced with '-' for the file name.
+# Both helper scripts are made executable because they are committed with
+# mode 100644.
+RUN apt-get update && apt-get install -y --no-install-recommends makeself && \
+    chmod +x /usr/local/sihpc/bin/install_sihpc /usr/local/sihpc/bin/uninstall_sihpc && \
+    SAFE_NCCL_PKG=$(printf '%s\n' "${NCCL_PACKAGE_VERSION}" | tr '+' '-') && \
+    PACKAGE_FILENAME="sicl-nccl${SAFE_NCCL_PKG}-ompi${MPI_VERSION}-ubuntu${UBUNTU_VERSION}-${BUILD_DATE}.run" && \
+    makeself --gzip /usr/local/sihpc \
+      "${PACKAGE_FILENAME}" \
+      "SiHPC MPI + NCCL + NCCL-tests Portable Installer" \
+      ./bin/install_sihpc
+
+###########################
+# Export Stage
+###########################
+# Contains nothing but the installer, so '--output type=local' writes just
+# the .run file instead of the whole package-stage root filesystem.
+FROM scratch AS export
+COPY --from=package /*.run /

From 42c1025d7dcb58fc64eb11bd09f73530bdfd3891 Mon Sep 17 00:00:00 2001
From: xlliu-scitix
Date: Sun, 21 Dec 2025 07:18:22 +0000
Subject: [PATCH 3/3] add free disk space in release.yml ci

---
 .github/workflows/release.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 86b42f8..6cfd95d 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -18,6 +18,15 @@ jobs:
       - name: Setup Docker Buildx
         uses: docker/setup-buildx-action@v3
 
+      - name: Free disk space
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache
+          sudo docker system prune -af || true
+          df -h
+
       - name: Build .run installer (Docker)
         run: |
           docker buildx build \