mirror of
https://github.com/NVIDIA/nccl-tests.git
synced 2026-05-01 11:58:19 +08:00
tweak build and test scripts for SiCL.
* Add '/usr/local/sihpc/lib' to the rpath of the linked test binaries. * Print 'NVCC_GENCODE' from the Makefile and, when building with CUDA 12 or newer, generate binaries for Volta, Ampere, Ada, and Hopper by default. * Add the test-run wrapper scripts "nccl_perf" and "nccl_test".
This commit is contained in:
parent
8dfeab9eb9
commit
fd83f7ca84
27
scripts/nccl_perf
Executable file
27
scripts/nccl_perf
Executable file
@ -0,0 +1,27 @@
|
|||||||
|
#!/usr/bin/env bash
#
# nccl_perf - run one nccl-tests "<collective>_perf" binary.
#
# Usage: nccl_perf [-l<Collective>] [arguments forwarded to the binary...]
#
#   -l<Collective>  Select the collective; the name is attached directly to
#                   the flag (e.g. -lAllGather). The default is all_reduce.
#                   An unrecognised name leaves the current selection
#                   unchanged and prints a warning on stderr.
#
# Every other argument is passed, unchanged and in order, to
# $TEST_DIR/<collective>_perf.

# Absolute directory of this script; the binaries are located relative to it.
# Quoted so that install paths containing spaces still resolve.
BASE_DIR=$(cd -- "$(dirname -- "$0")" && pwd) || exit 1
TEST_DIR=$BASE_DIR/../libexec/nccl-tests

COLL=all_reduce
# Forwarded arguments live in an array so that values containing whitespace
# or glob characters reach the binary exactly as the caller wrote them, and
# so that an OPTIONS variable inherited from the environment cannot leak in.
OPTIONS=()

# Loop on the argument count rather than on "$1" being non-empty: an empty
# argument must not end parsing and silently drop everything after it.
while [[ $# -gt 0 ]]; do
  case $1 in
    -l*)
      # ${1:2} is whatever follows the two characters "-l".
      case ${1:2} in
        AllReduce|allreduce)             COLL=all_reduce;;
        Broadcast|broadcast|Bcast|bcast) COLL=broadcast;;
        Reduce|reduce)                   COLL=reduce;;
        Gather|gather)                   COLL=gather;;
        Scatter|scatter)                 COLL=scatter;;
        AllToAll|alltoall)               COLL=alltoall;;
        AllGather|allgather)             COLL=all_gather;;
        ReduceScatter|reducescatter)     COLL=reduce_scatter;;
        HyperCube|Hypercube|hypercube)   COLL=hypercube;;
        SendRecv|sendrecv)               COLL=sendrecv;;
        *) echo "nccl_perf: unknown collective '${1:2}', keeping '$COLL'" >&2;;
      esac;;
    *) OPTIONS+=("$1");;
  esac
  shift
done

# Open MPI environment: permit running as root, limit the BTL components to
# self and tcp, and exclude the ucx PML.
export OMPI_ALLOW_RUN_AS_ROOT=1
export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
export OMPI_MCA_btl=self,tcp
export OMPI_MCA_pml=^ucx

# Replace this shell with the test binary so its exit status is ours.
exec "$TEST_DIR/${COLL}_perf" "${OPTIONS[@]}"
|
||||||
36
scripts/nccl_test
Executable file
36
scripts/nccl_test
Executable file
@ -0,0 +1,36 @@
|
|||||||
|
#!/usr/bin/env bash
#
# nccl_test - run one nccl-tests "<collective>_perf" binary located next to
# this script, printing a one-line banner from rank 0.
#
# Usage: nccl_test [-l<Collective>] [arguments forwarded to the binary...]
#
#   -l<Collective>  Select the collective; the name is attached directly to
#                   the flag (e.g. -lAllGather). The default is all_reduce.
#                   An unrecognised name leaves the current selection
#                   unchanged and prints a warning on stderr.
#
# Every other argument is passed, unchanged and in order, to
# $TEST_DIR/<collective>_perf after a fixed leading "-f2".
#
# The OMPI_COMM_WORLD_* variables are read from the environment. When they
# are absent the script behaves as a single rank (rank 0, world size 1).

# Absolute directory of this script, which is also where the binaries live.
# Quoted so that install paths containing spaces still resolve.
TEST_DIR=$(cd -- "$(dirname -- "$0")" && pwd) || exit 1

COLL=all_reduce
# Forwarded arguments live in an array so that values containing whitespace
# or glob characters reach the binary exactly as the caller wrote them, and
# so that an OPTIONS variable inherited from the environment cannot leak in.
OPTIONS=()

# Loop on the argument count rather than on "$1" being non-empty: an empty
# argument must not end parsing and silently drop everything after it.
while [[ $# -gt 0 ]]; do
  case $1 in
    -l*)
      # ${1:2} is whatever follows the two characters "-l".
      case ${1:2} in
        AllReduce|allreduce)             COLL=all_reduce;;
        Broadcast|broadcast|Bcast|bcast) COLL=broadcast;;
        Reduce|reduce)                   COLL=reduce;;
        Gather|gather)                   COLL=gather;;
        Scatter|scatter)                 COLL=scatter;;
        AllToAll|alltoall)               COLL=alltoall;;
        AllGather|allgather)             COLL=all_gather;;
        ReduceScatter|reducescatter)     COLL=reduce_scatter;;
        HyperCube|Hypercube|hypercube)   COLL=hypercube;;
        SendRecv|sendrecv)               COLL=sendrecv;;
        *) echo "nccl_test: unknown collective '${1:2}', keeping '$COLL'" >&2;;
      esac;;
    *) OPTIONS+=("$1");;
  esac
  shift
done

# Open MPI environment: permit running as root, limit the BTL components to
# self and tcp, and exclude the ucx PML.
export OMPI_ALLOW_RUN_AS_ROOT=1
export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
export OMPI_MCA_btl=self,tcp
export OMPI_MCA_pml=^ucx

# Defaults keep the numeric tests below well-formed when the script is run
# without these variables set (otherwise `[` fails with "unary operator
# expected" and the banner is never printed).
world_size=${OMPI_COMM_WORLD_SIZE:-1}
local_size=${OMPI_COMM_WORLD_LOCAL_SIZE:-1}
rank=${OMPI_COMM_WORLD_RANK:-0}

# When the job has more ranks in total than on this node, rank 0 defaults
# NCCL_DEBUG to INFO; a value already set by the caller is kept.
if [ "$world_size" -gt "$local_size" ] && [ "$rank" -eq 0 ]; then
  export NCCL_DEBUG=${NCCL_DEBUG:-"INFO"}
fi

# Only rank 0 announces the run, so the banner appears once per job.
if [ "$rank" -eq 0 ]; then
  echo "[$(hostname)] running nccl test $COLL${OPTIONS[*]:+ ${OPTIONS[*]}}, world_size=$world_size"
fi

# "-f2" is always passed first, followed by the caller's arguments
# (NOTE(review): -f is presumably the nccl-tests size step factor; confirm
# against the binary's usage text). exec makes the binary's exit status ours.
exec "$TEST_DIR/${COLL}_perf" -f2 "${OPTIONS[@]}"
|
||||||
12
src/Makefile
12
src/Makefile
@ -19,7 +19,14 @@ CUDA_MAJOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 1)
|
|||||||
|
|
||||||
# Better define NVCC_GENCODE in your environment to the minimal set
|
# Better define NVCC_GENCODE in your environment to the minimal set
|
||||||
# of archs to reduce compile time.
|
# of archs to reduce compile time.
|
||||||
ifeq ($(shell test "0$(CUDA_MAJOR)" -ge 11; echo $$?),0)
|
ifeq ($(shell test "0$(CUDA_MAJOR)" -ge 12; echo $$?),0)
|
||||||
|
NVCC_GENCODE ?= -gencode=arch=compute_70,code=sm_70 \
|
||||||
|
-gencode=arch=compute_80,code=sm_80 \
|
||||||
|
-gencode=arch=compute_86,code=sm_86 \
|
||||||
|
-gencode=arch=compute_89,code=sm_89 \
|
||||||
|
-gencode=arch=compute_90,code=sm_90 \
|
||||||
|
-gencode=arch=compute_90,code=compute_90
|
||||||
|
else ifeq ($(shell test "0$(CUDA_MAJOR)" -ge 11; echo $$?),0)
|
||||||
NVCC_GENCODE ?= -gencode=arch=compute_60,code=sm_60 \
|
NVCC_GENCODE ?= -gencode=arch=compute_60,code=sm_60 \
|
||||||
-gencode=arch=compute_61,code=sm_61 \
|
-gencode=arch=compute_61,code=sm_61 \
|
||||||
-gencode=arch=compute_70,code=sm_70 \
|
-gencode=arch=compute_70,code=sm_70 \
|
||||||
@ -33,6 +40,7 @@ NVCC_GENCODE ?= -gencode=arch=compute_35,code=sm_35 \
|
|||||||
-gencode=arch=compute_70,code=sm_70 \
|
-gencode=arch=compute_70,code=sm_70 \
|
||||||
-gencode=arch=compute_70,code=compute_70
|
-gencode=arch=compute_70,code=compute_70
|
||||||
endif
|
endif
|
||||||
|
$(info NVCC_GENCODE is ${NVCC_GENCODE})
|
||||||
|
|
||||||
NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11
|
NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11
|
||||||
CXXFLAGS := -std=c++11
|
CXXFLAGS := -std=c++11
|
||||||
@ -101,5 +109,5 @@ ${DST_DIR}/timer.o: timer.cc timer.h
|
|||||||
${DST_DIR}/%_perf:${DST_DIR}/%.o ${DST_DIR}/common.o ${DST_DIR}/timer.o $(TEST_VERIFIABLE_OBJS)
|
${DST_DIR}/%_perf:${DST_DIR}/%.o ${DST_DIR}/common.o ${DST_DIR}/timer.o $(TEST_VERIFIABLE_OBJS)
|
||||||
@printf "Linking %-35s > %s\n" $< $@
|
@printf "Linking %-35s > %s\n" $< $@
|
||||||
@mkdir -p ${DST_DIR}
|
@mkdir -p ${DST_DIR}
|
||||||
$(NVCC) -o $@ $(NVCUFLAGS) $^ ${NVLDFLAGS}
|
$(NVCC) -o $@ $(NVCUFLAGS) $^ ${NVLDFLAGS} -Xcompiler \"-Wl,-rpath,/usr/local/sihpc/lib\"
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user