Added an MPI_Barrier() call after MPI_Bcast() to work around an HCOLL issue

This commit is contained in:
David Addison 2023-10-12 16:53:32 -07:00
parent 6c46206a47
commit 1292b25553

View File

@@ -924,6 +924,7 @@ testResult_t run() {
}
#ifdef MPI_SUPPORT
MPI_Bcast(&ncclId, sizeof(ncclId), MPI_BYTE, 0, mpi_comm);
MPI_Barrier(MPI_COMM_WORLD); // Ensure Bcast is complete for HCOLL
#endif
int gpus[nGpus*nThreads];
cudaStream_t streams[nGpus*nThreads];