mirror of
https://github.com/NVIDIA/nccl-tests.git
synced 2026-02-22 02:34:42 +08:00
Change all_gather/reduce_scatter algbw to match the documentation.
Fix #45: All_gather and reduce_scatter algorithm bandwidth was computed as count*typesize*(nranks-1)/time, which is not consistent with the way we compute it for other collectives. It is now computed as count*typesize*nranks/time. This change makes algbw higher; busbw is unchanged.
This commit is contained in:
parent
07ac716c1a
commit
ec1b5e22e6
@ -48,10 +48,10 @@ testResult_t AllGatherInitData(struct threadArgs* args, ncclDataType_t type, ncc
|
||||
}
|
||||
|
||||
/*
 * Compute the algorithm and bus bandwidth of an all-gather run, in GB/s
 * (bytes / 1e9 / seconds).
 *
 *   count    - element count the bandwidth is based on; it is multiplied
 *              by nranks below to obtain the total number of elements
 *   typesize - size of one element in bytes
 *   sec      - elapsed time in seconds
 *   algBw    - out: count * typesize * nranks / 1e9 / sec
 *   busBw    - out: algBw scaled by (nranks - 1) / nranks
 *   nranks   - number of ranks taking part in the collective
 */
void AllGatherGetBw(size_t count, int typesize, double sec, double* algBw, double* busBw, int nranks) {
  /* Multiply in double so the byte count cannot wrap around size_t. */
  double totalBytes = (double)count * (double)typesize * (double)nranks;
  double baseBw = totalBytes / 1.0E9 / sec;

  *algBw = baseBw;

  /* With this factor busBw equals count * typesize * (nranks - 1) / 1e9 / sec. */
  double factor = ((double)(nranks - 1)) / ((double)nranks);
  *busBw = baseBw * factor;
}
|
||||
|
||||
|
||||
@ -47,10 +47,10 @@ testResult_t ReduceScatterInitData(struct threadArgs* args, ncclDataType_t type,
|
||||
}
|
||||
|
||||
/*
 * Compute the algorithm and bus bandwidth of a reduce-scatter run, in GB/s
 * (bytes / 1e9 / seconds).
 *
 *   count    - element count the bandwidth is based on; it is multiplied
 *              by nranks below to obtain the total number of elements
 *   typesize - size of one element in bytes
 *   sec      - elapsed time in seconds
 *   algBw    - out: count * typesize * nranks / 1e9 / sec
 *   busBw    - out: algBw scaled by (nranks - 1) / nranks
 *   nranks   - number of ranks taking part in the collective
 */
void ReduceScatterGetBw(size_t count, int typesize, double sec, double* algBw, double* busBw, int nranks) {
  /* Multiply in double so the byte count cannot wrap around size_t. */
  double totalBytes = (double)count * (double)typesize * (double)nranks;
  double baseBw = totalBytes / 1.0E9 / sec;

  *algBw = baseBw;

  /* With this factor busBw equals count * typesize * (nranks - 1) / 1e9 / sec. */
  double factor = ((double)(nranks - 1)) / ((double)nranks);
  *busBw = baseBw * factor;
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user