mirror of
https://github.com/NVIDIA/nccl-tests.git
synced 2026-05-03 13:02:36 +00:00
Make the -c option a datacheck iteration count parameter
The default is 1
This commit is contained in:
@@ -62,7 +62,7 @@ All tests support the same set of arguments :
|
|||||||
* `-a,--average <0/1/2/3>` Report performance as an average across all ranks (MPI=1 only). <0=Rank0,1=Avg,2=Min,3=Max>. Default : 1.
|
* `-a,--average <0/1/2/3>` Report performance as an average across all ranks (MPI=1 only). <0=Rank0,1=Avg,2=Min,3=Max>. Default : 1.
|
||||||
* Test operation
|
* Test operation
|
||||||
* `-p,--parallel_init <0/1>` use threads to initialize NCCL in parallel. Default : 0.
|
* `-p,--parallel_init <0/1>` use threads to initialize NCCL in parallel. Default : 0.
|
||||||
* `-c,--check <0/1>` check correctness of results. This can be quite slow on large numbers of GPUs. Default : 1.
|
* `-c,--check <check iteration count>` Perform the given number of data-check iterations, checking the correctness of results on each iteration; 0 disables checking. This can be quite slow on large numbers of GPUs. Default : 1.
|
||||||
* `-z,--blocking <0/1>` Make NCCL collective blocking, i.e. have CPUs wait and sync after each collective. Default : 0.
|
* `-z,--blocking <0/1>` Make NCCL collective blocking, i.e. have CPUs wait and sync after each collective. Default : 0.
|
||||||
* `-G,--cudagraph <num graph launches>` Capture iterations as a CUDA graph and then replay specified number of times. Default : 0.
|
* `-G,--cudagraph <num graph launches>` Capture iterations as a CUDA graph and then replay specified number of times. Default : 0.
|
||||||
|
|
||||||
|
|||||||
+4
-2
@@ -487,7 +487,7 @@ testResult_t BenchTime(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t
|
|||||||
int64_t wrongElts = 0;
|
int64_t wrongElts = 0;
|
||||||
static __thread int rep = 0;
|
static __thread int rep = 0;
|
||||||
rep++;
|
rep++;
|
||||||
if (datacheck) {
|
for (int c = 0; c < datacheck; c++) {
|
||||||
// Initialize sendbuffs, recvbuffs and expected
|
// Initialize sendbuffs, recvbuffs and expected
|
||||||
TESTCHECK(args->collTest->initData(args, type, op, root, rep, in_place));
|
TESTCHECK(args->collTest->initData(args, type, op, root, rep, in_place));
|
||||||
|
|
||||||
@@ -536,8 +536,10 @@ testResult_t BenchTime(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t
|
|||||||
|
|
||||||
//aggregate delta from all threads and procs
|
//aggregate delta from all threads and procs
|
||||||
long long wrongElts1 = wrongElts;
|
long long wrongElts1 = wrongElts;
|
||||||
|
//if (wrongElts) fprintf(stderr, "\nERROR: Data corruption : rank %d size %ld wrongElts %ld\n", args->proc, args->expectedBytes, wrongElts);
|
||||||
Allreduce(args, &wrongElts1, /*sum*/4);
|
Allreduce(args, &wrongElts1, /*sum*/4);
|
||||||
wrongElts = wrongElts1;
|
wrongElts = wrongElts1;
|
||||||
|
if (wrongElts) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
double timeUsec = (report_cputime ? cputimeSec : deltaSec)*1.0E6;
|
double timeUsec = (report_cputime ? cputimeSec : deltaSec)*1.0E6;
|
||||||
@@ -809,7 +811,7 @@ int main(int argc, char* argv[]) {
|
|||||||
"[-m,--agg_iters <aggregated iteration count>] \n\t"
|
"[-m,--agg_iters <aggregated iteration count>] \n\t"
|
||||||
"[-w,--warmup_iters <warmup iteration count>] \n\t"
|
"[-w,--warmup_iters <warmup iteration count>] \n\t"
|
||||||
"[-p,--parallel_init <0/1>] \n\t"
|
"[-p,--parallel_init <0/1>] \n\t"
|
||||||
"[-c,--check <0/1>] \n\t"
|
"[-c,--check <check iteration count>] \n\t"
|
||||||
#if NCCL_VERSION_CODE >= NCCL_VERSION(2,11,0)
|
#if NCCL_VERSION_CODE >= NCCL_VERSION(2,11,0)
|
||||||
"[-o,--op <sum/prod/min/max/avg/mulsum/all>] \n\t"
|
"[-o,--op <sum/prod/min/max/avg/mulsum/all>] \n\t"
|
||||||
#elif NCCL_VERSION_CODE >= NCCL_VERSION(2,10,0)
|
#elif NCCL_VERSION_CODE >= NCCL_VERSION(2,10,0)
|
||||||
|
|||||||
Reference in New Issue
Block a user