mirror of
https://github.com/NVIDIA/nccl-tests.git
synced 2026-05-03 13:02:36 +00:00
Modified warmup to run for more message sizes
Loops between minBytes and maxBytes, doubling the size each time. Reduced the default warmup iteration count to 1 (was 5).
This commit is contained in:
@@ -68,7 +68,7 @@ All tests support the same set of arguments :
|
||||
* `-r,--root <root/all>` Specify which root to use. Only for operations with a root like broadcast or reduce. Default : 0.
|
||||
* Performance
|
||||
* `-n,--iters <iteration count>` number of iterations. Default : 20.
|
||||
* `-w,--warmup_iters <warmup iteration count>` number of warmup iterations (not timed). Default : 5.
|
||||
* `-w,--warmup_iters <warmup iteration count>` number of warmup iterations (not timed). Default : 1.
|
||||
* `-m,--agg_iters <aggregation count>` number of operations to aggregate together in each iteration. Default : 1.
|
||||
* `-N,--run_cycles <cycle count>` run & print each cycle. Default : 1; 0=infinite.
|
||||
* `-a,--average <0/1/2/3>` Report performance as an average across all ranks (MPI=1 only). <0=Rank0,1=Avg,2=Min,3=Max>. Default : 1.
|
||||
|
||||
+8
-13
@@ -78,7 +78,7 @@ static size_t maxBytes = 32*1024*1024;
|
||||
static size_t stepBytes = 1*1024*1024;
|
||||
static size_t stepFactor = 1;
|
||||
static int datacheck = 1;
|
||||
static int warmup_iters = 5;
|
||||
static int warmup_iters = 1;
|
||||
static int iters = 20;
|
||||
static int agg_iters = 1;
|
||||
static int run_cycles = 1;
|
||||
@@ -607,19 +607,14 @@ testResult_t TimeTest(struct threadArgs* args, ncclDataType_t type, const char*
|
||||
// Sync to avoid first-call timeout
|
||||
Barrier(args);
|
||||
|
||||
// Warm-up for large size
|
||||
setupArgs(args->maxbytes, type, args);
|
||||
for (int iter = 0; iter < warmup_iters; iter++) {
|
||||
TESTCHECK(startColl(args, type, op, root, 0, iter));
|
||||
// Warm-up for all sizes (using a stepfactor of 2)
|
||||
for (size_t size = args->minbytes; size <= args->maxbytes; size = size * 2) {
|
||||
setupArgs(size, type, args);
|
||||
for (int iter = 0; iter < warmup_iters; iter++) {
|
||||
TESTCHECK(startColl(args, type, op, root, 0, iter));
|
||||
}
|
||||
TESTCHECK(completeColl(args));
|
||||
}
|
||||
TESTCHECK(completeColl(args));
|
||||
|
||||
// Warm-up for small size
|
||||
setupArgs(args->minbytes, type, args);
|
||||
for (int iter = 0; iter < warmup_iters; iter++) {
|
||||
TESTCHECK(startColl(args, type, op, root, 0, iter));
|
||||
}
|
||||
TESTCHECK(completeColl(args));
|
||||
|
||||
// Benchmark
|
||||
long repeat = run_cycles;
|
||||
|
||||
Reference in New Issue
Block a user