Modified warmup to run for more message sizes

The warmup now loops from minBytes to maxBytes, doubling the message size each time

Reduced default warmup iteration count to 1 (was 5)
This commit is contained in:
David Addison
2025-08-25 13:47:54 -07:00
parent fae7cb4727
commit f2015cbe82
2 changed files with 9 additions and 14 deletions
+1 -1
View File
@@ -68,7 +68,7 @@ All tests support the same set of arguments :
* `-r,--root <root/all>` Specify which root to use. Only for operations with a root like broadcast or reduce. Default : 0. * `-r,--root <root/all>` Specify which root to use. Only for operations with a root like broadcast or reduce. Default : 0.
* Performance * Performance
* `-n,--iters <iteration count>` number of iterations. Default : 20. * `-n,--iters <iteration count>` number of iterations. Default : 20.
* `-w,--warmup_iters <warmup iteration count>` number of warmup iterations (not timed). Default : 5. * `-w,--warmup_iters <warmup iteration count>` number of warmup iterations (not timed). Default : 1.
* `-m,--agg_iters <aggregation count>` number of operations to aggregate together in each iteration. Default : 1. * `-m,--agg_iters <aggregation count>` number of operations to aggregate together in each iteration. Default : 1.
* `-N,--run_cycles <cycle count>` run & print each cycle. Default : 1; 0=infinite. * `-N,--run_cycles <cycle count>` run & print each cycle. Default : 1; 0=infinite.
* `-a,--average <0/1/2/3>` Report performance as an average across all ranks (MPI=1 only). <0=Rank0,1=Avg,2=Min,3=Max>. Default : 1. * `-a,--average <0/1/2/3>` Report performance as an average across all ranks (MPI=1 only). <0=Rank0,1=Avg,2=Min,3=Max>. Default : 1.
+8 -13
View File
@@ -78,7 +78,7 @@ static size_t maxBytes = 32*1024*1024;
static size_t stepBytes = 1*1024*1024; static size_t stepBytes = 1*1024*1024;
static size_t stepFactor = 1; static size_t stepFactor = 1;
static int datacheck = 1; static int datacheck = 1;
static int warmup_iters = 5; static int warmup_iters = 1;
static int iters = 20; static int iters = 20;
static int agg_iters = 1; static int agg_iters = 1;
static int run_cycles = 1; static int run_cycles = 1;
@@ -607,19 +607,14 @@ testResult_t TimeTest(struct threadArgs* args, ncclDataType_t type, const char*
// Sync to avoid first-call timeout // Sync to avoid first-call timeout
Barrier(args); Barrier(args);
// Warm-up for large size // Warm-up for all sizes (using a stepfactor of 2)
setupArgs(args->maxbytes, type, args); for (size_t size = args->minbytes; size <= args->maxbytes; size = size * 2) {
for (int iter = 0; iter < warmup_iters; iter++) { setupArgs(size, type, args);
TESTCHECK(startColl(args, type, op, root, 0, iter)); for (int iter = 0; iter < warmup_iters; iter++) {
TESTCHECK(startColl(args, type, op, root, 0, iter));
}
TESTCHECK(completeColl(args));
} }
TESTCHECK(completeColl(args));
// Warm-up for small size
setupArgs(args->minbytes, type, args);
for (int iter = 0; iter < warmup_iters; iter++) {
TESTCHECK(startColl(args, type, op, root, 0, iter));
}
TESTCHECK(completeColl(args));
// Benchmark // Benchmark
long repeat = run_cycles; long repeat = run_cycles;