mirror of
https://github.com/NVIDIA/nccl-tests.git
synced 2026-04-23 16:08:20 +08:00
Display unalign setting in output
This commit is contained in:
parent
e02c20b898
commit
eb0d3d2a00
@@ -101,7 +101,7 @@ int cudaGraphLaunches = 0;
|
||||
static int report_cputime = 0;
|
||||
static int report_timestamps = 0;
|
||||
static int deviceImpl = 0;
|
||||
static int unalign = 0;
|
||||
int unalign = 0;
|
||||
int memory_report = 0;
|
||||
|
||||
int deviceCtaCount = 16; // Default number of CTAs for device implementation
|
||||
|
||||
@@ -37,6 +37,7 @@ extern int agg_iters;
|
||||
extern int parallel_init;
|
||||
extern int blocking_coll;
|
||||
extern int cudaGraphLaunches;
|
||||
extern int unalign;
|
||||
|
||||
static FILE *json_report_fp;
|
||||
static thread_local bool write_json;
|
||||
@@ -514,10 +515,10 @@ void writeBenchmarkLineBody(double timeUsec, double algBw, double busBw, bool re
|
||||
testResult_t writeDeviceReport(size_t *maxMem, int localRank, int proc, int totalProcs, int color, const char hostname[], const char *program_name) {
|
||||
PRINT("# nccl-tests version %s nccl-headers=%d nccl-library=%d\n", NCCL_TESTS_VERSION, NCCL_VERSION_CODE, test_ncclVersion);
|
||||
PRINT("# Collective test starting: %s\n", program_name);
|
||||
PRINT("# nThread %d nGpus %d minBytes %ld maxBytes %ld step: %ld(%s) warmup iters: %d iters: %d agg iters: %d validation: %d graph: %d\n",
|
||||
PRINT("# nThread %d nGpus %d minBytes %ld maxBytes %ld step: %ld(%s) warmup iters: %d iters: %d agg iters: %d validation: %d graph: %d unalign: %d\n",
|
||||
nThreads, nGpus, minBytes, maxBytes,
|
||||
(stepFactor > 1)?stepFactor:stepBytes, (stepFactor > 1)?"factor":"bytes",
|
||||
warmup_iters, iters, agg_iters, datacheck, cudaGraphLaunches);
|
||||
warmup_iters, iters, agg_iters, datacheck, cudaGraphLaunches, unalign);
|
||||
if (blocking_coll == 1) PRINT("# Blocking Enabled: wait for completion and barrier after each collective \n");
|
||||
if (blocking_coll > 1) PRINT("# Blocking Enabled: wait for completion after each collective (no barrier) \n");
|
||||
if (parallel_init) PRINT("# Parallel Init Enabled: threads call into NcclInitRank concurrently \n");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user