Display unalign setting in output

This commit is contained in:
David Addison 2026-03-11 15:05:54 -07:00
parent e02c20b898
commit eb0d3d2a00
2 changed files with 4 additions and 3 deletions

View File

@@ -101,7 +101,7 @@ int cudaGraphLaunches = 0;
static int report_cputime = 0;
static int report_timestamps = 0;
static int deviceImpl = 0;
-static int unalign = 0;
+int unalign = 0;
int memory_report = 0;
int deviceCtaCount = 16; // Default number of CTAs for device implementation

View File

@@ -37,6 +37,7 @@ extern int agg_iters;
extern int parallel_init;
extern int blocking_coll;
extern int cudaGraphLaunches;
+extern int unalign;
static FILE *json_report_fp;
static thread_local bool write_json;
@@ -514,10 +515,10 @@ void writeBenchmarkLineBody(double timeUsec, double algBw, double busBw, bool re
testResult_t writeDeviceReport(size_t *maxMem, int localRank, int proc, int totalProcs, int color, const char hostname[], const char *program_name) {
PRINT("# nccl-tests version %s nccl-headers=%d nccl-library=%d\n", NCCL_TESTS_VERSION, NCCL_VERSION_CODE, test_ncclVersion);
PRINT("# Collective test starting: %s\n", program_name);
-PRINT("# nThread %d nGpus %d minBytes %ld maxBytes %ld step: %ld(%s) warmup iters: %d iters: %d agg iters: %d validation: %d graph: %d\n",
+PRINT("# nThread %d nGpus %d minBytes %ld maxBytes %ld step: %ld(%s) warmup iters: %d iters: %d agg iters: %d validation: %d graph: %d unalign: %d\n",
nThreads, nGpus, minBytes, maxBytes,
(stepFactor > 1)?stepFactor:stepBytes, (stepFactor > 1)?"factor":"bytes",
-warmup_iters, iters, agg_iters, datacheck, cudaGraphLaunches);
+warmup_iters, iters, agg_iters, datacheck, cudaGraphLaunches, unalign);
if (blocking_coll == 1) PRINT("# Blocking Enabled: wait for completion and barrier after each collective \n");
if (blocking_coll > 1) PRINT("# Blocking Enabled: wait for completion after each collective (no barrier) \n");
if (parallel_init) PRINT("# Parallel Init Enabled: threads call into NcclInitRank concurrently \n");