mirror of
https://github.com/NVIDIA/nccl-tests.git
synced 2026-01-14 02:47:21 +08:00
Add option to suffix a timestamp to each perf line
Based on code from yakovdyadkin & Scott Moe in MR 349 Adds -S 1 option to suffix each performance report line with a timestamp. Format is "%Y-%m-%d %H:%M:%S" This is especially useful when using the -N 0 option and looking for hangs or failure events.
This commit is contained in:
parent
0bb567cc02
commit
a62c975681
@ -97,6 +97,7 @@ static int streamnull = 0;
|
||||
static int timeout = 0;
|
||||
int cudaGraphLaunches = 0;
|
||||
static int report_cputime = 0;
|
||||
static int report_timestamps = 0;
|
||||
static int deviceImpl = 0;
|
||||
|
||||
int deviceCtaCount = 16; // Default number of CTAs for device implementation
|
||||
@ -660,7 +661,7 @@ testResult_t BenchTime(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t
|
||||
}
|
||||
|
||||
double timeUsec = (report_cputime ? cputimeSec : deltaSec)*1.0E6;
|
||||
writeBenchmarkLineBody(timeUsec, algBw, busBw, args->reportErrors, wrongElts, report_cputime, in_place==0);
|
||||
writeBenchmarkLineBody(timeUsec, algBw, busBw, args->reportErrors, wrongElts, report_cputime, report_timestamps, in_place==0);
|
||||
|
||||
args->bw[0] += busBw;
|
||||
args->bw_count[0]++;
|
||||
@ -871,6 +872,7 @@ int main(int argc, char* argv[], char **envp) {
|
||||
{"timeout", required_argument, 0, 'T'},
|
||||
{"cudagraph", required_argument, 0, 'G'},
|
||||
{"report_cputime", required_argument, 0, 'C'},
|
||||
{"report_timestamps", required_argument, 0, 'S'},
|
||||
{"output_file", required_argument, 0, 'J'},
|
||||
{"average", required_argument, 0, 'a'},
|
||||
{"local_register", required_argument, 0, 'R'},
|
||||
@ -883,7 +885,7 @@ int main(int argc, char* argv[], char **envp) {
|
||||
|
||||
while(1) {
|
||||
int c;
|
||||
c = getopt_long(argc, argv, "t:g:b:e:i:f:n:m:w:N:p:c:o:d:r:z:y:T:hG:C:a:R:x:D:V:J:", longopts, &longindex);
|
||||
c = getopt_long(argc, argv, "t:g:b:e:i:f:n:m:w:N:p:c:o:d:r:z:y:T:hG:C:a:R:x:D:V:J:S:", longopts, &longindex);
|
||||
|
||||
if (c == -1)
|
||||
break;
|
||||
@ -975,6 +977,9 @@ int main(int argc, char* argv[], char **envp) {
|
||||
case 'J':
|
||||
output_file = strdup(optarg);
|
||||
break;
|
||||
case 'S':
|
||||
report_timestamps = strtol(optarg, NULL, 0);
|
||||
break;
|
||||
case 'a':
|
||||
average = (int)strtol(optarg, NULL, 0);
|
||||
break;
|
||||
@ -1053,6 +1058,7 @@ int main(int argc, char* argv[], char **envp) {
|
||||
"[-T,--timeout <time in seconds>] \n\t"
|
||||
"[-G,--cudagraph <num graph launches>] \n\t"
|
||||
"[-C,--report_cputime <0/1>] \n\t"
|
||||
"[-S,--report_timestamps <0/1> report timestamps (default 0)] \n\t"
|
||||
"[-J,--output_file <file> write output to filepath, if accessible. Infer type from suffix (only json supported presently.)] \n\t"
|
||||
"[-a,--average <0/1/2/3> report average iteration time <0=RANK0/1=AVG/2=MIN/3=MAX>] \n\t"
|
||||
"[-R,--local_register <0/1/2> enable local (1) or symmetric (2) buffer registration on send/recv buffers (default: disable (0))] \n\t"
|
||||
@ -1316,7 +1322,7 @@ testResult_t run() {
|
||||
|
||||
fflush(stdout);
|
||||
|
||||
writeResultHeader(report_cputime);
|
||||
writeResultHeader(report_cputime, report_timestamps);
|
||||
|
||||
struct testThread threads[nThreads];
|
||||
memset(threads, 0, sizeof(struct testThread)*nThreads);
|
||||
|
||||
24
src/util.cu
24
src/util.cu
@ -41,6 +41,8 @@ static thread_local bool write_json;
|
||||
|
||||
#define JSON_FILE_VERSION 1
|
||||
|
||||
#define TIME_STRING_FORMAT "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
typedef enum {
|
||||
JSON_NONE, // A pseudo-state meaning that the document is empty
|
||||
JSON_KEY,
|
||||
@ -274,7 +276,7 @@ void formatNow(char *buff, int len) {
|
||||
time(&now);
|
||||
struct tm *timeinfo = localtime(&now);
|
||||
|
||||
strftime(buff, len, "%Y-%m-%d %H:%M:%S", timeinfo);
|
||||
strftime(buff, len, TIME_STRING_FORMAT, timeinfo);
|
||||
}
|
||||
|
||||
// We provide some status line to stdout.
|
||||
@ -464,7 +466,7 @@ void getFloatStr(double value, int width, char* str) {
|
||||
// Write the performance-related payload to stdout/json.
|
||||
// We call this function twice at the top level per test: once for out-of-place, and once for in-place.
|
||||
// The Json output assumes out-of-place happens first.
|
||||
void writeBenchmarkLineBody(double timeUsec, double algBw, double busBw, bool reportErrors, int64_t wrongElts, bool report_cputime, bool out_of_place) {
|
||||
void writeBenchmarkLineBody(double timeUsec, double algBw, double busBw, bool reportErrors, int64_t wrongElts, bool report_cputime, bool report_timestamps, bool out_of_place) {
|
||||
char timeStr[8];
|
||||
getFloatStr(timeUsec, 7, timeStr);
|
||||
|
||||
@ -480,6 +482,12 @@ void writeBenchmarkLineBody(double timeUsec, double algBw, double busBw, bool re
|
||||
PRINT(" %7s %6s %6s N/A", timeStr, algBwStr, busBwStr);
|
||||
}
|
||||
|
||||
if (!out_of_place && report_timestamps) {
|
||||
char timebuffer[128];
|
||||
formatNow(timebuffer, sizeof(timebuffer));
|
||||
PRINT("%21s", timebuffer);
|
||||
}
|
||||
|
||||
if(write_json) {
|
||||
jsonKey(out_of_place ? "out_of_place" : "in_place");
|
||||
jsonStartObject();
|
||||
@ -607,14 +615,16 @@ testResult_t writeDeviceReport(size_t *maxMem, int localRank, int proc, int tota
|
||||
|
||||
// Write a result header to stdout/json.
|
||||
// Json results object and contained table list are left open
|
||||
void writeResultHeader(bool report_cputime) {
|
||||
void writeResultHeader(bool report_cputime, bool report_timestamps) {
|
||||
const char* tsLbl = report_timestamps ? "timestamp" : "";
|
||||
const char* tsFmt = report_timestamps ? TIME_STRING_FORMAT : "";
|
||||
const char* timeStr = report_cputime ? "cputime" : "time";
|
||||
PRINT("#\n");
|
||||
PRINT("# %10s %12s %8s %6s %6s out-of-place in-place \n", "", "", "", "", "");
|
||||
PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %6s %7s %6s %6s %6s\n", "size", "count", "type", "redop", "root",
|
||||
timeStr, "algbw", "busbw", "#wrong", timeStr, "algbw", "busbw", "#wrong");
|
||||
PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %5s %7s %6s %6s %5s\n", "(B)", "(elements)", "", "", "",
|
||||
"(us)", "(GB/s)", "(GB/s)", "", "(us)", "(GB/s)", "(GB/s)", "");
|
||||
PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %6s %7s %6s %6s %6s %20s\n", "size", "count", "type", "redop", "root",
|
||||
timeStr, "algbw", "busbw", "#wrong", timeStr, "algbw", "busbw", "#wrong", tsLbl);
|
||||
PRINT("# %10s %12s %8s %6s %6s %7s %6s %6s %6s %7s %6s %6s %6s %20s\n", "(B)", "(elements)", "", "", "",
|
||||
"(us)", "(GB/s)", "(GB/s)", "", "(us)", "(GB/s)", "(GB/s)", "", tsFmt);
|
||||
|
||||
if(write_json) {
|
||||
jsonKey("results"); jsonStartList();
|
||||
|
||||
@ -28,9 +28,9 @@ void jsonOutputFinalize();
|
||||
void writeBenchmarkLinePreamble(size_t nBytes, size_t nElem, const char typeName[], const char opName[], int root);
|
||||
void writeBenchmarkLineTerminator(int actualIters, const char *name);
|
||||
void writeBenchMarkLineNullBody();
|
||||
void writeBenchmarkLineBody(double timeUsec, double algBw, double busBw, bool reportErrors, int64_t wrongElts, bool report_cputime, bool out_of_place);
|
||||
void writeBenchmarkLineBody(double timeUsec, double algBw, double busBw, bool reportErrors, int64_t wrongElts, bool report_cputime, bool report_timestamps, bool out_of_place);
|
||||
testResult_t writeDeviceReport(size_t *maxMem, int localRank, int proc, int totalProcs, int color, const char hostname[], const char *program_name);
|
||||
void writeResultHeader(bool report_cputime);
|
||||
void writeResultHeader(bool report_cputime, bool report_timestamps);
|
||||
void writeResultFooter(const int errors[], const double bw[], double check_avg_bw, const char *program_name);
|
||||
void writeErrors();
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user