From 2f48985b9ccdb296090b31e2a95768caeda6e56e Mon Sep 17 00:00:00 2001
From: Emma Qiao
Date: Fri, 18 Apr 2025 12:56:31 +0800
Subject: [PATCH] infra: Add step to generate new duration file (#3298)

* Add step to generate new duration file

Signed-off-by: EmmaQiaoCh

* Install python in earlier step

Signed-off-by: EmmaQiaoCh

* Clone repo and add debug info

Signed-off-by: EmmaQiaoCh

* Remove debug info and only generate duration for post-merge

Signed-off-by: EmmaQiaoCh

* Test for the new duration file

Signed-off-by: EmmaQiaoCh

* Update the duration file format

Signed-off-by: EmmaQiaoCh

* Move generate_duration.py to scripts folder and add try-catch avoiding any broken

Signed-off-by: EmmaQiaoCh

---------

Signed-off-by: EmmaQiaoCh
---
 jenkins/L0_MergeRequest.groovy          | 13 +++++-
 scripts/generate_duration.py            | 63 ++++++++++++++++++++++++++
 tests/integration/defs/.test_durations  |  0
 3 files changed, 74 insertions(+), 2 deletions(-)
 create mode 100644 scripts/generate_duration.py
 mode change 100755 => 100644 tests/integration/defs/.test_durations

diff --git a/jenkins/L0_MergeRequest.groovy b/jenkins/L0_MergeRequest.groovy
index 21dd6fab19..594adcf168 100644
--- a/jenkins/L0_MergeRequest.groovy
+++ b/jenkins/L0_MergeRequest.groovy
@@ -606,6 +606,7 @@ def collectTestResults(pipeline, testFilter)
         testResultLink = "https://urm.nvidia.com/artifactory/sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}/test-results"

         trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add --no-cache curl")
+        trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add python3")
         trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget ${testResultLink}/", allowStepFailed: true)
         sh "cat index.html | grep \"tar.gz\" | cut -d \"\\\"\" -f 2 > result_file_names.txt"
         sh "cat result_file_names.txt"
@@ -616,6 +617,16 @@
         echo "Result File Number: ${resultFileNumber}, Downloaded: ${resultFileDownloadedNumber}"
         sh "find . -name results-\\*.tar.gz -type f -exec tar -zxvf {} \\; || true"

+        trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
+        if (testFilter[(IS_POST_MERGE)]) {
+            try {
+                sh "python3 llm/scripts/generate_duration.py --duration-file=new_test_duration.json"
+                trtllm_utils.uploadArtifacts("new_test_duration.json", "${UPLOAD_PATH}/test-results/")
+            } catch (Exception e) {
+                // No need to fail the stage if the duration file generation fails
+                echo "An error occurred while generating or uploading the duration file: ${e.toString()}"
+            }
+        }
         junit(testResults: '**/results*.xml', allowEmptyResults : true)
     } // Collect test result stage
@@ -632,13 +643,11 @@
             echo "Test coverage is skipped because there is no test data file."
            return
        }
-        trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add python3")
        trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add py3-pip")
        trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 config set global.break-system-packages true")
        trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install coverage")
        sh "coverage --version"
-        trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
        sh "cp llm/examples/openai_triton/manual_plugin/fmha_triton.py llm/examples/openai_triton/plugin_autogen/"
        def coverageConfigFile = "cov/.coveragerc"
        sh """
diff --git a/scripts/generate_duration.py b/scripts/generate_duration.py
new file mode 100644
index 0000000000..10d69978f5
--- /dev/null
+++ b/scripts/generate_duration.py
@@ -0,0 +1,63 @@
+import argparse
+import glob
+import json
+import os
+
+# Parse command-line arguments
+parser = argparse.ArgumentParser(description="Generate test duration file.")
+parser.add_argument(
+    "--duration-file",
+    type=str,
+    default="new_test_duration.json",
+    help="Path to the output duration file (default: new_test_duration.json)")
+args = parser.parse_args()
+
+# Define the directory containing the test result folders
+TEST_RESULTS_DIR = os.getcwd()
+
+# Define the output file paths
+FULL_RESULT_LOG = "full_result.log"
+NEW_TEST_DURATION = args.duration_file
+
+# Step 1: Prepare full_result.log
+with open(FULL_RESULT_LOG, 'w') as full_result_file:
+    print(f"TEST_RESULTS_DIR: {TEST_RESULTS_DIR}")
+    for report_csv in glob.glob(os.path.join(TEST_RESULTS_DIR, '*/report.csv')):
+        print(f"Processing {report_csv}...")
+        with open(report_csv, 'r') as csv_file:
+            for line in csv_file:
+                if 'passed' in line:
+                    full_result_file.write(line)
+
+# Step 2: Generate new_test_duration.json
+test_durations = {}
+
+# Read the full_result.log file line by line
+with open(FULL_RESULT_LOG, 'r') as file:
+    for line in file:
+        # Extract the first column and the last column
+        columns = line.strip().split(',')
+        first_column = columns[0]
+        last_column = columns[-1]
+
+        # Remove from left to first '/' in the first column
+        test_name = first_column.split('/', 1)[-1]
+        # Replace \"\" with \" and ]\" with ] in case we got these in names from report.csv,
+        # which would break the JSON parsing
+        test_name = test_name.replace(']\"', ']').replace('\"\"', '\"')
+
+        try:
+            last_column = float(last_column)
+        except ValueError:
+            print(
+                f"Warning: Could not convert {last_column} to float. Skipping.")
+            continue
+
+        # Add to the test duration dictionary
+        test_durations[test_name] = last_column
+
+# Write the test durations to the new test duration file
+with open(NEW_TEST_DURATION, 'w') as file:
+    json.dump(test_durations, file, indent=3)
+
+print(f"Test durations have been written to {NEW_TEST_DURATION}")
diff --git a/tests/integration/defs/.test_durations b/tests/integration/defs/.test_durations
old mode 100755
new mode 100644
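
For reference, a minimal sketch of the per-row transformation generate_duration.py performs, assuming report.csv rows have the shape <prefix>/<test name>,...,<duration in seconds>; the sample row, test name, and column layout below are illustrative assumptions, not taken from an actual report.csv:

    # Hypothetical report.csv row; the real column layout may differ.
    row = 'defs/test_e2e.py::test_foo[case-a],passed,12.34'

    columns = row.strip().split(',')
    # Drop everything up to and including the first '/', as the script does.
    test_name = columns[0].split('/', 1)[-1]
    duration = float(columns[-1])

    print({test_name: duration})
    # -> {'test_e2e.py::test_foo[case-a]': 12.34}

The resulting mapping of test name to duration is what gets dumped into new_test_duration.json and uploaded as a post-merge artifact.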