infra: Add step to generate new duration file (#3298)

* Add step to generate new duration file

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Install python in earlier step

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Clone repo and add debug info

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Remove debug info and only generate duration for post-merge

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Test for the new duration file

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Update the duration file format

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Move generate_duration.py to the scripts folder and add a try-catch to avoid breaking the pipeline

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

---------

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>
This commit is contained in:
Emma Qiao 2025-04-18 12:56:31 +08:00 committed by GitHub
parent 88cff61fa1
commit 2f48985b9c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 74 additions and 2 deletions

View File

@ -606,6 +606,7 @@ def collectTestResults(pipeline, testFilter)
testResultLink = "https://urm.nvidia.com/artifactory/sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}/test-results"
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add --no-cache curl")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add python3")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget ${testResultLink}/", allowStepFailed: true)
sh "cat index.html | grep \"tar.gz\" | cut -d \"\\\"\" -f 2 > result_file_names.txt"
sh "cat result_file_names.txt"
@ -616,6 +617,16 @@ def collectTestResults(pipeline, testFilter)
echo "Result File Number: ${resultFileNumber}, Downloaded: ${resultFileDownloadedNumber}"
sh "find . -name results-\\*.tar.gz -type f -exec tar -zxvf {} \\; || true"
trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
if (testFilter[(IS_POST_MERGE)]) {
try {
sh "python3 llm/scripts/generate_duration.py --duration-file=new_test_duration.json"
trtllm_utils.uploadArtifacts("new_test_duration.json", "${UPLOAD_PATH}/test-results/")
} catch (Exception e) {
// No need to fail the stage if the duration file generation fails
echo "An error occurred while generating or uploading the duration file: ${e.toString()}"
}
}
junit(testResults: '**/results*.xml', allowEmptyResults : true)
} // Collect test result stage
@ -632,13 +643,11 @@ def collectTestResults(pipeline, testFilter)
echo "Test coverage is skipped because there is no test data file."
return
}
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add python3")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add py3-pip")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 config set global.break-system-packages true")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install coverage")
sh "coverage --version"
trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
sh "cp llm/examples/openai_triton/manual_plugin/fmha_triton.py llm/examples/openai_triton/plugin_autogen/"
def coverageConfigFile = "cov/.coveragerc"
sh """

View File

@ -0,0 +1,63 @@
"""Generate a JSON file mapping passed test names to their durations.

Scans every ``*/report.csv`` directly under the current working directory,
copies the rows containing ``passed`` into ``full_result.log``, then writes a
``{test_name: duration_seconds}`` mapping to the path given by
``--duration-file`` (default ``new_test_duration.json``).
"""
import argparse
import glob
import json
import os

# Intermediate file holding the concatenated "passed" rows from all reports.
FULL_RESULT_LOG = "full_result.log"


def parse_args():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with a single ``duration_file`` attribute — the
        path of the JSON output file.
    """
    parser = argparse.ArgumentParser(description="Generate test duration file.")
    parser.add_argument(
        "--duration-file",
        type=str,
        default="new_test_duration.json",
        help="Path to the output duration file (default: new_test_duration.json)")
    return parser.parse_args()


def collect_passed_lines(results_dir, log_path):
    """Step 1: gather every 'passed' row into a single log file.

    Args:
        results_dir: Directory whose immediate subdirectories each hold a
            ``report.csv`` (one per test pipeline stage).
        log_path: Destination file; overwritten on every run.
    """
    print(f"TEST_RESULTS_DIR: {results_dir}")
    with open(log_path, 'w') as full_result_file:
        for report_csv in glob.glob(os.path.join(results_dir, '*/report.csv')):
            print(f"Processing {report_csv}...")
            with open(report_csv, 'r') as csv_file:
                for line in csv_file:
                    # Keep only rows that report a passing test; other rows
                    # carry no useful duration for scheduling.
                    if 'passed' in line:
                        full_result_file.write(line)


def build_durations(log_path):
    """Step 2: parse the collected rows into ``{test_name: duration}``.

    Each row is comma-separated: the first column is the test path and the
    last column is the duration in seconds.  Rows whose duration is not a
    valid float are skipped with a warning instead of aborting the run.

    Args:
        log_path: File produced by :func:`collect_passed_lines`.

    Returns:
        dict mapping cleaned test names to float durations.
    """
    test_durations = {}
    with open(log_path, 'r') as log_file:
        for line in log_file:
            stripped = line.strip()
            if not stripped:
                # Ignore blank lines silently; they carry no data and would
                # otherwise emit a spurious conversion warning.
                continue
            columns = stripped.split(',')
            # Drop the leading "<prefix>/" segment of the test path; keep the
            # remainder intact (test names may themselves contain '/').
            test_name = columns[0].split('/', 1)[-1]
            # Undo CSV quote-escaping artifacts (']"' -> ']', '""' -> '"')
            # that report.csv may leave in names, which would otherwise
            # produce malformed keys in the JSON output.
            test_name = test_name.replace(']"', ']').replace('""', '"')
            try:
                duration = float(columns[-1])
            except ValueError:
                print(
                    f"Warning: Could not convert {columns[-1]} to float. Skipping.")
                continue
            test_durations[test_name] = duration
    return test_durations


def main():
    """Drive the two-step generation and write the JSON output file."""
    args = parse_args()
    collect_passed_lines(os.getcwd(), FULL_RESULT_LOG)
    durations = build_durations(FULL_RESULT_LOG)
    with open(args.duration_file, 'w') as out_file:
        # indent=3 matches the formatting of the existing duration files.
        json.dump(durations, out_file, indent=3)
    print(f"Test durations have been written to {args.duration_file}")


if __name__ == "__main__":
    main()

0
tests/integration/defs/.test_durations Executable file → Normal file
View File