Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-13 22:18:36 +08:00)
Commit fcc1fb9c56: Merge 3963d565c0 into 6df2c8a074
@@ -864,6 +864,26 @@ def collectTestResults(pipeline, testFilter)
 
         junit(testResults: '**/results*.xml', allowEmptyResults : true)
     } // Collect test result stage
+    stage("Collect Perf Regression Result") {
+        def yamlFiles = sh(
+            returnStdout: true,
+            script: 'find . -type f -name "regression_data.yaml" 2>/dev/null || true'
+        ).trim()
+        echo "yamlFiles: ${yamlFiles}"
+        if (yamlFiles) {
+            def yamlFileList = yamlFiles.split(/\s+/).collect { it.trim() }.findAll { it }.join(",")
+            echo "Found regression data files: ${yamlFileList}"
+            sh """
+                python3 llm/jenkins/scripts/perf/perf_regression.py \
+                    --input-files=${yamlFileList} \
+                    --output-file=perf_regression.html
+            """
+            trtllm_utils.uploadArtifacts("perf_regression.html", "${UPLOAD_PATH}/test-results/")
+            echo "Perf regression report: https://urm.nvidia.com/artifactory/${UPLOAD_PATH}/test-results/perf_regression.html"
+        } else {
+            echo "No regression_data.yaml files found."
+        }
+    } // Collect Perf Regression Result stage
     stage("Rerun Report") {
         sh "rm -rf rerun && mkdir -p rerun"
         sh "find . -type f -wholename '*/rerun_results.xml' -exec sh -c 'mv \"{}\" \"rerun/\$(basename \$(dirname \"{}\"))_rerun_results.xml\"' \\; || true"
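
For clarity, a minimal Python rendering (not the pipeline's Groovy) of how this stage turns the newline-separated `find` output into the comma-separated `--input-files` argument; the paths below are made-up examples:

    # Hypothetical find output; real paths come from the Jenkins workspace.
    find_output = "a/aggr_x/regression_data.yaml\nb/disagg_y/regression_data.yaml\n"
    yaml_file_list = ",".join(p.strip() for p in find_output.split() if p.strip())
    assert yaml_file_list == "a/aggr_x/regression_data.yaml,b/disagg_y/regression_data.yaml"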
@@ -124,6 +124,7 @@ def uploadResults(def pipeline, SlurmCluster cluster, String nodeName, String st
 
     def hasTimeoutTest = false
     def downloadResultSucceed = false
+    def downloadPerfResultSucceed = false
 
     pipeline.stage('Submit Test Result') {
         sh "mkdir -p ${stageName}"
@@ -146,8 +147,18 @@ EOF_TIMEOUT_XML
         def resultsFilePath = "/home/svc_tensorrt/bloom/scripts/${nodeName}/results.xml"
         downloadResultSucceed = Utils.exec(pipeline, script: "sshpass -p '${remote.passwd}' scp -P ${remote.port} -r -p ${COMMON_SSH_OPTIONS} ${remote.user}@${remote.host}:${resultsFilePath} ${stageName}/", returnStatus: true, numRetries: 3) == 0
 
-        echo "hasTimeoutTest: ${hasTimeoutTest}, downloadResultSucceed: ${downloadResultSucceed}"
-        if (hasTimeoutTest || downloadResultSucceed) {
+        // Download perf test results
+        def perfResultsBasePath = "/home/svc_tensorrt/bloom/scripts/${nodeName}"
+        def folderListOutput = Utils.exec(pipeline, script: """sshpass -p '${remote.passwd}' ssh -p ${remote.port} ${COMMON_SSH_OPTIONS} ${remote.user}@${remote.host} "find ${perfResultsBasePath} -maxdepth 1 -type d \\( -name 'aggr*' -o -name 'disagg*' \\) -printf '%f\\n'" || true""", returnStdout: true, numRetries: 3)?.trim() ?: ""
+        echo "folderListOutput: ${folderListOutput}"
+        def perfFolders = folderListOutput.split(/\s+/).collect { it.trim().replaceAll(/\/$/, '') }.findAll { it }
+        if (perfFolders) {
+            def scpSources = perfFolders.collect { "${remote.user}@${remote.host}:${perfResultsBasePath}/${it}" }.join(' ')
+            downloadPerfResultSucceed = Utils.exec(pipeline, script: "sshpass -p '${remote.passwd}' scp -P ${remote.port} -r -p ${COMMON_SSH_OPTIONS} ${scpSources} ${stageName}/", returnStatus: true, numRetries: 3) == 0
+        }
+
+        echo "hasTimeoutTest: ${hasTimeoutTest}, downloadResultSucceed: ${downloadResultSucceed}, downloadPerfResultSucceed: ${downloadPerfResultSucceed}"
+        if (hasTimeoutTest || downloadResultSucceed || downloadPerfResultSucceed) {
             sh "ls ${stageName}"
             echo "Upload test results."
             sh "tar -czvf results-${stageName}.tar.gz ${stageName}/"
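
Similarly, a small Python sketch (hypothetical host and paths, not the pipeline's Groovy) of how the perf-result step turns the remote `find ... -printf '%f\n'` output into scp source arguments:

    folder_list_output = "aggr_llama\ndisagg_ds\n"  # example output of the remote find
    perf_folders = [f.strip().rstrip("/") for f in folder_list_output.split() if f.strip()]
    base = "/home/svc_tensorrt/bloom/scripts/node-0"  # example per-node base path
    scp_sources = " ".join(f"user@host:{base}/{f}" for f in perf_folders)
    # -> "user@host:.../aggr_llama user@host:.../disagg_ds"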
jenkins/scripts/perf/perf_regression.py (new file, 275 lines)
#!/usr/bin/env python3
"""Merge perf regression info from multiple YAML files into an HTML report."""

import argparse
from html import escape as escape_html

import yaml

# Metrics where larger is better
MAXIMIZE_METRICS = [
    "d_seq_throughput",
    "d_token_throughput",
    "d_total_token_throughput",
    "d_user_throughput",
    "d_mean_tpot",
    "d_median_tpot",
    "d_p99_tpot",
]

# Metrics where smaller is better
MINIMIZE_METRICS = [
    "d_mean_ttft",
    "d_median_ttft",
    "d_p99_ttft",
    "d_mean_itl",
    "d_median_itl",
    "d_p99_itl",
    "d_mean_e2el",
    "d_median_e2el",
    "d_p99_e2el",
]


def _get_metric_keys():
    """Get all metric-related keys, used to filter them out of the config keys."""
    metric_keys = set()
    for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS:
        metric_suffix = metric[2:]  # Strip the "d_" prefix
        metric_keys.add(metric)
        metric_keys.add(f"d_baseline_{metric_suffix}")
        metric_keys.add(f"d_threshold_post_merge_{metric_suffix}")
        metric_keys.add(f"d_threshold_pre_merge_{metric_suffix}")
    return metric_keys
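
# Example expansion: "d_mean_ttft" -> {"d_mean_ttft", "d_baseline_mean_ttft",
# "d_threshold_post_merge_mean_ttft", "d_threshold_pre_merge_mean_ttft"}.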


def _get_regression_content(data):
    """Get regression info and config content as a list of lines."""
    lines = []
    if "s_regression_info" in data:
        lines.append("=== Regression Info ===")
        regression_info = data["s_regression_info"]
        for line in regression_info.split(","):
            lines.append(line)

    metric_keys = _get_metric_keys()

    lines.append("")
    lines.append("=== Config ===")
    config_keys = sorted([key for key in data.keys() if key not in metric_keys])
    for key in config_keys:
        if key == "s_regression_info":
            continue
        value = data[key]
        lines.append(f'"{key}": {value}')

    return lines
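
# The returned lines render as a "=== Regression Info ===" section (one line per
# comma-separated field of s_regression_info) followed by a "=== Config ===" section
# listing the remaining non-metric keys.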


def merge_regression_data(input_files):
    """Read the given YAML files and merge their regression data."""
    yaml_files = [f.strip() for f in input_files.split(",") if f.strip()]

    regression_dict = {}
    load_failures = 0

    for yaml_file in yaml_files:
        try:
            # Path format: .../{stage_name}/{folder_name}/regression_data.yaml
            path_parts = yaml_file.replace("\\", "/").split("/")
            if len(path_parts) < 3:
                continue

            stage_name = path_parts[-3]
            folder_name = path_parts[-2]

            with open(yaml_file, "r", encoding="utf-8") as f:
                content = yaml.safe_load(f)
            if content is None or not isinstance(content, list):
                continue

            filtered_data = [
                d for d in content if isinstance(d, dict) and "s_test_case_name" in d
            ]

            if not filtered_data:
                continue

            if stage_name not in regression_dict:
                regression_dict[stage_name] = {}

            if folder_name not in regression_dict[stage_name]:
                regression_dict[stage_name][folder_name] = []

            regression_dict[stage_name][folder_name].extend(filtered_data)

        except (OSError, yaml.YAMLError, UnicodeDecodeError) as e:
            load_failures += 1
            print(f"Warning: Failed to load {yaml_file}: {e}")
            continue

    # Fail fast if the caller provided inputs but none were readable/parseable.
    # (This keeps "no regressions found" working when yaml_files is empty.)
    if yaml_files and not regression_dict and load_failures == len(yaml_files):
        raise RuntimeError("Failed to load any regression YAML inputs; cannot generate report.")

    return regression_dict
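
# Shape of the merged result: {stage_name: {folder_name: [regression_entry, ...]}},
# with stage_name and folder_name taken from the last path components above.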


def generate_html(regression_dict, output_file):
    """Generate HTML report from regression data."""
    html_template = """
<!DOCTYPE html>
<html>
<head>
    <title>Perf Regression Summary</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 10px; }}
        .suite-container {{
            margin-bottom: 20px;
            border: 1px solid #ddd;
            border-radius: 4px;
        }}
        .suite-header {{
            padding: 10px;
            background: #f8f9fa;
            border-bottom: 1px solid #ddd;
        }}
        .summary {{ margin-bottom: 10px; }}
        .regression {{ color: #d93025; }}
        .testcase {{
            border-left: 4px solid #d93025;
            margin: 5px 0;
            background: white;
        }}
        .test-details {{
            padding: 10px;
            background: #f5f5f5;
            border-radius: 3px;
        }}
        pre {{
            margin: 0;
            white-space: pre-wrap;
            word-wrap: break-word;
            background: #2b2b2b;
            color: #cccccc;
            padding: 10px;
            counter-reset: line;
        }}
        pre + pre {{
            border-top: none;
            padding-top: 0;
        }}
        pre span {{
            display: block;
            position: relative;
            padding-left: 4em;
        }}
        pre span:before {{
            counter-increment: line;
            content: counter(line);
            position: absolute;
            left: 0;
            width: 3em;
            text-align: right;
            color: #666;
            padding-right: 1em;
        }}
        details summary {{
            cursor: pointer;
            outline: none;
        }}
        details[open] summary {{
            margin-bottom: 10px;
        }}
    </style>
</head>
<body>
    <h2>Perf Regression Summary</h2>
    {test_suites}
</body>
</html>
"""

    all_suites_html = []
    total_tests = 0

    for stage_name in regression_dict:
        folder_dict = regression_dict[stage_name]
        # Count total tests for this stage
        tests_count = sum(len(data_list) for data_list in folder_dict.values())
        total_tests += tests_count

        # Generate summary for the suite
        summary = f"""
        <div class="suite-header">
            <h3>Stage: {escape_html(stage_name)}</h3>
            <p><span class="regression">Regression Tests: {tests_count}</span></p>
        </div>
        """

        # Generate test case details for the suite
        test_cases_html = []

        for folder_name, data_list in folder_dict.items():
            for data in data_list:
                test_case_name = data.get("s_test_case_name", "N/A")
                test_name = f"perf/test_perf_sanity.py::test_e2e[{folder_name}] - {test_case_name}"

                # Get content lines
                content_lines = _get_regression_content(data)
                content_html = "".join(
                    f"<span>{escape_html(line)}</span>" for line in content_lines
                )

                details = f"""
                <details class="test-details">
                    <summary>{escape_html(test_name)}</summary>
                    <pre>{content_html}</pre>
                </details>
                """

                test_case_html = f"""
                <div class="testcase">
                    {details}
                </div>
                """
                test_cases_html.append(test_case_html)

        # Combine summary and test cases for this suite
        suite_html = f"""
        <div class="suite-container">
            {summary}
            <div class="test-cases">
                {" ".join(test_cases_html)}
            </div>
        </div>
        """
        all_suites_html.append(suite_html)

    # Generate complete HTML
    html_content = html_template.format(test_suites="\n".join(all_suites_html))

    # Write to file
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    print(f"Generated HTML report with {total_tests} regression entries: {output_file}")
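
# The page nests one .suite-container per stage, each with a summary header and a
# collapsible <details> block per regression entry; CSS counters number the <pre> lines.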


def main():
    parser = argparse.ArgumentParser(
        description="Merge perf regression info from YAML files into an HTML report."
    )
    parser.add_argument(
        "--input-files", type=str, required=True, help="Comma-separated list of YAML file paths"
    )
    parser.add_argument("--output-file", type=str, required=True, help="Output HTML file path")
    args = parser.parse_args()

    regression_dict = merge_regression_data(args.input_files)
    generate_html(regression_dict, args.output_file)


if __name__ == "__main__":
    main()
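
For reference, a minimal end-to-end sketch of how the script consumes its input, run from the repo root. All paths, names, and YAML keys below are hypothetical; they only need to satisfy the {stage_name}/{folder_name}/regression_data.yaml layout the parser assumes:

    # Hypothetical example, not part of the PR: build one regression_data.yaml
    # and render a report from it.
    import pathlib
    import subprocess

    import yaml

    entry = {
        "s_test_case_name": "example_case",                             # hypothetical name
        "s_regression_info": "d_mean_ttft regressed, 120.0 -> 150.0",   # hypothetical info string
        "s_model_name": "example_model",                                # hypothetical config key
    }
    # stage_name/folder_name are taken from the last three path components.
    path = pathlib.Path("stage_example/aggr_example/regression_data.yaml")
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(yaml.safe_dump([entry]))

    subprocess.run(
        [
            "python3", "jenkins/scripts/perf/perf_regression.py",
            f"--input-files={path}",
            "--output-file=perf_regression.html",
        ],
        check=True,
    )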

@@ -22,6 +22,7 @@ import sys
 import time
 from datetime import datetime
 
+import yaml
 from defs.trt_test_alternative import print_info, print_warning
 
 _project_root = os.path.abspath(
@@ -666,11 +667,14 @@ def _print_regression_data(data, print_func=None):
         print_func(f'"{key}": {value}')
 
 
-def check_perf_regression(new_data_dict, fail_on_regression=False):
+def check_perf_regression(new_data_dict,
+                          fail_on_regression=False,
+                          output_dir=None):
     """
     Check performance regression by printing regression data from new_data_dict.
     If fail_on_regression is True, raises RuntimeError when regressions are found.
     (This is a temporary feature to fail regression tests; we are observing its stability and will enable failing by default soon.)
+    If output_dir is provided, saves regression data to regression_data.yaml.
     """
     # Filter regression data from new_data_dict
     regressive_data_list = [
@@ -687,6 +691,25 @@ def check_perf_regression(new_data_dict, fail_on_regression=False):
         if not data.get("b_is_post_merge", False)
     ]
 
+    # Save regression data to a YAML file if output_dir is provided
+    if output_dir is not None and len(regressive_data_list) > 0:
+        regression_data_file = os.path.join(output_dir, "regression_data.yaml")
+        with open(regression_data_file, 'w') as f:
+            yaml.dump(regressive_data_list, f, default_flow_style=False)
+        print_info(
+            f"Saved {len(regressive_data_list)} regression entries to {regression_data_file}"
+        )
+
+    # TODO: Temporary code for testing; delete before merging the PR.
+    if output_dir is not None:
+        all_data_file = os.path.join(output_dir, "regression_data.yaml")
+        all_data_list = list(new_data_dict.values())
+        with open(all_data_file, 'w') as f:
+            yaml.dump(all_data_list, f, default_flow_style=False)
+        print_info(
+            f"[TEMP] Saved all {len(all_data_list)} data entries to {all_data_file}"
+        )
+
     # Print pre-merge regression data with print_warning
     if len(pre_merge_regressions) > 0:
         print_warning(
@@ -1068,13 +1068,13 @@ class PerfSanityTestConfig:
 
     def get_commands(self):
         """Get commands based on runtime."""
-        perf_sanity_output_dir = os.path.join(self._output_dir, self._test_param_labels)
-        os.makedirs(perf_sanity_output_dir, exist_ok=True)
+        self.perf_sanity_output_dir = os.path.join(self._output_dir, self._test_param_labels)
+        os.makedirs(self.perf_sanity_output_dir, exist_ok=True)
 
         if self.runtime == "aggr_server":
-            return self._get_aggr_commands(perf_sanity_output_dir)
+            return self._get_aggr_commands(self.perf_sanity_output_dir)
         elif self.runtime == "multi_node_disagg_server":
-            return self._get_disagg_commands(perf_sanity_output_dir)
+            return self._get_disagg_commands(self.perf_sanity_output_dir)
 
     def _get_aggr_commands(self, output_dir: str):
         """Get commands for aggregated server."""
@@ -1440,7 +1440,11 @@ class PerfSanityTestConfig:
         # Upload the new perf data and baseline data to database
         post_new_perf_data(new_baseline_data_dict, new_data_dict)
 
-        check_perf_regression(new_data_dict, fail_on_regression=is_scenario_mode)
+        check_perf_regression(
+            new_data_dict,
+            fail_on_regression=is_scenario_mode,
+            output_dir=self.perf_sanity_output_dir,
+        )
 
 
 # Perf sanity test case parameters