diff --git a/jenkins/L0_MergeRequest.groovy b/jenkins/L0_MergeRequest.groovy
index adbfc46baa..43f59cad2b 100644
--- a/jenkins/L0_MergeRequest.groovy
+++ b/jenkins/L0_MergeRequest.groovy
@@ -864,6 +864,30 @@ def collectTestResults(pipeline, testFilter)
 
             junit(testResults: '**/results*.xml', allowEmptyResults : true)
         } // Collect test result stage
+        stage("Collect Perf Regression Result") {
+            def yamlFiles = sh(
+                returnStdout: true,
+                script: 'find . -type f -name "regression_data.yaml" 2>/dev/null || true'
+            ).trim()
+            echo "Regression data yaml files: ${yamlFiles}"
+            if (yamlFiles) {
+                def yamlFileList = yamlFiles.split(/\s+/).collect { it.trim() }.findAll { it }.join(",")
+                echo "Found regression data files: ${yamlFileList}"
+                trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add python3")
+                trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add py3-pip")
+                trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 config set global.break-system-packages true")
+                trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install pyyaml")
+                sh """
+                    python3 llm/jenkins/scripts/perf/perf_regression.py \
+                        --input-files=${yamlFileList} \
+                        --output-file=perf_regression.html
+                """
+                trtllm_utils.uploadArtifacts("perf_regression.html", "${UPLOAD_PATH}/test-results/")
+                echo "Perf regression report: https://urm.nvidia.com/artifactory/${UPLOAD_PATH}/test-results/perf_regression.html"
+            } else {
+                echo "No regression_data.yaml files found."
+            }
+        } // Collect Perf Regression Result stage
         stage("Rerun Report") {
             sh "rm -rf rerun && mkdir -p rerun"
             sh "find . -type f -wholename '*/rerun_results.xml' -exec sh -c 'mv \"{}\" \"rerun/\$(basename \$(dirname \"{}\"))_rerun_results.xml\"' \\; || true"
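Reviewer note: the stage above only globs for `regression_data.yaml` under the unpacked result archives, joins the hits into a comma-separated `--input-files` value, and runs `perf_regression.py` on the Alpine agent (hence the `apk add python3` steps). A minimal local sketch of the same flow, assuming the `results-*.tar.gz` archives were extracted under a hypothetical `./results` directory and the command is run from the repository root:

```python
# Local reproduction of the "Collect Perf Regression Result" stage.
# "./results" is a hypothetical path where results-*.tar.gz were extracted.
import pathlib
import subprocess

yaml_files = sorted(str(p) for p in pathlib.Path("./results").rglob("regression_data.yaml"))
if yaml_files:
    subprocess.run(
        [
            "python3", "jenkins/scripts/perf/perf_regression.py",
            f"--input-files={','.join(yaml_files)}",
            "--output-file=perf_regression.html",
        ],
        check=True,
    )
else:
    print("No regression_data.yaml files found.")
```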
"${remote.user}@${remote.host}:${perfResultsBasePath}/${perfFolders[0]}" + : "${remote.user}@${remote.host}:{${perfFolders.collect { "${perfResultsBasePath}/${it}" }.join(',')}}" + downloadPerfResultSucceed = Utils.exec(pipeline, script: "sshpass -p '${remote.passwd}' scp -P ${remote.port} -r -p ${COMMON_SSH_OPTIONS} ${scpSources} ${stageName}/", returnStatus: true, numRetries: 3) == 0 + } + + echo "hasTimeoutTest: ${hasTimeoutTest}, downloadResultSucceed: ${downloadResultSucceed}, downloadPerfResultSucceed: ${downloadPerfResultSucceed}" + if (hasTimeoutTest || downloadResultSucceed || downloadPerfResultSucceed) { sh "ls ${stageName}" echo "Upload test results." sh "tar -czvf results-${stageName}.tar.gz ${stageName}/" diff --git a/jenkins/scripts/perf/perf_regression.py b/jenkins/scripts/perf/perf_regression.py new file mode 100644 index 0000000000..0f4a48db43 --- /dev/null +++ b/jenkins/scripts/perf/perf_regression.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +"""Merge perf regression info from multiple YAML files into an HTML report.""" + +import argparse +from html import escape as escape_html + +import yaml + +# Metrics where larger is better +MAXIMIZE_METRICS = [ + "d_seq_throughput", + "d_token_throughput", + "d_total_token_throughput", + "d_user_throughput", + "d_mean_tpot", + "d_median_tpot", + "d_p99_tpot", +] + +# Metrics where smaller is better +MINIMIZE_METRICS = [ + "d_mean_ttft", + "d_median_ttft", + "d_p99_ttft", + "d_mean_itl", + "d_median_itl", + "d_p99_itl", + "d_mean_e2el", + "d_median_e2el", + "d_p99_e2el", +] + + +def _get_metric_keys(): + """Get all metric-related keys for filtering config keys.""" + metric_keys = set() + for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS: + metric_suffix = metric[2:] # Strip "d_" prefix + metric_keys.add(metric) + metric_keys.add(f"d_baseline_{metric_suffix}") + metric_keys.add(f"d_threshold_post_merge_{metric_suffix}") + metric_keys.add(f"d_threshold_pre_merge_{metric_suffix}") + return metric_keys + + +def _get_regression_content(data): + """Get regression info and config content as a list of lines.""" + lines = [] + if "s_regression_info" in data: + lines.append("=== Regression Info ===") + regression_info = data["s_regression_info"] + for line in regression_info.split(","): + lines.append(line) + + metric_keys = _get_metric_keys() + + lines.append("") + lines.append("=== Config ===") + config_keys = sorted([key for key in data.keys() if key not in metric_keys]) + for key in config_keys: + if key == "s_regression_info": + continue + value = data[key] + lines.append(f'"{key}": {value}') + + return lines + + +def merge_regression_data(input_files): + """Read all yaml file paths and merge regression data.""" + yaml_files = [f.strip() for f in input_files.split(",") if f.strip()] + + regression_dict = {} + load_failures = 0 + + for yaml_file in yaml_files: + try: + # Path format: .../{stage_name}/{folder_name}/regression_data.yaml + path_parts = yaml_file.replace("\\", "/").split("/") + if len(path_parts) < 3: + continue + + stage_name = path_parts[-3] + folder_name = path_parts[-2] + + with open(yaml_file, "r", encoding="utf-8") as f: + content = yaml.safe_load(f) + if content is None or not isinstance(content, list): + continue + + filtered_data = [ + d for d in content if isinstance(d, dict) and "s_test_case_name" in d + ] + + if not filtered_data: + continue + + if stage_name not in regression_dict: + regression_dict[stage_name] = {} + + if folder_name not in regression_dict[stage_name]: + regression_dict[stage_name][folder_name] = 
diff --git a/jenkins/scripts/perf/perf_regression.py b/jenkins/scripts/perf/perf_regression.py
new file mode 100644
index 0000000000..0f4a48db43
--- /dev/null
+++ b/jenkins/scripts/perf/perf_regression.py
@@ -0,0 +1,275 @@
+#!/usr/bin/env python3
+"""Merge perf regression info from multiple YAML files into an HTML report."""
+
+import argparse
+from html import escape as escape_html
+
+import yaml
+
+# Metrics where larger is better
+MAXIMIZE_METRICS = [
+    "d_seq_throughput",
+    "d_token_throughput",
+    "d_total_token_throughput",
+    "d_user_throughput",
+    "d_mean_tpot",
+    "d_median_tpot",
+    "d_p99_tpot",
+]
+
+# Metrics where smaller is better
+MINIMIZE_METRICS = [
+    "d_mean_ttft",
+    "d_median_ttft",
+    "d_p99_ttft",
+    "d_mean_itl",
+    "d_median_itl",
+    "d_p99_itl",
+    "d_mean_e2el",
+    "d_median_e2el",
+    "d_p99_e2el",
+]
+
+
+def _get_metric_keys():
+    """Get all metric-related keys for filtering config keys."""
+    metric_keys = set()
+    for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS:
+        metric_suffix = metric[2:]  # Strip "d_" prefix
+        metric_keys.add(metric)
+        metric_keys.add(f"d_baseline_{metric_suffix}")
+        metric_keys.add(f"d_threshold_post_merge_{metric_suffix}")
+        metric_keys.add(f"d_threshold_pre_merge_{metric_suffix}")
+    return metric_keys
+
+
+def _get_regression_content(data):
+    """Get regression info and config content as a list of lines."""
+    lines = []
+    if "s_regression_info" in data:
+        lines.append("=== Regression Info ===")
+        regression_info = data["s_regression_info"]
+        for line in regression_info.split(","):
+            lines.append(line)
+
+    metric_keys = _get_metric_keys()
+
+    lines.append("")
+    lines.append("=== Config ===")
+    config_keys = sorted([key for key in data.keys() if key not in metric_keys])
+    for key in config_keys:
+        if key == "s_regression_info":
+            continue
+        value = data[key]
+        lines.append(f'"{key}": {value}')
+
+    return lines
+
+
+def merge_regression_data(input_files):
+    """Read all yaml file paths and merge regression data."""
+    yaml_files = [f.strip() for f in input_files.split(",") if f.strip()]
+
+    regression_dict = {}
+    load_failures = 0
+
+    for yaml_file in yaml_files:
+        try:
+            # Path format: .../{stage_name}/{folder_name}/regression_data.yaml
+            path_parts = yaml_file.replace("\\", "/").split("/")
+            if len(path_parts) < 3:
+                continue
+
+            stage_name = path_parts[-3]
+            folder_name = path_parts[-2]
+
+            with open(yaml_file, "r", encoding="utf-8") as f:
+                content = yaml.safe_load(f)
+
+            if content is None or not isinstance(content, list):
+                continue
+
+            filtered_data = [
+                d for d in content if isinstance(d, dict) and "s_test_case_name" in d
+            ]
+
+            if not filtered_data:
+                continue
+
+            if stage_name not in regression_dict:
+                regression_dict[stage_name] = {}
+
+            if folder_name not in regression_dict[stage_name]:
+                regression_dict[stage_name][folder_name] = []
+
+            regression_dict[stage_name][folder_name].extend(filtered_data)
+
+        except (OSError, yaml.YAMLError, UnicodeDecodeError) as e:
+            load_failures += 1
+            print(f"Warning: Failed to load {yaml_file}: {e}")
+            continue
+
+    # Fail fast if caller provided inputs but none were readable/parseable.
+    # (Keeps "no regressions found" working when yaml_files is empty.)
+    if yaml_files and not regression_dict and load_failures == len(yaml_files):
+        raise RuntimeError("Failed to load any regression YAML inputs; cannot generate report.")
+
+    return regression_dict
+
+
+def generate_html(regression_dict, output_file):
+    """Generate HTML report from regression data."""
+    # Keep the markup free of literal braces so str.format() below only
+    # substitutes the {test_suites} placeholder.
+    html_template = """<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>Perf Regression Summary</title>
+</head>
+<body>
+    <h1>Perf Regression Summary</h1>
+    {test_suites}
+</body>
+</html>
+"""
+
+    all_suites_html = []
+    total_tests = 0
+
+    for stage_name in regression_dict:
+        folder_dict = regression_dict[stage_name]
+        # Count total tests for this stage
+        tests_count = sum(len(data_list) for data_list in folder_dict.values())
+        total_tests += tests_count
+
+        # Generate summary for the suite
+        summary = f"""
+        <div>
+            <h2>Stage: {escape_html(stage_name)}</h2>
+            <p>Regression Tests: {tests_count}</p>
+        </div>
+        """
+
+        # Generate test case details for the suite
+        test_cases_html = []
+
+        for folder_name, data_list in folder_dict.items():
+            for data in data_list:
+                test_case_name = data.get("s_test_case_name", "N/A")
+                test_name = f"perf/test_perf_sanity.py::test_e2e[{folder_name}] - {test_case_name}"
+
+                # Get content lines
+                content_lines = _get_regression_content(data)
+                content_html = "".join(
+                    f"<div>{escape_html(line)}</div>" for line in content_lines
+                )
+
+                details = f"""
+                <details>
+                    <summary>{escape_html(test_name)}</summary>
+                    <div>{content_html}</div>
+                </details>
+                """
+
+                test_case_html = f"""
+                <div>
+                    {details}
+                </div>
+                """
+                test_cases_html.append(test_case_html)
+
+        # Combine summary and test cases for this suite
+        suite_html = f"""
+        <section>
+            {summary}
+            <div>
+                {" ".join(test_cases_html)}
+            </div>
+        </section>
+        """
+        all_suites_html.append(suite_html)
+
+    # Generate complete HTML
+    html_content = html_template.format(test_suites="\n".join(all_suites_html))
+
+    # Write to file
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(html_content)
+
+    print(f"Generated HTML report with {total_tests} regression entries: {output_file}")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Merge perf regression info from YAML files into an HTML report."
+    )
+    parser.add_argument(
+        "--input-files", type=str, required=True, help="Comma-separated list of YAML file paths"
+    )
+    parser.add_argument("--output-file", type=str, required=True, help="Output HTML file path")
+    args = parser.parse_args()
+
+    regression_dict = merge_regression_data(args.input_files)
+    generate_html(regression_dict, args.output_file)
+
+
+if __name__ == "__main__":
+    main()
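Reviewer note: the report generator can be smoke-tested without Jenkins. A minimal sketch, assuming it runs from `jenkins/scripts/perf/` (so `perf_regression` is importable) and PyYAML is installed; the directory layout follows the `.../{stage_name}/{folder_name}/regression_data.yaml` convention the script parses, and the stage, folder, and metric values below are made up:

```python
# Build one synthetic regression_data.yaml and render the HTML report from it.
import os
import yaml

from perf_regression import generate_html, merge_regression_data

os.makedirs("A100-PyTorch-1/aggr_server_example", exist_ok=True)
sample = [{
    "s_test_case_name": "example_case",                                                # hypothetical
    "s_regression_info": "d_seq_throughput 93.0 below threshold 95.0,baseline 100.0",  # hypothetical
    "d_seq_throughput": 93.0,
    "d_baseline_seq_throughput": 100.0,
}]
with open("A100-PyTorch-1/aggr_server_example/regression_data.yaml", "w") as f:
    yaml.dump(sample, f)

merged = merge_regression_data("A100-PyTorch-1/aggr_server_example/regression_data.yaml")
generate_html(merged, "perf_regression.html")
```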
""" # Filter regression data from new_data_dict regressive_data_list = [ @@ -630,6 +634,15 @@ def check_perf_regression(new_data_dict, fail_on_regression=False): if not data.get("b_is_post_merge", False) ] + # Save regression data to yaml file if output_dir is provided + if output_dir is not None and len(regressive_data_list) > 0: + regression_data_file = os.path.join(output_dir, "regression_data.yaml") + with open(regression_data_file, 'w') as f: + yaml.dump(regressive_data_list, f, default_flow_style=False) + print_info( + f"Saved {len(regressive_data_list)} regression data to {regression_data_file}" + ) + # Print pre-merge regression data with print_warning if len(pre_merge_regressions) > 0: print_warning( diff --git a/tests/integration/defs/perf/test_perf_sanity.py b/tests/integration/defs/perf/test_perf_sanity.py index 387fd935d9..2dfb7b0515 100644 --- a/tests/integration/defs/perf/test_perf_sanity.py +++ b/tests/integration/defs/perf/test_perf_sanity.py @@ -1034,13 +1034,13 @@ class PerfSanityTestConfig: def get_commands(self): """Get commands based on runtime.""" - perf_sanity_output_dir = os.path.join(self._output_dir, self._test_param_labels) - os.makedirs(perf_sanity_output_dir, exist_ok=True) + self.perf_sanity_output_dir = os.path.join(self._output_dir, self._test_param_labels) + os.makedirs(self.perf_sanity_output_dir, exist_ok=True) if self.runtime == "aggr_server": - return self._get_aggr_commands(perf_sanity_output_dir) + return self._get_aggr_commands(self.perf_sanity_output_dir) elif self.runtime == "multi_node_disagg_server": - return self._get_disagg_commands(perf_sanity_output_dir) + return self._get_disagg_commands(self.perf_sanity_output_dir) def _get_aggr_commands(self, output_dir: str): """Get commands for aggregated server.""" @@ -1406,7 +1406,11 @@ class PerfSanityTestConfig: # Upload the new perf data and baseline data to database post_new_perf_data(new_baseline_data_dict, new_data_dict) - check_perf_regression(new_data_dict, fail_on_regression=is_scenario_mode) + check_perf_regression( + new_data_dict, + fail_on_regression=is_scenario_mode, + output_dir=self.perf_sanity_output_dir, + ) # Perf sanity test case parameters