#!/usr/bin/env python3 """Merge perf regression info from multiple YAML files into an HTML report.""" import argparse from html import escape as escape_html import yaml # Metrics where larger is better MAXIMIZE_METRICS = [ "d_seq_throughput", "d_token_throughput", "d_total_token_throughput", "d_user_throughput", "d_mean_tpot", "d_median_tpot", "d_p99_tpot", ] # Metrics where smaller is better MINIMIZE_METRICS = [ "d_mean_ttft", "d_median_ttft", "d_p99_ttft", "d_mean_itl", "d_median_itl", "d_p99_itl", "d_mean_e2el", "d_median_e2el", "d_p99_e2el", ] def _get_metric_keys(): """Get all metric-related keys for filtering config keys.""" metric_keys = set() for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS: metric_suffix = metric[2:] # Strip "d_" prefix metric_keys.add(metric) metric_keys.add(f"d_baseline_{metric_suffix}") metric_keys.add(f"d_threshold_post_merge_{metric_suffix}") metric_keys.add(f"d_threshold_pre_merge_{metric_suffix}") return metric_keys def _get_regression_content(data): """Get regression info and config content as a list of lines.""" lines = [] if "s_regression_info" in data: lines.append("=== Regression Info ===") regression_info = data["s_regression_info"] for line in regression_info.split(","): lines.append(line) metric_keys = _get_metric_keys() lines.append("") lines.append("=== Config ===") config_keys = sorted([key for key in data.keys() if key not in metric_keys]) for key in config_keys: if key == "s_regression_info": continue value = data[key] lines.append(f'"{key}": {value}') return lines def merge_regression_data(input_files): """Read all yaml file paths and merge regression data.""" yaml_files = [f.strip() for f in input_files.split(",") if f.strip()] regression_dict = {} load_failures = 0 for yaml_file in yaml_files: try: # Path format: .../{stage_name}/{folder_name}/regression_data.yaml path_parts = yaml_file.replace("\\", "/").split("/") if len(path_parts) < 3: continue stage_name = path_parts[-3] folder_name = path_parts[-2] with open(yaml_file, "r", encoding="utf-8") as f: content = yaml.safe_load(f) if content is None or not isinstance(content, list): continue filtered_data = [ d for d in content if isinstance(d, dict) and "s_test_case_name" in d ] if not filtered_data: continue if stage_name not in regression_dict: regression_dict[stage_name] = {} if folder_name not in regression_dict[stage_name]: regression_dict[stage_name][folder_name] = [] regression_dict[stage_name][folder_name].extend(filtered_data) except (OSError, yaml.YAMLError, UnicodeDecodeError) as e: load_failures += 1 print(f"Warning: Failed to load {yaml_file}: {e}") continue # Fail fast if caller provided inputs but none were readable/parseable. # (Keeps "no regressions found" working when yaml_files is empty.) if yaml_files and not regression_dict and load_failures == len(yaml_files): raise RuntimeError("Failed to load any regression YAML inputs; cannot generate report.") return regression_dict def generate_html(regression_dict, output_file): """Generate HTML report from regression data.""" html_template = """
Regression Tests: {tests_count}
{content_html}