"""YAML Configuration Loader with Default Metrics Support."""
|
|
|
|
import copy
|
|
import os
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
|
|
import yaml
|
|
from reporting.accuracy_validator import DatasetThreshold
|
|
|
|
from utils.common import EnvManager
|
|
from utils.logger import logger
|
|
|
|
|
|
@dataclass
class MetricsConfig:
    """Metrics configuration."""

    log_file: str  # Log file name
    extractor_pattern: str  # Regular expression
    metric_names: List[str]  # Metric names list

    def merge(self, override: Optional[Dict]) -> "MetricsConfig":
        """Merge with override dict.

        Args:
            override: Dict with optional keys: log_file, extractor_pattern, metric_names

        Returns:
            New MetricsConfig with overridden values
        """
        if not override:
            return self

        return MetricsConfig(
            log_file=override.get("log_file", self.log_file),
            extractor_pattern=override.get("extractor_pattern", self.extractor_pattern),
            metric_names=override.get("metric_names", self.metric_names),
        )
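
# A minimal merge sketch (hypothetical values): only the keys present in the
# override dict are replaced; everything else keeps its default.
#
#     base = MetricsConfig(
#         log_file="6_bench.log",
#         extractor_pattern=r"dummy",
#         metric_names=["SERVER_MEDIAN_TTFT"],
#     )
#     custom = base.merge({"log_file": "bench_rerun.log"})
#     # custom.log_file == "bench_rerun.log"; pattern and metric_names unchanged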


@dataclass
class AccuracyConfig:
    """Accuracy test configuration (supports multiple datasets)."""

    datasets: List[DatasetThreshold]  # List of dataset threshold configurations

    def get_dataset_config(self, dataset_name: str) -> Optional[DatasetThreshold]:
        """Get configuration by dataset name.

        Args:
            dataset_name: Name of the dataset to look up

        Returns:
            DatasetThreshold config if found, None otherwise
        """
        for ds in self.datasets:
            if ds.dataset_name.lower() == dataset_name.lower():
                return ds
        return None

    def get_all_dataset_names(self) -> List[str]:
        """Get all configured dataset names.

        Returns:
            List of dataset names
        """
        return [ds.dataset_name for ds in self.datasets]
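
# Lookup sketch (hypothetical dataset, assuming DatasetThreshold's remaining
# fields have defaults): names are matched case-insensitively.
#
#     cfg = AccuracyConfig(
#         datasets=[DatasetThreshold(dataset_name="gsm8k", expected_value=0.95)]
#     )
#     cfg.get_dataset_config("GSM8K")  # -> the gsm8k DatasetThreshold
#     cfg.get_dataset_config("mmlu")   # -> None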


# ============================================================================
# Default Metrics configuration
# ============================================================================

# Accuracy test uses accuracy_eval.log (markdown table output from lm_eval).
# Note: Only log_file is used by AccuracyParser (accuracy_parser.py);
# the regex pattern is hardcoded in AccuracyParser._extract_accuracy_values().
_COMMON_ACCURACY_METRICS = MetricsConfig(
    log_file="7_accuracy_eval.log",
    extractor_pattern=r"\|([a-zA-Z0-9_-]+)\|.*?\|([\w-]+)\|.*?\|exact_match\|.*?\|([0-9.]+)\|",
    metric_names=["flexible-extract", "strict-match"],
)
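
# For reference, a simplified, hypothetical lm_eval table row that the pattern
# above would match (group 1 = task, group 2 = filter, group 3 = score):
#
#     |gsm8k|3|flexible-extract|5|exact_match|↑|0.9527|±|0.0059|
#
# As noted above, AccuracyParser does the real extraction with its own
# hardcoded pattern; this entry mainly supplies the log file name.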

DEFAULT_METRICS_CONFIG = {
    # Performance test default configuration
    ("disagg", "perf"): MetricsConfig(
        log_file="6_bench.log",
        extractor_pattern=r"""
            ^.*?Median\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Median\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Benchmark\ with\ concurrency\ (\d+)\ done
        """,
        metric_names=["SERVER_MEDIAN_TTFT", "SERVER_MEDIAN_E2EL"],
    ),
    ("wideep", "perf"): MetricsConfig(
        log_file="6_bench.log",
        extractor_pattern=r"""
            ^.*?Mean\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Median\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?P99\ TTFT\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Mean\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Median\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?P99\ TPOT\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Mean\ ITL\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Median\ ITL\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?P99\ ITL\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Mean\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Median\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?P99\ E2EL\ \(ms\):\s+([0-9.]+).*?$\n
            (?:.*\n)*?
            ^.*?Benchmark\ with\ concurrency\ (\d+)\ done
        """,
        metric_names=[
            "SERVER_MEAN_TTFT",
            "SERVER_MEDIAN_TTFT",  # Median TTFT (same name as disagg)
            "SERVER_P99_TTFT",
            "SERVER_MEAN_TPOT",
            "SERVER_MEDIAN_TPOT",
            "SERVER_P99_TPOT",
            "SERVER_MEAN_ITL",
            "SERVER_MEDIAN_ITL",
            "SERVER_P99_ITL",
            "SERVER_MEAN_E2EL",
            "SERVER_E2EL",  # Median E2EL (note: name differs from disagg's SERVER_MEDIAN_E2EL)
            "SERVER_P99_E2EL",
        ],
    ),
    # Accuracy test configuration
    ("disagg", "accuracy"): _COMMON_ACCURACY_METRICS,
    ("wideep", "accuracy"): _COMMON_ACCURACY_METRICS,
}
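
# Defaults are keyed by (test_type, test_category). A quick lookup sketch:
#
#     cfg = DEFAULT_METRICS_CONFIG[("disagg", "perf")]
#     cfg.metric_names  # ["SERVER_MEDIAN_TTFT", "SERVER_MEDIAN_E2EL"]
#
# Unknown keys are rejected in ConfigLoader._get_metrics_config(), which
# raises ValueError rather than guessing a default.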


@dataclass
class TestConfig:
    """Test configuration data class."""

    config_path: str  # YAML file path
    temp_config_path: str  # Temporary config file path (pre-calculated, not created yet)
    test_id: str  # Auto-generated test ID
    test_type: str  # disagg, wideep, etc.
    model_name: str  # Model name (read from metadata)
    test_category: str  # perf or accuracy
    benchmark_type: str  # 1k1k, 8k1k, etc. (generated from sequence)
    config_data: dict  # Full YAML content
    metrics_config: MetricsConfig  # Metrics configuration (default or overridden)
    supported_gpus: List[str]  # Supported GPU types list
    accuracy_config: Optional[AccuracyConfig] = None  # Accuracy configuration (for accuracy tests)

    @property
    def display_name(self) -> str:
        """Display name for pytest."""
        return f"{self.test_type}/{self.test_category}/{Path(self.config_path).stem}"


class ConfigLoader:
    """Configuration loader with default metrics support."""

    def __init__(self, base_dir: str = "test_configs"):
        """Initialize ConfigLoader.

        Args:
            base_dir: Base directory for test configs
        """
        self.base_dir = Path(base_dir)
        if not self.base_dir.exists():
            raise FileNotFoundError(f"Config directory not found: {self.base_dir}")

    def scan_configs(
        self,
        test_type: Optional[str] = None,
        test_category: Optional[str] = None,
        model_name: Optional[str] = None,
        gpu_type: Optional[str] = None,
    ) -> List[TestConfig]:
        """Scan configuration files.

        Directory structure: test_type/category/model_bench_config.yaml

        Args:
            test_type: Filter by test type (disagg, wideep, etc.)
            test_category: Filter by category (perf, accuracy)
            model_name: Filter by model name
            gpu_type: Filter by GPU type (GB200, H100, etc.). If None, uses EnvManager.get_gpu_type()

        Returns:
            List of TestConfig objects (filtered by GPU support)
        """
        # Get current GPU type from environment if not specified
        if gpu_type is None:
            gpu_type = EnvManager.get_gpu_type()

        configs = []

        if not self.base_dir.exists():
            logger.warning(f"Config directory not found: {self.base_dir}")
            return configs

        # Traverse: test_type/category/config.yaml
        for test_type_dir in self.base_dir.iterdir():
            if not test_type_dir.is_dir() or test_type_dir.name == "templates":
                continue

            current_test_type = test_type_dir.name

            # Filter by test_type
            if test_type and current_test_type != test_type:
                continue

            # Traverse category (perf/accuracy)
            for category_dir in test_type_dir.iterdir():
                if not category_dir.is_dir():
                    continue

                current_category = category_dir.name

                # Filter by test_category
                if test_category and current_category != test_category:
                    continue

                # Load all YAML files in this category
                for yaml_file in category_dir.glob("*.yaml"):
                    try:
                        config = self._load_config_file(
                            yaml_file, current_test_type, current_category
                        )

                        # Filter by model_name
                        if model_name and config.model_name != model_name:
                            continue

                        # Filter by GPU support
                        if gpu_type and gpu_type not in config.supported_gpus:
                            logger.info(
                                f"Skipping {yaml_file.name}: not supported on {gpu_type} "
                                f"(supported: {config.supported_gpus})"
                            )
                            continue

                        configs.append(config)
                    except Exception as e:
                        logger.warning(f"Failed to load {yaml_file}: {e}")

        # Check whether any configuration files were found
        if len(configs) == 0:
            # Build detailed error information
            filter_info = []
            if test_type:
                filter_info.append(f"test_type='{test_type}'")
            if test_category:
                filter_info.append(f"test_category='{test_category}'")
            if model_name:
                filter_info.append(f"model_name='{model_name}'")
            if gpu_type:
                filter_info.append(f"gpu_type='{gpu_type}'")

            filters = ", ".join(filter_info) if filter_info else "no filters"

            raise RuntimeError(
                f"No configuration files found in '{self.base_dir}' with {filters}. "
                f"Please check:\n"
                f"  1. Configuration files exist in the correct directory structure\n"
                f"  2. YAML files contain a valid 'metadata' section with required fields\n"
                f"  3. GPU type '{gpu_type}' is in the 'supported_gpus' list\n"
                f"  4. Filter parameters match existing configurations"
            )

        logger.success(f"Loaded {len(configs)} configurations for GPU type: {gpu_type}")
        return configs
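
    # Typical usage sketch (directory layout and filter values are
    # illustrative):
    #
    #     loader = ConfigLoader(base_dir="test_configs")
    #     perf = loader.scan_configs(test_type="disagg", test_category="perf", gpu_type="GB200")
    #     for cfg in perf:
    #         print(cfg.display_name)  # e.g. "disagg/perf/<yaml stem>"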

    def _make_test_id(self, test_type: str, test_category: str, test_file_name: str) -> str:
        """Generate test ID from test type, category, and filename.

        Since YAML filenames now contain all configuration info in a standardized format,
        we simply combine test_type, test_category, and the filename.

        Format: {test_type}_{test_category}_{test_file_name}
        Example: disagg_perf_deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL

        Args:
            test_type: Test type (disagg, wideep, etc.)
            test_category: Test category (perf, accuracy)
            test_file_name: Test file name (without extension, contains all config info)

        Returns:
            Generated test ID string
        """
        # Simplified: just combine test_type, test_category, and filename.
        # The filename already contains all necessary configuration details.
        test_id = f"{test_type}_{test_category}_{test_file_name}"

        return test_id

    def _load_config_file(self, yaml_path: Path, test_type: str, test_category: str) -> TestConfig:
        """Load single YAML config file."""
        with open(yaml_path, "r") as f:
            config_data = yaml.safe_load(f)

        # Extract metadata from YAML file
        metadata = config_data.get("metadata", {})
        model_name = metadata.get("model_name", "unknown")
        supported_gpus = metadata.get("supported_gpus", ["GB200", "GB300", "H100", "B200", "B300"])

        # Override config with environment variables (in memory only, do not write back)
        config_data = self._apply_env_overrides(config_data, model_name)

        # Generate benchmark_type from sequence configuration
        benchmark_type = self._generate_benchmark_type(config_data)

        # Get metrics config (default or override)
        metrics_config = self._get_metrics_config(test_type, test_category, config_data)

        # Extract test file name (without extension)
        test_file_name = yaml_path.stem  # e.g., "deepseek-r1-fp4-0"

        # Generate test ID using config data
        test_id = self._make_test_id(test_type, test_category, test_file_name)

        # Pre-calculate temporary config file path (not created yet, will be created in submit_job)
        temp_filename = f"{yaml_path.stem}-temp.yaml"
        temp_config_path = str(yaml_path.parent / temp_filename)

        # Load accuracy configuration (only for accuracy tests)
        accuracy_config = None
        if test_category == "accuracy":
            acc_meta = metadata.get("accuracy", {})
            if acc_meta and "datasets" in acc_meta:
                datasets = []
                for ds_config in acc_meta["datasets"]:
                    # Parse optional hypothesis testing parameters
                    alpha = ds_config.get("alpha")
                    beta = ds_config.get("beta")
                    sigma = ds_config.get("sigma")
                    num_samples = ds_config.get("num_samples")
                    higher_is_better = ds_config.get("higher_is_better")

                    # Convert to appropriate types if present
                    if alpha is not None:
                        alpha = float(alpha)
                    if beta is not None:
                        beta = float(beta)
                    if sigma is not None:
                        sigma = float(sigma)
                    if num_samples is not None:
                        num_samples = int(num_samples)
                    if higher_is_better is not None:
                        higher_is_better = bool(higher_is_better)

                    datasets.append(
                        DatasetThreshold(
                            dataset_name=ds_config.get("name", "gsm8k"),
                            expected_value=float(ds_config.get("expected_value", 0.0)),
                            threshold_type=ds_config.get("threshold_type", "hypothesis_test"),
                            filter_type=ds_config.get("filter_type", "flexible-extract"),
                            alpha=alpha,
                            beta=beta,
                            sigma=sigma,
                            num_samples=num_samples,
                            higher_is_better=higher_is_better,
                        )
                    )
                accuracy_config = AccuracyConfig(datasets=datasets)
                logger.info(f"Loaded accuracy config with {len(datasets)} dataset(s)")

        return TestConfig(
            config_path=str(yaml_path),
            temp_config_path=temp_config_path,
            test_id=test_id,
            test_type=test_type,
            model_name=model_name,
            test_category=test_category,
            benchmark_type=benchmark_type,
            config_data=config_data,
            metrics_config=metrics_config,
            supported_gpus=supported_gpus,
            accuracy_config=accuracy_config,
        )
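
    # A hypothetical metadata block that this loader would parse into an
    # AccuracyConfig (key names follow the lookups above; values are
    # illustrative):
    #
    #     metadata:
    #       model_name: deepseek-r1-fp4
    #       supported_gpus: [GB200, B200]
    #       accuracy:
    #         datasets:
    #           - name: gsm8k
    #             expected_value: 0.95
    #             threshold_type: hypothesis_test
    #             filter_type: flexible-extract
    #             alpha: 0.05
    #             num_samples: 1319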

    def _generate_benchmark_type(self, config_data: dict) -> str:
        """Generate benchmark type from sequence configuration.

        Examples:
            input=1024, output=1024 -> "1k1k"
            input=8192, output=1024 -> "8k1k"
            input=16384, output=2048 -> "16k2k"

        Args:
            config_data: Full YAML config data

        Returns:
            Benchmark type string (e.g., "1k1k", "8k1k")
        """
        sequence = config_data.get("sequence", {})
        input_length = sequence.get("input_length", 0)
        output_length = sequence.get("output_length", 0)

        # Convert to k notation
        input_k = input_length // 1024
        output_k = output_length // 1024

        return f"{input_k}k{output_k}k"
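
    # Note: integer division floors sub-1k lengths. For example, a
    # hypothetical input_length=512 with output_length=1024 would yield
    # "0k1k"; the docstring examples assume multiples of 1024.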

    def _get_metrics_config(
        self, test_type: str, test_category: str, config_data: dict
    ) -> MetricsConfig:
        """Get metrics config: use default or merge with override.

        Args:
            test_type: Test type (disagg, wideep, etc.)
            test_category: 'perf' or 'accuracy'
            config_data: Full YAML config data

        Returns:
            MetricsConfig (default or merged with overrides)
        """
        # Get default configuration
        config_key = (test_type, test_category)
        default_config = DEFAULT_METRICS_CONFIG.get(config_key)
        if not default_config:
            # No default configuration for this key: raise an error
            logger.warning(f"No default metrics config for config_key: {config_key}")
            raise ValueError(f"No default metrics config for config_key: {config_key}")

        # Check whether the YAML overrides any metrics.
        # Metrics are defined in the metadata section rather than benchmark.
        metadata_config = config_data.get("metadata", {})
        metrics_override = metadata_config.get("metrics")

        if metrics_override:
            # Metrics overrides present: merge them over the defaults
            logger.debug("Using custom metrics config (overriding defaults)")
            return default_config.merge(metrics_override)
        else:
            # No metrics overrides: use the default
            logger.debug(f"Using default metrics config for {test_category}")
            return default_config
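
    # A hypothetical per-config override in YAML. Only the keys provided are
    # merged over the defaults (see MetricsConfig.merge); omitted keys keep
    # their default values:
    #
    #     metadata:
    #       metrics:
    #         log_file: custom_bench.log
    #         metric_names: [SERVER_MEDIAN_TTFT]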

    def _apply_env_overrides(self, config_data: dict, model_name: str) -> dict:
        """Apply environment variable overrides to configuration.

        Overwrites known fields with values resolved from the environment,
        matched by (section, key) path. No placeholders are needed in the
        YAML; any field listed in the mapping below is replaced.

        Args:
            config_data: Original configuration dict
            model_name: Model name used to resolve model-specific paths

        Returns:
            Updated configuration dict with environment variables applied
        """
        # Create a deep copy to avoid modifying the original
        config = copy.deepcopy(config_data)

        # Field path mapping: (section, key) -> environment value getter.
        # Uses lazy evaluation to fetch values only when needed.
        field_mapping = {
            ("slurm", "partition"): lambda: EnvManager.get_slurm_partition(),
            ("slurm", "account"): lambda: EnvManager.get_slurm_account(),
            ("slurm", "job_name"): lambda: EnvManager.get_slurm_job_name(),
            ("environment", "container_mount"): lambda: EnvManager.get_container_mount(model_name),
            ("environment", "container_image"): lambda: EnvManager.get_container_image(),
            ("environment", "trtllm_repo"): lambda: EnvManager.get_repo_dir(),
            ("environment", "trtllm_wheel_path"): lambda: EnvManager.get_trtllm_wheel_path(),
            ("benchmark", "dataset_file"): lambda: self._get_dataset_file(config),
            ("environment", "work_dir"): lambda: EnvManager.get_script_dir(),
            ("environment", "model_path"): lambda: self._get_full_model_path(config),
            ("slurm", "script_file"): lambda: self._get_script_file(config),
            ("slurm", "set_segment"): lambda: EnvManager.get_slurm_set_segment(),
            ("slurm", "extra_args"): lambda: EnvManager.get_slurm_extra_args(),
        }

        # Apply overrides based on field paths
        for (section, key), value_getter in field_mapping.items():
            if section in config:
                config[section][key] = value_getter()
        return config
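
    # Effect sketch: for a YAML with a "slurm" section, the loop above
    # unconditionally rewrites e.g. config["slurm"]["partition"] from
    # EnvManager.get_slurm_partition(); sections missing from the YAML are
    # skipped entirely. Actual values depend on the caller's environment.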

    def _get_full_model_path(self, config: dict) -> str:
        """Get full model path by combining MODEL_DIR with the model directory name.

        Priority:
            1. metadata.model_dir_name (explicit model directory name)
            2. Empty string (fallback)
        """
        metadata = config.get("metadata", {})
        model_dir_name = metadata.get("model_dir_name", "")

        if model_dir_name:
            return os.path.join(EnvManager.get_model_dir(), model_dir_name)
        else:
            return ""

    def _get_dataset_file(self, config: dict) -> str:
        """Get dataset file path by combining the dataset directory with the dataset file name.

        Args:
            config: Full YAML config data

        Returns:
            Path to the dataset file under the dataset directory
        """
        metadata = config.get("metadata", {})
        dataset_file = metadata.get("dataset_file", "")
        return os.path.join(EnvManager.get_dataset_dir(), dataset_file)

    def _get_script_file(self, config: dict) -> str:
        """Get script file path by combining the scripts directory with the script file name.

        Args:
            config: Full YAML config data

        Returns:
            Path to the script file under the scripts directory
        """
        metadata = config.get("metadata", {})
        script_file = metadata.get("script_file", "disaggr_torch.slurm")
        return os.path.join(EnvManager.get_script_dir(), script_file)

    def _write_config_file(self, yaml_path: Path, config_data: dict) -> None:
        """Write updated configuration back to the YAML file.

        This is necessary because submit.py reads the YAML file to submit jobs.
        The file needs to contain actual values, not empty placeholders.

        Args:
            yaml_path: Path to the YAML file
            config_data: Updated configuration dict with environment values applied
        """
        try:
            with open(yaml_path, "w") as f:
                yaml.dump(
                    config_data,
                    f,
                    default_flow_style=False,
                    sort_keys=False,
                    allow_unicode=True,
                    width=1000,  # Prevent line wrapping
                )
            logger.success(f"Updated config written to: {yaml_path.name}")
        except Exception as e:
            # Don't fail the test if the write fails; just log a warning
            logger.warning(f"Failed to write config file {yaml_path}: {e}")

    def get_all_models(self) -> List[str]:
        """Get list of all unique model names."""
        configs = self.scan_configs()
        return sorted(set(config.model_name for config in configs))

    def get_all_test_types(self) -> List[str]:
        """Get list of all test types."""
        if not self.base_dir.exists():
            return []
        return sorted(
            [d.name for d in self.base_dir.iterdir() if d.is_dir() and d.name != "templates"]
        )