#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate a performance regression test list from the config database.

This script:
1. Reads recipes from the examples/configs/database directory
2. Generates test config files per GPU type (e.g., config_database_b200_nvl.yaml)
3. Generates llm_config_database.yml test list with condition blocks grouped by GPU name and count
"""
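
# For example (illustrative values, not taken from the database): a B200_NVL
# recipe lands in tests/scripts/perf-sanity/config_database_b200_nvl.yaml, and
# the generated test list references it with an id of the form
#   perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-<server_name>]
# where <server_name> is built by generate_server_name() below.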

import copy
from collections import defaultdict
from pathlib import Path

import yaml

from examples.configs.database.database import (
    DATABASE_LIST_PATH,
    Recipe,
    RecipeList,
    select_key_recipes,
)

REPO_ROOT = Path(__file__).parent.parent
PERF_SANITY_DIR = REPO_ROOT / "tests" / "scripts" / "perf-sanity"
TEST_LIST_PATH = (
    REPO_ROOT / "tests" / "integration" / "test_lists" / "qa" / "llm_config_database.yml"
)
ITERATIONS = 10

# Mapping from HuggingFace model IDs to MODEL_PATH_DICT keys used by the test framework
# in tests/integration/defs/perf/test_perf_sanity.py
MODEL_NAME_MAPPING = {
    "deepseek-ai/DeepSeek-R1-0528": "deepseek_r1_0528_fp8",
    "nvidia/DeepSeek-R1-0528-FP4-v2": "deepseek_r1_0528_fp4_v2",
    "openai/gpt-oss-120b": "gpt_oss_120b_fp4",
}
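
# Recipes whose model ID is missing from this mapping are rejected with a
# ValueError in recipe_to_server_config() below, so new models must be
# registered here before their recipes can be turned into tests.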

# GPU type to condition wildcards mapping for test list
# Note: cpu is used to distinguish between e.g. H200_SXM and GH200
GPU_WILDCARDS = {
    "B200_NVL": {"gpu": ["*b200*"], "cpu": "x86_64", "linux_distribution_name": "ubuntu*"},
    "H200_SXM": {"gpu": ["*h200*"], "cpu": "x86_64", "linux_distribution_name": "ubuntu*"},
    "H100_SXM": {"gpu": ["*h100*"], "cpu": "x86_64", "linux_distribution_name": "ubuntu*"},
    "GH200": {"gpu": ["*gh200*"], "cpu": "aarch64", "linux_distribution_name": "ubuntu*"},
    "GB200": {"gpu": ["*gb200*"], "cpu": "aarch64", "linux_distribution_name": "ubuntu*"},
}


def generate_server_name(recipe: Recipe) -> str:
    """Generate a unique server name from recipe."""
    model_slug = recipe.model.replace("/", "_").replace("-", "_").replace(".", "_")
    return f"{model_slug}_{recipe.isl}_{recipe.osl}_conc{recipe.concurrency}_gpu{recipe.num_gpus}"
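
# Illustrative example (assumed recipe values): "openai/gpt-oss-120b" with
# isl=1024, osl=1024, concurrency=64, num_gpus=8 becomes
#   "openai_gpt_oss_120b_1024_1024_conc64_gpu8"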


def generate_client_name(recipe: Recipe) -> str:
    """Generate client config name."""
    return f"con{recipe.concurrency}_isl{recipe.isl}_osl{recipe.osl}"
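
# For the same assumed values, the client name is "con64_isl1024_osl1024".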


def recipe_to_server_config(recipe: Recipe, llm_api_config: dict) -> dict:
    """Convert a recipe + LLM API config to aggr_server format."""
    model_name = MODEL_NAME_MAPPING.get(recipe.model)
    if not model_name:
        raise ValueError(f"Model not found in MODEL_NAME_MAPPING: {recipe.model}")

    server_config = {
        "name": generate_server_name(recipe),
        "model_name": model_name,
        "gpus": recipe.num_gpus,
        # Enable scenario-only matching for baseline comparison
        "match_mode": "scenario",
    }

    # Copy LLM API config fields
    for key, value in llm_api_config.items():
        server_config[key] = value

    # Disable KV cache reuse to ensure consistency
    if "kv_cache_config" not in server_config:
        server_config["kv_cache_config"] = {}
    server_config["kv_cache_config"]["enable_block_reuse"] = False

    # Add client configs
    server_config["client_configs"] = [
        {
            "name": generate_client_name(recipe),
            "concurrency": recipe.concurrency,
            "iterations": ITERATIONS,
            "isl": recipe.isl,
            "osl": recipe.osl,
            "random_range_ratio": 0.0,  # Fixed ISL/OSL for reproducibility
            "backend": "openai",
            "streaming": True,
        }
    ]

    return server_config
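
# Abridged sketch of one resulting entry (assumed values; LLM API fields elided):
#   {"name": "openai_gpt_oss_120b_1024_1024_conc64_gpu8",
#    "model_name": "gpt_oss_120b_fp4", "gpus": 8, "match_mode": "scenario",
#    "kv_cache_config": {"enable_block_reuse": False},
#    "client_configs": [{"name": "con64_isl1024_osl1024", "concurrency": 64, ...}]}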


def group_recipes_by_scenario(recipes: RecipeList) -> dict:
    """Group recipes by scenario key (model, gpu, isl, osl, num_gpus)."""
    groups = defaultdict(list)
    for recipe in recipes:
        key = (recipe.model, recipe.gpu, recipe.isl, recipe.osl, recipe.num_gpus)
        groups[key].append(recipe)
    return groups


def filter_to_key_recipes(recipes: RecipeList) -> list[Recipe]:
    """Filter recipes to only key configs (min latency, balanced, max throughput)."""
    scenario_groups = group_recipes_by_scenario(recipes)
    key_recipes = []
    for scenario_recipes in scenario_groups.values():
        for recipe, _ in select_key_recipes(scenario_recipes):
            key_recipes.append(recipe)
    return key_recipes


def group_recipes_by_gpu(recipes: list[Recipe]) -> dict[str, list[Recipe]]:
    """Group recipes by GPU type."""
    groups = defaultdict(list)
    for recipe in recipes:
        groups[recipe.gpu].append(recipe)
    return groups


def group_recipes_by_num_gpus(recipes: list[Recipe]) -> dict[int, list[Recipe]]:
    """Group recipes by num_gpus within a GPU type."""
    groups = defaultdict(list)
    for recipe in recipes:
        groups[recipe.num_gpus].append(recipe)
    return groups


def generate_aggr_config(recipes: list[Recipe]) -> dict[str, list[dict]]:
    """Generate aggr_server config from recipes."""
    server_configs = []

    for recipe in recipes:
        llm_api_config = recipe.load_config()
        server_config = recipe_to_server_config(recipe, llm_api_config)
        server_configs.append(server_config)

    return {"server_configs": server_configs}
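
# yaml.dump() renders this as a top-level "server_configs:" list, which becomes
# the body of each config_database_<gpu>.yaml file written by generate_tests().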


def generate_condition_entry(
    gpu_name: str, num_gpus: int, config_name: str, server_names: list[str]
) -> dict:
    """Build one test-list condition block for a (GPU type, num_gpus) group."""
    # using copy.deepcopy to avoid creating YAML anchors
    wildcards = copy.deepcopy(GPU_WILDCARDS[gpu_name])
    condition = {
        "wildcards": wildcards,
        "ranges": {"system_gpu_count": {"gte": num_gpus}},
    }

    tests = [
        f"perf/test_perf_sanity.py::test_e2e[aggr_upload-{config_name}-{name}]"
        for name in server_names
    ]
    return {"condition": condition, "tests": tests}
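
# In the generated YAML, one condition block looks roughly like (illustrative):
#   - condition:
#       wildcards:
#         gpu:
#         - '*b200*'
#         cpu: x86_64
#         linux_distribution_name: ubuntu*
#       ranges:
#         system_gpu_count:
#           gte: 8
#     tests:
#       - perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-<server_name>]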


def generate_tests(test_list_path: Path = TEST_LIST_PATH, test_config_dir: Path = PERF_SANITY_DIR):
    """Write the per-GPU config files and the llm_config_database.yml test list."""
    test_list_path.parent.mkdir(parents=True, exist_ok=True)

    all_recipes = RecipeList.from_yaml(DATABASE_LIST_PATH)
    recipes = filter_to_key_recipes(all_recipes)
    print(f"Selected {len(recipes)} key recipes from {len(all_recipes)} total")

    gpu_groups = group_recipes_by_gpu(recipes)
    condition_entries = []
    config_files = {}

    for gpu_name in sorted(gpu_groups.keys()):
        gpu_recipes = gpu_groups[gpu_name]
        config_name = f"config_database_{gpu_name.lower()}"
        config_path = test_config_dir / f"{config_name}.yaml"

        aggr_config = generate_aggr_config(gpu_recipes)
        config_content = yaml.dump(
            aggr_config, default_flow_style=False, sort_keys=False, width=120
        )

        with open(config_path, "w", encoding="utf-8") as f:
            f.write(config_content)
        print(f"Generated {config_path}")

        config_files[config_path] = config_content

        # Generate condition entries grouped by num_gpus
        num_gpus_groups = group_recipes_by_num_gpus(gpu_recipes)
        for num_gpus in sorted(num_gpus_groups.keys()):
            server_names = [generate_server_name(r) for r in num_gpus_groups[num_gpus]]
            entry = generate_condition_entry(gpu_name, num_gpus, config_name, server_names)
            condition_entries.append(entry)

    test_list = {
        "version": "0.0.1",
        "llm_config_database": condition_entries,
    }

    header = """# ===============================================================================
# Config Database Performance Tests (AUTO-GENERATED)
# ===============================================================================
# Generated by: scripts/generate_config_database_tests.py
#
# These tests use scenario-only matching (match_mode: scenario) for baselines.
# Baselines are matched by (model, gpu, isl, osl, concurrency, num_gpus) instead
# of full config fields, allowing configs to evolve while maintaining comparison.
#
# To regenerate:
#   python scripts/generate_config_database_tests.py
# ===============================================================================

"""
    with open(test_list_path, "w", encoding="utf-8") as f:
        f.write(header)
        yaml.dump(test_list, f, default_flow_style=False, sort_keys=False, width=120)
    print(f"Generated {test_list_path}")


if __name__ == "__main__":
    generate_tests()