[TRTC-71][feat] Add regression testing for config database (#9832)

Signed-off-by: Anish Shanbhag <ashanbhag@nvidia.com>
Anish Shanbhag 2025-12-18 16:15:38 -08:00 committed by GitHub
parent 799a2ae311
commit 91a9ae42d2
12 changed files with 4488 additions and 706 deletions

File diff suppressed because it is too large.

View File

@@ -15,15 +15,20 @@
from pathlib import Path
from typing import Any, Dict, Iterator, List
from typing import Any, Dict, Iterator, List, Tuple
import yaml
from pydantic import BaseModel, Field, RootModel
REPO_ROOT = Path(__file__).parent.parent.parent.parent
DATABASE_LIST_PATH = Path(__file__).parent / "lookup.yaml"
LOW_LATENCY_CONCURRENCY_THRESHOLD = 8
HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD = 32
KEY_PROFILES = {"Min Latency", "Balanced", "Max Throughput"}
class RecipeConstraints(BaseModel):
class Recipe(BaseModel):
"""Recipe record for scenario list."""
model: str = Field(description="Model name")
@@ -36,29 +41,68 @@ class RecipeConstraints(BaseModel):
def load_config(self) -> Dict[str, Any]:
"""Load and return the YAML config at config_path."""
with open(self.config_path) as f:
data = yaml.safe_load(f)
return data if data is not None else {}
config_relative_path = Path(self.config_path)
# Ensure config path is within the repo root
if config_relative_path.is_absolute() or ".." in config_relative_path.parts:
raise ValueError(f"Invalid config path: {self.config_path}")
full_path = REPO_ROOT / self.config_path
if not full_path.exists():
raise FileNotFoundError(f"Config not found: {full_path}")
with open(full_path, encoding="utf-8") as f:
data = yaml.safe_load(f)
return data if data is not None else {}  # empty YAML loads as None; keep the old guard
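As an aside, the traversal guard above is easy to exercise in isolation. A minimal standalone sketch (an assumption-labeled restatement of the same checks, not part of this commit):

    from pathlib import Path

    def is_safe_relative(path_str: str) -> bool:
        # Mirrors the guard in load_config: reject absolute paths and any ".." component.
        p = Path(path_str)
        return not p.is_absolute() and ".." not in p.parts

    assert is_safe_relative("examples/configs/database/foo.yaml")
    assert not is_safe_relative("../etc/passwd")
    assert not is_safe_relative("/etc/passwd")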
class Recipe(BaseModel):
"""Recipe that describes a single scenario."""
constraints: RecipeConstraints = Field(description="Recipe constraints")
env_overrides: Dict[str, Any] = Field(description="Environment overrides", default_factory=dict)
config: Dict[str, Any] = Field(description="Configuration overrides", default_factory=dict)
class RecipeList(RootModel[List[RecipeConstraints]]):
class RecipeList(RootModel[List[Recipe]]):
@classmethod
def from_yaml(cls, yaml_path: Path) -> "RecipeList":
"""Load and validate recipe list from YAML file."""
with open(yaml_path) as f:
with open(yaml_path, encoding="utf-8") as f:
data = yaml.safe_load(f)
return cls(data)
def __iter__(self) -> Iterator[RecipeConstraints]:
def __iter__(self) -> Iterator[Recipe]:
return iter(self.root)
def __len__(self) -> int:
return len(self.root)
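For reference, a record shaped like the following should validate against this model (field names are inferred from attribute accesses elsewhere in this diff — model, gpu, isl, osl, concurrency, num_gpus, config_path — so treat this as an illustrative sketch, not the authoritative schema):

    from examples.configs.database.database import RecipeList

    records = [{
        "model": "openai/gpt-oss-120b",  # values mirror entries in the generated test list below
        "gpu": "B200_NVL",
        "isl": 1024,
        "osl": 1024,
        "concurrency": 16,
        "num_gpus": 1,
        "config_path": "examples/configs/database/example.yaml",  # hypothetical path
    }]
    recipes = RecipeList(records)  # run from the repo root so the package import resolves
    print(len(recipes), recipes.root[0].concurrency)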
def assign_profile(num_recipes: int, idx: int, concurrency: int) -> str:
"""Assign performance profile to a recipe based on its position in a concurrency-sorted list."""
if num_recipes == 1:
if concurrency <= LOW_LATENCY_CONCURRENCY_THRESHOLD:
return "Low Latency"
elif concurrency >= HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD:
return "High Throughput"
else:
return "Balanced"
elif idx == 0:
return "Min Latency"
elif idx == num_recipes - 1:
return "Max Throughput"
elif idx in ((num_recipes - 1) // 2, num_recipes // 2):
return "Balanced"
elif idx < num_recipes // 2:
return "Low Latency"
else:
return "High Throughput"
def select_key_recipes(recipes: List[Recipe]) -> List[Tuple[Recipe, str]]:
"""Select key recipes (min latency, balanced, max throughput) from a list of recipes."""
if not recipes:
return []
sorted_recipes = sorted(recipes, key=lambda r: r.concurrency)
n = len(sorted_recipes)
result = []
seen_profiles = set()
for idx, recipe in enumerate(sorted_recipes):
profile = assign_profile(n, idx, recipe.concurrency)
# For n==1, keep whatever profile is assigned
# For n>=2, only keep key profiles and dedupe (for even n, two indices get "Balanced")
if n == 1 or (profile in KEY_PROFILES and profile not in seen_profiles):
result.append((recipe, profile))
seen_profiles.add(profile)
return result
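A quick sketch of the selection behavior (not part of the commit; SimpleNamespace stands in for Recipe here, since only the concurrency attribute is touched when sorting and assigning profiles):

    from types import SimpleNamespace

    from examples.configs.database.database import assign_profile, select_key_recipes

    group = [SimpleNamespace(concurrency=c) for c in (4, 8, 16, 32, 64)]
    for recipe, profile in select_key_recipes(group):
        print(recipe.concurrency, profile)
    # -> 4 Min Latency / 16 Balanced / 64 Max Throughput
    print(assign_profile(1, 0, 4))  # single-recipe group: Low Latency (concurrency <= 8)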

View File

@@ -0,0 +1,226 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate a performance regression test list from the config database.
This script:
1. Reads recipes from the examples/configs/database directory
2. Generates test config files per GPU type (e.g., config_database_b200_nvl.yaml)
3. Generates llm_config_database.yml test list with condition blocks grouped by GPU name and count
"""
import copy
from collections import defaultdict
from pathlib import Path
import yaml
from examples.configs.database.database import (
DATABASE_LIST_PATH,
Recipe,
RecipeList,
select_key_recipes,
)
REPO_ROOT = Path(__file__).parent.parent
PERF_SANITY_DIR = REPO_ROOT / "tests" / "scripts" / "perf-sanity"
TEST_LIST_PATH = (
REPO_ROOT / "tests" / "integration" / "test_lists" / "qa" / "llm_config_database.yml"
)
ITERATIONS = 10
# GPU type to condition wildcards mapping for test list
# Note: the cpu wildcard distinguishes platforms whose GPU wildcards alone would be
# ambiguous, e.g. H200_SXM (x86_64) vs. GH200 (aarch64)
GPU_WILDCARDS = {
"B200_NVL": {"gpu": ["*b200*"], "cpu": "x86_64", "linux_distribution_name": "ubuntu*"},
"H200_SXM": {"gpu": ["*h200*"], "cpu": "x86_64", "linux_distribution_name": "ubuntu*"},
"H100_SXM": {"gpu": ["*h100*"], "cpu": "x86_64", "linux_distribution_name": "ubuntu*"},
"GH200": {"gpu": ["*gh200*"], "cpu": "aarch64", "linux_distribution_name": "ubuntu*"},
"GB200": {"gpu": ["*gb200*"], "cpu": "aarch64", "linux_distribution_name": "ubuntu*"},
}
def generate_server_name(recipe: Recipe) -> str:
"""Generate a unique server name from recipe."""
model_slug = recipe.model.replace("/", "_").replace("-", "_").replace(".", "_")
return f"{model_slug}_{recipe.isl}_{recipe.osl}_conc{recipe.concurrency}_gpu{recipe.num_gpus}"
def generate_client_name(recipe: Recipe) -> str:
"""Generate client config name."""
return f"con{recipe.concurrency}_isl{recipe.isl}_osl{recipe.osl}"
def recipe_to_server_config(recipe: Recipe, llm_api_config: dict) -> dict:
"""Convert a recipe + LLM API config to aggr_server format."""
server_config = {
"name": generate_server_name(recipe),
"model_name": recipe.model,
"gpus": recipe.num_gpus,
# Enable scenario-only matching for baseline comparison
"match_mode": "scenario",
}
# Copy LLM API config fields
for key, value in llm_api_config.items():
server_config[key] = value
# Disable KV cache reuse to ensure consistency
if "kv_cache_config" not in server_config:
server_config["kv_cache_config"] = {}
server_config["kv_cache_config"]["enable_block_reuse"] = False
# Add client configs
server_config["client_configs"] = [
{
"name": generate_client_name(recipe),
"concurrency": recipe.concurrency,
"iterations": ITERATIONS,
"isl": recipe.isl,
"osl": recipe.osl,
"random_range_ratio": 0.0, # Fixed ISL/OSL for reproducibility
"backend": "openai",
"streaming": True,
}
]
return server_config
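Put together, the helpers above would emit roughly the following for a hypothetical recipe (model="openai/gpt-oss-120b", isl=1024, osl=1024, concurrency=16, num_gpus=1) with an empty LLM API config — a hedged illustration traced through the code, not captured generator output:

    example_server_config = {
        "name": "openai_gpt_oss_120b_1024_1024_conc16_gpu1",
        "model_name": "openai/gpt-oss-120b",
        "gpus": 1,
        "match_mode": "scenario",
        "kv_cache_config": {"enable_block_reuse": False},
        "client_configs": [{
            "name": "con16_isl1024_osl1024",
            "concurrency": 16,
            "iterations": 10,  # ITERATIONS
            "isl": 1024,
            "osl": 1024,
            "random_range_ratio": 0.0,
            "backend": "openai",
            "streaming": True,
        }],
    }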
def group_recipes_by_scenario(recipes: RecipeList) -> dict:
"""Group recipes by scenario key (model, gpu, isl, osl, num_gpus)."""
groups = defaultdict(list)
for recipe in recipes:
key = (recipe.model, recipe.gpu, recipe.isl, recipe.osl, recipe.num_gpus)
groups[key].append(recipe)
return groups
def filter_to_key_recipes(recipes: RecipeList) -> list[Recipe]:
"""Filter recipes to only key configs (min latency, balanced, max throughput)."""
scenario_groups = group_recipes_by_scenario(recipes)
key_recipes = []
for scenario_recipes in scenario_groups.values():
for recipe, _ in select_key_recipes(scenario_recipes):
key_recipes.append(recipe)
return key_recipes
def group_recipes_by_gpu(recipes: list[Recipe]) -> dict[str, list[Recipe]]:
"""Group recipes by GPU type."""
groups = defaultdict(list)
for recipe in recipes:
groups[recipe.gpu].append(recipe)
return groups
def group_recipes_by_num_gpus(recipes: list[Recipe]) -> dict[int, list[Recipe]]:
"""Group recipes by num_gpus within a GPU type."""
groups = defaultdict(list)
for recipe in recipes:
groups[recipe.num_gpus].append(recipe)
return groups
def generate_aggr_config(recipes: list[Recipe]) -> dict[str, list[dict]]:
"""Generate aggr_server config from recipes."""
server_configs = []
for recipe in recipes:
llm_api_config = recipe.load_config()
server_config = recipe_to_server_config(recipe, llm_api_config)
server_configs.append(server_config)
return {"server_configs": server_configs}
def generate_condition_entry(
gpu_name: str, num_gpus: int, config_name: str, server_names: list
) -> dict:
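"""Build one test-list condition entry for a (GPU name, num_gpus) group."""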
# using copy.deepcopy to avoid creating YAML anchors
wildcards = copy.deepcopy(GPU_WILDCARDS[gpu_name])
condition = {
"wildcards": wildcards,
"ranges": {"system_gpu_count": {"gte": num_gpus}},
}
tests = [
f"perf/test_perf.py::test_perf[perf_sanity_upload-{config_name}-{name}]"
for name in server_names
]
return {"condition": condition, "tests": tests}
def generate_tests(test_list_path: Path = TEST_LIST_PATH, test_config_dir: Path = PERF_SANITY_DIR):
test_list_path.parent.mkdir(parents=True, exist_ok=True)
all_recipes = RecipeList.from_yaml(DATABASE_LIST_PATH)
recipes = filter_to_key_recipes(all_recipes)
print(f"Selected {len(recipes)} key recipes from {len(all_recipes)} total")
gpu_groups = group_recipes_by_gpu(recipes)
condition_entries = []
config_files = {}
for gpu_name in sorted(gpu_groups.keys()):
gpu_recipes = gpu_groups[gpu_name]
config_name = f"config_database_{gpu_name.lower()}"
config_path = test_config_dir / f"{config_name}.yaml"
aggr_config = generate_aggr_config(gpu_recipes)
config_content = yaml.dump(
aggr_config, default_flow_style=False, sort_keys=False, width=120
)
with open(config_path, "w", encoding="utf-8") as f:
f.write(config_content)
print(f"Generated {config_path}")
config_files[config_path] = config_content
# Generate condition entries grouped by num_gpus
num_gpus_groups = group_recipes_by_num_gpus(gpu_recipes)
for num_gpus in sorted(num_gpus_groups.keys()):
server_names = [generate_server_name(r) for r in num_gpus_groups[num_gpus]]
entry = generate_condition_entry(gpu_name, num_gpus, config_name, server_names)
condition_entries.append(entry)
test_list = {
"version": "0.0.1",
"llm_config_database": condition_entries,
}
header = """# ===============================================================================
# Config Database Performance Tests (AUTO-GENERATED)
# ===============================================================================
# Generated by: scripts/generate_config_database_tests.py
#
# These tests use scenario-only matching (match_mode: scenario) for baselines.
# Baselines are matched by (model, gpu, isl, osl, concurrency, num_gpus) instead
# of full config fields, allowing configs to evolve while maintaining comparison.
#
# To regenerate:
# python scripts/generate_config_database_tests.py
# ===============================================================================
"""
with open(test_list_path, "w", encoding="utf-8") as f:
f.write(header)
yaml.dump(test_list, f, default_flow_style=False, sort_keys=False, width=120)
print(f"Generated {test_list_path}")
if __name__ == "__main__":
generate_tests()

View File

@@ -19,7 +19,7 @@ import sys
from collections import defaultdict
from pathlib import Path
from examples.configs.database.database import DATABASE_LIST_PATH, RecipeList
from examples.configs.database.database import DATABASE_LIST_PATH, RecipeList, assign_profile
SCRIPT_DIR = Path(__file__).parent.resolve()
REPO_ROOT = SCRIPT_DIR.parent
@@ -38,9 +38,6 @@ MODEL_INFO = {
},
}
LOW_LATENCY_CONCURRENCY_THRESHOLD = 8
HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD = 32
def generate_rst(yaml_path, output_file=None):
"""Generate RST table from YAML config database.
@@ -51,10 +48,10 @@
"""
recipe_list = RecipeList.from_yaml(Path(yaml_path))
# Group by model -> (gpu, isl, osl) -> list of recipes
# Group by model -> (gpu, num_gpus, isl, osl) -> list of recipes
model_groups = defaultdict(lambda: defaultdict(list))
for recipe in recipe_list:
key = (recipe.gpu, recipe.isl, recipe.osl)
key = (recipe.gpu, recipe.num_gpus, recipe.isl, recipe.osl)
model_groups[recipe.model][key].append(recipe)
lines = []
@@ -97,7 +94,8 @@ def generate_rst(yaml_path, output_file=None):
subgroups = model_groups[model]
sorted_keys = sorted(
subgroups.keys(), key=lambda k: (str(k[0]), int(k[1] or 0), int(k[2] or 0))
subgroups.keys(),
key=lambda k: (str(k[0]), int(k[1] or 0), int(k[2] or 0), int(k[3] or 0)),
)
for key in sorted_keys:
@@ -114,23 +112,7 @@
conc = entry.concurrency
config_path = entry.config_path
if n == 1:
if conc <= LOW_LATENCY_CONCURRENCY_THRESHOLD:
profile = "Low Latency"
elif conc >= HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD:
profile = "High Throughput"
else:
profile = "Balanced"
elif idx == 0:
profile = "Min Latency"
elif idx == n - 1:
profile = "Max Throughput"
elif idx in ((n - 1) // 2, n // 2):
profile = "Balanced"
elif idx < n // 2:
profile = "Low Latency"
else:
profile = "High Throughput"
profile = assign_profile(n, idx, conc)
full_config_path = config_path
command = f"trtllm-serve {model} --extra_llm_api_options ${{TRTLLM_DIR}}/{full_config_path}"

View File

@@ -58,6 +58,20 @@ MINIMIZE_METRICS = [
"d_p99_e2el",
]
# Fields used for scenario-only matching in recipe tests.
# Unlike regular tests that match on all config fields, recipes match only on the benchmark
# scenario, allowing the underlying config to change while still comparing against baselines
# for the same scenario.
SCENARIO_MATCH_FIELDS = [
"s_runtime",
"s_model_name",
"s_gpu_type",
"l_isl",
"l_osl",
"l_concurrency",
"l_num_gpus",
]
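A rough sketch of how these fields act as a baseline identity (an illustration, not the repo's matching code; the s_/l_ prefixes are assumed to denote string and integer fields, by analogy with d_ for doubles in MINIMIZE_METRICS above):

    def scenario_identity(record: dict) -> tuple:
        # Records with equal identity tuples are treated as the same scenario,
        # even when the rest of their config fields differ.
        return tuple(record.get(k) for k in SCENARIO_MATCH_FIELDS)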
def add_id(data):
OpenSearchDB.add_id_of_json(data)

View File

@@ -29,7 +29,8 @@ from defs.trt_test_alternative import (is_linux, is_windows, print_info,
print_warning)
from ..conftest import get_llm_root, llm_models_root, trt_environment
from .open_search_db_utils import (add_id, get_history_data, get_job_info,
from .open_search_db_utils import (SCENARIO_MATCH_FIELDS, add_id,
get_history_data, get_job_info,
post_new_perf_data, prepare_baseline_data,
prepare_regressive_test_cases,
write_regressive_test_cases)
@@ -597,6 +598,11 @@ class ServerConfig:
self.speculative_model_dir = speculative_config.get(
'speculative_model_dir', "")
# match_mode: "config" (default, 40+ fields) or "scenario" (benchmark scenario fields for recipe testing)
# When match_mode is "scenario", baselines are matched by scenario identity
# (model, gpu, isl, osl, concurrency, num_gpus) instead of full config fields.
self.match_mode = server_config_data.get('match_mode', "config")
# Store filtered config for extra_llm_api_config (exclude name, model_name, gpus, client_configs)
self.extra_llm_api_config_data = {
k: v
@@ -2438,9 +2444,12 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass):
new_data_dict[cmd_idx] = new_data
cmd_idx += 1
if not match_keys:
match_keys.append("s_runtime")
match_keys.extend(server_config_dict.keys())
match_keys.extend(client_config_dict.keys())
if server_config.match_mode == "scenario":
match_keys = SCENARIO_MATCH_FIELDS.copy()
else:
match_keys.append("s_runtime")
match_keys.extend(server_config_dict.keys())
match_keys.extend(client_config_dict.keys())
elif self._config.runtime == "multi_node_disagg_server":
if self._config.disagg_configs[0][

View File

@@ -59,6 +59,7 @@ This directory contains various test configuration files:
- `llm_perf_full.yml` - Main performance test configuration
- `llm_perf_cluster.yml` - Cluster-based performance tests
- `llm_perf_sanity.yml` - Performance sanity checks
- `llm_config_database.yml` - Performance regression tests for the config database in `examples/configs/database` (auto-generated by `scripts/generate_config_database_tests.py`)
- `llm_perf_nim.yml` - NIM-specific performance tests
- `llm_trt_integration_perf.yml` - Integration performance tests
- `llm_trt_integration_perf_sanity.yml` - Integration performance sanity checks
@@ -77,7 +78,7 @@ QA tests are executed on a regular schedule:
- **Weekly**: Automated regression testing
- **Release**: Comprehensive validation before each release
- **Full Cycle Testing**:
run all gpu with llm_function_core.txt + run NIM specific gpu with llm_function_nim.txt
run all GPUs with llm_function_core.txt, run NIM-specific GPUs with llm_function_nim.txt, and run config database tests with llm_config_database.yml
- **Sanity Cycle Testing**:
run all gpu with llm_function_core_sanity.txt
- **NIM Cycle Testing**:

View File

@@ -0,0 +1,191 @@
# ===============================================================================
# Config Database Performance Tests (AUTO-GENERATED)
# ===============================================================================
# Generated by: scripts/generate_config_database_tests.py
#
# These tests use scenario-only matching (match_mode: scenario) for baselines.
# Baselines are matched by (model, gpu, isl, osl, concurrency, num_gpus) instead
# of full config fields, allowing configs to evolve while maintaining comparison.
#
# To regenerate:
# python scripts/generate_config_database_tests.py
# ===============================================================================
version: 0.0.1
llm_config_database:
- condition:
wildcards:
gpu:
- '*b200*'
cpu: x86_64
linux_distribution_name: ubuntu*
ranges:
system_gpu_count:
gte: 1
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
- condition:
wildcards:
gpu:
- '*b200*'
cpu: x86_64
linux_distribution_name: ubuntu*
ranges:
system_gpu_count:
gte: 2
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
- condition:
wildcards:
gpu:
- '*b200*'
cpu: x86_64
linux_distribution_name: ubuntu*
ranges:
system_gpu_count:
gte: 4
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
- condition:
wildcards:
gpu:
- '*b200*'
cpu: x86_64
linux_distribution_name: ubuntu*
ranges:
system_gpu_count:
gte: 8
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu8]
- condition:
wildcards:
gpu:
- '*h200*'
cpu: x86_64
linux_distribution_name: ubuntu*
ranges:
system_gpu_count:
gte: 1
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
- condition:
wildcards:
gpu:
- '*h200*'
cpu: x86_64
linux_distribution_name: ubuntu*
ranges:
system_gpu_count:
gte: 2
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
- condition:
wildcards:
gpu:
- '*h200*'
cpu: x86_64
linux_distribution_name: ubuntu*
ranges:
system_gpu_count:
gte: 4
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
- condition:
wildcards:
gpu:
- '*h200*'
cpu: x86_64
linux_distribution_name: ubuntu*
ranges:
system_gpu_count:
gte: 8
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu8]

File diff suppressed because it is too large.

File diff suppressed because it is too large.

View File

@@ -0,0 +1,127 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import tempfile
import unittest
from pathlib import Path
# Add scripts directory to path
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
SCRIPTS_DIR = os.path.join(REPO_ROOT, "scripts")
sys.path.insert(0, SCRIPTS_DIR)
from generate_config_database_tests import ( # noqa: E402
PERF_SANITY_DIR,
TEST_LIST_PATH,
generate_tests,
)
from generate_config_table import generate_rst # noqa: E402
class TestConfigDatabaseSync(unittest.TestCase):
def test_config_table_sync(self):
"""Test that the config_table.rst file is synchronized with the lookup.yaml database.
Ensures that the RST file is up-to-date with the YAML database.
"""
if generate_rst is None:
self.skipTest("generate_config_table not available")
# Define paths
yaml_path = os.path.join(REPO_ROOT, "examples/configs/database/lookup.yaml")
rst_path = os.path.join(REPO_ROOT, "docs/source/deployment-guide/config_table.rst")
# Ensure files exist
self.assertTrue(os.path.exists(yaml_path), f"YAML file not found: {yaml_path}")
self.assertTrue(os.path.exists(rst_path), f"RST file not found: {rst_path}")
# Read existing RST content
with open(rst_path, "r") as f:
existing_content = f.read()
# Generate new RST content
with tempfile.NamedTemporaryFile(mode="w+", delete=True) as tmp:
generate_rst(yaml_path, output_file=tmp.name)
tmp.seek(0)
generated_content = tmp.read()
# Compare content
self.assertEqual(
existing_content.strip(),
generated_content.strip(),
"config_table.rst is not synchronized with lookup.yaml. "
"Please run 'python3 scripts/generate_config_table.py' from the repo root to update it.",
)
def test_config_database_tests_sync(self):
"""Test that config database test files are synchronized with lookup.yaml.
Ensures that both the test list YAML and per-GPU config files are up-to-date.
"""
self.assertTrue(TEST_LIST_PATH.exists(), f"Test list not found: {TEST_LIST_PATH}")
with open(TEST_LIST_PATH) as f:
existing_test_list = f.read()
existing_config_files = {}
for config_path in PERF_SANITY_DIR.glob("config_database_*.yaml"):
with open(config_path) as f:
existing_config_files[config_path.name] = f.read()
# Generate to temp directory
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_config_dir = Path(tmp_dir) / "configs"
tmp_test_list_path = Path(tmp_dir) / "test_list.yml"
tmp_config_dir.mkdir(parents=True, exist_ok=True)
generate_tests(tmp_test_list_path, tmp_config_dir)
with open(tmp_test_list_path) as f:
generated_test_list = f.read()
self.assertEqual(
existing_test_list.strip(),
generated_test_list.strip(),
f"{TEST_LIST_PATH} is not synchronized with lookup.yaml. "
"Please run 'python3 scripts/generate_config_database_tests.py' from the repo root.",
)
generated_config_files = {}
for config_path in tmp_config_dir.glob("config_database_*.yaml"):
with open(config_path) as f:
generated_config_files[config_path.name] = f.read()
# Check same set of files
self.assertEqual(
set(existing_config_files.keys()),
set(generated_config_files.keys()),
"Mismatch in config database config files. "
"Please run 'python scripts/generate_config_database_tests.py' from the repo root.",
)
# Compare each config file
for filename in existing_config_files:
self.assertEqual(
existing_config_files[filename].strip(),
generated_config_files[filename].strip(),
f"{filename} is not synchronized with lookup.yaml. "
"Please run 'python scripts/generate_config_database_tests.py' from the repo root.",
)
if __name__ == "__main__":
unittest.main()

View File

@@ -1,66 +0,0 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import tempfile
import unittest
# Add scripts directory to path
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
SCRIPTS_DIR = os.path.join(REPO_ROOT, "scripts")
sys.path.insert(0, SCRIPTS_DIR)
from generate_config_table import generate_rst # noqa: E402
class TestConfigTableSync(unittest.TestCase):
def test_config_table_sync(self):
"""Test that the config_table.rst file is synchronized with the lookup.yaml database.
Ensures that the RST file is up-to-date with the YAML database.
"""
if generate_rst is None:
self.skipTest("generate_config_table not available")
# Define paths
yaml_path = os.path.join(REPO_ROOT, "examples/configs/database/lookup.yaml")
rst_path = os.path.join(REPO_ROOT, "docs/source/deployment-guide/config_table.rst")
# Ensure files exist
self.assertTrue(os.path.exists(yaml_path), f"YAML file not found: {yaml_path}")
self.assertTrue(os.path.exists(rst_path), f"RST file not found: {rst_path}")
# Read existing RST content
with open(rst_path, "r") as f:
existing_content = f.read()
# Generate new RST content
with tempfile.NamedTemporaryFile(mode="w+", delete=True) as tmp:
generate_rst(yaml_path, output_file=tmp.name)
tmp.seek(0)
generated_content = tmp.read()
# Compare content
self.assertEqual(
existing_content.strip(),
generated_content.strip(),
"config_table.rst is not synchronized with lookup.yaml. "
"Please run 'python3 scripts/generate_config_table.py' from the repo root to update it.",
)
if __name__ == "__main__":
unittest.main()