[https://nvbugs/5761391][fix] Use correct model names for config database regression tests (#10192)

Signed-off-by: Anish Shanbhag <ashanbhag@nvidia.com>
Anish Shanbhag 2026-01-12 10:55:07 -08:00 committed by GitHub
parent a1385243e1
commit dacc881993
7 changed files with 221 additions and 197 deletions

View File

@ -40,6 +40,14 @@ TEST_LIST_PATH = (
REPO_ROOT / "tests" / "integration" / "test_lists" / "qa" / "llm_config_database.yml"
)
ITERATIONS = 10
# Mapping from HuggingFace model IDs to MODEL_PATH_DICT keys used by the test framework
# in tests/integration/defs/perf/test_perf_sanity.py
MODEL_NAME_MAPPING = {
"deepseek-ai/DeepSeek-R1-0528": "deepseek_r1_0528_fp8",
"nvidia/DeepSeek-R1-0528-FP4-v2": "deepseek_r1_0528_fp4_v2",
"openai/gpt-oss-120b": "gpt_oss_120b_fp4",
}
# GPU type to condition wildcards mapping for test list
# Note: cpu is used to distinguish between e.g. H200_SXM and GH200
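The new mapping lets the generator translate a recipe's HuggingFace model ID into the key expected by MODEL_PATH_DICT. A minimal illustration of the lookup (values taken from the mapping above; the unmapped ID is hypothetical):
MODEL_NAME_MAPPING["openai/gpt-oss-120b"]          # -> "gpt_oss_120b_fp4"
MODEL_NAME_MAPPING.get("some-org/unmapped-model")  # -> None; recipe_to_server_config raises ValueError in this case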
@ -65,9 +73,13 @@ def generate_client_name(recipe: Recipe) -> str:
def recipe_to_server_config(recipe: Recipe, llm_api_config: dict) -> dict:
"""Convert a recipe + LLM API config to aggr_server format."""
model_name = MODEL_NAME_MAPPING.get(recipe.model)
if not model_name:
raise ValueError(f"Model not found in MODEL_NAME_MAPPING: {recipe.model}")
server_config = {
"name": generate_server_name(recipe),
"model_name": recipe.model,
"model_name": model_name,
"gpus": recipe.num_gpus,
# Enable scenario-only matching for baseline comparison
"match_mode": "scenario",
@ -157,7 +169,7 @@ def generate_condition_entry(
}
tests = [
f"perf/test_perf.py::test_perf[perf_sanity_upload-{config_name}-{name}]"
f"perf/test_perf_sanity.py::test_e2e[aggr_upload-{config_name}-{name}]"
for name in server_names
]
return {"condition": condition, "tests": tests}
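For reference, a small sketch of the test ID this hunk now produces, using one server name from the regenerated test list below (inputs chosen for illustration):
config_name = "config_database_b200_nvl"
server_names = ["openai_gpt_oss_120b_1024_1024_conc4_gpu1"]
tests = [
    f"perf/test_perf_sanity.py::test_e2e[aggr_upload-{config_name}-{name}]"
    for name in server_names
]
# tests[0] == "perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu1]"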

View File

@ -78,6 +78,7 @@ PRE_MERGE_THRESHOLD = 0.1
# scenario, allowing the underlying config to change while still comparing against baselines
# for the same scenario.
SCENARIO_MATCH_FIELDS = [
"s_gpu_type",
"s_runtime",
"s_model_name",
"l_isl",

View File

@ -58,6 +58,7 @@ MODEL_PATH_DICT = {
}
SUPPORTED_GPU_TYPE = [
"H200",
"B200",
"B300",
"GB200",
@ -226,6 +227,7 @@ class ServerConfig:
"gpus_per_node",
"match_mode",
"client_configs",
"match_mode",
]
self.extra_llm_api_config_data = {
k: v for k, v in server_config_data.items() if k not in exclude_keys
@ -520,7 +522,9 @@ class AggrTestCmds(NamedTuple):
)
wait_for_endpoint_ready(
f"http://{server_hostname}:{server_port}/health", timeout=self.timeout
f"http://{server_hostname}:{server_port}/health",
timeout=self.timeout,
server_proc=server_proc,
)
# Run all clients for this server
@ -1321,11 +1325,11 @@ class PerfSanityTestConfig:
cmd_idx += 1
if not match_keys:
match_keys.extend(["s_gpu_type", "s_runtime"])
if server_config.match_mode == "scenario":
match_keys = SCENARIO_MATCH_FIELDS.copy()
is_scenario_mode = True
else:
match_keys.extend(["s_gpu_type", "s_runtime"])
match_keys.extend(server_config.to_match_keys())
match_keys.extend(client_config.to_match_keys())

View File

@ -23,15 +23,15 @@ llm_config_database:
system_gpu_count:
gte: 1
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
- condition:
wildcards:
gpu:
@ -42,15 +42,15 @@ llm_config_database:
system_gpu_count:
gte: 2
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
- condition:
wildcards:
gpu:
@ -61,21 +61,21 @@ llm_config_database:
system_gpu_count:
gte: 4
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
- condition:
wildcards:
gpu:
@ -86,27 +86,27 @@ llm_config_database:
system_gpu_count:
gte: 8
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu8]
- condition:
wildcards:
gpu:
@ -117,15 +117,15 @@ llm_config_database:
system_gpu_count:
gte: 1
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
- condition:
wildcards:
gpu:
@ -136,15 +136,15 @@ llm_config_database:
system_gpu_count:
gte: 2
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
- condition:
wildcards:
gpu:
@ -155,15 +155,15 @@ llm_config_database:
system_gpu_count:
gte: 4
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
- condition:
wildcards:
gpu:
@ -174,18 +174,18 @@ llm_config_database:
system_gpu_count:
gte: 8
tests:
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu8]

View File

@ -1,6 +1,6 @@
server_configs:
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu4
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 4
match_mode: scenario
cuda_graph_config:
@ -31,7 +31,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu4
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 4
match_mode: scenario
cuda_graph_config:
@ -62,7 +62,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu4
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 4
match_mode: scenario
cuda_graph_config:
@ -97,7 +97,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu8
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -128,7 +128,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu8
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -159,7 +159,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu8
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -194,7 +194,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu4
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 4
match_mode: scenario
cuda_graph_config:
@ -225,7 +225,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu4
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 4
match_mode: scenario
cuda_graph_config:
@ -256,7 +256,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu4
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 4
match_mode: scenario
cuda_graph_config:
@ -291,7 +291,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu8
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -322,7 +322,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu8
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -353,7 +353,7 @@ server_configs:
backend: openai
streaming: true
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu8
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
model_name: deepseek_r1_0528_fp4_v2
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -388,7 +388,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -419,7 +419,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -450,7 +450,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -481,7 +481,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -512,7 +512,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -543,7 +543,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -578,7 +578,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -613,7 +613,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -648,7 +648,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -683,7 +683,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -718,7 +718,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -753,7 +753,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -788,7 +788,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -823,7 +823,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -858,7 +858,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -893,7 +893,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -928,7 +928,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -963,7 +963,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -998,7 +998,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1033,7 +1033,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1068,7 +1068,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1103,7 +1103,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1138,7 +1138,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1173,7 +1173,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1208,7 +1208,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1243,7 +1243,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1278,7 +1278,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1313,7 +1313,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -1348,7 +1348,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -1383,7 +1383,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -1418,7 +1418,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1453,7 +1453,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1488,7 +1488,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1523,7 +1523,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1558,7 +1558,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1593,7 +1593,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1628,7 +1628,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1663,7 +1663,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1698,7 +1698,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1733,7 +1733,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -1768,7 +1768,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -1803,7 +1803,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:

View File

@ -1,6 +1,6 @@
server_configs:
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -31,7 +31,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -62,7 +62,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -93,7 +93,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -124,7 +124,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -155,7 +155,7 @@ server_configs:
backend: openai
streaming: true
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8
model_name: deepseek-ai/DeepSeek-R1-0528
model_name: deepseek_r1_0528_fp8
gpus: 8
match_mode: scenario
cuda_graph_config:
@ -190,7 +190,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -224,7 +224,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -258,7 +258,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -292,7 +292,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -326,7 +326,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -360,7 +360,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -394,7 +394,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -428,7 +428,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -462,7 +462,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -496,7 +496,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -530,7 +530,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -564,7 +564,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -598,7 +598,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -632,7 +632,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -666,7 +666,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -700,7 +700,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -734,7 +734,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -768,7 +768,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -802,7 +802,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -836,7 +836,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -870,7 +870,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -904,7 +904,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -938,7 +938,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -972,7 +972,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -1006,7 +1006,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1040,7 +1040,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1074,7 +1074,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu1
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 1
match_mode: scenario
env_overrides:
@ -1108,7 +1108,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1142,7 +1142,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1176,7 +1176,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu2
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 2
match_mode: scenario
env_overrides:
@ -1210,7 +1210,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1244,7 +1244,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1278,7 +1278,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu4
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 4
match_mode: scenario
env_overrides:
@ -1312,7 +1312,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -1346,7 +1346,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:
@ -1380,7 +1380,7 @@ server_configs:
backend: openai
streaming: true
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu8
model_name: openai/gpt-oss-120b
model_name: gpt_oss_120b_fp4
gpus: 8
match_mode: scenario
env_overrides:

View File

@ -1,11 +1,18 @@
import subprocess
import time
import requests
def wait_for_endpoint_ready(url: str, timeout: int = 300):
def wait_for_endpoint_ready(url: str, timeout: int = 300, server_proc: subprocess.Popen = None):
start = time.monotonic()
while time.monotonic() - start < timeout:
if server_proc is not None:
exit_code = server_proc.poll()
if exit_code is not None:
raise RuntimeError(
f"Server process exited with code {exit_code} before becoming ready."
)
try:
time.sleep(1)
if requests.get(url, timeout=5).status_code == 200:
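A hedged usage sketch of the new server_proc parameter, mirroring the call site updated earlier in this commit (the launch command, host, and port are placeholders):
server_proc = subprocess.Popen(server_cmd)  # server_cmd is a placeholder for the real launch command
# If the server process exits before /health returns 200, this now raises RuntimeError
# immediately instead of polling until the full timeout expires.
wait_for_endpoint_ready("http://localhost:8000/health", timeout=300, server_proc=server_proc)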