mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 14:07:21 +08:00
[https://nvbugs/5761391][fix] Use correct model names for config database regression tests (#10192)
Signed-off-by: Anish Shanbhag <ashanbhag@nvidia.com>
This commit is contained in:
parent
a1385243e1
commit
dacc881993
@ -40,6 +40,14 @@ TEST_LIST_PATH = (
|
||||
REPO_ROOT / "tests" / "integration" / "test_lists" / "qa" / "llm_config_database.yml"
|
||||
)
|
||||
ITERATIONS = 10
|
||||
# Mapping from HuggingFace model IDs to MODEL_PATH_DICT keys used by the test framework
|
||||
# in tests/integration/defs/perf/test_perf_sanity.py
|
||||
MODEL_NAME_MAPPING = {
|
||||
"deepseek-ai/DeepSeek-R1-0528": "deepseek_r1_0528_fp8",
|
||||
"nvidia/DeepSeek-R1-0528-FP4-v2": "deepseek_r1_0528_fp4_v2",
|
||||
"openai/gpt-oss-120b": "gpt_oss_120b_fp4",
|
||||
}
|
||||
|
||||
|
||||
# GPU type to condition wildcards mapping for test list
|
||||
# Note: cpu is used to distinguish between e.g. H200_SXM and GH200
|
||||
@ -65,9 +73,13 @@ def generate_client_name(recipe: Recipe) -> str:
|
||||
|
||||
def recipe_to_server_config(recipe: Recipe, llm_api_config: dict) -> dict:
|
||||
"""Convert a recipe + LLM API config to aggr_server format."""
|
||||
model_name = MODEL_NAME_MAPPING.get(recipe.model)
|
||||
if not model_name:
|
||||
raise ValueError(f"Model not found in MODEL_NAME_MAPPING: {recipe.model}")
|
||||
|
||||
server_config = {
|
||||
"name": generate_server_name(recipe),
|
||||
"model_name": recipe.model,
|
||||
"model_name": model_name,
|
||||
"gpus": recipe.num_gpus,
|
||||
# Enable scenario-only matching for baseline comparison
|
||||
"match_mode": "scenario",
|
||||
@ -157,7 +169,7 @@ def generate_condition_entry(
|
||||
}
|
||||
|
||||
tests = [
|
||||
f"perf/test_perf.py::test_perf[perf_sanity_upload-{config_name}-{name}]"
|
||||
f"perf/test_perf_sanity.py::test_e2e[aggr_upload-{config_name}-{name}]"
|
||||
for name in server_names
|
||||
]
|
||||
return {"condition": condition, "tests": tests}
|
||||
|
||||
@ -78,6 +78,7 @@ PRE_MERGE_THRESHOLD = 0.1
|
||||
# scenario, allowing the underlying config to change while still comparing against baselines
|
||||
# for the same scenario.
|
||||
SCENARIO_MATCH_FIELDS = [
|
||||
"s_gpu_type",
|
||||
"s_runtime",
|
||||
"s_model_name",
|
||||
"l_isl",
|
||||
|
||||
@ -58,6 +58,7 @@ MODEL_PATH_DICT = {
|
||||
}
|
||||
|
||||
SUPPORTED_GPU_TYPE = [
|
||||
"H200",
|
||||
"B200",
|
||||
"B300",
|
||||
"GB200",
|
||||
@ -226,6 +227,7 @@ class ServerConfig:
|
||||
"gpus_per_node",
|
||||
"match_mode",
|
||||
"client_configs",
|
||||
"match_mode",
|
||||
]
|
||||
self.extra_llm_api_config_data = {
|
||||
k: v for k, v in server_config_data.items() if k not in exclude_keys
|
||||
@ -520,7 +522,9 @@ class AggrTestCmds(NamedTuple):
|
||||
)
|
||||
|
||||
wait_for_endpoint_ready(
|
||||
f"http://{server_hostname}:{server_port}/health", timeout=self.timeout
|
||||
f"http://{server_hostname}:{server_port}/health",
|
||||
timeout=self.timeout,
|
||||
server_proc=server_proc,
|
||||
)
|
||||
|
||||
# Run all clients for this server
|
||||
@ -1321,11 +1325,11 @@ class PerfSanityTestConfig:
|
||||
cmd_idx += 1
|
||||
|
||||
if not match_keys:
|
||||
match_keys.extend(["s_gpu_type", "s_runtime"])
|
||||
if server_config.match_mode == "scenario":
|
||||
match_keys = SCENARIO_MATCH_FIELDS.copy()
|
||||
is_scenario_mode = True
|
||||
else:
|
||||
match_keys.extend(["s_gpu_type", "s_runtime"])
|
||||
match_keys.extend(server_config.to_match_keys())
|
||||
match_keys.extend(client_config.to_match_keys())
|
||||
|
||||
|
||||
@ -23,15 +23,15 @@ llm_config_database:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
|
||||
- condition:
|
||||
wildcards:
|
||||
gpu:
|
||||
@ -42,15 +42,15 @@ llm_config_database:
|
||||
system_gpu_count:
|
||||
gte: 2
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
|
||||
- condition:
|
||||
wildcards:
|
||||
gpu:
|
||||
@ -61,21 +61,21 @@ llm_config_database:
|
||||
system_gpu_count:
|
||||
gte: 4
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
|
||||
- condition:
|
||||
wildcards:
|
||||
gpu:
|
||||
@ -86,27 +86,27 @@ llm_config_database:
|
||||
system_gpu_count:
|
||||
gte: 8
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_b200_nvl-openai_gpt_oss_120b_8192_1024_conc64_gpu8]
|
||||
- condition:
|
||||
wildcards:
|
||||
gpu:
|
||||
@ -117,15 +117,15 @@ llm_config_database:
|
||||
system_gpu_count:
|
||||
gte: 1
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu1]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu1]
|
||||
- condition:
|
||||
wildcards:
|
||||
gpu:
|
||||
@ -136,15 +136,15 @@ llm_config_database:
|
||||
system_gpu_count:
|
||||
gte: 2
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu2]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu2]
|
||||
- condition:
|
||||
wildcards:
|
||||
gpu:
|
||||
@ -155,15 +155,15 @@ llm_config_database:
|
||||
system_gpu_count:
|
||||
gte: 4
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu4]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu4]
|
||||
- condition:
|
||||
wildcards:
|
||||
gpu:
|
||||
@ -174,18 +174,18 @@ llm_config_database:
|
||||
system_gpu_count:
|
||||
gte: 8
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
|
||||
- perf/test_perf.py::test_perf[perf_sanity_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_1024_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_1024_8192_conc64_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc4_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc16_gpu8]
|
||||
- perf/test_perf_sanity.py::test_e2e[aggr_upload-config_database_h200_sxm-openai_gpt_oss_120b_8192_1024_conc64_gpu8]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
server_configs:
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu4
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -31,7 +31,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu4
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -62,7 +62,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu4
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -97,7 +97,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc4_gpu8
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -128,7 +128,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc32_gpu8
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -159,7 +159,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_1024_1024_conc256_gpu8
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -194,7 +194,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu4
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -225,7 +225,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu4
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -256,7 +256,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu4
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -291,7 +291,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc4_gpu8
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -322,7 +322,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc32_gpu8
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -353,7 +353,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: nvidia_DeepSeek_R1_0528_FP4_v2_8192_1024_conc256_gpu8
|
||||
model_name: nvidia/DeepSeek-R1-0528-FP4-v2
|
||||
model_name: deepseek_r1_0528_fp4_v2
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -388,7 +388,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -419,7 +419,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -450,7 +450,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -481,7 +481,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -512,7 +512,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -543,7 +543,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -578,7 +578,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -613,7 +613,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -648,7 +648,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -683,7 +683,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -718,7 +718,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -753,7 +753,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -788,7 +788,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -823,7 +823,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -858,7 +858,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -893,7 +893,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -928,7 +928,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -963,7 +963,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -998,7 +998,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1033,7 +1033,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1068,7 +1068,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1103,7 +1103,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1138,7 +1138,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1173,7 +1173,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1208,7 +1208,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1243,7 +1243,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1278,7 +1278,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1313,7 +1313,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1348,7 +1348,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1383,7 +1383,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1418,7 +1418,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1453,7 +1453,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1488,7 +1488,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1523,7 +1523,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1558,7 +1558,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1593,7 +1593,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1628,7 +1628,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1663,7 +1663,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1698,7 +1698,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1733,7 +1733,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1768,7 +1768,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1803,7 +1803,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
server_configs:
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc4_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -31,7 +31,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc16_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -62,7 +62,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_1024_1024_conc64_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -93,7 +93,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc4_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -124,7 +124,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc16_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -155,7 +155,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: deepseek_ai_DeepSeek_R1_0528_8192_1024_conc64_gpu8
|
||||
model_name: deepseek-ai/DeepSeek-R1-0528
|
||||
model_name: deepseek_r1_0528_fp8
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
cuda_graph_config:
|
||||
@ -190,7 +190,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -224,7 +224,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -258,7 +258,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -292,7 +292,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -326,7 +326,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -360,7 +360,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -394,7 +394,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -428,7 +428,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -462,7 +462,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -496,7 +496,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc4_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -530,7 +530,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc16_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -564,7 +564,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_1024_conc64_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -598,7 +598,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -632,7 +632,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -666,7 +666,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -700,7 +700,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -734,7 +734,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -768,7 +768,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -802,7 +802,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -836,7 +836,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -870,7 +870,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -904,7 +904,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc4_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -938,7 +938,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc16_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -972,7 +972,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_1024_8192_conc64_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1006,7 +1006,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1040,7 +1040,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1074,7 +1074,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu1
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 1
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1108,7 +1108,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1142,7 +1142,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1176,7 +1176,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu2
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 2
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1210,7 +1210,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1244,7 +1244,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1278,7 +1278,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu4
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 4
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1312,7 +1312,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc4_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1346,7 +1346,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc16_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
@ -1380,7 +1380,7 @@ server_configs:
|
||||
backend: openai
|
||||
streaming: true
|
||||
- name: openai_gpt_oss_120b_8192_1024_conc64_gpu8
|
||||
model_name: openai/gpt-oss-120b
|
||||
model_name: gpt_oss_120b_fp4
|
||||
gpus: 8
|
||||
match_mode: scenario
|
||||
env_overrides:
|
||||
|
||||
@ -1,11 +1,18 @@
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def wait_for_endpoint_ready(url: str, timeout: int = 300):
|
||||
def wait_for_endpoint_ready(url: str, timeout: int = 300, server_proc: subprocess.Popen = None):
|
||||
start = time.monotonic()
|
||||
while time.monotonic() - start < timeout:
|
||||
if server_proc is not None:
|
||||
exit_code = server_proc.poll()
|
||||
if exit_code is not None:
|
||||
raise RuntimeError(
|
||||
f"Server process exited with code {exit_code} before becoming ready."
|
||||
)
|
||||
try:
|
||||
time.sleep(1)
|
||||
if requests.get(url, timeout=5).status_code == 200:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user