[None][fix] Decrease Pre Merge Perf Tests (#10390)

Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>
Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
Co-authored-by: Yanchao Lu <yanchaol@nvidia.com>
Authored by chenfeiz0326 on 2026-01-05 01:21:34 +08:00, committed by GitHub
parent c4f27fa4c0
commit a65b0d4efa
6 changed files with 144 additions and 273 deletions

View File

@@ -2872,21 +2872,6 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
"""
}
}
if (stageName.contains("PerfSanity")) {
stage ("Check PerfSanity Result") {
def perfCheckResult = sh(
script: """
python3 ${llmSrc}/tests/integration/defs/perf/perf_regression_check.py \
${WORKSPACE}/${stageName}
""",
returnStatus: true
)
if (perfCheckResult != 0) {
error "Performance regression detected and failing the build (exit code: ${perfCheckResult})"
}
}
}
}
}
@@ -3319,21 +3304,12 @@ def launchTestJobs(pipeline, testFilter)
// "GB300-PyTorch-1": ["gb300-single", "l0_gb300", 1, 1],
// "GB300-4_GPUs-PyTorch-Post-Merge-1": ["gb300-x4", "l0_gb300_multi_gpus", 1, 1, 4],
// PerfSanity pre-merge tests
"GB200-4_GPUs-PyTorch-PerfSanity-4": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 4, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-5": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 5, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-6": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 6, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-11": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 11, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-12": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 12, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-1": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 1, 2, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-2": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 2, 2, 4],
// PerfSanity post-merge tests
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 1, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 2, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 3, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-7": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 7, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-8": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 8, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-9": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 9, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-10": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 10, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-13": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 13, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-14": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 14, 14, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 1, 3, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 2, 3, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["gb200-x4-oci", "l0_gb200_multi_gpus_perf_sanity", 3, 3, 4],
]
fullSet += SBSASlurmTestConfigs.keySet()
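
For reference, each entry in these Slurm test configs appears to follow the shape [platform queue, test list name, shard index, shard count, GPU count], e.g. shard 1 of 2 on 4 GPUs. Below is a minimal standalone Python sketch of how a (shard index, shard count) pair can partition a test list; the function and variable names are illustrative, not the pipeline's actual helpers:

def select_shard(tests, shard_id, shard_count):
    # Round-robin split: keep every shard_count-th test, starting at 1-based shard_id.
    return [t for i, t in enumerate(tests) if i % shard_count == shard_id - 1]

all_tests = [f"perf_case_{i}" for i in range(1, 7)]
print(select_shard(all_tests, 1, 2))  # ['perf_case_1', 'perf_case_3', 'perf_case_5']
print(select_shard(all_tests, 2, 2))  # ['perf_case_2', 'perf_case_4', 'perf_case_6']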

View File

@@ -102,7 +102,6 @@ set +e
pytest_exit_code=0
perf_check_exit_code=0
perf_report_exit_code=0
perf_sanity_check_exit_code=0
eval $pytestCommand
pytest_exit_code=$?
@@ -154,20 +153,10 @@ if [ $SLURM_PROCID -eq 0 ] && [ "$perfMode" = "true" ]; then
echo "Rank${SLURM_PROCID} Perf check finished execution with exit code $perf_check_exit_code"
fi
if [ $SLURM_PROCID -eq 0 ] && [[ "$stageName" == *PerfSanity* ]]; then
echo "Check PerfSanity Result"
python3 $llmSrcNode/tests/integration/defs/perf/perf_regression_check.py \
$jobWorkspace
perf_sanity_check_exit_code=$?
echo "Rank${SLURM_PROCID} PerfSanity check finished execution with exit code $perf_sanity_check_exit_code"
fi
if [ "$pytest_exit_code" -ne 0 ]; then
final_exit_code=$pytest_exit_code
elif [ "$perf_check_exit_code" -ne 0 ]; then
final_exit_code=$perf_check_exit_code
elif [ "$perf_sanity_check_exit_code" -ne 0 ]; then
final_exit_code=$perf_sanity_check_exit_code
else
final_exit_code=0
fi

View File

@@ -23,7 +23,7 @@ import time
from datetime import datetime
import yaml
from defs.trt_test_alternative import print_info, print_warning
from defs.trt_test_alternative import print_error, print_info, print_warning
_project_root = os.path.abspath(
os.path.join(os.path.dirname(__file__), '../../../..'))
@@ -660,3 +660,123 @@ def write_regressive_test_cases(regressive_data_list, new_data_dict,
if len(regressive_data_list) > 0:
print_warning(
f"Found {len(regressive_data_list)} regressive test cases")
def _get_metric_keys():
"""Get all metric-related keys for filtering config keys."""
metric_keys = set()
for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS:
metric_suffix = metric[2:] # Strip "d_" prefix
metric_keys.add(metric)
metric_keys.add(f"d_baseline_{metric_suffix}")
metric_keys.add(f"d_threshold_post_merge_{metric_suffix}")
metric_keys.add(f"d_threshold_pre_merge_{metric_suffix}")
return metric_keys
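
As a quick standalone illustration of what _get_metric_keys() produces, assuming for the sake of the example that MAXIMIZE_METRICS contains "d_token_throughput" and MINIMIZE_METRICS contains "d_mean_ttft" (the real lists are defined elsewhere in this module):

MAXIMIZE_METRICS = ["d_token_throughput"]  # assumed example values, not the real lists
MINIMIZE_METRICS = ["d_mean_ttft"]

metric_keys = set()
for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS:
    suffix = metric[2:]  # strip the "d_" prefix, as _get_metric_keys() does
    metric_keys |= {
        metric,
        f"d_baseline_{suffix}",
        f"d_threshold_post_merge_{suffix}",
        f"d_threshold_pre_merge_{suffix}",
    }
print(sorted(metric_keys))
# ['d_baseline_mean_ttft', 'd_baseline_token_throughput', 'd_mean_ttft',
#  'd_threshold_post_merge_mean_ttft', 'd_threshold_post_merge_token_throughput',
#  'd_threshold_pre_merge_mean_ttft', 'd_threshold_pre_merge_token_throughput',
#  'd_token_throughput']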
def _print_perf_data(data):
"""Print performance metrics and config for a single data entry."""
print_info("=== Metrics ===")
for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS:
if metric in data:
value = data.get(metric, "N/A")
print_info(f'"{metric}": {value}')
metric_keys = _get_metric_keys()
print_info("\n=== Config ===")
config_keys = sorted([key for key in data.keys() if key not in metric_keys])
for key in config_keys:
value = data[key]
print_info(f'"{key}": {value}')
def _print_regression_data(data, print_func=None):
"""
Print regression info, metrics with baselines/thresholds, and config.
"""
if print_func is None:
print_func = print_info
if "s_regression_info" in data:
print_func("=== Regression Info ===")
print_func(f"{data['s_regression_info']}")
metric_keys = _get_metric_keys()
is_post_merge = data.get("b_is_post_merge", False)
print_func("=== Metrics ===")
for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS:
metric_suffix = metric[2:] # Strip "d_" prefix
baseline_key = f"d_baseline_{metric_suffix}"
if is_post_merge:
threshold_key = f"d_threshold_post_merge_{metric_suffix}"
else:
threshold_key = f"d_threshold_pre_merge_{metric_suffix}"
# Only print if at least one of the keys exists
if metric in data or baseline_key in data or threshold_key in data:
value = data.get(metric, "N/A")
baseline = data.get(baseline_key, "N/A")
threshold = data.get(threshold_key, "N/A")
# Calculate percentage difference between value and baseline
# Positive percentage means better perf, negative means regression
if (isinstance(value, (int, float))
and isinstance(baseline, (int, float)) and baseline != 0):
if metric in MAXIMIZE_METRICS:
# Larger is better: value > baseline is positive (better)
percentage = (value - baseline) / baseline * 100
else:
# Smaller is better: value < baseline is positive (better)
percentage = (baseline - value) / baseline * 100
percentage_str = f"{percentage:+.2f}%"
else:
percentage_str = "N/A"
print_func(
f'"{metric}": {value}, "{baseline_key}": {baseline}, '
f'"{threshold_key}": {threshold}, "diff": {percentage_str}')
print_func("\n=== Config ===")
config_keys = sorted([key for key in data.keys() if key not in metric_keys])
for key in config_keys:
if key == "s_regression_info":
continue
value = data[key]
print_func(f'"{key}": {value}')
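
The sign convention in the percentage calculation above (a positive diff always means better than baseline, whether the metric is maximized or minimized) can be sanity-checked with a tiny standalone sketch; the numbers are made up:

def diff_pct(value, baseline, maximize):
    # Mirrors the convention in _print_regression_data:
    # positive -> better than baseline, negative -> regression.
    if maximize:
        return (value - baseline) / baseline * 100
    return (baseline - value) / baseline * 100

print(f"{diff_pct(105.0, 100.0, maximize=True):+.2f}%")   # throughput up 5%  -> +5.00%
print(f"{diff_pct(110.0, 100.0, maximize=False):+.2f}%")  # latency up 10%    -> -10.00%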
def check_perf_regression(regressive_data_list, new_data_dict):
"""
Check performance regression by printing regression data.
"""
# Split regression data into post-merge and pre-merge
post_merge_regressions = [
data for data in regressive_data_list
if data.get("b_is_post_merge", False)
]
pre_merge_regressions = [
data for data in regressive_data_list
if not data.get("b_is_post_merge", False)
]
# Print pre-merge regression data with print_warning
if len(pre_merge_regressions) > 0:
print_warning(
f"Found {len(pre_merge_regressions)} pre-merge regression data")
for i, data in enumerate(pre_merge_regressions):
print_warning(f"\n{'=' * 60}")
print_warning(f"Pre-merge Regression Data #{i + 1}")
print_warning("=" * 60)
_print_regression_data(data, print_func=print_warning)
# Print post-merge regression data with print_warning for content
if len(post_merge_regressions) > 0:
for i, data in enumerate(post_merge_regressions):
print_warning(f"\n{'=' * 60}")
print_warning(f"Post-merge Regression Data #{i + 1}")
print_warning("=" * 60)
_print_regression_data(data, print_func=print_warning)
print_error(
f"Found {len(post_merge_regressions)} post-merge regression data")
# Print summary if no regressions
if len(regressive_data_list) == 0:
print_info("No regression data found.")
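
A minimal sketch of how check_perf_regression splits its input, with made-up dictionaries and the project-specific printers stubbed out (print_info, print_warning, and print_error come from defs.trt_test_alternative in the real module):

print_info = print_warning = print_error = print  # stand-ins for the real printers

regressive_data_list = [
    {"b_is_post_merge": False, "s_regression_info": "pre-merge: throughput below threshold"},
    {"b_is_post_merge": True, "s_regression_info": "post-merge: TTFT above threshold"},
]

pre_merge = [d for d in regressive_data_list if not d.get("b_is_post_merge", False)]
post_merge = [d for d in regressive_data_list if d.get("b_is_post_merge", False)]

# Pre-merge regressions are only warned about; post-merge regressions are also
# reported through print_error, matching check_perf_regression above.
print_warning(f"Found {len(pre_merge)} pre-merge regression data")
print_error(f"Found {len(post_merge)} post-merge regression data")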

View File

@@ -1,203 +0,0 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import yaml
METRICS = [
"seq_throughput",
"token_throughput",
"total_token_throughput",
"user_throughput",
"mean_tpot",
"median_tpot",
"p99_tpot",
"mean_ttft",
"median_ttft",
"p99_ttft",
"mean_itl",
"median_itl",
"p99_itl",
"mean_e2el",
"median_e2el",
"p99_e2el",
]
def should_skip_execution():
disagg_type = os.getenv("DISAGG_SERVING_TYPE", "")
if (
disagg_type.startswith("GEN")
or disagg_type.startswith("CTX")
or disagg_type == "DISAGG_SERVER"
):
return True
return False
def find_yaml_files(job_workspace, filename):
yaml_files = []
for root, dirs, files in os.walk(job_workspace):
for file in files:
if file == filename:
yaml_files.append(os.path.join(root, file))
return yaml_files
def read_yaml_data(yaml_files):
all_data = []
for file_path in yaml_files:
try:
with open(file_path, "r") as f:
data = yaml.safe_load(f)
if data:
if isinstance(data, list):
all_data.extend(data)
else:
all_data.append(data)
except Exception as e:
print(f"Error reading {file_path}: {e}")
return all_data
def get_metric_keys():
metric_keys = set()
for metric in METRICS:
metric_keys.add(f"d_{metric}")
metric_keys.add(f"d_baseline_{metric}")
metric_keys.add(f"d_threshold_{metric}")
return metric_keys
def print_perf_data(data):
print("=== Metrics ===")
for metric in METRICS:
value_key = f"d_{metric}"
if value_key in data:
value = data.get(value_key, "N/A")
print(f'"{value_key}": {value}')
metric_keys = get_metric_keys()
print("\n=== Config ===")
config_keys = sorted([key for key in data.keys() if key not in metric_keys])
for key in config_keys:
value = data[key]
print(f'"{key}": {value}')
def print_regression_data(data):
if "s_regression_info" in data:
print("=== Regression Info ===")
print(f"{data['s_regression_info']}")
metric_keys = get_metric_keys()
print("=== Metrics ===")
for metric in METRICS:
value_key = f"d_{metric}"
baseline_key = f"d_baseline_{metric}"
threshold_key = f"d_threshold_{metric}"
# Only print if at least one of the keys exists
if value_key in data or baseline_key in data or threshold_key in data:
value = data.get(value_key, "N/A")
baseline = data.get(baseline_key, "N/A")
threshold = data.get(threshold_key, "N/A")
# Calculate percentage difference between value and baseline
if (
isinstance(value, (int, float))
and isinstance(baseline, (int, float))
and baseline != 0
):
percentage = (value - baseline) / baseline * 100
percentage_str = f"{percentage:+.2f}%"
else:
percentage_str = "N/A"
print(
f'"{value_key}": {value}, "{baseline_key}": {baseline}, '
f'"{threshold_key}": {threshold}, "diff": {percentage_str}'
)
print("\n=== Config ===")
config_keys = sorted([key for key in data.keys() if key not in metric_keys])
for key in config_keys:
if key == "s_regression_info":
continue
value = data[key]
print(f'"{key}": {value}')
def main():
if should_skip_execution():
print("Skipping check_perf_regression.py due to DISAGG_SERVING_TYPE")
return 0
job_workspace = sys.argv[1]
if not os.path.isdir(job_workspace):
print(f"Skipping perf regression check since {job_workspace} is not a valid directory.")
return 0
perf_data_files = find_yaml_files(job_workspace, "perf_data.yaml")
all_perf_data = read_yaml_data(perf_data_files)
print(f"Found {len(all_perf_data)} perf data")
for i, data in enumerate(all_perf_data):
print(f"\n{'=' * 60}")
print(f"Perf Data #{i + 1}")
print("=" * 60)
print_perf_data(data)
print(f"\n{'=' * 60}\n")
regression_files = find_yaml_files(job_workspace, "regression.yaml")
all_regression_data = read_yaml_data(regression_files)
print(f"Found {len(all_regression_data)} regression data")
for i, data in enumerate(all_regression_data):
print(f"\n{'=' * 60}")
print(f"Regression Data #{i + 1}")
print("=" * 60)
print_regression_data(data)
# Split regression data into post-merge and pre-merge categories
post_merge_regressions = [
data for data in all_regression_data if data.get("b_is_post_merge", False)
]
pre_merge_regressions = [
data for data in all_regression_data if not data.get("b_is_post_merge", False)
]
if len(all_regression_data) == 0:
print("\n No regression data found. Perf check is successful.")
return 0
if len(pre_merge_regressions) > 0:
print(
f"\n Warning: Found {len(pre_merge_regressions)} pre-merge regression data. "
"But we don't fail the check temporarily."
)
if len(post_merge_regressions) > 0:
print(
f"\n Error: Found {len(post_merge_regressions)} post-merge regression data. Perf check is failed."
)
return 1
print("\n No post-merge regression data found. Perf check is successful.")
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -37,13 +37,13 @@ from ..conftest import get_llm_root, llm_models_root
from .open_search_db_utils import (
SCENARIO_MATCH_FIELDS,
add_id,
check_perf_regression,
get_common_values,
get_history_data,
get_job_info,
post_new_perf_data,
prepare_baseline_data,
prepare_regressive_test_cases,
write_regressive_test_cases,
)
from .utils import collect_and_clean_myelin_time
@@ -781,16 +781,17 @@ class DisaggTestCmds(NamedTuple):
return ["multi-node disaggregated server tests, please check config files"]
def parse_select_pattern(select_pattern: str) -> str:
"""Parse select pattern (server config name).
def parse_select_pattern(select_pattern: str) -> list:
"""Parse select pattern (server config names).
Args:
select_pattern: Server config name (e.g., "r1_fp8_dep8_mtp1_1k1k").
select_pattern: Server config names separated by comma
(e.g., "r1_fp4_v2_dep4_mtp1_1k1k,r1_fp4_v2_tep4_mtp3_1k1k,r1_fp4_v2_tp4_mtp3_1k1k").
Returns:
Server config name string.
List of server config name strings.
"""
return select_pattern
return [name.strip() for name in select_pattern.split(",")]
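
For example, the new comma-separated form returns a list of names:

def parse_select_pattern(select_pattern: str) -> list:
    return [name.strip() for name in select_pattern.split(",")]

print(parse_select_pattern("r1_fp4_v2_dep4_mtp1_1k1k, r1_fp4_v2_tep4_mtp3_1k1k"))
# ['r1_fp4_v2_dep4_mtp1_1k1k', 'r1_fp4_v2_tep4_mtp3_1k1k']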
class PerfSanityTestConfig:
@@ -873,11 +874,11 @@ class PerfSanityTestConfig:
def _parse_aggr_config_file(self, config_file_path: str):
"""Parse YAML config file for aggregated server."""
# Parse selection pattern (server config name)
# Parse selection pattern (server config names)
if self.select_pattern:
selected_server_name = parse_select_pattern(self.select_pattern)
selected_server_names = parse_select_pattern(self.select_pattern)
else:
selected_server_name = None
selected_server_names = None
with open(config_file_path, "r") as f:
config = yaml.safe_load(f)
@@ -895,10 +896,10 @@ class PerfSanityTestConfig:
server_client_configs = {}
for server_idx, server_config_data in enumerate(config["server_configs"]):
# Check if this server should be included based on selected_server_name
# Check if this server should be included based on selected_server_names
if (
selected_server_name is not None
and server_config_data.get("name") != selected_server_name
selected_server_names is not None
and server_config_data.get("name") not in selected_server_names
):
continue
@@ -1375,8 +1376,7 @@ class PerfSanityTestConfig:
# Upload the new perf data and baseline data to database
post_new_perf_data(new_baseline_data_dict, new_data_dict, regressive_data_list)
perf_result_output_dir = os.path.join(self._output_dir, self._test_param_labels)
write_regressive_test_cases(regressive_data_list, new_data_dict, perf_result_output_dir)
check_perf_regression(regressive_data_list, new_data_dict)
# Perf sanity test case parameters
@@ -1444,10 +1444,13 @@ def get_disagg_test_cases() -> List[str]:
return test_cases
# Hardcoded multi-test test cases from test db.
MULTI_TEST_TEST_CASES = []
# Generate all test case combinations
# For aggr: {test_type}-{config_yml}, {test_type}-{config_yml}-{server_config_name}
# For disagg: {test_type}-{config_yml}
PERF_SANITY_TEST_CASES = get_aggr_test_cases() + get_disagg_test_cases()
PERF_SANITY_TEST_CASES = get_aggr_test_cases() + get_disagg_test_cases() + MULTI_TEST_TEST_CASES
@pytest.mark.parametrize("perf_sanity_test_case", PERF_SANITY_TEST_CASES)
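
As a small illustration of the naming convention described in the comment above, composed from names that appear in the test list changes below (purely illustrative):

test_type = "aggr_upload"                          # test type label
config_yml = "deepseek_r1_fp4_v2_grace_blackwell"  # config file stem
server_config_name = "r1_fp4_v2_dep4_mtp1_1k1k"    # server config selected within that file

print(f"{test_type}-{config_yml}-{server_config_name}")
# aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_dep4_mtp1_1k1k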

View File

@@ -14,20 +14,12 @@ l0_gb200_multi_gpus_perf_sanity:
stage: pre_merge
backend: pytorch
tests:
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_dep4_mtp1_1k1k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tep4_mtp3_1k1k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tp4_mtp3_1k1k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_dep4_mtp1_8k1k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tep4_mtp3_8k1k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tp4_mtp3_8k1k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_dep4_mtp1_1k8k] TIMEOUT (120)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tep4_mtp3_1k8k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tp4_mtp3_1k8k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_dep4_1k8k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_dep2_1k1k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_tep2_1k8k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_tp2_1k8k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_tp4_eagle3_1k1k] TIMEOUT (90)
- condition:
ranges:
system_gpu_count:
@@ -45,14 +37,8 @@ l0_gb200_multi_gpus_perf_sanity:
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_dep4_mtp1_1k1k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tep4_mtp3_1k1k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tp4_mtp3_1k1k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_dep4_mtp1_8k1k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tep4_mtp3_8k1k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tp4_mtp3_8k1k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_dep4_mtp1_1k8k] TIMEOUT (120)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tep4_mtp3_1k8k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwell-r1_fp4_v2_tp4_mtp3_1k8k] TIMEOUT (60)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_dep4_1k8k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_dep2_1k1k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_tep2_1k8k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_tp2_1k8k] TIMEOUT (90)
- perf/test_perf_sanity.py::test_e2e[aggr_upload-gpt_oss_120b_fp4_grace_blackwell-gpt_oss_fp4_tp4_eagle3_1k1k] TIMEOUT (90)