From d348dd95a71194ace2fde7ab7d99b614ba8b8346 Mon Sep 17 00:00:00 2001 From: yingguo-trt <244492186+yingguo-trt@users.noreply.github.com> Date: Tue, 27 Jan 2026 17:26:15 +0800 Subject: [PATCH] [None][feat] support Lyris GB200 and increase disagg test timeout (#11019) Signed-off-by: yingguo-trt <244492186+yingguo-trt@users.noreply.github.com> Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> Co-authored-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com> --- ...2-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml | 2 +- tests/integration/defs/perf/disagg/utils/common.py | 4 ++-- tests/integration/defs/perf/disagg/utils/config_loader.py | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index 58ae5032d8..0cbeb62161 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -13,7 +13,7 @@ slurm: script_file: disaggr_torch.slurm partition: account: - job_time: 03:00:00 + job_time: 04:00:00 job_name: unified-benchmark extra_args: "--gres=gpu:4" numa_bind: true diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py index fbd28aa534..ec09e225d2 100644 --- a/tests/integration/defs/perf/disagg/utils/common.py +++ b/tests/integration/defs/perf/disagg/utils/common.py @@ -14,9 +14,9 @@ GPU_RESOURCE_CONFIG = { }, # Lyris GB200 "GB200_LYRIS": { - "slurm_extra_args": "", # GB200 does not require extra args + 
"slurm_extra_args": "", # GB200 does not require extra args "set_segment": True, - "lock_freq_graphics_mhz": None, # TODO: Set GB200 lock frequency + "lock_freq_graphics_mhz": None, # TODO: Set GB200 lock frequency "lock_freq_memory_mhz": None, }, # Lyris GB300 diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py index 96cfc5f2be..b9cd15b601 100644 --- a/tests/integration/defs/perf/disagg/utils/config_loader.py +++ b/tests/integration/defs/perf/disagg/utils/config_loader.py @@ -343,7 +343,6 @@ class ConfigLoader: metadata = config_data.get("metadata", {}) model_name = metadata.get("model_name", "unknown") supported_gpus = metadata.get("supported_gpus", ["GB200", "GB300", "H100", "B200", "B300"]) - # Override config with environment variables (in memory only, do not write back) config_data = self._apply_env_overrides(config_data, model_name)