[None][feat] support Lyris GB200 and increase disagg test timeout (#11019)

Signed-off-by: yingguo-trt <244492186+yingguo-trt@users.noreply.github.com>
Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
Co-authored-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
This commit is contained in:
yingguo-trt 2026-01-27 17:26:15 +08:00 committed by Yanchao Lu
parent fd4e6132e5
commit d348dd95a7
3 changed files with 3 additions and 4 deletions

View File

@@ -13,7 +13,7 @@ slurm:
script_file: disaggr_torch.slurm
partition: <partition>
account: <account>
job_time: 03:00:00
job_time: 04:00:00
job_name: unified-benchmark
extra_args: "--gres=gpu:4"
numa_bind: true

View File

@@ -14,9 +14,9 @@ GPU_RESOURCE_CONFIG = {
},
# Lyris GB200
"GB200_LYRIS": {
"slurm_extra_args": "", # GB200 does not require extra args
"slurm_extra_args": "", # GB300 does not require extra args
"set_segment": True,
"lock_freq_graphics_mhz": None, # TODO: Set GB200 lock frequency
"lock_freq_graphics_mhz": None, # TODO: Set GB300 lock frequency
"lock_freq_memory_mhz": None,
},
# Lyris GB300

View File

@@ -343,7 +343,6 @@ class ConfigLoader:
metadata = config_data.get("metadata", {})
model_name = metadata.get("model_name", "unknown")
supported_gpus = metadata.get("supported_gpus", ["GB200", "GB300", "H100", "B200", "B300"])
# Override config with environment variables (in memory only, do not write back)
config_data = self._apply_env_overrides(config_data, model_name)