Patch notes (free text ahead of the first "diff --git" header; patch tools skip it).

Summary of what the hunks below do:
  * execution/executor.py: inserts one blank line after the
    "Creating temporary config" log call in JobManager.
  * utils/common.py, GPU_RESOURCE_CONFIG: folds the per-GPU heading comments
    onto the key lines and drops the per-field trailing comments and TODOs.
    No key or value changes.
  * utils/common.py, EnvManager.get_slurm_job_name(): now returns
    "{account}-{base}", where account comes from EnvManager.get_slurm_account()
    and base from the SLURM_JOB_BASE_NAME env var (default "unified.benchmark").
    The account prefix is skipped when account is empty or starts with "<".

NOTE(review): get_slurm_job_name() no longer reads SLURM_JOB_NAME, and the
default changes from "unified-benchmark" to "unified.benchmark". Confirm no
caller or launch script still relies on the old variable or name.

NOTE(review): line breaks and indentation were rebuilt from a
whitespace-collapsed copy of this patch and checked against the hunk line
counts. The indentation depth of the executor.py context lines and the exact
position of blank context lines are assumptions; verify against the target
files (e.g. "git apply --check") before applying.

diff --git a/tests/integration/defs/perf/disagg/execution/executor.py b/tests/integration/defs/perf/disagg/execution/executor.py
index b5e2d9a83f..68ac6166c7 100644
--- a/tests/integration/defs/perf/disagg/execution/executor.py
+++ b/tests/integration/defs/perf/disagg/execution/executor.py
@@ -219,6 +219,7 @@ class JobManager:
 
             # Write temporary config file with replaced environment variables
             logger.info(f"Creating temporary config: {temp_config_path}")
+
             with open(temp_config_path, "w") as f:
                 yaml.dump(
                     test_config.config_data,
diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py
index ec09e225d2..f119102052 100644
--- a/tests/integration/defs/perf/disagg/utils/common.py
+++ b/tests/integration/defs/perf/disagg/utils/common.py
@@ -2,49 +2,42 @@
 
 import os
 
-# GPU resource configuration
-# Centralized configuration for all GPU-specific parameters
+# GPU resource configuration - centralized config for all GPU-specific parameters
 GPU_RESOURCE_CONFIG = {
-    # OCI GB200
-    "GB200": {
-        "slurm_extra_args": "--gres=gpu:4",  # SLURM extra arguments (empty string if not required)
+    "GB200": {  # OCI GB200
+        "slurm_extra_args": "--gres=gpu:4",
         "set_segment": True,
-        "lock_freq_graphics_mhz": 2062,  # GPU graphics clock lock frequency (MHz)
-        "lock_freq_memory_mhz": 3996,  # GPU memory clock lock frequency (MHz)
+        "lock_freq_graphics_mhz": 2062,
+        "lock_freq_memory_mhz": 3996,
     },
-    # Lyris GB200
-    "GB200_LYRIS": {
-        "slurm_extra_args": "",  # GB300 does not require extra args
+    "GB200_LYRIS": {  # Lyris GB200
+        "slurm_extra_args": "",
         "set_segment": True,
-        "lock_freq_graphics_mhz": None,  # TODO: Set GB300 lock frequency
+        "lock_freq_graphics_mhz": None,
         "lock_freq_memory_mhz": None,
     },
-    # Lyris GB300
-    "GB300": {
-        "slurm_extra_args": "",  # GB300 does not require extra args
+    "GB300": {  # Lyris GB300
+        "slurm_extra_args": "",
         "set_segment": True,
-        "lock_freq_graphics_mhz": None,  # TODO: Set GB300 lock frequency
+        "lock_freq_graphics_mhz": None,
         "lock_freq_memory_mhz": None,
     },
-    # H100
     "H100": {
-        "slurm_extra_args": "",  # H100 does not require extra args
+        "slurm_extra_args": "",
         "set_segment": False,
-        "lock_freq_graphics_mhz": None,  # TODO: Set H100 lock frequency
+        "lock_freq_graphics_mhz": None,
         "lock_freq_memory_mhz": None,
     },
-    # B200
-    "B200": {
+    "B200": {  # OCI B200
         "slurm_extra_args": "--gres=gpu:4",
         "set_segment": False,
-        "lock_freq_graphics_mhz": None,  # TODO: Set B200 lock frequency
+        "lock_freq_graphics_mhz": None,
         "lock_freq_memory_mhz": None,
     },
-    # B300
-    "B300": {
+    "B300": {  # OCI B300
         "slurm_extra_args": "--gres=gpu:4",
         "set_segment": False,
-        "lock_freq_graphics_mhz": None,  # TODO: Set B300 lock frequency
+        "lock_freq_graphics_mhz": None,
         "lock_freq_memory_mhz": None,
     },
 }
@@ -67,7 +60,18 @@ class EnvManager:
 
     @staticmethod
     def get_slurm_job_name() -> str:
-        return os.getenv("SLURM_JOB_NAME", "unified-benchmark")
+        """Get SLURM job name: {SLURM_ACCOUNT}-{base}.
+
+        Example: myaccount-unified.benchmark
+        Customize base via SLURM_JOB_BASE_NAME env var (default: unified.benchmark)
+        """
+        account = EnvManager.get_slurm_account()
+        base = os.getenv("SLURM_JOB_BASE_NAME", "unified.benchmark")
+
+        # Only use account as prefix if it's set and not a placeholder
+        if account and not account.startswith("<"):
+            return f"{account}-{base}"
+        return base
 
     @staticmethod
     def get_slurm_set_segment() -> bool: