TensorRT-LLMs/tests/integration/defs/perf/disagg/execution/subprocess_utils.py
fredricz-20070104 ededeecb0f
[None][test] Add Kimi k2 WIDEEP perf and accuracy cases (#9686)
Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
Co-authored-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
2025-12-08 01:25:07 -08:00

64 lines
1.9 KiB
Python

"""Simplified subprocess utilities for disagg module.
This module provides simple wrappers around subprocess for executing
SLURM commands (srun, sacct, scancel, sbatch).
No complex process tree cleanup is needed because:
1. SLURM commands (srun/sacct/scancel) are simple client tools that don't spawn complex process trees
2. Actual workloads run on remote cluster nodes managed by SLURM scheduler
3. SLURM automatically handles cleanup of remote jobs when the client disconnects
"""
import subprocess
from typing import Optional
from utils.logger import logger
def exec_cmd(*popenargs, timeout: Optional[float] = None, **kwargs) -> int:
    """Run a command to completion and report its exit status.

    Every positional and keyword argument is forwarded unchanged to
    ``subprocess.run``; this wrapper itself does not capture stdout/stderr.

    Args:
        *popenargs: Positional arguments for ``subprocess.run`` (normally the
            command, e.g. a list of strings).
        timeout: Maximum number of seconds to wait for the command, or
            ``None`` to wait indefinitely.
        **kwargs: Any further keyword arguments accepted by
            ``subprocess.run``.

    Returns:
        The return code of the finished process. A non-zero code is returned
        to the caller rather than raised (unless the caller passes
        ``check=True`` through ``kwargs``).

    Raises:
        subprocess.TimeoutExpired: If the command does not finish within
            ``timeout`` seconds.
    """
    return subprocess.run(*popenargs, timeout=timeout, **kwargs).returncode
def _output_to_text(data) -> str:
    """Return captured subprocess output as ``str``.

    ``subprocess.run`` yields ``bytes`` by default but ``str`` when the caller
    enables text mode (``text=True``, ``universal_newlines=True``,
    ``encoding=...`` or ``errors=...``), so both shapes must be accepted.
    """
    if isinstance(data, bytes):
        # Replace undecodable bytes instead of raising UnicodeDecodeError so a
        # stray non-UTF-8 byte in command output cannot abort the caller.
        return data.decode(errors="replace")
    return data


def exec_cmd_with_output(*popenargs, timeout: Optional[float] = None, **kwargs) -> str:
    """Execute a command and return its standard output as a string.

    stdout and stderr are both captured through pipes and the command is run
    with ``check=True``. If the command succeeds but wrote anything to stderr,
    that text is logged via ``logger.error``. When the command fails, the
    exception is raised before any logging happens; the captured stderr is
    then available on the exception's ``stderr`` attribute.

    Args:
        *popenargs: Positional arguments for ``subprocess.run`` (normally the
            command, e.g. a list of strings).
        timeout: Maximum number of seconds to wait for the command, or
            ``None`` to wait indefinitely.
        **kwargs: Additional keyword arguments for ``subprocess.run``. Must
            not contain ``stdout``, ``stderr`` or ``check``, which are set
            here. Text-mode options such as ``text=True`` are supported.

    Returns:
        The command's stdout as ``str``. Bytes output is decoded as UTF-8
        with undecodable bytes replaced by U+FFFD.

    Raises:
        subprocess.CalledProcessError: If the command returns a non-zero
            exit code.
        subprocess.TimeoutExpired: If the timeout is reached.
    """
    result = subprocess.run(
        *popenargs,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout,
        check=True,
        **kwargs,
    )

    # Surface anything the (successful) command wrote to stderr.
    if result.stderr:
        stderr_output = _output_to_text(result.stderr)
        logger.error(f"Command stderr: {stderr_output}")

    return _output_to_text(result.stdout)