"""Disaggregated Benchmark Executor."""

import os
import re
import shutil
import time
from typing import Any, Dict, List, Optional

import yaml
from reporting.report import LogParser, LogWriter, ResultSaver
from utils.common import (
    GPU_RESOURCE_CONFIG,
    SESSION_COLLECT_CMD_TYPE,
    EnvManager,
    extract_config_fields,
)
from utils.logger import logger

from execution.subprocess_utils import exec_cmd, exec_cmd_with_output

# ============================================================================
# SLURM Run Command Builder
# ============================================================================


class SlurmRunCommandBuilder:
    """SLURM Run Command Builder.

    Build srun commands for different GPU types and command types.
    Reuses GPU_RESOURCE_CONFIG for consistency with SlurmJobBuilder.
    """

    def build_srun_prefix(self, job_name: str) -> List[str]:
        """Build the srun command prefix based on GPU type."""
        gpu_type = EnvManager.get_gpu_type()

        # Reuse the same GPU_RESOURCE_CONFIG as SlurmJobBuilder
        gpu_config = GPU_RESOURCE_CONFIG.get(gpu_type)
        if not gpu_config:
            raise ValueError(
                f"GPU resource configuration not found for {gpu_type}. "
                f"Please add a configuration to GPU_RESOURCE_CONFIG."
            )

        # Common srun arguments
        srun_args = [
            "srun",
            "-l",
            "--container-name=sysinfo-get",
            f"--container-image={EnvManager.get_container_image()}",
            f"--container-mounts={EnvManager.get_container_mount()}",
        ]

        # Add the GPU-specific gres parameter (reuses the gres_gpu field);
        # only add --gres when gres_gpu is not None
        if gpu_config["gres_gpu"] is not None:
            srun_args.append(f"--gres=gpu:{gpu_config['gres_gpu']}")

        # Add common parameters
        srun_args.extend(
            [
                f"--partition={EnvManager.get_slurm_partition()}",
                f"--account={EnvManager.get_slurm_account()}",
                f"--job-name={job_name}",
                "--time=02:00:00",
                "--mpi=pmix",
                # Note: --overlap removed to ensure GPU allocation for session_collect,
                # which runs after all test jobs have completed
                "-N",
                "1",
                "-n",
                "1",
            ]
        )

        return srun_args

    def build_script_command(self, cmd_type: str) -> List[str]:
        """Build script command based on command type."""
        work_dir = EnvManager.get_work_dir()
        output_path = EnvManager.get_output_path()
        install_mode = EnvManager.get_install_mode()
        repo_dir = EnvManager.get_repo_dir()
        trtllm_wheel_path = EnvManager.get_trtllm_wheel_path()

        if cmd_type == SESSION_COLLECT_CMD_TYPE:
            if install_mode == "none":
                return [
                    "bash",
                    "-c",
                    f"cd {work_dir} && python3 {work_dir}/simple_collect.py {output_path}",
                ]
            elif install_mode == "wheel":
                # Install TensorRT-LLM wheel first, then run simple_collect.py
                # Note: Use --no-deps to avoid overwriting container's pre-installed packages (like torch)
                install_cmd = f"""
                cd {repo_dir}
                echo 'Step 1: Installing TensorRT-LLM wheel...'
                pip3 install {trtllm_wheel_path} || echo 'Wheel install failed, continuing...'
                echo 'Wheel installation completed'

                echo 'Step 2: Running simple_collect.py...'
                cd {work_dir}
                python3 {work_dir}/simple_collect.py {output_path}
                """
                return ["bash", "-c", install_cmd]
            elif install_mode == "source":
                install_cmd = f"""
                cd {repo_dir}
                pip3 install -e . || echo 'Source install failed, continuing...'

                echo 'Source installation completed'

                echo 'Step 3: Running simple_collect.py...'
                cd {work_dir}
                python3 {work_dir}/simple_collect.py {output_path}
                """
                return ["bash", "-c", install_cmd]
            else:
                raise ValueError(f"Invalid install mode: {install_mode}")
        else:
            # Future command types can be added here
            # elif cmd_type == "benchmark_collect":
            #     model_dir = EnvManager.get_model_dir()
            #     return [
            #         "bash", "-c",
            #         f"cd {work_dir} && python3 {work_dir}/benchmark_collect.py "
            #         f"--model-dir {model_dir} --output {output_path}"
            #     ]
            # elif cmd_type == "metrics_collect":
            #     return [
            #         "bash", "-c",
            #         f"cd {work_dir} && python3 {work_dir}/metrics_collect.py --config {work_dir}/config.yaml"
            #     ]
            raise ValueError(
                f"Unsupported command type: {cmd_type}. "
                f"Currently supported: {SESSION_COLLECT_CMD_TYPE}"
            )

    def run_job(
        self, cmd_type: str, job_name: str, log_file: Optional[str] = None
    ) -> Dict[str, Any]:
        """Execute an srun job.

        Args:
            cmd_type: Type of command to execute
            job_name: Name for the SLURM job
            log_file: Optional path to save command output

        Returns:
            Dict with status and message
        """
        try:
            # Build the complete command
            srun_prefix = self.build_srun_prefix(job_name)
            script_command = self.build_script_command(cmd_type)
            full_command = srun_prefix + script_command

            # Execute with an optional log file
            if log_file:
                logger.info(f"Saving output to: {log_file}")
                # Use Python file redirection to avoid shell quoting issues
                import subprocess

                with open(log_file, "w") as f:
                    result = subprocess.run(
                        full_command, stdout=f, stderr=subprocess.STDOUT, timeout=7200, text=True
                    )
                if result.returncode != 0:
                    raise subprocess.CalledProcessError(result.returncode, full_command)
                logger.success(f"Output saved to {log_file}")
                output = ""  # Output is in the file
            else:
                output = exec_cmd_with_output(full_command, timeout=7200)

            return {"status": True, "msg": "Job executed successfully", "output": output}
        except Exception as e:
            logger.error(f"Job execution failed: {e}")
            return {"status": False, "msg": str(e)}


def make_slurm_run_command():
    """Create a run-job function (maintains interface compatibility)."""
    builder = SlurmRunCommandBuilder()
    return builder.run_job


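# A minimal usage sketch (illustrative only): how the factory above is typically
# consumed. The job name and log path are made-up values, not ones used elsewhere
# in this module; SESSION_COLLECT_CMD_TYPE is the only command type currently
# supported by build_script_command().
#
#   run_session_collect = make_slurm_run_command()
#   outcome = run_session_collect(
#       SESSION_COLLECT_CMD_TYPE,
#       job_name="sysinfo-collect",            # hypothetical job name
#       log_file="/tmp/session_collect.log",   # hypothetical log path
#   )
#   if not outcome["status"]:
#       logger.error(outcome["msg"])

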
class JobManager:
    """Job manager class."""

    @staticmethod
    def submit_job(test_config) -> tuple:
        """Submit a job using submit.py with a YAML config.

        Args:
            test_config: TestConfig object containing configuration

        Returns:
            tuple: (success: bool, job_id: str)
        """
        logger.info("Submitting job using submit.py...")

        try:
            # Get the pre-calculated temporary config file path from test_config
            temp_config_path = test_config.temp_config_path

            # Write a temporary config file with environment variables replaced
            logger.info(f"Creating temporary config: {temp_config_path}")
            with open(temp_config_path, "w") as f:
                yaml.dump(
                    test_config.config_data,
                    f,
                    default_flow_style=False,
                    sort_keys=False,
                    allow_unicode=True,
                    width=1000,
                )
            logger.success(f"Temporary config created: {os.path.basename(temp_config_path)}")

            # Call submit.py with the temporary config file
            submit_script = os.path.join(EnvManager.get_script_dir(), "submit.py")
            cmd = ["python3", submit_script, "-c", temp_config_path]

            logger.info(f"Command: {' '.join(cmd)}")

            # Execute the submission
            output = exec_cmd_with_output(cmd, timeout=60)
            logger.info(f"Output: {output}")

            # Parse the job ID from the output
            if "Submitted batch job" in output:
                match = re.search(r"Submitted batch job (\d+)", output)
                if match:
                    job_id = match.group(1)
                    logger.success(f"Job submitted successfully: {job_id}")
                    return True, job_id

            logger.error("Unable to extract job ID from output")
            # Clean up the temporary file if submission failed
            if os.path.exists(temp_config_path):
                os.remove(temp_config_path)
            return False, ""

        except Exception as e:
            error_msg = str(e)
            # Extract stderr from CalledProcessError if available
            if hasattr(e, "stderr") and e.stderr:
                error_msg = e.stderr.decode() if isinstance(e.stderr, bytes) else e.stderr
            logger.error(f"Job submission exception: {error_msg}")
            # Clean up temporary file on exception (currently disabled)
            # temp_config_path = test_config.temp_config_path
            # if os.path.exists(temp_config_path):
            #     os.remove(temp_config_path)
            return False, error_msg

    @staticmethod
    def backup_logs(
        job_id: str,
        test_config,
        result_dir: str,
        is_passed: bool,
    ) -> Optional[str]:
        """Back up logs and config files to the test_id directory.

        Args:
            job_id: SLURM job ID
            test_config: TestConfig object
            result_dir: Result directory path
            is_passed: Whether the job passed

        Returns:
            backup_dir path if successful, None otherwise
        """
        if not os.path.exists(result_dir):
            logger.warning(f"Result directory does not exist yet: {result_dir}")
            return None

        # Replace colons with hyphens for safe directory naming
        dst_dir_name = test_config.test_id.replace(":", "-")
        # Add an ERROR suffix if the job failed
        if not is_passed:
            dst_dir_name = f"{dst_dir_name}_ERROR"
        backup_dir = os.path.join(os.path.dirname(result_dir), dst_dir_name)

        try:
            logger.info("Copying result directory to backup...")
            logger.info(f"Source: {result_dir}")
            logger.info(f"Destination: {backup_dir}")

            # Remove the old backup if it exists
            if os.path.exists(backup_dir):
                logger.warning("Backup directory already exists, removing old backup")
                shutil.rmtree(backup_dir)

            # Copy the result directory
            shutil.copytree(result_dir, backup_dir)
            logger.success(f"Backup created successfully: {backup_dir}")

            # Move (not copy) the temporary config file to the backup directory
            temp_config_path = test_config.temp_config_path
            if os.path.exists(temp_config_path):
                dest_path = os.path.join(backup_dir, os.path.basename(temp_config_path))
                shutil.move(temp_config_path, dest_path)
                logger.success(f"Temporary config moved to backup: {dest_path}")
            else:
                # Fallback: copy the original config if there is no temp file (backward compatibility)
                case_config_path = test_config.config_path
                if os.path.exists(case_config_path):
                    shutil.copy(case_config_path, backup_dir)
                    logger.success(f"Case config copied successfully: {case_config_path}")
                else:
                    logger.warning(f"Case config not found: {case_config_path}")

            return backup_dir

        except Exception as e:
            logger.warning(f"Failed to create backup copy: {e}")
            # Try to clean up the temporary file on backup failure
            temp_config_path = test_config.temp_config_path
            if os.path.exists(temp_config_path):
                try:
                    os.remove(temp_config_path)
                    logger.info(f"Cleaned up temp config after backup failure: {temp_config_path}")
                except Exception as cleanup_error:
                    logger.warning(f"Failed to cleanup temp config: {cleanup_error}")
            return None

    @staticmethod
    def cleanup_result_dir(result_dir: str) -> bool:
        """Clean up result directory.

        Args:
            result_dir: Result directory path

        Returns:
            True if successful, False otherwise
        """
        if os.path.exists(result_dir):
            try:
                shutil.rmtree(result_dir)
                logger.success(f"Result directory removed: {result_dir}")
                return True
            except Exception as e:
                logger.warning(f"Failed to remove result directory: {e}")
                return False
        return True

    @staticmethod
    def get_result_dir(test_config) -> str:
        """Get result directory.

        Args:
            test_config: TestConfig object

        Returns:
            Result directory path
        """
        config_data = test_config.config_data
        fields = extract_config_fields(config_data)

        # Extract fields for logging and result directory
        log_base = fields["log_base"]
        context_dir = fields["context_dir"]
        log_dir_name = log_base

        result_dir = os.path.join(EnvManager.get_script_dir(), log_dir_name, context_dir)
        return result_dir

    @staticmethod
    def check_result(
        job_id: str,
        test_config,
        timestamps: Optional[Dict[str, str]] = None,
        test_name: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Check job execution result and generate a report.

        High-level method that automatically extracts parameters from TestConfig,
        parses logs, generates performance reports, and saves results to CSV.

        Note: backup_logs should be called separately by the caller (test_disagg.py).

        Args:
            job_id: SLURM job ID
            test_config: TestConfig object containing configuration
            timestamps: Optional timestamps dict for the test case
            test_name: Optional test name for reporting

        Returns:
            Dict with 'success' status and other result information
        """
        config_data = test_config.config_data
        # Get the result directory
        result_dir = JobManager.get_result_dir(test_config)
        logger.info(f"Result directory: {result_dir}")

        # Initialize a default result in case of an exception
        check_result = {"job_id": job_id, "status": "ERROR", "success": False}

        try:
            # Call the internal implementation method
            check_result = JobManager._check_job_result(
                job_id=job_id,
                test_category=test_config.test_category,  # Pass test category for routing
                benchmark_type=test_config.benchmark_type,
                config=config_data,
                metrics_config=test_config.metrics_config,
                accuracy_config=test_config.accuracy_config,  # Pass accuracy config
                model_name=test_config.model_name,
                result_dir=result_dir,
                timestamps=timestamps,
                test_name=test_name,
            )
        except Exception as e:
            logger.error(f"Exception during result checking: {e}")
            check_result["error"] = f"Exception during result checking: {str(e)}"

        # Clean up the result directory
        if EnvManager.get_debug_mode():
            logger.debug(f"Debug mode: Skipping result directory cleanup: {result_dir}")
        else:
            try:
                JobManager.cleanup_result_dir(result_dir)
            except Exception as e:
                logger.warning(f"Failed to cleanup result directory: {e}")

        return check_result

    @staticmethod
    def check_for_early_failure(job_id: str, test_config) -> tuple[bool, Optional[str]]:
        """Check logs for early failure indicators.

        Args:
            job_id: SLURM job ID
            test_config: TestConfig object

        Returns:
            tuple: (has_error, error_message)
        """
        # Key error patterns
        error_patterns = [
            (
                r"\[E\]\s+Traceback[\s\S]*?mpi4py\.MPI\.Comm\.allgather",
                "MPI communication error detected",
            ),
            (
                r"\[E\]\s+Traceback[\s\S]*?pickle data was truncated",
                "Pickle serialization error detected",
            ),
        ]

        try:
            # Check gen and ctx server logs if result_dir exists
            try:
                result_dir = JobManager.get_result_dir(test_config)
                if os.path.exists(result_dir):
                    # Find all output_gen_*.log and output_ctx_*.log files
                    for filename in os.listdir(result_dir):
                        is_gen_log = filename.startswith("output_gen_")
                        is_ctx_log = filename.startswith("output_ctx_")
                        if (is_gen_log or is_ctx_log) and filename.endswith(".log"):
                            log_path = os.path.join(result_dir, filename)
                            try:
                                with open(log_path, "r", encoding="utf-8", errors="ignore") as f:
                                    # Read last 100KB
                                    f.seek(0, 2)
                                    file_size = f.tell()
                                    f.seek(max(0, file_size - 102400), 0)
                                    recent_content = f.read()

                                for pattern, error_msg in error_patterns:
                                    if re.search(pattern, recent_content, re.MULTILINE):
                                        return True, f"{error_msg} in {filename}"
                            except Exception as e:
                                logger.warning(f"Failed to check {filename}: {e}")
            except Exception:
                # result_dir might not exist yet, that's OK
                pass

        except Exception as e:
            logger.warning(f"Error during early failure check: {e}")

        return False, None

    @staticmethod
    def check_job_exists(job_id: str) -> bool:
        """Check whether the job still exists in the SLURM queue.

        Returns:
            True if the job exists (running or pending), False if the job is gone
        """
        try:
            # Use squeue to check if the job exists in the queue
            squeue_output = exec_cmd_with_output(["squeue", "-j", job_id, "--noheader"], timeout=30)
            # If the output is not empty, the job exists
            return bool(squeue_output.strip())
        except Exception as e:
            # If the command fails, assume the job doesn't exist
            logger.debug(f"squeue check failed (job likely finished): {e}")
            return False

    @staticmethod
    def wait_for_completion(
        job_id: str, timeout: int = 3600, test_config=None, check_early_failure: bool = True
    ) -> None:
        """Wait for the job to finish (disappear from the queue).

        Simplified logic: just wait until the job no longer exists in the SLURM queue,
        regardless of its final status (COMPLETED, CANCELLED, FAILED, etc.).
        If a timeout or an early failure is detected, cancel the job.
        The actual success/failure is determined later by log file parsing.

        Args:
            job_id: SLURM job ID
            timeout: Maximum wait time in seconds
            test_config: TestConfig object (required for early failure detection)
            check_early_failure: Whether to check logs for early failures
        """
        start_time = time.time()
        check_interval = 180  # Check every 3 minutes
        failure_check_interval = 60  # Check for failures every 60 seconds
        last_failure_check = start_time

        # Wait for the job to appear in the system (initial delay)
        logger.info(f"Waiting for job {job_id} to start...")
        time.sleep(60)  # Initial wait for the job to be scheduled

        logger.info(f"Waiting for job {job_id} to finish...")

        while time.time() - start_time < timeout:
            # Simple check: does the job still exist?
            job_exists = JobManager.check_job_exists(job_id)

            if not job_exists:
                # The job has disappeared from the queue, so it is done (whatever the status was)
                logger.success(f"Job {job_id} finished (no longer in queue)")
                return

            # Check for early failures (only if test_config is provided)
            current_time = time.time()
            if (
                check_early_failure
                and test_config
                and current_time - last_failure_check >= failure_check_interval
            ):
                has_error, error_msg = JobManager.check_for_early_failure(job_id, test_config)
                if has_error:
                    logger.error(f"Early failure detected: {error_msg}")
                    logger.warning(f"Cancelling job {job_id} due to early failure")
                    JobManager.cancel_job(job_id)
                    # Wait briefly for the job to be cancelled, then return
                    time.sleep(10)
                    return
                last_failure_check = current_time

            time.sleep(check_interval)

        # Timeout: cancel the job
        logger.warning(f"Job {job_id} timeout after {timeout} seconds, cancelling...")
        JobManager.cancel_job(job_id)
        # Wait briefly for the job to be cancelled
        time.sleep(10)

    @staticmethod
    def cancel_job(job_id: str) -> bool:
        """Cancel job."""
        try:
            exec_cmd(["scancel", job_id], timeout=30)
            logger.warning(f"Job cancelled: {job_id}")
            return True
        except Exception as e:
            logger.error(f"Job cancellation failed: {e}")
            return False

    @staticmethod
    def _print_logs_to_console(job_id: str, result_dir: str) -> None:
        """Print the SLURM log and all .log/.yaml files in result_dir to the console.

        Args:
            job_id: SLURM job ID, used to locate the slurm log file
            result_dir: Result directory containing log and config files
        """
        # Print the slurm log to the console (check that it exists first)
        slurm_log_path = os.path.join(EnvManager.get_work_dir(), f"slurm-{job_id}.out")
        if os.path.exists(slurm_log_path):
            slurm_log_writer = LogWriter(EnvManager.get_work_dir())
            slurm_log_writer.print_to_console(f"slurm-{job_id}.out")
        else:
            logger.warning(f"SLURM log file not found: {slurm_log_path}")

        # Print all .log and .yaml files in result_dir (except output_server.log)
        if not os.path.exists(result_dir):
            logger.warning(f"Result directory not found: {result_dir}")
            return

        log_writer = LogWriter(result_dir)
        files_to_print = []
        for file in os.listdir(result_dir):
            if (file.endswith(".log") or file.endswith(".yaml")) and file != "output_server.log":
                files_to_print.append(file)

        # Sort files for a consistent output order
        files_to_print.sort()

        for file in files_to_print:
            file_path = os.path.join(result_dir, file)
            if os.path.exists(file_path):
                log_writer.print_to_console(file)
            else:
                logger.warning(f"Log file not found: {file}")

    @staticmethod
    def _check_accuracy_result(
        job_id: str,
        metrics_config,
        accuracy_config,
        result_dir: str,
    ) -> Dict[str, Any]:
        """Check an accuracy test result.

        Args:
            job_id: SLURM job ID
            metrics_config: MetricsConfig object
            accuracy_config: AccuracyConfig object
            result_dir: Result directory

        Returns:
            Dict with success status and accuracy details
        """
        # Initialize the base result
        result: Dict[str, Any] = {"job_id": job_id, "status": "UNKNOWN", "success": False}

        # Validate accuracy_config
        if not accuracy_config:
            result["error"] = "Accuracy config not found in test configuration"
            return result

        # Check that result_dir exists
        if not os.path.exists(result_dir):
            error_msg = f"Result directory not found: {result_dir}"
            logger.error(error_msg)
            result["error"] = error_msg
            return result

        # Check that the required log file exists (7_accuracy_eval.log)
        accuracy_log = os.path.join(result_dir, "7_accuracy_eval.log")
        if not os.path.exists(accuracy_log):
            error_msg = f"Accuracy evaluation log file not found: {accuracy_log}"
            logger.error(error_msg)
            result["error"] = error_msg
            return result

        # Import and use AccuracyParser
        from reporting.accuracy_parser import AccuracyParser

        accuracy_parser = AccuracyParser(metrics_config, accuracy_config, result_dir)
        validation_result = accuracy_parser.parse_and_validate()

        # Check whether parsing succeeded
        if not validation_result["success"]:
            result["error"] = validation_result.get("error", "Accuracy validation failed")
            return result

        # Log validation results
        logger.info("Accuracy Validation Results:")
        all_passed = validation_result["all_passed"]

        # Log results for each run (using dataclass attributes for type safety)
        for run_validation in validation_result.get("runs", []):
            run_name = run_validation.run_name
            run_passed = run_validation.all_passed
            status = "PASSED" if run_passed else "FAILED"

            logger.info(f"[{status}] {run_name}:")

            for ds_result in run_validation.results:
                status = "PASSED" if ds_result.passed else "FAILED"
                dataset_name = ds_result.dataset
                filter_type = ds_result.filter
                threshold_type = ds_result.threshold_type

                logger.info(f"  [{status}] {dataset_name} ({filter_type}) - {threshold_type}:")
                if ds_result.error:
                    logger.error(f"    Error: {ds_result.error}")
                else:
                    logger.info(f"    Expected: {ds_result.expected:.4f}")
                    logger.info(f"    Actual: {ds_result.actual:.4f}")
                    logger.info(f"    Threshold type: {ds_result.threshold_type}")
                    logger.info(f"    {ds_result.message}")

        # Set the result status
        if all_passed:
            logger.success("All accuracy tests PASSED (all runs)")
            result["success"] = True
            result["status"] = "PASSED"
        else:
            logger.failure("Some accuracy tests FAILED")
            result["success"] = False
            result["status"] = "FAILED"
            result["error"] = "Some accuracy tests FAILED"

        # Add detailed results
        result["all_passed"] = validation_result["all_passed"]
        result["accuracy_runs"] = validation_result["runs"]
        result["raw_accuracy"] = validation_result.get("raw_results", [])

        return result

    @staticmethod
    def _check_perf_result(
        job_id: str,
        benchmark_type: str,
        config: dict,
        metrics_config,
        model_name: str,
        result_dir: str,
        timestamps: Optional[Dict[str, str]] = None,
        test_name: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Check performance test result.

        Args:
            job_id: SLURM job ID
            benchmark_type: Benchmark type (1k1k, 8k1k, etc.)
            config: Configuration dict (YAML data)
            metrics_config: MetricsConfig object
            model_name: Model name
            result_dir: Result directory
            timestamps: Optional timestamps dict
            test_name: Optional test name

        Returns:
            Dict with success status and performance details
        """
        result = {"job_id": job_id, "status": "UNKNOWN", "success": False}

        # Check if result_dir exists
        if not os.path.exists(result_dir):
            error_msg = f"Result directory not found: {result_dir}"
            logger.error(error_msg)
            result["error"] = error_msg
            return result

        # Check if required log file exists (6_bench.log)
        bench_log = os.path.join(result_dir, "6_bench.log")
        if not os.path.exists(bench_log):
            error_msg = f"Benchmark log file not found: {bench_log}"
            logger.error(error_msg)
            result["error"] = error_msg
            return result

        # Parse metrics and save to CSV
        log_parser = LogParser(benchmark_type, config, metrics_config, result_dir)
        parse_result = log_parser.parse(model_name, timestamps=timestamps, test_name=test_name)

        if not parse_result["status"]:
            result["error"] = "Failed to parse benchmark logs"
            return result

        # Check if df is None
        result_df = parse_result.get("df")
        if result_df is None:
            logger.error("Parse result contains None DataFrame")
            result["error"] = "Parse result contains None DataFrame"
            return result

        # Save results to CSV
        output_path = EnvManager.get_output_path()
        os.makedirs(output_path, exist_ok=True)

        output_csv = os.path.join(output_path, "perf_script_test_results.csv")
        result_saver = ResultSaver(output_csv)
        result_saver.append_a_df(result_df)

        result["success"] = True
        result["status"] = "SUCCESS"
        return result

    @staticmethod
    def _check_job_result(
        job_id: str,
        test_category: str,
        benchmark_type: str,
        config: dict,
        metrics_config,
        accuracy_config,
        model_name: str,
        result_dir: str,
        timestamps: Optional[Dict[str, str]] = None,
        test_name: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Internal method: Check job result with category routing.

        This is a low-level method that requires manual parameter extraction.
        Use check_result() for a high-level interface with TestConfig.

        Args:
            job_id: SLURM job ID
            test_category: Test category ("perf" or "accuracy")
            benchmark_type: Benchmark type (1k1k, 8k1k, etc.)
            config: Configuration dict (YAML data)
            metrics_config: MetricsConfig object (default or custom)
            accuracy_config: AccuracyConfig object (required for accuracy tests)
            model_name: Model name
            result_dir: Result directory
            timestamps: Optional timestamps dict
            test_name: Optional test name

        Returns:
            Dict with success status and details
        """
        logger.info(f"Checking result directory: {result_dir}")

        # Print logs and config files to the console
        JobManager._print_logs_to_console(job_id, result_dir)

        # Route based on test_category
        if test_category == "accuracy":
            return JobManager._check_accuracy_result(
                job_id=job_id,
                metrics_config=metrics_config,
                accuracy_config=accuracy_config,
                result_dir=result_dir,
            )
        else:  # perf
            return JobManager._check_perf_result(
                job_id=job_id,
                benchmark_type=benchmark_type,
                config=config,
                metrics_config=metrics_config,
                model_name=model_name,
                result_dir=result_dir,
                timestamps=timestamps,
                test_name=test_name,
            )


# Create the module-level executor function
run_job = make_slurm_run_command()
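
# A rough end-to-end sketch (assumptions flagged): it presumes a TestConfig object
# exposing the attributes this module reads (temp_config_path, config_data, test_id,
# test_category, benchmark_type, metrics_config, accuracy_config, model_name, ...),
# and the timeout below is illustrative. The real driver (test_disagg.py) also decides
# when to call backup_logs and with which is_passed value.
#
#   submitted, job_id = JobManager.submit_job(test_config)
#   if submitted:
#       JobManager.wait_for_completion(job_id, timeout=7200, test_config=test_config)
#       outcome = JobManager.check_result(job_id, test_config, test_name="example-case")
#       logger.info(f"status={outcome['status']} success={outcome['success']}")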