diff --git a/tests/integration/defs/perf/disagg/cleanup_jobs.sh b/tests/integration/defs/perf/disagg/cleanup_jobs.sh index 61ad4b160e..d2b21becbd 100644 --- a/tests/integration/defs/perf/disagg/cleanup_jobs.sh +++ b/tests/integration/defs/perf/disagg/cleanup_jobs.sh @@ -23,14 +23,19 @@ echo "==========================================" echo "Output path: $OUTPUT_PATH" echo "" -# Show pytest PID if available (for debugging) +# Terminate pytest process if still running if [ -f "$PID_FILE" ]; then PYTEST_PID=$(cat "$PID_FILE" | tr -d '\n') echo "Pytest PID: $PYTEST_PID" - # Check if pytest is still running + # Check if pytest is still running and kill it if kill -0 "$PYTEST_PID" 2>/dev/null; then - echo "Status: Still running" + echo "Status: Still running - terminating..." + if kill -9 "$PYTEST_PID" 2>/dev/null; then + echo " [OK] Process killed" + else + echo " [WARN] Failed to kill process (may already be gone)" + fi else echo "Status: Already terminated" fi diff --git a/tests/integration/defs/perf/disagg/execution/executor.py b/tests/integration/defs/perf/disagg/execution/executor.py index 8dec350f77..b5e2d9a83f 100644 --- a/tests/integration/defs/perf/disagg/execution/executor.py +++ b/tests/integration/defs/perf/disagg/execution/executor.py @@ -620,15 +620,8 @@ class JobManager: result["error"] = error_msg return result - # Check if required log file exists (7_accuracy_eval.log) - accuracy_log = os.path.join(result_dir, "7_accuracy_eval.log") - if not os.path.exists(accuracy_log): - error_msg = f"Accuracy evaluation log file not found: {accuracy_log}" - logger.error(error_msg) - result["error"] = error_msg - return result - # Import and use AccuracyParser + # Note: AccuracyParser handles log file checking with glob pattern support from reporting.accuracy_parser import AccuracyParser accuracy_parser = AccuracyParser(metrics_config, accuracy_config, result_dir) diff --git a/tests/integration/defs/perf/disagg/reporting/accuracy_parser.py 
b/tests/integration/defs/perf/disagg/reporting/accuracy_parser.py index 1e8e28f709..7704ae8ef0 100644 --- a/tests/integration/defs/perf/disagg/reporting/accuracy_parser.py +++ b/tests/integration/defs/perf/disagg/reporting/accuracy_parser.py @@ -1,5 +1,6 @@ """Accuracy test result parser.""" +import glob import os import re from typing import Dict, List @@ -28,40 +29,77 @@ class AccuracyParser: self.result_dir = result_dir def parse_and_validate(self) -> AccuracyValidationResult: - """Parse accuracy_eval.log and validate all configured datasets for all runs. + """Parse accuracy_eval.log(s) and validate all configured datasets for all runs. Supports multiple runs (e.g., pre-benchmark and post-benchmark). + Supports wildcard patterns in log_file (e.g., "7_accuracy_eval_*.log"). All runs must pass for the validation to succeed. Returns: AccuracyValidationResult with validation results for all runs """ - log_file = os.path.join(self.result_dir, self.metrics_config.log_file) + log_pattern = self.metrics_config.log_file - if not os.path.exists(log_file): + # Check if pattern contains wildcards + if "*" in log_pattern or "?" 
in log_pattern or "[" in log_pattern: + # Use glob to match multiple files + log_files = sorted(glob.glob(os.path.join(self.result_dir, log_pattern))) + + if not log_files: + return { + "success": False, + "all_passed": False, + "runs": [], + "raw_results": [], + "error": f"No log files found matching pattern: {log_pattern}", + } + + logger.info(f"Found {len(log_files)} log file(s) matching pattern '{log_pattern}'") + else: + # Single file (backward compatible) + log_file = os.path.join(self.result_dir, log_pattern) + + if not os.path.exists(log_file): + return { + "success": False, + "all_passed": False, + "runs": [], + "raw_results": [], + "error": f"Log file not found: {log_file}", + } + + log_files = [log_file] + + # Read and merge all log files + combined_log_content = "" + failed_files = [] + + for log_file in log_files: + try: + with open(log_file, "r", encoding="utf-8", errors="replace") as f: + content = f.read() + combined_log_content += content + if not content.endswith("\n"): + combined_log_content += "\n" # Ensure separation between files + logger.info(f"Successfully read log file: {os.path.basename(log_file)}") + except Exception as e: + failed_files.append((log_file, str(e))) + logger.warning(f"Failed to read {log_file}: {e}") + + if not combined_log_content: + error_msg = "No valid log content found." 
+ if failed_files: + error_msg += f" Failed files: {failed_files}" return { "success": False, "all_passed": False, "runs": [], "raw_results": [], - "error": f"Log file not found: {log_file}", - } - - # Read log file - try: - with open(log_file, "r", encoding="utf-8", errors="replace") as f: - log_content = f.read() - except Exception as e: - return { - "success": False, - "all_passed": False, - "runs": [], - "raw_results": [], - "error": f"Failed to read log file: {e}", + "error": error_msg, } # Extract accuracy values for all runs - all_runs_results = self._extract_accuracy_values(log_content) + all_runs_results = self._extract_accuracy_values(combined_log_content) if not all_runs_results: return { diff --git a/tests/integration/defs/perf/disagg/reporting/accuracy_validator.py b/tests/integration/defs/perf/disagg/reporting/accuracy_validator.py index 14e0a1cdfb..1afe7ab959 100644 --- a/tests/integration/defs/perf/disagg/reporting/accuracy_validator.py +++ b/tests/integration/defs/perf/disagg/reporting/accuracy_validator.py @@ -135,7 +135,7 @@ DATASET_DEFAULTS = { "higher_is_better": True, }, # Alias for gpqa_diamond (same task, different naming convention) - "gpqa_diamond_cot_zeroshot": { + "gpqa_diamond_local": { "alpha": 0.05, "beta": 0.2, "sigma": 50, diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb0_mtp3_ccb-NIXL.yaml index 13dee8cb7a..2f94667a53 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb0_mtp3_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - 
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb0_mtp3_ccb-UCX.yaml index 121c5c7dc6..fba25771bb 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb0_mtp3_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml index e7a0ed5620..213c6ced5f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git 
a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml index bdf4ee44ab..d7279d3b21 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml index 2aa6487c95..1ff8177d23 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml 
b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml index 5ad6248b5a..0437408d43 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp1_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp1_ccb-NIXL.yaml index d58e41d4ce..eb9fd647b7 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp1_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp1_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp1_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp1_ccb-UCX.yaml index 8bb52a5e03..e468e51c7e 100644 --- 
a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp1_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp1_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml index 3453903ce4..7f4c66a24f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 4 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml index 06fb9200a8..b6e6e3d826 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/Qwen3-235B-A22B-FP8_1k1k_ctx1_gen1_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 4 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml index ba00067777..007c4b3325 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml index 6258e00ba3..d85ff79d08 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: 
num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep8_bs768_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep8_bs768_eplb0_mtp0_ccb-UCX.yaml index 97ce35a0a7..be9dc6556d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep8_bs768_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep8_bs768_eplb0_mtp0_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml index 95c87e6b5e..9f8ec3a9f9 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git 
a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml index e1036437b0..6260fc5b8c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml index 1b9577e9ee..e3b9d07ff4 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml index e483bedba9..b483ce1c8c 
100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx1_gen4_tep8_bs32_eplb0_mtp3_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml index 5848d8e0f5..7bf0861937 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml index 5be2578220..9e6eda5459 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb0_mtp3_ccb-UCX.yaml 
@@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen1_dep32_bs128_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen1_dep32_bs128_eplb0_mtp3_ccb-UCX.yaml index b6299357d2..c8f368acfc 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen1_dep32_bs128_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen1_dep32_bs128_eplb0_mtp3_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml index 3c64d97e26..6ff5914009 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: 
tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml index 320e37c4ef..7667c7903a 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs16_eplb0_mtp3_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml index c4ec240071..8a0bc5ca82 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml 
b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml index 76ea2c7e67..3328a559c3 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx1_gen3_tep8_bs32_eplb0_mtp0_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 8 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml index 20ab0f61af..0b37895f1e 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml index cfa082f78e..856de14f2f 100644 --- 
a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb0_mtp0_ccb-UCX.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml index e8dc26447f..690afbff78 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-NIXL.yaml @@ -42,9 +42,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml index aacad0ebc3..115af8642d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb0_mtp3_ccb-UCX.yaml @@ -42,9 +42,6 
@@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/disagg/stress/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/disagg/stress/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml index 9d5ca29c12..358d50fcaf 100644 --- a/tests/integration/defs/perf/disagg/test_configs/disagg/stress/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/disagg/stress/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml @@ -50,9 +50,14 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: true - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 + env_var: + HF_HOME: + tasks: + gsm8k: + model: "local-completions" + model_args_extra: "num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=7200,max_gen_toks=16384" + extra_kwargs: + trust_remote_code: true worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 914924bfd2..71438a796e 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -47,9 +47,14 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: true - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 + env_var: + HF_HOME: + tasks: + gsm8k: + model: "local-completions" + model_args_extra: "num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=7200,max_gen_toks=16384" + extra_kwargs: + trust_remote_code: true worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-NIXL.yaml index c237dc5f0f..0d6c3b5d77 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/accuracy/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-NIXL.yaml @@ -10,7 +10,7 @@ metadata: dataset_file: disagg_datasets/kimi-k2-1024-1024-20000-ratio-1_for_serve.json accuracy: datasets: - - dataset_name: gpqa_diamond_cot_zeroshot + - dataset_name: gpqa_diamond_local expected_value: 0.65 threshold_type: hypothesis_test filter_type: strict-match @@ -29,7 +29,7 @@ benchmark: multi_round: 8 benchmark_ratio: 1.0 streaming: true - concurrency_list: '16384' + concurrency_list: '8192' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml index 3c33b288e5..82ba1fc92c 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml index 0a6135f34a..431258ab8e 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 75accd8631..a1cb7b2a24 100644 --- 
a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index 2df13f962f..7711e130ca 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml index 3c0b8d2e7a..873de8b2df 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml index 2dd7fd80b2..845b694cbb 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index fedb8825b2..93f3662b9f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - 
model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml index 5766454980..10692cc270 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 4d4f8cb7db..55a292499a 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git 
a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml index fc12422943..5c022fa295 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml index d2f81b865e..ee04a0268d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml @@ -45,9 +45,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml index a0d6836d55..00c518c864 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index 60b022a21d..e6203c75c7 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml index f4cfcda4e6..ff4c5276bf 100644 --- 
a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index dafc6a7df7..c9bc7351f8 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index bb54d661a5..4185f89449 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -43,9 +43,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index 58ef6a5bed..58ae5032d8 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -13,7 +13,7 @@ slurm: script_file: disaggr_torch.slurm partition: account: - job_time: 02:00:00 + job_time: 03:00:00 job_name: unified-benchmark extra_args: "--gres=gpu:4" numa_bind: true @@ -44,9 +44,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false # Set to true to enable accuracy evaluation - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index 9da7bfd08b..c3a4bee671 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -13,7 +13,7 @@ slurm: script_file: disaggr_torch.slurm partition: account: - job_time: 02:00:00 + job_time: 04:00:00 job_name: unified-benchmark extra_args: "--gres=gpu:4" numa_bind: true @@ -44,9 +44,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml index 21ab0d765e..f3dcda394e 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep48_bs16_eplb288_mtp3_ccb-DEFAULT.yaml @@ -14,7 +14,7 @@ slurm: script_file: disaggr_torch.slurm partition: account: - job_time: 02:00:00 + job_time: 04:00:00 job_name: unified-benchmark extra_args: "--gres=gpu:4" numa_bind: true @@ -45,9 +45,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 48 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml index 
5697528941..8752c59e1f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx2_gen1_dep32_bs128_eplb288_mtp3_ccb-DEFAULT.yaml @@ -13,7 +13,7 @@ slurm: script_file: disaggr_torch.slurm partition: account: - job_time: 02:00:00 + job_time: 04:00:00 job_name: disaggr-test extra_args: "--gres=gpu:4" numa_bind: true @@ -44,9 +44,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index 7219692295..edec6340a7 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -13,7 +13,7 @@ slurm: script_file: disaggr_torch.slurm partition: account: - job_time: 02:00:00 + job_time: 04:00:00 job_name: unified-benchmark extra_args: "--gres=gpu:4" numa_bind: true @@ -44,9 +44,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 16 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 6bfa477ad7..05dc30cf54 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -13,7 +13,7 @@ slurm: script_file: disaggr_torch.slurm partition: account: - job_time: 02:00:00 + job_time: 04:00:00 job_name: unified-benchmark extra_args: "--gres=gpu:4" numa_bind: true @@ -44,9 +44,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: tensor_parallel_size: 32 diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-NIXL.yaml index dbba4ca355..44a756fc8d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_1k1k_ctx3_gen1_dep32_bs1024_eplb384_mtp0_ccb-NIXL.yaml @@ -41,9 +41,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-NIXL.yaml 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-NIXL.yaml index 6178b8e929..2584fd7908 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/kimi-k2-thinking-fp4_8k1k_ctx8_gen1_dep32_bs256_eplb416_mtp0_ccb-NIXL.yaml @@ -41,9 +41,6 @@ profiling: nsys_on: false accuracy: enable_accuracy_test: false - model: local-completions - tasks: gsm8k - model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096 worker_config: gen: enable_layerwise_nvtx_marker: true diff --git a/tests/integration/defs/perf/disagg/test_disagg.py b/tests/integration/defs/perf/disagg/test_disagg.py index 36e786949d..9e724c56de 100644 --- a/tests/integration/defs/perf/disagg/test_disagg.py +++ b/tests/integration/defs/perf/disagg/test_disagg.py @@ -112,8 +112,8 @@ class TestDisaggBenchmark: ) assert job_id, f"Failed to submit job for {test_config.test_id}\n{error_msg}" - # Wait for completion (timeout: 10 hours = 36000 seconds) - JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True) + # Wait for completion (timeout: 15 hours = 54000 seconds) + JobManager.wait_for_completion(job_id, 54000, test_config, check_early_failure=True) # End tracking test case test_tracker.end_test_case() @@ -188,8 +188,8 @@ class TestDisaggBenchmark: # Validate submission result assert job_id, f"Failed to get job_id for {test_config.test_id}" - # Wait for completion (timeout: 10 hours = 36000 seconds) - JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True) + # Wait for completion (timeout: 15 hours = 54000 seconds) + JobManager.wait_for_completion(job_id, 54000, test_config, check_early_failure=True) # End tracking test case test_tracker.end_test_case() @@ -272,8 
+272,8 @@ class TestDisaggBenchmark: ) assert job_id, f"Failed to submit job for {test_config.test_id}\n{error_msg}" - # Wait for completion (timeout: 10 hours = 36000 seconds) - JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True) + # Wait for completion (timeout: 15 hours = 54000 seconds) + JobManager.wait_for_completion(job_id, 54000, test_config, check_early_failure=True) # End tracking test case test_tracker.end_test_case() diff --git a/tests/integration/defs/perf/disagg/utils/common.py b/tests/integration/defs/perf/disagg/utils/common.py index b4aecc116d..fbd28aa534 100644 --- a/tests/integration/defs/perf/disagg/utils/common.py +++ b/tests/integration/defs/perf/disagg/utils/common.py @@ -12,7 +12,14 @@ GPU_RESOURCE_CONFIG = { "lock_freq_graphics_mhz": 2062, # GPU graphics clock lock frequency (MHz) "lock_freq_memory_mhz": 3996, # GPU memory clock lock frequency (MHz) }, - # OCI GB300 + # Lyris GB200 + "GB200_LYRIS": { + "slurm_extra_args": "", # GB200 does not require extra args + "set_segment": True, + "lock_freq_graphics_mhz": None, # TODO: Set GB200 lock frequency + "lock_freq_memory_mhz": None, + }, + # Lyris GB300 "GB300": { "slurm_extra_args": "", # GB300 does not require extra args "set_segment": True, @@ -121,6 +128,10 @@ class EnvManager: def get_dataset_dir() -> str: return os.getenv("DATASET_DIR", "") + @staticmethod + def get_hf_home_dir() -> str: + return os.getenv("HF_HOME_DIR", "") + @staticmethod def get_output_path() -> str: output_path = os.getenv( diff --git a/tests/integration/defs/perf/disagg/utils/config_loader.py b/tests/integration/defs/perf/disagg/utils/config_loader.py index 567834d6a7..96cfc5f2be 100644 --- a/tests/integration/defs/perf/disagg/utils/config_loader.py +++ b/tests/integration/defs/perf/disagg/utils/config_loader.py @@ -87,10 +87,10 @@ class AccuracyConfig: # ============================================================================ # Accuracy test uses accuracy_eval.log (markdown 
table output from lm_eval) -# Note: Only log_file is used by AccuracyParser (accuracy_parser.py) -# The regex pattern is hardcoded in AccuracyParser._extract_accuracy_values() +# Note: submit.py generates separate log files for each task (e.g., 7_accuracy_eval_{task}.log) +# Use glob pattern to automatically match all accuracy log files _COMMON_ACCURACY_METRICS = MetricsConfig( - log_file="7_accuracy_eval.log", + log_file="7_accuracy_eval_*.log", extractor_pattern=r"\|([a-zA-Z0-9_-]+)\|.*?\|([\w-]+)\|.*?\|exact_match\|.*?\|([0-9.]+)\|", metric_names=["flexible-extract", "strict-match"], ) @@ -148,7 +148,7 @@ DEFAULT_METRICS_CONFIG = { "SERVER_MEDIAN_ITL", "SERVER_P99_ITL", "SERVER_MEAN_E2EL", - "SERVER_E2EL", # Median E2EL (keep the same name as disagg) + "SERVER_MEDIAN_E2EL", # Median E2EL (keep the same name as disagg) "SERVER_P99_E2EL", ], ), @@ -230,6 +230,9 @@ class ConfigLoader: if gpu_type is None: gpu_type = EnvManager.get_gpu_type() + # GB200_LYRIS is also in the GB200 family + if gpu_type.startswith("GB200_"): + gpu_type = "GB200" configs = [] if not self.base_dir.exists(): @@ -406,7 +409,7 @@ class ConfigLoader: if "metrics" in acc_meta: metrics_override = acc_meta["metrics"] custom_metrics = MetricsConfig( - log_file=metrics_override.get("log_file", "7_accuracy_eval.log"), + log_file=metrics_override.get("log_file", "7_accuracy_eval_*.log"), extractor_pattern=metrics_override.get( "extractor_pattern", r"\|([a-zA-Z0-9_-]+)\|.*?\|([\w-]+)\|.*?\|exact_match\|.*?\|([0-9.]+)\|", @@ -517,9 +520,10 @@ class ConfigLoader: ("slurm", "job_name"): lambda: EnvManager.get_slurm_job_name(), ("environment", "container_mount"): lambda: EnvManager.get_container_mount(model_name), ("environment", "container_image"): lambda: EnvManager.get_container_image(), - ("environment", "trtllm_repo"): lambda: EnvManager.get_repo_dir(), + ("environment", "trtllm_repo"): lambda: self._get_repo_dir(), ("environment", "trtllm_wheel_path"): lambda: EnvManager.get_trtllm_wheel_path(),
("benchmark", "dataset_file"): lambda: self._get_dataset_file(config), + ("accuracy", "env_var", "HF_HOME"): lambda: EnvManager.get_hf_home_dir(), ("environment", "work_dir"): lambda: EnvManager.get_script_dir(), ("environment", "model_path"): lambda: self._get_full_model_path(config), ("slurm", "script_file"): lambda: self._get_script_file(config), @@ -528,11 +532,67 @@ class ConfigLoader: } # Apply overrides based on field paths - for (section, key), value_getter in field_mapping.items(): - if section in config: - config[section][key] = value_getter() + for path, value_getter in field_mapping.items(): + self._set_nested_value(config, path, value_getter()) + + # Apply dynamic overrides for accuracy.tasks (task names are dynamic) + self._apply_accuracy_tasks_overrides(config) + return config + def _set_nested_value(self, config: dict, path: tuple, value: any) -> None: + """Set value at nested path in config. + + Supports arbitrary nesting depth using tuple paths. + Creates missing intermediate levels automatically. + + Args: + config: Configuration dictionary + path: Tuple of keys representing the path (e.g., ("a", "b", "c")) + value: Value to set + + Example: + _set_nested_value(config, ("accuracy", "env_var", "HF_HOME"), "/path") + # Sets config["accuracy"]["env_var"]["HF_HOME"] = "/path" + """ + current = config + + # Traverse/create path, except for the last key + for key in path[:-1]: + if key not in current: + current[key] = {} + current = current[key] + + # Set the final value + current[path[-1]] = value + + def _apply_accuracy_tasks_overrides(self, config: dict) -> None: + """Apply environment overrides for accuracy.tasks configuration. + + Handles dynamic task names (e.g., gsm8k, gpqa_diamond_local). + Replaces placeholders in custom_config paths. 
+ + Args: + config: Configuration dictionary + """ + if "accuracy" not in config or "tasks" not in config["accuracy"]: + return + + repo_dir = EnvManager.get_repo_dir() + + # Iterate through all tasks (task names are dynamic) + for task_name, task_config in config["accuracy"]["tasks"].items(): + if not isinstance(task_config, dict): + continue + + # Replace in custom_config + if "extra_kwargs" in task_config and "custom_config" in task_config["extra_kwargs"]: + custom_config_path = task_config["extra_kwargs"]["custom_config"] + if "" in custom_config_path: + task_config["extra_kwargs"]["custom_config"] = custom_config_path.replace( + "", repo_dir + ) + def _get_full_model_path(self, config: dict) -> str: """Get full model path by combining MODEL_DIR with model directory name. @@ -548,6 +608,12 @@ class ConfigLoader: else: return "" + def _get_repo_dir(self): + if EnvManager.get_install_mode() == "source": + return EnvManager.get_repo_dir() + else: # wheel/none install_mode, no need to set repo_dir + return "" + def _get_dataset_file(self, config: dict) -> str: """Get dataset file by combining dataset directory with dataset file name.