mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
[None][feat] support multi_acc and Lyris GB200 test (#11024)
Signed-off-by: yingguo-trt <244492186+yingguo-trt@users.noreply.github.com>
This commit is contained in:
parent
29647d9446
commit
e70a55bd94
@ -23,14 +23,19 @@ echo "=========================================="
|
||||
echo "Output path: $OUTPUT_PATH"
|
||||
echo ""
|
||||
|
||||
# Show pytest PID if available (for debugging)
|
||||
# Terminate pytest process if still running
|
||||
if [ -f "$PID_FILE" ]; then
|
||||
PYTEST_PID=$(cat "$PID_FILE" | tr -d '\n')
|
||||
echo "Pytest PID: $PYTEST_PID"
|
||||
|
||||
# Check if pytest is still running
|
||||
# Check if pytest is still running and kill it
|
||||
if kill -0 "$PYTEST_PID" 2>/dev/null; then
|
||||
echo "Status: Still running"
|
||||
echo "Status: Still running - terminating..."
|
||||
if kill -9 "$PYTEST_PID" 2>/dev/null; then
|
||||
echo " [OK] Process killed"
|
||||
else
|
||||
echo " [WARN] Failed to kill process (may already be gone)"
|
||||
fi
|
||||
else
|
||||
echo "Status: Already terminated"
|
||||
fi
|
||||
|
||||
@ -620,15 +620,8 @@ class JobManager:
|
||||
result["error"] = error_msg
|
||||
return result
|
||||
|
||||
# Check if required log file exists (7_accuracy_eval.log)
|
||||
accuracy_log = os.path.join(result_dir, "7_accuracy_eval.log")
|
||||
if not os.path.exists(accuracy_log):
|
||||
error_msg = f"Accuracy evaluation log file not found: {accuracy_log}"
|
||||
logger.error(error_msg)
|
||||
result["error"] = error_msg
|
||||
return result
|
||||
|
||||
# Import and use AccuracyParser
|
||||
# Note: AccuracyParser handles log file checking with glob pattern support
|
||||
from reporting.accuracy_parser import AccuracyParser
|
||||
|
||||
accuracy_parser = AccuracyParser(metrics_config, accuracy_config, result_dir)
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
"""Accuracy test result parser."""
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, List
|
||||
@ -28,40 +29,77 @@ class AccuracyParser:
|
||||
self.result_dir = result_dir
|
||||
|
||||
def parse_and_validate(self) -> AccuracyValidationResult:
|
||||
"""Parse accuracy_eval.log and validate all configured datasets for all runs.
|
||||
"""Parse accuracy_eval.log(s) and validate all configured datasets for all runs.
|
||||
|
||||
Supports multiple runs (e.g., pre-benchmark and post-benchmark).
|
||||
Supports wildcard patterns in log_file (e.g., "7_accuracy_eval_*.log").
|
||||
All runs must pass for the validation to succeed.
|
||||
|
||||
Returns:
|
||||
AccuracyValidationResult with validation results for all runs
|
||||
"""
|
||||
log_file = os.path.join(self.result_dir, self.metrics_config.log_file)
|
||||
log_pattern = self.metrics_config.log_file
|
||||
|
||||
if not os.path.exists(log_file):
|
||||
# Check if pattern contains wildcards
|
||||
if "*" in log_pattern or "?" in log_pattern or "[" in log_pattern:
|
||||
# Use glob to match multiple files
|
||||
log_files = sorted(glob.glob(os.path.join(self.result_dir, log_pattern)))
|
||||
|
||||
if not log_files:
|
||||
return {
|
||||
"success": False,
|
||||
"all_passed": False,
|
||||
"runs": [],
|
||||
"raw_results": [],
|
||||
"error": f"No log files found matching pattern: {log_pattern}",
|
||||
}
|
||||
|
||||
logger.info(f"Found {len(log_files)} log file(s) matching pattern '{log_pattern}'")
|
||||
else:
|
||||
# Single file (backward compatible)
|
||||
log_file = os.path.join(self.result_dir, log_pattern)
|
||||
|
||||
if not os.path.exists(log_file):
|
||||
return {
|
||||
"success": False,
|
||||
"all_passed": False,
|
||||
"runs": [],
|
||||
"raw_results": [],
|
||||
"error": f"Log file not found: {log_file}",
|
||||
}
|
||||
|
||||
log_files = [log_file]
|
||||
|
||||
# Read and merge all log files
|
||||
combined_log_content = ""
|
||||
failed_files = []
|
||||
|
||||
for log_file in log_files:
|
||||
try:
|
||||
with open(log_file, "r", encoding="utf-8", errors="replace") as f:
|
||||
content = f.read()
|
||||
combined_log_content += content
|
||||
if not content.endswith("\n"):
|
||||
combined_log_content += "\n" # Ensure separation between files
|
||||
logger.info(f"Successfully read log file: {os.path.basename(log_file)}")
|
||||
except Exception as e:
|
||||
failed_files.append((log_file, str(e)))
|
||||
logger.warning(f"Failed to read {log_file}: {e}")
|
||||
|
||||
if not combined_log_content:
|
||||
error_msg = "No valid log content found."
|
||||
if failed_files:
|
||||
error_msg += f" Failed files: {failed_files}"
|
||||
return {
|
||||
"success": False,
|
||||
"all_passed": False,
|
||||
"runs": [],
|
||||
"raw_results": [],
|
||||
"error": f"Log file not found: {log_file}",
|
||||
}
|
||||
|
||||
# Read log file
|
||||
try:
|
||||
with open(log_file, "r", encoding="utf-8", errors="replace") as f:
|
||||
log_content = f.read()
|
||||
except Exception as e:
|
||||
return {
|
||||
"success": False,
|
||||
"all_passed": False,
|
||||
"runs": [],
|
||||
"raw_results": [],
|
||||
"error": f"Failed to read log file: {e}",
|
||||
"error": error_msg,
|
||||
}
|
||||
|
||||
# Extract accuracy values for all runs
|
||||
all_runs_results = self._extract_accuracy_values(log_content)
|
||||
all_runs_results = self._extract_accuracy_values(combined_log_content)
|
||||
|
||||
if not all_runs_results:
|
||||
return {
|
||||
|
||||
@ -135,7 +135,7 @@ DATASET_DEFAULTS = {
|
||||
"higher_is_better": True,
|
||||
},
|
||||
# Alias for gpqa_diamond (same task, different naming convention)
|
||||
"gpqa_diamond_cot_zeroshot": {
|
||||
"gpqa_diamond_local": {
|
||||
"alpha": 0.05,
|
||||
"beta": 0.2,
|
||||
"sigma": 50,
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 4
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 4
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 8
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -42,9 +42,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -50,9 +50,14 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: true
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
env_var:
|
||||
HF_HOME: <hf_home_path>
|
||||
tasks:
|
||||
gsm8k:
|
||||
model: "local-completions"
|
||||
model_args_extra: "num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=7200,max_gen_toks=16384"
|
||||
extra_kwargs:
|
||||
trust_remote_code: true
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -47,9 +47,14 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: true
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
env_var:
|
||||
HF_HOME: <hf_home_path>
|
||||
tasks:
|
||||
gsm8k:
|
||||
model: "local-completions"
|
||||
model_args_extra: "num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=7200,max_gen_toks=16384"
|
||||
extra_kwargs:
|
||||
trust_remote_code: true
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -10,7 +10,7 @@ metadata:
|
||||
dataset_file: disagg_datasets/kimi-k2-1024-1024-20000-ratio-1_for_serve.json
|
||||
accuracy:
|
||||
datasets:
|
||||
- dataset_name: gpqa_diamond_cot_zeroshot
|
||||
- dataset_name: gpqa_diamond_local
|
||||
expected_value: 0.65
|
||||
threshold_type: hypothesis_test
|
||||
filter_type: strict-match
|
||||
@ -29,7 +29,7 @@ benchmark:
|
||||
multi_round: 8
|
||||
benchmark_ratio: 1.0
|
||||
streaming: true
|
||||
concurrency_list: '16384'
|
||||
concurrency_list: '8192'
|
||||
input_length: 1024
|
||||
output_length: 1024
|
||||
dataset_file: <dataset_file>
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -45,9 +45,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -43,9 +43,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -13,7 +13,7 @@ slurm:
|
||||
script_file: disaggr_torch.slurm
|
||||
partition: <partition>
|
||||
account: <account>
|
||||
job_time: 02:00:00
|
||||
job_time: 03:00:00
|
||||
job_name: unified-benchmark
|
||||
extra_args: "--gres=gpu:4"
|
||||
numa_bind: true
|
||||
@ -44,9 +44,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false # Set to true to enable accuracy evaluation
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -13,7 +13,7 @@ slurm:
|
||||
script_file: disaggr_torch.slurm
|
||||
partition: <partition>
|
||||
account: <account>
|
||||
job_time: 02:00:00
|
||||
job_time: 04:00:00
|
||||
job_name: unified-benchmark
|
||||
extra_args: "--gres=gpu:4"
|
||||
numa_bind: true
|
||||
@ -44,9 +44,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -14,7 +14,7 @@ slurm:
|
||||
script_file: disaggr_torch.slurm
|
||||
partition: <partition>
|
||||
account: <account>
|
||||
job_time: 02:00:00
|
||||
job_time: 04:00:00
|
||||
job_name: unified-benchmark
|
||||
extra_args: "--gres=gpu:4"
|
||||
numa_bind: true
|
||||
@ -45,9 +45,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 48
|
||||
|
||||
@ -13,7 +13,7 @@ slurm:
|
||||
script_file: disaggr_torch.slurm
|
||||
partition: <partition>
|
||||
account: <account>
|
||||
job_time: 02:00:00
|
||||
job_time: 04:00:00
|
||||
job_name: disaggr-test
|
||||
extra_args: "--gres=gpu:4"
|
||||
numa_bind: true
|
||||
@ -44,9 +44,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -13,7 +13,7 @@ slurm:
|
||||
script_file: disaggr_torch.slurm
|
||||
partition: <partition>
|
||||
account: <account>
|
||||
job_time: 02:00:00
|
||||
job_time: 04:00:00
|
||||
job_name: unified-benchmark
|
||||
extra_args: "--gres=gpu:4"
|
||||
numa_bind: true
|
||||
@ -44,9 +44,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 16
|
||||
|
||||
@ -13,7 +13,7 @@ slurm:
|
||||
script_file: disaggr_torch.slurm
|
||||
partition: <partition>
|
||||
account: <account>
|
||||
job_time: 02:00:00
|
||||
job_time: 04:00:00
|
||||
job_name: unified-benchmark
|
||||
extra_args: "--gres=gpu:4"
|
||||
numa_bind: true
|
||||
@ -44,9 +44,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
tensor_parallel_size: 32
|
||||
|
||||
@ -41,9 +41,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -41,9 +41,6 @@ profiling:
|
||||
nsys_on: false
|
||||
accuracy:
|
||||
enable_accuracy_test: false
|
||||
model: local-completions
|
||||
tasks: gsm8k
|
||||
model_args_extra: num_concurrent=512,max_retries=3,tokenized_requests=false,timeout=1200,max_gen_toks=256,max_length=4096
|
||||
worker_config:
|
||||
gen:
|
||||
enable_layerwise_nvtx_marker: true
|
||||
|
||||
@ -112,8 +112,8 @@ class TestDisaggBenchmark:
|
||||
)
|
||||
assert job_id, f"Failed to submit job for {test_config.test_id}\n{error_msg}"
|
||||
|
||||
# Wait for completion (timeout: 10 hours = 36000 seconds)
|
||||
JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True)
|
||||
# Wait for completion (timeout: 15 hours = 54000 seconds)
|
||||
JobManager.wait_for_completion(job_id, 54000, test_config, check_early_failure=True)
|
||||
|
||||
# End tracking test case
|
||||
test_tracker.end_test_case()
|
||||
@ -188,8 +188,8 @@ class TestDisaggBenchmark:
|
||||
# Validate submission result
|
||||
assert job_id, f"Failed to get job_id for {test_config.test_id}"
|
||||
|
||||
# Wait for completion (timeout: 10 hours = 36000 seconds)
|
||||
JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True)
|
||||
# Wait for completion (timeout: 15 hours = 54000 seconds)
|
||||
JobManager.wait_for_completion(job_id, 54000, test_config, check_early_failure=True)
|
||||
|
||||
# End tracking test case
|
||||
test_tracker.end_test_case()
|
||||
@ -272,8 +272,8 @@ class TestDisaggBenchmark:
|
||||
)
|
||||
assert job_id, f"Failed to submit job for {test_config.test_id}\n{error_msg}"
|
||||
|
||||
# Wait for completion (timeout: 10 hours = 36000 seconds)
|
||||
JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True)
|
||||
# Wait for completion (timeout: 15 hours = 54000 seconds)
|
||||
JobManager.wait_for_completion(job_id, 54000, test_config, check_early_failure=True)
|
||||
|
||||
# End tracking test case
|
||||
test_tracker.end_test_case()
|
||||
|
||||
@ -12,7 +12,14 @@ GPU_RESOURCE_CONFIG = {
|
||||
"lock_freq_graphics_mhz": 2062, # GPU graphics clock lock frequency (MHz)
|
||||
"lock_freq_memory_mhz": 3996, # GPU memory clock lock frequency (MHz)
|
||||
},
|
||||
# OCI GB300
|
||||
# Lyris GB200
|
||||
"GB200_LYRIS": {
|
||||
"slurm_extra_args": "", # GB200 does not require extra args
|
||||
"set_segment": True,
|
||||
"lock_freq_graphics_mhz": None, # TODO: Set GB200 lock frequency
|
||||
"lock_freq_memory_mhz": None,
|
||||
},
|
||||
# Lyris GB300
|
||||
"GB300": {
|
||||
"slurm_extra_args": "", # GB300 does not require extra args
|
||||
"set_segment": True,
|
||||
@ -121,6 +128,10 @@ class EnvManager:
|
||||
def get_dataset_dir() -> str:
|
||||
return os.getenv("DATASET_DIR", "<Your dataset directory>")
|
||||
|
||||
@staticmethod
|
||||
def get_hf_home_dir() -> str:
|
||||
return os.getenv("HF_HOME_DIR", "<Your HF home directory>")
|
||||
|
||||
@staticmethod
|
||||
def get_output_path() -> str:
|
||||
output_path = os.getenv(
|
||||
|
||||
@ -87,10 +87,10 @@ class AccuracyConfig:
|
||||
# ============================================================================
|
||||
|
||||
# Accuracy test uses accuracy_eval.log (markdown table output from lm_eval)
|
||||
# Note: Only log_file is used by AccuracyParser (accuracy_parser.py)
|
||||
# The regex pattern is hardcoded in AccuracyParser._extract_accuracy_values()
|
||||
# Note: submit.py generates separate log files for each task (e.g., 7_accuracy_eval_{task}.log)
|
||||
# Use glob pattern to automatically match all accuracy log files
|
||||
_COMMON_ACCURACY_METRICS = MetricsConfig(
|
||||
log_file="7_accuracy_eval.log",
|
||||
log_file="7_accuracy_eval_*.log",
|
||||
extractor_pattern=r"\|([a-zA-Z0-9_-]+)\|.*?\|([\w-]+)\|.*?\|exact_match\|.*?\|([0-9.]+)\|",
|
||||
metric_names=["flexible-extract", "strict-match"],
|
||||
)
|
||||
@ -148,7 +148,7 @@ DEFAULT_METRICS_CONFIG = {
|
||||
"SERVER_MEDIAN_ITL",
|
||||
"SERVER_P99_ITL",
|
||||
"SERVER_MEAN_E2EL",
|
||||
"SERVER_E2EL", # Median E2EL (keep the same name as disagg)
|
||||
"SERVER_MEDIAN_E2EL", # Median E2EL (keep the same name as disagg)
|
||||
"SERVER_P99_E2EL",
|
||||
],
|
||||
),
|
||||
@ -230,6 +230,9 @@ class ConfigLoader:
|
||||
if gpu_type is None:
|
||||
gpu_type = EnvManager.get_gpu_type()
|
||||
|
||||
# GB200_LYRIS is also in the GB200 family
|
||||
if gpu_type.startswith("GB200_"):
|
||||
gpu_type = "GB200"
|
||||
configs = []
|
||||
|
||||
if not self.base_dir.exists():
|
||||
@ -406,7 +409,7 @@ class ConfigLoader:
|
||||
if "metrics" in acc_meta:
|
||||
metrics_override = acc_meta["metrics"]
|
||||
custom_metrics = MetricsConfig(
|
||||
log_file=metrics_override.get("log_file", "7_accuracy_eval.log"),
|
||||
log_file=metrics_override.get("log_file", "7_accuracy_eval_*.log"),
|
||||
extractor_pattern=metrics_override.get(
|
||||
"extractor_pattern",
|
||||
r"\|([a-zA-Z0-9_-]+)\|.*?\|([\w-]+)\|.*?\|exact_match\|.*?\|([0-9.]+)\|",
|
||||
@ -517,9 +520,10 @@ class ConfigLoader:
|
||||
("slurm", "job_name"): lambda: EnvManager.get_slurm_job_name(),
|
||||
("environment", "container_mount"): lambda: EnvManager.get_container_mount(model_name),
|
||||
("environment", "container_image"): lambda: EnvManager.get_container_image(),
|
||||
("environment", "trtllm_repo"): lambda: EnvManager.get_repo_dir(),
|
||||
("environment", "trtllm_repo"): lambda: self._get_repo_dir(),
|
||||
("environment", "trtllm_wheel_path"): lambda: EnvManager.get_trtllm_wheel_path(),
|
||||
("benchmark", "dataset_file"): lambda: self._get_dataset_file(config),
|
||||
("accuracy", "env_var", "HF_HOME"): lambda: EnvManager.get_hf_home_dir(),
|
||||
("environment", "work_dir"): lambda: EnvManager.get_script_dir(),
|
||||
("environment", "model_path"): lambda: self._get_full_model_path(config),
|
||||
("slurm", "script_file"): lambda: self._get_script_file(config),
|
||||
@ -528,11 +532,67 @@ class ConfigLoader:
|
||||
}
|
||||
|
||||
# Apply overrides based on field paths
|
||||
for (section, key), value_getter in field_mapping.items():
|
||||
if section in config:
|
||||
config[section][key] = value_getter()
|
||||
for path, value_getter in field_mapping.items():
|
||||
self._set_nested_value(config, path, value_getter())
|
||||
|
||||
# Apply dynamic overrides for accuracy.tasks (task names are dynamic)
|
||||
self._apply_accuracy_tasks_overrides(config)
|
||||
|
||||
return config
|
||||
|
||||
def _set_nested_value(self, config: dict, path: tuple, value: any) -> None:
|
||||
"""Set value at nested path in config.
|
||||
|
||||
Supports arbitrary nesting depth using tuple paths.
|
||||
Creates missing intermediate levels automatically.
|
||||
|
||||
Args:
|
||||
config: Configuration dictionary
|
||||
path: Tuple of keys representing the path (e.g., ("a", "b", "c"))
|
||||
value: Value to set
|
||||
|
||||
Example:
|
||||
_set_nested_value(config, ("accuracy", "env_var", "HF_HOME"), "/path")
|
||||
# Sets config["accuracy"]["env_var"]["HF_HOME"] = "/path"
|
||||
"""
|
||||
current = config
|
||||
|
||||
# Traverse/create path, except for the last key
|
||||
for key in path[:-1]:
|
||||
if key not in current:
|
||||
current[key] = {}
|
||||
current = current[key]
|
||||
|
||||
# Set the final value
|
||||
current[path[-1]] = value
|
||||
|
||||
def _apply_accuracy_tasks_overrides(self, config: dict) -> None:
|
||||
"""Apply environment overrides for accuracy.tasks configuration.
|
||||
|
||||
Handles dynamic task names (e.g., gsm8k, gpqa_diamond_local).
|
||||
Replaces placeholders in custom_config paths.
|
||||
|
||||
Args:
|
||||
config: Configuration dictionary
|
||||
"""
|
||||
if "accuracy" not in config or "tasks" not in config["accuracy"]:
|
||||
return
|
||||
|
||||
repo_dir = EnvManager.get_repo_dir()
|
||||
|
||||
# Iterate through all tasks (task names are dynamic)
|
||||
for task_name, task_config in config["accuracy"]["tasks"].items():
|
||||
if not isinstance(task_config, dict):
|
||||
continue
|
||||
|
||||
# Replace <repo_path> in custom_config
|
||||
if "extra_kwargs" in task_config and "custom_config" in task_config["extra_kwargs"]:
|
||||
custom_config_path = task_config["extra_kwargs"]["custom_config"]
|
||||
if "<repo_path>" in custom_config_path:
|
||||
task_config["extra_kwargs"]["custom_config"] = custom_config_path.replace(
|
||||
"<repo_path>", repo_dir
|
||||
)
|
||||
|
||||
def _get_full_model_path(self, config: dict) -> str:
|
||||
"""Get full model path by combining MODEL_DIR with model directory name.
|
||||
|
||||
@ -548,6 +608,12 @@ class ConfigLoader:
|
||||
else:
|
||||
return ""
|
||||
|
||||
def _get_repo_dir(self):
    """Return the repository directory to place in the config.

    The value of ``EnvManager.get_repo_dir()`` is used only when
    ``EnvManager.get_install_mode()`` reports ``"source"``; every other
    install mode yields an empty string.
    """
    # wheel/none install_mode, no need to set repo_dir
    source_install = EnvManager.get_install_mode() == "source"
    return EnvManager.get_repo_dir() if source_install else ""
|
||||
|
||||
def _get_dataset_file(self, config: dict) -> str:
|
||||
"""Get dataset file by combining dataset directory with dataset file name.
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user