TensorRT-LLMs/examples/disaggregated/slurm/benchmark/accuracy_eval.sh
Kaiyu Xie 5a611cb8f5
[None] [feat] Enhancements to slurm scripts (#10112)
Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
2025-12-21 10:24:56 -05:00

35 lines
873 B
Bash

#!/bin/bash
#
# Run an lm_eval accuracy evaluation against a completions endpoint at
# http://<hostname>:<port>/v1/completions and write results to <output_dir>.
#
# Usage:
#   accuracy_eval.sh <full_logdir> <accuracy_model> <accuracy_tasks> \
#       <model_path> <model_args_extra> <output_dir> <hostname> <port>
#
# Positional arguments:
#   1  full_logdir       Log directory; only printed by this script.
#   2  accuracy_model    Value passed to `lm_eval --model`.
#   3  accuracy_tasks    Value passed to `lm_eval --tasks`.
#   4  model_path        Used as `model=<model_path>` inside --model_args.
#   5  model_args_extra  Extra comma-separated key=value pairs appended to
#                        --model_args; may be an empty string.
#   6  output_dir        Created if missing; passed to `lm_eval --output_path`.
#   7  hostname          Host part of the endpoint URL.
#   8  port              Port part of the endpoint URL.
#
# Exits non-zero if fewer than eight arguments are given or if any step
# (pip install, mkdir, lm_eval) fails.
set -euo pipefail

# Fail early with a readable message instead of an "unbound variable" error.
if (( $# < 8 )); then
  printf 'Usage: %s <full_logdir> <accuracy_model> <accuracy_tasks> <model_path> <model_args_extra> <output_dir> <hostname> <port>\n' \
    "${0##*/}" >&2
  exit 1
fi

# Parse arguments
full_logdir=${1}
accuracy_model=${2}
accuracy_tasks=${3}
model_path=${4}
model_args_extra=${5}
output_dir=${6}
hostname=${7}
port=${8}

echo "Starting accuracy evaluation..."
echo "Log directory: ${full_logdir}"
echo "Hostname: ${hostname}, Port: ${port}"

base_url="http://${hostname}:${port}/v1/completions"
echo "Using base_url: ${base_url}"

# Install lm_eval and run evaluation
echo "Installing lm_eval[api] and running evaluation..."
# Quoted so the shell does not treat "[api]" as a glob bracket expression.
pip install 'lm_eval[api]==0.4.8'

echo "Running lm_eval with tasks: ${accuracy_tasks}..."
mkdir -p -- "${output_dir}"

# Build --model_args as a single word; append the extras only when present so
# an empty fifth argument does not leave a dangling trailing comma.
model_args="model=${model_path},base_url=${base_url}"
if [[ -n "${model_args_extra}" ]]; then
  model_args+=",${model_args_extra}"
fi

lm_eval --model "${accuracy_model}" \
  --tasks "${accuracy_tasks}" \
  --model_args "${model_args}" \
  --output_path "${output_dir}" --log_samples \
  --trust_remote_code

echo "Accuracy evaluation completed successfully"