TensorRT-LLMs/tests/integration/evaltool/run_trtllm_inference_server.sh
Kaiyu Xie 2631f21089
Update (#2978)
Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
2025-03-23 16:39:35 +08:00

97 lines
2.9 KiB
Bash
Executable File

#!/bin/bash
# Start the evaltool TensorRT-LLM inference server (server.py) in the
# background under mpirun and wait until it answers on http://localhost:12478.
#
# Options (parsed with getopts further down):
#   -e <engine_path>       required; forwarded to server.py as --engine_path
#   -t <tokenizer_path>    required; forwarded to server.py as --tokenizer_path
#   -d <evaltool_dir>      required; directory containing
#                          evaltool/inference_server/trtllm
#   -m <max_output_len>    forwarded to server.py as --max_output_len
#   -c <device_count>      number of mpirun ranks (-np); defaults to 1
#   -l <lookahead_config>  optional; forwarded as --lookahead_config
#
# Exit status: 0 if the server was already running or came up in time,
# 1 on bad arguments, missing paths, or start-up timeout.

# Trace every command to stderr so logs show exactly what was executed.
set -x
# Abort on the first command that fails outside a conditional context.
set -e
#######################################
# Probe the inference server on port 12478.
# The response body is discarded; curl's exit status alone decides the result.
# Outputs: one confirmation line on stdout when the probe succeeds.
# Returns: 0 if curl succeeded, 1 otherwise.
#######################################
check_server() {
  # Guard clause: bail out as soon as the probe fails.
  curl -s http://localhost:12478 > /dev/null || return 1
  echo "Server is running on http://localhost:12478"
  return 0
}
#######################################
# Print the command-line synopsis and terminate the script.
# Outputs: one usage line on stdout, prefixed with the script's own name ($0).
# Returns: never returns; exits with status 1.
#######################################
usage() {
  printf '%s\n' "Usage: $0 [-e <engine_path>] [-t <tokenizer_path>] [-d <evaltool_dir>] [-m <max_output_len>] [-c <device_count>] [-l <lookahead_config>]"
  exit 1
}
# Parse command line arguments.
# device_count starts at 1 and is only overridden by a non-empty -c value;
# any option getopts does not recognise falls through to usage().
device_count=1
while getopts "e:t:d:m:c:l:" flag; do
  case "$flag" in
    e) engine_path=$OPTARG ;;
    t) tokenizer_path=$OPTARG ;;
    d) evaltool_dir=$OPTARG ;;
    m) max_output_len=$OPTARG ;;
    # An empty -c argument keeps whatever device_count already holds.
    c) device_count=${OPTARG:-$device_count} ;;
    l) lookahead_config=$OPTARG ;;
    *) usage ;;
  esac
done
# The engine path, tokenizer path and evaltool directory are mandatory;
# show the usage text (which exits) if any of them is missing.
if [[ -z "$engine_path" || -z "$tokenizer_path" || -z "$evaltool_dir" ]]; then
  usage
fi

# Each mandatory path must exist; evaltool_dir must additionally be a directory.
[[ -e "$engine_path" ]] || {
  echo "Error: engine_path '$engine_path' does not exist."
  exit 1
}
[[ -e "$tokenizer_path" ]] || {
  echo "Error: tokenizer_path '$tokenizer_path' does not exist."
  exit 1
}
[[ -d "$evaltool_dir" ]] || {
  echo "Error: evaltool_dir '$evaltool_dir' is not a valid directory."
  exit 1
}

# Nothing to do when a server already answers on the port.
if check_server; then
  echo "Server is already running, skipping the rest of the script"
  exit 0
fi
# Install dependencies, patch the evaltool server sources, and launch the
# server in the background.
trtllm_script_dir="evaltool/inference_server/trtllm"

# The server is started from inside the evaltool tree, so every path that may
# have been given relative to the caller's working directory (including this
# script's own location) must be made absolute BEFORE the cd below; otherwise
# the patch files, engine and tokenizer would be looked up in the wrong place.
file_path=$(dirname -- "$0")
for path_var in file_path evaltool_dir engine_path tokenizer_path; do
  case "${!path_var}" in
    /*) ;; # already absolute
    *) printf -v "$path_var" '%s/%s' "$PWD" "${!path_var}" ;;
  esac
done

server_dir="${evaltool_dir}/${trtllm_script_dir}"
cd "$server_dir" || exit
pip install -r requirements.txt

# Overwrite the stock server sources with the patched copies shipped next to
# this script.
cp -f "${file_path}/patch_for_server/server.py" "${server_dir}/"
cp -f "${file_path}/patch_for_server/trt_llm_model.py" "${server_dir}/"

# Run the server in the background and redirect logs to a file
export PYTHONUNBUFFERED=1
current_datetime=$(date '+%Y-%m-%d_%H-%M-%S')

# Build the command as an array so arguments containing spaces survive intact
# and optional flags are only present when they have a value.
server_cmd=(
  mpirun --allow-run-as-root --oversubscribe -np "$device_count"
  python3 server.py
  --engine_path "$engine_path"
  --tokenizer_path "$tokenizer_path"
)
# Without -m, never pass a bare --max_output_len (it would swallow the next
# token or fail to parse); leave the decision to server.py instead.
if [ -n "$max_output_len" ]; then
  server_cmd+=(--max_output_len "$max_output_len")
fi
if [ -n "$lookahead_config" ]; then
  server_cmd+=(--lookahead_config "$lookahead_config")
fi

# nohup + & detaches the server; stdout and stderr go to a timestamped log in
# the server directory (the current working directory at this point).
nohup "${server_cmd[@]}" > "server_${current_datetime}.log" 2>&1 &
# Poll for start-up: sleep 30 seconds, probe, and repeat for at most 60
# attempts. Exit 0 on the first successful probe.
attempts_left=60
while [ "$attempts_left" -gt 0 ]; do
  sleep 30
  if check_server; then
    echo "Server started."
    exit 0
  fi
  attempts_left=$((attempts_left - 1))
done

# Every probe failed: report the timeout and signal failure to the caller.
echo "Error: Server did not start on http://localhost:12478 within the expected time."
exit 1