TensorRT-LLMs/examples/layer_wise_benchmarks/run.sh
Tailing Yuan a7fe043b13
[None][feat] Layer-wise benchmarks: support TEP balance, polish slurm scripts (#10237)
Signed-off-by: Tailing Yuan <yuantailing@gmail.com>
2026-01-05 11:23:04 +08:00

49 lines
1.3 KiB
Bash
Executable File

#!/bin/bash
set -euo pipefail
if [ -v OMPI_COMM_WORLD_SIZE ]; then
export WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
export RANK=$OMPI_COMM_WORLD_RANK
export LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
export NODE_RANK=$OMPI_COMM_WORLD_NODE_RANK
fi
if [ "$RANK" -eq 0 ]; then
export TLLM_LOG_LEVEL=INFO
fi
PROFILE_DIR=${PROFILE_DIR:-profiles}
mkdir -p ${PROFILE_DIR}
PROFILE=${PROFILE:-1}
BACKTRACE=${BACKTRACE:-0}
GPU_METRICS=${GPU_METRICS:-0}
if [ "$PROFILE" -eq 1 ]; then
PROFILE_CMD="nsys profile
-t cuda,nvtx
--cpuctxsw none --cuda-event-trace false
--cuda-graph-trace node
-c cudaProfilerApi --capture-range-end stop
-o ${PROFILE_DIR}/report_np${WORLD_SIZE}_rank${RANK}.nsys-rep
--force-overwrite true"
if [ "$BACKTRACE" -eq 1 ]; then
PROFILE_CMD+=" --python-backtrace=cuda --cudabacktrace all"
else
PROFILE_CMD+=" -s none"
fi
if [ "$GPU_METRICS" -eq 1 ]; then
PROFILE_CMD+=" --gpu-metrics-devices $LOCAL_RANK
--gpu-metrics-frequency 10000"
fi
else
PROFILE_CMD=
fi
SCRIPT_PATH=$(realpath --relative-to="$(pwd)" "$(dirname -- "$0")"/run.py)
set -x
$PROFILE_CMD bash -o pipefail -c \
"python3 -u \"\$1\" \"\${@:3}\" 2>&1 | tee \"\$2/report_np${WORLD_SIZE}_rank${RANK}.log\"" \
bash "$SCRIPT_PATH" "$PROFILE_DIR" "$@"