TensorRT-LLMs/examples/layer_wise_benchmarks/run.sh
Tailing Yuan 4345636b04
[None][chore] Clean up layer-wise benchmarks code (#11092)
Signed-off-by: Tailing Yuan <yuantailing@gmail.com>
2026-01-29 14:29:37 -05:00

53 lines
1.4 KiB
Bash
Executable File

#!/bin/bash
set -euo pipefail
if [ -v OMPI_COMM_WORLD_SIZE ]; then
export WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
export RANK=$OMPI_COMM_WORLD_RANK
export LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
export NODE_RANK=$OMPI_COMM_WORLD_NODE_RANK
fi
if [ "$RANK" -eq 0 ]; then
export TLLM_LOG_LEVEL=INFO
fi
PROFILE_DIR=${PROFILE_DIR:-profiles}
mkdir -p -- "$PROFILE_DIR"
PROFILE=${PROFILE:-1}
BACKTRACE=${BACKTRACE:-0}
GPU_METRICS=${GPU_METRICS:-0}
if [ "$PROFILE" -eq 1 ]; then
PROFILE_CMD=(
nsys profile
-t cuda,nvtx
--cpuctxsw none --cuda-event-trace false
--cuda-graph-trace node
-c cudaProfilerApi --capture-range-end stop
-o "${PROFILE_DIR}/report_np${WORLD_SIZE}_rank${RANK}.nsys-rep"
--force-overwrite true
)
if [ "$BACKTRACE" -eq 1 ]; then
PROFILE_CMD+=(--python-backtrace=cuda --cudabacktrace all)
else
PROFILE_CMD+=(-s none)
fi
if [ "$GPU_METRICS" -eq 1 ]; then
PROFILE_CMD+=(
--gpu-metrics-devices $LOCAL_RANK
--gpu-metrics-frequency 10000
)
fi
else
PROFILE_CMD=()
fi
SCRIPT_PATH=$(realpath --relative-to="$(pwd)" -- "$(dirname -- "$0")"/run.py)
set -x
${PROFILE_CMD[@]+"${PROFILE_CMD[@]}"} bash -o pipefail -c \
'python3 -u "$1" "${@:3}" 2>&1 | tee "$2/report_np'"${WORLD_SIZE}"'_rank'"${RANK}"'.log"' \
bash "$SCRIPT_PATH" "$PROFILE_DIR" "$@"