TensorRT-LLMs/examples/layer_wise_benchmarks/run.sh
2025-12-04 13:41:15 +08:00

47 lines
1.2 KiB
Bash
Executable File

#!/bin/bash
# Per-rank wrapper around run.py: establishes the rank environment, then runs
# the benchmark (optionally under `nsys profile`) and tees its output to a
# per-rank log file.
#
# Rank environment:
#   When OMPI_COMM_WORLD_SIZE is set (process started by Open MPI), WORLD_SIZE,
#   RANK, LOCAL_RANK and NODE_RANK are exported from the matching
#   OMPI_COMM_WORLD_* variables, overriding any inherited values. Otherwise
#   the caller is expected to have exported them already.
set -euo pipefail

if [[ -v OMPI_COMM_WORLD_SIZE ]]; then
  export WORLD_SIZE="$OMPI_COMM_WORLD_SIZE"
  export RANK="$OMPI_COMM_WORLD_RANK"
  export LOCAL_RANK="$OMPI_COMM_WORLD_LOCAL_RANK"
  export NODE_RANK="$OMPI_COMM_WORLD_NODE_RANK"
fi

# WORLD_SIZE and RANK are needed further down to name the report and log
# files. Fail here with an actionable message instead of a bare
# "unbound variable" (or, for an empty value, a malformed file name).
: "${WORLD_SIZE:?WORLD_SIZE is not set; launch through mpirun or export WORLD_SIZE and RANK}"
: "${RANK:?RANK is not set; launch through mpirun or export WORLD_SIZE and RANK}"

# Rank 0 exports TLLM_LOG_LEVEL=INFO; other ranks leave the variable untouched.
if [ "$RANK" -eq 0 ]; then
  export TLLM_LOG_LEVEL=INFO
fi
# Profiling configuration, all overridable from the environment:
#   PROFILE_DIR  directory receiving the nsys report and the log
#                (default: profiles; created if missing)
#   PROFILE      1 wraps the run in `nsys profile`; other integers run it
#                directly (default: 1)
#   BACKTRACE    1 adds --python-backtrace=cuda --cudabacktrace all;
#                otherwise "-s none" is passed (default: 0)
#   GPU_METRICS  1 adds GPU metrics collection for device $LOCAL_RANK at
#                --gpu-metrics-frequency 10000 (default: 0)
PROFILE_DIR=${PROFILE_DIR:-profiles}
mkdir -p -- "${PROFILE_DIR}"
PROFILE=${PROFILE:-1}
BACKTRACE=${BACKTRACE:-0}
GPU_METRICS=${GPU_METRICS:-0}

# Common stem for this rank's artifacts: <stem>.nsys-rep and <stem>.log.
report_stem="${PROFILE_DIR}/report_np${WORLD_SIZE}_rank${RANK}"

# The command line is assembled as an array so that every argument stays a
# single word even if PROFILE_DIR or LOCAL_RANK contains whitespace or glob
# characters.
launch_cmd=()
if [ "$PROFILE" -eq 1 ]; then
  launch_cmd+=(
    nsys profile
    -t cuda,nvtx
    --cpuctxsw none --cuda-event-trace false
    --cuda-graph-trace node
    -c cudaProfilerApi --capture-range-end stop
    -o "${report_stem}.nsys-rep"
    --force-overwrite true
  )
  if [ "$BACKTRACE" -eq 1 ]; then
    launch_cmd+=(--python-backtrace=cuda --cudabacktrace all)
  else
    launch_cmd+=(-s none)
  fi
  if [ "$GPU_METRICS" -eq 1 ]; then
    launch_cmd+=(
      --gpu-metrics-devices "$LOCAL_RANK"
      --gpu-metrics-frequency 10000
    )
  fi
fi

# The inner shell exists so that the python3 | tee pipeline runs as a single
# child command (of nsys when profiling is enabled).
#   * The log path is handed over as $1 rather than spliced into the script
#     text, so quotes or '$' in PROFILE_DIR cannot alter the inner script.
#   * pipefail is enabled inside the inner shell (it is not inherited from
#     this script); without it the pipeline status is always tee's and a
#     failing run.py would be reported as success.
# Remaining arguments of this script are forwarded verbatim to run.py.
launch_cmd+=(
  bash -c
  'set -o pipefail; log_file=$1; shift; python3 -u run.py "$@" 2>&1 | tee -- "$log_file"'
  bash "${report_stem}.log" "$@"
)

# Trace the final command line to stderr for reproducibility.
set -x
"${launch_cmd[@]}"