mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 02:31:33 +08:00
53 lines
1.4 KiB
Bash
Executable File
53 lines
1.4 KiB
Bash
Executable File
#!/bin/bash
#
# Per-rank launcher. Normalises the rank environment, optionally wraps the
# workload in `nsys profile`, and runs run.py (located next to this script)
# with every command-line argument forwarded to it.
#
# Environment read by this script:
#   OMPI_COMM_WORLD_*  if OMPI_COMM_WORLD_SIZE is set, the SIZE / RANK /
#                      LOCAL_RANK / NODE_RANK values are exported as
#                      WORLD_SIZE / RANK / LOCAL_RANK / NODE_RANK.
#   WORLD_SIZE, RANK   otherwise must already be provided by the caller.
#   PROFILE_DIR, PROFILE, BACKTRACE, GPU_METRICS
#                      profiling knobs, defaulted further below.

set -euo pipefail

# Translate the Open MPI per-process variables into the generic names used by
# the rest of the script and inherited by the workload.
if [[ -v OMPI_COMM_WORLD_SIZE ]]; then
  export WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
  export RANK=$OMPI_COMM_WORLD_RANK
  export LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
  export NODE_RANK=$OMPI_COMM_WORLD_NODE_RANK
fi

# Both values end up in the report/log file names. Check them up front so a
# missing launcher environment produces an actionable message rather than a
# bare "unbound variable" error part-way through the script.
: "${WORLD_SIZE:?WORLD_SIZE is not set; launch through mpirun or export WORLD_SIZE and RANK}"
: "${RANK:?RANK is not set; launch through mpirun or export WORLD_SIZE and RANK}"

# Only rank 0 gets INFO-level logging; other ranks keep whatever
# TLLM_LOG_LEVEL they inherited.
if [ "$RANK" -eq 0 ]; then
  export TLLM_LOG_LEVEL=INFO
fi

# Directory that receives the per-rank profiler reports and logs.
PROFILE_DIR=${PROFILE_DIR:-profiles}
mkdir -p -- "$PROFILE_DIR"

# Feature toggles; a value of 1 enables the feature.
#   PROFILE      wrap the workload in `nsys profile`        (default 1)
#   BACKTRACE    collect Python and CUDA backtraces         (default 0)
#   GPU_METRICS  sample GPU metrics on device $LOCAL_RANK   (default 0)
PROFILE=${PROFILE:-1}
BACKTRACE=${BACKTRACE:-0}
GPU_METRICS=${GPU_METRICS:-0}

# PROFILE_CMD is the command prefix placed in front of the workload.
# It stays empty when profiling is disabled.
PROFILE_CMD=()
if [ "$PROFILE" -eq 1 ]; then
  # Trace CUDA and NVTX only; the capture window is opened and closed by the
  # CUDA profiler API, and the session stops when that range ends.
  PROFILE_CMD=(
    nsys profile
    -t cuda,nvtx
    --cpuctxsw none --cuda-event-trace false
    --cuda-graph-trace node
    -c cudaProfilerApi --capture-range-end stop
    -o "${PROFILE_DIR}/report_np${WORLD_SIZE}_rank${RANK}.nsys-rep"
    --force-overwrite true
  )

  if [ "$BACKTRACE" -eq 1 ]; then
    PROFILE_CMD+=(--python-backtrace=cuda --cudabacktrace all)
  else
    # No backtraces requested: switch CPU sampling off.
    PROFILE_CMD+=(-s none)
  fi

  if [ "$GPU_METRICS" -eq 1 ]; then
    # Quoted so the device index always stays a single argument, and checked
    # so a missing LOCAL_RANK is reported by name.
    PROFILE_CMD+=(
      --gpu-metrics-devices "${LOCAL_RANK:?GPU_METRICS=1 requires LOCAL_RANK to be set}"
      --gpu-metrics-frequency 10000
    )
  fi
fi

# run.py lives next to this script; express its path relative to the current
# directory.
SCRIPT_PATH=$(realpath --relative-to="$PWD" -- "$(dirname -- "$0")"/run.py)

# Per-rank log file, written alongside the profiler report.
LOG_PATH="${PROFILE_DIR}/report_np${WORLD_SIZE}_rank${RANK}.log"

set -x

# Run the workload, under the profiler prefix if one was built, mirroring its
# combined stdout/stderr into the log file.
#   - The inner shell sets pipefail so a python3 failure is not masked by tee.
#   - The script path and log path are passed as positional arguments ($1, $2)
#     so that no environment-derived text is parsed as shell code; the
#     caller's own arguments follow from $3 onwards.
#   - ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without
#     tripping `set -u` on older bash versions.
${PROFILE_CMD[@]+"${PROFILE_CMD[@]}"} bash -o pipefail -c \
  'python3 -u "$1" "${@:3}" 2>&1 | tee -- "$2"' \
  bash "$SCRIPT_PATH" "$LOG_PATH" "$@"