mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5405041][fix] Update wide-ep doc (#6933)
Signed-off-by: Xianjie <5410381+qiaoxj07@users.noreply.github.com>
This commit is contained in:
parent
1c1d5d2495
commit
c2fe8b03a2
@@ -12,7 +12,7 @@ model_dir=<model_dir> # Path to the model checkpoint
|
||||
repo_dir=<repo_dir> # Path to the repo to install TensorRT-LLM, if this is empty, the pre-installed version will be used
|
||||
|
||||
mtp_size=0
|
||||
-ntasks_per_node=4 # 4 GPUs per GB200 node
|
||||
+ntasks_per_node=4 # 4 GPUs per GB200 node, 8 GPUs per B200 node
|
||||
|
||||
isl=1024
|
||||
osl=1024
|
||||
@@ -23,8 +23,9 @@ streaming=true
|
||||
for b in 1 64 1024; do
|
||||
for eplb_num_slots in 0 256 288; do
|
||||
concurrency=$((b * 16))
|
||||
-ctx_num=$(((concurrency + 5499)/5500))
|
||||
-total_node_num=$((ctx_num + 4))
|
||||
+ctx_node_num=$(((concurrency + 5499)/5500)) # $(((concurrency + 10999)/11000)) for B200
|
||||
+ctx_num=${ctx_node_num} # $((ctx_node_num * 2)) for B200
|
||||
+total_node_num=$((ctx_node_num + 4)) # $((ctx_node_num + 2)) for B200
|
||||
ntasks=$((total_node_num * ntasks_per_node))
|
||||
|
||||
args=(
|
||||
@@ -58,8 +59,9 @@ done
|
||||
# dep32 eplb288
|
||||
for b in 512; do
|
||||
concurrency=$((b * 32))
|
||||
-ctx_num=$(((concurrency + 5499)/5500))
|
||||
-total_node_num=$((ctx_num + 8))
|
||||
+ctx_node_num=$(((concurrency + 5499)/5500)) # $(((concurrency + 10999)/11000)) for B200
|
||||
+ctx_num=${ctx_node_num} # $((ctx_node_num * 2)) for B200
|
||||
+total_node_num=$((ctx_node_num + 8)) # $((ctx_node_num + 4)) for B200
|
||||
ntasks=$((total_node_num * ntasks_per_node))
|
||||
eplb_num_slots=288
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user