mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5569754][chore] Adjust max batch size to prevent OOM (#8876)
Signed-off-by: Junyi Xu <219237550+JunyiXu-nv@users.noreply.github.com>
This commit is contained in:
parent
cacb8a84f2
commit
c329f5f78b
@@ -51,5 +51,6 @@ srun -l \
|
||||
trtllm-llmapi-launch python3 $script \
|
||||
--model_dir $LOCAL_MODEL \
|
||||
--prompt 'Hello, how are you?' \
|
||||
--tp_size 2
|
||||
--tp_size 2 \
|
||||
--max_batch_size 256
|
||||
"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user