mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
Signed-off-by: lkomali <lkomali@nvidia.com> Signed-off-by: Harshini Komali <157742537+lkomali@users.noreply.github.com> Co-authored-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
20 lines
533 B
Bash
20 lines
533 B
Bash
#!/usr/bin/env bash
#
# Runs one `aiperf profile` invocation for the model
# Qwen2.5-VL-3B-Instruct against a chat-type endpoint at localhost:8000,
# with streaming enabled, and exports the profile to
# my_profile_export.json (path is relative to the current directory).
#
# Requires: the `aiperf` executable on PATH.
# NOTE(review): the exact meaning of each flag is defined by aiperf itself;
# the grouping comments below only restate the flag names - confirm against
# the aiperf CLI documentation before changing values.

set -euo pipefail

# Arguments are kept in an array (one option per line) instead of
# backslash continuations, so a stray character or trailing space after a
# line break cannot silently split the command.
aiperf_args=(
  # Model, tokenizer, and endpoint type.
  -m Qwen2.5-VL-3B-Instruct
  --tokenizer Qwen/Qwen2.5-VL-3B-Instruct
  --endpoint-type chat

  # Fixed random seed.
  --random-seed 123

  # Image input settings.
  --image-width-mean 64
  --image-height-mean 64
  --image-format png

  # Token-length settings for inputs and outputs (stddev 0 on both).
  --synthetic-input-tokens-mean 128
  --synthetic-input-tokens-stddev 0
  --output-tokens-mean 128
  --output-tokens-stddev 0

  # Request count and rate.
  --request-count 5
  --request-rate 1

  # Output file, target URL, and streaming mode.
  --profile-export-file my_profile_export.json
  --url localhost:8000
  --streaming
)

aiperf profile "${aiperf_args[@]}"