mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
18 lines
495 B
Bash
Executable File
18 lines
495 B
Bash
Executable File
#! /usr/bin/env bash
|
|
|
|
genai-perf profile \
|
|
-m TinyLlama-1.1B-Chat-v1.0 \
|
|
--tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
|
|
--service-kind openai \
|
|
--endpoint-type chat \
|
|
--random-seed 123 \
|
|
--synthetic-input-tokens-mean 128 \
|
|
--synthetic-input-tokens-stddev 0 \
|
|
--output-tokens-mean 128 \
|
|
--output-tokens-stddev 0 \
|
|
--request-count 100 \
|
|
--request-rate 10 \
|
|
--profile-export-file my_profile_export.json \
|
|
--url localhost:8000 \
|
|
--streaming
|