TensorRT-LLMs/benchmarks/cpp/utils/prepare_synthetic_data.py
Kaiyu Xie 5955b8afba
Update TensorRT-LLM Release branch (#1192)
* Update TensorRT-LLM

---------

Co-authored-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
2024-02-29 17:20:55 +08:00

62 lines
2.3 KiB
Python

import click
from utils.utils import (dataset_dump, gen_random_tokens, get_list_of_delays,
get_norm_dist_tokens)
@click.command()
@click.option("--num-requests",
required=True,
type=int,
help='Number of requests to be generated')
@click.option('--input-mean',
required=True,
type=int,
help='normal dist mean for input tokens')
@click.option('--input-stdev',
required=True,
type=int,
help='normal dist stdev for input tokens')
@click.option('--output-mean',
required=True,
type=int,
help='normal dist mean for output tokens')
@click.option('--output-stdev',
required=True,
type=int,
help='normal dist stdev for output tokens')
@click.pass_obj
def token_norm_dist(root_args, **kwargs):
"""Prepare dataset by generating random tokens."""
input_ids = []
input_lens = []
output_lens = []
input_lens = get_norm_dist_tokens(kwargs['input_mean'],
kwargs['input_stdev'],
kwargs['num_requests'],
root_args.random_seed)
num_reqs = len(input_lens)
output_lens = get_norm_dist_tokens(kwargs['output_mean'],
kwargs['output_stdev'], num_reqs,
root_args.random_seed)
delays = get_list_of_delays(root_args.time_delay_dist,
root_args.mean_time_bet_reqs, num_reqs,
root_args.random_seed)
input_ids = gen_random_tokens(input_lens, root_args.tokenizer,
root_args.random_seed)
dataset_dump(
input_ids, output_lens, delays, {
"workload_type": "token-norm-dist",
"input_mean": kwargs['input_mean'],
"input_stdev": kwargs['input_stdev'],
"output_mean": kwargs['output_mean'],
"output_stdev": kwargs['output_stdev'],
"num_requests": kwargs['num_requests'],
"delay_distr": root_args.time_delay_dist,
"request_rate": root_args.request_rate,
"tokenize_vocabsize": root_args.tokenizer.vocab_size
}, root_args.output)