import random import click from utils.utils import (dataset_dump, gen_random_tokens, get_list_of_delays, get_norm_dist_tokens) @click.command() @click.option("--num-requests", required=True, type=int, help='Number of requests to be generated') @click.option('--input-mean', required=True, type=int, help='normal dist mean for input tokens') @click.option('--input-stdev', required=True, type=int, help='normal dist stdev for input tokens') @click.option('--output-mean', required=True, type=int, help='normal dist mean for output tokens') @click.option('--output-stdev', required=True, type=int, help='normal dist stdev for output tokens') @click.pass_obj def token_norm_dist(root_args, **kwargs): """Prepare dataset by generating random tokens.""" input_ids = [] input_lens = [] output_lens = [] task_ids = [] input_lens = get_norm_dist_tokens(kwargs['input_mean'], kwargs['input_stdev'], kwargs['num_requests'], root_args.random_seed) num_reqs = len(input_lens) output_lens = get_norm_dist_tokens(kwargs['output_mean'], kwargs['output_stdev'], num_reqs, root_args.random_seed) delays = get_list_of_delays(root_args.time_delay_dist, root_args.mean_time_bet_reqs, num_reqs, root_args.random_seed) input_ids = gen_random_tokens(input_lens, root_args.tokenizer, root_args.random_seed) if root_args.rand_task_id is None: task_ids = [root_args.task_id for _ in range(num_reqs)] else: min_id, max_id = root_args.rand_task_id task_ids = [random.randint(min_id, max_id) for _ in range(num_reqs)] dataset_dump( input_ids, output_lens, delays, task_ids, { "workload_type": "token-norm-dist", "input_mean": kwargs['input_mean'], "input_stdev": kwargs['input_stdev'], "output_mean": kwargs['output_mean'], "output_stdev": kwargs['output_stdev'], "num_requests": kwargs['num_requests'], "delay_distr": root_args.time_delay_dist, "request_rate": root_args.request_rate, "tokenize_vocabsize": root_args.tokenizer.vocab_size }, root_args.output)