diff --git a/tensorrt_llm/bench/benchmark/throughput.py b/tensorrt_llm/bench/benchmark/throughput.py index 369a7c702b..6c5279e2b3 100755 --- a/tensorrt_llm/bench/benchmark/throughput.py +++ b/tensorrt_llm/bench/benchmark/throughput.py @@ -222,6 +222,16 @@ from tensorrt_llm.sampling_params import SamplingParams required=False, help="Path where output should be written to.", ) +@optgroup.option( + "--request_json", + type=click.Path(dir_okay=False, + writable=True, + readable=False, + path_type=Path, + resolve_path=True), + required=False, + help="Path where per-request information is written to.", +) @optgroup.option( + "--enable_chunked_context", is_flag=True, @@ -262,6 +272,7 @@ def throughput_command( # Reporting options report_json: Path = params.pop("report_json") output_json: Path = params.pop("output_json") + request_json: Path = params.pop("request_json") iteration_log: Path = params.pop("iteration_log") iteration_writer = IterationWriter(iteration_log) @@ -433,6 +444,10 @@ def throughput_command( with open(output_json, "w") as f: output_token_info = report_utility.get_output_tokens(tokenizer) f.write(json.dumps(output_token_info, indent=4)) + if request_json: + logger.info(f"Writing request information to {request_json}.") + with open(request_json, "w") as f: + f.write(json.dumps(report_utility.get_request_info(tokenizer), indent=4)) report_utility.report_statistics() except KeyboardInterrupt: logger.info("Keyboard interrupt, exiting benchmark...") diff --git a/tensorrt_llm/bench/benchmark/utils/asynchronous.py b/tensorrt_llm/bench/benchmark/utils/asynchronous.py index 3dbf9f40be..ae20343f45 100644 --- a/tensorrt_llm/bench/benchmark/utils/asynchronous.py +++ b/tensorrt_llm/bench/benchmark/utils/asynchronous.py @@ -86,7 +86,7 @@ class LlmManager: request_perf_item = PerfItemTuple( start_timestamp=request_start_timestamp, end_timestamp=response_end_timestamp, - request_id=response.request_id, + request_id=response.id, num_input_tokens=len(output.prompt_token_ids),
response_is_final=response.finished, error=False, diff --git a/tensorrt_llm/bench/dataclasses/reporting.py b/tensorrt_llm/bench/dataclasses/reporting.py index d7e28ab680..d994000d6d 100755 --- a/tensorrt_llm/bench/dataclasses/reporting.py +++ b/tensorrt_llm/bench/dataclasses/reporting.py @@ -59,6 +59,7 @@ class StatsKeeper: Register request perf items, used exclusively with LLM API. """ record = self.requests[request_perf_item.request_id] + record.id = request_perf_item.request_id record.num_input_tokens = request_perf_item.num_input_tokens record.start_timestamp = request_perf_item.start_timestamp record.register_event(request_perf_item.error, @@ -220,6 +221,16 @@ class ReportUtility: retval[req_id] = output_str return dict(sorted(retval.items())) + def get_request_info(self, tokenizer) -> List[Dict[str, Any]]: + requests = [] + for request in self.raw_statistics.requests.values(): + entry = request.model_dump() + entry["output"] = tokenizer.decode(entry["tokens"]) + entry["output_tokens"] = len(entry["tokens"]) + entry.pop("tokens") + requests.append(entry) + return requests + def get_statistics_dict(self) -> Dict[str, Any]: """Get statistics as a dictionary.