diff --git a/tensorrt_llm/bench/benchmark/throughput.py b/tensorrt_llm/bench/benchmark/throughput.py index 369a7c702b..6c5279e2b3 100755 --- a/tensorrt_llm/bench/benchmark/throughput.py +++ b/tensorrt_llm/bench/benchmark/throughput.py @@ -222,6 +222,16 @@ from tensorrt_llm.sampling_params import SamplingParams required=False, help="Path where output should be written to.", ) +@optgroup.option( + "--request_json", + type=click.Path(dir_okay=False, + writable=True, + readable=False, + path_type=Path, + resolve_path=True), + required=False, + help="Path where per-request information is written to.", +) @optgroup.option( + "--enable_chunked_context", is_flag=True, @@ -262,6 +272,7 @@ def throughput_command( # Reporting options report_json: Path = params.pop("report_json") output_json: Path = params.pop("output_json") + request_json: Path = params.pop("request_json") iteration_log: Path = params.pop("iteration_log") iteration_writer = IterationWriter(iteration_log) @@ -433,6 +444,10 @@ def throughput_command( with open(output_json, "w") as f: output_token_info = report_utility.get_output_tokens(tokenizer) f.write(json.dumps(output_token_info, indent=4)) + if request_json: + logger.info(f"Writing request information to {request_json}.") + with open(request_json, "w") as f: + f.write(json.dumps(report_utility.get_request_info(tokenizer), indent=4)) report_utility.report_statistics() except KeyboardInterrupt: logger.info("Keyboard interrupt, exiting benchmark...") diff --git a/tensorrt_llm/bench/benchmark/utils/asynchronous.py b/tensorrt_llm/bench/benchmark/utils/asynchronous.py index 3dbf9f40be..ae20343f45 100644 --- a/tensorrt_llm/bench/benchmark/utils/asynchronous.py +++ b/tensorrt_llm/bench/benchmark/utils/asynchronous.py @@ -86,7 +86,7 @@ class LlmManager: request_perf_item = PerfItemTuple( start_timestamp=request_start_timestamp, end_timestamp=response_end_timestamp, - request_id=response.request_id, + request_id=response.id, num_input_tokens=len(output.prompt_token_ids),
response_is_final=response.finished, error=False, diff --git a/tensorrt_llm/bench/dataclasses/reporting.py b/tensorrt_llm/bench/dataclasses/reporting.py index d7e28ab680..d994000d6d 100755 --- a/tensorrt_llm/bench/dataclasses/reporting.py +++ b/tensorrt_llm/bench/dataclasses/reporting.py @@ -59,6 +59,7 @@ class StatsKeeper: Register request perf items, used exclusively with LLM API. """ record = self.requests[request_perf_item.request_id] + record.id = request_perf_item.request_id record.num_input_tokens = request_perf_item.num_input_tokens record.start_timestamp = request_perf_item.start_timestamp record.register_event(request_perf_item.error, @@ -220,6 +221,16 @@ class ReportUtility: retval[req_id] = output_str return dict(sorted(retval.items())) + def get_request_info(self, tokenizer) -> List[Dict[str, Any]]: + requests = [] + for request in self.raw_statistics.requests.values(): + entry = request.model_dump() + entry["output"] = tokenizer.decode(entry["tokens"]) + entry["output_tokens"] = len(entry["tokens"]) + entry.pop("tokens") + requests.append(entry) + return requests + def get_statistics_dict(self) -> Dict[str, Any]: """Get statistics as a dictionary.