[feat]: Allow for a settable end-of-sequence/padding token in max throughput benchmark. (#3776)

* Move world options to a different group for clarity.

Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com>

* Add eos_id option.

Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com>

---------

Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com>
Author: Frank Di Natale, committed by GitHub on 2025-04-30 21:42:46 -04:00
Commit: 1e317c98c6 (parent: 9cc5922a0b)

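A quick aside on the "move world options to a different group" part of this change: the benchmark CLI builds its options with click and click_option_group, and an @optgroup.group decorator only controls how the options that follow it are grouped in --help output; parsing and the values handed to the command body are unchanged. The sketch below is a hypothetical stand-alone demo, not the benchmark CLI itself (the "Benchmark Options" group name and the demo command are illustrative), showing the same pattern used in the diff.

import click
from click_option_group import optgroup


@click.command()
# Illustrative group holding the new --eos_id flag, analogous to the diff below.
@optgroup.group("Benchmark Options", help="General benchmark options.")
@optgroup.option("--eos_id",
                 type=int,
                 default=-1,
                 help="End-of-sequence token id; -1 disables EOS.")
# Grouped world options, analogous to --tp moving under "World Configuration".
@optgroup.group("World Configuration",
                help="Options for configuring the backend multi-GPU world.")
@optgroup.option("--tp", type=int, default=1, help="tensor parallelism size")
def demo(**params):
    # Grouped and ungrouped options arrive in the same params mapping,
    # so regrouping changes only the --help layout.
    click.echo(f"tp={params['tp']} eos_id={params['eos_id']}")


if __name__ == "__main__":
    demo()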

@@ -94,6 +94,14 @@ from tensorrt_llm.sampling_params import SamplingParams
     required=False,
     help="Pass in a dataset file for parsing instead of stdin.",
 )
+@optgroup.option(
+    "--eos_id",
+    type=int,
+    default=-1,
+    required=False,
+    help=
+    "Set the end-of-sequence token for the benchmark. Set to -1 to disable EOS.",
+)
 @optgroup.option(
     "--modality",
     type=click.Choice(["image", "video"]),
@@ -122,6 +130,22 @@ from tensorrt_llm.sampling_params import SamplingParams
     default=2,
     help="Number of requests warm up benchmark.",
 )
+@optgroup.option(
+    "--target_input_len",
+    default=None,
+    type=click.IntRange(min=1),
+    help="Target (average) input length for tuning heuristics.",
+)
+@optgroup.option(
+    "--target_output_len",
+    default=None,
+    type=click.IntRange(min=1),
+    help="Target (average) sequence length for tuning heuristics.",
+)
+@optgroup.group(
+    "World Configuration",
+    help="Options for configuring the backend multi-GPU world.",
+)
 @optgroup.option(
     "--tp",
     type=int,
@@ -146,18 +170,6 @@ from tensorrt_llm.sampling_params import SamplingParams
     default=None,
     help="expert cluster parallelism size",
 )
-@optgroup.option(
-    "--target_input_len",
-    default=None,
-    type=click.IntRange(min=1),
-    help="Target (average) input length for tuning heuristics.",
-)
-@optgroup.option(
-    "--target_output_len",
-    default=None,
-    type=click.IntRange(min=1),
-    help="Target (average) sequence length for tuning heuristics.",
-)
 @optgroup.group("Request Load Control Options",
                 cls=MutuallyExclusiveOptionGroup,
                 help="Limits how requests are loaded.")
@@ -218,6 +230,7 @@ def throughput_command(
     # Parameters from CLI
     # Model, experiment, and engine params
     dataset_path: Path = params.pop("dataset")
+    eos_id: int = params.pop("eos_id")
     warmup: int = params.get("warmup")
     num_requests: int = params.pop("num_requests")
     max_seq_len: int = params.pop("max_seq_len")
@@ -329,8 +342,8 @@ def throughput_command(
     else:
         llm = LLM(**kwargs)

-    sampling_params = SamplingParams(end_id=-1,
-                                     pad_id=-1,
+    sampling_params = SamplingParams(end_id=eos_id,
+                                     pad_id=eos_id,
                                      beam_width=beam_width)

     # Perform warmup if requested.
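For reference on the last hunk: the only behavioural change is which token id the benchmark's SamplingParams receives for end_id and pad_id. Below is a minimal sketch of the two settings; SamplingParams and its end_id/pad_id/beam_width arguments are taken from the diff above, while the concrete token id 2 is purely illustrative.

from tensorrt_llm.sampling_params import SamplingParams

# Default (--eos_id -1), matching the old hard-coded behaviour: EOS and
# padding are disabled, so every request generates its full output length.
ignore_eos = SamplingParams(end_id=-1, pad_id=-1, beam_width=1)

# With --eos_id set to the model's real end-of-sequence token id (2 is
# illustrative only), generation may stop early when the model emits EOS.
eos_id = 2
respect_eos = SamplingParams(end_id=eos_id, pad_id=eos_id, beam_width=1)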