Support max_stats_len in AutoDeploy LlmArgs and ADEngine

Signed-off-by: Hui Gao <huig@nvidia.com>
2026-01-13 22:18:36 +08:00 · 2026-01-13 11:24:26 +00:00 · 2026-01-13 11:24:26 +00:00 · 94bb18a991
commit 94bb18a991
parent 60a621e4e1
2 changed files with 8 additions and 0 deletions
--- a/tensorrt_llm/_torch/auto_deploy/llm_args.py
+++ b/tensorrt_llm/_torch/auto_deploy/llm_args.py
@ -384,6 +384,12 @@ class LlmArgs(AutoDeployConfig, BaseLlmArgs, BaseSettings):

    _quant_config: Optional[QuantConfig] = PrivateAttr(default=None)

+    max_stats_len: int = Field(
+        default=1000,
+        description="The max number of performance statistic entries.",
+        status="prototype",
+    )
+
    @property
    def quant_config(self) -> QuantConfig:
        if self._quant_config is None:
--- a/tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py
+++ b/tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py
@ -490,10 +490,12 @@ class ADEngine(ModelEngine):
            self.max_beam_width = ad_config.max_beam_width
            self.spec_config = ad_config.speculative_config
            self._disable_overlap_scheduler = ad_config.disable_overlap_scheduler
+            self.llm_args.max_stats_len = ad_config.max_stats_len
        else:
            self.max_beam_width = 1
            self.spec_config = None
            self._disable_overlap_scheduler = False
+            self.llm_args.max_stats_len = 1000

        # check for max total draft tokens
        if self.spec_config is not None: