Support max_stats_len in autodeploy

Signed-off-by: Hui Gao <huig@nvidia.com>
This commit is contained in:
Hui Gao 2026-01-13 11:24:26 +00:00
parent 60a621e4e1
commit 94bb18a991
2 changed files with 8 additions and 0 deletions

View File

@@ -384,6 +384,12 @@ class LlmArgs(AutoDeployConfig, BaseLlmArgs, BaseSettings):
_quant_config: Optional[QuantConfig] = PrivateAttr(default=None)
max_stats_len: int = Field(
default=1000,
description="The max number of performance statistic entries.",
status="prototype",
)
@property
def quant_config(self) -> QuantConfig:
if self._quant_config is None:

View File

@@ -490,10 +490,12 @@ class ADEngine(ModelEngine):
self.max_beam_width = ad_config.max_beam_width
self.spec_config = ad_config.speculative_config
self._disable_overlap_scheduler = ad_config.disable_overlap_scheduler
self.llm_args.max_stats_len = ad_config.max_stats_len
else:
self.max_beam_width = 1
self.spec_config = None
self._disable_overlap_scheduler = False
self.llm_args.max_stats_len = 1000
# check for max total draft tokens
if self.spec_config is not None: