[TRTLLM-8763][chore] Deprecate pybind based GuidedDecodingConfig usage in torch backend (#8717)

Signed-off-by: leslie-fang25 <leslief@nvidia.com>
Leslie Fang 2025-10-29 20:37:14 +08:00 committed by GitHub
parent fc3b6f5331
commit 451959c60d
4 changed files with 29 additions and 6 deletions
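
For code that consumed the pybind config, the visible change is the import path: the torch backend now takes `GuidedDecodingConfig` from the LLM-API pydantic definitions instead of the executor bindings. A minimal before/after sketch of the migration:

    # Before this commit: pybind-based config, now deprecated in the torch backend
    # from tensorrt_llm.bindings.executor import GuidedDecodingConfig

    # After this commit: pydantic model defined in tensorrt_llm.llmapi.llm_args
    from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig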

View File

@@ -6,7 +6,9 @@ import llguidance.torch
 import torch
 import xgrammar
-from ...bindings.executor import GuidedDecodingConfig, GuidedDecodingParams
+from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig
+from ...bindings.executor import GuidedDecodingParams
 class GrammarMatcher(ABC):

View File

@@ -5,8 +5,10 @@ from typing import Iterable, List, Optional, Tuple
 import torch
+from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig
 from ..._utils import nvtx_range
-from ...bindings.executor import GuidedDecodingConfig, GuidedDecodingParams
+from ...bindings.executor import GuidedDecodingParams
 from ...bindings.internal.batch_manager import LlmRequestType
 from ...logger import logger
 from ..hostfunc import hostfunc

View File

@@ -14,9 +14,9 @@ from strenum import StrEnum
 import tensorrt_llm
 from tensorrt_llm._torch.pyexecutor.resource_manager import ResourceManagerType
 from tensorrt_llm._utils import get_sm_version, mpi_disabled
-from tensorrt_llm.bindings.executor import GuidedDecodingConfig
 from tensorrt_llm.llmapi.llm_args import (CapacitySchedulerPolicy,
-                                          ContextChunkingPolicy, LoadFormat,
+                                          ContextChunkingPolicy,
+                                          GuidedDecodingConfig, LoadFormat,
                                           TorchLlmArgs)
 from tensorrt_llm.llmapi.tokenizer import (TokenizerBase,
                                            _llguidance_tokenizer_info,

View File

@@ -44,8 +44,7 @@ from ..bindings.executor import (BatchingType as _BatchingType,
                                  KvCacheConfig as _KvCacheConfig,
                                  LookaheadDecodingConfig as _LookaheadDecodingConfig,
                                  PeftCacheConfig as _PeftCacheConfig,
-                                 SchedulerConfig as _SchedulerConfig,
-                                 GuidedDecodingConfig as _GuidedDecodingConfig)  # isort: skip
+                                 SchedulerConfig as _SchedulerConfig)  # isort: skip
 # isort: on
 # yapf: enable
@@ -164,6 +163,26 @@ class CudaGraphConfig(StrictBaseModel):
         return batch_sizes
 
 
+class GuidedDecodingConfig(StrictBaseModel):
+
+    class GuidedDecodingBackend(Enum):
+        XGRAMMAR = 0
+        LLGUIDANCE = 1
+
+    backend: GuidedDecodingBackend = Field(
+        default=GuidedDecodingBackend.XGRAMMAR,
+        description="The backend for guided decoding config.")
+    encoded_vocab: Optional[List[str]] = Field(
+        default=None,
+        description="The encoded vocab for guided decoding config.")
+    tokenizer_str: Optional[str] = Field(
+        default=None,
+        description="The tokenizer string for guided decoding config.")
+    stop_token_ids: Optional[List[int]] = Field(
+        default=None,
+        description="The stop token ids for guided decoding config.")
+
+
 class BaseSparseAttentionConfig(StrictBaseModel):
     """
     Configuration for sparse attention.
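
Since the new `GuidedDecodingConfig` is a StrictBaseModel, it can be constructed directly with keyword arguments. A minimal sketch based on the fields added above (the stop token id is a hypothetical placeholder; real values depend on the tokenizer):

    from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig

    cfg = GuidedDecodingConfig(
        # select the llguidance backend; the default is XGRAMMAR
        backend=GuidedDecodingConfig.GuidedDecodingBackend.LLGUIDANCE,
        stop_token_ids=[2],  # hypothetical EOS token id, tokenizer-dependent
    )
    assert cfg.encoded_vocab is None  # optional fields default to None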