TensorRT-LLMs/tensorrt_llm/_torch/speculative/drafter.py
Robin Kobus 30a19fcf7c
[TRTLLM-6291] feat: Add user-provided speculative decoding support (#5204)
Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
2025-07-07 16:30:43 +02:00

25 lines
629 B
Python

from abc import ABC, abstractmethod
from typing import Optional
from ..pyexecutor.resource_manager import BaseResourceManager
from ..pyexecutor.scheduler import ScheduledRequests
class Drafter(ABC):
    """Abstract base class for objects that prepare draft tokens.

    Concrete subclasses must override :meth:`prepare_draft_tokens`; the base
    class itself cannot be instantiated because that method is abstract.
    """

    def __init__(self, spec_resource_manager: Optional[BaseResourceManager] = None):
        """Remember the optional resource manager on the instance.

        Args:
            spec_resource_manager: Stored unchanged as
                ``self.spec_resource_manager``. Defaults to ``None``.
        """
        self.spec_resource_manager = spec_resource_manager

    @abstractmethod
    def prepare_draft_tokens(self, scheduled_requests: ScheduledRequests) -> None:
        """Prepare the draft tokens for the forward computation this step.

        Args:
            scheduled_requests: The ``ScheduledRequests`` object handed in by
                the caller for the current step.

        Raises:
            NotImplementedError: Always, when this base implementation is
                invoked directly (e.g. through ``super()``).
        """
        raise NotImplementedError