TensorRT-LLMs/tensorrt_llm/_torch/speculative/drafter.py
wili 56cdfe5c6c
[TRTLLM-5000][feat] NGrams V2 (#4569)
Signed-off-by: wili-65535 <wili-65535@users.noreply.github.com>
Co-authored-by: wili-65535 <wili-65535@users.noreply.github.com>
2025-06-27 23:00:17 +08:00

27 lines
702 B
Python

from abc import ABC, abstractmethod
from typing import Optional
from ..pyexecutor.resource_manager import BaseResourceManager
from ..pyexecutor.sampler import SampleState
from ..pyexecutor.scheduler import ScheduledRequests
class Drafter(ABC):
    """Abstract interface for objects that prepare draft tokens.

    Concrete subclasses must override :meth:`prepare_draft_tokens`; because
    that method is marked with ``@abstractmethod``, this class cannot be
    instantiated directly.
    """

    def __init__(
        self,
        spec_resource_manager: Optional[BaseResourceManager] = None,
    ):
        """Store the optional resource manager on the instance.

        Args:
            spec_resource_manager: Kept as ``self.spec_resource_manager``
                without further processing. Defaults to ``None``.
        """
        self.spec_resource_manager = spec_resource_manager

    @abstractmethod
    def prepare_draft_tokens(
        self,
        scheduled_requests: ScheduledRequests,
        state: SampleState,
    ) -> None:
        """Prepare the draft tokens for the forward computation this step.

        Args:
            scheduled_requests: Requests to prepare draft tokens for
                (presumably those scheduled for the current step; confirm
                against the caller).
            state: Sampler state passed in by the caller (exact contents
                are not visible from this file; confirm against
                ``SampleState``).

        Raises:
            NotImplementedError: Always, in this base implementation;
                subclasses provide the real behavior.
        """
        raise NotImplementedError