mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 02:31:33 +08:00
[None][feat] KV Connector Support for MTP (#10932)
Signed-off-by: jthomson04 <jwillthomson19@gmail.com> Co-authored-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com>
This commit is contained in:
parent
1fbbb1f3cd
commit
cf88da7eca
@@ -49,6 +49,7 @@ from tensorrt_llm.bindings.internal.batch_manager import \
|
||||
from tensorrt_llm.bindings.internal.batch_manager import LlmRequest
|
||||
from tensorrt_llm.llmapi.llm_args import TorchLlmArgs
|
||||
|
||||
+ from .llm_request import get_draft_token_length
|
||||
from .scheduler import ScheduledRequests
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -310,7 +311,8 @@ class KvCacheConnectorSchedulerOutputRequest:
|
||||
req.context_chunk_size)
|
||||
else:
|
||||
computed_position = len(tokens) - 1
|
||||
- num_scheduled_tokens = 1 # Specdec with draft tokens is not supported yet.
+ num_scheduled_tokens = 1 + get_draft_token_length(
+ req) # Specdec with draft tokens is not supported yet.
|
||||
|
||||
return RequestData(req.request_id, new_tokens, new_block_ids,
|
||||
computed_position, num_scheduled_tokens)
|
||||
|
||||
@@ -554,9 +554,6 @@ def create_py_executor(
|
||||
raise NotImplementedError(
|
||||
"KV connector is only supported with guaranteed no evict scheduler policy."
|
||||
)
|
||||
- elif spec_config is not None:
- raise NotImplementedError(
- "KV connector is not supported with speculative decoding.")
|
||||
try:
|
||||
module = importlib.import_module(
|
||||
kv_connector_config.connector_module)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user