From cf88da7ecaa1f3c45ef124ec09dc87b690b8487b Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Fri, 23 Jan 2026 15:58:26 -0800 Subject: [PATCH] [None][feat] KV Connector Support for MTP (#10932) Signed-off-by: jthomson04 Co-authored-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com> --- tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py | 4 +++- tensorrt_llm/_torch/pyexecutor/py_executor_creator.py | 3 --- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py b/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py index 59f7256c98..380486e935 100644 --- a/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py +++ b/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py @@ -49,6 +49,7 @@ from tensorrt_llm.bindings.internal.batch_manager import \ from tensorrt_llm.bindings.internal.batch_manager import LlmRequest from tensorrt_llm.llmapi.llm_args import TorchLlmArgs +from .llm_request import get_draft_token_length from .scheduler import ScheduledRequests if TYPE_CHECKING: @@ -310,7 +311,8 @@ class KvCacheConnectorSchedulerOutputRequest: req.context_chunk_size) else: computed_position = len(tokens) - 1 - num_scheduled_tokens = 1 # Specdec with draft tokens is not supported yet. + num_scheduled_tokens = 1 + get_draft_token_length( + req) # One new token plus the request's draft token length. return RequestData(req.request_id, new_tokens, new_block_ids, computed_position, num_scheduled_tokens) diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py index 46960dabe7..50feb71943 100644 --- a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py +++ b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py @@ -554,9 +554,6 @@ def create_py_executor( raise NotImplementedError( "KV connector is only supported with guaranteed no evict scheduler policy."
) - elif spec_config is not None: - raise NotImplementedError( - "KV connector is not supported with speculative decoding.") try: module = importlib.import_module( kv_connector_config.connector_module)