[None][feat] KV Connector Support for MTP (#10932)

Signed-off-by: jthomson04 <jwillthomson19@gmail.com> Co-authored-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com>
2026-02-05 02:31:33 +08:00 · 2026-01-23 15:58:26 -08:00 · 2026-01-23 15:58:26 -08:00 · cf88da7eca
commit cf88da7eca
parent 1fbbb1f3cd
2 changed files with 3 additions and 4 deletions
--- a/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py
+++ b/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py
@@ -49,6 +49,7 @@ from tensorrt_llm.bindings.internal.batch_manager import \
 from tensorrt_llm.bindings.internal.batch_manager import LlmRequest
 from tensorrt_llm.llmapi.llm_args import TorchLlmArgs

+from .llm_request import get_draft_token_length
 from .scheduler import ScheduledRequests

 if TYPE_CHECKING:
@@ -310,7 +311,8 @@ class KvCacheConnectorSchedulerOutputRequest:
                                       req.context_chunk_size)
        else:
            computed_position = len(tokens) - 1
-            num_scheduled_tokens = 1  # Specdec with draft tokens is not supported yet.
+            num_scheduled_tokens = 1 + get_draft_token_length(
+                req)  # One new token plus the request's draft tokens (specdec).

        return RequestData(req.request_id, new_tokens, new_block_ids,
                           computed_position, num_scheduled_tokens)
--- a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
+++ b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
@@ -554,9 +554,6 @@ def create_py_executor(
            raise NotImplementedError(
                "KV connector is only supported with guaranteed no evict scheduler policy."
            )
-        elif spec_config is not None:
-            raise NotImplementedError(
-                "KV connector is not supported with speculative decoding.")
        try:
            module = importlib.import_module(
                kv_connector_config.connector_module)