diff --git a/tests/reasoning/test_kimi_k2_reasoning_parser.py b/tests/reasoning/test_kimi_k2_reasoning_parser.py index 0f80bb8854a..dfce2075c6a 100644 --- a/tests/reasoning/test_kimi_k2_reasoning_parser.py +++ b/tests/reasoning/test_kimi_k2_reasoning_parser.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from unittest.mock import MagicMock + import pytest from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest @@ -12,6 +14,20 @@ from vllm.tokenizers import get_tokenizer REASONING_MODEL_NAME = "moonshotai/Kimi-K2.5" +@pytest.fixture +def mock_kimi_k2_tokenizer(): + tokenizer = MagicMock() + tokenizer.get_vocab.return_value = { + "": 100, + "": 101, + "<|tool_calls_section_begin|>": 200, + "<|tool_calls_section_end|>": 201, + "<|tool_call_begin|>": 202, + "<|tool_call_end|>": 203, + } + return tokenizer + + @pytest.fixture(scope="module") def kimi_k2_tokenizer(): return get_tokenizer(tokenizer_name=REASONING_MODEL_NAME, trust_remote_code=True) @@ -153,3 +169,50 @@ def test_streaming_tool_section_ends_reasoning(kimi_k2_tokenizer): ) assert isinstance(result, DeltaMessage) assert result.content == "<|tool_calls_section_begin|>" + + +def test_streaming_end_token_id_buffered(mock_kimi_k2_tokenizer): + """When stop sequences buffer text, ID arrives before its text. + + The token ID is present in delta_token_ids but the actual string is not + yet in delta_text (still buffered). The parser must return None to wait + for the next delta, instead of calling find() which returns -1 and + silently corrupting the text split. + """ + parser = KimiK2ReasoningParser(mock_kimi_k2_tokenizer) + think_id = parser._start_token_id + end_think_id = parser._end_token_id + + # Simulate: ID arrived but text not yet flushed. + # Two token IDs in delta to bypass the single-special-token guard. + result = parser.extract_reasoning_streaming( + previous_text="some reasoning", + current_text="some reasoning extra", + delta_text="extra", # text not yet flushed + previous_token_ids=[think_id], + current_token_ids=[think_id, end_think_id, 999], + delta_token_ids=[end_think_id, 999], + ) + assert result is None + + +def test_streaming_tool_section_id_buffered(mock_kimi_k2_tokenizer): + """When stop sequences buffer text, tool section start ID arrives before its text. + + Same buffering scenario as above but for <|tool_calls_section_begin|>. + Without the guard, find() returns -1 and delta_text[:tool_index] silently + drops the last character of reasoning. + """ + parser = KimiK2ReasoningParser(mock_kimi_k2_tokenizer) + think_id = parser._start_token_id + tool_begin_id = parser._tool_section_start_token_id + + result = parser.extract_reasoning_streaming( + previous_text="some reasoning", + current_text="some reasoning extra", + delta_text="extra", # tool section text not yet flushed + previous_token_ids=[think_id], + current_token_ids=[think_id, tool_begin_id, 999], + delta_token_ids=[tool_begin_id, 999], + ) + assert result is None diff --git a/vllm/reasoning/kimi_k2_reasoning_parser.py b/vllm/reasoning/kimi_k2_reasoning_parser.py index 7a92703426f..0b64c5c62ea 100644 --- a/vllm/reasoning/kimi_k2_reasoning_parser.py +++ b/vllm/reasoning/kimi_k2_reasoning_parser.py @@ -221,6 +221,10 @@ class KimiK2ReasoningParser(ReasoningParser): return None if self._end_token_id in delta_token_ids: + if self._end_token not in delta_text: + # Token ID arrived before text was flushed (stop-sequence buffering). + # Wait for the next delta when the text becomes visible. + return None end_index = delta_text.find(self._end_token) reasoning = delta_text[:end_index] content = delta_text[end_index + len(self._end_token) :] @@ -229,6 +233,9 @@ class KimiK2ReasoningParser(ReasoningParser): ) if self._tool_section_start_token_id in delta_token_ids: + if self._tool_section_start_token not in delta_text: + # Token ID arrived before text was flushed (stop-sequence buffering). + return None tool_index = delta_text.find(self._tool_section_start_token) reasoning = delta_text[:tool_index] content = delta_text[tool_index:]