From e30313220c43fcaa74cab666092844eceb8e39d7 Mon Sep 17 00:00:00 2001 From: alberto Date: Tue, 2 Jun 2026 09:50:05 +0100 Subject: [PATCH] [Parser] Migrate `ResponsesParser` to unified `Parser` interface (#42977) Signed-off-by: Alberto Perdomo --- .../openai/test_responses_parser_unified.py | 382 ++++++++++++++++++ .../openai/parser/responses_parser.py | 125 +++--- vllm/entrypoints/openai/responses/context.py | 18 +- vllm/entrypoints/openai/responses/serving.py | 11 +- 4 files changed, 442 insertions(+), 94 deletions(-) create mode 100644 tests/entrypoints/openai/test_responses_parser_unified.py diff --git a/tests/entrypoints/openai/test_responses_parser_unified.py b/tests/entrypoints/openai/test_responses_parser_unified.py new file mode 100644 index 00000000000..ecc857e1aac --- /dev/null +++ b/tests/entrypoints/openai/test_responses_parser_unified.py @@ -0,0 +1,382 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Unit tests for ResponsesParser with the unified Parser interface. + +These tests verify that ResponsesParser correctly delegates to the unified +Parser (via extract_response_outputs) instead of calling separate +ReasoningParser / ToolParser instances directly. 
+""" + +from collections.abc import Sequence +from unittest.mock import MagicMock + +import pytest + +from vllm.entrypoints.openai.engine.protocol import ( + DeltaMessage, + ExtractedToolCallInformation, + FunctionCall, + ToolCall, +) +from vllm.entrypoints.openai.parser.responses_parser import ( + ResponsesParser, + get_responses_parser_for_simple_context, +) +from vllm.entrypoints.openai.responses.protocol import ResponsesRequest +from vllm.outputs import CompletionOutput +from vllm.parser.abstract_parser import DelegatingParser + +pytestmark = pytest.mark.skip_global_cleanup + + +# --------------------------------------------------------------------------- +# Test parser stubs +# --------------------------------------------------------------------------- + + +class _NoOpParser(DelegatingParser): + """Parser that extracts no reasoning and no tool calls.""" + + def is_reasoning_end(self, input_ids: list[int]) -> bool: + return False + + def extract_content_ids(self, input_ids: list[int]) -> list[int]: + return input_ids + + def extract_reasoning(self, model_output, request): + return None, model_output + + def extract_reasoning_streaming(self, *args, **kwargs): + return None + + def extract_tool_calls(self, model_output, request): + return ExtractedToolCallInformation( + tools_called=False, tool_calls=[], content=model_output + ) + + def extract_tool_calls_streaming(self, *args, **kwargs): + return None + + def parse_delta(self, *args, **kwargs) -> DeltaMessage | None: + return None + + +class _ReasoningOnlyParser(DelegatingParser): + """Parser that extracts reasoning but no tool calls.""" + + def is_reasoning_end(self, input_ids: list[int]) -> bool: + return False + + def extract_content_ids(self, input_ids: list[int]) -> list[int]: + return input_ids + + def extract_reasoning(self, model_output, request): + if "<think>" in model_output and "</think>" in model_output: + start = model_output.index("<think>") + len("<think>") + end = model_output.index("</think>") + reasoning = model_output[start:end] 
+ content = model_output[end + len("</think>") :] + return reasoning, content.strip() or None + return None, model_output + + def extract_reasoning_streaming(self, *args, **kwargs): + return None + + def extract_tool_calls(self, model_output, request): + return ExtractedToolCallInformation( + tools_called=False, tool_calls=[], content=model_output + ) + + def extract_tool_calls_streaming(self, *args, **kwargs): + return None + + def parse_delta(self, *args, **kwargs) -> DeltaMessage | None: + return None + + +class _StubToolParser: + """Minimal tool parser stub that always returns a hardcoded tool call.""" + + supports_required_and_named = False + + def __init__(self, tokenizer=None, tools=None): + pass + + def extract_tool_calls(self, model_output, request): + return ExtractedToolCallInformation( + tools_called=True, + tool_calls=[ + ToolCall( + id="call_123", + type="function", + function=FunctionCall( + name="get_weather", + arguments='{"location": "Paris"}', + ), + ) + ], + content=None, + ) + + def extract_tool_calls_streaming(self, *args, **kwargs): + return None + + def adjust_request(self, request): + return request + + +class _ToolCallingParser(DelegatingParser): + """Parser that extracts a hardcoded tool call from any input.""" + + def __init__(self, tokenizer, *args, **kwargs): + super().__init__(tokenizer) + self._tool_parser = _StubToolParser() + + def is_reasoning_end(self, input_ids: list[int]) -> bool: + return False + + def extract_content_ids(self, input_ids: list[int]) -> list[int]: + return input_ids + + def extract_reasoning(self, model_output, request): + return None, model_output + + def extract_reasoning_streaming(self, *args, **kwargs): + return None + + def extract_tool_calls_streaming(self, *args, **kwargs): + return None + + def parse_delta(self, *args, **kwargs) -> DeltaMessage | None: + return None + + +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- + + +def _make_request(**overrides) -> ResponsesRequest: + defaults = {"model": "test-model", "input": "test"} + defaults.update(overrides) + return ResponsesRequest.model_validate(defaults) + + +def _make_output( + text: str = "Hello, world!", + token_ids: Sequence[int] = (1, 2, 3), + finish_reason: str = "stop", +) -> CompletionOutput: + return CompletionOutput( + index=0, + text=text, + token_ids=list(token_ids), + cumulative_logprob=None, + logprobs=None, + finish_reason=finish_reason, + ) + + +def _make_parser(parser_cls, **overrides): + defaults = dict( + tokenizer=MagicMock(), + parser_cls=parser_cls, + response_messages=[], + request=_make_request(), + chat_template=None, + chat_template_content_format="auto", + ) + defaults.update(overrides) + return ResponsesParser(**defaults) + + +# --------------------------------------------------------------------------- +# Tests: basic text passthrough +# --------------------------------------------------------------------------- + + +def test_process_text_with_parser(): + """Parser with no reasoning/tools returns a single message item.""" + parser = _make_parser(_NoOpParser) + parser.process(_make_output(text="Hello!")) + + assert len(parser.response_messages) == 1 + msg = parser.response_messages[0] + assert msg.type == "message" + assert msg.content[0].text == "Hello!" + + +def test_process_text_without_parser(): + """parser_cls=None falls back to plain text wrapping.""" + parser = _make_parser(None) + parser.process(_make_output(text="Hello!")) + + assert len(parser.response_messages) == 1 + msg = parser.response_messages[0] + assert msg.type == "message" + assert msg.content[0].text == "Hello!" 
+ + +# --------------------------------------------------------------------------- +# Tests: empty / whitespace output +# --------------------------------------------------------------------------- + + +def test_process_empty_text_without_parser(): + """Empty text with no parser produces no output items.""" + parser = _make_parser(None) + parser.process(_make_output(text="")) + + assert len(parser.response_messages) == 0 + + +def test_process_empty_text_with_parser(): + """Empty text with parser produces no output items.""" + parser = _make_parser(_NoOpParser) + parser.process(_make_output(text="")) + + assert len(parser.response_messages) == 0 + + +# --------------------------------------------------------------------------- +# Tests: reasoning extraction +# --------------------------------------------------------------------------- + + +def test_process_extracts_reasoning(): + """Parser that finds reasoning produces both reasoning and message items.""" + parser = _make_parser(_ReasoningOnlyParser) + parser.process(_make_output(text="<think>Let me check</think>The answer is 42")) + + types = [m.type for m in parser.response_messages] + assert "reasoning" in types + assert "message" in types + + reasoning_item = next(m for m in parser.response_messages if m.type == "reasoning") + assert reasoning_item.content[0].text == "Let me check" + + message_item = next(m for m in parser.response_messages if m.type == "message") + assert message_item.content[0].text == "The answer is 42" + + +def test_process_reasoning_only_no_content(): + """When reasoning consumes all text, only a reasoning item is produced.""" + parser = _make_parser(_ReasoningOnlyParser) + parser.process(_make_output(text="<think>Just thinking</think>")) + + types = [m.type for m in parser.response_messages] + assert "reasoning" in types + assert "message" not in types + + +# --------------------------------------------------------------------------- +# Tests: tool call extraction +# 
--------------------------------------------------------------------------- + + +def test_process_extracts_tool_calls(): + """Parser that finds tool calls produces function_call items.""" + request = _make_request( + tool_choice="auto", + tools=[ + { + "type": "function", + "name": "get_weather", + "parameters": {"type": "object", "properties": {}}, + } + ], + ) + parser = _make_parser(_ToolCallingParser, request=request, enable_auto_tools=True) + parser.process(_make_output(text="calling tool")) + + types = [m.type for m in parser.response_messages] + assert "function_call" in types + + tool_item = next(m for m in parser.response_messages if m.type == "function_call") + assert tool_item.name == "get_weather" + assert tool_item.arguments == '{"location": "Paris"}' + assert tool_item.status == "completed" + + +# --------------------------------------------------------------------------- +# Tests: finish_reason tracking +# --------------------------------------------------------------------------- + + +def test_finish_reason_tracked(): + """finish_reason from CompletionOutput is stored on the parser.""" + parser = _make_parser(_NoOpParser) + assert parser.finish_reason is None + + parser.process(_make_output(finish_reason="stop")) + assert parser.finish_reason == "stop" + + parser.process(_make_output(finish_reason="length")) + assert parser.finish_reason == "length" + + +# --------------------------------------------------------------------------- +# Tests: multi-turn accumulation +# --------------------------------------------------------------------------- + + +def test_multi_turn_accumulation(): + """Multiple process() calls accumulate response_messages.""" + parser = _make_parser(_NoOpParser) + + parser.process(_make_output(text="First turn")) + parser.process(_make_output(text="Second turn")) + + assert len(parser.response_messages) == 2 + texts = [m.content[0].text for m in parser.response_messages] + assert texts == ["First turn", "Second turn"] + + +def 
test_num_init_messages_offset(): + """Initial messages are preserved and offset works correctly.""" + init_messages = [MagicMock(type="message")] + parser = _make_parser(_NoOpParser, response_messages=init_messages) + + assert parser.num_init_messages == 1 + + parser.process(_make_output(text="New output")) + + assert len(parser.response_messages) == 2 + items = parser.make_response_output_items_from_parsable_context() + assert len(items) == 1 + assert items[0].type == "message" + + +# --------------------------------------------------------------------------- +# Tests: factory function +# --------------------------------------------------------------------------- + + +def test_factory_function_creates_parser(): + """get_responses_parser_for_simple_context returns a working parser.""" + rp = get_responses_parser_for_simple_context( + tokenizer=MagicMock(), + parser_cls=_NoOpParser, + response_messages=[], + request=_make_request(), + chat_template=None, + chat_template_content_format="auto", + ) + assert isinstance(rp, ResponsesParser) + + rp.process(_make_output(text="Works!")) + assert len(rp.response_messages) == 1 + + +def test_factory_function_none_parser(): + """Factory function works with parser_cls=None.""" + rp = get_responses_parser_for_simple_context( + tokenizer=MagicMock(), + parser_cls=None, + response_messages=[], + request=_make_request(), + chat_template=None, + chat_template_content_format="auto", + ) + assert isinstance(rp, ResponsesParser) + assert rp.parser_instance is None diff --git a/vllm/entrypoints/openai/parser/responses_parser.py b/vllm/entrypoints/openai/parser/responses_parser.py index 1868a31ca28..809b601fd21 100644 --- a/vllm/entrypoints/openai/parser/responses_parser.py +++ b/vllm/entrypoints/openai/parser/responses_parser.py @@ -10,10 +10,6 @@ from openai.types.responses.response_function_tool_call_output_item import ( from openai.types.responses.response_output_item import McpCall from 
openai.types.responses.response_output_message import ResponseOutputMessage from openai.types.responses.response_output_text import ResponseOutputText -from openai.types.responses.response_reasoning_item import ( - Content, - ResponseReasoningItem, -) from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption from vllm.entrypoints.constants import MCP_PREFIX @@ -22,9 +18,8 @@ from vllm.entrypoints.openai.responses.protocol import ( ResponsesRequest, ) from vllm.outputs import CompletionOutput -from vllm.reasoning.abs_reasoning_parsers import ReasoningParser +from vllm.parser.abstract_parser import Parser from vllm.tokenizers import TokenizerLike -from vllm.tool_parsers.abstract_tool_parser import ToolParser from vllm.utils import random_uuid logger = logging.getLogger(__name__) @@ -37,12 +32,13 @@ class ResponsesParser: self, *, tokenizer: TokenizerLike, - reasoning_parser_cls: type[ReasoningParser], + parser_cls: type[Parser] | None, response_messages: list[ResponseInputOutputItem], request: ResponsesRequest, - tool_parser_cls: type[ToolParser] | None, chat_template: str | None, chat_template_content_format: ChatTemplateContentFormatOption, + enable_auto_tools: bool = False, + tool_call_id_type: str = "random", ): self.response_messages: list[ResponseInputOutputItem] = ( # TODO: initial messages may not be properly typed @@ -52,17 +48,22 @@ class ResponsesParser: self.tokenizer = tokenizer self.request = request - self.reasoning_parser_instance = reasoning_parser_cls( - tokenizer, - chat_template_kwargs=_effective_chat_template_kwargs( + self.parser_instance: Parser | None = None + if parser_cls is not None: + chat_template_kwargs = _effective_chat_template_kwargs( request, chat_template=chat_template, chat_template_content_format=chat_template_content_format, - ), - ) - self.tool_parser_instance = None - if tool_parser_cls is not None: - self.tool_parser_instance = tool_parser_cls(tokenizer, request.tools) + ) + + self.parser_instance = parser_cls( + 
tokenizer, + tools=request.tools, + chat_template_kwargs=chat_template_kwargs, + ) + + self.enable_auto_tools = enable_auto_tools + self.tool_call_id_type = tool_call_id_type # Store the last finish_reason to determine response status self.finish_reason: str | None = None @@ -71,66 +72,34 @@ class ResponsesParser: # Store the finish_reason from the output self.finish_reason = output.finish_reason - reasoning, content = self.reasoning_parser_instance.extract_reasoning( - output.text, request=self.request - ) - if reasoning: - self.response_messages.append( - ResponseReasoningItem( - type="reasoning", - id=f"rs_{random_uuid()}", - summary=[], - content=[ - Content( - type="reasoning_text", - text=reasoning, - ) - ], - ) + if self.parser_instance is not None: + output_items = self.parser_instance.extract_response_outputs( + model_output=output.text, + model_output_token_ids=output.token_ids, + request=self.request, + enable_auto_tools=self.enable_auto_tools, + tool_call_id_type=self.tool_call_id_type, ) - - function_calls: list[ResponseFunctionToolCall] = [] - if self.tool_parser_instance is not None: - tool_call_info = self.tool_parser_instance.extract_tool_calls( - content if content is not None else "", - request=self.request, # type: ignore - ) - if tool_call_info is not None and tool_call_info.tools_called: - # extract_tool_calls() returns a list of tool calls. 
- function_calls.extend( - ResponseFunctionToolCall( - id=f"fc_{random_uuid()}", - call_id=f"call_{random_uuid()}", - type="function_call", + self.response_messages.extend(output_items) + else: + # No parser configured, treat entire output as text content + if output.text: + self.response_messages.append( + ResponseOutputMessage( + type="message", + id=f"msg_{random_uuid()}", status="completed", - name=tool_call.function.name, - arguments=tool_call.function.arguments, + role="assistant", + content=[ + ResponseOutputText( + annotations=[], # TODO + type="output_text", + text=output.text, + logprobs=None, # TODO + ) + ], ) - for tool_call in tool_call_info.tool_calls ) - content = tool_call_info.content - if content and content.strip() == "": - content = None - - if content: - self.response_messages.append( - ResponseOutputMessage( - type="message", - id=f"msg_{random_uuid()}", - status="completed", - role="assistant", - content=[ - ResponseOutputText( - annotations=[], # TODO - type="output_text", - text=content, - logprobs=None, # TODO - ) - ], - ) - ) - if len(function_calls) > 0: - self.response_messages.extend(function_calls) return self @@ -169,27 +138,29 @@ class ResponsesParser: def get_responses_parser_for_simple_context( *, tokenizer: TokenizerLike, - reasoning_parser_cls: type[ReasoningParser], + parser_cls: type[Parser] | None, response_messages: list[ResponseInputOutputItem], request: ResponsesRequest, - tool_parser_cls, chat_template: str | None, chat_template_content_format: ChatTemplateContentFormatOption, + enable_auto_tools: bool = False, + tool_call_id_type: str = "random", ) -> ResponsesParser: """Factory function to create a ResponsesParser with - optional reasoning parser. + optional unified parser. 
Returns: ResponsesParser instance configured with the provided parser """ return ResponsesParser( tokenizer=tokenizer, - reasoning_parser_cls=reasoning_parser_cls, + parser_cls=parser_cls, response_messages=response_messages, request=request, - tool_parser_cls=tool_parser_cls, chat_template=chat_template, chat_template_content_format=chat_template_content_format, + enable_auto_tools=enable_auto_tools, + tool_call_id_type=tool_call_id_type, ) diff --git a/vllm/entrypoints/openai/responses/context.py b/vllm/entrypoints/openai/responses/context.py index 644dc8cfaaa..62de02ef826 100644 --- a/vllm/entrypoints/openai/responses/context.py +++ b/vllm/entrypoints/openai/responses/context.py @@ -41,9 +41,8 @@ from vllm.entrypoints.openai.responses.protocol import ( ) from vllm.entrypoints.openai.responses.utils import construct_tool_dicts from vllm.outputs import RequestOutput -from vllm.reasoning.abs_reasoning_parsers import ReasoningParser +from vllm.parser.abstract_parser import Parser from vllm.tokenizers import TokenizerLike -from vllm.tool_parsers.abstract_tool_parser import ToolParser from vllm.utils import random_uuid if TYPE_CHECKING: @@ -272,12 +271,13 @@ class ParsableContext(ConversationContext): *, response_messages: list[ResponseInputOutputItem], tokenizer: TokenizerLike, - reasoning_parser_cls: type[ReasoningParser] | None, + parser_cls: type[Parser] | None, request: ResponsesRequest, available_tools: list[str] | None, - tool_parser_cls: type[ToolParser] | None, chat_template: str | None, chat_template_content_format: ChatTemplateContentFormatOption, + enable_auto_tools: bool = False, + tool_call_id_type: str = "random", ): self.num_prompt_tokens = 0 self.num_output_tokens = 0 @@ -286,19 +286,17 @@ class ParsableContext(ConversationContext): # not implemented yet for ParsableContext self.all_turn_metrics: list[TurnMetrics] = [] - if reasoning_parser_cls is None: - raise ValueError("reasoning_parser_cls must be provided.") - self.parser = 
get_responses_parser_for_simple_context( tokenizer=tokenizer, - reasoning_parser_cls=reasoning_parser_cls, + parser_cls=parser_cls, response_messages=response_messages, request=request, - tool_parser_cls=tool_parser_cls, chat_template=chat_template, chat_template_content_format=chat_template_content_format, + enable_auto_tools=enable_auto_tools, + tool_call_id_type=tool_call_id_type, ) - self.tool_parser_cls = tool_parser_cls + self.parser_cls = parser_cls self.request = request self.available_tools = available_tools or [] diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 7da04b3994d..bb700cd7dd6 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -460,16 +460,13 @@ class OpenAIServingResponses(OpenAIServing): context = ParsableContext( response_messages=messages, tokenizer=tokenizer, - reasoning_parser_cls=self.parser.reasoning_parser_cls - if self.parser - else None, + parser_cls=self.parser, request=request, - tool_parser_cls=self.parser.tool_parser_cls - if self.parser - else None, available_tools=available_tools, chat_template=self.chat_template, chat_template_content_format=self.chat_template_content_format, + enable_auto_tools=self.enable_auto_tools, + tool_call_id_type=self.tool_call_id_type, ) else: context = SimpleContext() @@ -708,7 +705,7 @@ class OpenAIServingResponses(OpenAIServing): context.request, context.parser.response_messages, context.tool_dicts, - context.tool_parser_cls, + context.parser_cls.tool_parser_cls if context.parser_cls else None, context.chat_template, context.chat_template_content_format, )