From e30313220c43fcaa74cab666092844eceb8e39d7 Mon Sep 17 00:00:00 2001
From: alberto <aperdomo@redhat.com>
Date: Tue, 2 Jun 2026 09:50:05 +0100
Subject: [PATCH] [Parser] Migrate `ResponsesParser` to unified `Parser`
 interface (#42977)

Signed-off-by: Alberto Perdomo <aperdomo@redhat.com>
---
 .../openai/test_responses_parser_unified.py   | 382 ++++++++++++++++++
 .../openai/parser/responses_parser.py         | 125 +++---
 vllm/entrypoints/openai/responses/context.py  |  18 +-
 vllm/entrypoints/openai/responses/serving.py  |  11 +-
 4 files changed, 442 insertions(+), 94 deletions(-)
 create mode 100644 tests/entrypoints/openai/test_responses_parser_unified.py
diff --git a/tests/entrypoints/openai/test_responses_parser_unified.py b/tests/entrypoints/openai/test_responses_parser_unified.py
new file mode 100644
index 00000000000..ecc857e1aac
--- /dev/null
+++ b/tests/entrypoints/openai/test_responses_parser_unified.py
@@ -0,0 +1,382 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for ResponsesParser with the unified Parser interface.
+
+These tests verify that ResponsesParser correctly delegates to the unified
+Parser (via extract_response_outputs) instead of calling separate
+ReasoningParser / ToolParser instances directly.
+"""
+
+from collections.abc import Sequence
+from unittest.mock import MagicMock
+
+import pytest
+
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaMessage,
+    ExtractedToolCallInformation,
+    FunctionCall,
+    ToolCall,
+)
+from vllm.entrypoints.openai.parser.responses_parser import (
+    ResponsesParser,
+    get_responses_parser_for_simple_context,
+)
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.outputs import CompletionOutput
+from vllm.parser.abstract_parser import DelegatingParser
+
+pytestmark = pytest.mark.skip_global_cleanup
+
+
+# ---------------------------------------------------------------------------
+# Test parser stubs
+# ---------------------------------------------------------------------------
+
+
+class _NoOpParser(DelegatingParser):
+    """Stub parser: no reasoning, no tool calls; output passes through as content."""
+
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        return False
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        return input_ids
+
+    def extract_reasoning(self, model_output, request):
+        return None, model_output  # (reasoning, content): all text is content
+
+    def extract_reasoning_streaming(self, *args, **kwargs):
+        return None
+
+    def extract_tool_calls(self, model_output, request):
+        return ExtractedToolCallInformation(
+            tools_called=False, tool_calls=[], content=model_output
+        )
+
+    def extract_tool_calls_streaming(self, *args, **kwargs):
+        return None
+
+    def parse_delta(self, *args, **kwargs) -> DeltaMessage | None:
+        return None
+
+
+class _ReasoningOnlyParser(DelegatingParser):
+    """Stub parser: splits <think>...</think> reasoning from content; no tools."""
+
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        return False
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        return input_ids
+
+    def extract_reasoning(self, model_output, request):
+        if "<think>" in model_output and "</think>" in model_output:
+            start = model_output.index("<think>") + len("<think>")
+            end = model_output.index("</think>")
+            reasoning = model_output[start:end]
+            content = model_output[end + len("</think>") :]
+            return reasoning, content.strip() or None  # blank tail -> no content
+        return None, model_output  # either tag missing: everything is content
+
+    def extract_reasoning_streaming(self, *args, **kwargs):
+        return None
+
+    def extract_tool_calls(self, model_output, request):
+        return ExtractedToolCallInformation(
+            tools_called=False, tool_calls=[], content=model_output
+        )
+
+    def extract_tool_calls_streaming(self, *args, **kwargs):
+        return None
+
+    def parse_delta(self, *args, **kwargs) -> DeltaMessage | None:
+        return None
+
+
+class _StubToolParser:
+    """Tool-parser stub: every call reports one fixed ``get_weather`` tool call."""
+
+    supports_required_and_named = False
+
+    def __init__(self, tokenizer=None, tools=None):
+        pass  # arguments are accepted but ignored
+
+    def extract_tool_calls(self, model_output, request):
+        return ExtractedToolCallInformation(
+            tools_called=True,
+            tool_calls=[
+                ToolCall(
+                    id="call_123",
+                    type="function",
+                    function=FunctionCall(
+                        name="get_weather",
+                        arguments='{"location": "Paris"}',
+                    ),
+                )
+            ],
+            content=None,
+        )
+
+    def extract_tool_calls_streaming(self, *args, **kwargs):
+        return None
+
+    def adjust_request(self, request):
+        return request  # request is handed back unmodified
+
+
+class _ToolCallingParser(DelegatingParser):
+    """Parser whose ``_tool_parser`` is a stub that returns a hardcoded tool call."""
+
+    def __init__(self, tokenizer, *args, **kwargs):
+        super().__init__(tokenizer)  # extra ctor args are not forwarded
+        self._tool_parser = _StubToolParser()
+
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        return False
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        return input_ids
+
+    def extract_reasoning(self, model_output, request):
+        return None, model_output  # (reasoning, content): all text is content
+
+    def extract_reasoning_streaming(self, *args, **kwargs):
+        return None
+
+    def extract_tool_calls_streaming(self, *args, **kwargs):
+        return None
+
+    def parse_delta(self, *args, **kwargs) -> DeltaMessage | None:
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_request(**overrides) -> ResponsesRequest:
+    defaults = {"model": "test-model", "input": "test"}
+    defaults.update(overrides)  # caller-supplied fields win over the defaults
+    return ResponsesRequest.model_validate(defaults)
+
+
+def _make_output(
+    text: str = "Hello, world!",
+    token_ids: Sequence[int] = (1, 2, 3),
+    finish_reason: str = "stop",
+) -> CompletionOutput:
+    return CompletionOutput(
+        index=0,
+        text=text,
+        token_ids=list(token_ids),  # fresh list; the default is a tuple
+        cumulative_logprob=None,
+        logprobs=None,
+        finish_reason=finish_reason,
+    )
+
+
+def _make_parser(parser_cls, **overrides):
+    defaults = dict(
+        tokenizer=MagicMock(),
+        parser_cls=parser_cls,
+        response_messages=[],
+        request=_make_request(),
+        chat_template=None,
+        chat_template_content_format="auto",
+    )
+    defaults.update(overrides)  # e.g. request=..., enable_auto_tools=True
+    return ResponsesParser(**defaults)
+
+
+# ---------------------------------------------------------------------------
+# Tests: basic text passthrough
+# ---------------------------------------------------------------------------
+
+
+def test_process_text_with_parser():
+    """A no-op parser yields exactly one message item carrying the raw text."""
+    parser = _make_parser(_NoOpParser)
+    parser.process(_make_output(text="Hello!"))
+
+    assert len(parser.response_messages) == 1
+    msg = parser.response_messages[0]
+    assert msg.type == "message"
+    assert msg.content[0].text == "Hello!"
+
+
+def test_process_text_without_parser():
+    """``parser_cls=None`` wraps the raw text in a single message item."""
+    parser = _make_parser(None)
+    parser.process(_make_output(text="Hello!"))
+
+    assert len(parser.response_messages) == 1
+    msg = parser.response_messages[0]
+    assert msg.type == "message"
+    assert msg.content[0].text == "Hello!"
+
+
+# ---------------------------------------------------------------------------
+# Tests: empty / whitespace output
+# ---------------------------------------------------------------------------
+
+
+def test_process_empty_text_without_parser():
+    """Empty text with ``parser_cls=None`` appends nothing to response_messages."""
+    parser = _make_parser(None)
+    parser.process(_make_output(text=""))
+
+    assert len(parser.response_messages) == 0
+
+
+def test_process_empty_text_with_parser():
+    """Empty text with a no-op parser appends nothing to response_messages."""
+    parser = _make_parser(_NoOpParser)
+    parser.process(_make_output(text=""))
+
+    assert len(parser.response_messages) == 0
+
+
+# ---------------------------------------------------------------------------
+# Tests: reasoning extraction
+# ---------------------------------------------------------------------------
+
+
+def test_process_extracts_reasoning():
+    """``<think>`` text becomes a reasoning item; the remainder a message item."""
+    parser = _make_parser(_ReasoningOnlyParser)
+    parser.process(_make_output(text="<think>Let me check</think>The answer is 42"))
+
+    types = [m.type for m in parser.response_messages]
+    assert "reasoning" in types
+    assert "message" in types
+
+    reasoning_item = next(m for m in parser.response_messages if m.type == "reasoning")
+    assert reasoning_item.content[0].text == "Let me check"
+
+    message_item = next(m for m in parser.response_messages if m.type == "message")
+    assert message_item.content[0].text == "The answer is 42"
+
+
+def test_process_reasoning_only_no_content():
+    """When reasoning consumes all text, a reasoning item but no message is made."""
+    parser = _make_parser(_ReasoningOnlyParser)
+    parser.process(_make_output(text="<think>Just thinking</think>"))
+
+    types = [m.type for m in parser.response_messages]
+    assert "reasoning" in types
+    assert "message" not in types
+
+
+# ---------------------------------------------------------------------------
+# Tests: tool call extraction
+# ---------------------------------------------------------------------------
+
+
+def test_process_extracts_tool_calls():
+    """A parsed tool call becomes a completed ``function_call`` output item."""
+    request = _make_request(
+        tool_choice="auto",
+        tools=[
+            {
+                "type": "function",
+                "name": "get_weather",
+                "parameters": {"type": "object", "properties": {}},
+            }
+        ],
+    )
+    parser = _make_parser(_ToolCallingParser, request=request, enable_auto_tools=True)
+    parser.process(_make_output(text="calling tool"))
+
+    types = [m.type for m in parser.response_messages]
+    assert "function_call" in types
+
+    tool_item = next(m for m in parser.response_messages if m.type == "function_call")
+    assert tool_item.name == "get_weather"
+    assert tool_item.arguments == '{"location": "Paris"}'
+    assert tool_item.status == "completed"
+
+
+# ---------------------------------------------------------------------------
+# Tests: finish_reason tracking
+# ---------------------------------------------------------------------------
+
+
+def test_finish_reason_tracked():
+    """``finish_reason`` starts as None and tracks the most recent output."""
+    parser = _make_parser(_NoOpParser)
+    assert parser.finish_reason is None
+
+    parser.process(_make_output(finish_reason="stop"))
+    assert parser.finish_reason == "stop"
+
+    parser.process(_make_output(finish_reason="length"))
+    assert parser.finish_reason == "length"
+
+
+# ---------------------------------------------------------------------------
+# Tests: multi-turn accumulation
+# ---------------------------------------------------------------------------
+
+
+def test_multi_turn_accumulation():
+    """Successive process() calls append to response_messages in call order."""
+    parser = _make_parser(_NoOpParser)
+
+    parser.process(_make_output(text="First turn"))
+    parser.process(_make_output(text="Second turn"))
+
+    assert len(parser.response_messages) == 2
+    texts = [m.content[0].text for m in parser.response_messages]
+    assert texts == ["First turn", "Second turn"]
+
+
+def test_num_init_messages_offset():
+    """Initial messages are kept but excluded from the generated output items."""
+    init_messages = [MagicMock(type="message")]
+    parser = _make_parser(_NoOpParser, response_messages=init_messages)
+
+    assert parser.num_init_messages == 1
+
+    parser.process(_make_output(text="New output"))
+
+    assert len(parser.response_messages) == 2
+    items = parser.make_response_output_items_from_parsable_context()
+    assert len(items) == 1
+    assert items[0].type == "message"
+
+
+# ---------------------------------------------------------------------------
+# Tests: factory function
+# ---------------------------------------------------------------------------
+
+
+def test_factory_function_creates_parser():
+    """The factory returns a ResponsesParser that can process an output."""
+    rp = get_responses_parser_for_simple_context(
+        tokenizer=MagicMock(),
+        parser_cls=_NoOpParser,
+        response_messages=[],
+        request=_make_request(),
+        chat_template=None,
+        chat_template_content_format="auto",
+    )
+    assert isinstance(rp, ResponsesParser)
+
+    rp.process(_make_output(text="Works!"))
+    assert len(rp.response_messages) == 1
+
+
+def test_factory_function_none_parser():
+    """With ``parser_cls=None`` the factory leaves ``parser_instance`` as None."""
+    rp = get_responses_parser_for_simple_context(
+        tokenizer=MagicMock(),
+        parser_cls=None,
+        response_messages=[],
+        request=_make_request(),
+        chat_template=None,
+        chat_template_content_format="auto",
+    )
+    assert isinstance(rp, ResponsesParser)
+    assert rp.parser_instance is None
diff --git a/vllm/entrypoints/openai/parser/responses_parser.py b/vllm/entrypoints/openai/parser/responses_parser.py
index 1868a31ca28..809b601fd21 100644
--- a/vllm/entrypoints/openai/parser/responses_parser.py
+++ b/vllm/entrypoints/openai/parser/responses_parser.py
@@ -10,10 +10,6 @@ from openai.types.responses.response_function_tool_call_output_item import (
 from openai.types.responses.response_output_item import McpCall
 from openai.types.responses.response_output_message import ResponseOutputMessage
 from openai.types.responses.response_output_text import ResponseOutputText
-from openai.types.responses.response_reasoning_item import (
-    Content,
-    ResponseReasoningItem,
-)
 
 from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
 from vllm.entrypoints.constants import MCP_PREFIX
@@ -22,9 +18,8 @@ from vllm.entrypoints.openai.responses.protocol import (
     ResponsesRequest,
 )
 from vllm.outputs import CompletionOutput
-from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
+from vllm.parser.abstract_parser import Parser
 from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
 from vllm.utils import random_uuid
 
 logger = logging.getLogger(__name__)
@@ -37,12 +32,13 @@ class ResponsesParser:
         self,
         *,
         tokenizer: TokenizerLike,
-        reasoning_parser_cls: type[ReasoningParser],
+        parser_cls: type[Parser] | None,
         response_messages: list[ResponseInputOutputItem],
         request: ResponsesRequest,
-        tool_parser_cls: type[ToolParser] | None,
         chat_template: str | None,
         chat_template_content_format: ChatTemplateContentFormatOption,
+        enable_auto_tools: bool = False,
+        tool_call_id_type: str = "random",
     ):
         self.response_messages: list[ResponseInputOutputItem] = (
             # TODO: initial messages may not be properly typed
@@ -52,17 +48,22 @@ class ResponsesParser:
         self.tokenizer = tokenizer
         self.request = request
 
-        self.reasoning_parser_instance = reasoning_parser_cls(
-            tokenizer,
-            chat_template_kwargs=_effective_chat_template_kwargs(
+        self.parser_instance: Parser | None = None
+        if parser_cls is not None:
+            chat_template_kwargs = _effective_chat_template_kwargs(
                 request,
                 chat_template=chat_template,
                 chat_template_content_format=chat_template_content_format,
-            ),
-        )
-        self.tool_parser_instance = None
-        if tool_parser_cls is not None:
-            self.tool_parser_instance = tool_parser_cls(tokenizer, request.tools)
+            )
+
+            self.parser_instance = parser_cls(
+                tokenizer,
+                tools=request.tools,
+                chat_template_kwargs=chat_template_kwargs,
+            )
+
+        self.enable_auto_tools = enable_auto_tools
+        self.tool_call_id_type = tool_call_id_type
 
         # Store the last finish_reason to determine response status
         self.finish_reason: str | None = None
@@ -71,66 +72,34 @@ class ResponsesParser:
         # Store the finish_reason from the output
         self.finish_reason = output.finish_reason
 
-        reasoning, content = self.reasoning_parser_instance.extract_reasoning(
-            output.text, request=self.request
-        )
-        if reasoning:
-            self.response_messages.append(
-                ResponseReasoningItem(
-                    type="reasoning",
-                    id=f"rs_{random_uuid()}",
-                    summary=[],
-                    content=[
-                        Content(
-                            type="reasoning_text",
-                            text=reasoning,
-                        )
-                    ],
-                )
+        if self.parser_instance is not None:
+            output_items = self.parser_instance.extract_response_outputs(
+                model_output=output.text,
+                model_output_token_ids=output.token_ids,
+                request=self.request,
+                enable_auto_tools=self.enable_auto_tools,
+                tool_call_id_type=self.tool_call_id_type,
             )
-
-        function_calls: list[ResponseFunctionToolCall] = []
-        if self.tool_parser_instance is not None:
-            tool_call_info = self.tool_parser_instance.extract_tool_calls(
-                content if content is not None else "",
-                request=self.request,  # type: ignore
-            )
-            if tool_call_info is not None and tool_call_info.tools_called:
-                # extract_tool_calls() returns a list of tool calls.
-                function_calls.extend(
-                    ResponseFunctionToolCall(
-                        id=f"fc_{random_uuid()}",
-                        call_id=f"call_{random_uuid()}",
-                        type="function_call",
+            self.response_messages.extend(output_items)
+        else:
+            # No parser configured, treat entire output as text content
+            if output.text:
+                self.response_messages.append(
+                    ResponseOutputMessage(
+                        type="message",
+                        id=f"msg_{random_uuid()}",
                         status="completed",
-                        name=tool_call.function.name,
-                        arguments=tool_call.function.arguments,
+                        role="assistant",
+                        content=[
+                            ResponseOutputText(
+                                annotations=[],  # TODO
+                                type="output_text",
+                                text=output.text,
+                                logprobs=None,  # TODO
+                            )
+                        ],
                     )
-                    for tool_call in tool_call_info.tool_calls
                 )
-                content = tool_call_info.content
-                if content and content.strip() == "":
-                    content = None
-
-        if content:
-            self.response_messages.append(
-                ResponseOutputMessage(
-                    type="message",
-                    id=f"msg_{random_uuid()}",
-                    status="completed",
-                    role="assistant",
-                    content=[
-                        ResponseOutputText(
-                            annotations=[],  # TODO
-                            type="output_text",
-                            text=content,
-                            logprobs=None,  # TODO
-                        )
-                    ],
-                )
-            )
-        if len(function_calls) > 0:
-            self.response_messages.extend(function_calls)
 
         return self
 
@@ -169,27 +138,29 @@ class ResponsesParser:
 def get_responses_parser_for_simple_context(
     *,
     tokenizer: TokenizerLike,
-    reasoning_parser_cls: type[ReasoningParser],
+    parser_cls: type[Parser] | None,
     response_messages: list[ResponseInputOutputItem],
     request: ResponsesRequest,
-    tool_parser_cls,
     chat_template: str | None,
     chat_template_content_format: ChatTemplateContentFormatOption,
+    enable_auto_tools: bool = False,
+    tool_call_id_type: str = "random",
 ) -> ResponsesParser:
     """Factory function to create a ResponsesParser with
-    optional reasoning parser.
+    optional unified parser.
 
     Returns:
         ResponsesParser instance configured with the provided parser
     """
     return ResponsesParser(
         tokenizer=tokenizer,
-        reasoning_parser_cls=reasoning_parser_cls,
+        parser_cls=parser_cls,
         response_messages=response_messages,
         request=request,
-        tool_parser_cls=tool_parser_cls,
         chat_template=chat_template,
         chat_template_content_format=chat_template_content_format,
+        enable_auto_tools=enable_auto_tools,
+        tool_call_id_type=tool_call_id_type,
     )
 
 
diff --git a/vllm/entrypoints/openai/responses/context.py b/vllm/entrypoints/openai/responses/context.py
index 644dc8cfaaa..62de02ef826 100644
--- a/vllm/entrypoints/openai/responses/context.py
+++ b/vllm/entrypoints/openai/responses/context.py
@@ -41,9 +41,8 @@ from vllm.entrypoints.openai.responses.protocol import (
 )
 from vllm.entrypoints.openai.responses.utils import construct_tool_dicts
 from vllm.outputs import RequestOutput
-from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
+from vllm.parser.abstract_parser import Parser
 from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
 from vllm.utils import random_uuid
 
 if TYPE_CHECKING:
@@ -272,12 +271,13 @@ class ParsableContext(ConversationContext):
         *,
         response_messages: list[ResponseInputOutputItem],
         tokenizer: TokenizerLike,
-        reasoning_parser_cls: type[ReasoningParser] | None,
+        parser_cls: type[Parser] | None,
         request: ResponsesRequest,
         available_tools: list[str] | None,
-        tool_parser_cls: type[ToolParser] | None,
         chat_template: str | None,
         chat_template_content_format: ChatTemplateContentFormatOption,
+        enable_auto_tools: bool = False,
+        tool_call_id_type: str = "random",
     ):
         self.num_prompt_tokens = 0
         self.num_output_tokens = 0
@@ -286,19 +286,17 @@ class ParsableContext(ConversationContext):
         # not implemented yet for ParsableContext
         self.all_turn_metrics: list[TurnMetrics] = []
 
-        if reasoning_parser_cls is None:
-            raise ValueError("reasoning_parser_cls must be provided.")
-
         self.parser = get_responses_parser_for_simple_context(
             tokenizer=tokenizer,
-            reasoning_parser_cls=reasoning_parser_cls,
+            parser_cls=parser_cls,
             response_messages=response_messages,
             request=request,
-            tool_parser_cls=tool_parser_cls,
             chat_template=chat_template,
             chat_template_content_format=chat_template_content_format,
+            enable_auto_tools=enable_auto_tools,
+            tool_call_id_type=tool_call_id_type,
         )
-        self.tool_parser_cls = tool_parser_cls
+        self.parser_cls = parser_cls
         self.request = request
 
         self.available_tools = available_tools or []
diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py
index 7da04b3994d..bb700cd7dd6 100644
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -460,16 +460,13 @@ class OpenAIServingResponses(OpenAIServing):
                     context = ParsableContext(
                         response_messages=messages,
                         tokenizer=tokenizer,
-                        reasoning_parser_cls=self.parser.reasoning_parser_cls
-                        if self.parser
-                        else None,
+                        parser_cls=self.parser,
                         request=request,
-                        tool_parser_cls=self.parser.tool_parser_cls
-                        if self.parser
-                        else None,
                         available_tools=available_tools,
                         chat_template=self.chat_template,
                         chat_template_content_format=self.chat_template_content_format,
+                        enable_auto_tools=self.enable_auto_tools,
+                        tool_call_id_type=self.tool_call_id_type,
                     )
                 else:
                     context = SimpleContext()
@@ -708,7 +705,7 @@ class OpenAIServingResponses(OpenAIServing):
                     context.request,
                     context.parser.response_messages,
                     context.tool_dicts,
-                    context.tool_parser_cls,
+                    context.parser_cls.tool_parser_cls if context.parser_cls else None,
                     context.chat_template,
                     context.chat_template_content_format,
                 )