mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Parser] Migrate ResponsesParser to unified Parser interface (#42977)
Signed-off-by: Alberto Perdomo <aperdomo@redhat.com>
This commit is contained in:
@@ -0,0 +1,382 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Unit tests for ResponsesParser with the unified Parser interface.
|
||||
|
||||
These tests verify that ResponsesParser correctly delegates to the unified
|
||||
Parser (via extract_response_outputs) instead of calling separate
|
||||
ReasoningParser / ToolParser instances directly.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.openai.engine.protocol import (
|
||||
DeltaMessage,
|
||||
ExtractedToolCallInformation,
|
||||
FunctionCall,
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.parser.responses_parser import (
|
||||
ResponsesParser,
|
||||
get_responses_parser_for_simple_context,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
|
||||
from vllm.outputs import CompletionOutput
|
||||
from vllm.parser.abstract_parser import DelegatingParser
|
||||
|
||||
pytestmark = pytest.mark.skip_global_cleanup
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test parser stubs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _NoOpParser(DelegatingParser):
    """Stub parser that never finds reasoning or tool calls.

    Every extraction hook hands its input back untouched (or returns
    ``None`` for the streaming hooks), which lets the tests exercise the
    plain-text path through ``ResponsesParser``.
    """

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """Always report that reasoning has not ended."""
        return False

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """Return the token ids unchanged."""
        return input_ids

    def extract_reasoning(self, model_output, request):
        """Return ``(None, model_output)``: no reasoning, everything is content."""
        no_reasoning = None
        return no_reasoning, model_output

    def extract_reasoning_streaming(self, *args, **kwargs):
        """Produce no streaming reasoning delta."""
        return None

    def extract_tool_calls(self, model_output, request):
        """Report that no tools were called and pass the text through as content."""
        nothing_called = ExtractedToolCallInformation(
            tools_called=False,
            tool_calls=[],
            content=model_output,
        )
        return nothing_called

    def extract_tool_calls_streaming(self, *args, **kwargs):
        """Produce no streaming tool-call delta."""
        return None

    def parse_delta(self, *args, **kwargs) -> DeltaMessage | None:
        """Produce no delta message."""
        return None
|
||||
|
||||
|
||||
class _ReasoningOnlyParser(DelegatingParser):
    """Stub parser that splits ``<think>...</think>`` reasoning from content.

    Tool-call extraction is a no-op: the text is returned as content with
    ``tools_called=False``.
    """

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """Always report that reasoning has not ended."""
        return False

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """Return the token ids unchanged."""
        return input_ids

    def extract_reasoning(self, model_output, request):
        """Split ``model_output`` into ``(reasoning, content)``.

        When both tags are present, reasoning is the text between the first
        ``<think>`` and the first ``</think>``; content is whatever follows
        the closing tag, stripped, or ``None`` if nothing is left. Without
        both tags the result is ``(None, model_output)``.
        """
        open_tag, close_tag = "<think>", "</think>"
        if open_tag not in model_output or close_tag not in model_output:
            return None, model_output

        reasoning_start = model_output.index(open_tag) + len(open_tag)
        reasoning_end = model_output.index(close_tag)
        remainder = model_output[reasoning_end + len(close_tag) :].strip()
        return model_output[reasoning_start:reasoning_end], remainder or None

    def extract_reasoning_streaming(self, *args, **kwargs):
        """Produce no streaming reasoning delta."""
        return None

    def extract_tool_calls(self, model_output, request):
        """Report that no tools were called and pass the text through as content."""
        nothing_called = ExtractedToolCallInformation(
            tools_called=False,
            tool_calls=[],
            content=model_output,
        )
        return nothing_called

    def extract_tool_calls_streaming(self, *args, **kwargs):
        """Produce no streaming tool-call delta."""
        return None

    def parse_delta(self, *args, **kwargs) -> DeltaMessage | None:
        """Produce no delta message."""
        return None
|
||||
|
||||
|
||||
class _StubToolParser:
    """Minimal tool parser stub that always returns a hardcoded tool call."""

    supports_required_and_named = False

    def __init__(self, tokenizer=None, tools=None):
        """Accept the usual constructor arguments and ignore them."""

    def extract_tool_calls(self, model_output, request):
        """Return one fixed ``get_weather`` call, whatever the inputs are."""
        weather_function = FunctionCall(
            name="get_weather",
            arguments='{"location": "Paris"}',
        )
        weather_call = ToolCall(
            id="call_123",
            type="function",
            function=weather_function,
        )
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=[weather_call],
            content=None,
        )

    def extract_tool_calls_streaming(self, *args, **kwargs):
        """Produce no streaming tool-call delta."""
        return None

    def adjust_request(self, request):
        """Hand the request back unmodified."""
        return request
|
||||
|
||||
|
||||
class _ToolCallingParser(DelegatingParser):
    """Parser that extracts a hardcoded tool call from any input.

    This class defines no ``extract_tool_calls`` of its own; the constructor
    installs a ``_StubToolParser`` as ``self._tool_parser`` instead.
    NOTE(review): presumably ``DelegatingParser`` forwards tool-call
    extraction to ``self._tool_parser`` -- confirm against the base class.
    """

    def __init__(self, tokenizer, *args, **kwargs):
        # Extra positional/keyword arguments are accepted but not forwarded
        # to the base class.
        super().__init__(tokenizer)
        self._tool_parser = _StubToolParser()

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """Always report that reasoning has not ended."""
        return False

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """Return the token ids unchanged."""
        return input_ids

    def extract_reasoning(self, model_output, request):
        """Return ``(None, model_output)``: no reasoning, everything is content."""
        no_reasoning = None
        return no_reasoning, model_output

    def extract_reasoning_streaming(self, *args, **kwargs):
        """Produce no streaming reasoning delta."""
        return None

    def extract_tool_calls_streaming(self, *args, **kwargs):
        """Produce no streaming tool-call delta."""
        return None

    def parse_delta(self, *args, **kwargs) -> DeltaMessage | None:
        """Produce no delta message."""
        return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_request(**overrides) -> ResponsesRequest:
    """Validate and return a ``ResponsesRequest`` for tests.

    Starts from a minimal payload (``model`` and ``input``); any keyword
    argument adds to or replaces those fields before validation.
    """
    payload = {"model": "test-model", "input": "test", **overrides}
    return ResponsesRequest.model_validate(payload)
|
||||
|
||||
|
||||
def _make_output(
    text: str = "Hello, world!",
    token_ids: Sequence[int] = (1, 2, 3),
    finish_reason: str = "stop",
) -> CompletionOutput:
    """Build a ``CompletionOutput`` at index 0 with no logprobs for tests."""
    # CompletionOutput receives a fresh list, never the caller's sequence.
    materialized_ids = list(token_ids)
    return CompletionOutput(
        index=0,
        text=text,
        token_ids=materialized_ids,
        finish_reason=finish_reason,
        cumulative_logprob=None,
        logprobs=None,
    )
|
||||
|
||||
|
||||
def _make_parser(parser_cls, **overrides):
    """Construct a ``ResponsesParser`` around ``parser_cls`` for tests.

    Defaults are a mock tokenizer, an empty message list, a minimal request
    and no chat template; keyword arguments override or extend them.
    """
    init_kwargs = {
        "tokenizer": MagicMock(),
        "parser_cls": parser_cls,
        "response_messages": [],
        "request": _make_request(),
        "chat_template": None,
        "chat_template_content_format": "auto",
        **overrides,
    }
    return ResponsesParser(**init_kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: basic text passthrough
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_process_text_with_parser():
    """A parser that finds no reasoning or tools yields one message item."""
    rp = _make_parser(_NoOpParser)
    rp.process(_make_output(text="Hello!"))

    produced = rp.response_messages
    assert len(produced) == 1

    only_item = produced[0]
    assert only_item.type == "message"
    assert only_item.content[0].text == "Hello!"
|
||||
|
||||
|
||||
def test_process_text_without_parser():
    """With ``parser_cls=None`` the text is wrapped as a plain message."""
    rp = _make_parser(None)
    rp.process(_make_output(text="Hello!"))

    produced = rp.response_messages
    assert len(produced) == 1

    only_item = produced[0]
    assert only_item.type == "message"
    assert only_item.content[0].text == "Hello!"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: empty / whitespace output
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_process_empty_text_without_parser():
    """Empty text with no parser configured adds no output items."""
    rp = _make_parser(None)
    empty_output = _make_output(text="")
    rp.process(empty_output)

    assert len(rp.response_messages) == 0
|
||||
|
||||
|
||||
def test_process_empty_text_with_parser():
    """Empty text with a parser configured adds no output items."""
    rp = _make_parser(_NoOpParser)
    empty_output = _make_output(text="")
    rp.process(empty_output)

    assert len(rp.response_messages) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: reasoning extraction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_process_extracts_reasoning():
    """Reasoning plus trailing text yields a reasoning item and a message item."""
    rp = _make_parser(_ReasoningOnlyParser)
    raw_text = "<think>Let me check</think>The answer is 42"
    rp.process(_make_output(text=raw_text))

    produced = rp.response_messages
    seen_types = [item.type for item in produced]
    assert "reasoning" in seen_types
    assert "message" in seen_types

    # The membership asserts above guarantee each filtered list is non-empty.
    first_reasoning = [item for item in produced if item.type == "reasoning"][0]
    assert first_reasoning.content[0].text == "Let me check"

    first_message = [item for item in produced if item.type == "message"][0]
    assert first_message.content[0].text == "The answer is 42"
|
||||
|
||||
|
||||
def test_process_reasoning_only_no_content():
    """If reasoning consumes the whole text, no message item is produced."""
    rp = _make_parser(_ReasoningOnlyParser)
    rp.process(_make_output(text="<think>Just thinking</think>"))

    seen_types = [item.type for item in rp.response_messages]
    assert "reasoning" in seen_types
    assert "message" not in seen_types
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: tool call extraction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_process_extracts_tool_calls():
    """A parser that finds tool calls yields ``function_call`` items."""
    weather_tool = {
        "type": "function",
        "name": "get_weather",
        "parameters": {"type": "object", "properties": {}},
    }
    tool_request = _make_request(tool_choice="auto", tools=[weather_tool])
    rp = _make_parser(
        _ToolCallingParser,
        request=tool_request,
        enable_auto_tools=True,
    )
    rp.process(_make_output(text="calling tool"))

    produced = rp.response_messages
    seen_types = [item.type for item in produced]
    assert "function_call" in seen_types

    # The membership assert above guarantees the filtered list is non-empty.
    call_item = [item for item in produced if item.type == "function_call"][0]
    assert call_item.name == "get_weather"
    assert call_item.arguments == '{"location": "Paris"}'
    assert call_item.status == "completed"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: finish_reason tracking
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_finish_reason_tracked():
    """The parser stores the ``finish_reason`` of the latest processed output."""
    rp = _make_parser(_NoOpParser)
    assert rp.finish_reason is None

    # Each process() call is expected to replace the stored reason.
    for reason in ("stop", "length"):
        rp.process(_make_output(finish_reason=reason))
        assert rp.finish_reason == reason
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: multi-turn accumulation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_multi_turn_accumulation():
    """Repeated ``process()`` calls append to ``response_messages`` in order."""
    rp = _make_parser(_NoOpParser)

    for turn_text in ("First turn", "Second turn"):
        rp.process(_make_output(text=turn_text))

    accumulated = rp.response_messages
    assert len(accumulated) == 2
    collected_texts = [item.content[0].text for item in accumulated]
    assert collected_texts == ["First turn", "Second turn"]
|
||||
|
||||
|
||||
def test_num_init_messages_offset():
    """Pre-existing messages are kept but excluded from the output items."""
    seed_messages = [MagicMock(type="message")]
    rp = _make_parser(_NoOpParser, response_messages=seed_messages)
    assert rp.num_init_messages == 1

    rp.process(_make_output(text="New output"))
    assert len(rp.response_messages) == 2

    # Only the item produced after construction should be returned.
    new_items = rp.make_response_output_items_from_parsable_context()
    assert len(new_items) == 1
    assert new_items[0].type == "message"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: factory function
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_factory_function_creates_parser():
    """The factory returns a ``ResponsesParser`` that can process output."""
    factory_kwargs = {
        "tokenizer": MagicMock(),
        "parser_cls": _NoOpParser,
        "response_messages": [],
        "request": _make_request(),
        "chat_template": None,
        "chat_template_content_format": "auto",
    }
    built = get_responses_parser_for_simple_context(**factory_kwargs)
    assert isinstance(built, ResponsesParser)

    built.process(_make_output(text="Works!"))
    assert len(built.response_messages) == 1
|
||||
|
||||
|
||||
def test_factory_function_none_parser():
    """The factory accepts ``parser_cls=None`` and leaves no parser instance."""
    factory_kwargs = {
        "tokenizer": MagicMock(),
        "parser_cls": None,
        "response_messages": [],
        "request": _make_request(),
        "chat_template": None,
        "chat_template_content_format": "auto",
    }
    built = get_responses_parser_for_simple_context(**factory_kwargs)

    assert isinstance(built, ResponsesParser)
    assert built.parser_instance is None
|
||||
@@ -10,10 +10,6 @@ from openai.types.responses.response_function_tool_call_output_item import (
|
||||
from openai.types.responses.response_output_item import McpCall
|
||||
from openai.types.responses.response_output_message import ResponseOutputMessage
|
||||
from openai.types.responses.response_output_text import ResponseOutputText
|
||||
from openai.types.responses.response_reasoning_item import (
|
||||
Content,
|
||||
ResponseReasoningItem,
|
||||
)
|
||||
|
||||
from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
|
||||
from vllm.entrypoints.constants import MCP_PREFIX
|
||||
@@ -22,9 +18,8 @@ from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponsesRequest,
|
||||
)
|
||||
from vllm.outputs import CompletionOutput
|
||||
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
|
||||
from vllm.parser.abstract_parser import Parser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tool_parsers.abstract_tool_parser import ToolParser
|
||||
from vllm.utils import random_uuid
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -37,12 +32,13 @@ class ResponsesParser:
|
||||
self,
|
||||
*,
|
||||
tokenizer: TokenizerLike,
|
||||
reasoning_parser_cls: type[ReasoningParser],
|
||||
parser_cls: type[Parser] | None,
|
||||
response_messages: list[ResponseInputOutputItem],
|
||||
request: ResponsesRequest,
|
||||
tool_parser_cls: type[ToolParser] | None,
|
||||
chat_template: str | None,
|
||||
chat_template_content_format: ChatTemplateContentFormatOption,
|
||||
enable_auto_tools: bool = False,
|
||||
tool_call_id_type: str = "random",
|
||||
):
|
||||
self.response_messages: list[ResponseInputOutputItem] = (
|
||||
# TODO: initial messages may not be properly typed
|
||||
@@ -52,17 +48,22 @@ class ResponsesParser:
|
||||
self.tokenizer = tokenizer
|
||||
self.request = request
|
||||
|
||||
self.reasoning_parser_instance = reasoning_parser_cls(
|
||||
tokenizer,
|
||||
chat_template_kwargs=_effective_chat_template_kwargs(
|
||||
self.parser_instance: Parser | None = None
|
||||
if parser_cls is not None:
|
||||
chat_template_kwargs = _effective_chat_template_kwargs(
|
||||
request,
|
||||
chat_template=chat_template,
|
||||
chat_template_content_format=chat_template_content_format,
|
||||
),
|
||||
)
|
||||
self.tool_parser_instance = None
|
||||
if tool_parser_cls is not None:
|
||||
self.tool_parser_instance = tool_parser_cls(tokenizer, request.tools)
|
||||
)
|
||||
|
||||
self.parser_instance = parser_cls(
|
||||
tokenizer,
|
||||
tools=request.tools,
|
||||
chat_template_kwargs=chat_template_kwargs,
|
||||
)
|
||||
|
||||
self.enable_auto_tools = enable_auto_tools
|
||||
self.tool_call_id_type = tool_call_id_type
|
||||
|
||||
# Store the last finish_reason to determine response status
|
||||
self.finish_reason: str | None = None
|
||||
@@ -71,66 +72,34 @@ class ResponsesParser:
|
||||
# Store the finish_reason from the output
|
||||
self.finish_reason = output.finish_reason
|
||||
|
||||
reasoning, content = self.reasoning_parser_instance.extract_reasoning(
|
||||
output.text, request=self.request
|
||||
)
|
||||
if reasoning:
|
||||
self.response_messages.append(
|
||||
ResponseReasoningItem(
|
||||
type="reasoning",
|
||||
id=f"rs_{random_uuid()}",
|
||||
summary=[],
|
||||
content=[
|
||||
Content(
|
||||
type="reasoning_text",
|
||||
text=reasoning,
|
||||
)
|
||||
],
|
||||
)
|
||||
if self.parser_instance is not None:
|
||||
output_items = self.parser_instance.extract_response_outputs(
|
||||
model_output=output.text,
|
||||
model_output_token_ids=output.token_ids,
|
||||
request=self.request,
|
||||
enable_auto_tools=self.enable_auto_tools,
|
||||
tool_call_id_type=self.tool_call_id_type,
|
||||
)
|
||||
|
||||
function_calls: list[ResponseFunctionToolCall] = []
|
||||
if self.tool_parser_instance is not None:
|
||||
tool_call_info = self.tool_parser_instance.extract_tool_calls(
|
||||
content if content is not None else "",
|
||||
request=self.request, # type: ignore
|
||||
)
|
||||
if tool_call_info is not None and tool_call_info.tools_called:
|
||||
# extract_tool_calls() returns a list of tool calls.
|
||||
function_calls.extend(
|
||||
ResponseFunctionToolCall(
|
||||
id=f"fc_{random_uuid()}",
|
||||
call_id=f"call_{random_uuid()}",
|
||||
type="function_call",
|
||||
self.response_messages.extend(output_items)
|
||||
else:
|
||||
# No parser configured, treat entire output as text content
|
||||
if output.text:
|
||||
self.response_messages.append(
|
||||
ResponseOutputMessage(
|
||||
type="message",
|
||||
id=f"msg_{random_uuid()}",
|
||||
status="completed",
|
||||
name=tool_call.function.name,
|
||||
arguments=tool_call.function.arguments,
|
||||
role="assistant",
|
||||
content=[
|
||||
ResponseOutputText(
|
||||
annotations=[], # TODO
|
||||
type="output_text",
|
||||
text=output.text,
|
||||
logprobs=None, # TODO
|
||||
)
|
||||
],
|
||||
)
|
||||
for tool_call in tool_call_info.tool_calls
|
||||
)
|
||||
content = tool_call_info.content
|
||||
if content and content.strip() == "":
|
||||
content = None
|
||||
|
||||
if content:
|
||||
self.response_messages.append(
|
||||
ResponseOutputMessage(
|
||||
type="message",
|
||||
id=f"msg_{random_uuid()}",
|
||||
status="completed",
|
||||
role="assistant",
|
||||
content=[
|
||||
ResponseOutputText(
|
||||
annotations=[], # TODO
|
||||
type="output_text",
|
||||
text=content,
|
||||
logprobs=None, # TODO
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
if len(function_calls) > 0:
|
||||
self.response_messages.extend(function_calls)
|
||||
|
||||
return self
|
||||
|
||||
@@ -169,27 +138,29 @@ class ResponsesParser:
|
||||
def get_responses_parser_for_simple_context(
|
||||
*,
|
||||
tokenizer: TokenizerLike,
|
||||
reasoning_parser_cls: type[ReasoningParser],
|
||||
parser_cls: type[Parser] | None,
|
||||
response_messages: list[ResponseInputOutputItem],
|
||||
request: ResponsesRequest,
|
||||
tool_parser_cls,
|
||||
chat_template: str | None,
|
||||
chat_template_content_format: ChatTemplateContentFormatOption,
|
||||
enable_auto_tools: bool = False,
|
||||
tool_call_id_type: str = "random",
|
||||
) -> ResponsesParser:
|
||||
"""Factory function to create a ResponsesParser with
|
||||
optional reasoning parser.
|
||||
optional unified parser.
|
||||
|
||||
Returns:
|
||||
ResponsesParser instance configured with the provided parser
|
||||
"""
|
||||
return ResponsesParser(
|
||||
tokenizer=tokenizer,
|
||||
reasoning_parser_cls=reasoning_parser_cls,
|
||||
parser_cls=parser_cls,
|
||||
response_messages=response_messages,
|
||||
request=request,
|
||||
tool_parser_cls=tool_parser_cls,
|
||||
chat_template=chat_template,
|
||||
chat_template_content_format=chat_template_content_format,
|
||||
enable_auto_tools=enable_auto_tools,
|
||||
tool_call_id_type=tool_call_id_type,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -41,9 +41,8 @@ from vllm.entrypoints.openai.responses.protocol import (
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.utils import construct_tool_dicts
|
||||
from vllm.outputs import RequestOutput
|
||||
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
|
||||
from vllm.parser.abstract_parser import Parser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tool_parsers.abstract_tool_parser import ToolParser
|
||||
from vllm.utils import random_uuid
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -272,12 +271,13 @@ class ParsableContext(ConversationContext):
|
||||
*,
|
||||
response_messages: list[ResponseInputOutputItem],
|
||||
tokenizer: TokenizerLike,
|
||||
reasoning_parser_cls: type[ReasoningParser] | None,
|
||||
parser_cls: type[Parser] | None,
|
||||
request: ResponsesRequest,
|
||||
available_tools: list[str] | None,
|
||||
tool_parser_cls: type[ToolParser] | None,
|
||||
chat_template: str | None,
|
||||
chat_template_content_format: ChatTemplateContentFormatOption,
|
||||
enable_auto_tools: bool = False,
|
||||
tool_call_id_type: str = "random",
|
||||
):
|
||||
self.num_prompt_tokens = 0
|
||||
self.num_output_tokens = 0
|
||||
@@ -286,19 +286,17 @@ class ParsableContext(ConversationContext):
|
||||
# not implemented yet for ParsableContext
|
||||
self.all_turn_metrics: list[TurnMetrics] = []
|
||||
|
||||
if reasoning_parser_cls is None:
|
||||
raise ValueError("reasoning_parser_cls must be provided.")
|
||||
|
||||
self.parser = get_responses_parser_for_simple_context(
|
||||
tokenizer=tokenizer,
|
||||
reasoning_parser_cls=reasoning_parser_cls,
|
||||
parser_cls=parser_cls,
|
||||
response_messages=response_messages,
|
||||
request=request,
|
||||
tool_parser_cls=tool_parser_cls,
|
||||
chat_template=chat_template,
|
||||
chat_template_content_format=chat_template_content_format,
|
||||
enable_auto_tools=enable_auto_tools,
|
||||
tool_call_id_type=tool_call_id_type,
|
||||
)
|
||||
self.tool_parser_cls = tool_parser_cls
|
||||
self.parser_cls = parser_cls
|
||||
self.request = request
|
||||
|
||||
self.available_tools = available_tools or []
|
||||
|
||||
@@ -460,16 +460,13 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
context = ParsableContext(
|
||||
response_messages=messages,
|
||||
tokenizer=tokenizer,
|
||||
reasoning_parser_cls=self.parser.reasoning_parser_cls
|
||||
if self.parser
|
||||
else None,
|
||||
parser_cls=self.parser,
|
||||
request=request,
|
||||
tool_parser_cls=self.parser.tool_parser_cls
|
||||
if self.parser
|
||||
else None,
|
||||
available_tools=available_tools,
|
||||
chat_template=self.chat_template,
|
||||
chat_template_content_format=self.chat_template_content_format,
|
||||
enable_auto_tools=self.enable_auto_tools,
|
||||
tool_call_id_type=self.tool_call_id_type,
|
||||
)
|
||||
else:
|
||||
context = SimpleContext()
|
||||
@@ -708,7 +705,7 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
context.request,
|
||||
context.parser.response_messages,
|
||||
context.tool_dicts,
|
||||
context.tool_parser_cls,
|
||||
context.parser_cls.tool_parser_cls if context.parser_cls else None,
|
||||
context.chat_template,
|
||||
context.chat_template_content_format,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user