mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Bugfix] Handle real-world gpt-oss tool call output in Harmony parsing (#42454)
Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
@@ -199,12 +199,107 @@ class TestExtractHarmonyStreamingDelta:
|
||||
assert delta_message.content == delta_text
|
||||
assert tools_streamed is False
|
||||
|
||||
@pytest.mark.parametrize("channel", ["commentary", "analysis"])
@patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
def test_new_tool_call_without_functions_prefix(
    self, mock_make_tool_call_id, channel
):
    """A bare recipient with empty text opens a new tool call."""
    mock_make_tool_call_id.return_value = "call_bare123"
    opening_state = TokenState(channel=channel, recipient="get_weather", text="")

    delta_message, tools_streamed = extract_harmony_streaming_delta(
        harmony_parser=MockStreamableParser(),
        token_states=[opening_state],
        prev_recipient=None,
        include_reasoning=False,
    )

    assert delta_message is not None
    assert len(delta_message.tool_calls) == 1
    opened_call = delta_message.tool_calls[0]
    # The id comes from the patched make_tool_call_id.
    assert opened_call.id == "call_bare123"
    assert opened_call.type == "function"
    assert opened_call.function.name == "get_weather"
    assert opened_call.function.arguments == ""
    assert opened_call.index == 0
    assert tools_streamed is True
|
||||
|
||||
@pytest.mark.parametrize("channel", ["commentary", "analysis"])
def test_tool_call_argument_streaming_without_functions_prefix(self, channel):
    """Argument text for an already-open bare-name call is streamed as-is."""
    args_text = '{"location": "Paris"}'
    continuing_state = TokenState(
        channel=channel, recipient="get_weather", text=args_text
    )

    delta_message, tools_streamed = extract_harmony_streaming_delta(
        harmony_parser=MockStreamableParser(),
        token_states=[continuing_state],
        # Same recipient as the token state: the call is ongoing, not new.
        prev_recipient="get_weather",
        include_reasoning=False,
    )

    assert delta_message is not None
    streamed_call = delta_message.tool_calls[0]
    assert streamed_call.id is None
    assert streamed_call.function.arguments == args_text
    assert streamed_call.index == 0
    assert tools_streamed is True
|
||||
|
||||
def test_tool_call_index_from_previous_messages_without_functions_prefix(self):
    """A completed bare-name call in parser messages shifts the index to 1."""
    completed_calls = [MockMessage(channel="commentary", recipient="tool1")]
    ongoing_state = TokenState(channel="commentary", recipient="tool2", text="args")

    delta_message, _ = extract_harmony_streaming_delta(
        harmony_parser=MockStreamableParser(messages=completed_calls),
        token_states=[ongoing_state],
        prev_recipient="tool2",
        include_reasoning=False,
    )

    first_call = delta_message.tool_calls[0]
    assert first_call.index == 1
|
||||
|
||||
@pytest.mark.parametrize("channel", ["commentary", "analysis"])
@patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
def test_new_tool_call_dotted_function_name(self, mock_make_tool_call_id, channel):
    """A dotted bare recipient opens a tool call whose name keeps the dot."""
    mock_make_tool_call_id.return_value = "call_dotted123"
    opening_state = TokenState(channel=channel, recipient="math.sum", text="")

    delta_message, tools_streamed = extract_harmony_streaming_delta(
        harmony_parser=MockStreamableParser(),
        token_states=[opening_state],
        prev_recipient=None,
        include_reasoning=False,
    )

    assert delta_message is not None
    assert len(delta_message.tool_calls) == 1
    opened_call = delta_message.tool_calls[0]
    assert opened_call.id == "call_dotted123"
    assert opened_call.type == "function"
    assert opened_call.function.name == "math.sum"
    assert opened_call.function.arguments == ""
    assert opened_call.index == 0
    assert tools_streamed is True
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"channel,recipient",
|
||||
[
|
||||
(None, None),
|
||||
("unknown_channel", None),
|
||||
("commentary", "browser.search"),
|
||||
("commentary", "assistant"),
|
||||
],
|
||||
)
|
||||
def test_returns_none_for_invalid_inputs(self, channel, recipient):
|
||||
@@ -348,3 +443,92 @@ class TestExtractHarmonyStreamingDelta:
|
||||
assert tool_c_args.function.arguments == '{"key_c": "val_c"}'
|
||||
|
||||
assert delta_message.content == "Thinking... Thinking again..."
|
||||
|
||||
|
||||
class TestToolCallsOnNonStandardChannels:
    """Streaming tool-call detection keys off the recipient, not the channel.

    Each test feeds ``extract_harmony_streaming_delta`` state on the
    unexpected ``comment`` channel (instead of ``commentary``) and checks
    that tool calls are still produced.
    """

    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
    def test_prefixed_tool_call_on_comment_channel(self, mock_make_tool_call_id):
        """A ``functions.``-prefixed recipient on ``comment`` opens a call."""
        mock_make_tool_call_id.return_value = "call_comment_chan"
        opening_state = TokenState(
            channel="comment", recipient="functions.get_weather", text=""
        )

        delta_message, tools_streamed = extract_harmony_streaming_delta(
            harmony_parser=MockStreamableParser(),
            token_states=[opening_state],
            prev_recipient=None,
            include_reasoning=False,
        )

        assert delta_message is not None
        assert len(delta_message.tool_calls) == 1
        opened_call = delta_message.tool_calls[0]
        assert opened_call.function.name == "get_weather"
        assert tools_streamed is True

    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
    def test_bare_tool_call_on_comment_channel(self, mock_make_tool_call_id):
        """A bare recipient on ``comment`` opens a call as well."""
        mock_make_tool_call_id.return_value = "call_bare_comment"
        opening_state = TokenState(
            channel="comment", recipient="get_weather", text=""
        )

        delta_message, tools_streamed = extract_harmony_streaming_delta(
            harmony_parser=MockStreamableParser(),
            token_states=[opening_state],
            prev_recipient=None,
            include_reasoning=False,
        )

        assert delta_message is not None
        assert len(delta_message.tool_calls) == 1
        opened_call = delta_message.tool_calls[0]
        assert opened_call.function.name == "get_weather"
        assert tools_streamed is True

    def test_tool_call_arguments_on_comment_channel(self):
        """Argument text of an ongoing call on ``comment`` is passed through."""
        args_text = '{"location": "Paris"}'
        continuing_state = TokenState(
            channel="comment", recipient="functions.get_weather", text=args_text
        )

        delta_message, tools_streamed = extract_harmony_streaming_delta(
            harmony_parser=MockStreamableParser(),
            token_states=[continuing_state],
            prev_recipient="functions.get_weather",
            include_reasoning=False,
        )

        assert delta_message is not None
        streamed_call = delta_message.tool_calls[0]
        assert streamed_call.function.arguments == args_text
        assert tools_streamed is True

    def test_base_index_counts_tool_calls_on_comment_channel(self):
        """A completed call on ``comment`` counts toward the tool-call index."""
        completed_calls = [
            MockMessage(channel="comment", recipient="functions.tool1"),
        ]
        ongoing_state = TokenState(
            channel="commentary", recipient="functions.tool2", text="args"
        )

        delta_message, _ = extract_harmony_streaming_delta(
            harmony_parser=MockStreamableParser(messages=completed_calls),
            token_states=[ongoing_state],
            prev_recipient="functions.tool2",
            include_reasoning=False,
        )

        first_call = delta_message.tool_calls[0]
        assert first_call.index == 1
|
||||
|
||||
@@ -7,9 +7,11 @@ from openai_harmony import Message, Role
|
||||
from tests.entrypoints.openai.utils import verify_harmony_messages
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
auto_drop_analysis_messages,
|
||||
extract_function_from_recipient,
|
||||
get_encoding,
|
||||
get_system_message,
|
||||
has_custom_tools,
|
||||
is_function_recipient,
|
||||
parse_chat_input_to_harmony_message,
|
||||
parse_chat_output,
|
||||
)
|
||||
@@ -19,6 +21,182 @@ from vllm.entrypoints.openai.responses.harmony import (
|
||||
)
|
||||
|
||||
|
||||
class TestIsFunctionRecipient:
    """Checks ``is_function_recipient`` when no allowed-name set is passed."""

    @pytest.mark.parametrize(
        "recipient",
        ["functions.get_weather", "functions.search_web", "functions.math.sum"],
    )
    def test_functions_prefix_accepted(self, recipient):
        verdict = is_function_recipient(recipient)
        assert verdict is True

    @pytest.mark.parametrize(
        "recipient",
        ["get_weather", "search_web", "calculator", "my-tool"],
    )
    def test_bare_function_name_accepted(self, recipient):
        verdict = is_function_recipient(recipient)
        assert verdict is True

    @pytest.mark.parametrize("recipient", ["assistant"])
    def test_assistant_rejected(self, recipient):
        verdict = is_function_recipient(recipient)
        assert verdict is False

    @pytest.mark.parametrize(
        "recipient",
        ["math.sum", "code.run", "namespace.tool_name", "my.deeply.nested.tool"],
    )
    def test_dotted_function_names_accepted(self, recipient):
        verdict = is_function_recipient(recipient)
        assert verdict is True

    @pytest.mark.parametrize("recipient", ["python", "browser", "container"])
    def test_builtin_tool_names_rejected(self, recipient):
        verdict = is_function_recipient(recipient)
        assert verdict is False

    @pytest.mark.parametrize(
        "recipient",
        [
            "python.run",
            "python.execute",
            "browser.search",
            "browser.open",
            "container.exec",
        ],
    )
    def test_builtin_dotted_variants_rejected(self, recipient):
        verdict = is_function_recipient(recipient)
        assert verdict is False

    @pytest.mark.parametrize("recipient", ["", "functions."])
    def test_empty_recipients_rejected(self, recipient):
        verdict = is_function_recipient(recipient)
        assert verdict is False

    @pytest.mark.parametrize("recipient", ["<|start|>", "<|end|>", "<|channel|>"])
    def test_harmony_tokens_rejected(self, recipient):
        verdict = is_function_recipient(recipient)
        assert verdict is False
|
||||
|
||||
|
||||
class TestIsFunctionRecipientWithAllowedNames:
    """Checks ``is_function_recipient`` with ``allowed_function_tool_names``."""

    def test_prefixed_always_accepted(self):
        """A ``functions.`` recipient passes even if absent from the set."""
        allowed = frozenset({"other_tool"})
        assert is_function_recipient("functions.get_weather", allowed) is True

    def test_bare_name_accepted_when_in_allowed_names(self):
        allowed = frozenset({"get_weather", "search_web"})
        weather_ok = is_function_recipient("get_weather", allowed)
        assert weather_ok is True
        search_ok = is_function_recipient("search_web", allowed)
        assert search_ok is True

    def test_bare_name_rejected_when_not_in_allowed_names(self):
        allowed = frozenset({"get_weather"})
        assert is_function_recipient("unknown_tool", allowed) is False

    def test_dotted_name_accepted_when_in_allowed_names(self):
        allowed = frozenset({"math.sum", "namespace.tool_name"})
        sum_ok = is_function_recipient("math.sum", allowed)
        assert sum_ok is True
        namespaced_ok = is_function_recipient("namespace.tool_name", allowed)
        assert namespaced_ok is True

    def test_dotted_name_rejected_when_not_in_allowed_names(self):
        allowed = frozenset({"get_weather"})
        assert is_function_recipient("custom_server.search", allowed) is False

    def test_empty_allowed_names_rejects_bare_names(self):
        """An empty set rejects every bare recipient tried here."""
        nothing_allowed: frozenset[str] = frozenset()
        bare_verdict = is_function_recipient("get_weather", nothing_allowed)
        assert bare_verdict is False
        dotted_verdict = is_function_recipient("math.sum", nothing_allowed)
        assert dotted_verdict is False

    def test_builtin_tools_always_rejected(self):
        """Built-in names are rejected even when listed as allowed."""
        allowed = frozenset({"python", "browser", "container"})
        python_verdict = is_function_recipient("python", allowed)
        assert python_verdict is False
        browser_verdict = is_function_recipient("browser", allowed)
        assert browser_verdict is False
        container_verdict = is_function_recipient("container", allowed)
        assert container_verdict is False

    def test_builtin_dotted_always_rejected(self):
        """Dotted built-in recipients are rejected even when listed as allowed."""
        allowed = frozenset({"python.run", "browser.search"})
        run_verdict = is_function_recipient("python.run", allowed)
        assert run_verdict is False
        search_verdict = is_function_recipient("browser.search", allowed)
        assert search_verdict is False

    def test_none_allowed_names_uses_heuristic(self):
        """Passing ``None`` explicitly behaves like omitting the argument."""
        bare_verdict = is_function_recipient("get_weather", None)
        assert bare_verdict is True
        dotted_verdict = is_function_recipient("math.sum", None)
        assert dotted_verdict is True
        builtin_verdict = is_function_recipient("python", None)
        assert builtin_verdict is False
|
||||
|
||||
|
||||
class TestExtractFunctionFromRecipient:
    """Checks the name returned by ``extract_function_from_recipient``."""

    @pytest.mark.parametrize(
        "recipient,expected",
        [
            ("functions.get_weather", "get_weather"),
            ("functions.search_web", "search_web"),
            ("functions.", ""),
        ],
    )
    def test_strips_functions_prefix(self, recipient, expected):
        extracted = extract_function_from_recipient(recipient)
        assert extracted == expected

    @pytest.mark.parametrize("recipient", ["get_weather", "calculator", "my-tool"])
    def test_bare_name_returned_as_is(self, recipient):
        extracted = extract_function_from_recipient(recipient)
        assert extracted == recipient

    @pytest.mark.parametrize(
        "recipient,expected",
        [
            ("functions.math.sum", "math.sum"),
            ("math.sum", "math.sum"),
            ("namespace.tool_name", "namespace.tool_name"),
        ],
    )
    def test_dotted_function_name_extraction(self, recipient, expected):
        extracted = extract_function_from_recipient(recipient)
        assert extracted == expected
|
||||
|
||||
|
||||
class TestCommonParseInputToHarmonyMessage:
|
||||
"""
|
||||
Tests for scenarios that are common to both Chat Completion
|
||||
|
||||
@@ -246,7 +246,8 @@ class TestHarmonyToResponseOutput:
|
||||
message = message.with_channel("commentary")
|
||||
message = message.with_recipient("custom_tool")
|
||||
|
||||
output_items = harmony_to_response_output(message)
|
||||
fn_names = frozenset({"other_tool"})
|
||||
output_items = harmony_to_response_output(message, fn_names)
|
||||
|
||||
assert len(output_items) == 1
|
||||
assert isinstance(output_items[0], McpCall)
|
||||
@@ -286,13 +287,179 @@ class TestHarmonyToResponseOutput:
|
||||
assert len(output_items) == 0
|
||||
|
||||
|
||||
class TestHarmonyToResponseOutputWithFunctionToolNames:
    """Checks ``harmony_to_response_output`` when function tool names are given."""

    def test_bare_name_creates_function_call_when_in_tool_names(self):
        """A bare recipient listed in the tool names becomes a function call."""
        message = (
            Message.from_role_and_content(
                Role.ASSISTANT, '{"location": "San Francisco"}'
            )
            .with_channel("commentary")
            .with_recipient("get_weather")
        )

        output_items = harmony_to_response_output(
            message, frozenset({"get_weather"})
        )

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseFunctionToolCall)
        assert item.type == "function_call"
        assert item.name == "get_weather"
        assert item.arguments == '{"location": "San Francisco"}'

    def test_bare_name_creates_mcp_call_when_not_in_tool_names(self):
        """A bare recipient absent from the tool names becomes an MCP call."""
        message = (
            Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
            .with_channel("commentary")
            .with_recipient("custom_tool")
        )

        output_items = harmony_to_response_output(
            message, frozenset({"get_weather"})
        )

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, McpCall)
        assert item.type == "mcp_call"

    def test_dotted_function_name_creates_function_call(self):
        """A dotted recipient listed in the tool names becomes a function call."""
        message = (
            Message.from_role_and_content(Role.ASSISTANT, '{"a": 1, "b": 2}')
            .with_channel("commentary")
            .with_recipient("math.sum")
        )

        output_items = harmony_to_response_output(message, frozenset({"math.sum"}))

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseFunctionToolCall)
        assert item.name == "math.sum"

    def test_empty_tool_names_defaults_to_mcp(self):
        """With an empty tool-name set, a bare recipient becomes an MCP call."""
        message = (
            Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
            .with_channel("commentary")
            .with_recipient("get_weather")
        )

        output_items = harmony_to_response_output(message, frozenset())

        assert len(output_items) == 1
        assert isinstance(output_items[0], McpCall)

    def test_prefixed_name_always_function_call(self):
        """A ``functions.`` recipient is a function call even with no tool names."""
        message = (
            Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        output_items = harmony_to_response_output(message, frozenset())

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseFunctionToolCall)
        assert item.name == "get_weather"
|
||||
|
||||
|
||||
class TestParserStateWithFunctionToolNames:
    """Checks ``parser_state_to_response_output`` with function tool names."""

    def test_bare_name_creates_function_call(self):
        """A bare recipient in the tool names yields an in-progress function call."""
        from unittest.mock import Mock

        # Keyword arguments to Mock() set the same attributes as assignment.
        parser = Mock(
            current_content='{"arg": "value"}',
            current_role=Role.ASSISTANT,
            current_channel="commentary",
            current_recipient="get_weather",
        )

        items = parser_state_to_response_output(parser, frozenset({"get_weather"}))

        assert len(items) == 1
        item = items[0]
        assert isinstance(item, ResponseFunctionToolCall)
        assert item.name == "get_weather"
        assert item.status == "in_progress"

    def test_bare_name_creates_mcp_when_not_in_tool_names(self):
        """A bare recipient missing from the tool names yields an MCP call."""
        from unittest.mock import Mock

        parser = Mock(
            current_content='{"arg": "value"}',
            current_role=Role.ASSISTANT,
            current_channel="commentary",
            current_recipient="unknown_tool",
        )

        items = parser_state_to_response_output(parser, frozenset({"get_weather"}))

        assert len(items) == 1
        item = items[0]
        assert isinstance(item, McpCall)
        assert item.name == "unknown_tool"
|
||||
|
||||
|
||||
class TestToolCallsOnNonStandardChannels:
    """Response-output conversion still finds tool calls on the ``comment`` channel."""

    def test_function_call_on_comment_channel(self):
        """A ``functions.`` recipient on ``comment`` yields a function call."""
        message = (
            Message.from_role_and_content(Role.ASSISTANT, '{"query": "weather"}')
            .with_channel("comment")
            .with_recipient("functions.get_weather")
        )

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseFunctionToolCall)
        assert item.type == "function_call"
        assert item.name == "get_weather"

    def test_bare_function_on_comment_channel(self):
        """A bare recipient in the tool names on ``comment`` yields a function call."""
        message = (
            Message.from_role_and_content(Role.ASSISTANT, '{"query": "weather"}')
            .with_channel("comment")
            .with_recipient("get_weather")
        )

        output_items = harmony_to_response_output(
            message, frozenset({"get_weather"})
        )

        assert len(output_items) == 1
        item = output_items[0]
        assert isinstance(item, ResponseFunctionToolCall)
        assert item.name == "get_weather"

    def test_parser_state_comment_channel_function(self):
        """Parser state on ``comment`` with a prefixed recipient yields a function call."""
        from unittest.mock import Mock

        parser = Mock(
            current_content='{"arg": "value"}',
            current_role=Role.ASSISTANT,
            current_channel="comment",
            current_recipient="functions.get_weather",
        )

        items = parser_state_to_response_output(parser)

        assert len(items) == 1
        item = items[0]
        assert isinstance(item, ResponseFunctionToolCall)
        assert item.name == "get_weather"

    def test_parser_state_comment_channel_mcp(self):
        """Parser state on ``comment`` with an unlisted recipient yields an MCP call."""
        from unittest.mock import Mock

        parser = Mock(
            current_content='{"arg": "value"}',
            current_role=Role.ASSISTANT,
            current_channel="comment",
            current_recipient="mcp.server.tool",
        )
        no_function_tools: frozenset[str] = frozenset()

        items = parser_state_to_response_output(parser, no_function_tools)

        assert len(items) == 1
        assert isinstance(items[0], McpCall)
|
||||
|
||||
|
||||
def test_parse_mcp_call_basic() -> None:
|
||||
"""Test that MCP calls are parsed with correct type and server_label."""
|
||||
message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
|
||||
message = message.with_recipient("filesystem")
|
||||
message = message.with_channel("commentary")
|
||||
|
||||
output_items = harmony_to_response_output(message)
|
||||
fn_names: frozenset[str] = frozenset()
|
||||
output_items = harmony_to_response_output(message, fn_names)
|
||||
|
||||
assert len(output_items) == 1
|
||||
assert isinstance(output_items[0], McpCall)
|
||||
@@ -309,7 +476,8 @@ def test_parse_mcp_call_dotted_recipient() -> None:
|
||||
message = message.with_recipient("repo_browser.list")
|
||||
message = message.with_channel("commentary")
|
||||
|
||||
output_items = harmony_to_response_output(message)
|
||||
fn_names: frozenset[str] = frozenset()
|
||||
output_items = harmony_to_response_output(message, fn_names)
|
||||
|
||||
assert len(output_items) == 1
|
||||
assert isinstance(output_items[0], McpCall)
|
||||
@@ -371,7 +539,8 @@ def test_parser_state_to_response_output_commentary_channel() -> None:
|
||||
parser_mcp.current_channel = "commentary"
|
||||
parser_mcp.current_recipient = "filesystem"
|
||||
|
||||
mcp_items = parser_state_to_response_output(parser_mcp)
|
||||
fn_names: frozenset[str] = frozenset()
|
||||
mcp_items = parser_state_to_response_output(parser_mcp, fn_names)
|
||||
|
||||
assert len(mcp_items) == 1
|
||||
assert isinstance(mcp_items[0], McpCall)
|
||||
@@ -438,7 +607,8 @@ def test_parser_state_to_response_output_analysis_channel() -> None:
|
||||
parser_mcp.current_channel = "analysis"
|
||||
parser_mcp.current_recipient = "database"
|
||||
|
||||
mcp_items = parser_state_to_response_output(parser_mcp)
|
||||
fn_names: frozenset[str] = frozenset()
|
||||
mcp_items = parser_state_to_response_output(parser_mcp, fn_names)
|
||||
|
||||
assert len(mcp_items) == 1
|
||||
assert isinstance(mcp_items[0], McpCall)
|
||||
|
||||
@@ -217,6 +217,158 @@ def test_extract_tool_calls_multiple_tools(
|
||||
assert extracted_info.content is None
|
||||
|
||||
|
||||
def test_extract_tool_calls_bare_function_name(
    openai_tool_parser,
    harmony_encoding,
):
    """A commentary message addressed to a bare name is extracted as a tool call."""
    user_turn = Message.from_role_and_content(
        Role.USER, "What is the weather in Tokyo?"
    )
    reasoning_turn = Message.from_role_and_content(
        Role.ASSISTANT,
        "We need to use get_current_weather tool.",
    ).with_channel("analysis")
    tool_turn = (
        Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
        .with_channel("commentary")
        .with_recipient("get_current_weather")
        .with_content_type("json")
    )
    convo = Conversation.from_messages([user_turn, reasoning_turn, tool_turn])
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo, Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )

    assert extracted_info.tools_called
    weather_call = FunctionCall(
        name="get_current_weather",
        arguments=json.dumps({"location": "Tokyo"}),
    )
    expected_tool_calls = [ToolCall(function=weather_call)]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
    assert extracted_info.content is None
|
||||
|
||||
|
||||
def test_extract_tool_calls_bare_function_name_multiple(
    openai_tool_parser,
    harmony_encoding,
):
    """Two bare-name commentary messages are extracted as two tool calls, in order."""
    user_turn = Message.from_role_and_content(
        Role.USER, "What is the weather in Tokyo based on where I'm at?"
    )
    reasoning_turn = Message.from_role_and_content(
        Role.ASSISTANT,
        "We need to use both tools.",
    ).with_channel("analysis")
    weather_turn = (
        Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
        .with_channel("commentary")
        .with_recipient("get_current_weather")
        .with_content_type("json")
    )
    location_turn = (
        Message.from_role_and_content(Role.ASSISTANT, "{}")
        .with_channel("commentary")
        .with_recipient("get_user_location")
        .with_content_type("json")
    )
    convo = Conversation.from_messages(
        [user_turn, reasoning_turn, weather_turn, location_turn]
    )
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo, Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )

    assert extracted_info.tools_called
    weather_call = FunctionCall(
        name="get_current_weather",
        arguments=json.dumps({"location": "Tokyo"}),
    )
    location_call = FunctionCall(
        name="get_user_location",
        arguments=json.dumps({}),
    )
    expected_tool_calls = [
        ToolCall(function=weather_call),
        ToolCall(function=location_call),
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
|
||||
|
||||
|
||||
def test_extract_tool_calls_assistant_recipient_ignored(
    openai_tool_parser,
    harmony_encoding,
):
    """A message addressed to ``assistant`` is not reported as a tool call."""
    user_turn = Message.from_role_and_content(Role.USER, "Hello")
    to_assistant_turn = (
        Message.from_role_and_content(Role.ASSISTANT, "Some tool response")
        .with_channel("commentary")
        .with_recipient("assistant")
    )
    final_turn = Message.from_role_and_content(
        Role.ASSISTANT, "Here is the answer"
    ).with_channel("final")
    convo = Conversation.from_messages([user_turn, to_assistant_turn, final_turn])
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo, Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )

    assert not extracted_info.tools_called
    assert extracted_info.tool_calls == []
    # Only the final-channel text is expected as content.
    assert extracted_info.content == "Here is the answer"
|
||||
|
||||
|
||||
def test_extract_tool_calls_dotted_function_name(
    openai_tool_parser,
    harmony_encoding,
):
    """A dotted bare recipient is extracted with its full dotted name."""
    user_turn = Message.from_role_and_content(Role.USER, "Compute 2+3")
    tool_turn = (
        Message.from_role_and_content(Role.ASSISTANT, '{"a": 2, "b": 3}')
        .with_channel("commentary")
        .with_recipient("math.sum")
        .with_content_type("json")
    )
    convo = Conversation.from_messages([user_turn, tool_turn])
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo, Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )

    assert extracted_info.tools_called
    sum_call = FunctionCall(
        name="math.sum",
        arguments=json.dumps({"a": 2, "b": 3}),
    )
    expected_tool_calls = [ToolCall(function=sum_call)]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
|
||||
|
||||
|
||||
def test_extract_tool_calls_with_content(
|
||||
openai_tool_parser,
|
||||
harmony_encoding,
|
||||
|
||||
@@ -17,6 +17,10 @@ from vllm.entrypoints.openai.engine.protocol import (
|
||||
DeltaMessage,
|
||||
DeltaToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
extract_function_from_recipient,
|
||||
is_function_recipient,
|
||||
)
|
||||
|
||||
|
||||
class TokenState(NamedTuple):
|
||||
@@ -79,16 +83,12 @@ def extract_harmony_streaming_delta(
|
||||
# This counts completed tool calls in messages
|
||||
base_index = 0
|
||||
for msg in harmony_parser.messages:
|
||||
if (
|
||||
(msg.channel == "commentary" or msg.channel == "analysis")
|
||||
and msg.recipient
|
||||
and msg.recipient.startswith("functions.")
|
||||
):
|
||||
if msg.recipient and is_function_recipient(msg.recipient):
|
||||
base_index += 1
|
||||
|
||||
# If there's an ongoing tool call from previous chunk,
|
||||
# the next new tool call starts at base_index + 1
|
||||
if prev_recipient and prev_recipient.startswith("functions."):
|
||||
if prev_recipient and is_function_recipient(prev_recipient):
|
||||
next_tool_index = base_index + 1
|
||||
# Ongoing call is at base_index
|
||||
ongoing_tool_index = base_index
|
||||
@@ -101,15 +101,11 @@ def extract_harmony_streaming_delta(
|
||||
if group.channel == "final":
|
||||
combined_content += group.text
|
||||
content_encountered = True
|
||||
elif (
|
||||
(group.channel == "commentary" or group.channel == "analysis")
|
||||
and group.recipient
|
||||
and group.recipient.startswith("functions.")
|
||||
):
|
||||
elif group.recipient and is_function_recipient(group.recipient):
|
||||
opened_new_call = False
|
||||
if prev_recipient != group.recipient:
|
||||
# New tool call - emit the opening message
|
||||
tool_name = group.recipient.split("functions.", 1)[1]
|
||||
tool_name = extract_function_from_recipient(group.recipient)
|
||||
tool_messages.append(
|
||||
DeltaToolCall(
|
||||
id=make_tool_call_id(),
|
||||
|
||||
@@ -26,6 +26,42 @@ from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def is_function_recipient(
|
||||
recipient: str,
|
||||
allowed_function_tool_names: frozenset[str] | None = None,
|
||||
) -> bool:
|
||||
"""Check whether *recipient* refers to a function tool call.
|
||||
|
||||
The optional *allowed_function_tool_names* parameter is used by the
|
||||
Responses API to distinguish bare function-call recipients (missing the
|
||||
``functions.`` prefix) from MCP tool calls. When provided, a bare
|
||||
recipient is only treated as a function call if it appears in the set.
|
||||
The Chat Completions path omits this parameter so that all bare
|
||||
recipients are accepted as function calls (the heuristic fallback).
|
||||
"""
|
||||
if not recipient:
|
||||
return False
|
||||
if recipient.startswith("<|"):
|
||||
return False
|
||||
if recipient.startswith("functions."):
|
||||
return len(recipient) > len("functions.")
|
||||
if recipient == "assistant":
|
||||
return False
|
||||
if recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
|
||||
return False
|
||||
first_segment = recipient.split(".", 1)[0]
|
||||
if first_segment in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
|
||||
return False
|
||||
if allowed_function_tool_names is not None:
|
||||
return recipient in allowed_function_tool_names
|
||||
return True
|
||||
|
||||
|
||||
def extract_function_from_recipient(recipient: str) -> str:
    """Return *recipient* with one leading ``functions.`` prefix removed.

    A recipient that does not start with ``functions.`` is returned
    unchanged, so bare tool names pass straight through.
    """
    prefix = "functions."
    if recipient.startswith(prefix):
        return recipient[len(prefix) :]
    return recipient
|
||||
|
||||
|
||||
REASONING_EFFORT = {
|
||||
"high": ReasoningEffort.HIGH,
|
||||
"medium": ReasoningEffort.MEDIUM,
|
||||
|
||||
@@ -524,10 +524,12 @@ class HarmonyContext(ConversationContext):
|
||||
self,
|
||||
messages: list,
|
||||
available_tools: list[str],
|
||||
function_tool_names: frozenset[str] | None = None,
|
||||
):
|
||||
self._messages = messages
|
||||
self.finish_reason: str | None = None
|
||||
self.available_tools = available_tools
|
||||
self.function_tool_names = function_tool_names
|
||||
self._tool_sessions: dict[str, ClientSession | Tool] = {}
|
||||
self.called_tools: set[str] = set()
|
||||
|
||||
|
||||
@@ -31,7 +31,9 @@ from openai_harmony import Author, Message, Role, StreamableParser, TextContent
|
||||
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
|
||||
extract_function_from_recipient,
|
||||
flatten_chat_text_content,
|
||||
is_function_recipient,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponseInputOutputItem,
|
||||
@@ -294,7 +296,7 @@ def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutput
|
||||
|
||||
def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
|
||||
"""Parse function calls into function tool call items."""
|
||||
function_name = recipient.split(".")[-1]
|
||||
function_name = extract_function_from_recipient(recipient)
|
||||
output_items = []
|
||||
for content in message.content:
|
||||
random_id = random_uuid()
|
||||
@@ -410,7 +412,10 @@ def _parse_message_no_recipient(
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
|
||||
def harmony_to_response_output(
|
||||
message: Message,
|
||||
function_tool_names: frozenset[str] | None = None,
|
||||
) -> list[ResponseOutputItem]:
|
||||
"""Parse a Harmony message into a list of output response items.
|
||||
|
||||
This is the main dispatcher that routes based on channel and recipient.
|
||||
@@ -429,8 +434,8 @@ def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
|
||||
if recipient.startswith("browser."):
|
||||
output_items.append(_parse_browser_tool_call(message, recipient))
|
||||
|
||||
# Function calls (should only happen on commentary channel)
|
||||
elif message.channel == "commentary" and recipient.startswith("functions."):
|
||||
# Function calls (with or without "functions." prefix)
|
||||
elif is_function_recipient(recipient, function_tool_names):
|
||||
output_items.extend(_parse_function_call(message, recipient))
|
||||
|
||||
# Built-in MCP tools (python, browser, container)
|
||||
@@ -450,6 +455,7 @@ def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
|
||||
|
||||
def parser_state_to_response_output(
|
||||
parser: StreamableParser,
|
||||
function_tool_names: frozenset[str] | None = None,
|
||||
) -> list[ResponseOutputItem]:
|
||||
"""Extract in-progress response items from incomplete parser state.
|
||||
|
||||
@@ -464,15 +470,15 @@ def parser_state_to_response_output(
|
||||
if current_recipient is not None and current_recipient.startswith("browser."):
|
||||
return []
|
||||
|
||||
if current_recipient and parser.current_channel in ("commentary", "analysis"):
|
||||
if current_recipient.startswith("functions."):
|
||||
if current_recipient:
|
||||
if is_function_recipient(current_recipient, function_tool_names):
|
||||
rid = random_uuid()
|
||||
return [
|
||||
ResponseFunctionToolCall(
|
||||
arguments=parser.current_content,
|
||||
call_id=f"call_{rid}",
|
||||
type="function_call",
|
||||
name=current_recipient.split(".")[-1],
|
||||
name=extract_function_from_recipient(current_recipient),
|
||||
id=f"fc_{rid}",
|
||||
status="in_progress",
|
||||
)
|
||||
|
||||
@@ -89,6 +89,7 @@ from vllm.entrypoints.openai.responses.streaming_events import (
|
||||
from vllm.entrypoints.openai.responses.utils import (
|
||||
construct_input_messages,
|
||||
construct_tool_dicts,
|
||||
extract_function_tool_names,
|
||||
extract_tool_types,
|
||||
)
|
||||
from vllm.entrypoints.serve.render.serving import OpenAIServingRender
|
||||
@@ -449,11 +450,16 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
)
|
||||
|
||||
context: ConversationContext
|
||||
function_tool_names = extract_function_tool_names(request.tools)
|
||||
if self.use_harmony:
|
||||
if request.stream:
|
||||
context = StreamingHarmonyContext(messages, available_tools)
|
||||
context = StreamingHarmonyContext(
|
||||
messages, available_tools, function_tool_names
|
||||
)
|
||||
else:
|
||||
context = HarmonyContext(messages, available_tools)
|
||||
context = HarmonyContext(
|
||||
messages, available_tools, function_tool_names
|
||||
)
|
||||
else:
|
||||
if envs.VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT:
|
||||
# This is a feature in development for parsing
|
||||
@@ -1070,10 +1076,11 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
) -> list[ResponseOutputItem]:
|
||||
output_items: list[ResponseOutputItem] = []
|
||||
num_init_messages = context.num_init_messages
|
||||
fn_names = context.function_tool_names
|
||||
for msg in context.messages[num_init_messages:]:
|
||||
output_items.extend(harmony_to_response_output(msg))
|
||||
output_items.extend(harmony_to_response_output(msg, fn_names))
|
||||
# Handle the generation stopped in the middle (if any).
|
||||
last_items = parser_state_to_response_output(context.parser)
|
||||
last_items = parser_state_to_response_output(context.parser, fn_names)
|
||||
if last_items:
|
||||
output_items.extend(last_items)
|
||||
return output_items
|
||||
@@ -1448,7 +1455,9 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
if ctx.is_expecting_start():
|
||||
if len(ctx.parser.messages) > 0:
|
||||
previous_item = ctx.parser.messages[-1]
|
||||
for event in emit_previous_item_done_events(previous_item, state):
|
||||
for event in emit_previous_item_done_events(
|
||||
previous_item, state, ctx.function_tool_names
|
||||
):
|
||||
yield _increment_sequence_number_and_return(event)
|
||||
state.reset_for_new_item()
|
||||
|
||||
|
||||
@@ -62,6 +62,10 @@ from openai_harmony import Message as HarmonyMessage
|
||||
|
||||
from vllm.entrypoints.mcp.tool_server import ToolServer
|
||||
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
extract_function_from_recipient,
|
||||
is_function_recipient,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.context import StreamingHarmonyContext
|
||||
from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponseReasoningPartAddedEvent,
|
||||
@@ -111,19 +115,19 @@ class StreamingState:
|
||||
self.current_call_id = ""
|
||||
|
||||
|
||||
def is_mcp_tool_by_namespace(recipient: str | None) -> bool:
|
||||
def is_mcp_tool_by_namespace(
|
||||
recipient: str | None,
|
||||
allowed_function_tool_names: frozenset[str] | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Determine if a tool call is an MCP tool based on recipient prefix.
|
||||
|
||||
- Tools starting with "functions." are function calls
|
||||
- Everything else is an MCP tool
|
||||
Inverse of :func:`is_function_recipient` — everything that is not
|
||||
a function call is an MCP tool.
|
||||
"""
|
||||
if recipient is None:
|
||||
return False
|
||||
|
||||
# Function calls have "functions." prefix
|
||||
# Everything else is an MCP tool
|
||||
return not recipient.startswith("functions.")
|
||||
return not is_function_recipient(recipient, allowed_function_tool_names)
|
||||
|
||||
|
||||
# =====================================================================
|
||||
@@ -575,16 +579,16 @@ def emit_content_delta_events(
|
||||
return emit_text_delta_events(delta, state)
|
||||
elif channel == "analysis" and recipient is None:
|
||||
return emit_reasoning_delta_events(delta, state)
|
||||
# built-in tools will be triggered on the analysis channel
|
||||
# However, occasionally built-in tools will
|
||||
# still be output to commentary.
|
||||
elif channel in ("commentary", "analysis") and recipient is not None:
|
||||
if recipient.startswith("functions."):
|
||||
function_name = recipient[len("functions.") :]
|
||||
elif recipient is not None:
|
||||
fn_names = ctx.function_tool_names
|
||||
if is_function_recipient(recipient, fn_names):
|
||||
function_name = extract_function_from_recipient(recipient)
|
||||
return emit_function_call_delta_events(delta, function_name, state)
|
||||
elif recipient == "python":
|
||||
return emit_code_interpreter_delta_events(delta, state)
|
||||
elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(recipient):
|
||||
elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(
|
||||
recipient, fn_names
|
||||
):
|
||||
return emit_mcp_delta_events(delta, state, recipient)
|
||||
|
||||
return []
|
||||
@@ -593,6 +597,7 @@ def emit_content_delta_events(
|
||||
def emit_previous_item_done_events(
|
||||
previous_item: HarmonyMessage,
|
||||
state: StreamingState,
|
||||
function_tool_names: frozenset[str] | None = None,
|
||||
) -> list[StreamingResponsesResponse]:
|
||||
"""Emit done events for the previous item when expecting a new start.
|
||||
|
||||
@@ -602,13 +607,13 @@ def emit_previous_item_done_events(
|
||||
text = previous_item.content[0].text
|
||||
if previous_item.recipient is not None:
|
||||
# Deal with tool call
|
||||
if previous_item.recipient.startswith("functions."):
|
||||
function_name = previous_item.recipient[len("functions.") :]
|
||||
if is_function_recipient(previous_item.recipient, function_tool_names):
|
||||
function_name = extract_function_from_recipient(previous_item.recipient)
|
||||
return emit_function_call_done_events(function_name, text, state)
|
||||
elif previous_item.recipient == "python":
|
||||
return emit_code_interpreter_completion_events(previous_item, state)
|
||||
elif (
|
||||
is_mcp_tool_by_namespace(previous_item.recipient)
|
||||
is_mcp_tool_by_namespace(previous_item.recipient, function_tool_names)
|
||||
and state.current_item_id is not None
|
||||
and state.current_item_id.startswith("mcp_")
|
||||
):
|
||||
@@ -792,9 +797,12 @@ def emit_tool_action_events(
|
||||
and state.sent_output_item_added
|
||||
):
|
||||
recipient = previous_item.recipient
|
||||
fn_names = ctx.function_tool_names
|
||||
if recipient == "python":
|
||||
events.extend(emit_code_interpreter_completion_events(previous_item, state))
|
||||
elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(recipient):
|
||||
elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(
|
||||
recipient, fn_names
|
||||
):
|
||||
events.extend(
|
||||
emit_mcp_completion_events(
|
||||
recipient, previous_item.content[0].text, state
|
||||
|
||||
@@ -236,6 +236,10 @@ def _construct_message_from_response_item(
|
||||
return item # type: ignore[arg-type]
|
||||
|
||||
|
||||
def extract_function_tool_names(tools: list[Tool]) -> frozenset[str]:
    """Collect the names of every tool whose ``type`` is ``"function"``.

    Tools of any other type are skipped without reading their ``name``.
    """
    function_names = set()
    for candidate in tools:
        if candidate.type == "function":
            function_names.add(candidate.name)
    return frozenset(function_names)
|
||||
|
||||
|
||||
def extract_tool_types(tools: list[Tool]) -> set[str]:
|
||||
"""
|
||||
Extracts the tool types from the given tools.
|
||||
|
||||
@@ -13,7 +13,11 @@ from vllm.entrypoints.openai.engine.protocol import (
|
||||
FunctionCall,
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import parse_output_into_messages
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
extract_function_from_recipient,
|
||||
is_function_recipient,
|
||||
parse_output_into_messages,
|
||||
)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.tool_parsers.abstract_tool_parser import (
|
||||
Tool,
|
||||
@@ -50,10 +54,12 @@ class OpenAIToolParser(ToolParser):
|
||||
|
||||
if len(parser.messages) > 0:
|
||||
for msg in parser.messages:
|
||||
if msg.author.role != "assistant":
|
||||
continue
|
||||
if len(msg.content) < 1:
|
||||
continue
|
||||
msg_text = msg.content[0].text
|
||||
if msg.recipient and msg.recipient.startswith("functions."):
|
||||
if msg.recipient and is_function_recipient(msg.recipient):
|
||||
# If no content-type is given assume JSON, as that's the
|
||||
# most common case with gpt-oss models.
|
||||
if not msg.content_type or "json" in msg.content_type:
|
||||
@@ -72,7 +78,7 @@ class OpenAIToolParser(ToolParser):
|
||||
ToolCall(
|
||||
type="function",
|
||||
function=FunctionCall(
|
||||
name=msg.recipient.split("functions.")[1],
|
||||
name=extract_function_from_recipient(msg.recipient),
|
||||
arguments=tool_args,
|
||||
),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user