mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Bugfix] Fix Harmony preamble visibility in Responses API (#32114)
Signed-off-by: Pushkar Patel <git@thepushkarp.com> Signed-off-by: pupa <pupa@users.noreply.github.com>
This commit is contained in:
@@ -2,7 +2,11 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
|
||||
from openai.types.responses import (
|
||||
ResponseFunctionToolCall,
|
||||
ResponseOutputMessage,
|
||||
ResponseReasoningItem,
|
||||
)
|
||||
from openai.types.responses.response_output_item import McpCall
|
||||
from openai_harmony import Author, Message, Role, TextContent
|
||||
|
||||
@@ -10,6 +14,7 @@ from tests.entrypoints.openai.utils import verify_harmony_messages
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
auto_drop_analysis_messages,
|
||||
get_encoding,
|
||||
get_system_message,
|
||||
has_custom_tools,
|
||||
parse_chat_input_to_harmony_message,
|
||||
parse_chat_output,
|
||||
@@ -840,15 +845,58 @@ class TestParseChatOutput:
|
||||
assert reasoning == "I've thought hard about this."
|
||||
assert final_content == "The answer is 4."
|
||||
|
||||
def test_parse_chat_output_commentary_with_recipient_excluded(self) -> None:
    """Tool-call commentary must be kept out of ``final_content``.

    The encoded stream holds two commentary messages: a preamble with no
    recipient, then one addressed to ``functions.get_weather``. Only the
    preamble text is expected back as ``final_content``; nothing is
    expected as reasoning.
    """
    segments = (
        "<|channel|>commentary",
        "<|message|>Let me check the weather.<|end|>",
        "<|start|>assistant to=functions.get_weather",
        "<|channel|>commentary",
        '<|message|>{"location": "SF"}<|end|>',
    )
    encoding = get_encoding()
    encoded = encoding.encode("".join(segments), allowed_special="all")

    # The third element of the result is not inspected by this test.
    reasoning, final_content, _ignored = parse_chat_output(encoded)

    assert reasoning is None
    assert final_content == "Let me check the weather."
|
||||
|
||||
def test_parse_chat_output_interrupted_preamble(self) -> None:
    """An unterminated preamble is surfaced as ``final_content``.

    The input is a commentary message with no recipient and no closing
    ``<|end|>`` token. Its text is expected in ``final_content`` while
    ``reasoning`` stays ``None``.
    """
    partial_stream = "<|channel|>commentary<|message|>I'll search for that"
    encoded = get_encoding().encode(partial_stream, allowed_special="all")

    reasoning, final_content, _ignored = parse_chat_output(encoded)

    assert reasoning is None
    assert final_content == "I'll search for that"
|
||||
|
||||
def test_parse_chat_output_preamble_then_final(self) -> None:
    """A preamble followed by a final message yields both texts.

    The two visible texts are expected in ``final_content`` in stream
    order, separated by a single newline; ``reasoning`` stays ``None``.
    """
    segments = (
        "<|channel|>commentary",
        "<|message|>Let me look that up.<|end|>",
        "<|start|>assistant<|channel|>final",
        "<|message|>The answer is 42.<|end|>",
    )
    encoded = get_encoding().encode("".join(segments), allowed_special="all")

    reasoning, final_content, _ignored = parse_chat_output(encoded)

    assert reasoning is None
    assert final_content == "Let me look that up.\nThe answer is 42."
|
||||
|
||||
|
||||
class TestParseOutputMessage:
|
||||
"""Tests for parse_output_message function."""
|
||||
|
||||
def test_commentary_with_no_recipient_creates_reasoning(self):
|
||||
"""Test that commentary with recipient=None (preambles) creates reasoning items.
|
||||
def test_commentary_with_no_recipient_creates_message(self):
|
||||
"""Test that commentary with recipient=None (preambles) creates message items.
|
||||
|
||||
Per Harmony format, commentary channel can contain preambles to calling
|
||||
multiple functions - explanatory text with no recipient.
|
||||
Per Harmony format, preambles are intended to be shown to end-users,
|
||||
unlike analysis channel content which is hidden reasoning.
|
||||
See: https://cookbook.openai.com/articles/openai-harmony
|
||||
"""
|
||||
message = Message.from_role_and_content(
|
||||
Role.ASSISTANT, "I will now search for the weather information."
|
||||
@@ -859,13 +907,16 @@ class TestParseOutputMessage:
|
||||
output_items = parse_output_message(message)
|
||||
|
||||
assert len(output_items) == 1
|
||||
assert isinstance(output_items[0], ResponseReasoningItem)
|
||||
assert output_items[0].type == "reasoning"
|
||||
assert isinstance(output_items[0], ResponseOutputMessage)
|
||||
assert output_items[0].type == "message"
|
||||
assert output_items[0].role == "assistant"
|
||||
assert output_items[0].status == "completed"
|
||||
assert len(output_items[0].content) == 1
|
||||
assert output_items[0].content[0].type == "output_text"
|
||||
assert (
|
||||
output_items[0].content[0].text
|
||||
== "I will now search for the weather information."
|
||||
)
|
||||
assert output_items[0].content[0].type == "reasoning_text"
|
||||
|
||||
def test_commentary_with_function_recipient_creates_function_call(self):
|
||||
"""Test commentary with recipient='functions.X' creates function calls."""
|
||||
@@ -944,7 +995,7 @@ class TestParseOutputMessage:
|
||||
output_items = parse_output_message(message)
|
||||
|
||||
assert len(output_items) == 1
|
||||
assert isinstance(output_items[0], ResponseReasoningItem)
|
||||
assert isinstance(output_items[0], ResponseOutputMessage)
|
||||
assert output_items[0].content[0].text == ""
|
||||
|
||||
def test_commentary_with_multiple_contents_and_no_recipient(self):
|
||||
@@ -958,10 +1009,13 @@ class TestParseOutputMessage:
|
||||
|
||||
output_items = parse_output_message(message)
|
||||
|
||||
assert len(output_items) == 2
|
||||
assert all(isinstance(item, ResponseReasoningItem) for item in output_items)
|
||||
# _parse_final_message returns single ResponseOutputMessage with
|
||||
# multiple contents
|
||||
assert len(output_items) == 1
|
||||
assert isinstance(output_items[0], ResponseOutputMessage)
|
||||
assert len(output_items[0].content) == 2
|
||||
assert output_items[0].content[0].text == "Step 1: Analyze the request"
|
||||
assert output_items[1].content[0].text == "Step 2: Prepare to call functions"
|
||||
assert output_items[0].content[1].text == "Step 2: Prepare to call functions"
|
||||
|
||||
def test_commentary_with_multiple_function_calls(self):
|
||||
"""Test multiple function calls in commentary channel."""
|
||||
@@ -1133,7 +1187,7 @@ def test_parse_remaining_state_commentary_channel() -> None:
|
||||
assert mcp_items[0].status == "in_progress"
|
||||
|
||||
# Test 3: Built-in tool (python)
|
||||
# should NOT return MCP call, falls through to reasoning
|
||||
# should NOT return MCP call, returns reasoning (internal tool interaction)
|
||||
parser_builtin = Mock()
|
||||
parser_builtin.current_content = "print('hello')"
|
||||
parser_builtin.current_role = Role.ASSISTANT
|
||||
@@ -1142,11 +1196,26 @@ def test_parse_remaining_state_commentary_channel() -> None:
|
||||
|
||||
builtin_items = parse_remaining_state(parser_builtin)
|
||||
|
||||
# Should fall through to reasoning logic
|
||||
# Built-in tools explicitly return reasoning
|
||||
assert len(builtin_items) == 1
|
||||
assert not isinstance(builtin_items[0], McpCall)
|
||||
assert builtin_items[0].type == "reasoning"
|
||||
|
||||
# Test 4: No recipient (preamble) → should return message, not reasoning
|
||||
parser_preamble = Mock()
|
||||
parser_preamble.current_content = "I'll search for that information now."
|
||||
parser_preamble.current_role = Role.ASSISTANT
|
||||
parser_preamble.current_channel = "commentary"
|
||||
parser_preamble.current_recipient = None
|
||||
|
||||
preamble_items = parse_remaining_state(parser_preamble)
|
||||
|
||||
assert len(preamble_items) == 1
|
||||
assert isinstance(preamble_items[0], ResponseOutputMessage)
|
||||
assert preamble_items[0].type == "message"
|
||||
assert preamble_items[0].content[0].text == "I'll search for that information now."
|
||||
assert preamble_items[0].status == "incomplete" # streaming
|
||||
|
||||
|
||||
def test_parse_remaining_state_analysis_channel() -> None:
|
||||
"""Test parse_remaining_state with analysis channel and various recipients."""
|
||||
@@ -1199,3 +1268,29 @@ def test_parse_remaining_state_analysis_channel() -> None:
|
||||
assert len(builtin_items) == 1
|
||||
assert not isinstance(builtin_items[0], McpCall)
|
||||
assert builtin_items[0].type == "reasoning"
|
||||
|
||||
|
||||
class TestGetSystemMessage:
    """Checks on the channel configuration built by get_system_message."""

    @staticmethod
    def _valid_channels(with_custom_tools):
        """Return the valid channels of the generated system message."""
        system_message = get_system_message(with_custom_tools=with_custom_tools)
        return system_message.content[0].channel_config.valid_channels

    def test_commentary_channel_present_without_custom_tools(self) -> None:
        """The commentary channel stays valid when custom tools are off."""
        assert "commentary" in self._valid_channels(False)

    def test_commentary_channel_present_with_custom_tools(self) -> None:
        """The commentary channel is valid when custom tools are on."""
        assert "commentary" in self._valid_channels(True)

    def test_all_standard_channels_present(self) -> None:
        """analysis, commentary and final are valid in both configurations."""
        expected = ("analysis", "commentary", "final")
        for with_tools in (True, False):
            valid_channels = self._valid_channels(with_tools)
            for channel in expected:
                assert channel in valid_channels, (
                    f"{channel} missing when with_custom_tools={with_tools}"
                )
|
||||
|
||||
@@ -712,15 +712,14 @@ async def test_function_calling_required(client: OpenAI, model_name: str):
|
||||
async def test_system_message_with_tools(client: OpenAI, model_name: str):
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import get_system_message
|
||||
|
||||
# Test with custom tools enabled - commentary channel should be available
|
||||
sys_msg = get_system_message(with_custom_tools=True)
|
||||
valid_channels = sys_msg.content[0].channel_config.valid_channels
|
||||
assert "commentary" in valid_channels
|
||||
|
||||
# Test with custom tools disabled - commentary channel should be removed
|
||||
sys_msg = get_system_message(with_custom_tools=False)
|
||||
valid_channels = sys_msg.content[0].channel_config.valid_channels
|
||||
assert "commentary" not in valid_channels
|
||||
# Commentary channel should always be present (needed for preambles)
|
||||
# regardless of whether custom tools are enabled
|
||||
for with_tools in (True, False):
|
||||
sys_msg = get_system_message(with_custom_tools=with_tools)
|
||||
valid_channels = sys_msg.content[0].channel_config.valid_channels
|
||||
assert "commentary" in valid_channels, (
|
||||
f"commentary channel missing when with_custom_tools={with_tools}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -172,13 +172,13 @@ class TestMCPEnabled:
|
||||
recipient = message.get("recipient")
|
||||
if recipient and recipient.startswith("python"):
|
||||
tool_call_found = True
|
||||
assert message.get("channel") == "analysis"
|
||||
assert message.get("channel") == "commentary"
|
||||
author = message.get("author", {})
|
||||
if author.get("role") == "tool" and (author.get("name") or "").startswith(
|
||||
"python"
|
||||
):
|
||||
tool_response_found = True
|
||||
assert message.get("channel") == "analysis"
|
||||
assert message.get("channel") == "commentary"
|
||||
|
||||
assert tool_call_found, (
|
||||
f"No Python tool call found. "
|
||||
|
||||
@@ -180,20 +180,13 @@ class TestExtractHarmonyStreamingDelta:
|
||||
|
||||
assert delta_message.tool_calls[0].index == 1
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"channel,recipient",
|
||||
[
|
||||
("commentary", None),
|
||||
("commentary", "browser.search"),
|
||||
],
|
||||
)
|
||||
def test_returns_tool_call_preambles(self, channel, recipient):
|
||||
"""Test that invalid tool recipient on commentary is treated as content."""
|
||||
def test_returns_preambles_as_content(self):
|
||||
"""Test that commentary with no recipient (preamble) is user content."""
|
||||
parser = MockStreamableParser()
|
||||
delta_text = "some text"
|
||||
|
||||
token_states = [
|
||||
TokenState(channel=channel, recipient=recipient, text=delta_text)
|
||||
TokenState(channel="commentary", recipient=None, text=delta_text)
|
||||
]
|
||||
|
||||
delta_message, tools_streamed = extract_harmony_streaming_delta(
|
||||
@@ -211,6 +204,7 @@ class TestExtractHarmonyStreamingDelta:
|
||||
[
|
||||
(None, None),
|
||||
("unknown_channel", None),
|
||||
("commentary", "browser.search"),
|
||||
],
|
||||
)
|
||||
def test_returns_none_for_invalid_inputs(self, channel, recipient):
|
||||
|
||||
@@ -26,6 +26,9 @@ from vllm.entrypoints.openai.responses.serving import (
|
||||
_extract_allowed_tools_from_mcp_requests,
|
||||
extract_tool_types,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.streaming_events import (
|
||||
StreamingState,
|
||||
)
|
||||
from vllm.inputs.data import TokensPrompt
|
||||
from vllm.outputs import CompletionOutput, RequestOutput
|
||||
from vllm.sampling_params import SamplingParams
|
||||
@@ -439,3 +442,115 @@ class TestExtractAllowedToolsFromMcpRequests:
|
||||
"server1": ["tool1"],
|
||||
"server2": ["tool2"],
|
||||
}
|
||||
|
||||
|
||||
class TestHarmonyPreambleStreaming:
    """Streaming-event tests for preambles (commentary with no recipient)."""

    @staticmethod
    def _make_ctx(*, channel, recipient, delta="hello"):
        """Return a mock streaming context exposing the given parser state."""
        ctx = MagicMock(last_content_delta=delta)
        # Dotted keys configure attributes on the child ``parser`` mock.
        ctx.configure_mock(
            **{
                "parser.current_channel": channel,
                "parser.current_recipient": recipient,
            }
        )
        return ctx

    @staticmethod
    def _make_previous_item(*, channel, recipient, text="preamble text"):
        """Return a mock finished message carrying one text content part."""
        part = MagicMock(text=text)
        return MagicMock(channel=channel, recipient=recipient, content=[part])

    @staticmethod
    def _event_types(events):
        """Collect the ``type`` attribute of every emitted event, in order."""
        return [event.type for event in events]

    def test_preamble_delta_emits_text_events(self) -> None:
        """A first preamble delta emits item-added and output_text.delta."""
        from vllm.entrypoints.openai.responses.streaming_events import (
            emit_content_delta_events,
        )

        ctx = self._make_ctx(channel="commentary", recipient=None)
        state = StreamingState()

        emitted = self._event_types(emit_content_delta_events(ctx, state))

        assert "response.output_text.delta" in emitted
        assert "response.output_item.added" in emitted

    def test_preamble_delta_second_token_no_added(self) -> None:
        """Once the item was announced, later deltas do not re-announce it."""
        from vllm.entrypoints.openai.responses.streaming_events import (
            emit_content_delta_events,
        )

        ctx = self._make_ctx(channel="commentary", recipient=None, delta="w")
        state = StreamingState()
        # Pretend the output item has already been opened by an earlier token.
        state.sent_output_item_added = True
        state.current_item_id = "msg_test"
        state.current_content_index = 0

        emitted = self._event_types(emit_content_delta_events(ctx, state))

        assert "response.output_text.delta" in emitted
        assert "response.output_item.added" not in emitted

    def test_commentary_with_function_recipient_not_preamble(self) -> None:
        """Commentary addressed to 'functions.X' emits no output_text.delta."""
        from vllm.entrypoints.openai.responses.streaming_events import (
            emit_content_delta_events,
        )

        ctx = self._make_ctx(
            channel="commentary",
            recipient="functions.get_weather",
        )
        state = StreamingState()

        emitted = self._event_types(emit_content_delta_events(ctx, state))

        assert "response.output_text.delta" not in emitted

    def test_preamble_done_emits_text_done_events(self) -> None:
        """A finished preamble emits text, content-part and item done events."""
        from vllm.entrypoints.openai.responses.streaming_events import (
            emit_previous_item_done_events,
        )

        previous = self._make_previous_item(channel="commentary", recipient=None)
        state = StreamingState()
        state.current_item_id = "msg_test"
        state.current_output_index = 0
        state.current_content_index = 0

        emitted = self._event_types(emit_previous_item_done_events(previous, state))

        for expected in (
            "response.output_text.done",
            "response.content_part.done",
            "response.output_item.done",
        ):
            assert expected in emitted

    def test_commentary_with_recipient_no_preamble_done(self) -> None:
        """Finished commentary for 'functions.X' emits no output_text.done."""
        from vllm.entrypoints.openai.responses.streaming_events import (
            emit_previous_item_done_events,
        )

        previous = self._make_previous_item(
            channel="commentary", recipient="functions.get_weather"
        )
        state = StreamingState()
        state.current_item_id = "fc_test"

        emitted = self._event_types(emit_previous_item_done_events(previous, state))

        assert "response.output_text.done" not in emitted
|
||||
|
||||
@@ -236,6 +236,44 @@ def test_reasoning_tokens_counting(mock_parser):
|
||||
assert context.num_output_tokens == 4
|
||||
|
||||
|
||||
def test_preamble_tokens_not_counted_as_reasoning(mock_parser):
    """Preamble tokens must leave ``num_reasoning_tokens`` untouched.

    A preamble is commentary with no recipient, i.e. text shown to the
    user. Its three output tokens are expected in ``num_output_tokens``
    and none of them in ``num_reasoning_tokens``.
    """
    context = HarmonyContext(messages=[], available_tools=[])

    # Commentary channel with no recipient is the preamble case.
    mock_parser.current_channel = "commentary"
    mock_parser.current_recipient = None

    request_output = create_mock_request_output(
        prompt_token_ids=[1, 2, 3],
        output_token_ids=[4, 5, 6],
        num_cached_tokens=0,
    )
    context.append_output(request_output)

    assert context.num_reasoning_tokens == 0
    assert context.num_output_tokens == 3
|
||||
|
||||
|
||||
def test_commentary_with_recipient_counted_as_reasoning(mock_parser):
    """Commentary addressed to a tool still counts as reasoning tokens.

    With the recipient set to ``"python"``, all three output tokens are
    expected in both ``num_reasoning_tokens`` and ``num_output_tokens``.
    """
    context = HarmonyContext(messages=[], available_tools=[])

    # A non-None recipient marks this commentary as a tool interaction.
    mock_parser.current_channel = "commentary"
    mock_parser.current_recipient = "python"

    request_output = create_mock_request_output(
        prompt_token_ids=[1, 2, 3],
        output_token_ids=[4, 5, 6],
        num_cached_tokens=0,
    )
    context.append_output(request_output)

    assert context.num_reasoning_tokens == 3
    assert context.num_output_tokens == 3
|
||||
|
||||
|
||||
def test_zero_tokens_edge_case():
|
||||
"""Test behavior with all zero token counts."""
|
||||
context = HarmonyContext(messages=[], available_tools=[])
|
||||
|
||||
@@ -147,7 +147,7 @@ def extract_harmony_streaming_delta(
|
||||
function=DeltaFunctionCall(arguments=group.text),
|
||||
)
|
||||
)
|
||||
elif group.channel == "commentary":
|
||||
elif group.channel == "commentary" and group.recipient is None:
|
||||
# Tool call preambles meant to be shown to the user
|
||||
combined_content += group.text
|
||||
content_encountered = True
|
||||
|
||||
@@ -26,7 +26,6 @@ from openai.types.responses.response_reasoning_item import (
|
||||
from openai.types.responses.tool import Tool
|
||||
from openai_harmony import (
|
||||
Author,
|
||||
ChannelConfig,
|
||||
Conversation,
|
||||
DeveloperContent,
|
||||
HarmonyEncodingName,
|
||||
@@ -126,13 +125,6 @@ def get_system_message(
|
||||
sys_msg_content = sys_msg_content.with_tools(python_description)
|
||||
if container_description is not None:
|
||||
sys_msg_content = sys_msg_content.with_tools(container_description)
|
||||
if not with_custom_tools:
|
||||
channel_config = sys_msg_content.channel_config
|
||||
invalid_channel = "commentary"
|
||||
new_config = ChannelConfig.require_channels(
|
||||
[c for c in channel_config.valid_channels if c != invalid_channel]
|
||||
)
|
||||
sys_msg_content = sys_msg_content.with_channel_config(new_config)
|
||||
sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content)
|
||||
return sys_msg
|
||||
|
||||
@@ -686,6 +678,22 @@ def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem
|
||||
return output_items
|
||||
|
||||
|
||||
def _parse_message_no_recipient(
    message: Message,
) -> list[ResponseOutputItem]:
    """Convert a recipient-less Harmony message into output items.

    Dispatch is by channel:

    * ``analysis`` -> whatever ``_parse_reasoning`` returns.
    * ``commentary`` / ``final`` -> a one-element list holding the result
      of ``_parse_final_message``. Per the Harmony format, preambles
      (commentary with no recipient) and final-channel content are both
      meant to be shown to end-users.
      See: https://cookbook.openai.com/articles/openai-harmony

    Raises:
        ValueError: if the message is on any other channel.
    """
    channel = message.channel

    if channel == "analysis":
        return _parse_reasoning(message)

    user_visible_channels = ("commentary", "final")
    if channel not in user_visible_channels:
        raise ValueError(f"Unknown channel: {message.channel}")

    return [_parse_final_message(message)]
|
||||
|
||||
|
||||
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
|
||||
"""
|
||||
Parse a Harmony message into a list of output response items.
|
||||
@@ -717,19 +725,8 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
|
||||
output_items.extend(_parse_mcp_call(message, recipient))
|
||||
|
||||
# No recipient - handle based on channel for non-tool messages
|
||||
elif message.channel == "analysis":
|
||||
output_items.extend(_parse_reasoning(message))
|
||||
|
||||
elif message.channel == "commentary":
|
||||
# Per Harmony format, commentary channel can contain preambles to calling
|
||||
# multiple functions - explanatory text with no recipient
|
||||
output_items.extend(_parse_reasoning(message))
|
||||
|
||||
elif message.channel == "final":
|
||||
output_items.append(_parse_final_message(message))
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown channel: {message.channel}")
|
||||
output_items.extend(_parse_message_no_recipient(message))
|
||||
|
||||
return output_items
|
||||
|
||||
@@ -786,7 +783,26 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
|
||||
)
|
||||
]
|
||||
|
||||
if parser.current_channel in ("commentary", "analysis"):
|
||||
if parser.current_channel == "commentary":
|
||||
# Per Harmony format, preambles (commentary with no recipient) are
|
||||
# intended to be shown to end-users, unlike analysis channel content.
|
||||
output_text = ResponseOutputText(
|
||||
text=parser.current_content,
|
||||
annotations=[],
|
||||
type="output_text",
|
||||
logprobs=None,
|
||||
)
|
||||
return [
|
||||
ResponseOutputMessage(
|
||||
id=f"msg_{random_uuid()}",
|
||||
content=[output_text],
|
||||
role="assistant",
|
||||
status="incomplete",
|
||||
type="message",
|
||||
)
|
||||
]
|
||||
|
||||
if parser.current_channel == "analysis":
|
||||
return [
|
||||
ResponseReasoningItem(
|
||||
id=f"rs_{random_uuid()}",
|
||||
@@ -855,17 +871,30 @@ def parse_chat_output(
|
||||
is_tool_call = False # TODO: update this when tool call is supported
|
||||
|
||||
# Get completed messages from the parser
|
||||
# - analysis channel: hidden reasoning
|
||||
# - commentary channel without recipient (preambles): visible to user
|
||||
# - final channel: visible to user
|
||||
# - commentary with recipient (tool calls): handled separately by tool parser
|
||||
reasoning_texts = [
|
||||
msg.content[0].text for msg in output_msgs if msg.channel == "analysis"
|
||||
]
|
||||
final_texts = [
|
||||
msg.content[0].text for msg in output_msgs if msg.channel != "analysis"
|
||||
msg.content[0].text
|
||||
for msg in output_msgs
|
||||
if msg.channel == "final" or (msg.channel == "commentary" and not msg.recipient)
|
||||
]
|
||||
|
||||
# Extract partial messages from the parser
|
||||
if parser.current_channel == "analysis" and parser.current_content:
|
||||
reasoning_texts.append(parser.current_content)
|
||||
elif parser.current_channel != "analysis" and parser.current_content:
|
||||
elif parser.current_channel == "final" and parser.current_content:
|
||||
final_texts.append(parser.current_content)
|
||||
elif (
|
||||
parser.current_channel == "commentary"
|
||||
and not parser.current_recipient
|
||||
and parser.current_content
|
||||
):
|
||||
# Preambles (commentary without recipient) are visible to user
|
||||
final_texts.append(parser.current_content)
|
||||
|
||||
# Flatten multiple messages into a single string
|
||||
|
||||
@@ -540,8 +540,12 @@ class HarmonyContext(ConversationContext):
|
||||
self.first_tok_of_message = True # For streaming support
|
||||
|
||||
def _update_num_reasoning_tokens(self):
|
||||
# Count all analysis and commentary channels as reasoning tokens
|
||||
if self.parser.current_channel in {"analysis", "commentary"}:
|
||||
channel = self.parser.current_channel
|
||||
if channel == "analysis":
|
||||
self.num_reasoning_tokens += 1
|
||||
elif channel == "commentary" and self.parser.current_recipient is not None:
|
||||
# Tool interactions (python/browser/container) are hidden.
|
||||
# Preambles (recipient=None) are visible user text.
|
||||
self.num_reasoning_tokens += 1
|
||||
|
||||
def append_output(self, output: RequestOutput) -> None:
|
||||
|
||||
@@ -563,7 +563,9 @@ def emit_content_delta_events(
|
||||
channel = ctx.parser.current_channel
|
||||
recipient = ctx.parser.current_recipient
|
||||
|
||||
if channel == "final" and recipient is None:
|
||||
if channel in ("final", "commentary") and recipient is None:
|
||||
# Preambles (commentary with no recipient) and final messages
|
||||
# are both user-visible text.
|
||||
return emit_text_delta_events(delta, state)
|
||||
elif channel == "analysis" and recipient is None:
|
||||
return emit_reasoning_delta_events(delta, state)
|
||||
@@ -607,7 +609,9 @@ def emit_previous_item_done_events(
|
||||
return emit_mcp_completion_events(previous_item.recipient, text, state)
|
||||
elif previous_item.channel == "analysis":
|
||||
return emit_reasoning_done_events(text, state)
|
||||
elif previous_item.channel == "final":
|
||||
elif previous_item.channel in ("commentary", "final"):
|
||||
# Preambles (commentary with no recipient) and final messages
|
||||
# are both user-visible text.
|
||||
return emit_text_output_done_events(text, state)
|
||||
return []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user