mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Bugfix] GPT-OSS instruction rendering (#44330)
Signed-off-by: Yifan Zong <yzong@redhat.com>
This commit is contained in:
@@ -1364,6 +1364,57 @@ class TestServingChatWithHarmony:
|
||||
],
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_system_message_without_tools(self, serving_chat, stream):
|
||||
"""Leading system message produces a developer message with
|
||||
DeveloperContent (# Instructions header)."""
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
{"role": "system"},
|
||||
{
|
||||
"role": "developer",
|
||||
"instructions": "You are a helpful assistant.",
|
||||
},
|
||||
{"role": "user", "content": "Hello"},
|
||||
],
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_system_message_with_tools(self, serving_chat, stream, weather_tools):
|
||||
"""Leading system message is folded into the developer message
|
||||
alongside tool definitions."""
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "What's the weather?"},
|
||||
]
|
||||
req = ChatCompletionRequest(
|
||||
model=MODEL_NAME, messages=messages, tools=weather_tools
|
||||
)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
{"role": "system"},
|
||||
{
|
||||
"role": "developer",
|
||||
"instructions": "You are a helpful assistant.",
|
||||
"tool_definitions": ["get_weather"],
|
||||
},
|
||||
{"role": "user", "content": "What's the weather?"},
|
||||
],
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_call_response_with_content(
|
||||
self, serving_chat, stream, weather_tools, weather_messages_start
|
||||
|
||||
@@ -45,6 +45,31 @@ class TestResponseInputToHarmonyRenderParity:
|
||||
# Single-message cases
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
def test_developer_message(self):
|
||||
"""Both APIs must render developer messages identically using
|
||||
DeveloperContent (with the '# Instructions' header)."""
|
||||
chat_msgs = parse_chat_input_to_harmony_message(
|
||||
{"role": "developer", "content": "Be concise."}
|
||||
)
|
||||
resp_msgs = [
|
||||
response_input_to_harmony(
|
||||
{
|
||||
"type": "message",
|
||||
"role": "developer",
|
||||
"content": "Be concise.",
|
||||
},
|
||||
prev_responses=[],
|
||||
)
|
||||
]
|
||||
|
||||
expected = [{"role": "developer", "instructions": "Be concise."}]
|
||||
verify_harmony_messages(chat_msgs, expected)
|
||||
verify_harmony_messages(resp_msgs, expected)
|
||||
|
||||
assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
|
||||
[_system()] + resp_msgs
|
||||
)
|
||||
|
||||
def test_user_message(self):
|
||||
chat_msgs = parse_chat_input_to_harmony_message(
|
||||
{"role": "user", "content": "What's the weather in Paris?"}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from openai_harmony import Message, Role
|
||||
from openai_harmony import DeveloperContent, Message, Role
|
||||
|
||||
from tests.entrypoints.openai.utils import verify_harmony_messages
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
@@ -269,7 +269,8 @@ class TestCommonParseInputToHarmonyMessage:
|
||||
assert messages[0].recipient == "functions.get_current_time"
|
||||
|
||||
def test_system_message(self, parse_function):
|
||||
"""Test parsing system message."""
|
||||
"""Test parsing system messages, which are parsed into developer messages
|
||||
with DeveloperContent."""
|
||||
chat_msg = {
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant",
|
||||
@@ -278,9 +279,9 @@ class TestCommonParseInputToHarmonyMessage:
|
||||
messages = parse_function(chat_msg)
|
||||
|
||||
assert len(messages) == 1
|
||||
# System messages are converted using Message.from_dict
|
||||
# which should preserve the role
|
||||
assert messages[0].author.role == Role.SYSTEM
|
||||
assert messages[0].author.role == Role.DEVELOPER
|
||||
assert isinstance(messages[0].content[0], DeveloperContent)
|
||||
assert messages[0].content[0].instructions == "You are a helpful assistant"
|
||||
|
||||
def test_developer_message(self, parse_function):
|
||||
"""Test parsing developer message."""
|
||||
@@ -293,6 +294,8 @@ class TestCommonParseInputToHarmonyMessage:
|
||||
|
||||
assert len(messages) == 1
|
||||
assert messages[0].author.role == Role.DEVELOPER
|
||||
assert isinstance(messages[0].content[0], DeveloperContent)
|
||||
assert messages[0].content[0].instructions == "Use concise language"
|
||||
|
||||
def test_user_message_with_string_content(self, parse_function):
|
||||
"""Test parsing user message with string content."""
|
||||
|
||||
@@ -12,7 +12,7 @@ from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningIt
|
||||
from openai.types.responses.response_reasoning_item import (
|
||||
Content as ReasoningTextContent,
|
||||
)
|
||||
from openai_harmony import Role
|
||||
from openai_harmony import DeveloperContent, Role
|
||||
|
||||
from vllm.entrypoints.openai.responses.harmony import response_input_to_harmony
|
||||
|
||||
@@ -65,14 +65,16 @@ class TestResponseInputToHarmonyMessage:
|
||||
assert msg.content[0].text == "Hello"
|
||||
|
||||
def test_system_message(self):
|
||||
"""System messages carry developer instructions and must be rendered
|
||||
as developer messages with DeveloperContent."""
|
||||
msg = response_input_to_harmony(
|
||||
{"type": "message", "role": "system", "content": "Be helpful."},
|
||||
prev_responses=[],
|
||||
)
|
||||
|
||||
assert msg.author.role == Role.SYSTEM
|
||||
assert msg.content[0].text == "Be helpful."
|
||||
assert msg.channel is None
|
||||
assert msg.author.role == Role.DEVELOPER
|
||||
assert isinstance(msg.content[0], DeveloperContent)
|
||||
assert msg.content[0].instructions == "Be helpful."
|
||||
|
||||
def test_assistant_message_gets_final_channel(self):
|
||||
msg = response_input_to_harmony(
|
||||
@@ -85,14 +87,16 @@ class TestResponseInputToHarmonyMessage:
|
||||
assert msg.content[0].text == "The answer is 42."
|
||||
|
||||
def test_developer_message_gets_instructions_prefix(self):
|
||||
"""Developer messages must use DeveloperContent which adds the
|
||||
'# Instructions' header the model was trained on."""
|
||||
msg = response_input_to_harmony(
|
||||
{"type": "message", "role": "developer", "content": "Be concise."},
|
||||
prev_responses=[],
|
||||
)
|
||||
|
||||
assert msg.author.role == Role.DEVELOPER
|
||||
assert msg.content[0].text == "Instructions:\nBe concise."
|
||||
assert msg.channel is None
|
||||
assert isinstance(msg.content[0], DeveloperContent)
|
||||
assert msg.content[0].instructions == "Be concise."
|
||||
|
||||
def test_message_with_array_content(self):
|
||||
msg = response_input_to_harmony(
|
||||
@@ -112,7 +116,9 @@ class TestResponseInputToHarmonyMessage:
|
||||
assert msg.content[0].text == "Part one. "
|
||||
assert msg.content[1].text == "Part two."
|
||||
|
||||
def test_developer_message_array_content_gets_prefix_on_each_part(self):
|
||||
def test_developer_message_array_content_concatenated(self):
|
||||
"""Array content in developer messages is flattened and rendered
|
||||
via DeveloperContent with the '# Instructions' header."""
|
||||
msg = response_input_to_harmony(
|
||||
{
|
||||
"type": "message",
|
||||
@@ -125,8 +131,9 @@ class TestResponseInputToHarmonyMessage:
|
||||
prev_responses=[],
|
||||
)
|
||||
|
||||
assert msg.content[0].text == "Instructions:\nRule 1."
|
||||
assert msg.content[1].text == "Instructions:\nRule 2."
|
||||
assert msg.author.role == Role.DEVELOPER
|
||||
assert isinstance(msg.content[0], DeveloperContent)
|
||||
assert msg.content[0].instructions == "Rule 1.Rule 2."
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# type="reasoning"
|
||||
|
||||
@@ -155,6 +155,8 @@ def verify_harmony_messages(
|
||||
assert msg.content[0].text == expected["content"]
|
||||
if "content_type" in expected:
|
||||
assert msg.content_type == expected["content_type"]
|
||||
if "instructions" in expected:
|
||||
assert msg.content[0].instructions == expected["instructions"]
|
||||
if "tool_definitions" in expected:
|
||||
# Check that the tool definitions match the expected list of tool names
|
||||
actual_tools = [t.name for t in msg.content[0].tools["functions"].tools]
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
import datetime
|
||||
from collections.abc import Iterable, Sequence
|
||||
from typing import Any
|
||||
|
||||
from openai.types.responses.tool import Tool
|
||||
from openai_harmony import (
|
||||
@@ -195,6 +196,12 @@ def get_user_message(content: str) -> Message:
|
||||
return Message.from_role_and_content(Role.USER, content)
|
||||
|
||||
|
||||
def get_system_or_developer_message(role: str, instructions: str) -> Message:
|
||||
if role == "system" and envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS:
|
||||
return get_system_message(instructions=instructions)
|
||||
return get_developer_message(instructions=instructions)
|
||||
|
||||
|
||||
def parse_chat_inputs_to_harmony_messages(chat_msgs: list) -> list[Message]:
|
||||
"""
|
||||
Parse a list of messages from request.messages in the Chat Completion API to
|
||||
@@ -249,18 +256,96 @@ def auto_drop_analysis_messages(msgs: list[Message]) -> list[Message]:
|
||||
return cleaned_msgs
|
||||
|
||||
|
||||
def flatten_chat_text_content(content: str | list | None) -> str | None:
|
||||
def flatten_input_text_content(content: Any) -> str | None:
|
||||
"""
|
||||
Extract the text parts from a chat message content field and flatten them
|
||||
into a single string.
|
||||
Extract text parts from a Chat Completion or Responses API content field and
|
||||
flatten them into a single string. Returns None if no text content is found.
|
||||
"""
|
||||
if isinstance(content, list):
|
||||
return "".join(
|
||||
item.get("text", "")
|
||||
for item in content
|
||||
if isinstance(item, dict) and item.get("type") == "text"
|
||||
if content is None or isinstance(content, str):
|
||||
return content
|
||||
if not isinstance(content, list):
|
||||
return None
|
||||
|
||||
texts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
texts.append(item)
|
||||
continue
|
||||
if isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if text is not None:
|
||||
texts.append(text)
|
||||
return "".join(texts) if texts else None
|
||||
|
||||
|
||||
def extract_instructions_from_messages(
|
||||
messages: Sequence[Any],
|
||||
) -> tuple[str | None, list[Any]]:
|
||||
"""
|
||||
Peel a leading system/developer Chat Completion or Responses message and
|
||||
flatten its instruction text.
|
||||
"""
|
||||
remaining_messages = list(messages)
|
||||
if not remaining_messages:
|
||||
return None, remaining_messages
|
||||
|
||||
first_message = remaining_messages[0]
|
||||
if not isinstance(first_message, dict):
|
||||
if hasattr(first_message, "to_dict"):
|
||||
# Handle OpenAI Harmony Message
|
||||
first_message = first_message.to_dict()
|
||||
elif hasattr(first_message, "model_dump"):
|
||||
first_message = first_message.model_dump(exclude_none=True)
|
||||
else:
|
||||
raise ValueError(f"Unknown message type: {type(first_message)}")
|
||||
|
||||
if first_message.get("role") not in (
|
||||
"system",
|
||||
"developer",
|
||||
):
|
||||
return None, remaining_messages
|
||||
|
||||
instructions = flatten_input_text_content(first_message.get("content"))
|
||||
return instructions, remaining_messages[1:]
|
||||
|
||||
|
||||
def build_harmony_preamble(
|
||||
*,
|
||||
instructions: str | None = None,
|
||||
tools: list[Tool | ChatCompletionToolsParam] | None = None,
|
||||
reasoning_effort: str | None = None,
|
||||
browser_description: str | None = None,
|
||||
python_description: str | None = None,
|
||||
container_description: str | None = None,
|
||||
with_custom_tools: bool = False,
|
||||
) -> list[Message]:
|
||||
"""
|
||||
Build the standard Harmony system/developer prefix for a request.
|
||||
"""
|
||||
developer_instructions = system_instructions = None
|
||||
if envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS:
|
||||
system_instructions = instructions
|
||||
else:
|
||||
developer_instructions = instructions
|
||||
|
||||
messages = [
|
||||
get_system_message(
|
||||
reasoning_effort=reasoning_effort,
|
||||
browser_description=browser_description,
|
||||
python_description=python_description,
|
||||
container_description=container_description,
|
||||
instructions=system_instructions,
|
||||
with_custom_tools=with_custom_tools,
|
||||
)
|
||||
return content
|
||||
]
|
||||
if developer_instructions or tools:
|
||||
messages.append(
|
||||
get_developer_message(
|
||||
instructions=developer_instructions,
|
||||
tools=tools,
|
||||
)
|
||||
)
|
||||
return messages
|
||||
|
||||
|
||||
def parse_chat_input_to_harmony_message(
|
||||
@@ -283,7 +368,7 @@ def parse_chat_input_to_harmony_message(
|
||||
tool_calls = chat_msg.get("tool_calls", [])
|
||||
|
||||
if role == "assistant" and tool_calls:
|
||||
content = flatten_chat_text_content(chat_msg.get("content"))
|
||||
content = flatten_input_text_content(chat_msg.get("content"))
|
||||
if content:
|
||||
commentary_msg = Message.from_role_and_content(Role.ASSISTANT, content)
|
||||
commentary_msg = commentary_msg.with_channel("commentary")
|
||||
@@ -313,8 +398,7 @@ def parse_chat_input_to_harmony_message(
|
||||
if role == "tool":
|
||||
tool_call_id = chat_msg.get("tool_call_id", "")
|
||||
name = tool_id_names.get(tool_call_id, "")
|
||||
content = chat_msg.get("content", "") or ""
|
||||
content = flatten_chat_text_content(content)
|
||||
content = flatten_input_text_content(chat_msg.get("content")) or ""
|
||||
|
||||
msg = (
|
||||
Message.from_author_and_content(
|
||||
@@ -349,7 +433,12 @@ def parse_chat_input_to_harmony_message(
|
||||
# Send non-tool assistant messages to the final channel
|
||||
msg = msg.with_channel("final")
|
||||
msgs.append(msg)
|
||||
# For user/system/developer messages, add them directly even if no content.
|
||||
elif role in ("system", "developer"):
|
||||
instructions = flatten_input_text_content(chat_msg.get("content"))
|
||||
if instructions is not None:
|
||||
msg = get_system_or_developer_message(role, instructions)
|
||||
msgs.append(msg)
|
||||
# For user messages, add them directly even if no content.
|
||||
elif role != "assistant":
|
||||
msg = Message.from_role_and_contents(role, contents)
|
||||
msgs.append(msg)
|
||||
|
||||
@@ -32,7 +32,8 @@ from openai_harmony import Author, Message, Role, StreamableParser, TextContent
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
|
||||
extract_function_from_recipient,
|
||||
flatten_chat_text_content,
|
||||
flatten_input_text_content,
|
||||
get_system_or_developer_message,
|
||||
is_function_recipient,
|
||||
)
|
||||
from vllm.entrypoints.openai.responses.protocol import (
|
||||
@@ -109,8 +110,7 @@ def _parse_chat_format_message(chat_msg: dict) -> list[Message]:
|
||||
name = chat_msg.get("name", "")
|
||||
if name and not name.startswith("functions."):
|
||||
name = f"functions.{name}"
|
||||
content = chat_msg.get("content", "") or ""
|
||||
content = flatten_chat_text_content(content)
|
||||
content = flatten_input_text_content(chat_msg.get("content")) or ""
|
||||
# NOTE: .with_recipient("assistant") is required on tool messages
|
||||
# to match parse_chat_input_to_harmony_message behavior and ensure
|
||||
# proper routing in the Harmony protocol.
|
||||
@@ -121,7 +121,15 @@ def _parse_chat_format_message(chat_msg: dict) -> list[Message]:
|
||||
)
|
||||
return [msg]
|
||||
|
||||
# Default: user/assistant/system messages
|
||||
# System/developer messages into proper DeveloperContent
|
||||
if role in ("system", "developer"):
|
||||
text = flatten_input_text_content(chat_msg.get("content"))
|
||||
if text:
|
||||
msg = get_system_or_developer_message(role, text)
|
||||
return [msg]
|
||||
return []
|
||||
|
||||
# Default: user/assistant messages
|
||||
content = chat_msg.get("content", "")
|
||||
if isinstance(content, str):
|
||||
contents = [TextContent(text=content)]
|
||||
@@ -151,13 +159,17 @@ def response_input_to_harmony(
|
||||
if "type" not in response_msg or response_msg["type"] == "message":
|
||||
role = response_msg["role"]
|
||||
content = response_msg["content"]
|
||||
# Add prefix for developer messages.
|
||||
# <|start|>developer<|message|># Instructions {instructions}<|end|>
|
||||
text_prefix = "Instructions:\n" if role == "developer" else ""
|
||||
if isinstance(content, str):
|
||||
msg = Message.from_role_and_content(role, text_prefix + content)
|
||||
if role in ("system", "developer"):
|
||||
text = flatten_input_text_content(content)
|
||||
if text:
|
||||
msg = get_system_or_developer_message(role, text)
|
||||
else:
|
||||
# Empty content — skip, no message emitted.
|
||||
return None
|
||||
elif isinstance(content, str):
|
||||
msg = Message.from_role_and_content(role, content)
|
||||
else:
|
||||
contents = [TextContent(text=text_prefix + c["text"]) for c in content]
|
||||
contents = [TextContent(text=c.get("text", "")) for c in content]
|
||||
msg = Message.from_role_and_contents(role, contents)
|
||||
if role == "assistant":
|
||||
msg = msg.with_channel("final")
|
||||
|
||||
@@ -44,8 +44,8 @@ from vllm.entrypoints.openai.engine.serving import (
|
||||
)
|
||||
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
get_developer_message,
|
||||
get_system_message,
|
||||
build_harmony_preamble,
|
||||
extract_instructions_from_messages,
|
||||
get_user_message,
|
||||
has_custom_tools,
|
||||
render_for_completion,
|
||||
@@ -1078,37 +1078,9 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
output_items.extend(last_items)
|
||||
return output_items
|
||||
|
||||
def _extract_system_message_from_request(
|
||||
self, request: ResponsesRequest
|
||||
) -> str | None:
|
||||
system_msg = None
|
||||
if not isinstance(request.input, str):
|
||||
for response_msg in request.input:
|
||||
if (
|
||||
isinstance(response_msg, dict)
|
||||
and response_msg.get("role") == "system"
|
||||
):
|
||||
content = response_msg.get("content")
|
||||
if isinstance(content, str):
|
||||
system_msg = content
|
||||
elif isinstance(content, list):
|
||||
for param in content:
|
||||
if (
|
||||
isinstance(param, dict)
|
||||
and param.get("type") == "input_text"
|
||||
):
|
||||
system_msg = param.get("text")
|
||||
break
|
||||
break
|
||||
return system_msg
|
||||
|
||||
def _construct_harmony_system_input_message(
|
||||
self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str]
|
||||
) -> OpenAIHarmonyMessage:
|
||||
model_identity = self._extract_system_message_from_request(request)
|
||||
|
||||
reasoning_effort = request.reasoning.effort if request.reasoning else None
|
||||
|
||||
def _get_harmony_builtin_tool_descriptions(
|
||||
self, request: ResponsesRequest, tool_types: set[str]
|
||||
) -> dict[str, str | None]:
|
||||
# Extract allowed_tools from MCP tool requests
|
||||
allowed_tools_map = _extract_allowed_tools_from_mcp_requests(request.tools)
|
||||
|
||||
@@ -1141,17 +1113,11 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
and self.tool_server.has_tool("container")
|
||||
else None
|
||||
)
|
||||
|
||||
sys_msg = get_system_message(
|
||||
model_identity=model_identity,
|
||||
reasoning_effort=reasoning_effort,
|
||||
browser_description=browser_description,
|
||||
python_description=python_description,
|
||||
container_description=container_description,
|
||||
instructions=request.instructions,
|
||||
with_custom_tools=with_custom_tools,
|
||||
)
|
||||
return sys_msg
|
||||
return {
|
||||
"browser_description": browser_description,
|
||||
"python_description": python_description,
|
||||
"container_description": container_description,
|
||||
}
|
||||
|
||||
def _construct_input_messages_with_harmony(
|
||||
self,
|
||||
@@ -1159,20 +1125,31 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
prev_response: ResponsesResponse | None,
|
||||
) -> list[OpenAIHarmonyMessage]:
|
||||
messages: list[OpenAIHarmonyMessage] = []
|
||||
request_input = request.input
|
||||
if prev_response is None:
|
||||
# New conversation.
|
||||
tool_types = extract_tool_types(request.tools)
|
||||
with_custom_tools = has_custom_tools(tool_types)
|
||||
|
||||
sys_msg = self._construct_harmony_system_input_message(
|
||||
request, with_custom_tools, tool_types
|
||||
)
|
||||
messages.append(sys_msg)
|
||||
if with_custom_tools:
|
||||
dev_msg = get_developer_message(
|
||||
instructions=request.instructions, tools=request.tools
|
||||
instructions = request.instructions
|
||||
if instructions is None and isinstance(request_input, list):
|
||||
instructions, request_input = extract_instructions_from_messages(
|
||||
request_input
|
||||
)
|
||||
messages.append(dev_msg)
|
||||
tool_descriptions = self._get_harmony_builtin_tool_descriptions(
|
||||
request, tool_types
|
||||
)
|
||||
tools = request.tools if with_custom_tools else None
|
||||
messages.extend(
|
||||
build_harmony_preamble(
|
||||
instructions=instructions,
|
||||
tools=tools,
|
||||
reasoning_effort=(
|
||||
request.reasoning.effort if request.reasoning else None
|
||||
),
|
||||
with_custom_tools=with_custom_tools,
|
||||
**tool_descriptions,
|
||||
)
|
||||
)
|
||||
messages += construct_harmony_previous_input_messages(request)
|
||||
|
||||
else:
|
||||
@@ -1208,20 +1185,20 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
messages.extend(prev_msgs)
|
||||
# Append the new input.
|
||||
# Responses API supports simple text inputs without chat format.
|
||||
if isinstance(request.input, str):
|
||||
if isinstance(request_input, str):
|
||||
# Skip empty string input when previous_input_messages supplies
|
||||
# the full conversation history --- an empty trailing user message
|
||||
# confuses the model into thinking nothing was sent.
|
||||
if request.input or not request.previous_input_messages:
|
||||
messages.append(get_user_message(request.input))
|
||||
if request_input or not request.previous_input_messages:
|
||||
messages.append(get_user_message(request_input))
|
||||
else:
|
||||
if prev_response is not None:
|
||||
prev_outputs = copy(prev_response.output)
|
||||
else:
|
||||
prev_outputs = []
|
||||
for response_msg in request.input:
|
||||
for response_msg in request_input:
|
||||
new_msg = response_input_to_harmony(response_msg, prev_outputs)
|
||||
if new_msg is not None and new_msg.author.role != "system":
|
||||
if new_msg is not None:
|
||||
messages.append(new_msg)
|
||||
|
||||
# User passes in a tool call request and its output. We need
|
||||
|
||||
@@ -18,8 +18,8 @@ from vllm.entrypoints.openai.engine.protocol import (
|
||||
)
|
||||
from vllm.entrypoints.openai.models.serving import OpenAIModelRegistry
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import (
|
||||
get_developer_message,
|
||||
get_system_message,
|
||||
build_harmony_preamble,
|
||||
extract_instructions_from_messages,
|
||||
parse_chat_inputs_to_harmony_messages,
|
||||
render_for_completion,
|
||||
)
|
||||
@@ -405,6 +405,9 @@ class OpenAIServingRender:
|
||||
# for more info: see comment in `maybe_serialize_tool_calls`
|
||||
_mt.maybe_serialize_tool_calls(request) # type: ignore[arg-type]
|
||||
|
||||
chat_messages = list(request.messages)
|
||||
instructions, chat_messages = extract_instructions_from_messages(chat_messages)
|
||||
|
||||
# Add system message.
|
||||
# NOTE: In Chat Completion API, browsing is enabled by default
|
||||
# if the model supports it. TODO: Support browsing.
|
||||
@@ -412,23 +415,18 @@ class OpenAIServingRender:
|
||||
assert not self.supports_code_interpreter
|
||||
if (reasoning_effort := request.reasoning_effort) == "none":
|
||||
raise ValueError(f"Harmony does not support {reasoning_effort=}")
|
||||
sys_msg = get_system_message(
|
||||
reasoning_effort=reasoning_effort,
|
||||
browser_description=None,
|
||||
python_description=None,
|
||||
with_custom_tools=should_include_tools,
|
||||
)
|
||||
messages.append(sys_msg)
|
||||
|
||||
# Add developer message.
|
||||
if request.tools:
|
||||
dev_msg = get_developer_message(
|
||||
tools=request.tools if should_include_tools else None # type: ignore[arg-type]
|
||||
tools = request.tools if should_include_tools else None
|
||||
messages.extend(
|
||||
build_harmony_preamble(
|
||||
instructions=instructions,
|
||||
tools=tools, # type: ignore[arg-type]
|
||||
reasoning_effort=reasoning_effort,
|
||||
with_custom_tools=should_include_tools,
|
||||
)
|
||||
messages.append(dev_msg)
|
||||
)
|
||||
|
||||
# Add user message.
|
||||
messages.extend(parse_chat_inputs_to_harmony_messages(request.messages))
|
||||
# Add remaining conversation messages.
|
||||
messages.extend(parse_chat_inputs_to_harmony_messages(chat_messages))
|
||||
|
||||
# Render prompt token ids.
|
||||
prompt_token_ids = render_for_completion(messages)
|
||||
|
||||
Reference in New Issue
Block a user