mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Anthropic] Support system role messages inside messages array (#44283)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com> Co-authored-by: Aleksandar Yanakiev <alexander.yanakiev@discretestack.com> Co-authored-by: Ang Kah Min, Kelvin <syraxius@hotmail.com>
This commit is contained in:
@@ -635,3 +635,143 @@ class TestThinkingBlockConversion:
|
||||
# Redacted thinking is ignored, normal thinking still becomes reasoning.
|
||||
assert asst.get("reasoning") == "Thinking..."
|
||||
assert asst.get("content") == "Hi!"
|
||||
|
||||
|
||||
class TestInlineSystemMessageInMessagesArray:
    """Verify that ``role: system`` messages embedded inside the ``messages``
    array are accepted and merged with the top-level ``system`` prompt.

    This handles clients that place system messages inside the messages array
    instead of the Anthropic-standard top-level ``system`` field.

    Every test also checks that the converted conversation carries exactly one
    system message, in first position: inline system entries must be folded
    into it rather than being left behind in the message list.
    """

    @staticmethod
    def _assert_single_leading_system(messages):
        """Assert ``messages`` starts with a system message and has no other.

        Args:
            messages: The converted message dicts, each with a ``"role"`` key.
        """
        assert messages[0]["role"] == "system"
        # Inline system entries must not survive as separate messages.
        assert all(msg["role"] != "system" for msg in messages[1:])

    def test_inline_system_merged_with_top_level_system(self):
        """Full integration: inline system + top-level system + user message."""
        request = _make_request(
            [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "<system-reminder>\n.....\n</system-reminder>\n\n",
                        },
                        {
                            "type": "text",
                            "text": "help?",
                            "cache_control": {"type": "ephemeral"},
                        },
                    ],
                },
                {
                    "role": "system",
                    "content": ".....",
                },
            ],
            system=[
                {
                    "type": "text",
                    "text": "x-anthropic-billing-header: "
                    "cc_version=2.1.160.bca; cc_entrypoint=cli; cch=d1d48;",
                },
                {
                    "type": "text",
                    "text": "You are Claude Code, Anthropic's official CLI for Claude.",
                    "cache_control": {"type": "ephemeral"},
                },
                {
                    "type": "text",
                    "text": "....",
                    "cache_control": {"type": "ephemeral"},
                },
            ],
            tools=[],
        )

        result = _convert(request)

        # First message should be the merged system prompt, and the only one.
        self._assert_single_leading_system(result.messages)
        # Billing header stripped, inline system appended.
        assert (
            result.messages[0]["content"]
            == "You are Claude Code, Anthropic's official CLI for Claude."
            "...."
            "....."
        )

        # Second message should be the user message, content preserved.
        assert result.messages[1]["role"] == "user"
        user_content = result.messages[1]["content"]
        assert len(user_content) == 2
        assert user_content[0] == {
            "type": "text",
            "text": "<system-reminder>\n.....\n</system-reminder>\n\n",
        }
        # ``cache_control`` is not expected on the converted block.
        assert user_content[1] == {
            "type": "text",
            "text": "help?",
        }

    def test_inline_system_string_only(self):
        """Only an inline system string, no top-level system."""
        request = _make_request(
            [
                {"role": "user", "content": "Hello"},
                {"role": "system", "content": "Be concise."},
            ]
        )
        result = _convert(request)

        self._assert_single_leading_system(result.messages)
        assert result.messages[0]["content"] == "Be concise."
        assert result.messages[1]["role"] == "user"

    def test_inline_system_list_content(self):
        """Inline system with list content blocks."""
        request = _make_request(
            [
                {"role": "user", "content": "Hi"},
                {
                    "role": "system",
                    "content": [
                        {"type": "text", "text": "Part one. "},
                        {"type": "text", "text": "Part two."},
                    ],
                },
            ]
        )
        result = _convert(request)

        self._assert_single_leading_system(result.messages)
        # Text blocks are concatenated without any separator.
        assert result.messages[0]["content"] == "Part one. Part two."
        assert result.messages[1]["role"] == "user"

    def test_multiple_inline_system_messages(self):
        """Multiple inline system messages should all be merged."""
        request = _make_request(
            [
                {"role": "system", "content": "First system."},
                {"role": "user", "content": "Hello"},
                {"role": "system", "content": "Second system."},
            ]
        )
        result = _convert(request)

        self._assert_single_leading_system(result.messages)
        assert result.messages[0]["content"] == "First system.Second system."
        assert result.messages[1]["role"] == "user"

    def test_inline_system_with_top_level_string(self):
        """Top-level system is a string, inline system is also present."""
        request = _make_request(
            [
                {"role": "user", "content": "Hello"},
                {"role": "system", "content": "Inline hint."},
            ],
            system="Top-level prompt.",
        )
        result = _convert(request)

        self._assert_single_leading_system(result.messages)
        # Top-level text comes first, inline system text is appended after it.
        assert result.messages[0]["content"] == "Top-level prompt.Inline hint."
        assert result.messages[1]["role"] == "user"
|
||||
|
||||
@@ -65,7 +65,7 @@ class AnthropicContentBlock(BaseModel):
|
||||
class AnthropicMessage(BaseModel):
    """Message structure"""

    # Author of the message. "system" is accepted in addition to the
    # "user"/"assistant" pair so that system messages placed inside the
    # ``messages`` array pass validation.
    role: Literal["user", "assistant", "system"]
    # Either plain text or a list of content blocks.
    content: str | list[AnthropicContentBlock]
|
||||
|
||||
|
||||
|
||||
@@ -143,23 +143,36 @@ class AnthropicServingMessages(OpenAIServingChat):
|
||||
openai_messages: list[dict[str, Any]],
|
||||
) -> None:
|
||||
"""Convert Anthropic system message to OpenAI format"""
|
||||
if not anthropic_request.system:
|
||||
return
|
||||
system_parts: list[str] = []
|
||||
|
||||
if isinstance(anthropic_request.system, str):
|
||||
openai_messages.append(
|
||||
{"role": "system", "content": anthropic_request.system}
|
||||
)
|
||||
else:
|
||||
system_prompt = ""
|
||||
for block in anthropic_request.system:
|
||||
if block.type == "text" and block.text:
|
||||
# Strip Claude Code's attribution header which contains
|
||||
# a per-request hash that defeats prefix caching.
|
||||
if block.text.startswith("x-anthropic-billing-header"):
|
||||
continue
|
||||
system_prompt += block.text
|
||||
openai_messages.append({"role": "system", "content": system_prompt})
|
||||
# Top-level system field
|
||||
if anthropic_request.system:
|
||||
if isinstance(anthropic_request.system, str):
|
||||
system_parts.append(anthropic_request.system)
|
||||
else:
|
||||
for block in anthropic_request.system:
|
||||
if block.type == "text" and block.text:
|
||||
# Strip Claude Code's attribution header which contains
|
||||
# a per-request hash that defeats prefix caching.
|
||||
if block.text.startswith("x-anthropic-billing-header"):
|
||||
continue
|
||||
system_parts.append(block.text)
|
||||
|
||||
# System messages embedded inside the messages array
|
||||
for msg in anthropic_request.messages:
|
||||
if msg.role != "system":
|
||||
continue
|
||||
if isinstance(msg.content, str):
|
||||
system_parts.append(msg.content)
|
||||
else:
|
||||
for block in msg.content:
|
||||
if block.type == "text" and block.text:
|
||||
if block.text.startswith("x-anthropic-billing-header"):
|
||||
continue
|
||||
system_parts.append(block.text)
|
||||
|
||||
if system_parts:
|
||||
openai_messages.append({"role": "system", "content": "".join(system_parts)})
|
||||
|
||||
@classmethod
|
||||
def _convert_messages(
|
||||
@@ -167,6 +180,9 @@ class AnthropicServingMessages(OpenAIServingChat):
|
||||
) -> None:
|
||||
"""Convert Anthropic messages to OpenAI format"""
|
||||
for msg in messages:
|
||||
if msg.role == "system":
|
||||
continue
|
||||
|
||||
openai_msg: dict[str, Any] = {"role": msg.role} # type: ignore
|
||||
|
||||
if isinstance(msg.content, str):
|
||||
|
||||
Reference in New Issue
Block a user