mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Frontend][Responses API] Move developer-to-system conversion into HF renderer (#43590)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com> Co-authored-by: kdcyberdude <kdsingh.cyberdude@gmail.com> Co-authored-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
@@ -7,6 +7,9 @@ from vllm.config import ModelConfig
|
||||
from vllm.entrypoints.chat_utils import load_chat_template
|
||||
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
|
||||
from vllm.renderers.hf import (
|
||||
_consolidate_system_messages,
|
||||
_convert_developer_to_system,
|
||||
_detect_developer_role_support,
|
||||
_get_hf_base_chat_template_params,
|
||||
_try_extract_ast,
|
||||
resolve_chat_template,
|
||||
@@ -592,3 +595,322 @@ def test_get_gen_prompt(
|
||||
f"The generated prompt does not match the expected output for "
|
||||
f"model {model} and template {template}"
|
||||
)
|
||||
|
||||
|
||||
class TestConvertDeveloperToSystem:
|
||||
def test_converts_role(self):
|
||||
conversation = [
|
||||
{"role": "developer", "content": "You are helpful."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
result = _convert_developer_to_system(conversation)
|
||||
assert result[0]["role"] == "system"
|
||||
assert result[0]["content"] == "You are helpful."
|
||||
assert result[1]["role"] == "user"
|
||||
|
||||
def test_removes_tools_key(self):
|
||||
conversation = [
|
||||
{
|
||||
"role": "developer",
|
||||
"content": "Instructions",
|
||||
"tools": [{"type": "function"}],
|
||||
},
|
||||
]
|
||||
result = _convert_developer_to_system(conversation)
|
||||
assert "tools" not in result[0]
|
||||
|
||||
def test_no_developer_messages_unchanged(self):
|
||||
conversation = [
|
||||
{"role": "system", "content": "System prompt"},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
result = _convert_developer_to_system(conversation)
|
||||
assert result[0]["role"] == "system"
|
||||
assert result[1]["role"] == "user"
|
||||
|
||||
def test_does_not_mutate_original(self):
|
||||
original = {
|
||||
"role": "developer",
|
||||
"content": "Instructions",
|
||||
"tools": [{"type": "function"}],
|
||||
}
|
||||
conversation = [original]
|
||||
_convert_developer_to_system(conversation)
|
||||
assert original["role"] == "developer"
|
||||
assert "tools" in original
|
||||
|
||||
|
||||
# --- Developer role detection and conversion tests ---
|
||||
|
||||
CHATML_TEMPLATE = (
|
||||
"{% for message in messages %}"
|
||||
"{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}"
|
||||
"{% if (loop.last and add_generation_prompt) or not loop.last %}"
|
||||
"{{ '<|im_end|>' + '\\n'}}"
|
||||
"{% endif %}"
|
||||
"{% endfor %}"
|
||||
"{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}"
|
||||
"{{ '<|im_start|>assistant\\n' }}"
|
||||
"{% endif %}"
|
||||
)
|
||||
|
||||
TEMPLATE_WITH_DEVELOPER = (
|
||||
"{% for message in messages %}"
|
||||
"{% if message['role'] == 'developer' %}"
|
||||
"{{'<|im_start|>developer\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% elif message['role'] == 'system' %}"
|
||||
"{{'<|im_start|>system\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% elif message['role'] == 'user' %}"
|
||||
"{{'<|im_start|>user\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% elif message['role'] == 'assistant' %}"
|
||||
"{{'<|im_start|>assistant\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% endif %}"
|
||||
"{% endfor %}"
|
||||
"{% if add_generation_prompt %}"
|
||||
"{{ '<|im_start|>assistant\\n' }}"
|
||||
"{% endif %}"
|
||||
)
|
||||
|
||||
STRICT_ROLE_TEMPLATE = (
|
||||
"{% for message in messages %}"
|
||||
"{% if message['role'] == 'system' %}"
|
||||
"{{'<|im_start|>system\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% elif message['role'] == 'user' %}"
|
||||
"{{'<|im_start|>user\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% elif message['role'] == 'assistant' %}"
|
||||
"{{'<|im_start|>assistant\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% else %}"
|
||||
"{{ raise_exception('Unexpected message role: ' + message['role']) }}"
|
||||
"{% endif %}"
|
||||
"{% endfor %}"
|
||||
"{% if add_generation_prompt %}"
|
||||
"{{ '<|im_start|>assistant\\n' }}"
|
||||
"{% endif %}"
|
||||
)
|
||||
|
||||
|
||||
class TestDetectDeveloperRoleSupport:
|
||||
def test_absent_in_chatml(self):
|
||||
assert _detect_developer_role_support(CHATML_TEMPLATE) is False
|
||||
|
||||
def test_present_double_quotes(self):
|
||||
assert _detect_developer_role_support(TEMPLATE_WITH_DEVELOPER) is True
|
||||
|
||||
def test_present_single_quotes(self):
|
||||
template = TEMPLATE_WITH_DEVELOPER.replace('"developer"', "'developer'")
|
||||
assert _detect_developer_role_support(template) is True
|
||||
|
||||
def test_absent_in_strict_template(self):
|
||||
assert _detect_developer_role_support(STRICT_ROLE_TEMPLATE) is False
|
||||
|
||||
|
||||
class TestSafeApplyChatTemplateDeveloperRole:
|
||||
@pytest.fixture
|
||||
def model_config(self):
|
||||
return ModelConfig(
|
||||
"facebook/opt-125m",
|
||||
tokenizer="facebook/opt-125m",
|
||||
tokenizer_mode="auto",
|
||||
trust_remote_code=False,
|
||||
dtype="float16",
|
||||
)
|
||||
|
||||
@pytest.fixture
|
||||
def tokenizer(self):
|
||||
return get_tokenizer("facebook/opt-125m")
|
||||
|
||||
def test_developer_converted_to_system_for_chatml(self, model_config, tokenizer):
|
||||
conversation = [
|
||||
{"role": "developer", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
result = safe_apply_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
conversation,
|
||||
chat_template=CHATML_TEMPLATE,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True,
|
||||
)
|
||||
assert "<|im_start|>system" in result
|
||||
assert "You are a helpful assistant." in result
|
||||
assert "<|im_start|>developer" not in result
|
||||
|
||||
def test_developer_preserved_when_template_supports_it(
|
||||
self, model_config, tokenizer
|
||||
):
|
||||
conversation = [
|
||||
{"role": "developer", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
result = safe_apply_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
conversation,
|
||||
chat_template=TEMPLATE_WITH_DEVELOPER,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True,
|
||||
)
|
||||
assert "<|im_start|>developer" in result
|
||||
assert "You are a helpful assistant." in result
|
||||
|
||||
def test_developer_does_not_crash_strict_template(self, model_config, tokenizer):
|
||||
conversation = [
|
||||
{"role": "developer", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
result = safe_apply_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
conversation,
|
||||
chat_template=STRICT_ROLE_TEMPLATE,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True,
|
||||
)
|
||||
assert "<|im_start|>system" in result
|
||||
assert "You are a helpful assistant." in result
|
||||
|
||||
def test_no_developer_messages_no_overhead(self, model_config, tokenizer):
|
||||
conversation = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
result = safe_apply_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
conversation,
|
||||
chat_template=CHATML_TEMPLATE,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True,
|
||||
)
|
||||
assert "<|im_start|>system" in result
|
||||
assert "You are helpful." in result
|
||||
|
||||
def test_developer_at_non_first_position_consolidated(
|
||||
self, model_config, tokenizer
|
||||
):
|
||||
conversation = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"},
|
||||
{"role": "developer", "content": "Be concise."},
|
||||
{"role": "user", "content": "What is 2+2?"},
|
||||
]
|
||||
result = safe_apply_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
conversation,
|
||||
chat_template=SYSTEM_FIRST_TEMPLATE,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True,
|
||||
)
|
||||
assert "<|im_start|>system" in result
|
||||
assert "You are helpful." in result
|
||||
assert "Be concise." in result
|
||||
assert "What is 2+2?" in result
|
||||
|
||||
def test_developer_only_no_prior_system(self, model_config, tokenizer):
|
||||
conversation = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "developer", "content": "Be concise."},
|
||||
{"role": "user", "content": "What is 2+2?"},
|
||||
]
|
||||
result = safe_apply_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
conversation,
|
||||
chat_template=SYSTEM_FIRST_TEMPLATE,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True,
|
||||
)
|
||||
assert "<|im_start|>system" in result
|
||||
assert "Be concise." in result
|
||||
|
||||
|
||||
SYSTEM_FIRST_TEMPLATE = (
|
||||
"{% for message in messages %}"
|
||||
"{% if message['role'] == 'system' %}"
|
||||
"{% if not loop.first %}"
|
||||
"{{ raise_exception('System message must be at the beginning.') }}"
|
||||
"{% endif %}"
|
||||
"{{'<|im_start|>system\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% elif message['role'] == 'user' %}"
|
||||
"{{'<|im_start|>user\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% elif message['role'] == 'assistant' %}"
|
||||
"{{'<|im_start|>assistant\\n' + message['content'] + '<|im_end|>\\n'}}"
|
||||
"{% else %}"
|
||||
"{{ raise_exception('Unexpected message role: ' + message['role']) }}"
|
||||
"{% endif %}"
|
||||
"{% endfor %}"
|
||||
"{% if add_generation_prompt %}"
|
||||
"{{ '<|im_start|>assistant\\n' }}"
|
||||
"{% endif %}"
|
||||
)
|
||||
|
||||
|
||||
class TestConsolidateSystemMessages:
|
||||
def test_no_system_messages_unchanged(self):
|
||||
conversation = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi"},
|
||||
]
|
||||
result = _consolidate_system_messages(conversation)
|
||||
assert result == conversation
|
||||
|
||||
def test_single_system_at_start_unchanged(self):
|
||||
conversation = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
result = _consolidate_system_messages(conversation)
|
||||
assert result == conversation
|
||||
|
||||
def test_system_at_non_first_position_moved(self):
|
||||
conversation = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
]
|
||||
result = _consolidate_system_messages(conversation)
|
||||
assert result[0]["role"] == "system"
|
||||
assert result[0]["content"] == "You are helpful."
|
||||
assert result[1]["role"] == "user"
|
||||
assert result[1]["content"] == "Hello"
|
||||
|
||||
def test_multiple_system_messages_merged(self):
|
||||
conversation = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "system", "content": "Be concise."},
|
||||
]
|
||||
result = _consolidate_system_messages(conversation)
|
||||
assert len(result) == 2
|
||||
assert result[0]["role"] == "system"
|
||||
assert result[0]["content"] == "You are helpful.\n\nBe concise."
|
||||
assert result[1]["role"] == "user"
|
||||
|
||||
def test_list_content_handled(self):
|
||||
conversation = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{
|
||||
"role": "system",
|
||||
"content": [
|
||||
{"type": "text", "text": "Rule 1."},
|
||||
{"type": "text", "text": "Rule 2."},
|
||||
],
|
||||
},
|
||||
]
|
||||
result = _consolidate_system_messages(conversation)
|
||||
assert result[0]["role"] == "system"
|
||||
assert result[0]["content"] == "Rule 1.\nRule 2."
|
||||
assert result[1]["role"] == "user"
|
||||
|
||||
def test_does_not_mutate_original(self):
|
||||
conversation = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
]
|
||||
original_len = len(conversation)
|
||||
_consolidate_system_messages(conversation)
|
||||
assert len(conversation) == original_len
|
||||
assert conversation[0]["role"] == "user"
|
||||
assert conversation[1]["role"] == "system"
|
||||
|
||||
+69
-1
@@ -450,6 +450,66 @@ def _detect_content_format(
|
||||
return "openai"
|
||||
|
||||
|
||||
@lru_cache(maxsize=32)
|
||||
def _detect_developer_role_support(chat_template: str) -> bool:
|
||||
return '"developer"' in chat_template or "'developer'" in chat_template
|
||||
|
||||
|
||||
def _convert_developer_to_system(
|
||||
conversation: list[ConversationMessage],
|
||||
) -> list[ConversationMessage]:
|
||||
converted: list[ConversationMessage] = []
|
||||
for msg in conversation:
|
||||
if msg["role"] == "developer":
|
||||
new_msg = dict(msg)
|
||||
new_msg["role"] = "system"
|
||||
new_msg.pop("tools", None)
|
||||
converted.append(new_msg) # type: ignore[arg-type]
|
||||
else:
|
||||
converted.append(msg)
|
||||
return converted
|
||||
|
||||
|
||||
def _consolidate_system_messages(
|
||||
conversation: list[ConversationMessage],
|
||||
) -> list[ConversationMessage]:
|
||||
"""Merge all system messages into one at position 0.
|
||||
|
||||
Some chat templates (e.g. Qwen 3.6) require the system message to be the
|
||||
very first message. After developer-to-system conversion, system messages
|
||||
may appear at non-first positions; this merges them into a single message.
|
||||
"""
|
||||
system_contents: list[str] = []
|
||||
non_system: list[ConversationMessage] = []
|
||||
needs_consolidation = False
|
||||
for i, msg in enumerate(conversation):
|
||||
if msg["role"] == "system":
|
||||
if i > 0 or system_contents:
|
||||
needs_consolidation = True
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
parts = []
|
||||
for part in content:
|
||||
if isinstance(part, dict) and "text" in part:
|
||||
parts.append(part["text"])
|
||||
elif isinstance(part, str):
|
||||
parts.append(part)
|
||||
content = "\n".join(parts)
|
||||
if content:
|
||||
system_contents.append(content)
|
||||
else:
|
||||
non_system.append(msg)
|
||||
|
||||
if not needs_consolidation:
|
||||
return conversation
|
||||
|
||||
merged: ConversationMessage = {
|
||||
"role": "system",
|
||||
"content": "\n\n".join(system_contents),
|
||||
}
|
||||
return [merged, *non_system]
|
||||
|
||||
|
||||
def _resolve_chat_template_content_format(
|
||||
chat_template: str | None,
|
||||
tools: list[dict[str, Any]] | None,
|
||||
@@ -653,7 +713,15 @@ def safe_apply_chat_template(
|
||||
"allowed, so you must provide a chat template if the tokenizer "
|
||||
"does not define one."
|
||||
)
|
||||
|
||||
if any(
|
||||
msg["role"] == "developer" for msg in conversation
|
||||
) and not _detect_developer_role_support(chat_template):
|
||||
conversation = _convert_developer_to_system(conversation)
|
||||
conversation = _consolidate_system_messages(conversation)
|
||||
logger.info_once(
|
||||
"Chat template does not support the 'developer' message role. "
|
||||
"Converting developer messages to 'system' role.",
|
||||
)
|
||||
resolved_kwargs = resolve_chat_template_kwargs(
|
||||
tokenizer=tokenizer,
|
||||
chat_template=chat_template,
|
||||
|
||||
Reference in New Issue
Block a user