diff --git a/tests/renderers/test_hf.py b/tests/renderers/test_hf.py index c2eb6556394..0545457eb7a 100644 --- a/tests/renderers/test_hf.py +++ b/tests/renderers/test_hf.py @@ -7,6 +7,9 @@ from vllm.config import ModelConfig from vllm.entrypoints.chat_utils import load_chat_template from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest from vllm.renderers.hf import ( + _consolidate_system_messages, + _convert_developer_to_system, + _detect_developer_role_support, _get_hf_base_chat_template_params, _try_extract_ast, resolve_chat_template, @@ -592,3 +595,322 @@ def test_get_gen_prompt( f"The generated prompt does not match the expected output for " f"model {model} and template {template}" ) + + +class TestConvertDeveloperToSystem: + def test_converts_role(self): + conversation = [ + {"role": "developer", "content": "You are helpful."}, + {"role": "user", "content": "Hello"}, + ] + result = _convert_developer_to_system(conversation) + assert result[0]["role"] == "system" + assert result[0]["content"] == "You are helpful." + assert result[1]["role"] == "user" + + def test_removes_tools_key(self): + conversation = [ + { + "role": "developer", + "content": "Instructions", + "tools": [{"type": "function"}], + }, + ] + result = _convert_developer_to_system(conversation) + assert "tools" not in result[0] + + def test_no_developer_messages_unchanged(self): + conversation = [ + {"role": "system", "content": "System prompt"}, + {"role": "user", "content": "Hello"}, + ] + result = _convert_developer_to_system(conversation) + assert result[0]["role"] == "system" + assert result[1]["role"] == "user" + + def test_does_not_mutate_original(self): + original = { + "role": "developer", + "content": "Instructions", + "tools": [{"type": "function"}], + } + conversation = [original] + _convert_developer_to_system(conversation) + assert original["role"] == "developer" + assert "tools" in original + + +# --- Developer role detection and conversion tests --- + +CHATML_TEMPLATE = ( + "{% for message in messages %}" + "{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}" + "{% if (loop.last and add_generation_prompt) or not loop.last %}" + "{{ '<|im_end|>' + '\\n'}}" + "{% endif %}" + "{% endfor %}" + "{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}" + "{{ '<|im_start|>assistant\\n' }}" + "{% endif %}" +) + +TEMPLATE_WITH_DEVELOPER = ( + "{% for message in messages %}" + "{% if message['role'] == 'developer' %}" + "{{'<|im_start|>developer\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% elif message['role'] == 'system' %}" + "{{'<|im_start|>system\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% elif message['role'] == 'user' %}" + "{{'<|im_start|>user\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% elif message['role'] == 'assistant' %}" + "{{'<|im_start|>assistant\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% endif %}" + "{% endfor %}" + "{% if add_generation_prompt %}" + "{{ '<|im_start|>assistant\\n' }}" + "{% endif %}" +) + +STRICT_ROLE_TEMPLATE = ( + "{% for message in messages %}" + "{% if message['role'] == 'system' %}" + "{{'<|im_start|>system\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% elif message['role'] == 'user' %}" + "{{'<|im_start|>user\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% elif message['role'] == 'assistant' %}" + "{{'<|im_start|>assistant\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% else %}" + "{{ raise_exception('Unexpected message role: ' + message['role']) }}" + "{% endif %}" + "{% endfor %}" + "{% if add_generation_prompt %}" + "{{ '<|im_start|>assistant\\n' }}" + "{% endif %}" +) + + +class TestDetectDeveloperRoleSupport: + def test_absent_in_chatml(self): + assert _detect_developer_role_support(CHATML_TEMPLATE) is False + + def test_present_double_quotes(self): + assert _detect_developer_role_support(TEMPLATE_WITH_DEVELOPER) is True + + def test_present_single_quotes(self): + template = TEMPLATE_WITH_DEVELOPER.replace('"developer"', "'developer'") + assert _detect_developer_role_support(template) is True + + def test_absent_in_strict_template(self): + assert _detect_developer_role_support(STRICT_ROLE_TEMPLATE) is False + + +class TestSafeApplyChatTemplateDeveloperRole: + @pytest.fixture + def model_config(self): + return ModelConfig( + "facebook/opt-125m", + tokenizer="facebook/opt-125m", + tokenizer_mode="auto", + trust_remote_code=False, + dtype="float16", + ) + + @pytest.fixture + def tokenizer(self): + return get_tokenizer("facebook/opt-125m") + + def test_developer_converted_to_system_for_chatml(self, model_config, tokenizer): + conversation = [ + {"role": "developer", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"}, + ] + result = safe_apply_chat_template( + model_config, + tokenizer, + conversation, + chat_template=CHATML_TEMPLATE, + tokenize=False, + add_generation_prompt=True, + ) + assert "<|im_start|>system" in result + assert "You are a helpful assistant." in result + assert "<|im_start|>developer" not in result + + def test_developer_preserved_when_template_supports_it( + self, model_config, tokenizer + ): + conversation = [ + {"role": "developer", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"}, + ] + result = safe_apply_chat_template( + model_config, + tokenizer, + conversation, + chat_template=TEMPLATE_WITH_DEVELOPER, + tokenize=False, + add_generation_prompt=True, + ) + assert "<|im_start|>developer" in result + assert "You are a helpful assistant." in result + + def test_developer_does_not_crash_strict_template(self, model_config, tokenizer): + conversation = [ + {"role": "developer", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"}, + ] + result = safe_apply_chat_template( + model_config, + tokenizer, + conversation, + chat_template=STRICT_ROLE_TEMPLATE, + tokenize=False, + add_generation_prompt=True, + ) + assert "<|im_start|>system" in result + assert "You are a helpful assistant." in result + + def test_no_developer_messages_no_overhead(self, model_config, tokenizer): + conversation = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hello"}, + ] + result = safe_apply_chat_template( + model_config, + tokenizer, + conversation, + chat_template=CHATML_TEMPLATE, + tokenize=False, + add_generation_prompt=True, + ) + assert "<|im_start|>system" in result + assert "You are helpful." in result + + def test_developer_at_non_first_position_consolidated( + self, model_config, tokenizer + ): + conversation = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "developer", "content": "Be concise."}, + {"role": "user", "content": "What is 2+2?"}, + ] + result = safe_apply_chat_template( + model_config, + tokenizer, + conversation, + chat_template=SYSTEM_FIRST_TEMPLATE, + tokenize=False, + add_generation_prompt=True, + ) + assert "<|im_start|>system" in result + assert "You are helpful." in result + assert "Be concise." in result + assert "What is 2+2?" in result + + def test_developer_only_no_prior_system(self, model_config, tokenizer): + conversation = [ + {"role": "user", "content": "Hello"}, + {"role": "developer", "content": "Be concise."}, + {"role": "user", "content": "What is 2+2?"}, + ] + result = safe_apply_chat_template( + model_config, + tokenizer, + conversation, + chat_template=SYSTEM_FIRST_TEMPLATE, + tokenize=False, + add_generation_prompt=True, + ) + assert "<|im_start|>system" in result + assert "Be concise." in result + + +SYSTEM_FIRST_TEMPLATE = ( + "{% for message in messages %}" + "{% if message['role'] == 'system' %}" + "{% if not loop.first %}" + "{{ raise_exception('System message must be at the beginning.') }}" + "{% endif %}" + "{{'<|im_start|>system\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% elif message['role'] == 'user' %}" + "{{'<|im_start|>user\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% elif message['role'] == 'assistant' %}" + "{{'<|im_start|>assistant\\n' + message['content'] + '<|im_end|>\\n'}}" + "{% else %}" + "{{ raise_exception('Unexpected message role: ' + message['role']) }}" + "{% endif %}" + "{% endfor %}" + "{% if add_generation_prompt %}" + "{{ '<|im_start|>assistant\\n' }}" + "{% endif %}" +) + + +class TestConsolidateSystemMessages: + def test_no_system_messages_unchanged(self): + conversation = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + ] + result = _consolidate_system_messages(conversation) + assert result == conversation + + def test_single_system_at_start_unchanged(self): + conversation = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hello"}, + ] + result = _consolidate_system_messages(conversation) + assert result == conversation + + def test_system_at_non_first_position_moved(self): + conversation = [ + {"role": "user", "content": "Hello"}, + {"role": "system", "content": "You are helpful."}, + ] + result = _consolidate_system_messages(conversation) + assert result[0]["role"] == "system" + assert result[0]["content"] == "You are helpful." + assert result[1]["role"] == "user" + assert result[1]["content"] == "Hello" + + def test_multiple_system_messages_merged(self): + conversation = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hello"}, + {"role": "system", "content": "Be concise."}, + ] + result = _consolidate_system_messages(conversation) + assert len(result) == 2 + assert result[0]["role"] == "system" + assert result[0]["content"] == "You are helpful.\n\nBe concise." + assert result[1]["role"] == "user" + + def test_list_content_handled(self): + conversation = [ + {"role": "user", "content": "Hello"}, + { + "role": "system", + "content": [ + {"type": "text", "text": "Rule 1."}, + {"type": "text", "text": "Rule 2."}, + ], + }, + ] + result = _consolidate_system_messages(conversation) + assert result[0]["role"] == "system" + assert result[0]["content"] == "Rule 1.\nRule 2." + assert result[1]["role"] == "user" + + def test_does_not_mutate_original(self): + conversation = [ + {"role": "user", "content": "Hello"}, + {"role": "system", "content": "You are helpful."}, + ] + original_len = len(conversation) + _consolidate_system_messages(conversation) + assert len(conversation) == original_len + assert conversation[0]["role"] == "user" + assert conversation[1]["role"] == "system" diff --git a/vllm/renderers/hf.py b/vllm/renderers/hf.py index e796607722a..e57d0586aa0 100644 --- a/vllm/renderers/hf.py +++ b/vllm/renderers/hf.py @@ -450,6 +450,66 @@ def _detect_content_format( return "openai" +@lru_cache(maxsize=32) +def _detect_developer_role_support(chat_template: str) -> bool: + return '"developer"' in chat_template or "'developer'" in chat_template + + +def _convert_developer_to_system( + conversation: list[ConversationMessage], +) -> list[ConversationMessage]: + converted: list[ConversationMessage] = [] + for msg in conversation: + if msg["role"] == "developer": + new_msg = dict(msg) + new_msg["role"] = "system" + new_msg.pop("tools", None) + converted.append(new_msg) # type: ignore[arg-type] + else: + converted.append(msg) + return converted + + +def _consolidate_system_messages( + conversation: list[ConversationMessage], +) -> list[ConversationMessage]: + """Merge all system messages into one at position 0. + + Some chat templates (e.g. Qwen 3.6) require the system message to be the + very first message. After developer-to-system conversion, system messages + may appear at non-first positions; this merges them into a single message. + """ + system_contents: list[str] = [] + non_system: list[ConversationMessage] = [] + needs_consolidation = False + for i, msg in enumerate(conversation): + if msg["role"] == "system": + if i > 0 or system_contents: + needs_consolidation = True + content = msg.get("content", "") + if isinstance(content, list): + parts = [] + for part in content: + if isinstance(part, dict) and "text" in part: + parts.append(part["text"]) + elif isinstance(part, str): + parts.append(part) + content = "\n".join(parts) + if content: + system_contents.append(content) + else: + non_system.append(msg) + + if not needs_consolidation: + return conversation + + merged: ConversationMessage = { + "role": "system", + "content": "\n\n".join(system_contents), + } + return [merged, *non_system] + + def _resolve_chat_template_content_format( chat_template: str | None, tools: list[dict[str, Any]] | None, @@ -653,7 +713,15 @@ def safe_apply_chat_template( "allowed, so you must provide a chat template if the tokenizer " "does not define one." ) - + if any( + msg["role"] == "developer" for msg in conversation + ) and not _detect_developer_role_support(chat_template): + conversation = _convert_developer_to_system(conversation) + conversation = _consolidate_system_messages(conversation) + logger.info_once( + "Chat template does not support the 'developer' message role. " + "Converting developer messages to 'system' role.", + ) resolved_kwargs = resolve_chat_template_kwargs( tokenizer=tokenizer, chat_template=chat_template,