mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
4d51588e23
Signed-off-by: Yifan Qiao <yifanqiao@inferact.ai> Signed-off-by: Woosuk Kwon <woosuk@inferact.ai> Signed-off-by: qizixi <zixi@inferact.ai> Signed-off-by: Jee Jee Li <pandaleefree@gmail.com> Signed-off-by: Yongye Zhu <zyy1102000@gmail.com> Co-authored-by: Yongye Zhu <zyy1102000@gmail.com> Co-authored-by: Yongye Zhu <yongye@inferact.ai> Co-authored-by: Simon Mo <simon@inferact.ai> Co-authored-by: Bugen Zhao <i@bugenzhao.com> Co-authored-by: Giancarlo Delfin <gdelfin@inferact.ai> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com> Co-authored-by: Nick Hill <nickhill123@gmail.com> Co-authored-by: Roger Wang <hey@rogerw.io> Co-authored-by: Roy Wang <yasong.wang@inferact.ai> Co-authored-by: Woosuk Kwon <woosuk@inferact.ai> Co-authored-by: youkaichao <youkaichao@gmail.com> Co-authored-by: Zhewen Li <jerven.vllm@gmail.com> Co-authored-by: Zijing Liu <liuzijing2014@gmail.com> Co-authored-by: khluu <khluu000@gmail.com> Co-authored-by: qizixi <zixi@inferact.ai> Co-authored-by: Zhewen Li <zhewenli@inferact.ai>
225 lines
6.4 KiB
Python
225 lines
6.4 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||
|
||
import json
|
||
from pathlib import Path
|
||
from types import SimpleNamespace
|
||
|
||
import pytest
|
||
|
||
from vllm.entrypoints.chat_utils import parse_chat_messages
|
||
from vllm.renderers.registry import RENDERER_REGISTRY
|
||
from vllm.tokenizers.deepseek_v4 import get_deepseek_v4_tokenizer
|
||
from vllm.tokenizers.registry import TokenizerRegistry
|
||
|
||
# Directory of DeepSeek V4 fixture files, resolved relative to this test
# module (``<this dir>/fixtures/deepseek_v4``).
FIXTURES_DIR = Path(__file__).parent / "fixtures" / "deepseek_v4"
|
||
|
||
|
||
class FakeHfTokenizer:
    """Tiny fake tokenizer exposing only the members these tests rely on.

    It reports a fixed vocabulary size, a single added token, and an
    ``encode`` that remembers how it was last called.
    """

    vocab_size = 100

    def get_added_vocab(self) -> dict[str, int]:
        """Return a fresh mapping holding the one added token and its id."""
        added_vocab: dict[str, int] = {}
        added_vocab["</think>"] = 100
        return added_vocab

    def encode(
        self,
        text: str,
        add_special_tokens: bool = False,
        **kwargs,
    ) -> list[int]:
        """Record the call on ``last_encode`` and return ``[len(text)]``."""
        call_record = (text, add_special_tokens, kwargs)
        self.last_encode = call_record
        text_length = len(text)
        return [text_length]
|
||
|
||
|
||
def _tokenizer():
    """Build a DeepSeek V4 tokenizer around a fresh ``FakeHfTokenizer``."""
    fake_hf_tokenizer = FakeHfTokenizer()
    wrapped = get_deepseek_v4_tokenizer(fake_hf_tokenizer)
    return wrapped
|
||
|
||
|
||
def _model_config():
    """Return a bare namespace standing in for a model config.

    Only three attributes are populated: ``multimodal_config`` (``None``),
    ``allowed_local_media_path`` (empty string) and
    ``allowed_media_domains`` (``None``).
    """
    config = SimpleNamespace()
    config.multimodal_config = None
    config.allowed_local_media_path = ""
    config.allowed_media_domains = None
    return config
|
||
|
||
|
||
def _load_reference_case(case_id: int):
    """Load the input fixture for a reference case.

    Reads ``test_input_<case_id>.json`` from ``FIXTURES_DIR``.

    Args:
        case_id: Numeric suffix of the fixture file to load.

    Returns:
        A ``(messages, tools)`` tuple. When the JSON document is an object,
        ``messages`` is its ``"messages"`` entry and ``tools`` is its
        ``"tools"`` entry (``None`` if absent). Otherwise the whole document
        is returned as ``messages`` and ``tools`` is ``None``.

    Raises:
        KeyError: If the document is an object without a ``"messages"`` key.
    """
    fixture_path = FIXTURES_DIR / f"test_input_{case_id}.json"
    # JSON is UTF-8; decode explicitly so the result does not depend on the
    # platform's locale default encoding.
    data = json.loads(fixture_path.read_text(encoding="utf-8"))
    if isinstance(data, dict):
        return data["messages"], data.get("tools")
    return data, None
|
||
|
||
|
||
def _render_reference_case(case_id: int, **kwargs):
    """Render the prompt string for a reference fixture case.

    The fixture's messages are first run through ``parse_chat_messages``
    (string content format), then handed, together with the raw messages and
    tools, to the tokenizer's ``apply_chat_template`` with ``tokenize=False``.
    Extra keyword arguments are forwarded to ``apply_chat_template``.
    """
    raw_messages, request_tools = _load_reference_case(case_id)
    model_config = _model_config()
    parsed_conversation, _, _ = parse_chat_messages(
        raw_messages,
        model_config,
        content_format="string",
    )
    tokenizer = _tokenizer()
    return tokenizer.apply_chat_template(
        conversation=parsed_conversation,
        messages=raw_messages,
        tools=request_tools,
        tokenize=False,
        **kwargs,
    )
|
||
|
||
|
||
def test_deepseek_v4_tokenizer_registered():
    """The ``deepseek_v4`` key resolves in both tokenizer and renderer registries."""
    tokenizer_cls = TokenizerRegistry.load_tokenizer_cls("deepseek_v4")
    assert tokenizer_cls.__name__ == "DeepseekV4Tokenizer"

    renderer_cls = RENDERER_REGISTRY.load_renderer_cls("deepseek_v4")
    assert renderer_cls.__name__ == "DeepseekV4Renderer"
|
||
|
||
|
||
def test_deepseek_v4_defaults_to_chat_mode():
    """With no thinking kwargs the prompt closes the think block immediately."""
    chat = [{"role": "user", "content": "Hello"}]
    tokenizer = _tokenizer()

    rendered = tokenizer.apply_chat_template(chat, tokenize=False)

    expected = "<|begin▁of▁sentence|><|User|>Hello<|Assistant|></think>"
    assert rendered == expected
|
||
|
||
|
||
@pytest.mark.parametrize("kwargs", [{"thinking": True}, {"enable_thinking": True}])
def test_deepseek_v4_enables_thinking_with_compatible_kwargs(kwargs):
    """Both ``thinking`` and ``enable_thinking`` leave an open ``<think>`` tag."""
    chat = [{"role": "user", "content": "Hello"}]
    tokenizer = _tokenizer()

    rendered = tokenizer.apply_chat_template(chat, tokenize=False, **kwargs)

    expected = "<|begin▁of▁sentence|><|User|>Hello<|Assistant|><think>"
    assert rendered == expected
|
||
|
||
|
||
def test_deepseek_v4_uses_v4_tool_prompt_from_request_tools():
    """Request-level tools produce a tools section with DSML tool-call markers."""
    weather_parameters = {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    }
    weather_function = {
        "name": "get_weather",
        "description": "Get weather for a city",
        "parameters": weather_parameters,
    }
    tools = [{"type": "function", "function": weather_function}]
    chat = [{"role": "user", "content": "Weather?"}]

    tokenizer = _tokenizer()
    rendered = tokenizer.apply_chat_template(chat, tools=tools, tokenize=False)

    # Section header and both DSML tool-call delimiters must be present.
    for fragment in ("## Tools", "<|DSML|tool_calls>", "</|DSML|tool_calls>"):
        assert fragment in rendered
    assert "function_calls" not in rendered
    assert '"name": "get_weather"' in rendered
    assert rendered.endswith("<|User|>Weather?<|Assistant|></think>")
|
||
|
||
|
||
def test_deepseek_v4_renders_parsed_history_tool_arguments():
    """Tool-call arguments in history render as one DSML parameter per key.

    The assistant turn carries its arguments as a JSON string; the rendered
    prompt must expose ``command`` and ``path`` individually and must not
    contain a single parameter named ``arguments``.
    """
    assistant_tool_call = {
        "id": "call_1",
        "type": "function",
        "function": {
            "name": "str_replace_editor",
            "arguments": '{"command": "view", "path": "/testbed"}',
        },
    }
    user_turn = {"role": "user", "content": "List the repo"}
    assistant_turn = {"role": "assistant", "tool_calls": [assistant_tool_call]}
    tool_turn = {
        "role": "tool",
        "tool_call_id": "call_1",
        "content": "file list",
    }
    messages = [user_turn, assistant_turn, tool_turn]

    editor_parameters = {
        "type": "object",
        "properties": {
            "command": {"type": "string"},
            "path": {"type": "string"},
        },
        "required": ["command", "path"],
    }
    editor_function = {
        "name": "str_replace_editor",
        "description": "Edit files",
        "parameters": editor_parameters,
    }
    tools = [{"type": "function", "function": editor_function}]

    model_config = _model_config()
    parsed_conversation, _, _ = parse_chat_messages(
        messages,
        model_config,
        content_format="string",
    )

    tokenizer = _tokenizer()
    rendered = tokenizer.apply_chat_template(
        conversation=parsed_conversation,
        messages=messages,
        tools=tools,
        tokenize=False,
    )

    assert '<|DSML|parameter name="command" string="true">view' in rendered
    assert '<|DSML|parameter name="path" string="true">/testbed' in rendered
    assert 'parameter name="arguments"' not in rendered
|
||
|
||
|
||
@pytest.mark.parametrize("reasoning_effort", ["none", "low", "medium", "high"])
def test_deepseek_v4_accepts_openai_reasoning_effort_values(reasoning_effort):
    """These effort values render a thinking prompt without the max-effort preamble."""
    chat = [{"role": "user", "content": "Hello"}]
    template_options = {
        "tokenize": False,
        "enable_thinking": True,
        "reasoning_effort": reasoning_effort,
    }

    rendered = _tokenizer().apply_chat_template(chat, **template_options)

    assert rendered.endswith("<|Assistant|><think>")
    assert "Reasoning Effort: Absolute maximum" not in rendered
|
||
|
||
|
||
def test_deepseek_v4_preserves_reference_max_reasoning_effort():
    """``reasoning_effort="max"`` puts the max-effort preamble at the prompt start."""
    chat = [{"role": "user", "content": "Hello"}]
    template_options = {
        "tokenize": False,
        "enable_thinking": True,
        "reasoning_effort": "max",
    }

    rendered = _tokenizer().apply_chat_template(chat, **template_options)

    expected_prefix = "<|begin▁of▁sentence|>Reasoning Effort: Absolute maximum"
    assert rendered.startswith(expected_prefix)
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("case_id", "kwargs"),
    [
        (1, {"thinking": True}),
        (2, {"thinking": True}),
        (3, {"thinking": True}),
        (4, {}),
    ],
)
def test_deepseek_v4_matches_reference_golden_fixtures(case_id, kwargs):
    """Rendered prompt for each fixture case equals its stored golden output.

    Args:
        case_id: Numeric suffix selecting ``test_input_<id>.json`` and
            ``test_output_<id>.txt`` under ``FIXTURES_DIR``.
        kwargs: Extra keyword arguments forwarded to the chat template for
            that case.
    """
    prompt = _render_reference_case(case_id, **kwargs)

    # Rendered prompts contain non-ASCII special tokens (for example the
    # begin-of-sentence marker), so decode the golden file explicitly as
    # UTF-8 instead of relying on the platform's locale default encoding.
    expected_path = FIXTURES_DIR / f"test_output_{case_id}.txt"
    expected = expected_path.read_text(encoding="utf-8")
    assert prompt == expected
|