Files
vllm/tests/tokenizers_/test_deepseek_v4.py
T
Yifan Qiao 4d51588e23 [Feat] DeepSeek V4 Rebased (#40860)
Signed-off-by: Yifan Qiao <yifanqiao@inferact.ai>
Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
Signed-off-by: qizixi <zixi@inferact.ai>
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com>
Co-authored-by: Yongye Zhu <yongye@inferact.ai>
Co-authored-by: Simon Mo <simon@inferact.ai>
Co-authored-by: Bugen Zhao <i@bugenzhao.com>
Co-authored-by: Giancarlo Delfin <gdelfin@inferact.ai>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
Co-authored-by: Roy Wang <yasong.wang@inferact.ai>
Co-authored-by: Woosuk Kwon <woosuk@inferact.ai>
Co-authored-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Zhewen Li <jerven.vllm@gmail.com>
Co-authored-by: Zijing Liu <liuzijing2014@gmail.com>
Co-authored-by: khluu <khluu000@gmail.com>
Co-authored-by: qizixi <zixi@inferact.ai>
Co-authored-by: Zhewen Li <zhewenli@inferact.ai>
2026-04-26 18:31:08 -07:00

225 lines
6.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
from pathlib import Path
from types import SimpleNamespace
import pytest
from vllm.entrypoints.chat_utils import parse_chat_messages
from vllm.renderers.registry import RENDERER_REGISTRY
from vllm.tokenizers.deepseek_v4 import get_deepseek_v4_tokenizer
from vllm.tokenizers.registry import TokenizerRegistry
FIXTURES_DIR = Path(__file__).parent / "fixtures" / "deepseek_v4"
class FakeHfTokenizer:
    """Tiny in-memory tokenizer double used by the tests in this module.

    It exposes just ``vocab_size``, ``get_added_vocab`` and ``encode`` and
    never touches real tokenizer files.
    """

    vocab_size = 100

    def get_added_vocab(self) -> dict[str, int]:
        """Return the single added token known to this fake."""
        added_tokens = {"</think>": 100}
        return added_tokens

    def encode(
        self,
        text: str,
        add_special_tokens: bool = False,
        **kwargs,
    ) -> list[int]:
        """Record the call and return one pseudo token id: ``len(text)``."""
        # Keep the most recent arguments around so a test can inspect them.
        self.last_encode = (text, add_special_tokens, kwargs)
        pseudo_token_id = len(text)
        return [pseudo_token_id]
def _tokenizer():
    """Return a DeepSeek V4 tokenizer built on a fresh ``FakeHfTokenizer``."""
    fake_hf_tokenizer = FakeHfTokenizer()
    return get_deepseek_v4_tokenizer(fake_hf_tokenizer)
def _model_config():
return SimpleNamespace(
multimodal_config=None,
allowed_local_media_path="",
allowed_media_domains=None,
)
def _load_reference_case(case_id: int):
    """Load the chat input for reference fixture ``case_id``.

    Reads ``test_input_<case_id>.json`` from ``FIXTURES_DIR``.

    Args:
        case_id: Numeric suffix of the fixture file to load.

    Returns:
        A ``(messages, tools)`` pair. When the fixture is a JSON object,
        ``messages`` is its ``"messages"`` entry and ``tools`` is its
        ``"tools"`` entry (``None`` if absent). Otherwise the decoded JSON
        value itself is returned as ``messages`` and ``tools`` is ``None``.
    """
    fixture_path = FIXTURES_DIR / f"test_input_{case_id}.json"
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    data = json.loads(fixture_path.read_text(encoding="utf-8"))
    if isinstance(data, dict):
        return data["messages"], data.get("tools")
    return data, None
def _render_reference_case(case_id: int, **kwargs):
    """Render reference fixture ``case_id`` to an untokenized prompt string.

    The fixture messages are first parsed with ``parse_chat_messages`` using
    string content format, then passed to the tokenizer's chat template
    together with the raw messages and any tools from the fixture. Extra
    ``kwargs`` are forwarded to ``apply_chat_template`` unchanged.
    """
    messages, tools = _load_reference_case(case_id)

    model_config = _model_config()
    # Only the first element of the parse result is used here.
    conversation, _, _ = parse_chat_messages(
        messages, model_config, content_format="string"
    )

    tokenizer = _tokenizer()
    return tokenizer.apply_chat_template(
        conversation=conversation,
        messages=messages,
        tools=tools,
        tokenize=False,
        **kwargs,
    )
def test_deepseek_v4_tokenizer_registered():
    """The ``deepseek_v4`` key resolves in both tokenizer and renderer registries."""
    tokenizer_cls = TokenizerRegistry.load_tokenizer_cls("deepseek_v4")
    assert tokenizer_cls.__name__ == "DeepseekV4Tokenizer"

    renderer_cls = RENDERER_REGISTRY.load_renderer_cls("deepseek_v4")
    assert renderer_cls.__name__ == "DeepseekV4Renderer"
def test_deepseek_v4_defaults_to_chat_mode():
    """With no thinking kwargs, the prompt ends with a closing ``</think>``."""
    user_messages = [{"role": "user", "content": "Hello"}]
    # NOTE(review): literal reproduced exactly as found; confirm the special
    # token delimiters match what the tokenizer actually emits.
    expected_prompt = "<begin▁of▁sentence><User>Hello<Assistant></think>"

    rendered = _tokenizer().apply_chat_template(user_messages, tokenize=False)

    assert rendered == expected_prompt
@pytest.mark.parametrize("kwargs", [{"thinking": True}, {"enable_thinking": True}])
def test_deepseek_v4_enables_thinking_with_compatible_kwargs(kwargs):
    """Both ``thinking`` and ``enable_thinking`` produce an open ``<think>`` tag."""
    user_messages = [{"role": "user", "content": "Hello"}]
    expected_prompt = "<begin▁of▁sentence><User>Hello<Assistant><think>"

    rendered = _tokenizer().apply_chat_template(
        user_messages, tokenize=False, **kwargs
    )

    assert rendered == expected_prompt
def test_deepseek_v4_uses_v4_tool_prompt_from_request_tools():
    """Tools passed via ``tools=`` show up in the rendered prompt.

    Checks that the prompt has a tools section with the DSML tool-call
    markers, does not mention ``function_calls``, includes the tool name, and
    still ends with the user turn followed by the chat-mode assistant prefix.
    """
    weather_parameters = {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    }
    weather_function = {
        "name": "get_weather",
        "description": "Get weather for a city",
        "parameters": weather_parameters,
    }
    request_tools = [{"type": "function", "function": weather_function}]
    user_messages = [{"role": "user", "content": "Weather?"}]

    rendered = _tokenizer().apply_chat_template(
        user_messages,
        tools=request_tools,
        tokenize=False,
    )

    # Tools section and tool-call markers are present.
    assert "## Tools" in rendered
    assert "<DSMLtool_calls>" in rendered
    assert "</DSMLtool_calls>" in rendered
    # The prompt must not mention "function_calls" anywhere.
    assert "function_calls" not in rendered
    # The tool schema itself is embedded.
    assert '"name": "get_weather"' in rendered
    # The conversation tail is unchanged by the tool preamble.
    assert rendered.endswith("<User>Weather?<Assistant></think>")
def test_deepseek_v4_renders_parsed_history_tool_arguments():
    """Tool-call arguments in history are rendered as individual parameters.

    The assistant turn carries its arguments as a JSON string. After going
    through ``parse_chat_messages``, the rendered prompt must contain one DSML
    parameter per argument and no single ``arguments`` parameter.
    """
    user_turn = {"role": "user", "content": "List the repo"}
    editor_call = {
        "id": "call_1",
        "type": "function",
        "function": {
            "name": "str_replace_editor",
            "arguments": '{"command": "view", "path": "/testbed"}',
        },
    }
    assistant_turn = {"role": "assistant", "tool_calls": [editor_call]}
    tool_turn = {
        "role": "tool",
        "tool_call_id": "call_1",
        "content": "file list",
    }
    messages = [user_turn, assistant_turn, tool_turn]

    editor_parameters = {
        "type": "object",
        "properties": {
            "command": {"type": "string"},
            "path": {"type": "string"},
        },
        "required": ["command", "path"],
    }
    editor_function = {
        "name": "str_replace_editor",
        "description": "Edit files",
        "parameters": editor_parameters,
    }
    tools = [{"type": "function", "function": editor_function}]

    model_config = _model_config()
    conversation, _, _ = parse_chat_messages(
        messages, model_config, content_format="string"
    )

    tokenizer = _tokenizer()
    rendered = tokenizer.apply_chat_template(
        conversation=conversation,
        messages=messages,
        tools=tools,
        tokenize=False,
    )

    assert '<DSMLparameter name="command" string="true">view' in rendered
    assert '<DSMLparameter name="path" string="true">/testbed' in rendered
    assert 'parameter name="arguments"' not in rendered
@pytest.mark.parametrize("reasoning_effort", ["none", "low", "medium", "high"])
def test_deepseek_v4_accepts_openai_reasoning_effort_values(reasoning_effort):
    """Each listed effort value renders without the maximum-effort preamble."""
    user_messages = [{"role": "user", "content": "Hello"}]
    template_options = {
        "tokenize": False,
        "enable_thinking": True,
        "reasoning_effort": reasoning_effort,
    }

    rendered = _tokenizer().apply_chat_template(user_messages, **template_options)

    # Thinking stays enabled regardless of the effort value.
    assert rendered.endswith("<Assistant><think>")
    assert "Reasoning Effort: Absolute maximum" not in rendered
def test_deepseek_v4_preserves_reference_max_reasoning_effort():
    """``reasoning_effort="max"`` puts the maximum-effort text at the prompt start."""
    user_messages = [{"role": "user", "content": "Hello"}]
    expected_prefix = "<begin▁of▁sentence>Reasoning Effort: Absolute maximum"

    rendered = _tokenizer().apply_chat_template(
        user_messages,
        tokenize=False,
        enable_thinking=True,
        reasoning_effort="max",
    )

    assert rendered.startswith(expected_prefix)
@pytest.mark.parametrize(
    ("case_id", "kwargs"),
    [
        (1, {"thinking": True}),
        (2, {"thinking": True}),
        (3, {"thinking": True}),
        (4, {}),
    ],
)
def test_deepseek_v4_matches_reference_golden_fixtures(case_id, kwargs):
    """Rendered prompts must equal the stored golden output exactly.

    For each case, ``test_input_<case_id>.json`` is rendered with the given
    template ``kwargs`` and compared against ``test_output_<case_id>.txt``
    from ``FIXTURES_DIR``.
    """
    prompt = _render_reference_case(case_id, **kwargs)
    expected_path = FIXTURES_DIR / f"test_output_{case_id}.txt"
    # Read the golden file as UTF-8 explicitly so the comparison does not
    # depend on the platform's locale encoding.
    expected = expected_path.read_text(encoding="utf-8")
    assert prompt == expected