mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Rust Frontend] Cover different thinking modes in roundtrip tests (#44320)
Signed-off-by: Bugen Zhao <i@bugenzhao.com>
This commit is contained in:
@@ -20,6 +20,7 @@ use vllm_chat::{
|
||||
use vllm_text::{DecodedTextEvent, Finished, Prompt};
|
||||
|
||||
/// One model/parser configuration used to run the fixed roundtrip fixtures.
|
||||
#[derive(Clone)]
|
||||
struct RoundtripCase {
|
||||
/// Hugging Face model id resolved through the production backend loader.
|
||||
model_id: &'static str,
|
||||
@@ -31,11 +32,45 @@ struct RoundtripCase {
|
||||
tool_call_parser: ParserSelection,
|
||||
/// Reasoning parser selection used by the output processor.
|
||||
reasoning_parser: ParserSelection,
|
||||
/// How this model's chat template handles thinking mode.
|
||||
thinking_behavior: ThinkingBehavior,
|
||||
/// JSON formatting expected after this model's template has materialized
|
||||
/// tool-call arguments.
|
||||
json_fmt: JsonFmt,
|
||||
}
|
||||
|
||||
/// How a model's chat template reacts to the thinking on/off kwargs that a
/// roundtrip request may carry.
///
/// `Debug`, `PartialEq` and `Eq` are derived so a behavior can be printed in
/// failure messages and compared directly with `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ThinkingBehavior {
    /// The chat template accepts explicit thinking on/off kwargs, and uses
    /// `default` when the request does not specify either kwarg.
    Toggleable { default: bool },
    /// The chat template always behaves as `value` for this fixture.
    Always { value: bool },
}

impl ThinkingBehavior {
    /// Returns the thinking state that applies when a request leaves thinking
    /// unspecified: the `default` of a toggleable template, or the fixed
    /// `value` of a template that always behaves the same way.
    fn default(self) -> bool {
        match self {
            Self::Toggleable { default } => default,
            Self::Always { value } => value,
        }
    }

    /// Returns the thinking settings a roundtrip test should iterate over.
    ///
    /// `Some(flag)` means "request this thinking state explicitly" and `None`
    /// means "leave it unspecified". A toggleable template is exercised with
    /// both explicit states; a fixed template only with the state it supports.
    fn fixtures(self) -> Vec<Option<bool>> {
        match self {
            Self::Toggleable { .. } => vec![
                Some(true),  // explicitly enable thinking
                Some(false), // explicitly disable thinking
                None,        // use default template behavior
            ],
            Self::Always { value } => vec![
                Some(value), // explicitly request the supported thinking behavior
                None,        // use default template behavior
            ],
        }
    }
}
|
||||
|
||||
impl RoundtripCase {
|
||||
/// Qwen3 XML tool-call format with `qwen3` reasoning tags.
|
||||
fn qwen3() -> Self {
|
||||
@@ -44,6 +79,7 @@ impl RoundtripCase {
|
||||
assistant_stop_suffix: "<|im_end|>\n",
|
||||
tool_call_parser: ParserSelection::Auto,
|
||||
reasoning_parser: ParserSelection::Auto,
|
||||
thinking_behavior: ThinkingBehavior::Toggleable { default: true },
|
||||
json_fmt: spaced_json_fmt(),
|
||||
}
|
||||
}
|
||||
@@ -55,6 +91,7 @@ impl RoundtripCase {
|
||||
assistant_stop_suffix: "<|im_end|>\n",
|
||||
tool_call_parser: ParserSelection::Auto,
|
||||
reasoning_parser: ParserSelection::Auto,
|
||||
thinking_behavior: ThinkingBehavior::Toggleable { default: true },
|
||||
json_fmt: compact_json_fmt(),
|
||||
}
|
||||
}
|
||||
@@ -66,6 +103,7 @@ impl RoundtripCase {
|
||||
assistant_stop_suffix: "[e~[\n",
|
||||
tool_call_parser: ParserSelection::Auto,
|
||||
reasoning_parser: ParserSelection::Auto,
|
||||
thinking_behavior: ThinkingBehavior::Always { value: true },
|
||||
json_fmt: compact_json_fmt(),
|
||||
}
|
||||
}
|
||||
@@ -77,6 +115,7 @@ impl RoundtripCase {
|
||||
assistant_stop_suffix: "<|end▁of▁sentence|>",
|
||||
tool_call_parser: ParserSelection::Auto,
|
||||
reasoning_parser: ParserSelection::Auto,
|
||||
thinking_behavior: ThinkingBehavior::Toggleable { default: false },
|
||||
json_fmt: compact_json_fmt(),
|
||||
}
|
||||
}
|
||||
@@ -88,6 +127,7 @@ impl RoundtripCase {
|
||||
assistant_stop_suffix: "",
|
||||
tool_call_parser: ParserSelection::Auto,
|
||||
reasoning_parser: ParserSelection::Auto,
|
||||
thinking_behavior: ThinkingBehavior::Toggleable { default: true },
|
||||
json_fmt: compact_json_fmt(),
|
||||
}
|
||||
}
|
||||
@@ -100,6 +140,7 @@ impl RoundtripCase {
|
||||
assistant_stop_suffix: "<|im_end|>",
|
||||
tool_call_parser: ParserSelection::Auto,
|
||||
reasoning_parser: ParserSelection::Auto,
|
||||
thinking_behavior: ThinkingBehavior::Toggleable { default: true },
|
||||
json_fmt: spaced_json_fmt(),
|
||||
}
|
||||
}
|
||||
@@ -135,35 +176,44 @@ roundtrip_tests! {
|
||||
|
||||
/// Run the fixed reasoning+content fixture for one model/parser case.
|
||||
async fn run_roundtrip_reasoning_and_content(case: RoundtripCase) -> Result<()> {
|
||||
for thinking in case.thinking_behavior.fixtures() {
|
||||
run_roundtrip_reasoning_and_content_inner(case.clone(), thinking).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_roundtrip_reasoning_and_content_inner(
|
||||
case: RoundtripCase,
|
||||
thinking: Option<bool>,
|
||||
) -> Result<()> {
|
||||
let backends = load_roundtrip_backends(&case).await?;
|
||||
let request = roundtrip_request(
|
||||
"roundtrip-reasoning-content",
|
||||
vec![ChatMessage::text(ChatRole::User, "What is 2 + 2?")],
|
||||
Vec::new(),
|
||||
thinking,
|
||||
);
|
||||
let expected_reasoning = "Need compute 2 + 2 directly.";
|
||||
let expected_text = "The answer is 4.";
|
||||
let effective_thinking = thinking.unwrap_or(case.thinking_behavior.default());
|
||||
|
||||
let result = run_roundtrip(
|
||||
&case,
|
||||
&backends,
|
||||
&request,
|
||||
AssistantMessage {
|
||||
content: vec![
|
||||
AssistantContentBlock::Reasoning {
|
||||
text: expected_reasoning.to_string(),
|
||||
},
|
||||
AssistantContentBlock::Text {
|
||||
text: expected_text.to_string(),
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
let assistant = {
|
||||
let mut content = Vec::new();
|
||||
if effective_thinking {
|
||||
content.push(AssistantContentBlock::Reasoning {
|
||||
text: expected_reasoning.to_string(),
|
||||
});
|
||||
}
|
||||
content.push(AssistantContentBlock::Text {
|
||||
text: expected_text.to_string(),
|
||||
});
|
||||
AssistantMessage { content }
|
||||
};
|
||||
let result = run_roundtrip(&case, &backends, &request, assistant).await?;
|
||||
|
||||
assert_eq!(
|
||||
result.parsed_message.reasoning().as_deref().map(str::trim),
|
||||
Some(expected_reasoning)
|
||||
effective_thinking.then_some(expected_reasoning)
|
||||
);
|
||||
assert_eq!(result.parsed_message.text().trim(), expected_text);
|
||||
assert_eq!(result.parsed_message.tool_calls().count(), 0);
|
||||
@@ -186,6 +236,7 @@ async fn run_roundtrip_tool_call_mix(case: RoundtripCase) -> Result<()> {
|
||||
"Check Shanghai weather and add 1.00 plus 2.",
|
||||
)],
|
||||
test_tools(),
|
||||
Some(true), // always enable thinking in this fixture
|
||||
);
|
||||
let expected_reasoning = "Need call the weather and add tools.";
|
||||
let expected_text = "I will call the tools.";
|
||||
@@ -487,6 +538,7 @@ fn roundtrip_request(
|
||||
request_id: impl Into<String>,
|
||||
messages: Vec<ChatMessage>,
|
||||
tools: Vec<ChatTool>,
|
||||
thinking: Option<bool>,
|
||||
) -> ChatRequest {
|
||||
let mut request = ChatRequest {
|
||||
request_id: request_id.into(),
|
||||
@@ -500,10 +552,12 @@ fn roundtrip_request(
|
||||
..ChatRequest::for_test()
|
||||
};
|
||||
|
||||
// Enable thinking for some models so that rendering and parsing the reasoning block is
|
||||
// exercised in the roundtrip.
|
||||
for key in ["thinking", "enable_thinking"] {
|
||||
request.chat_options.template_kwargs.insert(key.to_string(), true.into());
|
||||
// Explicitly enable or disable thinking so that rendering and parsing the reasoning block is
|
||||
// exercised or skipped in the roundtrip. If unspecified, use the default template behavior.
|
||||
if let Some(thinking) = thinking {
|
||||
for key in ["thinking", "enable_thinking"] {
|
||||
request.chat_options.template_kwargs.insert(key.to_string(), thinking.into());
|
||||
}
|
||||
}
|
||||
|
||||
request
|
||||
|
||||
Reference in New Issue
Block a user