[Rust Frontend] Cover different thinking modes in roundtrip tests (#44320)

Signed-off-by: Bugen Zhao <i@bugenzhao.com>
This commit is contained in:
Bugen Zhao
2026-06-02 22:51:25 +08:00
committed by GitHub
parent 88f172188b
commit 586201ebdc
+75 -21
View File
@@ -20,6 +20,7 @@ use vllm_chat::{
use vllm_text::{DecodedTextEvent, Finished, Prompt};
/// One model/parser configuration used to run the fixed roundtrip fixtures.
#[derive(Clone)]
struct RoundtripCase {
/// Hugging Face model id resolved through the production backend loader.
model_id: &'static str,
@@ -31,11 +32,45 @@ struct RoundtripCase {
tool_call_parser: ParserSelection,
/// Reasoning parser selection used by the output processor.
reasoning_parser: ParserSelection,
/// How this model's chat template handles thinking mode.
thinking_behavior: ThinkingBehavior,
/// JSON formatting expected after this model's template has materialized
/// tool-call arguments.
json_fmt: JsonFmt,
}
/// Describes how a model's chat template reacts to the thinking kwargs, which
/// in turn decides which thinking settings a roundtrip fixture should be run
/// with and whether a reasoning block is expected back.
#[derive(Clone, Copy)]
enum ThinkingBehavior {
    /// Thinking can be switched on or off explicitly through template kwargs;
    /// when the request sets neither kwarg the template behaves as `default`.
    Toggleable { default: bool },
    /// The template behaves as `value` for this fixture regardless of the
    /// request.
    Always { value: bool },
}

impl ThinkingBehavior {
    /// Returns the thinking state in effect when the request leaves thinking
    /// unspecified.
    fn default(self) -> bool {
        // Both variants carry exactly one flag and it is the answer in either
        // case, so a single or-pattern arm covers the whole enum.
        match self {
            Self::Toggleable { default: flag } | Self::Always { value: flag } => flag,
        }
    }

    /// Returns the thinking settings a fixture should be run with, in order.
    ///
    /// `Some(flag)` means thinking is explicitly requested as `flag`; the
    /// trailing `None` leaves it unspecified so the template default applies.
    fn fixtures(self) -> Vec<Option<bool>> {
        // Explicit settings this template supports.
        let explicit = match self {
            // Toggleable templates are exercised both on and off.
            Self::Toggleable { .. } => vec![true, false],
            // Fixed templates only get the one behavior they support.
            Self::Always { value } => vec![value],
        };

        // The "no kwargs" run always comes last.
        explicit
            .into_iter()
            .map(Some)
            .chain(std::iter::once(None))
            .collect()
    }
}
impl RoundtripCase {
/// Qwen3 XML tool-call format with `qwen3` reasoning tags.
fn qwen3() -> Self {
@@ -44,6 +79,7 @@ impl RoundtripCase {
assistant_stop_suffix: "<|im_end|>\n",
tool_call_parser: ParserSelection::Auto,
reasoning_parser: ParserSelection::Auto,
thinking_behavior: ThinkingBehavior::Toggleable { default: true },
json_fmt: spaced_json_fmt(),
}
}
@@ -55,6 +91,7 @@ impl RoundtripCase {
assistant_stop_suffix: "<|im_end|>\n",
tool_call_parser: ParserSelection::Auto,
reasoning_parser: ParserSelection::Auto,
thinking_behavior: ThinkingBehavior::Toggleable { default: true },
json_fmt: compact_json_fmt(),
}
}
@@ -66,6 +103,7 @@ impl RoundtripCase {
assistant_stop_suffix: "[e~[\n",
tool_call_parser: ParserSelection::Auto,
reasoning_parser: ParserSelection::Auto,
thinking_behavior: ThinkingBehavior::Always { value: true },
json_fmt: compact_json_fmt(),
}
}
@@ -77,6 +115,7 @@ impl RoundtripCase {
assistant_stop_suffix: "<end▁of▁sentence>",
tool_call_parser: ParserSelection::Auto,
reasoning_parser: ParserSelection::Auto,
thinking_behavior: ThinkingBehavior::Toggleable { default: false },
json_fmt: compact_json_fmt(),
}
}
@@ -88,6 +127,7 @@ impl RoundtripCase {
assistant_stop_suffix: "",
tool_call_parser: ParserSelection::Auto,
reasoning_parser: ParserSelection::Auto,
thinking_behavior: ThinkingBehavior::Toggleable { default: true },
json_fmt: compact_json_fmt(),
}
}
@@ -100,6 +140,7 @@ impl RoundtripCase {
assistant_stop_suffix: "<|im_end|>",
tool_call_parser: ParserSelection::Auto,
reasoning_parser: ParserSelection::Auto,
thinking_behavior: ThinkingBehavior::Toggleable { default: true },
json_fmt: spaced_json_fmt(),
}
}
@@ -135,35 +176,44 @@ roundtrip_tests! {
/// Run the fixed reasoning+content fixture for one model/parser case.
///
/// The fixture is executed once for every thinking setting listed by the
/// case's `thinking_behavior.fixtures()`, in that order. The first failing run
/// aborts the loop and its error is returned to the caller.
async fn run_roundtrip_reasoning_and_content(case: RoundtripCase) -> Result<()> {
    let thinking_modes = case.thinking_behavior.fixtures();
    for mode in thinking_modes {
        // The inner runner takes the case by value, so each run gets its own copy.
        let per_mode_case = case.clone();
        run_roundtrip_reasoning_and_content_inner(per_mode_case, mode).await?;
    }
    Ok(())
}
async fn run_roundtrip_reasoning_and_content_inner(
case: RoundtripCase,
thinking: Option<bool>,
) -> Result<()> {
let backends = load_roundtrip_backends(&case).await?;
let request = roundtrip_request(
"roundtrip-reasoning-content",
vec![ChatMessage::text(ChatRole::User, "What is 2 + 2?")],
Vec::new(),
thinking,
);
let expected_reasoning = "Need compute 2 + 2 directly.";
let expected_text = "The answer is 4.";
let effective_thinking = thinking.unwrap_or(case.thinking_behavior.default());
let result = run_roundtrip(
&case,
&backends,
&request,
AssistantMessage {
content: vec![
AssistantContentBlock::Reasoning {
text: expected_reasoning.to_string(),
},
AssistantContentBlock::Text {
text: expected_text.to_string(),
},
],
},
)
.await?;
let assistant = {
let mut content = Vec::new();
if effective_thinking {
content.push(AssistantContentBlock::Reasoning {
text: expected_reasoning.to_string(),
});
}
content.push(AssistantContentBlock::Text {
text: expected_text.to_string(),
});
AssistantMessage { content }
};
let result = run_roundtrip(&case, &backends, &request, assistant).await?;
assert_eq!(
result.parsed_message.reasoning().as_deref().map(str::trim),
Some(expected_reasoning)
effective_thinking.then_some(expected_reasoning)
);
assert_eq!(result.parsed_message.text().trim(), expected_text);
assert_eq!(result.parsed_message.tool_calls().count(), 0);
@@ -186,6 +236,7 @@ async fn run_roundtrip_tool_call_mix(case: RoundtripCase) -> Result<()> {
"Check Shanghai weather and add 1.00 plus 2.",
)],
test_tools(),
Some(true), // always enable thinking in this fixture
);
let expected_reasoning = "Need call the weather and add tools.";
let expected_text = "I will call the tools.";
@@ -487,6 +538,7 @@ fn roundtrip_request(
request_id: impl Into<String>,
messages: Vec<ChatMessage>,
tools: Vec<ChatTool>,
thinking: Option<bool>,
) -> ChatRequest {
let mut request = ChatRequest {
request_id: request_id.into(),
@@ -500,10 +552,12 @@ fn roundtrip_request(
..ChatRequest::for_test()
};
// Enable thinking for some models so that rendering and parsing the reasoning block is
// exercised in the roundtrip.
for key in ["thinking", "enable_thinking"] {
request.chat_options.template_kwargs.insert(key.to_string(), true.into());
// Explicitly enable or disable thinking so that rendering and parsing the reasoning block is
// exercised or skipped in the roundtrip. If unspecified, use the default template behavior.
if let Some(thinking) = thinking {
for key in ["thinking", "enable_thinking"] {
request.chat_options.template_kwargs.insert(key.to_string(), thinking.into());
}
}
request