# Patch summary (leading text before the first `diff --git` header; the patch body below is unchanged).
#
# Target: rust/src/chat/tests/roundtrip.rs
#
# What the hunks do:
#   * `RoundtripCase` gains `#[derive(Clone)]` and a new `thinking_behavior: ThinkingBehavior` field.
#   * New `ThinkingBehavior` enum (`Clone`, `Copy`) with two variants:
#       - `Toggleable { default }`: `fixtures()` yields `Some(true)`, `Some(false)`, `None`.
#       - `Always { value }`: `fixtures()` yields `Some(value)`, `None`.
#     `default()` returns `default` for `Toggleable` and `value` for `Always`.
#   * Each of the six `RoundtripCase` constructors touched here sets `thinking_behavior`:
#     four use `Toggleable { default: true }`, one uses `Always { value: true }`, and one uses
#     `Toggleable { default: false }`.
#   * `run_roundtrip_reasoning_and_content` becomes a loop over `case.thinking_behavior.fixtures()`
#     that calls the new `run_roundtrip_reasoning_and_content_inner(case.clone(), thinking)`.
#   * The inner function computes `effective_thinking = thinking.unwrap_or(<behavior default>)`,
#     adds the `Reasoning` block to the assistant message only when it is true, and expects the
#     parsed reasoning to be `effective_thinking.then_some(expected_reasoning)`.
#   * `run_roundtrip_tool_call_mix` passes `Some(true)` for the new argument.
#   * `roundtrip_request` takes a new trailing `thinking` parameter. The `thinking` and
#     `enable_thinking` template kwargs are inserted only when it is `Some(..)`; before this patch
#     both were always inserted as `true`.
#
# NOTE(review): this copy of the patch appears to have lost its line breaks and the contents of
#   angle brackets (`Vec>`, `Option,`, `impl Into,`, `Vec,`). Presumably the return type of
#   `fixtures()` is `Vec<Option<bool>>` and `thinking` is `Option<bool>`, judging by the
#   `Some(true)` / `None` values and the `unwrap_or(..)` on a `bool` -- confirm against the
#   original patch before applying.
# NOTE(review): the inner function calls `load_roundtrip_backends(&case)` once per thinking
#   fixture. If loading is expensive, consider loading once in the wrapper -- TODO confirm cost.
# NOTE(review): the visible uses of `case` in the inner function only borrow it (`&case`,
#   `case.thinking_behavior`). Taking `&RoundtripCase` might make `case.clone()` and the new
#   `#[derive(Clone)]` unnecessary -- the rest of the function is outside this hunk; verify.
#
diff --git a/rust/src/chat/tests/roundtrip.rs b/rust/src/chat/tests/roundtrip.rs index 74491cd0243..c88624a84b6 100644 --- a/rust/src/chat/tests/roundtrip.rs +++ b/rust/src/chat/tests/roundtrip.rs @@ -20,6 +20,7 @@ use vllm_chat::{ use vllm_text::{DecodedTextEvent, Finished, Prompt}; /// One model/parser configuration used to run the fixed roundtrip fixtures. +#[derive(Clone)] struct RoundtripCase { /// Hugging Face model id resolved through the production backend loader. model_id: &'static str, @@ -31,11 +32,45 @@ struct RoundtripCase { tool_call_parser: ParserSelection, /// Reasoning parser selection used by the output processor. reasoning_parser: ParserSelection, + /// How this model's chat template handles thinking mode. + thinking_behavior: ThinkingBehavior, /// JSON formatting expected after this model's template has materialized /// tool-call arguments. json_fmt: JsonFmt, } +#[derive(Clone, Copy)] +enum ThinkingBehavior { + /// The chat template accepts explicit thinking on/off kwargs, and uses + /// `default` when the request does not specify either kwarg. + Toggleable { default: bool }, + /// The chat template always behaves as `value` for this fixture. + Always { value: bool }, +} + +impl ThinkingBehavior { + fn default(self) -> bool { + match self { + Self::Toggleable { default } => default, + Self::Always { value } => value, + } + } + + fn fixtures(self) -> Vec> { + match self { + Self::Toggleable { .. } => vec![ + Some(true), // explicitly enable thinking + Some(false), // explicitly disable thinking + None, // use default template behavior + ], + Self::Always { value } => vec![ + Some(value), // explicitly request the supported thinking behavior + None, // use default template behavior + ], + } + } +} + impl RoundtripCase { /// Qwen3 XML tool-call format with `qwen3` reasoning tags. 
fn qwen3() -> Self { @@ -44,6 +79,7 @@ impl RoundtripCase { assistant_stop_suffix: "<|im_end|>\n", tool_call_parser: ParserSelection::Auto, reasoning_parser: ParserSelection::Auto, + thinking_behavior: ThinkingBehavior::Toggleable { default: true }, json_fmt: spaced_json_fmt(), } } @@ -55,6 +91,7 @@ impl RoundtripCase { assistant_stop_suffix: "<|im_end|>\n", tool_call_parser: ParserSelection::Auto, reasoning_parser: ParserSelection::Auto, + thinking_behavior: ThinkingBehavior::Toggleable { default: true }, json_fmt: compact_json_fmt(), } } @@ -66,6 +103,7 @@ impl RoundtripCase { assistant_stop_suffix: "[e~[\n", tool_call_parser: ParserSelection::Auto, reasoning_parser: ParserSelection::Auto, + thinking_behavior: ThinkingBehavior::Always { value: true }, json_fmt: compact_json_fmt(), } } @@ -77,6 +115,7 @@ impl RoundtripCase { assistant_stop_suffix: "<|end▁of▁sentence|>", tool_call_parser: ParserSelection::Auto, reasoning_parser: ParserSelection::Auto, + thinking_behavior: ThinkingBehavior::Toggleable { default: false }, json_fmt: compact_json_fmt(), } } @@ -88,6 +127,7 @@ impl RoundtripCase { assistant_stop_suffix: "", tool_call_parser: ParserSelection::Auto, reasoning_parser: ParserSelection::Auto, + thinking_behavior: ThinkingBehavior::Toggleable { default: true }, json_fmt: compact_json_fmt(), } } @@ -100,6 +140,7 @@ impl RoundtripCase { assistant_stop_suffix: "<|im_end|>", tool_call_parser: ParserSelection::Auto, reasoning_parser: ParserSelection::Auto, + thinking_behavior: ThinkingBehavior::Toggleable { default: true }, json_fmt: spaced_json_fmt(), } } @@ -135,35 +176,44 @@ roundtrip_tests! { /// Run the fixed reasoning+content fixture for one model/parser case. 
async fn run_roundtrip_reasoning_and_content(case: RoundtripCase) -> Result<()> { + for thinking in case.thinking_behavior.fixtures() { + run_roundtrip_reasoning_and_content_inner(case.clone(), thinking).await?; + } + Ok(()) +} + +async fn run_roundtrip_reasoning_and_content_inner( + case: RoundtripCase, + thinking: Option, +) -> Result<()> { let backends = load_roundtrip_backends(&case).await?; let request = roundtrip_request( "roundtrip-reasoning-content", vec![ChatMessage::text(ChatRole::User, "What is 2 + 2?")], Vec::new(), + thinking, ); let expected_reasoning = "Need compute 2 + 2 directly."; let expected_text = "The answer is 4."; + let effective_thinking = thinking.unwrap_or(case.thinking_behavior.default()); - let result = run_roundtrip( - &case, - &backends, - &request, - AssistantMessage { - content: vec![ - AssistantContentBlock::Reasoning { - text: expected_reasoning.to_string(), - }, - AssistantContentBlock::Text { - text: expected_text.to_string(), - }, - ], - }, - ) - .await?; + let assistant = { + let mut content = Vec::new(); + if effective_thinking { + content.push(AssistantContentBlock::Reasoning { + text: expected_reasoning.to_string(), + }); + } + content.push(AssistantContentBlock::Text { + text: expected_text.to_string(), + }); + AssistantMessage { content } + }; + let result = run_roundtrip(&case, &backends, &request, assistant).await?; assert_eq!( result.parsed_message.reasoning().as_deref().map(str::trim), - Some(expected_reasoning) + effective_thinking.then_some(expected_reasoning) ); assert_eq!(result.parsed_message.text().trim(), expected_text); assert_eq!(result.parsed_message.tool_calls().count(), 0); @@ -186,6 +236,7 @@ async fn run_roundtrip_tool_call_mix(case: RoundtripCase) -> Result<()> { "Check Shanghai weather and add 1.00 plus 2.", )], test_tools(), + Some(true), // always enable thinking in this fixture ); let expected_reasoning = "Need call the weather and add tools."; let expected_text = "I will call the tools."; @@ -487,6 
+538,7 @@ fn roundtrip_request( request_id: impl Into, messages: Vec, tools: Vec, + thinking: Option, ) -> ChatRequest { let mut request = ChatRequest { request_id: request_id.into(), @@ -500,10 +552,12 @@ fn roundtrip_request( ..ChatRequest::for_test() }; - // Enable thinking for some models so that rendering and parsing the reasoning block is - // exercised in the roundtrip. - for key in ["thinking", "enable_thinking"] { - request.chat_options.template_kwargs.insert(key.to_string(), true.into()); + // Explicitly enable or disable thinking so that rendering and parsing the reasoning block is + // exercised or skipped in the roundtrip. If unspecified, use the default template behavior. + if let Some(thinking) = thinking { + for key in ["thinking", "enable_thinking"] { + request.chat_options.template_kwargs.insert(key.to_string(), thinking.into()); + } } request