fix: Correct reasoning parameters for Aliyun Bailian GLM models and support qwen3-max snapshots (#12614)

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Phantom 2026-01-27 22:57:34 +08:00 committed by GitHub
parent e6f1df2511
commit 0d12da6468
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 53 additions and 19 deletions

View File

@ -82,10 +82,10 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
// providers that use enable_thinking
if (
isSupportEnableThinkingProvider(provider) &&
(isSupportedThinkingTokenQwenModel(model) ||
isSupportedThinkingTokenHunyuanModel(model) ||
(provider.id === SystemProviderIds.dashscope && isDeepSeekHybridInferenceModel(model)))
(isSupportEnableThinkingProvider(provider) &&
(isSupportedThinkingTokenQwenModel(model) || isSupportedThinkingTokenHunyuanModel(model))) ||
(provider.id === SystemProviderIds.dashscope &&
(isDeepSeekHybridInferenceModel(model) || isSupportedThinkingTokenZhipuModel(model)))
) {
return { enable_thinking: false }
}
@ -309,18 +309,24 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
}
}
// https://help.aliyun.com/zh/model-studio/deep-thinking
if (provider.id === SystemProviderIds.dashscope) {
// For dashscope: Qwen, DeepSeek, and GLM models use enable_thinking to control thinking
// No effort, only on/off
if (isQwenReasoningModel(model) || isSupportedThinkingTokenZhipuModel(model)) {
return {
enable_thinking: true,
thinking_budget: budgetTokens
}
}
}
// Qwen models, use enable_thinking
if (isQwenReasoningModel(model)) {
const thinkConfig = {
enable_thinking: isQwenAlwaysThinkModel(model) || !isSupportEnableThinkingProvider(provider) ? undefined : true,
thinking_budget: budgetTokens
}
if (provider.id === SystemProviderIds.dashscope) {
return {
...thinkConfig,
incremental_output: true
}
}
return thinkConfig
}

View File

@ -341,6 +341,7 @@ describe('Claude & regional providers', () => {
})
it('covers zhipu/minimax/step specific classifiers', () => {
expect(isSupportedThinkingTokenZhipuModel(createModel({ id: 'glm-4.5' }))).toBe(true)
expect(isSupportedThinkingTokenZhipuModel(createModel({ id: 'glm-4.6-pro' }))).toBe(true)
expect(isZhipuReasoningModel(createModel({ id: 'glm-z1' }))).toBe(true)
expect(isStepReasoningModel(createModel({ id: 'step-r1-v-mini' }))).toBe(true)
@ -422,12 +423,18 @@ describe('Qwen & Gemini thinking coverage', () => {
expect(isSupportedThinkingTokenQwenModel(createModel({ id }))).toBe(true)
})
it.each(['qwen3-thinking', 'qwen3-instruct', 'qwen3-max', 'qwen3-vl-thinking'])(
'blocks thinking tokens for %s',
(id) => {
expect(isSupportedThinkingTokenQwenModel(createModel({ id }))).toBe(false)
}
)
it.each(['qwen3-thinking', 'qwen3-instruct', 'qwen3-vl-thinking'])('blocks thinking tokens for %s', (id) => {
expect(isSupportedThinkingTokenQwenModel(createModel({ id }))).toBe(false)
})
it('supports thinking tokens for qwen3-max-preview and qwen3-max-2026-01-23', () => {
expect(isSupportedThinkingTokenQwenModel(createModel({ id: 'qwen3-max-preview' }))).toBe(true)
expect(isSupportedThinkingTokenQwenModel(createModel({ id: 'qwen3-max-2026-01-23' }))).toBe(true)
})
it('blocks thinking tokens for qwen3-max and other unsupported versions', () => {
expect(isSupportedThinkingTokenQwenModel(createModel({ id: 'qwen3-max' }))).toBe(false)
})
it.each(['qwen3-thinking', 'qwen3-vl-235b-thinking'])('always thinks for %s', (id) => {
expect(isQwenAlwaysThinkModel(createModel({ id }))).toBe(true)

View File

@ -434,12 +434,15 @@ export function isSupportedThinkingTokenQwenModel(model?: Model): boolean {
if (modelId.startsWith('qwen3')) {
// instruct 是非思考模型 thinking 是思考模型,二者都不能控制思考
if (modelId.includes('instruct') || modelId.includes('thinking') || modelId.includes('qwen3-max')) {
if (modelId.includes('instruct') || modelId.includes('thinking')) {
return false
}
return true
if (!modelId.includes('qwen3-max')) {
return true
}
}
// https://help.aliyun.com/zh/model-studio/deep-thinking
return [
'qwen-plus',
'qwen-plus-latest',
@ -456,7 +459,9 @@ export function isSupportedThinkingTokenQwenModel(model?: Model): boolean {
'qwen-turbo-0715',
'qwen-turbo-2025-07-15',
'qwen-flash',
'qwen-flash-2025-07-28'
'qwen-flash-2025-07-28',
'qwen3-max-2026-01-23',
'qwen3-max-preview'
].includes(modelId)
}

View File

@ -124,6 +124,22 @@ const ThinkModelTypes = [
'deepseek_hybrid'
] as const
/** Option describing how a model's reasoning effort is controlled, or whether its reasoning behavior is turned on/off.
* It is based on OpenAI's reasoning effort, adapted here for other models.
*
* Possible options:
* - 'none': Disable reasoning for the model. (inherited from OpenAI)
* It is also used as "off" when the model's reasoning behavior can only be set to "on" or "off".
* - 'minimal': Enable minimal reasoning effort for the model. (inherited from OpenAI, only for a few models, such as GPT-5.)
* - 'low': Enable low reasoning effort for the model. (inherited from OpenAI)
* - 'medium': Enable medium reasoning effort for the model. (inherited from OpenAI)
* - 'high': Enable high reasoning effort for the model. (inherited from OpenAI)
* - 'xhigh': Enable extra high reasoning effort for the model. (inherited from OpenAI)
* - 'auto': Automatically determine the reasoning effort based on the model's capabilities.
* For some providers, it is the same as 'default'.
* It is also used as "on" when the model's reasoning behavior can only be set to "on" or "off".
* - 'default': Depend on the default behavior, i.e. no reasoning-related settings are set when calling the API.
*/
export type ReasoningEffortOption = NonNullable<OpenAI.ReasoningEffort> | 'auto' | 'default'
/** Alias of {@link ReasoningEffortOption}. */
export type ThinkingOption = ReasoningEffortOption
/** Union of the string literals listed in the `ThinkModelTypes` const array. */
export type ThinkingModelType = (typeof ThinkModelTypes)[number]