fix: Correct reasoning parameters for Aliyun Bailian GLM models and support qwen3-max snapshots (#12614)

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-06 02:51:07 +08:00 · 2026-01-27 22:57:34 +08:00 · 2026-01-27 22:57:34 +08:00 · 0d12da6468
commit 0d12da6468
parent e6f1df2511
4 changed files with 53 additions and 19 deletions
--- a/src/renderer/src/aiCore/utils/reasoning.ts
+++ b/src/renderer/src/aiCore/utils/reasoning.ts
@ -82,10 +82,10 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin

    // providers that use enable_thinking
    if (
-      isSupportEnableThinkingProvider(provider) &&
-      (isSupportedThinkingTokenQwenModel(model) ||
-        isSupportedThinkingTokenHunyuanModel(model) ||
-        (provider.id === SystemProviderIds.dashscope && isDeepSeekHybridInferenceModel(model)))
+      (isSupportEnableThinkingProvider(provider) &&
+        (isSupportedThinkingTokenQwenModel(model) || isSupportedThinkingTokenHunyuanModel(model))) ||
+      (provider.id === SystemProviderIds.dashscope &&
+        (isDeepSeekHybridInferenceModel(model) || isSupportedThinkingTokenZhipuModel(model)))
    ) {
      return { enable_thinking: false }
    }
@ -309,18 +309,24 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
    }
  }

+  // https://help.aliyun.com/zh/model-studio/deep-thinking
+  if (provider.id === SystemProviderIds.dashscope) {
+    // On dashscope, Qwen, DeepSeek, and GLM models toggle thinking via enable_thinking (no effort level, only on/off).
+    // Only Qwen and GLM are matched by the check below; DeepSeek is not covered by this condition.
+    if (isQwenReasoningModel(model) || isSupportedThinkingTokenZhipuModel(model)) {
+      return {
+        enable_thinking: true,
+        thinking_budget: budgetTokens
+      }
+    }
+  }
+
  // Qwen models, use enable_thinking
  if (isQwenReasoningModel(model)) {
    const thinkConfig = {
      enable_thinking: isQwenAlwaysThinkModel(model) || !isSupportEnableThinkingProvider(provider) ? undefined : true,
      thinking_budget: budgetTokens
    }
-    if (provider.id === SystemProviderIds.dashscope) {
-      return {
-        ...thinkConfig,
-        incremental_output: true
-      }
-    }
    return thinkConfig
  }

--- a/src/renderer/src/config/models/tests/reasoning.test.ts
+++ b/src/renderer/src/config/models/tests/reasoning.test.ts
@ -341,6 +341,7 @@ describe('Claude & regional providers', () => {
  })

  it('covers zhipu/minimax/step specific classifiers', () => {
+    expect(isSupportedThinkingTokenZhipuModel(createModel({ id: 'glm-4.5' }))).toBe(true)
    expect(isSupportedThinkingTokenZhipuModel(createModel({ id: 'glm-4.6-pro' }))).toBe(true)
    expect(isZhipuReasoningModel(createModel({ id: 'glm-z1' }))).toBe(true)
    expect(isStepReasoningModel(createModel({ id: 'step-r1-v-mini' }))).toBe(true)
@ -422,12 +423,18 @@ describe('Qwen & Gemini thinking coverage', () => {
    expect(isSupportedThinkingTokenQwenModel(createModel({ id }))).toBe(true)
  })

-  it.each(['qwen3-thinking', 'qwen3-instruct', 'qwen3-max', 'qwen3-vl-thinking'])(
-    'blocks thinking tokens for %s',
-    (id) => {
-      expect(isSupportedThinkingTokenQwenModel(createModel({ id }))).toBe(false)
-    }
-  )
+  it.each(['qwen3-thinking', 'qwen3-instruct', 'qwen3-vl-thinking'])('blocks thinking tokens for %s', (id) => {
+    expect(isSupportedThinkingTokenQwenModel(createModel({ id }))).toBe(false)
+  })
+
+  it('supports thinking tokens for qwen3-max-preview and qwen3-max-2026-01-23', () => {
+    expect(isSupportedThinkingTokenQwenModel(createModel({ id: 'qwen3-max-preview' }))).toBe(true)
+    expect(isSupportedThinkingTokenQwenModel(createModel({ id: 'qwen3-max-2026-01-23' }))).toBe(true)
+  })
+
+  it('blocks thinking tokens for qwen3-max and other unsupported versions', () => {
+    expect(isSupportedThinkingTokenQwenModel(createModel({ id: 'qwen3-max' }))).toBe(false)
+  })

  it.each(['qwen3-thinking', 'qwen3-vl-235b-thinking'])('always thinks for %s', (id) => {
    expect(isQwenAlwaysThinkModel(createModel({ id }))).toBe(true)
--- a/src/renderer/src/config/models/reasoning.ts
+++ b/src/renderer/src/config/models/reasoning.ts
@ -434,12 +434,15 @@ export function isSupportedThinkingTokenQwenModel(model?: Model): boolean {

  if (modelId.startsWith('qwen3')) {
    // instruct 是非思考模型 thinking 是思考模型，二者都不能控制思考
-    if (modelId.includes('instruct') || modelId.includes('thinking') || modelId.includes('qwen3-max')) {
+    if (modelId.includes('instruct') || modelId.includes('thinking')) {
      return false
    }
-    return true
+    if (!modelId.includes('qwen3-max')) {
+      return true
+    }
  }

+  // https://help.aliyun.com/zh/model-studio/deep-thinking
  return [
    'qwen-plus',
    'qwen-plus-latest',
@ -456,7 +459,9 @@ export function isSupportedThinkingTokenQwenModel(model?: Model): boolean {
    'qwen-turbo-0715',
    'qwen-turbo-2025-07-15',
    'qwen-flash',
-    'qwen-flash-2025-07-28'
+    'qwen-flash-2025-07-28',
+    'qwen3-max-2026-01-23',
+    'qwen3-max-preview'
  ].includes(modelId)
 }

--- a/src/renderer/src/types/index.ts
+++ b/src/renderer/src/types/index.ts
@ -124,6 +124,22 @@ const ThinkModelTypes = [
  'deepseek_hybrid'
 ] as const

+/** Option that controls a model's reasoning effort, or turns its reasoning behavior on or off.
+ * It is based on OpenAI's reasoning effort, but we have adapted it for other models.
+ *
+ * Possible options:
+ * - 'none': Disable reasoning for the model. (inherited from OpenAI)
+ *            It is also used as "off" when the model's reasoning can only be switched on or off.
+ * - 'minimal': Enable minimal reasoning effort for the model. (inherited from OpenAI; only for a few models, such as GPT-5.)
+ * - 'low': Enable low reasoning effort for the model. (inherited from OpenAI)
+ * - 'medium': Enable medium reasoning effort for the model. (inherited from OpenAI)
+ * - 'high': Enable high reasoning effort for the model. (inherited from OpenAI)
+ * - 'xhigh': Enable extra-high reasoning effort for the model. (inherited from OpenAI)
+ * - 'auto': Automatically determine the reasoning effort based on the model's capabilities.
+ *            For some providers, it is the same as 'default'.
+ *            It is also used as "on" when the model's reasoning can only be switched on or off.
+ * - 'default': Use the default behavior, i.e. we do not set any reasoning-related settings when calling the API.
+ */
 export type ReasoningEffortOption = NonNullable<OpenAI.ReasoningEffort> | 'auto' | 'default'
 export type ThinkingOption = ReasoningEffortOption
 export type ThinkingModelType = (typeof ThinkModelTypes)[number]