mirror of https://github.com/CherryHQ/cherry-studio.git
synced 2026-02-05 18:41:10 +08:00
fix(aiCore): preserve conversation history for image enhancement models (#12239)
* fix(aiCore): preserve conversation history for image enhancement models

  - Changed image enhancement model handling to preserve full conversation context
  - Only merge images from previous assistant message into last user message
  - Return messages as-is when no images need to be merged
  - Added test case for LLM-to-image-model switching scenario

  This allows users to switch from LLM conversations to image generation models while keeping the conversation context for guiding image generation.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* chore: format

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
parent 9414f13f6d
commit ac23c7f30b
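At a glance, the fix changes the shape of the SDK message array handed to an image enhancement model. A minimal before/after sketch in TypeScript (the simplified Part/SdkMessage types and the sample strings like 'Make it brighter' are illustrative assumptions; the real converter uses the SDK's richer message types):

type Part = { type: 'text'; text: string } | { type: 'image'; image: string }
type SdkMessage = { role: 'system' | 'user' | 'assistant'; content: string | Part[] }

// A conversation where the assistant's last turn produced a preview image:
// [user('Start editing'), assistant('Here is the current preview' + image), user('Make it brighter')]

// Old behavior: collapse everything into [system?, user(text + image)] -- history lost.
const collapsed: SdkMessage[] = [
  {
    role: 'user',
    content: [
      { type: 'text', text: 'Make it brighter' },
      { type: 'image', image: 'https://example.com/preview.png' }
    ]
  }
]

// New behavior: keep the full history and only merge the previous assistant's
// image into the last user message.
const preserved: SdkMessage[] = [
  { role: 'user', content: [{ type: 'text', text: 'Start editing' }] },
  { role: 'assistant', content: [{ type: 'text', text: 'Here is the current preview' }] },
  {
    role: 'user',
    content: [
      { type: 'text', text: 'Make it brighter' },
      { type: 'image', image: 'https://example.com/preview.png' }
    ]
  }
]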
@@ -263,7 +263,7 @@ describe('messageConverter', () => {
   })
 
   describe('convertMessagesToSdkMessages', () => {
-    it('collapses to [system?, user(image)] for image enhancement models', async () => {
+    it('preserves conversation history and merges images for image enhancement models', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const initialUser = createMessage('user')
       initialUser.__mockContent = 'Start editing'
@@ -277,7 +277,16 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([initialUser, assistant, finalUser], model)
 
+      // Preserves all conversation history, only merges images into the last user message
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start editing' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Here is the current preview' }]
+        },
         {
           role: 'user',
           content: [
@@ -288,7 +297,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('preserves system messages and collapses others for enhancement payloads', async () => {
+    it('preserves system messages and conversation history for enhancement payloads', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const fileUser = createMessage('user')
       fileUser.__mockContent = 'Use this document as inspiration'
@@ -309,8 +318,17 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([fileUser, assistant, finalUser], model)
 
+      // Preserves system message, conversation history, and merges images into the last user message
       expect(result).toEqual([
         { role: 'system', content: 'fileid://reference' },
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Use this document as inspiration' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Generated previews ready' }]
+        },
         {
           role: 'user',
           content: [
@@ -321,7 +339,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('handles no previous assistant message with images', async () => {
+    it('returns messages as-is when no previous assistant message with images', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user1 = createMessage('user')
       user1.__mockContent = 'Start'
@@ -331,7 +349,12 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([user1, user2], model)
 
+      // No images to merge, returns all messages as-is
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start' }]
+        },
         {
           role: 'user',
           content: [{ type: 'text', text: 'Continue without images' }]
@@ -339,7 +362,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('handles assistant message without images', async () => {
+    it('returns messages as-is when assistant message has no images', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user1 = createMessage('user')
       user1.__mockContent = 'Start'
@@ -353,7 +376,16 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([user1, assistant, user2], model)
 
+      // No images to merge, returns all messages as-is
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Text only response' }]
+        },
         {
           role: 'user',
           content: [{ type: 'text', text: 'Follow up' }]
@@ -361,7 +393,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('handles multiple assistant messages by using the most recent one', async () => {
+    it('merges images from the most recent assistant message', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user1 = createMessage('user')
       user1.__mockContent = 'Start'
@@ -382,7 +414,24 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([user1, assistant1, user2, assistant2, user3], model)
 
+      // Preserves all history, merges only the most recent assistant's images
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'First response' }]
+        },
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Continue' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Second response' }]
+        },
         {
           role: 'user',
           content: [
@@ -393,7 +442,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('handles conversation ending with assistant message', async () => {
+    it('returns messages as-is when conversation ends with assistant message', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user = createMessage('user')
       user.__mockContent = 'Start'
@@ -406,15 +455,20 @@ describe('messageConverter', () => {
 
+      // The user message is the last user message, but since the assistant comes after,
+      // there's no "previous" assistant message (search starts from messages.length-2 backwards)
+      // So no images to merge, returns all messages as-is
       expect(result).toEqual([
         {
           role: 'user',
           content: [{ type: 'text', text: 'Start' }]
-        }
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Response with image' }]
+        }
       ])
     })
 
-    it('handles empty content in last user message', async () => {
+    it('merges images even when last user message has empty content', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user1 = createMessage('user')
       user1.__mockContent = 'Start'
@@ -428,12 +482,79 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([user1, assistant, user2], model)
 
+      // Preserves history, merges images into last user message (even if empty)
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Here is the preview' }]
+        },
         {
           role: 'user',
           content: [{ type: 'image', image: 'https://example.com/preview.png' }]
         }
       ])
     })
+
+    it('allows using LLM conversation context for image generation', async () => {
+      // This test verifies the key use case: switching from LLM to image enhancement model
+      // and using the previous conversation as context for image generation
+      const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
+
+      // Simulate a conversation that started with a regular LLM
+      const user1 = createMessage('user')
+      user1.__mockContent = 'Help me design a futuristic robot with blue lights'
+
+      const assistant1 = createMessage('assistant')
+      assistant1.__mockContent =
+        'Great idea! The robot could have a sleek metallic body with glowing blue LED strips...'
+      assistant1.__mockImageBlocks = [] // LLM response, no images
+
+      const user2 = createMessage('user')
+      user2.__mockContent = 'Yes, and add some chrome accents'
+
+      const assistant2 = createMessage('assistant')
+      assistant2.__mockContent = 'Perfect! Chrome accents would complement the blue lights beautifully...'
+      assistant2.__mockImageBlocks = [] // Still LLM response, no images
+
+      // User switches to image enhancement model and asks for image generation
+      const user3 = createMessage('user')
+      user3.__mockContent = 'Now generate an image based on our discussion'
+
+      const result = await convertMessagesToSdkMessages([user1, assistant1, user2, assistant2, user3], model)
+
+      // All conversation history should be preserved for context
+      // No images to merge since previous assistant had no images
+      expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Help me design a futuristic robot with blue lights' }]
+        },
+        {
+          role: 'assistant',
+          content: [
+            {
+              type: 'text',
+              text: 'Great idea! The robot could have a sleek metallic body with glowing blue LED strips...'
+            }
+          ]
+        },
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Yes, and add some chrome accents' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Perfect! Chrome accents would complement the blue lights beautifully...' }]
+        },
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Now generate an image based on our discussion' }]
+        }
+      ])
+    })
   })
 })
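The tests above lean on createMessage, createModel, and the __mockContent / __mockImageBlocks fields; those helpers are defined earlier in the spec file and are not part of this diff. A rough sketch of what such factories could look like, purely an assumption to make the hunks readable (the actual helpers and the mocked block-store lookups may differ):

// Hypothetical test doubles: plain objects whose __mock* fields the mocked
// block-lookup functions (content resolution, findImageBlocks) read back.
let seq = 0

function createMessage(role: 'user' | 'assistant') {
  return {
    id: `msg-${++seq}`,
    role,
    blocks: [] as string[],
    __mockContent: '',
    __mockImageBlocks: [] as Array<{ url: string }>
  }
}

function createModel(stub: { id: string; name: string; provider: string; group: string }) {
  return stub // minimal Model stand-in; the real Model type has more fields
}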
@@ -229,23 +229,15 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
     sdkMessages.push(...(Array.isArray(sdkMessage) ? sdkMessage : [sdkMessage]))
   }
   // Special handling for image enhancement models
-  // Target behavior: Collapse the conversation into [system?, user(image)].
-  // Explanation of why we don't simply use slice:
-  // 1) We need to preserve all system messages: During the convertMessageToSdkParam process, native file uploads may insert `system(fileid://...)`.
-  //    Directly slicing the original messages or already converted sdkMessages could easily result in missing these system instructions.
-  //    Therefore, we first perform a full conversion and then aggregate the system messages afterward.
-  // 2) The conversion process may split messages: A single user message might be broken into two SDK messages—[system, user].
-  //    Slicing either side could lead to obtaining semantically incorrect fragments (e.g., only the split-out system message).
-  // 3) The “previous assistant message” is not necessarily the second-to-last one: There might be system messages or other message blocks inserted in between,
-  //    making a simple slice(-2) assumption too rigid. Here, we trace back from the end of the original messages to locate the most recent assistant message, which better aligns with business semantics.
-  // 4) This is a “collapse” rather than a simple “slice”: Ultimately, we need to synthesize a new user message
-  //    (with text from the last user message and images from the previous assistant message). Using slice can only extract subarrays,
-  //    which still require reassembly; constructing directly according to the target structure is clearer and more reliable.
+  // These models support multi-turn conversations but need images from previous assistant messages
+  // to be merged into the current user message for editing/enhancement operations.
+  //
+  // Key behaviors:
+  // 1. Preserve all conversation history for context
+  // 2. Find images from the previous assistant message and merge them into the last user message
+  // 3. This allows users to switch from LLM conversations and use that context for image generation
   if (isImageEnhancementModel(model)) {
-    // Collect all system messages (including ones generated from file uploads)
-    const systemMessages = sdkMessages.filter((m): m is SystemModelMessage => m.role === 'system')
-
-    // Find the last user message (SDK converted)
+    // Find the last user SDK message index
     const lastUserSdkIndex = (() => {
       for (let i = sdkMessages.length - 1; i >= 0; i--) {
         if (sdkMessages[i].role === 'user') return i
@@ -253,7 +245,10 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
       return -1
     })()
 
-    const lastUserSdk = lastUserSdkIndex >= 0 ? (sdkMessages[lastUserSdkIndex] as UserModelMessage) : null
+    // If no user message found, return messages as-is
+    if (lastUserSdkIndex < 0) {
+      return sdkMessages
+    }
 
     // Find the nearest preceding assistant message in original messages
     let prevAssistant: Message | null = null
@@ -264,31 +259,33 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
       }
     }
 
-    // Build the final user content parts
+    // Check if there are images from the previous assistant message
+    const imageBlocks = prevAssistant ? findImageBlocks(prevAssistant) : []
+    const imageParts = await convertImageBlockToImagePart(imageBlocks)
+
+    // If no images to merge, return messages as-is
+    if (imageParts.length === 0) {
+      return sdkMessages
+    }
+
+    // Build the new last user message with merged images
+    const lastUserSdk = sdkMessages[lastUserSdkIndex] as UserModelMessage
     let finalUserParts: Array<TextPart | FilePart | ImagePart> = []
-    if (lastUserSdk) {
-      if (typeof lastUserSdk.content === 'string') {
-        finalUserParts.push({ type: 'text', text: lastUserSdk.content })
-      } else if (Array.isArray(lastUserSdk.content)) {
-        finalUserParts = [...lastUserSdk.content]
-      }
-    }
+
+    if (typeof lastUserSdk.content === 'string') {
+      finalUserParts.push({ type: 'text', text: lastUserSdk.content })
+    } else if (Array.isArray(lastUserSdk.content)) {
+      finalUserParts = [...lastUserSdk.content]
+    }
 
-    // Append images from the previous assistant message if any
-    if (prevAssistant) {
-      const imageBlocks = findImageBlocks(prevAssistant)
-      const imageParts = await convertImageBlockToImagePart(imageBlocks)
-      if (imageParts.length > 0) {
-        finalUserParts.push(...imageParts)
-      }
-    }
+    // Append images from the previous assistant message
+    finalUserParts.push(...imageParts)
 
-    // If we couldn't find a last user message, fall back to returning collected system messages only
-    if (!lastUserSdk) {
-      return systemMessages
-    }
+    // Replace the last user message with the merged version
+    const result = [...sdkMessages]
+    result[lastUserSdkIndex] = { role: 'user', content: finalUserParts }
 
-    return [...systemMessages, { role: 'user', content: finalUserParts }]
+    return result
   }
 
   return sdkMessages
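Pieced together, the new tail of convertMessagesToSdkMessages reads roughly as follows. This is a consolidated sketch assembled from the hunks above, not the verbatim file: imports and the preceding conversion loop are elided, and the body of the prevAssistant search is reconstructed from the test comment about scanning backwards from messages.length - 2, since the hunk does not show it:

if (isImageEnhancementModel(model)) {
  // Find the last user SDK message index
  const lastUserSdkIndex = (() => {
    for (let i = sdkMessages.length - 1; i >= 0; i--) {
      if (sdkMessages[i].role === 'user') return i
    }
    return -1
  })()

  // If no user message found, return messages as-is
  if (lastUserSdkIndex < 0) {
    return sdkMessages
  }

  // Find the nearest preceding assistant message in original messages
  // (reconstructed: start at messages.length - 2 and walk backwards)
  let prevAssistant: Message | null = null
  for (let i = messages.length - 2; i >= 0; i--) {
    if (messages[i].role === 'assistant') {
      prevAssistant = messages[i]
      break
    }
  }

  // Check if there are images from the previous assistant message
  const imageBlocks = prevAssistant ? findImageBlocks(prevAssistant) : []
  const imageParts = await convertImageBlockToImagePart(imageBlocks)

  // If no images to merge, return messages as-is
  if (imageParts.length === 0) {
    return sdkMessages
  }

  // Build the new last user message with merged images
  const lastUserSdk = sdkMessages[lastUserSdkIndex] as UserModelMessage
  let finalUserParts: Array<TextPart | FilePart | ImagePart> = []

  if (typeof lastUserSdk.content === 'string') {
    finalUserParts.push({ type: 'text', text: lastUserSdk.content })
  } else if (Array.isArray(lastUserSdk.content)) {
    finalUserParts = [...lastUserSdk.content]
  }

  // Append images from the previous assistant message
  finalUserParts.push(...imageParts)

  // Replace the last user message with the merged version, keeping all history
  const result = [...sdkMessages]
  result[lastUserSdkIndex] = { role: 'user', content: finalUserParts }

  return result
}

return sdkMessages

Net effect: the function is now a pass-through unless there is actually an image to merge, which is what lets an LLM-only conversation flow untouched into an image generation request.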