From ac23c7f30bee65449dc2df66d6ba64d1f093faec Mon Sep 17 00:00:00 2001 From: SuYao Date: Wed, 14 Jan 2026 14:23:19 +0800 Subject: [PATCH] fix(aiCore): preserve conversation history for image enhancement models (#12239) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(aiCore): preserve conversation history for image enhancement models - Changed image enhancement model handling to preserve full conversation context - Only merge images from previous assistant message into last user message - Return messages as-is when no images need to be merged - Added test case for LLM-to-image-model switching scenario This allows users to switch from LLM conversations to image generation models while keeping the conversation context for guiding image generation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * chore: format --------- Co-authored-by: Claude --- .../__tests__/message-converter.test.ts | 135 +++++++++++++++++- .../aiCore/prepareParams/messageConverter.ts | 71 +++++---- 2 files changed, 162 insertions(+), 44 deletions(-) diff --git a/src/renderer/src/aiCore/prepareParams/__tests__/message-converter.test.ts b/src/renderer/src/aiCore/prepareParams/__tests__/message-converter.test.ts index 2a69f3bcef..4f76cbee7b 100644 --- a/src/renderer/src/aiCore/prepareParams/__tests__/message-converter.test.ts +++ b/src/renderer/src/aiCore/prepareParams/__tests__/message-converter.test.ts @@ -263,7 +263,7 @@ describe('messageConverter', () => { }) describe('convertMessagesToSdkMessages', () => { - it('collapses to [system?, user(image)] for image enhancement models', async () => { + it('preserves conversation history and merges images for image enhancement models', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const initialUser = createMessage('user') initialUser.__mockContent = 'Start editing' @@ -277,7 +277,16 @@
describe('messageConverter', () => { const result = await convertMessagesToSdkMessages([initialUser, assistant, finalUser], model) + // Preserves all conversation history, only merges images into the last user message expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Start editing' }] + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'Here is the current preview' }] + }, { role: 'user', content: [ @@ -288,7 +297,7 @@ describe('messageConverter', () => { ]) }) - it('preserves system messages and collapses others for enhancement payloads', async () => { + it('preserves system messages and conversation history for enhancement payloads', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const fileUser = createMessage('user') fileUser.__mockContent = 'Use this document as inspiration' @@ -309,8 +318,17 @@ describe('messageConverter', () => { const result = await convertMessagesToSdkMessages([fileUser, assistant, finalUser], model) + // Preserves system message, conversation history, and merges images into the last user message expect(result).toEqual([ { role: 'system', content: 'fileid://reference' }, + { + role: 'user', + content: [{ type: 'text', text: 'Use this document as inspiration' }] + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'Generated previews ready' }] + }, { role: 'user', content: [ @@ -321,7 +339,7 @@ describe('messageConverter', () => { ]) }) - it('handles no previous assistant message with images', async () => { + it('returns messages as-is when no previous assistant message with images', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const user1 = createMessage('user') user1.__mockContent = 'Start' @@ -331,7 +349,12 @@ describe('messageConverter', () => { const result = await convertMessagesToSdkMessages([user1, user2], model) + // No 
images to merge, returns all messages as-is expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Start' }] + }, { role: 'user', content: [{ type: 'text', text: 'Continue without images' }] @@ -339,7 +362,7 @@ describe('messageConverter', () => { ]) }) - it('handles assistant message without images', async () => { + it('returns messages as-is when assistant message has no images', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const user1 = createMessage('user') user1.__mockContent = 'Start' @@ -353,7 +376,16 @@ describe('messageConverter', () => { const result = await convertMessagesToSdkMessages([user1, assistant, user2], model) + // No images to merge, returns all messages as-is expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Start' }] + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'Text only response' }] + }, { role: 'user', content: [{ type: 'text', text: 'Follow up' }] @@ -361,7 +393,7 @@ describe('messageConverter', () => { ]) }) - it('handles multiple assistant messages by using the most recent one', async () => { + it('merges images from the most recent assistant message', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const user1 = createMessage('user') user1.__mockContent = 'Start' @@ -382,7 +414,24 @@ describe('messageConverter', () => { const result = await convertMessagesToSdkMessages([user1, assistant1, user2, assistant2, user3], model) + // Preserves all history, merges only the most recent assistant's images expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Start' }] + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'First response' }] + }, + { + role: 'user', + content: [{ type: 'text', text: 'Continue' }] + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'Second 
response' }] + }, { role: 'user', content: [ @@ -393,7 +442,7 @@ describe('messageConverter', () => { ]) }) - it('handles conversation ending with assistant message', async () => { + it('returns messages as-is when conversation ends with assistant message', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const user = createMessage('user') user.__mockContent = 'Start' @@ -406,15 +455,20 @@ describe('messageConverter', () => { // The user message is the last user message, but since the assistant comes after, // there's no "previous" assistant message (search starts from messages.length-2 backwards) + // So no images to merge, returns all messages as-is expect(result).toEqual([ { role: 'user', content: [{ type: 'text', text: 'Start' }] + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'Response with image' }] } ]) }) - it('handles empty content in last user message', async () => { + it('merges images even when last user message has empty content', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const user1 = createMessage('user') user1.__mockContent = 'Start' @@ -428,12 +482,79 @@ describe('messageConverter', () => { const result = await convertMessagesToSdkMessages([user1, assistant, user2], model) + // Preserves history, merges images into last user message (even if empty) expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Start' }] + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'Here is the preview' }] + }, { role: 'user', content: [{ type: 'image', image: 'https://example.com/preview.png' }] } ]) }) + + it('allows using LLM conversation context for image generation', async () => { + // This test verifies the key use case: switching from LLM to image enhancement model + // and using the previous conversation as context for image generation + const model = 
createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) + + // Simulate a conversation that started with a regular LLM + const user1 = createMessage('user') + user1.__mockContent = 'Help me design a futuristic robot with blue lights' + + const assistant1 = createMessage('assistant') + assistant1.__mockContent = + 'Great idea! The robot could have a sleek metallic body with glowing blue LED strips...' + assistant1.__mockImageBlocks = [] // LLM response, no images + + const user2 = createMessage('user') + user2.__mockContent = 'Yes, and add some chrome accents' + + const assistant2 = createMessage('assistant') + assistant2.__mockContent = 'Perfect! Chrome accents would complement the blue lights beautifully...' + assistant2.__mockImageBlocks = [] // Still LLM response, no images + + // User switches to image enhancement model and asks for image generation + const user3 = createMessage('user') + user3.__mockContent = 'Now generate an image based on our discussion' + + const result = await convertMessagesToSdkMessages([user1, assistant1, user2, assistant2, user3], model) + + // All conversation history should be preserved for context + // No images to merge since previous assistant had no images + expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Help me design a futuristic robot with blue lights' }] + }, + { + role: 'assistant', + content: [ + { + type: 'text', + text: 'Great idea! The robot could have a sleek metallic body with glowing blue LED strips...' + } + ] + }, + { + role: 'user', + content: [{ type: 'text', text: 'Yes, and add some chrome accents' }] + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'Perfect! Chrome accents would complement the blue lights beautifully...' 
}] + }, + { + role: 'user', + content: [{ type: 'text', text: 'Now generate an image based on our discussion' }] + } + ]) + }) }) }) diff --git a/src/renderer/src/aiCore/prepareParams/messageConverter.ts b/src/renderer/src/aiCore/prepareParams/messageConverter.ts index 56c5f6a4e7..eba16c6619 100644 --- a/src/renderer/src/aiCore/prepareParams/messageConverter.ts +++ b/src/renderer/src/aiCore/prepareParams/messageConverter.ts @@ -229,23 +229,15 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M sdkMessages.push(...(Array.isArray(sdkMessage) ? sdkMessage : [sdkMessage])) } // Special handling for image enhancement models - // Target behavior: Collapse the conversation into [system?, user(image)]. - // Explanation of why we don't simply use slice: - // 1) We need to preserve all system messages: During the convertMessageToSdkParam process, native file uploads may insert `system(fileid://...)`. - // Directly slicing the original messages or already converted sdkMessages could easily result in missing these system instructions. - // Therefore, we first perform a full conversion and then aggregate the system messages afterward. - // 2) The conversion process may split messages: A single user message might be broken into two SDK messages—[system, user]. - // Slicing either side could lead to obtaining semantically incorrect fragments (e.g., only the split-out system message). - // 3) The “previous assistant message” is not necessarily the second-to-last one: There might be system messages or other message blocks inserted in between, - // making a simple slice(-2) assumption too rigid. Here, we trace back from the end of the original messages to locate the most recent assistant message, which better aligns with business semantics.
- // 4) This is a “collapse” rather than a simple “slice”: Ultimately, we need to synthesize a new user message - // (with text from the last user message and images from the previous assistant message). Using slice can only extract subarrays, - // which still require reassembly; constructing directly according to the target structure is clearer and more reliable. + // These models support multi-turn conversations but need images from previous assistant messages + // to be merged into the current user message for editing/enhancement operations. + // + // Key behaviors: + // 1. Preserve all conversation history for context + // 2. Find images from the previous assistant message and merge them into the last user message + // 3. This allows users to switch from LLM conversations and use that context for image generation if (isImageEnhancementModel(model)) { - // Collect all system messages (including ones generated from file uploads) - const systemMessages = sdkMessages.filter((m): m is SystemModelMessage => m.role === 'system') - - // Find the last user message (SDK converted) + // Find the last user SDK message index const lastUserSdkIndex = (() => { for (let i = sdkMessages.length - 1; i >= 0; i--) { if (sdkMessages[i].role === 'user') return i @@ -253,7 +245,10 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M return -1 })() - const lastUserSdk = lastUserSdkIndex >= 0 ? (sdkMessages[lastUserSdkIndex] as UserModelMessage) : null + // If no user message found, return messages as-is + if (lastUserSdkIndex < 0) { + return sdkMessages + } // Find the nearest preceding assistant message in original messages let prevAssistant: Message | null = null @@ -264,31 +259,33 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M } } - // Build the final user content parts + // Check if there are images from the previous assistant message + const imageBlocks = prevAssistant ?
findImageBlocks(prevAssistant) : [] + const imageParts = await convertImageBlockToImagePart(imageBlocks) + + // If no images to merge, return messages as-is + if (imageParts.length === 0) { + return sdkMessages + } + + // Build the new last user message with merged images + const lastUserSdk = sdkMessages[lastUserSdkIndex] as UserModelMessage let finalUserParts: Array = [] - if (lastUserSdk) { - if (typeof lastUserSdk.content === 'string') { - finalUserParts.push({ type: 'text', text: lastUserSdk.content }) - } else if (Array.isArray(lastUserSdk.content)) { - finalUserParts = [...lastUserSdk.content] - } + + if (typeof lastUserSdk.content === 'string') { + finalUserParts.push({ type: 'text', text: lastUserSdk.content }) + } else if (Array.isArray(lastUserSdk.content)) { + finalUserParts = [...lastUserSdk.content] } - // Append images from the previous assistant message if any - if (prevAssistant) { - const imageBlocks = findImageBlocks(prevAssistant) - const imageParts = await convertImageBlockToImagePart(imageBlocks) - if (imageParts.length > 0) { - finalUserParts.push(...imageParts) - } - } + // Append images from the previous assistant message + finalUserParts.push(...imageParts) - // If we couldn't find a last user message, fall back to returning collected system messages only - if (!lastUserSdk) { - return systemMessages - } + // Replace the last user message with the merged version + const result = [...sdkMessages] + result[lastUserSdkIndex] = { role: 'user', content: finalUserParts } - return [...systemMessages, { role: 'user', content: finalUserParts }] + return result } return sdkMessages