fix(aiCore): preserve conversation history for image enhancement models (#12239)

* fix(aiCore): preserve conversation history for image enhancement models

- Change image enhancement model handling to preserve the full conversation context
- Merge only the images from the previous assistant message into the last user message
- Return messages as-is when there are no images to merge
- Add a test case for the LLM-to-image-model switching scenario

This allows users to switch from LLM conversations to image generation models
while keeping the conversation context for guiding image generation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* chore: format

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
SuYao 2026-01-14 14:23:19 +08:00 committed by GitHub
parent 9414f13f6d
commit ac23c7f30b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 162 additions and 44 deletions

View File

@ -263,7 +263,7 @@ describe('messageConverter', () => {
})
describe('convertMessagesToSdkMessages', () => {
it('collapses to [system?, user(image)] for image enhancement models', async () => {
it('preserves conversation history and merges images for image enhancement models', async () => {
const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
const initialUser = createMessage('user')
initialUser.__mockContent = 'Start editing'
@ -277,7 +277,16 @@ describe('messageConverter', () => {
const result = await convertMessagesToSdkMessages([initialUser, assistant, finalUser], model)
// Preserves all conversation history, only merges images into the last user message
expect(result).toEqual([
{
role: 'user',
content: [{ type: 'text', text: 'Start editing' }]
},
{
role: 'assistant',
content: [{ type: 'text', text: 'Here is the current preview' }]
},
{
role: 'user',
content: [
@ -288,7 +297,7 @@ describe('messageConverter', () => {
])
})
it('preserves system messages and collapses others for enhancement payloads', async () => {
it('preserves system messages and conversation history for enhancement payloads', async () => {
const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
const fileUser = createMessage('user')
fileUser.__mockContent = 'Use this document as inspiration'
@ -309,8 +318,17 @@ describe('messageConverter', () => {
const result = await convertMessagesToSdkMessages([fileUser, assistant, finalUser], model)
// Preserves system message, conversation history, and merges images into the last user message
expect(result).toEqual([
{ role: 'system', content: 'fileid://reference' },
{
role: 'user',
content: [{ type: 'text', text: 'Use this document as inspiration' }]
},
{
role: 'assistant',
content: [{ type: 'text', text: 'Generated previews ready' }]
},
{
role: 'user',
content: [
@ -321,7 +339,7 @@ describe('messageConverter', () => {
])
})
it('handles no previous assistant message with images', async () => {
it('returns messages as-is when no previous assistant message with images', async () => {
const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
const user1 = createMessage('user')
user1.__mockContent = 'Start'
@ -331,7 +349,12 @@ describe('messageConverter', () => {
const result = await convertMessagesToSdkMessages([user1, user2], model)
// No images to merge, returns all messages as-is
expect(result).toEqual([
{
role: 'user',
content: [{ type: 'text', text: 'Start' }]
},
{
role: 'user',
content: [{ type: 'text', text: 'Continue without images' }]
@ -339,7 +362,7 @@ describe('messageConverter', () => {
])
})
it('handles assistant message without images', async () => {
it('returns messages as-is when assistant message has no images', async () => {
const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
const user1 = createMessage('user')
user1.__mockContent = 'Start'
@ -353,7 +376,16 @@ describe('messageConverter', () => {
const result = await convertMessagesToSdkMessages([user1, assistant, user2], model)
// No images to merge, returns all messages as-is
expect(result).toEqual([
{
role: 'user',
content: [{ type: 'text', text: 'Start' }]
},
{
role: 'assistant',
content: [{ type: 'text', text: 'Text only response' }]
},
{
role: 'user',
content: [{ type: 'text', text: 'Follow up' }]
@ -361,7 +393,7 @@ describe('messageConverter', () => {
])
})
it('handles multiple assistant messages by using the most recent one', async () => {
it('merges images from the most recent assistant message', async () => {
const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
const user1 = createMessage('user')
user1.__mockContent = 'Start'
@ -382,7 +414,24 @@ describe('messageConverter', () => {
const result = await convertMessagesToSdkMessages([user1, assistant1, user2, assistant2, user3], model)
// Preserves all history, merges only the most recent assistant's images
expect(result).toEqual([
{
role: 'user',
content: [{ type: 'text', text: 'Start' }]
},
{
role: 'assistant',
content: [{ type: 'text', text: 'First response' }]
},
{
role: 'user',
content: [{ type: 'text', text: 'Continue' }]
},
{
role: 'assistant',
content: [{ type: 'text', text: 'Second response' }]
},
{
role: 'user',
content: [
@ -393,7 +442,7 @@ describe('messageConverter', () => {
])
})
it('handles conversation ending with assistant message', async () => {
it('returns messages as-is when conversation ends with assistant message', async () => {
const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
const user = createMessage('user')
user.__mockContent = 'Start'
@ -406,15 +455,20 @@ describe('messageConverter', () => {
// The only user message is also the last user message. Because the assistant message comes after it,
// there is no "previous" assistant message (the backward search starts at messages.length - 2).
// So there are no images to merge, and all messages are returned as-is.
expect(result).toEqual([
{
role: 'user',
content: [{ type: 'text', text: 'Start' }]
},
{
role: 'assistant',
content: [{ type: 'text', text: 'Response with image' }]
}
])
})
it('handles empty content in last user message', async () => {
it('merges images even when last user message has empty content', async () => {
const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
const user1 = createMessage('user')
user1.__mockContent = 'Start'
@ -428,12 +482,79 @@ describe('messageConverter', () => {
const result = await convertMessagesToSdkMessages([user1, assistant, user2], model)
// Preserves history, merges images into last user message (even if empty)
expect(result).toEqual([
{
role: 'user',
content: [{ type: 'text', text: 'Start' }]
},
{
role: 'assistant',
content: [{ type: 'text', text: 'Here is the preview' }]
},
{
role: 'user',
content: [{ type: 'image', image: 'https://example.com/preview.png' }]
}
])
})
it('allows using LLM conversation context for image generation', async () => {
  // Core scenario: a chat that began with a regular LLM is handed over to an image
  // enhancement model, and the earlier discussion must reach that model as context.
  const imageModel = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })

  // Texts of the five turns, in conversation order
  const designRequest = 'Help me design a futuristic robot with blue lights'
  const designReply = 'Great idea! The robot could have a sleek metallic body with glowing blue LED strips...'
  const accentRequest = 'Yes, and add some chrome accents'
  const accentReply = 'Perfect! Chrome accents would complement the blue lights beautifully...'
  const generateRequest = 'Now generate an image based on our discussion'

  // Expected SDK shape of a text-only turn
  const textTurn = (role: 'user' | 'assistant', text: string) => ({
    role,
    content: [{ type: 'text', text }]
  })

  // Turns produced while the conversation was still driven by a plain LLM
  const firstUser = createMessage('user')
  firstUser.__mockContent = designRequest

  const firstAssistant = createMessage('assistant')
  firstAssistant.__mockContent = designReply
  firstAssistant.__mockImageBlocks = [] // text-only LLM reply

  const secondUser = createMessage('user')
  secondUser.__mockContent = accentRequest

  const secondAssistant = createMessage('assistant')
  secondAssistant.__mockContent = accentReply
  secondAssistant.__mockImageBlocks = [] // still a text-only LLM reply

  // The user has now switched to the image enhancement model and requests an image
  const finalUser = createMessage('user')
  finalUser.__mockContent = generateRequest

  const history = [firstUser, firstAssistant, secondUser, secondAssistant, finalUser]
  const converted = await convertMessagesToSdkMessages(history, imageModel)

  // Every turn is kept for context; the preceding assistant message carried no images,
  // so nothing is merged into the final user message.
  expect(converted).toEqual([
    textTurn('user', designRequest),
    textTurn('assistant', designReply),
    textTurn('user', accentRequest),
    textTurn('assistant', accentReply),
    textTurn('user', generateRequest)
  ])
})
})
})

View File

@ -229,23 +229,15 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
sdkMessages.push(...(Array.isArray(sdkMessage) ? sdkMessage : [sdkMessage]))
}
// Special handling for image enhancement models
// Target behavior: Collapse the conversation into [system?, user(image)].
// Explanation of why we don't simply use slice:
// 1) We need to preserve all system messages: During the convertMessageToSdkParam process, native file uploads may insert `system(fileid://...)`.
// Directly slicing the original messages or already converted sdkMessages could easily result in missing these system instructions.
// Therefore, we first perform a full conversion and then aggregate the system messages afterward.
// 2) The conversion process may split messages: A single user message might be broken into two SDK messages—[system, user].
// Slicing either side could lead to obtaining semantically incorrect fragments (e.g., only the split-out system message).
// 3) The “previous assistant message” is not necessarily the second-to-last one: There might be system messages or other message blocks inserted in between,
// making a simple slice(-2) assumption too rigid. Here, we trace back from the end of the original messages to locate the most recent assistant message, which better aligns with business semantics.
// 4) This is a “collapse” rather than a simple “slice”: Ultimately, we need to synthesize a new user message
// (with text from the last user message and images from the previous assistant message). Using slice can only extract subarrays,
// which still require reassembly; constructing directly according to the target structure is clearer and more reliable.
// These models support multi-turn conversations but need images from previous assistant messages
// to be merged into the current user message for editing/enhancement operations.
//
// Key behaviors:
// 1. Preserve all conversation history for context
// 2. Find images from the previous assistant message and merge them into the last user message
// 3. This allows users to switch from LLM conversations and use that context for image generation
if (isImageEnhancementModel(model)) {
// Collect all system messages (including ones generated from file uploads)
const systemMessages = sdkMessages.filter((m): m is SystemModelMessage => m.role === 'system')
// Find the last user message (SDK converted)
// Find the last user SDK message index
const lastUserSdkIndex = (() => {
for (let i = sdkMessages.length - 1; i >= 0; i--) {
if (sdkMessages[i].role === 'user') return i
@ -253,7 +245,10 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
return -1
})()
const lastUserSdk = lastUserSdkIndex >= 0 ? (sdkMessages[lastUserSdkIndex] as UserModelMessage) : null
// If no user message found, return messages as-is
if (lastUserSdkIndex < 0) {
return sdkMessages
}
// Find the nearest preceding assistant message in original messages
let prevAssistant: Message | null = null
@ -264,31 +259,33 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
}
}
// Build the final user content parts
// Check if there are images from the previous assistant message
const imageBlocks = prevAssistant ? findImageBlocks(prevAssistant) : []
const imageParts = await convertImageBlockToImagePart(imageBlocks)
// If no images to merge, return messages as-is
if (imageParts.length === 0) {
return sdkMessages
}
// Build the new last user message with merged images
const lastUserSdk = sdkMessages[lastUserSdkIndex] as UserModelMessage
let finalUserParts: Array<TextPart | FilePart | ImagePart> = []
if (lastUserSdk) {
if (typeof lastUserSdk.content === 'string') {
finalUserParts.push({ type: 'text', text: lastUserSdk.content })
} else if (Array.isArray(lastUserSdk.content)) {
finalUserParts = [...lastUserSdk.content]
}
if (typeof lastUserSdk.content === 'string') {
finalUserParts.push({ type: 'text', text: lastUserSdk.content })
} else if (Array.isArray(lastUserSdk.content)) {
finalUserParts = [...lastUserSdk.content]
}
// Append images from the previous assistant message if any
if (prevAssistant) {
const imageBlocks = findImageBlocks(prevAssistant)
const imageParts = await convertImageBlockToImagePart(imageBlocks)
if (imageParts.length > 0) {
finalUserParts.push(...imageParts)
}
}
// Append images from the previous assistant message
finalUserParts.push(...imageParts)
// If we couldn't find a last user message, fall back to returning collected system messages only
if (!lastUserSdk) {
return systemMessages
}
// Replace the last user message with the merged version
const result = [...sdkMessages]
result[lastUserSdkIndex] = { role: 'user', content: finalUserParts }
return [...systemMessages, { role: 'user', content: finalUserParts }]
return result
}
return sdkMessages