mirror of https://github.com/CherryHQ/cherry-studio.git
synced 2026-02-05 18:41:10 +08:00
fix(aiCore): preserve conversation history for image enhancement models (#12239)
* fix(aiCore): preserve conversation history for image enhancement models

  - Changed image enhancement model handling to preserve full conversation context
  - Only merge images from previous assistant message into last user message
  - Return messages as-is when no images need to be merged
  - Added test case for LLM-to-image-model switching scenario

  This allows users to switch from LLM conversations to image generation models while keeping the conversation context for guiding image generation.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* chore: format

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
parent 9414f13f6d
commit ac23c7f30b
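At a glance, the fix changes the shape of the SDK message array handed to an image enhancement model. A minimal before/after sketch in TypeScript (the simplified Part/SdkMessage types and the sample strings like 'Make it brighter' are illustrative assumptions; the real converter uses the SDK's richer message types):

type Part = { type: 'text'; text: string } | { type: 'image'; image: string }
type SdkMessage = { role: 'system' | 'user' | 'assistant'; content: string | Part[] }

// A conversation where the assistant's last turn produced a preview image:
// [user('Start editing'), assistant('Here is the current preview' + image), user('Make it brighter')]

// Old behavior: collapse everything into [system?, user(text + image)] -- history lost.
const collapsed: SdkMessage[] = [
  {
    role: 'user',
    content: [
      { type: 'text', text: 'Make it brighter' },
      { type: 'image', image: 'https://example.com/preview.png' }
    ]
  }
]

// New behavior: keep the full history and only merge the previous assistant's
// image into the last user message.
const preserved: SdkMessage[] = [
  { role: 'user', content: [{ type: 'text', text: 'Start editing' }] },
  { role: 'assistant', content: [{ type: 'text', text: 'Here is the current preview' }] },
  {
    role: 'user',
    content: [
      { type: 'text', text: 'Make it brighter' },
      { type: 'image', image: 'https://example.com/preview.png' }
    ]
  }
]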
@@ -263,7 +263,7 @@ describe('messageConverter', () => {
   })
 
   describe('convertMessagesToSdkMessages', () => {
-    it('collapses to [system?, user(image)] for image enhancement models', async () => {
+    it('preserves conversation history and merges images for image enhancement models', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const initialUser = createMessage('user')
       initialUser.__mockContent = 'Start editing'
@@ -277,7 +277,16 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([initialUser, assistant, finalUser], model)
 
+      // Preserves all conversation history, only merges images into the last user message
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start editing' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Here is the current preview' }]
+        },
         {
           role: 'user',
           content: [
@@ -288,7 +297,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('preserves system messages and collapses others for enhancement payloads', async () => {
+    it('preserves system messages and conversation history for enhancement payloads', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const fileUser = createMessage('user')
       fileUser.__mockContent = 'Use this document as inspiration'
@@ -309,8 +318,17 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([fileUser, assistant, finalUser], model)
 
+      // Preserves system message, conversation history, and merges images into the last user message
       expect(result).toEqual([
         { role: 'system', content: 'fileid://reference' },
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Use this document as inspiration' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Generated previews ready' }]
+        },
         {
           role: 'user',
           content: [
@@ -321,7 +339,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('handles no previous assistant message with images', async () => {
+    it('returns messages as-is when no previous assistant message with images', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user1 = createMessage('user')
       user1.__mockContent = 'Start'
@@ -331,7 +349,12 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([user1, user2], model)
 
+      // No images to merge, returns all messages as-is
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start' }]
+        },
         {
           role: 'user',
           content: [{ type: 'text', text: 'Continue without images' }]
@@ -339,7 +362,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('handles assistant message without images', async () => {
+    it('returns messages as-is when assistant message has no images', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user1 = createMessage('user')
       user1.__mockContent = 'Start'
@@ -353,7 +376,16 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([user1, assistant, user2], model)
 
+      // No images to merge, returns all messages as-is
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Text only response' }]
+        },
         {
           role: 'user',
           content: [{ type: 'text', text: 'Follow up' }]
@@ -361,7 +393,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('handles multiple assistant messages by using the most recent one', async () => {
+    it('merges images from the most recent assistant message', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user1 = createMessage('user')
       user1.__mockContent = 'Start'
@@ -382,7 +414,24 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([user1, assistant1, user2, assistant2, user3], model)
 
+      // Preserves all history, merges only the most recent assistant's images
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'First response' }]
+        },
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Continue' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Second response' }]
+        },
         {
           role: 'user',
           content: [
@@ -393,7 +442,7 @@ describe('messageConverter', () => {
       ])
     })
 
-    it('handles conversation ending with assistant message', async () => {
+    it('returns messages as-is when conversation ends with assistant message', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user = createMessage('user')
       user.__mockContent = 'Start'
@@ -406,15 +455,20 @@ describe('messageConverter', () => {
 
+      // The user message is the last user message, but since the assistant comes after,
+      // there's no "previous" assistant message (search starts from messages.length-2 backwards)
+      // So no images to merge, returns all messages as-is
       expect(result).toEqual([
         {
           role: 'user',
           content: [{ type: 'text', text: 'Start' }]
-        }
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Response with image' }]
+        }
       ])
     })
 
-    it('handles empty content in last user message', async () => {
+    it('merges images even when last user message has empty content', async () => {
       const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
       const user1 = createMessage('user')
       user1.__mockContent = 'Start'
@@ -428,12 +482,79 @@ describe('messageConverter', () => {
 
       const result = await convertMessagesToSdkMessages([user1, assistant, user2], model)
 
+      // Preserves history, merges images into last user message (even if empty)
       expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Start' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Here is the preview' }]
+        },
         {
           role: 'user',
           content: [{ type: 'image', image: 'https://example.com/preview.png' }]
         }
       ])
     })
+
+    it('allows using LLM conversation context for image generation', async () => {
+      // This test verifies the key use case: switching from LLM to image enhancement model
+      // and using the previous conversation as context for image generation
+      const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' })
+
+      // Simulate a conversation that started with a regular LLM
+      const user1 = createMessage('user')
+      user1.__mockContent = 'Help me design a futuristic robot with blue lights'
+
+      const assistant1 = createMessage('assistant')
+      assistant1.__mockContent =
+        'Great idea! The robot could have a sleek metallic body with glowing blue LED strips...'
+      assistant1.__mockImageBlocks = [] // LLM response, no images
+
+      const user2 = createMessage('user')
+      user2.__mockContent = 'Yes, and add some chrome accents'
+
+      const assistant2 = createMessage('assistant')
+      assistant2.__mockContent = 'Perfect! Chrome accents would complement the blue lights beautifully...'
+      assistant2.__mockImageBlocks = [] // Still LLM response, no images
+
+      // User switches to image enhancement model and asks for image generation
+      const user3 = createMessage('user')
+      user3.__mockContent = 'Now generate an image based on our discussion'
+
+      const result = await convertMessagesToSdkMessages([user1, assistant1, user2, assistant2, user3], model)
+
+      // All conversation history should be preserved for context
+      // No images to merge since previous assistant had no images
+      expect(result).toEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Help me design a futuristic robot with blue lights' }]
+        },
+        {
+          role: 'assistant',
+          content: [
+            {
+              type: 'text',
+              text: 'Great idea! The robot could have a sleek metallic body with glowing blue LED strips...'
+            }
+          ]
+        },
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Yes, and add some chrome accents' }]
+        },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: 'Perfect! Chrome accents would complement the blue lights beautifully...' }]
+        },
+        {
+          role: 'user',
+          content: [{ type: 'text', text: 'Now generate an image based on our discussion' }]
+        }
+      ])
+    })
   })
 })
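The tests above lean on createMessage, createModel, and the __mockContent / __mockImageBlocks fields; those helpers are defined earlier in the spec file and are not part of this diff. A rough sketch of what such factories could look like, purely an assumption to make the hunks readable (the actual helpers and the mocked block-store lookups may differ):

// Hypothetical test doubles: plain objects whose __mock* fields the mocked
// block-lookup functions (content resolution, findImageBlocks) read back.
let seq = 0

function createMessage(role: 'user' | 'assistant') {
  return {
    id: `msg-${++seq}`,
    role,
    blocks: [] as string[],
    __mockContent: '',
    __mockImageBlocks: [] as Array<{ url: string }>
  }
}

function createModel(stub: { id: string; name: string; provider: string; group: string }) {
  return stub // minimal Model stand-in; the real Model type has more fields
}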
@@ -229,23 +229,15 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
     sdkMessages.push(...(Array.isArray(sdkMessage) ? sdkMessage : [sdkMessage]))
   }
   // Special handling for image enhancement models
-  // Target behavior: Collapse the conversation into [system?, user(image)].
-  // Explanation of why we don't simply use slice:
-  // 1) We need to preserve all system messages: During the convertMessageToSdkParam process, native file uploads may insert `system(fileid://...)`.
-  //    Directly slicing the original messages or already converted sdkMessages could easily result in missing these system instructions.
-  //    Therefore, we first perform a full conversion and then aggregate the system messages afterward.
-  // 2) The conversion process may split messages: A single user message might be broken into two SDK messages—[system, user].
-  //    Slicing either side could lead to obtaining semantically incorrect fragments (e.g., only the split-out system message).
-  // 3) The “previous assistant message” is not necessarily the second-to-last one: There might be system messages or other message blocks inserted in between,
-  //    making a simple slice(-2) assumption too rigid. Here, we trace back from the end of the original messages to locate the most recent assistant message, which better aligns with business semantics.
-  // 4) This is a “collapse” rather than a simple “slice”: Ultimately, we need to synthesize a new user message
-  //    (with text from the last user message and images from the previous assistant message). Using slice can only extract subarrays,
-  //    which still require reassembly; constructing directly according to the target structure is clearer and more reliable.
+  // These models support multi-turn conversations but need images from previous assistant messages
+  // to be merged into the current user message for editing/enhancement operations.
+  //
+  // Key behaviors:
+  // 1. Preserve all conversation history for context
+  // 2. Find images from the previous assistant message and merge them into the last user message
+  // 3. This allows users to switch from LLM conversations and use that context for image generation
   if (isImageEnhancementModel(model)) {
-    // Collect all system messages (including ones generated from file uploads)
-    const systemMessages = sdkMessages.filter((m): m is SystemModelMessage => m.role === 'system')
-
-    // Find the last user message (SDK converted)
+    // Find the last user SDK message index
     const lastUserSdkIndex = (() => {
       for (let i = sdkMessages.length - 1; i >= 0; i--) {
         if (sdkMessages[i].role === 'user') return i
@@ -253,7 +245,10 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
       return -1
     })()
 
-    const lastUserSdk = lastUserSdkIndex >= 0 ? (sdkMessages[lastUserSdkIndex] as UserModelMessage) : null
+    // If no user message found, return messages as-is
+    if (lastUserSdkIndex < 0) {
+      return sdkMessages
+    }
 
     // Find the nearest preceding assistant message in original messages
     let prevAssistant: Message | null = null
@@ -264,31 +259,33 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M
       }
     }
 
-    // Build the final user content parts
+    // Check if there are images from the previous assistant message
+    const imageBlocks = prevAssistant ? findImageBlocks(prevAssistant) : []
+    const imageParts = await convertImageBlockToImagePart(imageBlocks)
+
+    // If no images to merge, return messages as-is
+    if (imageParts.length === 0) {
+      return sdkMessages
+    }
+
+    // Build the new last user message with merged images
+    const lastUserSdk = sdkMessages[lastUserSdkIndex] as UserModelMessage
     let finalUserParts: Array<TextPart | FilePart | ImagePart> = []
-    if (lastUserSdk) {
-      if (typeof lastUserSdk.content === 'string') {
-        finalUserParts.push({ type: 'text', text: lastUserSdk.content })
-      } else if (Array.isArray(lastUserSdk.content)) {
-        finalUserParts = [...lastUserSdk.content]
-      }
-    }
+
+    if (typeof lastUserSdk.content === 'string') {
+      finalUserParts.push({ type: 'text', text: lastUserSdk.content })
+    } else if (Array.isArray(lastUserSdk.content)) {
+      finalUserParts = [...lastUserSdk.content]
+    }
 
-    // Append images from the previous assistant message if any
-    if (prevAssistant) {
-      const imageBlocks = findImageBlocks(prevAssistant)
-      const imageParts = await convertImageBlockToImagePart(imageBlocks)
-      if (imageParts.length > 0) {
-        finalUserParts.push(...imageParts)
-      }
-    }
+    // Append images from the previous assistant message
+    finalUserParts.push(...imageParts)
 
-    // If we couldn't find a last user message, fall back to returning collected system messages only
-    if (!lastUserSdk) {
-      return systemMessages
-    }
+    // Replace the last user message with the merged version
+    const result = [...sdkMessages]
+    result[lastUserSdkIndex] = { role: 'user', content: finalUserParts }
 
-    return [...systemMessages, { role: 'user', content: finalUserParts }]
+    return result
   }
 
   return sdkMessages
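Pieced together, the new tail of convertMessagesToSdkMessages reads roughly as follows. This is a consolidated sketch assembled from the hunks above, not the verbatim file: imports and the preceding conversion loop are elided, and the body of the prevAssistant search is reconstructed from the test comment about scanning backwards from messages.length - 2, since the hunk does not show it:

if (isImageEnhancementModel(model)) {
  // Find the last user SDK message index
  const lastUserSdkIndex = (() => {
    for (let i = sdkMessages.length - 1; i >= 0; i--) {
      if (sdkMessages[i].role === 'user') return i
    }
    return -1
  })()

  // If no user message found, return messages as-is
  if (lastUserSdkIndex < 0) {
    return sdkMessages
  }

  // Find the nearest preceding assistant message in original messages
  // (reconstructed: start at messages.length - 2 and walk backwards)
  let prevAssistant: Message | null = null
  for (let i = messages.length - 2; i >= 0; i--) {
    if (messages[i].role === 'assistant') {
      prevAssistant = messages[i]
      break
    }
  }

  // Check if there are images from the previous assistant message
  const imageBlocks = prevAssistant ? findImageBlocks(prevAssistant) : []
  const imageParts = await convertImageBlockToImagePart(imageBlocks)

  // If no images to merge, return messages as-is
  if (imageParts.length === 0) {
    return sdkMessages
  }

  // Build the new last user message with merged images
  const lastUserSdk = sdkMessages[lastUserSdkIndex] as UserModelMessage
  let finalUserParts: Array<TextPart | FilePart | ImagePart> = []

  if (typeof lastUserSdk.content === 'string') {
    finalUserParts.push({ type: 'text', text: lastUserSdk.content })
  } else if (Array.isArray(lastUserSdk.content)) {
    finalUserParts = [...lastUserSdk.content]
  }

  // Append images from the previous assistant message
  finalUserParts.push(...imageParts)

  // Replace the last user message with the merged version, keeping all history
  const result = [...sdkMessages]
  result[lastUserSdkIndex] = { role: 'user', content: finalUserParts }

  return result
}

return sdkMessages

Net effect: the function is now a pass-through unless there is actually an image to merge, which is what lets an LLM-only conversation flow untouched into an image generation request.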