fix(agent): preload image base64 via MCP before model input

This commit is contained in:
ringotypowriter
2026-02-17 21:50:51 +08:00
parent daed9d2d95
commit 05905a33da
+131 -42
View File
@@ -27,8 +27,10 @@ import {
dedupeAttachments, dedupeAttachments,
AttachmentsStreamExtractor, AttachmentsStreamExtractor,
} from './utils/attachments' } from './utils/attachments'
import type { ContainerFileAttachment, ImageAttachment } from './types/attachment' import type {
import { readFileSync } from 'fs' ContainerFileAttachment,
ImageAttachment,
} from './types/attachment'
import { getMCPTools } from './tools/mcp' import { getMCPTools } from './tools/mcp'
import { getTools } from './tools' import { getTools } from './tools'
import { buildIdentityHeaders } from './utils/headers' import { buildIdentityHeaders } from './utils/headers'
@@ -121,6 +123,85 @@ export const createAgent = (
} }
} }
/**
 * Fills in `base64` for image attachments that only carry a `path`.
 *
 * For every image attachment without a non-empty `base64`, the file at
 * `path` is read by calling the `exec` tool on the bot's `/tools` endpoint
 * (JSON-RPC `tools/call`) and the output is stored as a `data:` URL.
 *
 * This is strictly best-effort: missing credentials, a non-2xx response, an
 * unparsable body, a non-zero exit code, empty output or a thrown error all
 * leave the attachment exactly as it was.
 *
 * @param input - Agent input whose `attachments` should be preloaded.
 * @returns `input` itself when the endpoint cannot be called, otherwise a
 *   shallow copy of `input` with the processed `attachments` array.
 */
const prepareInputWithMCPImageBase64 = async (
  input: AgentInput,
): Promise<AgentInput> => {
  // Without a bearer token or a bot id the tools endpoint cannot be addressed.
  if (!auth?.bearer || !identity.botId) {
    return input
  }

  // Strip one trailing slash so the joined URL never contains `//bots`.
  const url = `${auth.baseUrl.replace(/\/$/, '')}/bots/${identity.botId}/tools`
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    Accept: 'application/json, text/event-stream',
    Authorization: `Bearer ${auth.bearer}`,
  }
  // Identity headers are optional; only send the ones that are set.
  if (identity.channelIdentityId) {
    headers['X-Memoh-Channel-Identity-Id'] = identity.channelIdentityId
  }
  if (identity.sessionToken) {
    headers['X-Memoh-Session-Token'] = identity.sessionToken
  }
  if (identity.currentPlatform) {
    headers['X-Memoh-Current-Platform'] = identity.currentPlatform
  }
  if (identity.replyTarget) {
    headers['X-Memoh-Reply-Target'] = identity.replyTarget
  }

  const attachments = await Promise.all(
    input.attachments.map(async (attachment) => {
      if (attachment.type !== 'image') {
        return attachment
      }
      const image = attachment as ImageAttachment
      // Already has inline data: nothing to load.
      if (typeof image.base64 === 'string' && image.base64.trim() !== '') {
        return image
      }
      const path = String(image.path ?? '').trim()
      if (!path) {
        return image
      }

      // POSIX single-quote escaping: close the quote, emit an escaped quote,
      // reopen the quote ( ' -> '\'' ).
      const quotedPath = `'${path.replace(/'/g, '\'\\\'\'')}'`
      // Feed the file through stdin instead of passing it as an argument, so a
      // path that begins with `-` can never be parsed as a `base64` option.
      // The pipeline's exit status is that of `tr`, so a failed read shows up
      // as empty stdout, which is rejected below.
      const command = `base64 < ${quotedPath} | tr -d '\\n'`
      const body = JSON.stringify({
        jsonrpc: '2.0',
        id: `read-image-${quotedPath}`,
        method: 'tools/call',
        params: {
          name: 'exec',
          arguments: { command },
        },
      })

      try {
        const response = await fetch(url, { method: 'POST', headers, body })
        if (!response.ok) {
          return image
        }
        // A body that is not valid JSON is treated like an empty result.
        const payload = await response.json().catch(() => ({}))
        const structured = payload?.result?.structuredContent
        const execResult = (
          structured && typeof structured === 'object' ? structured : null
        ) as { stdout?: unknown; exit_code?: unknown } | null
        // A missing exit code counts as failure.
        const exitCode = Number(execResult?.exit_code ?? 1)
        const stdout =
          typeof execResult?.stdout === 'string'
            ? execResult.stdout.trim()
            : ''
        if (exitCode !== 0 || stdout === '') {
          return image
        }
        const mime = String(image.mime ?? '').trim() || 'image/png'
        return {
          ...image,
          base64: `data:${mime};base64,${stdout}`,
        }
      } catch {
        // Network or runtime failure: fall back to the original attachment.
        return image
      }
    }),
  )

  return { ...input, attachments }
}
const generateSystemPrompt = async () => { const generateSystemPrompt = async () => {
const { identityContent, soulContent, toolsContent } = const { identityContent, soulContent, toolsContent } =
await loadSystemFiles() await loadSystemFiles()
@@ -154,14 +235,23 @@ export const createAgent = (
name: 'builtin', name: 'builtin',
url: `${baseUrl}/bots/${botId}/tools`, url: `${baseUrl}/bots/${botId}/tools`,
headers, headers,
} },
] ]
const { tools: mcpTools, close: closeMCP } = await getMCPTools([...builtins, ...mcpConnections], { const { tools: mcpTools, close: closeMCP } = await getMCPTools(
auth, [...builtins, ...mcpConnections],
{
auth,
fetch,
botId,
},
)
const tools = getTools(allowedActions, {
fetch, fetch,
botId, model: modelConfig,
identity,
auth,
enableSkill,
}) })
const tools = getTools(allowedActions, { fetch, model: modelConfig, identity, auth, enableSkill })
return { return {
tools: { ...mcpTools, ...tools } as ToolSet, tools: { ...mcpTools, ...tools } as ToolSet,
close: closeMCP, close: closeMCP,
@@ -185,10 +275,15 @@ export const createAgent = (
.filter((a) => a.type === 'image') .filter((a) => a.type === 'image')
.map((a) => ({ .map((a) => ({
type: 'file' as const, type: 'file' as const,
path: String((a as ImageAttachment).path || a.metadata?.path || '[image]'), path: String(
(a as ImageAttachment).path || a.metadata?.path || '[image]',
),
metadata: a.metadata, metadata: a.metadata,
})) }))
const allFiles: ContainerFileAttachment[] = [...fallbackFiles, ...unsupportedImages] const allFiles: ContainerFileAttachment[] = [
...fallbackFiles,
...unsupportedImages,
]
const text = user(input.query, { const text = user(input.query, {
channelIdentityId: identity.channelIdentityId || identity.contactId || '', channelIdentityId: identity.channelIdentityId || identity.contactId || '',
@@ -198,39 +293,30 @@ export const createAgent = (
date: new Date(), date: new Date(),
attachments: allFiles, attachments: allFiles,
}) })
const imageParts: ImagePart[] = nativeImages.map((image) => { const imageParts: ImagePart[] = nativeImages
const img = image as ImageAttachment .map((image) => {
if (img.base64) { const img = image as ImageAttachment
return { type: 'image', image: img.base64 } as ImagePart if (img.base64) {
} return { type: 'image', image: img.base64 } as ImagePart
if (img.path) {
try {
const data = readFileSync(img.path)
const mime = img.mime || 'image/png'
return { type: 'image', image: `data:${mime};base64,${data.toString('base64')}` } as ImagePart
} catch {
return { type: 'image', image: '' } as ImagePart
} }
} if (img.url) {
if (img.url) { return { type: 'image', image: img.url } as ImagePart
return { type: 'image', image: img.url } as ImagePart }
} return { type: 'image', image: '' } as ImagePart
return { type: 'image', image: '' } as ImagePart })
}).filter((p) => p.image !== '') .filter((p) => p.image !== '')
const userMessage: UserModelMessage = { const userMessage: UserModelMessage = {
role: 'user', role: 'user',
content: [ content: [{ type: 'text', text }, ...imageParts],
{ type: 'text', text },
...imageParts,
],
} }
return userMessage return userMessage
} }
const ask = async (input: AgentInput) => { const ask = async (input: AgentInput) => {
const userPrompt = generateUserPrompt(input) const preparedInput = await prepareInputWithMCPImageBase64(input)
const messages = [...input.messages, userPrompt] const userPrompt = generateUserPrompt(preparedInput)
input.skills.forEach((skill) => enableSkill(skill)) const messages = [...preparedInput.messages, userPrompt]
preparedInput.skills.forEach((skill) => enableSkill(skill))
const systemPrompt = await generateSystemPrompt() const systemPrompt = await generateSystemPrompt()
const { tools, close } = await getAgentTools() const { tools, close } = await getAgentTools()
const { response, reasoning, text, usage } = await generateText({ const { response, reasoning, text, usage } = await generateText({
@@ -368,9 +454,10 @@ export const createAgent = (
} }
async function* stream(input: AgentInput): AsyncGenerator<AgentAction> { async function* stream(input: AgentInput): AsyncGenerator<AgentAction> {
const userPrompt = generateUserPrompt(input) const preparedInput = await prepareInputWithMCPImageBase64(input)
const messages = [...input.messages, userPrompt] const userPrompt = generateUserPrompt(preparedInput)
input.skills.forEach((skill) => enableSkill(skill)) const messages = [...preparedInput.messages, userPrompt]
preparedInput.skills.forEach((skill) => enableSkill(skill))
const systemPrompt = await generateSystemPrompt() const systemPrompt = await generateSystemPrompt()
const attachmentsExtractor = new AttachmentsStreamExtractor() const attachmentsExtractor = new AttachmentsStreamExtractor()
const result: { const result: {
@@ -496,11 +583,13 @@ export const createAgent = (
case 'file': case 'file':
yield { yield {
type: 'attachment_delta', type: 'attachment_delta',
attachments: [{ attachments: [
type: 'image', {
url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`, type: 'image',
mime: chunk.file.mediaType ?? 'image/png', url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
}], mime: chunk.file.mediaType ?? 'image/png',
},
],
} }
} }
} }