mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
fix(agent): preload image base64 via MCP before model input
This commit is contained in:
+131
-42
@@ -27,8 +27,10 @@ import {
|
|||||||
dedupeAttachments,
|
dedupeAttachments,
|
||||||
AttachmentsStreamExtractor,
|
AttachmentsStreamExtractor,
|
||||||
} from './utils/attachments'
|
} from './utils/attachments'
|
||||||
import type { ContainerFileAttachment, ImageAttachment } from './types/attachment'
|
import type {
|
||||||
import { readFileSync } from 'fs'
|
ContainerFileAttachment,
|
||||||
|
ImageAttachment,
|
||||||
|
} from './types/attachment'
|
||||||
import { getMCPTools } from './tools/mcp'
|
import { getMCPTools } from './tools/mcp'
|
||||||
import { getTools } from './tools'
|
import { getTools } from './tools'
|
||||||
import { buildIdentityHeaders } from './utils/headers'
|
import { buildIdentityHeaders } from './utils/headers'
|
||||||
@@ -121,6 +123,85 @@ export const createAgent = (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fills in missing inline image data on the agent input before it is turned
 * into a model message.
 *
 * For every attachment of type `image` that has a non-empty `path` but no
 * non-empty `base64` value, a JSON-RPC `tools/call` request for the `exec`
 * tool is POSTed to `<auth.baseUrl>/bots/<identity.botId>/tools`. The command
 * base64-encodes the file at `path`; on success the tool's `stdout` is stored
 * on the attachment as a `data:<mime>;base64,...` URL (`image/png` when the
 * attachment carries no mime type).
 *
 * The step is best-effort: a non-OK HTTP status, an unparsable body, a
 * non-zero `exit_code`, empty or non-base64 output, or a thrown error all
 * leave that attachment unchanged.
 *
 * @param input - Agent input whose `attachments` may contain images without
 *   inline data.
 * @returns `input` itself when no bearer token or bot id is available;
 *   otherwise a shallow copy of `input` with a new `attachments` array.
 */
const prepareInputWithMCPImageBase64 = async (
  input: AgentInput,
): Promise<AgentInput> => {
  if (!auth?.bearer || !identity.botId) {
    return input
  }

  const url = `${auth.baseUrl.replace(/\/$/, '')}/bots/${identity.botId}/tools`
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    Accept: 'application/json, text/event-stream',
    Authorization: `Bearer ${auth.bearer}`,
  }
  // Optional identity headers are only sent when the value is present.
  if (identity.channelIdentityId) {
    headers['X-Memoh-Channel-Identity-Id'] = identity.channelIdentityId
  }
  if (identity.sessionToken) {
    headers['X-Memoh-Session-Token'] = identity.sessionToken
  }
  if (identity.currentPlatform) {
    headers['X-Memoh-Current-Platform'] = identity.currentPlatform
  }
  if (identity.replyTarget) {
    headers['X-Memoh-Reply-Target'] = identity.replyTarget
  }

  // Standard base64 alphabet with optional padding. Anything else in stdout
  // (usage text, diagnostics) must not be embedded in a data URL.
  const base64Pattern = /^[A-Za-z0-9+/]+={0,2}$/

  const attachments = await Promise.all(
    input.attachments.map(async (attachment) => {
      if (attachment.type !== 'image') {
        return attachment
      }
      const image = attachment as ImageAttachment
      // Already carries inline data: nothing to load.
      if (typeof image.base64 === 'string' && image.base64.trim() !== '') {
        return image
      }
      const path = String(image.path ?? '').trim()
      if (!path) {
        return image
      }

      // POSIX single-quote escaping: close the quote, emit an escaped quote,
      // reopen the quote.
      const quotedPath = `'${path.replace(/'/g, "'\\''")}'`
      // The file is fed through stdin instead of being passed as an argument,
      // so a path that starts with '-' can never be parsed as a base64 option.
      // `tr` strips the line wrapping that base64 adds to its output.
      const command = `base64 < ${quotedPath} | tr -d '\\n'`
      const body = JSON.stringify({
        jsonrpc: '2.0',
        id: `read-image-${quotedPath}`,
        method: 'tools/call',
        params: {
          name: 'exec',
          arguments: { command },
        },
      })

      try {
        const response = await fetch(url, { method: 'POST', headers, body })
        if (!response.ok) {
          return image
        }
        // A body that is not valid JSON is treated like an empty result.
        const payload = await response.json().catch(() => ({}))
        const structured = payload?.result?.structuredContent
        const execResult = (
          structured && typeof structured === 'object' ? structured : null
        ) as { stdout?: unknown; exit_code?: unknown } | null
        // A missing exit code counts as failure.
        const exitCode = Number(execResult?.exit_code ?? 1)
        const stdout =
          typeof execResult?.stdout === 'string'
            ? execResult.stdout.trim()
            : ''
        if (exitCode !== 0 || stdout === '' || !base64Pattern.test(stdout)) {
          return image
        }
        const mime = String(image.mime ?? '').trim() || 'image/png'
        return {
          ...image,
          base64: `data:${mime};base64,${stdout}`,
        }
      } catch {
        // Deliberate best-effort: a failed preload must not fail the request,
        // so the attachment is passed through without inline data.
        return image
      }
    }),
  )

  return { ...input, attachments }
}
|
||||||
|
|
||||||
const generateSystemPrompt = async () => {
|
const generateSystemPrompt = async () => {
|
||||||
const { identityContent, soulContent, toolsContent } =
|
const { identityContent, soulContent, toolsContent } =
|
||||||
await loadSystemFiles()
|
await loadSystemFiles()
|
||||||
@@ -154,14 +235,23 @@ export const createAgent = (
|
|||||||
name: 'builtin',
|
name: 'builtin',
|
||||||
url: `${baseUrl}/bots/${botId}/tools`,
|
url: `${baseUrl}/bots/${botId}/tools`,
|
||||||
headers,
|
headers,
|
||||||
}
|
},
|
||||||
]
|
]
|
||||||
const { tools: mcpTools, close: closeMCP } = await getMCPTools([...builtins, ...mcpConnections], {
|
const { tools: mcpTools, close: closeMCP } = await getMCPTools(
|
||||||
auth,
|
[...builtins, ...mcpConnections],
|
||||||
|
{
|
||||||
|
auth,
|
||||||
|
fetch,
|
||||||
|
botId,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
const tools = getTools(allowedActions, {
|
||||||
fetch,
|
fetch,
|
||||||
botId,
|
model: modelConfig,
|
||||||
|
identity,
|
||||||
|
auth,
|
||||||
|
enableSkill,
|
||||||
})
|
})
|
||||||
const tools = getTools(allowedActions, { fetch, model: modelConfig, identity, auth, enableSkill })
|
|
||||||
return {
|
return {
|
||||||
tools: { ...mcpTools, ...tools } as ToolSet,
|
tools: { ...mcpTools, ...tools } as ToolSet,
|
||||||
close: closeMCP,
|
close: closeMCP,
|
||||||
@@ -185,10 +275,15 @@ export const createAgent = (
|
|||||||
.filter((a) => a.type === 'image')
|
.filter((a) => a.type === 'image')
|
||||||
.map((a) => ({
|
.map((a) => ({
|
||||||
type: 'file' as const,
|
type: 'file' as const,
|
||||||
path: String((a as ImageAttachment).path || a.metadata?.path || '[image]'),
|
path: String(
|
||||||
|
(a as ImageAttachment).path || a.metadata?.path || '[image]',
|
||||||
|
),
|
||||||
metadata: a.metadata,
|
metadata: a.metadata,
|
||||||
}))
|
}))
|
||||||
const allFiles: ContainerFileAttachment[] = [...fallbackFiles, ...unsupportedImages]
|
const allFiles: ContainerFileAttachment[] = [
|
||||||
|
...fallbackFiles,
|
||||||
|
...unsupportedImages,
|
||||||
|
]
|
||||||
|
|
||||||
const text = user(input.query, {
|
const text = user(input.query, {
|
||||||
channelIdentityId: identity.channelIdentityId || identity.contactId || '',
|
channelIdentityId: identity.channelIdentityId || identity.contactId || '',
|
||||||
@@ -198,39 +293,30 @@ export const createAgent = (
|
|||||||
date: new Date(),
|
date: new Date(),
|
||||||
attachments: allFiles,
|
attachments: allFiles,
|
||||||
})
|
})
|
||||||
const imageParts: ImagePart[] = nativeImages.map((image) => {
|
const imageParts: ImagePart[] = nativeImages
|
||||||
const img = image as ImageAttachment
|
.map((image) => {
|
||||||
if (img.base64) {
|
const img = image as ImageAttachment
|
||||||
return { type: 'image', image: img.base64 } as ImagePart
|
if (img.base64) {
|
||||||
}
|
return { type: 'image', image: img.base64 } as ImagePart
|
||||||
if (img.path) {
|
|
||||||
try {
|
|
||||||
const data = readFileSync(img.path)
|
|
||||||
const mime = img.mime || 'image/png'
|
|
||||||
return { type: 'image', image: `data:${mime};base64,${data.toString('base64')}` } as ImagePart
|
|
||||||
} catch {
|
|
||||||
return { type: 'image', image: '' } as ImagePart
|
|
||||||
}
|
}
|
||||||
}
|
if (img.url) {
|
||||||
if (img.url) {
|
return { type: 'image', image: img.url } as ImagePart
|
||||||
return { type: 'image', image: img.url } as ImagePart
|
}
|
||||||
}
|
return { type: 'image', image: '' } as ImagePart
|
||||||
return { type: 'image', image: '' } as ImagePart
|
})
|
||||||
}).filter((p) => p.image !== '')
|
.filter((p) => p.image !== '')
|
||||||
const userMessage: UserModelMessage = {
|
const userMessage: UserModelMessage = {
|
||||||
role: 'user',
|
role: 'user',
|
||||||
content: [
|
content: [{ type: 'text', text }, ...imageParts],
|
||||||
{ type: 'text', text },
|
|
||||||
...imageParts,
|
|
||||||
],
|
|
||||||
}
|
}
|
||||||
return userMessage
|
return userMessage
|
||||||
}
|
}
|
||||||
|
|
||||||
const ask = async (input: AgentInput) => {
|
const ask = async (input: AgentInput) => {
|
||||||
const userPrompt = generateUserPrompt(input)
|
const preparedInput = await prepareInputWithMCPImageBase64(input)
|
||||||
const messages = [...input.messages, userPrompt]
|
const userPrompt = generateUserPrompt(preparedInput)
|
||||||
input.skills.forEach((skill) => enableSkill(skill))
|
const messages = [...preparedInput.messages, userPrompt]
|
||||||
|
preparedInput.skills.forEach((skill) => enableSkill(skill))
|
||||||
const systemPrompt = await generateSystemPrompt()
|
const systemPrompt = await generateSystemPrompt()
|
||||||
const { tools, close } = await getAgentTools()
|
const { tools, close } = await getAgentTools()
|
||||||
const { response, reasoning, text, usage } = await generateText({
|
const { response, reasoning, text, usage } = await generateText({
|
||||||
@@ -368,9 +454,10 @@ export const createAgent = (
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function* stream(input: AgentInput): AsyncGenerator<AgentAction> {
|
async function* stream(input: AgentInput): AsyncGenerator<AgentAction> {
|
||||||
const userPrompt = generateUserPrompt(input)
|
const preparedInput = await prepareInputWithMCPImageBase64(input)
|
||||||
const messages = [...input.messages, userPrompt]
|
const userPrompt = generateUserPrompt(preparedInput)
|
||||||
input.skills.forEach((skill) => enableSkill(skill))
|
const messages = [...preparedInput.messages, userPrompt]
|
||||||
|
preparedInput.skills.forEach((skill) => enableSkill(skill))
|
||||||
const systemPrompt = await generateSystemPrompt()
|
const systemPrompt = await generateSystemPrompt()
|
||||||
const attachmentsExtractor = new AttachmentsStreamExtractor()
|
const attachmentsExtractor = new AttachmentsStreamExtractor()
|
||||||
const result: {
|
const result: {
|
||||||
@@ -496,11 +583,13 @@ export const createAgent = (
|
|||||||
case 'file':
|
case 'file':
|
||||||
yield {
|
yield {
|
||||||
type: 'attachment_delta',
|
type: 'attachment_delta',
|
||||||
attachments: [{
|
attachments: [
|
||||||
type: 'image',
|
{
|
||||||
url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
|
type: 'image',
|
||||||
mime: chunk.file.mediaType ?? 'image/png',
|
url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
|
||||||
}],
|
mime: chunk.file.mediaType ?? 'image/png',
|
||||||
|
},
|
||||||
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user