fix(agent): preload image base64 via MCP before model input

2026-04-27 07:16:19 +09:00 · 2026-02-17 21:50:51 +08:00
parent daed9d2d95
commit 05905a33da
1 changed files with 131 additions and 42 deletions
@@ -27,8 +27,10 @@ import {
  dedupeAttachments,
  AttachmentsStreamExtractor,
 } from './utils/attachments'
-import type { ContainerFileAttachment, ImageAttachment } from './types/attachment'
-import { readFileSync } from 'fs'
+import type {
+  ContainerFileAttachment,
+  ImageAttachment,
+} from './types/attachment'
 import { getMCPTools } from './tools/mcp'
 import { getTools } from './tools'
 import { buildIdentityHeaders } from './utils/headers'
@@ -121,6 +123,85 @@ export const createAgent = (
    }
  }

+  const prepareInputWithMCPImageBase64 = async (
+    input: AgentInput,
+  ): Promise<AgentInput> => {
+    if (!auth?.bearer || !identity.botId) {
+      return input
+    }
+    const url = `${auth.baseUrl.replace(/\/$/, '')}/bots/${identity.botId}/tools`
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+      Accept: 'application/json, text/event-stream',
+      Authorization: `Bearer ${auth.bearer}`,
+    }
+    if (identity.channelIdentityId) {
+      headers['X-Memoh-Channel-Identity-Id'] = identity.channelIdentityId
+    }
+    if (identity.sessionToken) {
+      headers['X-Memoh-Session-Token'] = identity.sessionToken
+    }
+    if (identity.currentPlatform) {
+      headers['X-Memoh-Current-Platform'] = identity.currentPlatform
+    }
+    if (identity.replyTarget) {
+      headers['X-Memoh-Reply-Target'] = identity.replyTarget
+    }
+    const attachments = await Promise.all(
+      input.attachments.map(async (attachment) => {
+        if (attachment.type !== 'image') {
+          return attachment
+        }
+        const image = attachment as ImageAttachment
+        if (typeof image.base64 === 'string' && image.base64.trim() !== '') {
+          return image
+        }
+        const path = String(image.path ?? '').trim()
+        if (!path) {
+          return image
+        }
+        const quotedPath = `'${path.replace(/'/g, '\'\\\'\'')}'`
+        const command = `base64 ${quotedPath} | tr -d '\\n'`
+        const body = JSON.stringify({
+          jsonrpc: '2.0',
+          id: `read-image-${quotedPath}`,
+          method: 'tools/call',
+          params: {
+            name: 'exec',
+            arguments: { command },
+          },
+        })
+        try {
+          const response = await fetch(url, { method: 'POST', headers, body })
+          if (!response.ok) {
+            return image
+          }
+          const payload = await response.json().catch(() => ({}))
+          const structured = payload?.result?.structuredContent
+          const execResult = (
+            structured && typeof structured === 'object' ? structured : null
+          ) as { stdout?: unknown; exit_code?: unknown } | null
+          const exitCode = Number(execResult?.exit_code ?? 1)
+          const stdout =
+            typeof execResult?.stdout === 'string'
+              ? execResult.stdout.trim()
+              : ''
+          if (exitCode !== 0 || stdout === '') {
+            return image
+          }
+          const mime = String(image.mime ?? '').trim() || 'image/png'
+          return {
+            ...image,
+            base64: `data:${mime};base64,${stdout}`,
+          }
+        } catch {
+          return image
+        }
+      }),
+    )
+    return { ...input, attachments }
+  }
+
  const generateSystemPrompt = async () => {
    const { identityContent, soulContent, toolsContent } =
      await loadSystemFiles()
@@ -154,14 +235,23 @@ export const createAgent = (
        name: 'builtin',
        url: `${baseUrl}/bots/${botId}/tools`,
        headers,
-      }
+      },
    ]
-    const { tools: mcpTools, close: closeMCP } = await getMCPTools([...builtins, ...mcpConnections], {
-      auth,
+    const { tools: mcpTools, close: closeMCP } = await getMCPTools(
+      [...builtins, ...mcpConnections],
+      {
+        auth,
+        fetch,
+        botId,
+      },
+    )
+    const tools = getTools(allowedActions, {
      fetch,
-      botId,
+      model: modelConfig,
+      identity,
+      auth,
+      enableSkill,
    })
-    const tools = getTools(allowedActions, { fetch, model: modelConfig, identity, auth, enableSkill })
    return {
      tools: { ...mcpTools, ...tools } as ToolSet,
      close: closeMCP,
@@ -185,10 +275,15 @@ export const createAgent = (
          .filter((a) => a.type === 'image')
          .map((a) => ({
            type: 'file' as const,
-            path: String((a as ImageAttachment).path || a.metadata?.path || '[image]'),
+            path: String(
+              (a as ImageAttachment).path || a.metadata?.path || '[image]',
+            ),
            metadata: a.metadata,
          }))
-    const allFiles: ContainerFileAttachment[] = [...fallbackFiles, ...unsupportedImages]
+    const allFiles: ContainerFileAttachment[] = [
+      ...fallbackFiles,
+      ...unsupportedImages,
+    ]

    const text = user(input.query, {
      channelIdentityId: identity.channelIdentityId || identity.contactId || '',
@@ -198,39 +293,30 @@ export const createAgent = (
      date: new Date(),
      attachments: allFiles,
    })
-    const imageParts: ImagePart[] = nativeImages.map((image) => {
-      const img = image as ImageAttachment
-      if (img.base64) {
-        return { type: 'image', image: img.base64 } as ImagePart
-      }
-      if (img.path) {
-        try {
-          const data = readFileSync(img.path)
-          const mime = img.mime || 'image/png'
-          return { type: 'image', image: `data:${mime};base64,${data.toString('base64')}` } as ImagePart
-        } catch {
-          return { type: 'image', image: '' } as ImagePart
+    const imageParts: ImagePart[] = nativeImages
+      .map((image) => {
+        const img = image as ImageAttachment
+        if (img.base64) {
+          return { type: 'image', image: img.base64 } as ImagePart
        }
-      }
-      if (img.url) {
-        return { type: 'image', image: img.url } as ImagePart
-      }
-      return { type: 'image', image: '' } as ImagePart
-    }).filter((p) => p.image !== '')
+        if (img.url) {
+          return { type: 'image', image: img.url } as ImagePart
+        }
+        return { type: 'image', image: '' } as ImagePart
+      })
+      .filter((p) => p.image !== '')
    const userMessage: UserModelMessage = {
      role: 'user',
-      content: [
-        { type: 'text', text },
-        ...imageParts,
-      ],
+      content: [{ type: 'text', text }, ...imageParts],
    }
    return userMessage
  }

  const ask = async (input: AgentInput) => {
-    const userPrompt = generateUserPrompt(input)
-    const messages = [...input.messages, userPrompt]
-    input.skills.forEach((skill) => enableSkill(skill))
+    const preparedInput = await prepareInputWithMCPImageBase64(input)
+    const userPrompt = generateUserPrompt(preparedInput)
+    const messages = [...preparedInput.messages, userPrompt]
+    preparedInput.skills.forEach((skill) => enableSkill(skill))
    const systemPrompt = await generateSystemPrompt()
    const { tools, close } = await getAgentTools()
    const { response, reasoning, text, usage } = await generateText({
@@ -368,9 +454,10 @@ export const createAgent = (
  }

  async function* stream(input: AgentInput): AsyncGenerator<AgentAction> {
-    const userPrompt = generateUserPrompt(input)
-    const messages = [...input.messages, userPrompt]
-    input.skills.forEach((skill) => enableSkill(skill))
+    const preparedInput = await prepareInputWithMCPImageBase64(input)
+    const userPrompt = generateUserPrompt(preparedInput)
+    const messages = [...preparedInput.messages, userPrompt]
+    preparedInput.skills.forEach((skill) => enableSkill(skill))
    const systemPrompt = await generateSystemPrompt()
    const attachmentsExtractor = new AttachmentsStreamExtractor()
    const result: {
@@ -496,11 +583,13 @@ export const createAgent = (
        case 'file':
          yield {
            type: 'attachment_delta',
-            attachments: [{
-              type: 'image',
-              url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
-              mime: chunk.file.mediaType ?? 'image/png',
-            }],
+            attachments: [
+              {
+                type: 'image',
+                url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
+                mime: chunk.file.mediaType ?? 'image/png',
+              },
+            ],
          }
      }
    }