import {
  generateText,
  ImagePart,
  LanguageModelUsage,
  ModelMessage,
  stepCountIs,
  streamText,
  ToolSet,
  UserModelMessage,
} from 'ai'
import {
  AgentInput,
  AgentParams,
  AgentSkill,
  allActions,
  MCPConnection,
  Schedule,
} from './types'
import { ModelInput, hasInputModality } from './types/model'
import { system, schedule, user, subagentSystem } from './prompts'
import { AuthFetcher } from './index'
import { createModel } from './model'
import { AgentAction } from './types/action'
import {
  extractAttachmentsFromText,
  stripAttachmentsFromMessages,
  dedupeAttachments,
  AttachmentsStreamExtractor,
} from './utils/attachments'
import type {
  ContainerFileAttachment,
  ImageAttachment,
} from './types/attachment'
import { getMCPTools } from './tools/mcp'
import { getTools } from './tools'
import { buildIdentityHeaders } from './utils/headers'

/**
 * Builds an agent bound to one model configuration and one identity.
 *
 * The returned object exposes four entry points: `ask` (single-shot),
 * `stream` (incremental actions), `askAsSubagent` and `triggerSchedule`.
 * The set of enabled skills is kept in this closure, so it is shared by every
 * call made through the same agent instance.
 *
 * @param params - Agent configuration; every field except `model` and `auth`
 *   has a default applied in the destructuring below.
 * @param fetch - Fetcher used for the bot `/tools` endpoint and handed to the
 *   tool factories.
 * @returns The `stream`, `ask`, `askAsSubagent` and `triggerSchedule` functions.
 */
export const createAgent = (
  {
    model: modelConfig,
    activeContextTime = 24 * 60,
    language = 'Same as the user input',
    allowedActions = allActions,
    channels = [],
    skills = [],
    mcpConnections = [],
    currentChannel = 'Unknown Channel',
    identity = {
      botId: '',
      containerId: '',
      channelIdentityId: '',
      displayName: '',
    },
    auth,
  }: AgentParams,
  fetch: AuthFetcher,
) => {
  const model = createModel(modelConfig)
  const enabledSkills: AgentSkill[] = []

  /**
   * Marks a skill as enabled by name. Unknown names are ignored, and a skill
   * that is already enabled is not added a second time, so repeated calls
   * with the same input do not accumulate duplicates.
   */
  const enableSkill = (skill: string) => {
    if (enabledSkills.some((s) => s.name === skill)) {
      return
    }
    const agentSkill = skills.find((s) => s.name === skill)
    if (agentSkill) {
      enabledSkills.push(agentSkill)
    }
  }

  /** Names of the currently enabled skills, in the order they were enabled. */
  const getEnabledSkills = () => {
    return enabledSkills.map((skill) => skill.name)
  }

  /**
   * Reads IDENTITY.md, SOUL.md and TOOLS.md through the bot's `read` tool.
   * Returns empty strings for all three when no bearer token or bot id is
   * configured; an individual file resolves to '' when its request is not OK
   * or its payload has no string content.
   */
  const loadSystemFiles = async () => {
    if (!auth?.bearer || !identity.botId) {
      return {
        identityContent: '',
        soulContent: '',
        toolsContent: '',
      }
    }

    const readViaMCP = async (path: string): Promise<string> => {
      const url = `${auth.baseUrl.replace(/\/$/, '')}/bots/${identity.botId}/tools`
      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
        Accept: 'application/json, text/event-stream',
        Authorization: `Bearer ${auth.bearer}`,
      }
      if (identity.channelIdentityId) {
        headers['X-Memoh-Channel-Identity-Id'] = identity.channelIdentityId
      }
      const body = JSON.stringify({
        jsonrpc: '2.0',
        id: `read-${path}`,
        method: 'tools/call',
        params: { name: 'read', arguments: { path } },
      })
      const response = await fetch(url, { method: 'POST', headers, body })
      if (!response.ok) return ''
      const data = await response.json().catch(() => ({}))
      // Prefer the structured payload; fall back to the first text part.
      const structured =
        data?.result?.structuredContent ?? data?.result?.content?.[0]?.text
      if (typeof structured === 'string') {
        try {
          const parsed = JSON.parse(structured)
          return typeof parsed?.content === 'string' ? parsed.content : ''
        } catch {
          // Not JSON: treat the text itself as the file content.
          return structured
        }
      }
      if (typeof structured === 'object' && structured?.content) {
        return typeof structured.content === 'string' ? structured.content : ''
      }
      return ''
    }

    const [identityContent, soulContent, toolsContent] = await Promise.all([
      readViaMCP('IDENTITY.md'),
      readViaMCP('SOUL.md'),
      readViaMCP('TOOLS.md'),
    ])
    return {
      identityContent,
      soulContent,
      toolsContent,
    }
  }

  /**
   * Fills in `base64` for image attachments that only carry a `path`, by
   * running `base64 <path>` through the bot's `exec` tool. Any failure (non-OK
   * response, non-zero exit code, empty output, thrown error) leaves that
   * attachment unchanged. Returns the input untouched when no bearer token or
   * bot id is configured.
   */
  const prepareInputWithMCPImageBase64 = async (
    input: AgentInput,
  ): Promise<AgentInput> => {
    if (!auth?.bearer || !identity.botId) {
      return input
    }
    const url = `${auth.baseUrl.replace(/\/$/, '')}/bots/${identity.botId}/tools`
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      Accept: 'application/json, text/event-stream',
      Authorization: `Bearer ${auth.bearer}`,
    }
    if (identity.channelIdentityId) {
      headers['X-Memoh-Channel-Identity-Id'] = identity.channelIdentityId
    }
    if (identity.sessionToken) {
      headers['X-Memoh-Session-Token'] = identity.sessionToken
    }
    if (identity.currentPlatform) {
      headers['X-Memoh-Current-Platform'] = identity.currentPlatform
    }
    if (identity.replyTarget) {
      headers['X-Memoh-Reply-Target'] = identity.replyTarget
    }

    const attachments = await Promise.all(
      input.attachments.map(async (attachment) => {
        if (attachment.type !== 'image') {
          return attachment
        }
        const image = attachment as ImageAttachment
        if (typeof image.base64 === 'string' && image.base64.trim() !== '') {
          return image
        }
        const path = String(image.path ?? '').trim()
        if (!path) {
          return image
        }
        // Single-quote the path for the shell, escaping embedded quotes.
        const quotedPath = `'${path.replace(/'/g, '\'\\\'\'')}'`
        const command = `base64 ${quotedPath} | tr -d '\\n'`
        const body = JSON.stringify({
          jsonrpc: '2.0',
          id: `read-image-${quotedPath}`,
          method: 'tools/call',
          params: {
            name: 'exec',
            arguments: { command },
          },
        })
        try {
          const response = await fetch(url, { method: 'POST', headers, body })
          if (!response.ok) {
            return image
          }
          const payload = await response.json().catch(() => ({}))
          const structured = payload?.result?.structuredContent
          const execResult = (
            structured && typeof structured === 'object' ? structured : null
          ) as { stdout?: unknown; exit_code?: unknown } | null
          // A missing exit code is treated as a failure.
          const exitCode = Number(execResult?.exit_code ?? 1)
          const stdout =
            typeof execResult?.stdout === 'string'
              ? execResult.stdout.trim()
              : ''
          if (exitCode !== 0 || stdout === '') {
            return image
          }
          const mime = String(image.mime ?? '').trim() || 'image/png'
          return {
            ...image,
            base64: `data:${mime};base64,${stdout}`,
          }
        } catch {
          // Best effort: keep the attachment as-is when the lookup fails.
          return image
        }
      }),
    )
    return { ...input, attachments }
  }

  /** Renders the main system prompt from the agent settings and system files. */
  const generateSystemPrompt = async () => {
    const { identityContent, soulContent, toolsContent } =
      await loadSystemFiles()
    return system({
      date: new Date(),
      language,
      maxContextLoadTime: activeContextTime,
      channels,
      currentChannel,
      skills,
      enabledSkills,
      identityContent,
      soulContent,
      toolsContent,
    })
  }

  /**
   * Connects the builtin bot tools endpoint plus the configured MCP
   * connections and merges them with the locally defined tools (local tools
   * win on a name clash). Returns an empty tool set with a no-op `close` when
   * auth, base URL or bot id is missing.
   */
  const getAgentTools = async () => {
    // `auth` is treated as possibly absent, matching the other helpers here.
    const baseUrl = (auth?.baseUrl ?? '').replace(/\/$/, '')
    const botId = (identity.botId ?? '').trim()
    if (!auth || !baseUrl || !botId) {
      return {
        tools: {},
        close: async () => {},
      }
    }
    const headers = buildIdentityHeaders(identity, auth)
    const builtins: MCPConnection[] = [
      {
        type: 'http',
        name: 'builtin',
        url: `${baseUrl}/bots/${botId}/tools`,
        headers,
      },
    ]
    const { tools: mcpTools, close: closeMCP } = await getMCPTools(
      [...builtins, ...mcpConnections],
      {
        auth,
        fetch,
        botId,
      },
    )
    const tools = getTools(allowedActions, {
      fetch,
      model: modelConfig,
      identity,
      auth,
      enableSkill,
    })
    return {
      tools: { ...mcpTools, ...tools } as ToolSet,
      close: closeMCP,
    }
  }

  /**
   * Acquires the agent tools, runs `run` with them and always closes the tool
   * connections afterwards, including when `run` rejects.
   */
  const withAgentTools = async <T>(
    run: (tools: ToolSet) => Promise<T>,
  ): Promise<T> => {
    const { tools, close } = await getAgentTools()
    try {
      return await run(tools)
    } finally {
      await close()
    }
  }

  /**
   * Builds the user message for one turn: the rendered text prompt plus, when
   * the model accepts image input, the images as native image parts.
   */
  const generateUserPrompt = (input: AgentInput) => {
    const supportsImage = hasInputModality(modelConfig, ModelInput.Image)

    // Separate attachments by model capability: native images vs fallback file paths.
    const nativeImages = supportsImage
      ? input.attachments.filter((a) => a.type === 'image')
      : []
    const fallbackFiles = input.attachments.filter(
      (a): a is ContainerFileAttachment => a.type === 'file',
    )

    // Images the model cannot handle natively are mentioned as path references.
    const unsupportedImages: ContainerFileAttachment[] = supportsImage
      ? []
      : input.attachments
          .filter((a) => a.type === 'image')
          .map((a) => ({
            type: 'file' as const,
            path: String(
              (a as ImageAttachment).path || a.metadata?.path || '[image]',
            ),
            metadata: a.metadata,
          }))
    const allFiles: ContainerFileAttachment[] = [
      ...fallbackFiles,
      ...unsupportedImages,
    ]

    const text = user(input.query, {
      channelIdentityId: identity.channelIdentityId || identity.contactId || '',
      displayName: identity.displayName || identity.contactName || 'User',
      channel: currentChannel,
      conversationType: identity.conversationType || 'direct',
      date: new Date(),
      attachments: allFiles,
    })

    // Prefer inline base64 over a URL; images with neither are dropped.
    const imageParts: ImagePart[] = nativeImages
      .map((image) => {
        const img = image as ImageAttachment
        if (img.base64) {
          return { type: 'image', image: img.base64 } as ImagePart
        }
        if (img.url) {
          return { type: 'image', image: img.url } as ImagePart
        }
        return { type: 'image', image: '' } as ImagePart
      })
      .filter((p) => p.image !== '')

    const userMessage: UserModelMessage = {
      role: 'user',
      content: [{ type: 'text', text }, ...imageParts],
    }
    return userMessage
  }

  /**
   * Runs one non-streaming turn and returns the new messages, reasoning,
   * usage, cleaned text, extracted attachments and enabled skill names.
   */
  const ask = async (input: AgentInput) => {
    const preparedInput = await prepareInputWithMCPImageBase64(input)
    const userPrompt = generateUserPrompt(preparedInput)
    const messages = [...preparedInput.messages, userPrompt]
    preparedInput.skills.forEach((skill) => enableSkill(skill))
    const systemPrompt = await generateSystemPrompt()
    const { response, reasoning, text, usage } = await withAgentTools(
      (tools) =>
        generateText({
          model,
          messages,
          system: systemPrompt,
          stopWhen: stepCountIs(Infinity),
          prepareStep: () => {
            return {
              system: systemPrompt,
            }
          },
          tools,
        }),
    )
    const { cleanedText, attachments: textAttachments } =
      extractAttachmentsFromText(text)
    const { messages: strippedMessages, attachments: messageAttachments } =
      stripAttachmentsFromMessages(response.messages)
    const allAttachments = dedupeAttachments([
      ...textAttachments,
      ...messageAttachments,
    ])
    return {
      messages: strippedMessages,
      reasoning: reasoning.map((part) => part.text),
      usage,
      text: cleanedText,
      attachments: allAttachments,
      skills: getEnabledSkills(),
    }
  }

  /**
   * Runs one non-streaming turn with the subagent system prompt built from
   * `name` and `description`. The returned messages start with the user
   * prompt that was sent.
   */
  const askAsSubagent = async (params: {
    input: string;
    name: string;
    description: string;
    messages: ModelMessage[];
  }) => {
    const userPrompt: UserModelMessage = {
      role: 'user',
      content: [{ type: 'text', text: params.input }],
    }
    const generateSubagentSystemPrompt = () => {
      return subagentSystem({
        date: new Date(),
        name: params.name,
        description: params.description,
      })
    }
    const messages = [...params.messages, userPrompt]
    const { response, reasoning, text, usage } = await withAgentTools(
      (tools) =>
        generateText({
          model,
          messages,
          system: generateSubagentSystemPrompt(),
          stopWhen: stepCountIs(Infinity),
          prepareStep: () => {
            return {
              system: generateSubagentSystemPrompt(),
            }
          },
          tools,
        }),
    )
    return {
      messages: [userPrompt, ...response.messages],
      reasoning: reasoning.map((part) => part.text),
      usage,
      text,
      skills: getEnabledSkills(),
    }
  }

  /**
   * Runs one non-streaming turn whose user message is the rendered schedule
   * prompt. The returned messages start with that schedule message.
   */
  const triggerSchedule = async (params: {
    schedule: Schedule;
    messages: ModelMessage[];
    skills: string[];
  }) => {
    const scheduleMessage: UserModelMessage = {
      role: 'user',
      content: [
        {
          type: 'text',
          text: schedule({ schedule: params.schedule, date: new Date() }),
        },
      ],
    }
    const messages = [...params.messages, scheduleMessage]
    params.skills.forEach((skill) => enableSkill(skill))
    const { response, reasoning, text, usage } = await withAgentTools(
      // The system prompt is built inside the callback so that a failure
      // while loading it still closes the tool connections.
      async (tools) =>
        generateText({
          model,
          messages,
          system: await generateSystemPrompt(),
          stopWhen: stepCountIs(Infinity),
          tools,
        }),
    )
    return {
      messages: [scheduleMessage, ...response.messages],
      reasoning: reasoning.map((part) => part.text),
      usage,
      text,
      skills: getEnabledSkills(),
    }
  }

  /**
   * Extracts a human-readable message from a stream error value, checking in
   * order: an Error, a string, then an object's `message` / `error` fields.
   * Falls back to 'Model stream failed'.
   */
  const resolveStreamErrorMessage = (raw: unknown): string => {
    if (raw instanceof Error && raw.message.trim()) {
      return raw.message
    }
    if (typeof raw === 'string' && raw.trim()) {
      return raw
    }
    if (raw && typeof raw === 'object') {
      const candidate = raw as { message?: unknown; error?: unknown }
      if (typeof candidate.message === 'string' && candidate.message.trim()) {
        return candidate.message
      }
      if (typeof candidate.error === 'string' && candidate.error.trim()) {
        return candidate.error
      }
      if (candidate.error instanceof Error && candidate.error.message.trim()) {
        return candidate.error.message
      }
    }
    return 'Model stream failed'
  }

  /**
   * Runs one streaming turn, yielding `agent_start`, then one action per
   * handled stream chunk, then `agent_end` with the final messages, reasoning,
   * usage and enabled skill names.
   *
   * @throws Error when the model stream emits an `error` chunk.
   */
  async function* stream(input: AgentInput): AsyncGenerator<AgentAction> {
    const preparedInput = await prepareInputWithMCPImageBase64(input)
    const userPrompt = generateUserPrompt(preparedInput)
    const messages = [...preparedInput.messages, userPrompt]
    preparedInput.skills.forEach((skill) => enableSkill(skill))
    const systemPrompt = await generateSystemPrompt()
    const attachmentsExtractor = new AttachmentsStreamExtractor()
    const result: {
      messages: ModelMessage[];
      reasoning: string[];
      usage: LanguageModelUsage | null;
    } = {
      messages: [],
      reasoning: [],
      usage: null,
    }
    const { tools, close } = await getAgentTools()
    try {
      const { fullStream } = streamText({
        model,
        messages,
        system: systemPrompt,
        stopWhen: stepCountIs(Infinity),
        prepareStep: () => {
          return {
            system: systemPrompt,
          }
        },
        tools,
        onFinish: async ({ usage, reasoning, response }) => {
          result.usage = usage as never
          result.reasoning = reasoning.map((part) => part.text)
          result.messages = response.messages
        },
      })
      yield {
        type: 'agent_start',
        input,
      }
      for await (const chunk of fullStream) {
        if (chunk.type === 'error') {
          throw new Error(
            resolveStreamErrorMessage((chunk as { error?: unknown }).error),
          )
        }
        switch (chunk.type) {
          case 'reasoning-start':
            yield {
              type: 'reasoning_start',
              metadata: chunk,
            }
            break
          case 'reasoning-delta':
            yield {
              type: 'reasoning_delta',
              delta: chunk.text,
            }
            break
          case 'reasoning-end':
            yield {
              type: 'reasoning_end',
              metadata: chunk,
            }
            break
          case 'text-start':
            yield {
              type: 'text_start',
            }
            break
          case 'text-delta': {
            const { visibleText, attachments } = attachmentsExtractor.push(
              chunk.text,
            )
            if (visibleText) {
              yield {
                type: 'text_delta',
                delta: visibleText,
              }
            }
            if (attachments.length) {
              yield {
                type: 'attachment_delta',
                attachments,
              }
            }
            break
          }
          case 'text-end': {
            // Flush any remaining buffered content before ending the text stream.
            const remainder = attachmentsExtractor.flushRemainder()
            if (remainder.visibleText) {
              yield {
                type: 'text_delta',
                delta: remainder.visibleText,
              }
            }
            if (remainder.attachments.length) {
              yield {
                type: 'attachment_delta',
                attachments: remainder.attachments,
              }
            }
            yield {
              type: 'text_end',
              metadata: chunk,
            }
            break
          }
          case 'tool-call':
            yield {
              type: 'tool_call_start',
              toolName: chunk.toolName,
              toolCallId: chunk.toolCallId,
              input: chunk.input,
              metadata: chunk,
            }
            break
          case 'tool-result':
            yield {
              type: 'tool_call_end',
              toolName: chunk.toolName,
              toolCallId: chunk.toolCallId,
              input: chunk.input,
              result: chunk.output,
              metadata: chunk,
            }
            break
          case 'file':
            yield {
              type: 'attachment_delta',
              attachments: [
                {
                  type: 'image',
                  url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
                  mime: chunk.file.mediaType ?? 'image/png',
                },
              ],
            }
            break
        }
      }
    } finally {
      // Runs on normal completion, on a thrown stream error, and when the
      // consumer stops iterating early, so tool connections are not leaked.
      await close()
    }
    const { messages: strippedMessages } = stripAttachmentsFromMessages(
      result.messages,
    )
    yield {
      type: 'agent_end',
      messages: strippedMessages,
      reasoning: result.reasoning,
      // NOTE(review): relies on onFinish having populated usage before the
      // stream ends — confirm against the AI SDK's callback ordering.
      usage: result.usage!,
      skills: getEnabledSkills(),
    }
  }

  return {
    stream,
    ask,
    askAsSubagent,
    triggerSchedule,
  }
}