feat: bot inbox (#77)

* feat: bot inbox

* feat: unified header

* fix: missing tool_call usage

* feat: add group name in header
This commit is contained in:
Acbox Liu
2026-02-22 01:27:24 +08:00
committed by GitHub
parent 2c6b5e5565
commit c591af14b0
42 changed files with 3367 additions and 260 deletions
+158 -129
View File
@@ -32,6 +32,18 @@ import { getMCPTools } from './tools/mcp'
import { getTools } from './tools'
import { buildIdentityHeaders } from './utils/headers'
/**
 * Expands per-step usage into one slot per response message.
 *
 * Each generation step may emit several response messages; the step's
 * token usage is attached to the FIRST message of that step and every
 * subsequent message in the same step gets `null`, so the output array
 * lines up index-for-index with the flattened message list.
 */
const buildStepUsages = (
  steps: { usage: LanguageModelUsage; response: { messages: unknown[] } }[],
): (LanguageModelUsage | null)[] => {
  // flatMap keeps the step order; a step with zero messages contributes
  // no slots (its usage is intentionally dropped, matching the contract
  // of "one slot per message").
  return steps.flatMap((step) =>
    step.response.messages.map((_, index) => (index === 0 ? step.usage : null)),
  )
}
export const buildNativeImageParts = (attachments: GatewayInputAttachment[]): ImagePart[] => {
return attachments
.filter((attachment) =>
@@ -59,6 +71,7 @@ export const createAgent = (
displayName: '',
},
auth,
inbox = [],
}: AgentParams,
fetch: AuthFetcher,
) => {
@@ -144,6 +157,7 @@ export const createAgent = (
identityContent,
soulContent,
toolsContent,
inbox,
})
}
@@ -203,7 +217,7 @@ export const createAgent = (
input.skills.forEach((skill) => enableSkill(skill))
const systemPrompt = await generateSystemPrompt()
const { tools, close } = await getAgentTools()
const { response, reasoning, text, usage } = await generateText({
const { response, reasoning, text, usage, steps } = await generateText({
model,
messages,
system: systemPrompt,
@@ -218,6 +232,7 @@ export const createAgent = (
},
tools,
})
const stepUsages = buildStepUsages(steps)
const { cleanedText, attachments: textAttachments } =
extractAttachmentsFromText(text)
const { messages: strippedMessages, attachments: messageAttachments } =
@@ -231,6 +246,7 @@ export const createAgent = (
userPrompt,
...strippedMessages,
],
usages: [null, ...stepUsages] as (LanguageModelUsage | null)[],
reasoning: reasoning.map((part) => part.text),
usage,
text: cleanedText,
@@ -258,7 +274,7 @@ export const createAgent = (
}
const messages = [...params.messages, userPrompt]
const { tools, close } = await getAgentTools()
const { response, reasoning, text, usage } = await generateText({
const { response, reasoning, text, usage, steps } = await generateText({
model,
messages,
system: generateSubagentSystemPrompt(),
@@ -273,8 +289,10 @@ export const createAgent = (
},
tools,
})
const stepUsages = buildStepUsages(steps)
return {
messages: [userPrompt, ...response.messages],
usages: [null, ...stepUsages] as (LanguageModelUsage | null)[],
reasoning: reasoning.map((part) => part.text),
usage,
text,
@@ -299,7 +317,7 @@ export const createAgent = (
const messages = [...params.messages, scheduleMessage]
params.skills.forEach((skill) => enableSkill(skill))
const { tools, close } = await getAgentTools()
const { response, reasoning, text, usage } = await generateText({
const { response, reasoning, text, usage, steps } = await generateText({
model,
messages,
system: await generateSystemPrompt(),
@@ -309,8 +327,10 @@ export const createAgent = (
},
tools,
})
const stepUsages = buildStepUsages(steps)
return {
messages: [scheduleMessage, ...response.messages],
usages: [null, ...stepUsages] as (LanguageModelUsage | null)[],
reasoning: reasoning.map((part) => part.text),
usage,
text,
@@ -350,148 +370,157 @@ export const createAgent = (
messages: ModelMessage[];
reasoning: string[];
usage: LanguageModelUsage | null;
usages: (LanguageModelUsage | null)[];
} = {
messages: [],
reasoning: [],
usage: null,
usages: [],
}
const { tools, close } = await getAgentTools()
const { fullStream } = streamText({
model,
messages,
system: systemPrompt,
stopWhen: stepCountIs(Infinity),
prepareStep: () => {
return {
system: systemPrompt,
}
},
tools,
onFinish: async ({ usage, reasoning, response }) => {
await close()
result.usage = usage as never
result.reasoning = reasoning.map((part) => part.text)
result.messages = response.messages
},
})
yield {
type: 'agent_start',
input,
}
for await (const chunk of fullStream) {
if (chunk.type === 'error') {
throw new Error(
resolveStreamErrorMessage((chunk as { error?: unknown }).error),
)
try {
const { fullStream } = streamText({
model,
messages,
system: systemPrompt,
stopWhen: stepCountIs(Infinity),
prepareStep: () => {
return {
system: systemPrompt,
}
},
tools,
onFinish: async ({ usage, reasoning, response, steps }) => {
await close()
result.usage = usage as never
result.reasoning = reasoning.map((part) => part.text)
result.messages = response.messages
result.usages = buildStepUsages(steps)
},
})
yield {
type: 'agent_start',
input,
}
switch (chunk.type) {
case 'reasoning-start':
yield {
type: 'reasoning_start',
metadata: chunk,
}
break
case 'reasoning-delta':
yield {
type: 'reasoning_delta',
delta: chunk.text,
}
break
case 'reasoning-end':
yield {
type: 'reasoning_end',
metadata: chunk,
}
break
case 'text-start':
yield {
type: 'text_start',
}
break
case 'text-delta': {
const { visibleText, attachments } = attachmentsExtractor.push(
chunk.text,
for await (const chunk of fullStream) {
if (chunk.type === 'error') {
throw new Error(
resolveStreamErrorMessage((chunk as { error?: unknown }).error),
)
if (visibleText) {
}
switch (chunk.type) {
case 'reasoning-start':
yield {
type: 'text_delta',
delta: visibleText,
type: 'reasoning_start',
metadata: chunk,
}
break
case 'reasoning-delta':
yield {
type: 'reasoning_delta',
delta: chunk.text,
}
break
case 'reasoning-end':
yield {
type: 'reasoning_end',
metadata: chunk,
}
break
case 'text-start':
yield {
type: 'text_start',
}
break
case 'text-delta': {
const { visibleText, attachments } = attachmentsExtractor.push(
chunk.text,
)
if (visibleText) {
yield {
type: 'text_delta',
delta: visibleText,
}
}
if (attachments.length) {
yield {
type: 'attachment_delta',
attachments,
}
}
break
}
if (attachments.length) {
case 'text-end': {
// Flush any remaining buffered content before ending the text stream.
const remainder = attachmentsExtractor.flushRemainder()
if (remainder.visibleText) {
yield {
type: 'text_delta',
delta: remainder.visibleText,
}
}
if (remainder.attachments.length) {
yield {
type: 'attachment_delta',
attachments: remainder.attachments,
}
}
yield {
type: 'text_end',
metadata: chunk,
}
break
}
case 'tool-call':
yield {
type: 'tool_call_start',
toolName: chunk.toolName,
toolCallId: chunk.toolCallId,
input: chunk.input,
metadata: chunk,
}
break
case 'tool-result':
yield {
type: 'tool_call_end',
toolName: chunk.toolName,
toolCallId: chunk.toolCallId,
input: chunk.input,
result: chunk.output,
metadata: chunk,
}
break
case 'file':
yield {
type: 'attachment_delta',
attachments,
attachments: [
{
type: 'image',
url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
mime: chunk.file.mediaType ?? 'image/png',
},
],
}
}
break
}
case 'text-end': {
// Flush any remaining buffered content before ending the text stream.
const remainder = attachmentsExtractor.flushRemainder()
if (remainder.visibleText) {
yield {
type: 'text_delta',
delta: remainder.visibleText,
}
}
if (remainder.attachments.length) {
yield {
type: 'attachment_delta',
attachments: remainder.attachments,
}
}
yield {
type: 'text_end',
metadata: chunk,
}
break
}
case 'tool-call':
yield {
type: 'tool_call_start',
toolName: chunk.toolName,
toolCallId: chunk.toolCallId,
input: chunk.input,
metadata: chunk,
}
break
case 'tool-result':
yield {
type: 'tool_call_end',
toolName: chunk.toolName,
toolCallId: chunk.toolCallId,
input: chunk.input,
result: chunk.output,
metadata: chunk,
}
break
case 'file':
yield {
type: 'attachment_delta',
attachments: [
{
type: 'image',
url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
mime: chunk.file.mediaType ?? 'image/png',
},
],
}
}
}
const { messages: strippedMessages } = stripAttachmentsFromMessages(
result.messages,
)
yield {
type: 'agent_end',
messages: [
userPrompt,
...strippedMessages,
],
reasoning: result.reasoning,
usage: result.usage!,
skills: getEnabledSkills(),
const { messages: strippedMessages } = stripAttachmentsFromMessages(
result.messages,
)
yield {
type: 'agent_end',
messages: [
userPrompt,
...strippedMessages,
],
usages: [null, ...result.usages],
reasoning: result.reasoning,
usage: result.usage!,
skills: getEnabledSkills(),
}
} catch (error) {
console.error(error)
throw error
}
}
+8 -1
View File
@@ -75,4 +75,11 @@ export const StdioMCPConnectionModel = z.object({
cwd: z.string().optional(),
})
export const MCPConnectionModel = z.union([HTTPMCPConnectionModel, SSEMCPConnectionModel, StdioMCPConnectionModel])
export const MCPConnectionModel = z.union([HTTPMCPConnectionModel, SSEMCPConnectionModel, StdioMCPConnectionModel])
// Runtime (zod) schema for a single inbox entry; validates request bodies
// at the HTTP boundary. Mirrors the `InboxItem` interface declared in the
// types module — keep the two in sync.
export const InboxItemModel = z.object({
  // Unique identifier of the inbox entry.
  id: z.string(),
  // Where the message originated — free-form string; exact vocabulary is
  // defined by producers (presumably group-chat / external-notifier tags;
  // TODO confirm against callers).
  source: z.string(),
  // Arbitrary structured payload; defaults to an empty object when omitted.
  content: z.record(z.string(), z.unknown()).default({}),
  // Creation timestamp as a string — format is not enforced here
  // (NOTE(review): presumably ISO 8601; verify against producers).
  createdAt: z.string(),
})
+5 -1
View File
@@ -4,7 +4,7 @@ import { createAgent } from '../agent'
import { createAuthFetcher, getBaseUrl } from '../index'
import { ModelConfig } from '../types'
import { bearerMiddleware } from '../middlewares/bearer'
import { AgentSkillModel, AllowedActionModel, AttachmentModel, IdentityContextModel, MCPConnectionModel, ModelConfigModel, ScheduleModel } from '../models'
import { AgentSkillModel, AllowedActionModel, AttachmentModel, IdentityContextModel, InboxItemModel, MCPConnectionModel, ModelConfigModel, ScheduleModel } from '../models'
import { allActions } from '../types'
import { sseChunked } from '../utils/sse'
@@ -20,6 +20,7 @@ const AgentModel = z.object({
identity: IdentityContextModel,
attachments: z.array(AttachmentModel).optional().default([]),
mcpConnections: z.array(MCPConnectionModel).optional().default([]),
inbox: z.array(InboxItemModel).optional().default([]),
})
export const chatModule = new Elysia({ prefix: '/chat' })
@@ -40,6 +41,7 @@ export const chatModule = new Elysia({ prefix: '/chat' })
},
skills: body.usableSkills,
mcpConnections: body.mcpConnections,
inbox: body.inbox,
}, authFetcher)
return ask({
query: body.query,
@@ -69,6 +71,7 @@ export const chatModule = new Elysia({ prefix: '/chat' })
},
skills: body.usableSkills,
mcpConnections: body.mcpConnections,
inbox: body.inbox,
}, authFetcher)
for await (const action of stream({
query: body.query,
@@ -108,6 +111,7 @@ export const chatModule = new Elysia({ prefix: '/chat' })
},
skills: body.usableSkills,
mcpConnections: body.mcpConnections,
inbox: body.inbox,
}, authFetcher)
return triggerSchedule({
schedule: body.schedule,
+20 -1
View File
@@ -1,5 +1,5 @@
import { block, quote } from './utils'
import { AgentSkill } from '../types'
import { AgentSkill, InboxItem } from '../types'
export interface SystemParams {
date: Date
@@ -14,6 +14,7 @@ export interface SystemParams {
soulContent?: string
toolsContent?: string
attachments?: string[]
inbox?: InboxItem[]
}
export const skillPrompt = (skill: AgentSkill) => {
@@ -25,6 +26,21 @@ ${skill.content}
`.trim()
}
// Renders the "Inbox" section of the system prompt for unread messages the
// bot was not directly mentioned in. Returns '' when there is nothing to
// show, so the caller can splice the result in unconditionally.
// The template body below is emitted verbatim to the LLM prompt — do not
// reflow its text or whitespace. `quote` is a project helper from './utils'
// (NOTE(review): presumably wraps the tool name in backticks — confirm).
const formatInbox = (items: InboxItem[]): string => {
  // Guard covers both a missing array and an empty one.
  if (!items || items.length === 0) return ''
  return `
## Inbox
You have ${items.length} unread message(s) in your inbox. These are messages from group conversations where you were not directly mentioned, or notifications from external sources. Review them to stay informed about ongoing discussions.
<inbox>
${JSON.stringify(items)}
</inbox>
Use ${quote('search_inbox')} to find older messages by keyword.
`.trim()
}
export const system = ({
date,
language,
@@ -36,6 +52,7 @@ export const system = ({
identityContent,
soulContent,
toolsContent,
inbox = [],
}: SystemParams) => {
// ── Static section (stable prefix for LLM prompt caching) ──────────
const staticHeaders = {
@@ -120,6 +137,8 @@ ${toolsContent}
${enabledSkills.map(skill => skillPrompt(skill)).join('\n\n---\n\n')}
${formatInbox(inbox)}
## Session Context
---
+1
View File
@@ -64,6 +64,7 @@ export interface AgentEndAction extends BaseAction {
skills: string[]
reasoning: string[]
usage: LanguageModelUsage
usages: (LanguageModelUsage | null)[]
}
export type AgentAction =
+8
View File
@@ -30,6 +30,13 @@ export enum AgentAction {
export const allActions = Object.values(AgentAction)
// A single unread inbox entry surfaced to the agent (injected into the
// system prompt). Mirrors the zod schema `InboxItemModel`, which performs
// the runtime validation — keep the two in sync.
export interface InboxItem {
  // Unique identifier of the entry.
  id: string
  // Origin of the message — free-form tag set by the producer
  // (e.g. a group conversation or an external notification source).
  source: string
  // Arbitrary structured payload carried with the entry.
  content: Record<string, unknown>
  // Creation timestamp as a string — format not enforced by the type
  // (NOTE(review): presumably ISO 8601; verify against producers).
  createdAt: string
}
export interface AgentParams {
model: ModelConfig
language?: string
@@ -41,6 +48,7 @@ export interface AgentParams {
identity?: IdentityContext
auth: AgentAuthContext
skills?: AgentSkill[]
inbox?: InboxItem[]
}
export interface AgentInput {