feat: add media asset system, channel lifecycle refactor, and chat attachments (#54)

This commit is contained in:
BBQ
2026-02-17 19:06:46 +08:00
committed by GitHub
parent 0bdc31311c
commit df7876a30c
106 changed files with 7942 additions and 1274 deletions
+49 -12
View File
@@ -16,6 +16,7 @@ import {
MCPConnection,
Schedule,
} from './types'
import { ModelInput, hasInputModality } from './types/model'
import { system, schedule, user, subagentSystem } from './prompts'
import { AuthFetcher } from './index'
import { createModel } from './model'
@@ -26,7 +27,8 @@ import {
dedupeAttachments,
AttachmentsStreamExtractor,
} from './utils/attachments'
import type { ContainerFileAttachment } from './types/attachment'
import type { ContainerFileAttachment, ImageAttachment } from './types/attachment'
import { readFileSync } from 'fs'
import { getMCPTools } from './tools/mcp'
import { getTools } from './tools'
import { buildIdentityHeaders } from './utils/headers'
@@ -167,27 +169,59 @@ export const createAgent = (
}
const generateUserPrompt = (input: AgentInput) => {
const images = input.attachments.filter(
(attachment) => attachment.type === 'image',
)
const files = input.attachments.filter(
const supportsImage = hasInputModality(modelConfig, ModelInput.Image)
// Separate attachments by model capability: native images vs fallback file paths.
const nativeImages = supportsImage
? input.attachments.filter((a) => a.type === 'image')
: []
const fallbackFiles = input.attachments.filter(
(a): a is ContainerFileAttachment => a.type === 'file',
)
// Images the model cannot handle natively are mentioned as path references.
const unsupportedImages: ContainerFileAttachment[] = supportsImage
? []
: input.attachments
.filter((a) => a.type === 'image')
.map((a) => ({
type: 'file' as const,
path: String((a as ImageAttachment).path || a.metadata?.path || '[image]'),
metadata: a.metadata,
}))
const allFiles: ContainerFileAttachment[] = [...fallbackFiles, ...unsupportedImages]
const text = user(input.query, {
channelIdentityId: identity.channelIdentityId || identity.contactId || '',
displayName: identity.displayName || identity.contactName || 'User',
channel: currentChannel,
conversationType: identity.conversationType || 'direct',
date: new Date(),
attachments: files,
attachments: allFiles,
})
const imageParts: ImagePart[] = nativeImages.map((image) => {
const img = image as ImageAttachment
if (img.base64) {
return { type: 'image', image: img.base64 } as ImagePart
}
if (img.path) {
try {
const data = readFileSync(img.path)
const mime = img.mime || 'image/png'
return { type: 'image', image: `data:${mime};base64,${data.toString('base64')}` } as ImagePart
} catch {
return { type: 'image', image: '' } as ImagePart
}
}
if (img.url) {
return { type: 'image', image: img.url } as ImagePart
}
return { type: 'image', image: '' } as ImagePart
}).filter((p) => p.image !== '')
const userMessage: UserModelMessage = {
role: 'user',
content: [
{ type: 'text', text },
...images.map(
(image) => ({ type: 'image', image: image.base64 }) as ImagePart,
),
...imageParts,
],
}
return userMessage
@@ -461,9 +495,12 @@ export const createAgent = (
break
case 'file':
yield {
type: 'image_delta',
image: chunk.file.base64,
metadata: chunk,
type: 'attachment_delta',
attachments: [{
type: 'image',
url: `data:${chunk.file.mediaType ?? 'image/png'};base64,${chunk.file.base64}`,
mime: chunk.file.mediaType ?? 'image/png',
}],
}
}
}
+5 -2
View File
@@ -16,7 +16,7 @@ export const ClientTypeModel = z.enum([
/**
 * Zod schema for a model configuration.
 *
 * - `modelId` and `apiKey` must be non-empty strings.
 * - `clientType` is validated by `ClientTypeModel`.
 * - `input` is the list of input modalities; each entry must be one of
 *   'text', 'image', 'audio', 'video' or 'file'.
 * - `baseUrl` is any string (an empty string is accepted).
 */
export const ModelConfigModel = z.object({
  modelId: z.string().min(1, 'Model ID is required'),
  clientType: ClientTypeModel,
  // Declared exactly once: duplicate keys in an object literal are a compile
  // error (TS1117) and only the last one would take effect at runtime.
  input: z.array(z.enum(['text', 'image', 'audio', 'video', 'file'])),
  apiKey: z.string().min(1, 'API key is required'),
  baseUrl: z.string(),
})
@@ -49,7 +49,10 @@ export const ScheduleModel = z.object({
/**
 * Zod schema for an image attachment.
 *
 * Only the `type: 'image'` tag is required. The image payload may arrive as
 * inline `base64` data or as a `path`; both are optional, and this schema does
 * not enforce that at least one of them is present.
 *
 * NOTE(review): no `url` field is declared here. `z.object` drops undeclared
 * keys by default, so confirm whether URL-only images are expected to pass
 * through this schema.
 */
export const ImageAttachmentModel = z.object({
  type: z.literal('image'),
  // Declared exactly once: duplicate keys in an object literal are a compile
  // error (TS1117) and only the last one would take effect at runtime.
  base64: z.string().optional(),
  path: z.string().optional(),
  mime: z.string().optional(),
  name: z.string().optional(),
  metadata: z.record(z.string(), z.any()).optional(),
})
-6
View File
@@ -39,11 +39,6 @@ export interface AttachmentDeltaAction extends BaseAction {
attachments: AgentAttachment[]
}
/**
 * Action tagged `'image_delta'` that carries a single `image` string in
 * addition to the fields of `BaseAction`.
 *
 * NOTE(review): the encoding of `image` is not stated here — presumably
 * base64 image data; confirm against the code that emits this action.
 */
export interface ImageDeltaAction extends BaseAction {
type: 'image_delta'
image: string
}
/**
 * Action tagged `'text_end'`. It adds no fields beyond those of `BaseAction`.
 *
 * NOTE(review): presumably marks the end of a streamed text segment — confirm
 * against the emitter.
 */
export interface TextEndAction extends BaseAction {
type: 'text_end'
}
@@ -79,7 +74,6 @@ export type AgentAction =
| TextStartAction
| TextDeltaAction
| AttachmentDeltaAction
| ImageDeltaAction
| TextEndAction
| ToolCallStartAction
| ToolCallEndAction
+7 -1
View File
@@ -1,11 +1,17 @@
/**
 * Fields shared by every agent attachment shape. Concrete attachment
 * interfaces extend this one and narrow `type` to a string literal.
 */
export interface BaseAgentAttachment {
// Discriminator tag; subtypes narrow it to a literal such as 'image'.
type: string
// Optional URL for the attachment content.
url?: string
// Optional name for the attachment.
name?: string
// Optional MIME type of the content, e.g. 'image/png'.
mime?: string
// NOTE(review): presumably an identifier into the media asset system — confirm.
asset_id?: string
// Free-form extra data; values are untyped and must be narrowed before use.
metadata?: Record<string, unknown>
}
/**
 * An image attachment.
 *
 * The image content can be supplied in any of three optional forms: inline
 * `base64` data, a `url`, or a `path`. None of them is required by the type,
 * so consumers must handle the case where all three are absent.
 */
export interface ImageAttachment extends BaseAgentAttachment {
  type: 'image'
  // Declared once, as optional: a second `base64: string` member would be a
  // duplicate-identifier error and would contradict the optional form.
  base64?: string
  url?: string
  path?: string
}
export interface ContainerFileAttachment extends BaseAgentAttachment {
+7 -1
View File
@@ -14,6 +14,9 @@ export enum ClientType {
/**
 * Input modalities that can be listed in a model configuration's `input`
 * array. Each member's value is the lowercase string form of its name.
 */
export enum ModelInput {
Text = 'text',
Image = 'image',
Audio = 'audio',
Video = 'video',
File = 'file',
}
export interface ModelConfig {
@@ -22,4 +25,7 @@ export interface ModelConfig {
modelId: string
clientType: ClientType
input: ModelInput[]
}
}
/**
 * Reports whether a model configuration lists a given input modality.
 *
 * @param config - Model configuration whose `input` array is inspected.
 * @param modality - The modality to look for.
 * @returns `true` when `modality` appears in `config.input`, otherwise `false`.
 */
export const hasInputModality = (config: ModelConfig, modality: ModelInput): boolean => {
  const { input: supportedModalities } = config
  return supportedModalities.includes(modality)
}
+1 -1
View File
@@ -10,7 +10,7 @@ const ATTACHMENTS_END = '</attachments>'
/**
 * Derives a string key that identifies an attachment.
 *
 * - File attachments are keyed by their full path: `file:<path>`.
 * - Image attachments are keyed by the first 64 characters of their inline
 *   `base64` data, or of their `url` when no base64 is present. Images that
 *   have neither are keyed by their full `path`, so that distinct on-disk
 *   images do not all collapse to the same key.
 *
 * @param a - The attachment to key.
 * @returns The key string for `a`.
 */
const getAttachmentKey = (a: AgentAttachment): string => {
  switch (a.type) {
    case 'file':
      return `file:${a.path}`
    case 'image': {
      // `base64` is optional, so it must never be dereferenced unguarded.
      const inline = a.base64 ?? a.url
      if (inline !== undefined) {
        return `image:${inline.slice(0, 64)}`
      }
      return `image:${a.path ?? ''}`
    }
  }
}