feat: add media asset system, channel lifecycle refactor, and chat attachments (#54)

This commit is contained in:
BBQ
2026-02-17 19:06:46 +08:00
committed by GitHub
parent 0bdc31311c
commit df7876a30c
106 changed files with 7942 additions and 1274 deletions
@@ -0,0 +1,70 @@
package flow
import "github.com/memohai/memoh/internal/models"
// attachmentModality maps an attachment type string ("image", "audio", "video",
// "file") to the model input modality required to consume it natively.
// routeAttachmentsByCapability looks attachment types up here; a type with no
// entry is treated as unknown and is always routed to the fallback path.
var attachmentModality = map[string]string{
"image": models.ModelInputImage,
"audio": models.ModelInputAudio,
"video": models.ModelInputVideo,
"file": models.ModelInputFile,
}
// gatewayAttachment is the structured attachment payload sent to the agent gateway.
// Only fields consumable by the agent/LLM are serialized; internal references
// (asset_id, platform_key, url) are stripped before dispatch.
// Every field except Type is omitted from the JSON output when empty.
type gatewayAttachment struct {
// Type is the attachment kind; it is the key looked up in attachmentModality
// when routing by model capability.
Type string `json:"type"`
// Base64 carries inline content (presumably base64-encoded bytes; confirm with producer).
Base64 string `json:"base64,omitempty"`
// Path is a file path reference to the attachment.
Path string `json:"path,omitempty"`
// Mime is the attachment's MIME type as supplied by the request.
Mime string `json:"mime,omitempty"`
// Name is the attachment's display/file name as supplied by the request.
Name string `json:"name,omitempty"`
// Metadata is free-form extra data passed through unchanged.
Metadata map[string]any `json:"metadata,omitempty"`
}
// capabilityRouteResult holds the outcome of splitting attachments by model capability.
// routeAttachmentsByCapability fills both slices with non-nil values and keeps
// the relative input order within each of them.
type capabilityRouteResult struct {
// Native are attachments the model can consume directly as multimodal input.
Native []gatewayAttachment
// Fallback are attachments whose modality is unsupported or whose type is
// unknown. The router leaves them unmodified; the caller is responsible for
// turning them into container file path references for tool-based access.
Fallback []gatewayAttachment
}
// routeAttachmentsByCapability partitions attachments according to the input
// modalities a model declares.
//
// An attachment lands in Native only when its Type has an entry in
// attachmentModality and the mapped modality appears in modalities. Everything
// else, including attachments with an unrecognized Type, lands in Fallback.
// Attachments are copied unmodified and keep their relative order. Both result
// slices are always non-nil.
func routeAttachmentsByCapability(modalities []string, attachments []gatewayAttachment) capabilityRouteResult {
	accepted := make(map[string]bool, len(modalities))
	for i := range modalities {
		accepted[modalities[i]] = true
	}

	native := make([]gatewayAttachment, 0, len(attachments))
	fallback := make([]gatewayAttachment, 0)
	for i := range attachments {
		item := attachments[i]
		modality, known := attachmentModality[item.Type]
		if known && accepted[modality] {
			native = append(native, item)
			continue
		}
		// Unknown type or unsupported modality: leave it for the fallback path.
		fallback = append(fallback, item)
	}

	return capabilityRouteResult{Native: native, Fallback: fallback}
}
// attachmentsToAny boxes each typed gateway attachment into an any value so the
// list can be placed in a []any field for JSON serialization. The result is
// always non-nil and has the same length and order as atts.
func attachmentsToAny(atts []gatewayAttachment) []any {
	boxed := make([]any, len(atts))
	for i := range atts {
		boxed[i] = atts[i]
	}
	return boxed
}
@@ -0,0 +1,67 @@
package flow
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestRouteAttachmentsByCapability_AllSupported(t *testing.T) {
modalities := []string{"text", "image", "audio"}
attachments := []gatewayAttachment{
{Type: "image", Base64: "abc"},
{Type: "audio", Path: "/data/voice.wav"},
}
result := routeAttachmentsByCapability(modalities, attachments)
assert.Len(t, result.Native, 2)
assert.Len(t, result.Fallback, 0)
}
func TestRouteAttachmentsByCapability_TextOnly(t *testing.T) {
modalities := []string{"text"}
attachments := []gatewayAttachment{
{Type: "image", Base64: "abc"},
{Type: "video", Path: "/data/video.mp4"},
}
result := routeAttachmentsByCapability(modalities, attachments)
assert.Len(t, result.Native, 0)
assert.Len(t, result.Fallback, 2)
}
func TestRouteAttachmentsByCapability_Mixed(t *testing.T) {
modalities := []string{"text", "image"}
attachments := []gatewayAttachment{
{Type: "image", Base64: "abc"},
{Type: "video", Path: "/data/video.mp4"},
{Type: "audio", Path: "/data/audio.mp3"},
}
result := routeAttachmentsByCapability(modalities, attachments)
assert.Len(t, result.Native, 1)
assert.Equal(t, "image", result.Native[0].Type)
assert.Len(t, result.Fallback, 2)
}
func TestRouteAttachmentsByCapability_UnknownType(t *testing.T) {
modalities := []string{"text", "image"}
attachments := []gatewayAttachment{
{Type: "hologram", Path: "/data/holo.dat"},
}
result := routeAttachmentsByCapability(modalities, attachments)
assert.Len(t, result.Native, 0)
assert.Len(t, result.Fallback, 1)
}
func TestRouteAttachmentsByCapability_Empty(t *testing.T) {
result := routeAttachmentsByCapability([]string{"text"}, nil)
assert.Len(t, result.Native, 0)
assert.Len(t, result.Fallback, 0)
}
func TestAttachmentsToAny(t *testing.T) {
atts := []gatewayAttachment{
{Type: "image", Base64: "abc"},
{Type: "file", Path: "/data/doc.pdf"},
}
result := attachmentsToAny(atts)
assert.Len(t, result, 2)
}
+79 -6
View File
@@ -177,8 +177,8 @@ type resolvedContext struct {
}
func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (resolvedContext, error) {
if strings.TrimSpace(req.Query) == "" {
return resolvedContext{}, fmt.Errorf("query is required")
if strings.TrimSpace(req.Query) == "" && len(req.Attachments) == 0 {
return resolvedContext{}, fmt.Errorf("query or attachments is required")
}
if strings.TrimSpace(req.BotID) == "" {
return resolvedContext{}, fmt.Errorf("bot id is required")
@@ -252,7 +252,7 @@ func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (r
Model: gatewayModelConfig{
ModelID: chatModel.ModelID,
ClientType: clientType,
Input: chatModel.Input,
Input: chatModel.InputModalities,
APIKey: provider.ApiKey,
BaseURL: provider.BaseUrl,
},
@@ -273,7 +273,7 @@ func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (r
ConversationType: strings.TrimSpace(req.ConversationType),
SessionToken: req.ChatToken,
},
Attachments: []any{},
Attachments: r.routeAndMergeAttachments(chatModel, req),
}
return resolvedContext{payload: payload, model: chatModel, provider: provider}, nil
@@ -583,6 +583,50 @@ func (r *Resolver) tryStoreStream(ctx context.Context, req conversation.ChatRequ
return false, nil
}
// routeAndMergeAttachments builds the attachment list placed in the gateway
// request payload.
//
// Request attachments are first reduced to gatewayAttachment values (dropping
// URL, PlatformKey, AssetID and Size), then split by the model's input
// modalities. The merged output is ordered as:
//
//  1. attachments the model supports natively, unchanged;
//  2. unsupported attachments that carry only Base64 and no Path, unchanged,
//     because there is no path to downgrade them to; the agent may attempt
//     best-effort processing or skip them;
//  3. all remaining unsupported attachments with Type rewritten to "file" so
//     they are handed over as file-path references.
//
// The returned slice is never nil.
func (r *Resolver) routeAndMergeAttachments(model models.GetResponse, req conversation.ChatRequest) []any {
	if len(req.Attachments) == 0 {
		return []any{}
	}

	candidates := make([]gatewayAttachment, len(req.Attachments))
	for i := range req.Attachments {
		src := req.Attachments[i]
		candidates[i] = gatewayAttachment{
			Type:     src.Type,
			Base64:   src.Base64,
			Path:     src.Path,
			Mime:     src.Mime,
			Name:     src.Name,
			Metadata: src.Metadata,
		}
	}

	split := routeAttachmentsByCapability(model.InputModalities, candidates)

	direct := split.Native
	pathRefs := make([]gatewayAttachment, 0, len(split.Fallback))
	for _, item := range split.Fallback {
		if item.Path == "" && item.Base64 != "" {
			// Inline-only content cannot become a path reference; pass it
			// through alongside the native attachments.
			direct = append(direct, item)
			continue
		}
		item.Type = "file"
		pathRefs = append(pathRefs, item)
	}

	merged := make([]any, 0, len(direct)+len(pathRefs))
	merged = append(merged, attachmentsToAny(direct)...)
	merged = append(merged, attachmentsToAny(pathRefs)...)
	return merged
}
// --- container resolution ---
func (r *Resolver) resolveContainerID(ctx context.Context, botID, explicit string) string {
@@ -720,7 +764,7 @@ func (r *Resolver) persistUserMessage(ctx context.Context, req conversation.Chat
return fmt.Errorf("bot id is required for persistence")
}
text := strings.TrimSpace(req.Query)
if text == "" {
if text == "" && len(req.Attachments) == 0 {
return nil
}
@@ -743,6 +787,7 @@ func (r *Resolver) persistUserMessage(ctx context.Context, req conversation.Chat
Role: "user",
Content: content,
Metadata: buildRouteMetadata(req),
Assets: chatAttachmentsToAssetRefs(req.Attachments),
})
return err
}
@@ -758,7 +803,9 @@ func (r *Resolver) storeRound(ctx context.Context, req conversation.ChatRequest,
break
}
}
if !req.UserMessagePersisted && !hasUserQuery && strings.TrimSpace(req.Query) != "" {
needUserInRound := !req.UserMessagePersisted && !hasUserQuery &&
(strings.TrimSpace(req.Query) != "" || len(req.Attachments) > 0)
if needUserInRound {
fullRound = append(fullRound, conversation.ModelMessage{
Role: "user",
Content: conversation.NewTextContent(req.Query),
@@ -801,10 +848,14 @@ func (r *Resolver) storeMessages(ctx context.Context, req conversation.ChatReque
messageSenderUserID := ""
externalMessageID := ""
sourceReplyToMessageID := ""
assets := []messagepkg.AssetRef(nil)
if msg.Role == "user" {
messageSenderChannelIdentityID = senderChannelIdentityID
messageSenderUserID = senderUserID
externalMessageID = req.ExternalMessageID
if strings.TrimSpace(msg.TextContent()) == strings.TrimSpace(req.Query) {
assets = chatAttachmentsToAssetRefs(req.Attachments)
}
} else if strings.TrimSpace(req.ExternalMessageID) != "" {
// Assistant/tool/system outputs are linked to the inbound source message for cross-channel reply threading.
sourceReplyToMessageID = req.ExternalMessageID
@@ -820,12 +871,34 @@ func (r *Resolver) storeMessages(ctx context.Context, req conversation.ChatReque
Role: msg.Role,
Content: content,
Metadata: meta,
Assets: assets,
}); err != nil {
r.logger.Warn("persist message failed", slog.Any("error", err))
}
}
}
// chatAttachmentsToAssetRefs derives message asset references from the
// attachments of a chat request.
//
// Attachments whose AssetID is blank after trimming whitespace are skipped.
// Each emitted reference uses the role "attachment" and
// records the attachment's index in the original slice as its Ordinal, so
// ordinals may have gaps when some attachments are skipped. A nil result is
// returned only for an empty input; otherwise the slice is non-nil even when
// every attachment was skipped.
func chatAttachmentsToAssetRefs(attachments []conversation.ChatAttachment) []messagepkg.AssetRef {
	if len(attachments) == 0 {
		return nil
	}

	refs := make([]messagepkg.AssetRef, 0, len(attachments))
	for ordinal := range attachments {
		assetID := strings.TrimSpace(attachments[ordinal].AssetID)
		if assetID != "" {
			refs = append(refs, messagepkg.AssetRef{
				AssetID: assetID,
				Role:    "attachment",
				Ordinal: ordinal,
			})
		}
	}
	return refs
}
func buildRouteMetadata(req conversation.ChatRequest) map[string]any {
if strings.TrimSpace(req.RouteID) == "" && strings.TrimSpace(req.CurrentChannel) == "" {
return nil
+24 -9
View File
@@ -191,6 +191,20 @@ type ToolCallFunction struct {
Arguments string `json:"arguments"`
}
// ChatAttachment is a media attachment carried in a chat request.
// Every field except Type is omitted from the JSON encoding when empty.
type ChatAttachment struct {
// Type is the attachment kind (for example "image", "audio", "video" or "file").
Type string `json:"type"`
// Base64 carries inline content (presumably base64-encoded bytes; confirm with producer).
Base64 string `json:"base64,omitempty"`
// Path is a file path reference to the attachment.
Path string `json:"path,omitempty"`
// URL is a remote location for the attachment (assumed; no use of it is visible in this diff).
URL string `json:"url,omitempty"`
// PlatformKey is a platform-side reference for the attachment (assumed; confirm against channel adapters).
PlatformKey string `json:"platform_key,omitempty"`
// AssetID identifies the stored media asset; attachments with a blank
// AssetID are not linked to persisted messages.
AssetID string `json:"asset_id,omitempty"`
// Name is the attachment's display/file name.
Name string `json:"name,omitempty"`
// Mime is the attachment's MIME type.
Mime string `json:"mime,omitempty"`
// Size is the attachment size (presumably in bytes; confirm with producer).
Size int64 `json:"size,omitempty"`
// Metadata is free-form extra data passed through to the gateway unchanged.
Metadata map[string]any `json:"metadata,omitempty"`
}
// ChatRequest is the input for Chat and StreamChat.
type ChatRequest struct {
BotID string `json:"-"`
@@ -206,15 +220,16 @@ type ChatRequest struct {
ConversationType string `json:"-"`
UserMessagePersisted bool `json:"-"`
Query string `json:"query"`
Model string `json:"model,omitempty"`
Provider string `json:"provider,omitempty"`
MaxContextLoadTime int `json:"max_context_load_time,omitempty"`
Channels []string `json:"channels,omitempty"`
CurrentChannel string `json:"current_channel,omitempty"`
Messages []ModelMessage `json:"messages,omitempty"`
Skills []string `json:"skills,omitempty"`
AllowedActions []string `json:"allowed_actions,omitempty"`
Query string `json:"query"`
Model string `json:"model,omitempty"`
Provider string `json:"provider,omitempty"`
MaxContextLoadTime int `json:"max_context_load_time,omitempty"`
Channels []string `json:"channels,omitempty"`
CurrentChannel string `json:"current_channel,omitempty"`
Messages []ModelMessage `json:"messages,omitempty"`
Skills []string `json:"skills,omitempty"`
AllowedActions []string `json:"allowed_actions,omitempty"`
Attachments []ChatAttachment `json:"attachments,omitempty"`
}
// ChatResponse is the output of a non-streaming chat call.