mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
feat: add media asset system, channel lifecycle refactor, and chat attachments (#54)
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
package flow
|
||||
|
||||
import "github.com/memohai/memoh/internal/models"
|
||||
|
||||
// attachmentModality maps an attachment type string to the input modality it requires.
|
||||
var attachmentModality = map[string]string{
|
||||
"image": models.ModelInputImage,
|
||||
"audio": models.ModelInputAudio,
|
||||
"video": models.ModelInputVideo,
|
||||
"file": models.ModelInputFile,
|
||||
}
|
||||
|
||||
// gatewayAttachment is the structured attachment payload sent to the agent
// gateway.
//
// Only fields consumable by the agent/LLM are serialized; internal references
// (asset_id, platform_key, url) are stripped before dispatch. Every field
// except Type is omitted from the JSON output when it holds its zero value.
type gatewayAttachment struct {
	// Type is the attachment kind. The kinds with a known input modality are
	// "image", "audio", "video" and "file".
	Type string `json:"type"`
	// Base64 carries the attachment content inline.
	// NOTE(review): presumably standard base64 without a data-URI prefix — confirm.
	Base64 string `json:"base64,omitempty"`
	// Path is a file path reference to the attachment.
	// NOTE(review): presumably a path inside the bot container — confirm.
	Path string `json:"path,omitempty"`
	// Mime is the attachment's MIME type, if known.
	Mime string `json:"mime,omitempty"`
	// Name is the attachment's display or file name, if known.
	Name string `json:"name,omitempty"`
	// Metadata holds free-form extra key/value data forwarded as-is.
	Metadata map[string]any `json:"metadata,omitempty"`
}
|
||||
|
||||
// capabilityRouteResult holds the outcome of splitting attachments by model capability.
|
||||
type capabilityRouteResult struct {
|
||||
// Native are attachments the model can consume directly as multimodal input.
|
||||
Native []gatewayAttachment
|
||||
// Fallback are attachments whose modality is unsupported; they are converted
|
||||
// to container file path references for the LLM to access via tools.
|
||||
Fallback []gatewayAttachment
|
||||
}
|
||||
|
||||
// routeAttachmentsByCapability splits attachments based on the model's supported
|
||||
// input modalities. Supported modalities produce native multimodal input; unsupported
|
||||
// modalities produce container path references for tool-based access.
|
||||
func routeAttachmentsByCapability(modalities []string, attachments []gatewayAttachment) capabilityRouteResult {
|
||||
supported := make(map[string]struct{}, len(modalities))
|
||||
for _, m := range modalities {
|
||||
supported[m] = struct{}{}
|
||||
}
|
||||
|
||||
result := capabilityRouteResult{
|
||||
Native: make([]gatewayAttachment, 0, len(attachments)),
|
||||
Fallback: make([]gatewayAttachment, 0),
|
||||
}
|
||||
for _, att := range attachments {
|
||||
requiredModality, known := attachmentModality[att.Type]
|
||||
if !known {
|
||||
// Unknown attachment types always go through fallback path.
|
||||
result.Fallback = append(result.Fallback, att)
|
||||
continue
|
||||
}
|
||||
if _, ok := supported[requiredModality]; ok {
|
||||
result.Native = append(result.Native, att)
|
||||
} else {
|
||||
result.Fallback = append(result.Fallback, att)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// attachmentsToAny converts typed gateway attachments to []any for JSON serialization.
|
||||
func attachmentsToAny(atts []gatewayAttachment) []any {
|
||||
out := make([]any, 0, len(atts))
|
||||
for _, a := range atts {
|
||||
out = append(out, a)
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
package flow
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestRouteAttachmentsByCapability_AllSupported(t *testing.T) {
|
||||
modalities := []string{"text", "image", "audio"}
|
||||
attachments := []gatewayAttachment{
|
||||
{Type: "image", Base64: "abc"},
|
||||
{Type: "audio", Path: "/data/voice.wav"},
|
||||
}
|
||||
result := routeAttachmentsByCapability(modalities, attachments)
|
||||
assert.Len(t, result.Native, 2)
|
||||
assert.Len(t, result.Fallback, 0)
|
||||
}
|
||||
|
||||
func TestRouteAttachmentsByCapability_TextOnly(t *testing.T) {
|
||||
modalities := []string{"text"}
|
||||
attachments := []gatewayAttachment{
|
||||
{Type: "image", Base64: "abc"},
|
||||
{Type: "video", Path: "/data/video.mp4"},
|
||||
}
|
||||
result := routeAttachmentsByCapability(modalities, attachments)
|
||||
assert.Len(t, result.Native, 0)
|
||||
assert.Len(t, result.Fallback, 2)
|
||||
}
|
||||
|
||||
func TestRouteAttachmentsByCapability_Mixed(t *testing.T) {
|
||||
modalities := []string{"text", "image"}
|
||||
attachments := []gatewayAttachment{
|
||||
{Type: "image", Base64: "abc"},
|
||||
{Type: "video", Path: "/data/video.mp4"},
|
||||
{Type: "audio", Path: "/data/audio.mp3"},
|
||||
}
|
||||
result := routeAttachmentsByCapability(modalities, attachments)
|
||||
assert.Len(t, result.Native, 1)
|
||||
assert.Equal(t, "image", result.Native[0].Type)
|
||||
assert.Len(t, result.Fallback, 2)
|
||||
}
|
||||
|
||||
func TestRouteAttachmentsByCapability_UnknownType(t *testing.T) {
|
||||
modalities := []string{"text", "image"}
|
||||
attachments := []gatewayAttachment{
|
||||
{Type: "hologram", Path: "/data/holo.dat"},
|
||||
}
|
||||
result := routeAttachmentsByCapability(modalities, attachments)
|
||||
assert.Len(t, result.Native, 0)
|
||||
assert.Len(t, result.Fallback, 1)
|
||||
}
|
||||
|
||||
func TestRouteAttachmentsByCapability_Empty(t *testing.T) {
|
||||
result := routeAttachmentsByCapability([]string{"text"}, nil)
|
||||
assert.Len(t, result.Native, 0)
|
||||
assert.Len(t, result.Fallback, 0)
|
||||
}
|
||||
|
||||
func TestAttachmentsToAny(t *testing.T) {
|
||||
atts := []gatewayAttachment{
|
||||
{Type: "image", Base64: "abc"},
|
||||
{Type: "file", Path: "/data/doc.pdf"},
|
||||
}
|
||||
result := attachmentsToAny(atts)
|
||||
assert.Len(t, result, 2)
|
||||
}
|
||||
@@ -177,8 +177,8 @@ type resolvedContext struct {
|
||||
}
|
||||
|
||||
func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (resolvedContext, error) {
|
||||
if strings.TrimSpace(req.Query) == "" {
|
||||
return resolvedContext{}, fmt.Errorf("query is required")
|
||||
if strings.TrimSpace(req.Query) == "" && len(req.Attachments) == 0 {
|
||||
return resolvedContext{}, fmt.Errorf("query or attachments is required")
|
||||
}
|
||||
if strings.TrimSpace(req.BotID) == "" {
|
||||
return resolvedContext{}, fmt.Errorf("bot id is required")
|
||||
@@ -252,7 +252,7 @@ func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (r
|
||||
Model: gatewayModelConfig{
|
||||
ModelID: chatModel.ModelID,
|
||||
ClientType: clientType,
|
||||
Input: chatModel.Input,
|
||||
Input: chatModel.InputModalities,
|
||||
APIKey: provider.ApiKey,
|
||||
BaseURL: provider.BaseUrl,
|
||||
},
|
||||
@@ -273,7 +273,7 @@ func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (r
|
||||
ConversationType: strings.TrimSpace(req.ConversationType),
|
||||
SessionToken: req.ChatToken,
|
||||
},
|
||||
Attachments: []any{},
|
||||
Attachments: r.routeAndMergeAttachments(chatModel, req),
|
||||
}
|
||||
|
||||
return resolvedContext{payload: payload, model: chatModel, provider: provider}, nil
|
||||
@@ -583,6 +583,50 @@ func (r *Resolver) tryStoreStream(ctx context.Context, req conversation.ChatRequ
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// routeAndMergeAttachments applies CapabilityFallbackPolicy to split
|
||||
// request attachments by model input modalities, then merges the results
|
||||
// into a single []any for the gateway request.
|
||||
func (r *Resolver) routeAndMergeAttachments(model models.GetResponse, req conversation.ChatRequest) []any {
|
||||
if len(req.Attachments) == 0 {
|
||||
return []any{}
|
||||
}
|
||||
typed := make([]gatewayAttachment, 0, len(req.Attachments))
|
||||
for _, raw := range req.Attachments {
|
||||
typed = append(typed, gatewayAttachment{
|
||||
Type: raw.Type,
|
||||
Base64: raw.Base64,
|
||||
Path: raw.Path,
|
||||
Mime: raw.Mime,
|
||||
Name: raw.Name,
|
||||
Metadata: raw.Metadata,
|
||||
})
|
||||
}
|
||||
routed := routeAttachmentsByCapability(model.InputModalities, typed)
|
||||
// Convert unsupported attachments to file-path references.
|
||||
for i := range routed.Fallback {
|
||||
if routed.Fallback[i].Path == "" && routed.Fallback[i].Base64 != "" {
|
||||
// Cannot downgrade base64-only to path; keep as native so the agent can
|
||||
// attempt best-effort processing or skip.
|
||||
routed.Native = append(routed.Native, routed.Fallback[i])
|
||||
routed.Fallback[i] = gatewayAttachment{}
|
||||
continue
|
||||
}
|
||||
routed.Fallback[i].Type = "file"
|
||||
}
|
||||
merged := make([]any, 0, len(routed.Native)+len(routed.Fallback))
|
||||
merged = append(merged, attachmentsToAny(routed.Native)...)
|
||||
for _, fb := range routed.Fallback {
|
||||
if fb.Type == "" {
|
||||
continue
|
||||
}
|
||||
merged = append(merged, fb)
|
||||
}
|
||||
if len(merged) == 0 {
|
||||
return []any{}
|
||||
}
|
||||
return merged
|
||||
}
|
||||
|
||||
// --- container resolution ---
|
||||
|
||||
func (r *Resolver) resolveContainerID(ctx context.Context, botID, explicit string) string {
|
||||
@@ -720,7 +764,7 @@ func (r *Resolver) persistUserMessage(ctx context.Context, req conversation.Chat
|
||||
return fmt.Errorf("bot id is required for persistence")
|
||||
}
|
||||
text := strings.TrimSpace(req.Query)
|
||||
if text == "" {
|
||||
if text == "" && len(req.Attachments) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -743,6 +787,7 @@ func (r *Resolver) persistUserMessage(ctx context.Context, req conversation.Chat
|
||||
Role: "user",
|
||||
Content: content,
|
||||
Metadata: buildRouteMetadata(req),
|
||||
Assets: chatAttachmentsToAssetRefs(req.Attachments),
|
||||
})
|
||||
return err
|
||||
}
|
||||
@@ -758,7 +803,9 @@ func (r *Resolver) storeRound(ctx context.Context, req conversation.ChatRequest,
|
||||
break
|
||||
}
|
||||
}
|
||||
if !req.UserMessagePersisted && !hasUserQuery && strings.TrimSpace(req.Query) != "" {
|
||||
needUserInRound := !req.UserMessagePersisted && !hasUserQuery &&
|
||||
(strings.TrimSpace(req.Query) != "" || len(req.Attachments) > 0)
|
||||
if needUserInRound {
|
||||
fullRound = append(fullRound, conversation.ModelMessage{
|
||||
Role: "user",
|
||||
Content: conversation.NewTextContent(req.Query),
|
||||
@@ -801,10 +848,14 @@ func (r *Resolver) storeMessages(ctx context.Context, req conversation.ChatReque
|
||||
messageSenderUserID := ""
|
||||
externalMessageID := ""
|
||||
sourceReplyToMessageID := ""
|
||||
assets := []messagepkg.AssetRef(nil)
|
||||
if msg.Role == "user" {
|
||||
messageSenderChannelIdentityID = senderChannelIdentityID
|
||||
messageSenderUserID = senderUserID
|
||||
externalMessageID = req.ExternalMessageID
|
||||
if strings.TrimSpace(msg.TextContent()) == strings.TrimSpace(req.Query) {
|
||||
assets = chatAttachmentsToAssetRefs(req.Attachments)
|
||||
}
|
||||
} else if strings.TrimSpace(req.ExternalMessageID) != "" {
|
||||
// Assistant/tool/system outputs are linked to the inbound source message for cross-channel reply threading.
|
||||
sourceReplyToMessageID = req.ExternalMessageID
|
||||
@@ -820,12 +871,34 @@ func (r *Resolver) storeMessages(ctx context.Context, req conversation.ChatReque
|
||||
Role: msg.Role,
|
||||
Content: content,
|
||||
Metadata: meta,
|
||||
Assets: assets,
|
||||
}); err != nil {
|
||||
r.logger.Warn("persist message failed", slog.Any("error", err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// chatAttachmentsToAssetRefs converts ChatAttachment slice to message AssetRef slice.
|
||||
// Only attachments that carry an asset_id are included; others have not been ingested yet.
|
||||
func chatAttachmentsToAssetRefs(attachments []conversation.ChatAttachment) []messagepkg.AssetRef {
|
||||
if len(attachments) == 0 {
|
||||
return nil
|
||||
}
|
||||
refs := make([]messagepkg.AssetRef, 0, len(attachments))
|
||||
for i, att := range attachments {
|
||||
id := strings.TrimSpace(att.AssetID)
|
||||
if id == "" {
|
||||
continue
|
||||
}
|
||||
refs = append(refs, messagepkg.AssetRef{
|
||||
AssetID: id,
|
||||
Role: "attachment",
|
||||
Ordinal: i,
|
||||
})
|
||||
}
|
||||
return refs
|
||||
}
|
||||
|
||||
func buildRouteMetadata(req conversation.ChatRequest) map[string]any {
|
||||
if strings.TrimSpace(req.RouteID) == "" && strings.TrimSpace(req.CurrentChannel) == "" {
|
||||
return nil
|
||||
|
||||
@@ -191,6 +191,20 @@ type ToolCallFunction struct {
|
||||
Arguments string `json:"arguments"`
|
||||
}
|
||||
|
||||
// ChatAttachment is a media attachment carried in a chat request.
//
// Every field except Type is omitted from the JSON encoding when it holds its
// zero value.
type ChatAttachment struct {
	// Type is the attachment kind, e.g. "image", "audio", "video" or "file".
	Type string `json:"type"`
	// Base64 carries the attachment content inline.
	// NOTE(review): exact encoding (plain base64 vs data URI) is not shown here — confirm.
	Base64 string `json:"base64,omitempty"`
	// Path is a file path reference to the attachment.
	// NOTE(review): presumably a path inside the bot container — confirm.
	Path string `json:"path,omitempty"`
	// URL is a remote location of the attachment.
	URL string `json:"url,omitempty"`
	// PlatformKey is a platform-specific reference to the attachment.
	// NOTE(review): presumably the source channel's own media key — confirm.
	PlatformKey string `json:"platform_key,omitempty"`
	// AssetID identifies the ingested media asset; it is empty until the
	// attachment has been ingested.
	AssetID string `json:"asset_id,omitempty"`
	// Name is the attachment's display or file name, if known.
	Name string `json:"name,omitempty"`
	// Mime is the attachment's MIME type, if known.
	Mime string `json:"mime,omitempty"`
	// Size is the attachment size.
	// NOTE(review): presumably in bytes — confirm.
	Size int64 `json:"size,omitempty"`
	// Metadata holds free-form extra key/value data.
	Metadata map[string]any `json:"metadata,omitempty"`
}
|
||||
|
||||
// ChatRequest is the input for Chat and StreamChat.
|
||||
type ChatRequest struct {
|
||||
BotID string `json:"-"`
|
||||
@@ -206,15 +220,16 @@ type ChatRequest struct {
|
||||
ConversationType string `json:"-"`
|
||||
UserMessagePersisted bool `json:"-"`
|
||||
|
||||
Query string `json:"query"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Provider string `json:"provider,omitempty"`
|
||||
MaxContextLoadTime int `json:"max_context_load_time,omitempty"`
|
||||
Channels []string `json:"channels,omitempty"`
|
||||
CurrentChannel string `json:"current_channel,omitempty"`
|
||||
Messages []ModelMessage `json:"messages,omitempty"`
|
||||
Skills []string `json:"skills,omitempty"`
|
||||
AllowedActions []string `json:"allowed_actions,omitempty"`
|
||||
Query string `json:"query"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Provider string `json:"provider,omitempty"`
|
||||
MaxContextLoadTime int `json:"max_context_load_time,omitempty"`
|
||||
Channels []string `json:"channels,omitempty"`
|
||||
CurrentChannel string `json:"current_channel,omitempty"`
|
||||
Messages []ModelMessage `json:"messages,omitempty"`
|
||||
Skills []string `json:"skills,omitempty"`
|
||||
AllowedActions []string `json:"allowed_actions,omitempty"`
|
||||
Attachments []ChatAttachment `json:"attachments,omitempty"`
|
||||
}
|
||||
|
||||
// ChatResponse is the output of a non-streaming chat call.
|
||||
|
||||
Reference in New Issue
Block a user