From 26b01cc46342f6671ab12e006685e56f19cef901 Mon Sep 17 00:00:00 2001 From: Acbox Date: Mon, 13 Apr 2026 12:58:08 +0800 Subject: [PATCH] fix(media): failed to store the media to `/data/media` and add image part --- eslint.config.mjs | 4 +- internal/channel/inbound/channel.go | 5 +- internal/channel/inbound/channel_test.go | 77 +++++++ internal/conversation/flow/resolver.go | 33 +++ internal/pipeline/adapt.go | 11 +- internal/pipeline/driver.go | 47 +++++ internal/pipeline/driver_test.go | 243 +++++++++++++++++++++++ internal/pipeline/rendering.go | 19 ++ internal/pipeline/rendering_test.go | 73 +++++++ internal/pipeline/types.go | 2 + 10 files changed, 507 insertions(+), 7 deletions(-) create mode 100644 internal/pipeline/driver_test.go create mode 100644 internal/pipeline/rendering_test.go diff --git a/eslint.config.mjs b/eslint.config.mjs index 2fda9867..9aa1393d 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -6,7 +6,7 @@ import vue from 'eslint-plugin-vue' export default [ ...tseslint.configs.recommended, ...vue.configs['flat/recommended'], - { ignores: ['**/node_modules/**', '**/dist/**', '**/cache/**', 'packages/sdk/src/**'] }, + { ignores: ['**/node_modules/**', '**/dist/**', '**/cache/**', '**/target/**', 'packages/sdk/src/**'] }, { files: ['packages/**/*.{js,jsx,ts,tsx}', 'apps/**/*.{js,jsx,ts,tsx}'], languageOptions: { @@ -56,6 +56,8 @@ export default [ files: [ 'apps/web/src/pages/chat/components/tool-call-edit.vue', 'apps/web/src/pages/chat/components/tool-call-write.vue', + 'apps/web/src/pages/home/components/tool-call-edit.vue', + 'apps/web/src/pages/home/components/tool-call-write.vue', ], rules: { 'vue/no-v-html': 'off', diff --git a/internal/channel/inbound/channel.go b/internal/channel/inbound/channel.go index ced6c869..f7081b40 100644 --- a/internal/channel/inbound/channel.go +++ b/internal/channel/inbound/channel.go @@ -417,7 +417,10 @@ func (p *ChannelInboundProcessor) HandleInbound(ctx context.Context, cfg channel if _, loaded := p.pipeline.GetIC(sessionID); !loaded { p.replayPipelineSession(ctx, sessionID) } - event := pipelinepkg.AdaptInbound(msg, sessionID, identity.ChannelIdentityID, identity.DisplayName) + pipelineMsg := msg + pipelineMsg.Message = msg.Message + pipelineMsg.Message.Attachments = resolvedAttachments + event := pipelinepkg.AdaptInbound(pipelineMsg, sessionID, identity.ChannelIdentityID, identity.DisplayName) if p.eventStore != nil { eid, persistErr := p.eventStore.PersistEvent(ctx, identity.BotID, sessionID, event) if persistErr != nil { diff --git a/internal/channel/inbound/channel_test.go b/internal/channel/inbound/channel_test.go index 9cbb1ae5..3594f086 100644 --- a/internal/channel/inbound/channel_test.go +++ b/internal/channel/inbound/channel_test.go @@ -24,6 +24,7 @@ import ( dbsqlc "github.com/memohai/memoh/internal/db/sqlc" "github.com/memohai/memoh/internal/media" messagepkg "github.com/memohai/memoh/internal/message" + pipelinepkg "github.com/memohai/memoh/internal/pipeline" "github.com/memohai/memoh/internal/schedule" ) @@ -1223,6 +1224,82 @@ func TestChannelInboundProcessorIngestsQQFileAttachmentKeepsOriginalExtWhenMimeG } } +func TestChannelInboundProcessorPipelineUsesResolvedAttachments(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "image/jpeg") + _, _ = w.Write([]byte("fake-telegram-photo")) + })) + defer server.Close() + + channelIdentitySvc := &fakeChannelIdentityService{channelIdentity: identities.ChannelIdentity{ID: "channelIdentity-pipeline-asset"}} + policySvc := &fakePolicyService{} + chatSvc := &fakeChatService{resolveResult: route.ResolveConversationResult{ChatID: "chat-pipeline-asset", RouteID: "route-pipeline-asset"}} + gateway := &fakeChatGateway{ + resp: conversation.ChatResponse{ + Messages: []conversation.ModelMessage{ + {Role: "assistant", Content: conversation.NewTextContent("ok")}, + }, + }, + } + processor := NewChannelInboundProcessor(slog.Default(), nil, chatSvc, chatSvc, gateway, channelIdentitySvc, policySvc, nil, "", 0) + mediaSvc := &fakeMediaIngestor{nextID: "asset-pipeline-photo", nextMime: "image/jpeg"} + processor.SetMediaService(mediaSvc) + processor.SetSessionEnsurer(&fakeSessionEnsurer{activeSession: SessionResult{ID: "session-pipeline-asset", Type: "chat"}}) + pipeline := pipelinepkg.NewPipeline(pipelinepkg.RenderParams{}) + processor.SetPipeline(pipeline, nil, nil) + sender := &fakeReplySender{} + + cfg := channel.ChannelConfig{ID: "cfg-pipeline-asset", BotID: "bot-1", ChannelType: channel.ChannelTypeTelegram} + msg := channel.InboundMessage{ + BotID: "bot-1", + Channel: channel.ChannelTypeTelegram, + Message: channel.Message{ + ID: "msg-pipeline-asset-1", + Text: "photo test", + Attachments: []channel.Attachment{ + { + Type: channel.AttachmentImage, + URL: server.URL + "/file/bot123/photo.jpg", + PlatformKey: "tg-photo-1", + Name: "photo.jpg", + Mime: "image/jpeg", + }, + }, + }, + ReplyTarget: "12345", + Sender: channel.Identity{SubjectID: "telegram-user"}, + Conversation: channel.Conversation{ + ID: "12345", + Type: channel.ConversationTypePrivate, + }, + } + + if err := processor.HandleInbound(context.Background(), cfg, msg, sender); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if mediaSvc.calls != 1 { + t.Fatalf("expected media ingest to be called once, got %d", mediaSvc.calls) + } + + ic, ok := pipeline.GetIC("session-pipeline-asset") + if !ok { + t.Fatal("expected pipeline session to be created") + } + if len(ic.Nodes) == 0 || ic.Nodes[0].Message == nil { + t.Fatal("expected first pipeline node to be a message") + } + atts := ic.Nodes[0].Message.Attachments + if len(atts) != 1 { + t.Fatalf("expected one pipeline attachment, got %d", len(atts)) + } + if got := atts[0].FilePath; got != "/data/media/test/asset-pipeline-photo" { + t.Fatalf("expected pipeline attachment path to use media store, got %q", got) + } + if strings.Contains(atts[0].FilePath, "api.telegram.org") { + t.Fatalf("expected pipeline attachment path to avoid telegram url, got %q", atts[0].FilePath) + } +} + func TestChannelInboundProcessorPersonalGroupNonOwnerIgnored(t *testing.T) { channelIdentitySvc := &fakeChannelIdentityService{channelIdentity: identities.ChannelIdentity{ID: "channelIdentity-member"}} policySvc := &fakePolicyService{ownerUserID: "channelIdentity-owner"} diff --git a/internal/conversation/flow/resolver.go b/internal/conversation/flow/resolver.go index fd5829b4..6bca13a1 100644 --- a/internal/conversation/flow/resolver.go +++ b/internal/conversation/flow/resolver.go @@ -145,6 +145,39 @@ func (r *Resolver) Pipeline() *pipelinepkg.Pipeline { return r.pipeline } +// InlineImageAttachments resolves image content hashes to sdk.ImagePart values +// using the configured asset loader. Intended for the discuss driver to inline +// images from new RC segments before calling the LLM. +func (r *Resolver) InlineImageAttachments(ctx context.Context, botID string, refs []pipelinepkg.ImageAttachmentRef) []sdk.ImagePart { + if r == nil || r.assetLoader == nil || len(refs) == 0 { + return nil + } + var parts []sdk.ImagePart + for _, ref := range refs { + contentHash := strings.TrimSpace(ref.ContentHash) + if contentHash == "" { + continue + } + dataURL, mime, err := r.inlineAssetAsDataURL(ctx, botID, contentHash, "image", strings.TrimSpace(ref.Mime)) + if err != nil { + if r.logger != nil { + r.logger.Warn( + "inline discuss image attachment failed", + slog.Any("error", err), + slog.String("bot_id", botID), + slog.String("content_hash", contentHash), + ) + } + continue + } + parts = append(parts, sdk.ImagePart{ + Image: dataURL, + MediaType: mime, + }) + } + return parts +} + type usageInfo struct { InputTokens *int `json:"inputTokens"` OutputTokens *int `json:"outputTokens"` diff --git a/internal/pipeline/adapt.go b/internal/pipeline/adapt.go index 4d81dda5..45b9f88b 100644 --- a/internal/pipeline/adapt.go +++ b/internal/pipeline/adapt.go @@ -90,11 +90,12 @@ func adaptAttachments(atts []channel.Attachment) []Attachment { result := make([]Attachment, 0, len(atts)) for _, a := range atts { att := Attachment{ - Type: string(a.Type), - MimeType: strings.TrimSpace(a.Mime), - FileName: strings.TrimSpace(a.Name), - Width: a.Width, - Height: a.Height, + Type: string(a.Type), + MimeType: strings.TrimSpace(a.Mime), + FileName: strings.TrimSpace(a.Name), + ContentHash: strings.TrimSpace(a.ContentHash), + Width: a.Width, + Height: a.Height, } if a.DurationMs > 0 { att.Duration = int(a.DurationMs / 1000) diff --git a/internal/pipeline/driver.go b/internal/pipeline/driver.go index 120b3aa4..f791af28 100644 --- a/internal/pipeline/driver.go +++ b/internal/pipeline/driver.go @@ -27,6 +27,7 @@ type ResolveRunConfigResult struct { // rounds. Implemented by flow.Resolver. type RunConfigResolver interface { ResolveRunConfig(ctx context.Context, botID, sessionID, channelIdentityID, currentPlatform, replyTarget, conversationType, chatToken string) (ResolveRunConfigResult, error) + InlineImageAttachments(ctx context.Context, botID string, refs []ImageAttachmentRef) []sdk.ImagePart StoreRound(ctx context.Context, botID, sessionID, channelIdentityID, currentPlatform string, messages []sdk.Message, modelID string) error } @@ -256,6 +257,17 @@ func (d *DiscussDriver) handleReplyWithAgent(ctx context.Context, sess *discussS runConfig.SessionType = sessionpkg.TypeDiscuss runConfig.Query = "" + // Inline image attachments from new RC segments so the model receives + // them as native vision input (ImagePart) on the first encounter. + // Subsequent turns only see the file path in the XML rendering. + if runConfig.SupportsImageInput && d.deps.Resolver != nil { + imageRefs := extractNewImageRefs(rc, sess.lastProcessedMs) + if len(imageRefs) > 0 { + imageParts := d.deps.Resolver.InlineImageAttachments(ctx, cfg.BotID, imageRefs) + injectImagePartsIntoLastUserMessage(runConfig.Messages, imageParts) + } + } + isMentioned := wasRecentlyMentioned(rc, sess.lastProcessedMs) lateBinding := buildLateBindingPrompt(isMentioned) runConfig.Messages = append(runConfig.Messages, sdk.UserMessage(lateBinding)) @@ -375,6 +387,41 @@ func (d *DiscussDriver) loadTurnResponses(ctx context.Context, sessionID string) return trs } +// extractNewImageRefs collects ImageAttachmentRef entries from RC segments +// that arrived after afterMs (i.e. new since the last LLM call). +func extractNewImageRefs(rc RenderedContext, afterMs int64) []ImageAttachmentRef { + var refs []ImageAttachmentRef + for _, seg := range rc { + if seg.ReceivedAtMs > afterMs && !seg.IsMyself { + refs = append(refs, seg.ImageRefs...) + } + } + return refs +} + +// injectImagePartsIntoLastUserMessage appends ImageParts to the last user +// message in msgs so the model receives inline vision input. +func injectImagePartsIntoLastUserMessage(msgs []sdk.Message, parts []sdk.ImagePart) { + if len(parts) == 0 { + return + } + extra := make([]sdk.MessagePart, 0, len(parts)) + for _, p := range parts { + if strings.TrimSpace(p.Image) != "" { + extra = append(extra, p) + } + } + if len(extra) == 0 { + return + } + for i := len(msgs) - 1; i >= 0; i-- { + if msgs[i].Role == sdk.MessageRoleUser { + msgs[i].Content = append(msgs[i].Content, extra...) + return + } + } +} + func wasRecentlyMentioned(rc RenderedContext, afterMs int64) bool { for _, seg := range rc { if seg.ReceivedAtMs > afterMs && (seg.MentionsMe || seg.RepliesToMe) { diff --git a/internal/pipeline/driver_test.go b/internal/pipeline/driver_test.go new file mode 100644 index 00000000..a96b167e --- /dev/null +++ b/internal/pipeline/driver_test.go @@ -0,0 +1,243 @@ +package pipeline + +import ( + "context" + "strings" + "testing" + + sdk "github.com/memohai/twilight-ai/sdk" + + agentpkg "github.com/memohai/memoh/internal/agent" +) + +func TestExtractNewImageRefs(t *testing.T) { + rc := RenderedContext{ + {ReceivedAtMs: 100, ImageRefs: []ImageAttachmentRef{{ContentHash: "old-hash", Mime: "image/png"}}}, + {ReceivedAtMs: 200, IsMyself: true, ImageRefs: []ImageAttachmentRef{{ContentHash: "self-hash"}}}, + {ReceivedAtMs: 300, ImageRefs: []ImageAttachmentRef{{ContentHash: "new-hash", Mime: "image/jpeg"}}}, + {ReceivedAtMs: 400, ImageRefs: nil}, + } + + refs := extractNewImageRefs(rc, 150) + if len(refs) != 1 { + t.Fatalf("expected 1 ref, got %d", len(refs)) + } + if refs[0].ContentHash != "new-hash" { + t.Fatalf("expected new-hash, got %q", refs[0].ContentHash) + } + if refs[0].Mime != "image/jpeg" { + t.Fatalf("expected image/jpeg, got %q", refs[0].Mime) + } +} + +func TestExtractNewImageRefs_IncludesMultiple(t *testing.T) { + rc := RenderedContext{ + {ReceivedAtMs: 100}, + {ReceivedAtMs: 200, ImageRefs: []ImageAttachmentRef{ + {ContentHash: "a"}, + {ContentHash: "b"}, + }}, + {ReceivedAtMs: 300, ImageRefs: []ImageAttachmentRef{{ContentHash: "c"}}}, + } + refs := extractNewImageRefs(rc, 50) + if len(refs) != 3 { + t.Fatalf("expected 3 refs, got %d", len(refs)) + } +} + +func TestInjectImagePartsIntoLastUserMessage(t *testing.T) { + msgs := []sdk.Message{ + sdk.UserMessage("hello"), + sdk.AssistantMessage("hi"), + sdk.UserMessage("look at this"), + } + parts := []sdk.ImagePart{ + {Image: "data:image/png;base64,abc", MediaType: "image/png"}, + } + + injectImagePartsIntoLastUserMessage(msgs, parts) + + lastUser := msgs[2] + if len(lastUser.Content) != 2 { + t.Fatalf("expected 2 content parts, got %d", len(lastUser.Content)) + } + imgPart, ok := lastUser.Content[1].(sdk.ImagePart) + if !ok { + t.Fatalf("expected ImagePart, got %T", lastUser.Content[1]) + } + if imgPart.Image != "data:image/png;base64,abc" { + t.Fatalf("unexpected image: %q", imgPart.Image) + } +} + +func TestInjectImagePartsIntoLastUserMessage_Empty(t *testing.T) { + msgs := []sdk.Message{sdk.UserMessage("hello")} + injectImagePartsIntoLastUserMessage(msgs, nil) + if len(msgs[0].Content) != 1 { + t.Fatalf("expected no change, got %d parts", len(msgs[0].Content)) + } +} + +func TestInjectImagePartsIntoLastUserMessage_SkipsEmptyImage(t *testing.T) { + msgs := []sdk.Message{sdk.UserMessage("hello")} + parts := []sdk.ImagePart{{Image: "", MediaType: "image/png"}} + injectImagePartsIntoLastUserMessage(msgs, parts) + if len(msgs[0].Content) != 1 { + t.Fatalf("expected no change, got %d parts", len(msgs[0].Content)) + } +} + +func TestHandleReplyWithAgent_InlinesImages(t *testing.T) { + rc := RenderedContext{ + { + ReceivedAtMs: 200, + Content: []RenderedContentPiece{{Type: "text", Text: `photo`}}, + ImageRefs: []ImageAttachmentRef{{ContentHash: "img-hash", Mime: "image/jpeg"}}, + }, + } + + fakeAgent := &fakeDiscussStreamer{} + + resolver := &fakeRunConfigResolver{ + resolveResult: ResolveRunConfigResult{ + RunConfig: agentpkg.RunConfig{ + SupportsImageInput: true, + }, + ModelID: "model-1", + }, + inlineFn: func(_ context.Context, _ string, refs []ImageAttachmentRef) []sdk.ImagePart { + if len(refs) != 1 || refs[0].ContentHash != "img-hash" { + t.Fatalf("unexpected refs: %v", refs) + } + return []sdk.ImagePart{{Image: "data:image/jpeg;base64,FAKE", MediaType: "image/jpeg"}} + }, + } + + driver := NewDiscussDriver(DiscussDriverDeps{ + Pipeline: NewPipeline(RenderParams{}), + Resolver: resolver, + }) + + sess := &discussSession{ + config: DiscussSessionConfig{ + BotID: "bot-1", + SessionID: "sess-1", + }, + lastProcessedMs: 0, + } + + driver.handleReplyWithAgent(context.Background(), sess, rc, driver.logger, fakeAgent) + + if fakeAgent.lastConfig == nil { + t.Fatal("expected agent to be called") + } + + msgs := fakeAgent.lastConfig.Messages + var userMsgs []sdk.Message + for _, m := range msgs { + if m.Role == sdk.MessageRoleUser { + userMsgs = append(userMsgs, m) + } + } + if len(userMsgs) < 2 { + t.Fatalf("expected at least 2 user messages (rc + late binding), got %d", len(userMsgs)) + } + rcMsg := userMsgs[0] + hasImage := false + for _, part := range rcMsg.Content { + if imgPart, ok := part.(sdk.ImagePart); ok { + hasImage = true + if !strings.HasPrefix(imgPart.Image, "data:image/jpeg;base64,") { + t.Fatalf("unexpected image data: %q", imgPart.Image) + } + } + } + if !hasImage { + t.Fatal("expected image part in RC user message") + } +} + +func TestHandleReplyWithAgent_NoInlineWhenNoVision(t *testing.T) { + rc := RenderedContext{ + { + ReceivedAtMs: 200, + Content: []RenderedContentPiece{{Type: "text", Text: `photo`}}, + ImageRefs: []ImageAttachmentRef{{ContentHash: "img-hash", Mime: "image/jpeg"}}, + }, + } + + fakeAgent := &fakeDiscussStreamer{} + + resolver := &fakeRunConfigResolver{ + resolveResult: ResolveRunConfigResult{ + RunConfig: agentpkg.RunConfig{ + SupportsImageInput: false, + }, + ModelID: "model-1", + }, + inlineFn: func(_ context.Context, _ string, _ []ImageAttachmentRef) []sdk.ImagePart { + t.Fatal("should not be called when model doesn't support vision") + return nil + }, + } + + driver := NewDiscussDriver(DiscussDriverDeps{ + Pipeline: NewPipeline(RenderParams{}), + Resolver: resolver, + }) + + sess := &discussSession{ + config: DiscussSessionConfig{ + BotID: "bot-1", + SessionID: "sess-1", + }, + lastProcessedMs: 0, + } + + driver.handleReplyWithAgent(context.Background(), sess, rc, driver.logger, fakeAgent) + + if fakeAgent.lastConfig == nil { + t.Fatal("expected agent to be called") + } + for _, m := range fakeAgent.lastConfig.Messages { + for _, part := range m.Content { + if _, ok := part.(sdk.ImagePart); ok { + t.Fatal("should not have image parts when vision is not supported") + } + } + } +} + +// --- Test helpers --- + +type fakeDiscussStreamer struct { + lastConfig *agentpkg.RunConfig +} + +func (f *fakeDiscussStreamer) Stream(_ context.Context, cfg agentpkg.RunConfig) <-chan agentpkg.StreamEvent { + f.lastConfig = &cfg + ch := make(chan agentpkg.StreamEvent, 1) + ch <- agentpkg.StreamEvent{Type: agentpkg.EventAgentEnd} + close(ch) + return ch +} + +type fakeRunConfigResolver struct { + resolveResult ResolveRunConfigResult + inlineFn func(ctx context.Context, botID string, refs []ImageAttachmentRef) []sdk.ImagePart +} + +func (f *fakeRunConfigResolver) ResolveRunConfig(_ context.Context, _, _, _, _, _, _, _ string) (ResolveRunConfigResult, error) { + return f.resolveResult, nil +} + +func (f *fakeRunConfigResolver) InlineImageAttachments(ctx context.Context, botID string, refs []ImageAttachmentRef) []sdk.ImagePart { + if f.inlineFn != nil { + return f.inlineFn(ctx, botID, refs) + } + return nil +} + +func (*fakeRunConfigResolver) StoreRound(_ context.Context, _, _, _, _ string, _ []sdk.Message, _ string) error { + return nil +} diff --git a/internal/pipeline/rendering.go b/internal/pipeline/rendering.go index 54493fcf..2d4ed501 100644 --- a/internal/pipeline/rendering.go +++ b/internal/pipeline/rendering.go @@ -14,6 +14,13 @@ type RenderedContentPiece struct { URL string `json:"url,omitempty"` } +// ImageAttachmentRef holds the content hash and MIME type of an image +// attachment that can be inlined as a vision input via the media store. +type ImageAttachmentRef struct { + ContentHash string `json:"content_hash"` + Mime string `json:"mime,omitempty"` +} + // RenderedSegment is a single segment of rendered context, one per IC node. type RenderedSegment struct { ReceivedAtMs int64 `json:"received_at_ms"` @@ -22,6 +29,7 @@ type RenderedSegment struct { IsSelfSent bool `json:"is_self_sent,omitempty"` MentionsMe bool `json:"mentions_me,omitempty"` RepliesToMe bool `json:"replies_to_me,omitempty"` + ImageRefs []ImageAttachmentRef `json:"image_refs,omitempty"` } // RenderedContext is the output of the Rendering layer — a slice of segments. @@ -140,6 +148,16 @@ func renderMessage(msg *ICMessage, params RenderParams) RenderedSegment { pieces := []RenderedContentPiece{{Type: "text", Text: text}} + var imageRefs []ImageAttachmentRef + for _, att := range msg.Attachments { + if strings.EqualFold(att.Type, "image") && att.ContentHash != "" { + imageRefs = append(imageRefs, ImageAttachmentRef{ + ContentHash: att.ContentHash, + Mime: att.MimeType, + }) + } + } + return RenderedSegment{ ReceivedAtMs: msg.ReceivedAtMs, Content: pieces, @@ -147,6 +165,7 @@ func renderMessage(msg *ICMessage, params RenderParams) RenderedSegment { IsSelfSent: msg.IsSelfSent, MentionsMe: mentionsMe, RepliesToMe: repliesToMe, + ImageRefs: imageRefs, } } diff --git a/internal/pipeline/rendering_test.go b/internal/pipeline/rendering_test.go new file mode 100644 index 00000000..df383833 --- /dev/null +++ b/internal/pipeline/rendering_test.go @@ -0,0 +1,73 @@ +package pipeline + +import ( + "testing" + + "github.com/memohai/memoh/internal/channel" +) + +func TestRenderMessage_ImageRefsPopulated(t *testing.T) { + msg := &ICMessage{ + MessageID: "msg-1", + ReceivedAtMs: 100, + TimestampSec: 100, + Content: []ContentNode{{Type: "text", Text: "photo"}}, + Attachments: []Attachment{ + {Type: "image", ContentHash: "hash-1", MimeType: "image/jpeg", FilePath: "/data/media/bot/ab/hash-1.jpg"}, + {Type: "file", ContentHash: "hash-2", MimeType: "application/pdf", FilePath: "/data/media/bot/cd/hash-2.pdf"}, + {Type: "image", MimeType: "image/png"}, + }, + Conversation: ConversationMeta{Channel: "telegram", ConversationType: "private"}, + } + + seg := renderMessage(msg, RenderParams{}) + + if len(seg.ImageRefs) != 1 { + t.Fatalf("expected 1 image ref (only images with ContentHash), got %d", len(seg.ImageRefs)) + } + if seg.ImageRefs[0].ContentHash != "hash-1" { + t.Fatalf("expected hash-1, got %q", seg.ImageRefs[0].ContentHash) + } + if seg.ImageRefs[0].Mime != "image/jpeg" { + t.Fatalf("expected image/jpeg, got %q", seg.ImageRefs[0].Mime) + } +} + +func TestRenderMessage_NoImageRefs(t *testing.T) { + msg := &ICMessage{ + MessageID: "msg-2", + ReceivedAtMs: 200, + TimestampSec: 200, + Content: []ContentNode{{Type: "text", Text: "text only"}}, + Conversation: ConversationMeta{Channel: "telegram", ConversationType: "private"}, + } + + seg := renderMessage(msg, RenderParams{}) + + if len(seg.ImageRefs) != 0 { + t.Fatalf("expected 0 image refs, got %d", len(seg.ImageRefs)) + } +} + +func TestAdaptAttachments_ContentHash(t *testing.T) { + atts := []channel.Attachment{ + {Type: channel.AttachmentImage, ContentHash: "abc123", URL: "/data/media/bot/ab/abc123.jpg", Mime: "image/jpeg"}, + {Type: channel.AttachmentFile, URL: "https://example.com/doc.pdf", Mime: "application/pdf"}, + } + got := adaptAttachments(atts) + if len(got) != 2 { + t.Fatalf("expected 2 attachments, got %d", len(got)) + } + if got[0].ContentHash != "abc123" || got[0].MimeType != "image/jpeg" { + t.Fatalf("unexpected first attachment: %+v", got[0]) + } + if got[0].FilePath != "/data/media/bot/ab/abc123.jpg" { + t.Fatalf("expected FilePath from URL, got %q", got[0].FilePath) + } + if got[1].Type != "file" || got[1].MimeType != "application/pdf" { + t.Fatalf("unexpected second attachment: %+v", got[1]) + } + if got[1].FilePath != "https://example.com/doc.pdf" { + t.Fatalf("expected FilePath from URL, got %q", got[1].FilePath) + } +} diff --git a/internal/pipeline/types.go b/internal/pipeline/types.go index d32d7ad7..ac06c23c 100644 --- a/internal/pipeline/types.go +++ b/internal/pipeline/types.go @@ -45,6 +45,8 @@ type Attachment struct { AltText string `json:"alt_text,omitempty"` // FilePath is the workspace path where the attachment is stored. FilePath string `json:"file_path,omitempty"` + // ContentHash is the media-store content hash for persisted attachments. + ContentHash string `json:"content_hash,omitempty"` } // ForwardInfo describes a forwarded message origin.