From f1dd30a388dc3f8b781ec86790edc1722c434d1c Mon Sep 17 00:00:00 2001 From: Acbox Date: Tue, 31 Mar 2026 15:11:57 +0800 Subject: [PATCH] fix: strip agent tags from IM/WebUI output and fix attachment display after refresh Three independent bugs fixed: 1. IM channels were sending raw <attachments>/<reactions>/<speech> tag blocks alongside file attachments. Now ExtractAssistantOutputs strips these tags before building the outbound channel message. 2. WebUI rendered these tags as markdown after page refresh. Now extractMessageText strips agent tags for non-user messages. 3. WebUI lost attachment blocks after refresh because convertMessagesToChats did not call buildAssetBlocks when merging assistant messages into a pending tool-call group. Also made LinkOutboundAssets session-aware so assets are linked to the correct assistant message. --- .../src/composables/api/useChat.content.ts | 8 ++++++++ apps/web/src/store/chat-list.ts | 2 ++ internal/agent/tags.go | 7 +++++++ .../conversation/flow/assistant_output.go | 2 ++ internal/conversation/flow/resolver_store.go | 19 ++++++++++++++----- internal/handlers/local_channel.go | 5 +++-- 6 files changed, 36 insertions(+), 7 deletions(-) diff --git a/apps/web/src/composables/api/useChat.content.ts b/apps/web/src/composables/api/useChat.content.ts index fbc6b422..72ab3044 100644 --- a/apps/web/src/composables/api/useChat.content.ts +++ b/apps/web/src/composables/api/useChat.content.ts @@ -1,6 +1,8 @@ import type { Message } from './useChat.types' const yamlHeaderRe = /^---\n[\s\S]*?\n---\n?/ +const agentTagsRe = /<(attachments|reactions|speech)>[\s\S]*?<\/\1>/g +const collapsedNewlinesRe = /\n{3,}/g export function extractToolCalls( message: Message, @@ -92,6 +94,8 @@ export function extractMessageText(message: Message): string { if (message.role === 'user') { text = stripYAMLHeader(text) + } else { + text = stripAgentTags(text) } return text } @@ -100,6 +104,10 @@ export function stripYAMLHeader(text: string): string { return text.replace(yamlHeaderRe, 
'').trim() } +export function stripAgentTags(text: string): string { + return text.replace(agentTagsRe, '').replace(collapsedNewlinesRe, '\n\n').trim() +} + export function extractTextFromContent(content: unknown): string { if (typeof content === 'string') return content.trim() diff --git a/apps/web/src/store/chat-list.ts b/apps/web/src/store/chat-list.ts index 8d020b54..c5f8aa59 100644 --- a/apps/web/src/store/chat-list.ts +++ b/apps/web/src/store/chat-list.ts @@ -278,6 +278,7 @@ export const useChatStore = defineStore('chat', () => { pendingAssistant.blocks.push(block) if (tc.id) pendingToolCallMap.set(tc.id, block) } + pendingAssistant.blocks.push(...buildAssetBlocks(raw)) continue } @@ -286,6 +287,7 @@ export const useChatStore = defineStore('chat', () => { pendingAssistant.blocks.push({ type: 'thinking', content: r, done: true }) } pendingAssistant.blocks.push({ type: 'text', content: text }) + pendingAssistant.blocks.push(...buildAssetBlocks(raw)) flushPending() continue } diff --git a/internal/agent/tags.go b/internal/agent/tags.go index df26cae2..45ebd8df 100644 --- a/internal/agent/tags.go +++ b/internal/agent/tags.go @@ -107,6 +107,13 @@ func SpeechResolver() TagResolver { } } +// StripAgentTags removes all default agent tag blocks (<attachments>, <reactions>, <speech>) +// from text, returning only the visible content. +func StripAgentTags(text string) string { + cleaned, _ := ExtractTagsFromText(text, DefaultTagResolvers()) + return cleaned +} + // ExtractTagsFromText extracts and removes all tag blocks from a complete string. 
func ExtractTagsFromText(text string, resolvers []TagResolver) (string, []TagEvent) { var events []TagEvent diff --git a/internal/conversation/flow/assistant_output.go b/internal/conversation/flow/assistant_output.go index cce974d2..942ef675 100644 --- a/internal/conversation/flow/assistant_output.go +++ b/internal/conversation/flow/assistant_output.go @@ -3,6 +3,7 @@ package flow import ( "strings" + "github.com/memohai/memoh/internal/agent" "github.com/memohai/memoh/internal/conversation" ) @@ -28,6 +29,7 @@ func ExtractAssistantOutputs(messages []conversation.ModelMessage) []conversatio if content == "" && len(parts) == 0 { continue } + content = agent.StripAgentTags(content) outputs = append(outputs, conversation.AssistantOutput{Content: content, Parts: parts}) } return outputs diff --git a/internal/conversation/flow/resolver_store.go b/internal/conversation/flow/resolver_store.go index 12067a54..cf370a1a 100644 --- a/internal/conversation/flow/resolver_store.go +++ b/internal/conversation/flow/resolver_store.go @@ -200,14 +200,23 @@ func (r *Resolver) resolvePersistSenderIDs(ctx context.Context, req conversation } // LinkOutboundAssets links bot-generated assets to the latest assistant -// message for the given bot. Used by the WebSocket path where attachment -// ingestion happens after message persistence. -func (r *Resolver) LinkOutboundAssets(ctx context.Context, botID string, assets []messagepkg.AssetRef) { +// message. When sessionID is provided, the search is scoped to that session; +// otherwise it falls back to a bot-wide search. +// Used by the WebSocket path where attachment ingestion happens after message +// persistence. +func (r *Resolver) LinkOutboundAssets(ctx context.Context, botID, sessionID string, assets []messagepkg.AssetRef) { if r.messageService == nil || len(assets) == 0 || strings.TrimSpace(botID) == "" { return } - // ListLatest returns messages in DESC order (newest first). 
- msgs, err := r.messageService.ListLatest(ctx, botID, 5) + var ( + msgs []messagepkg.Message + err error + ) + if strings.TrimSpace(sessionID) != "" { + msgs, err = r.messageService.ListLatestBySession(ctx, sessionID, 5) + } else { + msgs, err = r.messageService.ListLatest(ctx, botID, 5) + } if err != nil { r.logger.Warn("LinkOutboundAssets: list latest failed", slog.Any("error", err)) return diff --git a/internal/handlers/local_channel.go b/internal/handlers/local_channel.go index 20e52b3d..a1c3559c 100644 --- a/internal/handlers/local_channel.go +++ b/internal/handlers/local_channel.go @@ -415,6 +415,7 @@ func (h *LocalChannelHandler) HandleWebSocket(c echo.Context) error { activeCancel = streamCancel eventCh := make(chan flow.WSStreamEvent, 64) + sessionID := strings.TrimSpace(msg.SessionID) var ( outboundAssetMu sync.Mutex outboundAssetRefs []messagepkg.AssetRef @@ -426,7 +427,7 @@ func (h *LocalChannelHandler) HandleWebSocket(c echo.Context) error { req := conversation.ChatRequest{ BotID: botID, ChatID: botID, - SessionID: strings.TrimSpace(msg.SessionID), + SessionID: sessionID, Token: bearerToken, UserID: channelIdentityID, SourceChannelIdentityID: channelIdentityID, @@ -462,7 +463,7 @@ func (h *LocalChannelHandler) HandleWebSocket(c echo.Context) error { refs := outboundAssetRefs outboundAssetMu.Unlock() if len(refs) > 0 { - h.resolver.LinkOutboundAssets(context.WithoutCancel(ctx), botID, refs) + h.resolver.LinkOutboundAssets(context.WithoutCancel(ctx), botID, sessionID, refs) } }()