fix(memory): use rune-based truncation for UTF-8 safety

truncateSnippet sliced bytes directly, which could split a multi-byte
UTF-8 sequence and produce invalid output. Switch to []rune so truncation
always lands on a code point boundary; the limit n now counts runes
rather than bytes.
This commit is contained in:
Menci
2026-03-08 01:44:53 +08:00
committed by 晨苒
parent 7d10c3e180
commit 71545dd606
2 changed files with 57 additions and 2 deletions
+3 -2
View File
@@ -380,10 +380,11 @@ func (p *BuiltinProvider) Usage(ctx context.Context, filters map[string]any) (Us
// truncateSnippet trims surrounding whitespace from s and caps the result at
// n runes. When the trimmed text is longer than n runes it is cut after the
// n-th rune, trimmed again so the cut does not leave trailing whitespace, and
// suffixed with "...". Counting runes instead of bytes keeps the cut from
// landing inside a multi-byte UTF-8 sequence. A negative n is treated as 0.
func truncateSnippet(s string, n int) string {
	trimmed := strings.TrimSpace(s)
	if n < 0 {
		// A negative limit would make the slice expression below panic.
		n = 0
	}
	// Every rune occupies at least one byte, so a byte length within the
	// limit guarantees the rune count is too; skip the []rune allocation.
	if len(trimmed) <= n {
		return trimmed
	}
	runes := []rune(trimmed)
	if len(runes) <= n {
		return trimmed
	}
	return strings.TrimSpace(string(runes[:n])) + "..."
}
func deduplicateItems(items []MemoryItem) []MemoryItem {
+54
View File
@@ -0,0 +1,54 @@
package provider
import (
"testing"
"unicode/utf8"
)
// TestTruncateSnippet_ASCII checks that an ASCII input longer than the limit
// is cut to its first five characters and suffixed with "...".
func TestTruncateSnippet_ASCII(t *testing.T) {
	t.Parallel()

	const want = "hello..."
	if got := truncateSnippet("hello world", 5); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
// TestTruncateSnippet_NoTruncation checks that an input shorter than the
// limit comes back unchanged, with no ellipsis appended.
func TestTruncateSnippet_NoTruncation(t *testing.T) {
	t.Parallel()

	const input = "short"
	if got := truncateSnippet(input, 100); got != input {
		t.Fatalf("expected %q, got %q", input, got)
	}
}
// TestTruncateSnippet_CJK checks that truncating multi-byte CJK text yields
// valid UTF-8 containing exactly the first three characters plus "...".
func TestTruncateSnippet_CJK(t *testing.T) {
	t.Parallel()

	const (
		input = "你好世界啊" // five characters, 15 bytes in UTF-8
		want  = "你好世..."
	)
	got := truncateSnippet(input, 3)
	switch {
	case !utf8.ValidString(got):
		t.Fatalf("result is not valid UTF-8: %q", got)
	case got != want:
		t.Fatalf("expected %q, got %q", want, got)
	}
}
// TestTruncateSnippet_Emoji checks that truncating a run of four-byte emoji
// yields valid UTF-8 containing exactly the first two emoji plus "...".
func TestTruncateSnippet_Emoji(t *testing.T) {
	t.Parallel()

	const want = "😀😁..."
	result := truncateSnippet("😀😁😂🤣😃", 2)

	// Validity is checked first so a split sequence is reported as such
	// rather than as a plain mismatch.
	if valid := utf8.ValidString(result); !valid {
		t.Fatalf("result is not valid UTF-8: %q", result)
	}
	if result != want {
		t.Fatalf("expected %q, got %q", want, result)
	}
}
// TestTruncateSnippet_TrimWhitespace checks that leading and trailing
// whitespace is stripped even when the input is under the limit.
func TestTruncateSnippet_TrimWhitespace(t *testing.T) {
	t.Parallel()

	const want = "hello"
	if got := truncateSnippet(" hello ", 100); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}