mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
fix(memory): use rune-based truncation for UTF-8 safety
truncateSnippet sliced bytes directly, which could split multi-byte UTF-8 characters. Switch to []rune so truncation always respects character boundaries.
This commit is contained in:
@@ -380,10 +380,11 @@ func (p *BuiltinProvider) Usage(ctx context.Context, filters map[string]any) (Us
|
||||
|
||||
// truncateSnippet returns s with surrounding whitespace removed and limited
// to at most n runes of content.
//
// If the trimmed text already fits within n runes it is returned unchanged.
// Otherwise the first n runes are kept, any whitespace left dangling at the
// cut point is trimmed, and "..." is appended, so a truncated result may be
// up to three characters longer than n.
//
// Truncation counts runes rather than bytes so a multi-byte UTF-8 character
// is never split in half. A negative n is treated as 0 instead of panicking
// on an out-of-range slice.
func truncateSnippet(s string, n int) string {
	if n < 0 {
		// runes[:n] would panic for a negative bound; clamp to "keep nothing".
		n = 0
	}
	trimmed := strings.TrimSpace(s)
	runes := []rune(trimmed)
	if len(runes) <= n {
		return trimmed
	}
	// Trim again: the cut may land right after a space inside the text.
	return strings.TrimSpace(string(runes[:n])) + "..."
}
|
||||
|
||||
func deduplicateItems(items []MemoryItem) []MemoryItem {
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
package provider
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestTruncateSnippet_ASCII(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := truncateSnippet("hello world", 5)
|
||||
if got != "hello..." {
|
||||
t.Fatalf("expected %q, got %q", "hello...", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateSnippet_NoTruncation(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := truncateSnippet("short", 100)
|
||||
if got != "short" {
|
||||
t.Fatalf("expected %q, got %q", "short", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateSnippet_CJK(t *testing.T) {
|
||||
t.Parallel()
|
||||
// 5 CJK characters (15 bytes in UTF-8), truncate to 3 runes.
|
||||
got := truncateSnippet("你好世界啊", 3)
|
||||
if !utf8.ValidString(got) {
|
||||
t.Fatalf("result is not valid UTF-8: %q", got)
|
||||
}
|
||||
if got != "你好世..." {
|
||||
t.Fatalf("expected %q, got %q", "你好世...", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateSnippet_Emoji(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Emoji are 4 bytes each in UTF-8.
|
||||
got := truncateSnippet("😀😁😂🤣😃", 2)
|
||||
if !utf8.ValidString(got) {
|
||||
t.Fatalf("result is not valid UTF-8: %q", got)
|
||||
}
|
||||
if got != "😀😁..." {
|
||||
t.Fatalf("expected %q, got %q", "😀😁...", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateSnippet_TrimWhitespace(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := truncateSnippet(" hello ", 100)
|
||||
if got != "hello" {
|
||||
t.Fatalf("expected %q, got %q", "hello", got)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user