mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
71545dd606
truncateSnippet sliced bytes directly, which could split multi-byte UTF-8 characters. Switch to []rune so truncation always respects character boundaries.
55 lines
1.2 KiB
Go
55 lines
1.2 KiB
Go
package provider
|
|
|
|
import (
|
|
"testing"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
func TestTruncateSnippet_ASCII(t *testing.T) {
|
|
t.Parallel()
|
|
got := truncateSnippet("hello world", 5)
|
|
if got != "hello..." {
|
|
t.Fatalf("expected %q, got %q", "hello...", got)
|
|
}
|
|
}
|
|
|
|
func TestTruncateSnippet_NoTruncation(t *testing.T) {
|
|
t.Parallel()
|
|
got := truncateSnippet("short", 100)
|
|
if got != "short" {
|
|
t.Fatalf("expected %q, got %q", "short", got)
|
|
}
|
|
}
|
|
|
|
func TestTruncateSnippet_CJK(t *testing.T) {
|
|
t.Parallel()
|
|
// 5 CJK characters (15 bytes in UTF-8), truncate to 3 runes.
|
|
got := truncateSnippet("你好世界啊", 3)
|
|
if !utf8.ValidString(got) {
|
|
t.Fatalf("result is not valid UTF-8: %q", got)
|
|
}
|
|
if got != "你好世..." {
|
|
t.Fatalf("expected %q, got %q", "你好世...", got)
|
|
}
|
|
}
|
|
|
|
func TestTruncateSnippet_Emoji(t *testing.T) {
|
|
t.Parallel()
|
|
// Emoji are 4 bytes each in UTF-8.
|
|
got := truncateSnippet("😀😁😂🤣😃", 2)
|
|
if !utf8.ValidString(got) {
|
|
t.Fatalf("result is not valid UTF-8: %q", got)
|
|
}
|
|
if got != "😀😁..." {
|
|
t.Fatalf("expected %q, got %q", "😀😁...", got)
|
|
}
|
|
}
|
|
|
|
func TestTruncateSnippet_TrimWhitespace(t *testing.T) {
|
|
t.Parallel()
|
|
got := truncateSnippet(" hello ", 100)
|
|
if got != "hello" {
|
|
t.Fatalf("expected %q, got %q", "hello", got)
|
|
}
|
|
}
|