mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
6acdd191c7
commit bcdb026ae43e4f95d0b2c4f9bd440a2df9d6b514 Author: Ran <16112591+chen-ran@users.noreply.github.com> Date: Thu Feb 12 17:10:32 2026 +0800 chore: update DEVELOPMENT.md commit 30281742ef Merge: ca5c6a1 5b05f13 Author: BBQ <bbq@BBQdeMacBook-Air.local> Date: Thu Feb 12 15:49:17 2026 +0800 merge(github/main): integrate fx dependency injection framework Merge upstream fx refactor and adapt all services to use go.uber.org/fx for dependency injection. Resolve conflicts in main.go, server.go, and service constructors while preserving our domain model changes. - Fix telegram adapter panic on shutdown (double close channel) - Fix feishu adapter processing messages after stop - Increase directory lookup timeout from 2s to 5s commit ca5c6a1866 Author: BBQ <bbq@BBQdeMacBook-Air.local> Date: Thu Feb 12 15:33:09 2026 +0800 refactor(core): restructure conversation, channel and message domains - Rename chat module to conversation with flow-based architecture - Move channelidentities into channel/identities subpackage - Add channel/route for routing logic - Add message service with event hub - Add MCP providers: container, directory, schedule - Refactor Feishu/Telegram adapters with directory and stream support - Add platform management page and channel badges in web UI - Update database schema for conversations, messages and channel routes - Add @memoh/shared package for cross-package type definitions commit 75e2ef0467 Merge: d99ba38 01cb6c8 Author: BBQ <bbq@BBQdeMacBook-Air.local> Date: Thu Feb 12 14:45:49 2026 +0800 merge(github): merge github/main, resolve index.ts URL conflict Keep our defensive absolute-URL check in createAuthFetcher.
commit d99ba38b7d Merge: 860e20f 35ce7d1 Author: BBQ <bbq@BBQdeMacBook-Air.local> Date: Thu Feb 12 05:20:18 2026 +0800 merge(github): merge github/main, keep our code and docs/spec commit 860e20fe70 Author: BBQ <bbq@BBQdeMacBook-Air.local> Date: Wed Feb 11 22:13:27 2026 +0800 docs(docs): add concepts and style guides for VitePress site - Add concepts: identity-and-binding, index (en/zh) - Add style: terminology (en/zh) - Update index and zh/index - Update .vitepress/config.ts commit a75fdb8040 Author: BBQ <bbq@BBQdeMacBook-Air.local> Date: Wed Feb 11 17:37:16 2026 +0800 refactor(mcp): standardize unified tool gateway on go-sdk Split business executors from federation sources and migrate unified tool/federation transports to the official go-sdk for stricter MCP compliance and safer session lifecycle handling. Add targeted regression tests for accept compatibility, initialization retries, pending cleanup, and include updated swagger artifacts. commit 02b33c8e85 Author: BBQ <bbq@BBQdeMacBook-Air.local> Date: Wed Feb 11 15:42:21 2026 +0800 refactor(core): finalize user-centric identity and policy cleanup Unify auth and chat identity semantics around user_id, enforce personal-bot owner-only authorization, and remove legacy compatibility branches in integration tests. commit 06e8619a37 Author: BBQ <bbq@BBQdeMacBook-Air.local> Date: Wed Feb 11 14:47:03 2026 +0800 refactor(core): migrate channel identity and binding across app Align channel identity and bind flow across backend and app-facing layers, including generated swagger artifacts and package lock updates while excluding docs content changes.
145 lines
3.9 KiB
Go
145 lines
3.9 KiB
Go
package memory
|
|
|
|
import (
|
|
"reflect"
|
|
"testing"
|
|
)
|
|
|
|
func TestBM25Indexer_TermFrequencies(t *testing.T) {
|
|
indexer := NewBM25Indexer(nil)
|
|
|
|
tests := []struct {
|
|
name string
|
|
lang string
|
|
text string
|
|
want map[string]int
|
|
docLen int
|
|
wantErr bool
|
|
}{
|
|
{
|
|
name: "English text",
|
|
lang: "en",
|
|
text: "The quick brown fox jumps over the lazy dog",
|
|
// Note: Bleve English analyzer stems words (jumps -> jump, lazy -> lazi) and removes stop words (the, over)
|
|
want: map[string]int{"quick": 1, "brown": 1, "fox": 1, "jump": 1, "lazi": 1, "dog": 1},
|
|
docLen: 6,
|
|
},
|
|
{
|
|
name: "CJK text",
|
|
lang: "cjk",
|
|
text: "你好世界",
|
|
// Note: Bleve CJK analyzer uses bigrams
|
|
want: map[string]int{"你好": 1, "好世": 1, "世界": 1},
|
|
docLen: 3,
|
|
},
|
|
{
|
|
name: "Mixed text with standard analyzer",
|
|
lang: "",
|
|
text: "Go 语言 123",
|
|
// Note: Standard analyzer splits CJK characters individually
|
|
want: map[string]int{"go": 1, "语": 1, "言": 1, "123": 1},
|
|
docLen: 4,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
got, gotLen, err := indexer.TermFrequencies(tt.lang, tt.text)
|
|
if (err != nil) != tt.wantErr {
|
|
t.Errorf("TermFrequencies() error = %v, wantErr %v", err, tt.wantErr)
|
|
return
|
|
}
|
|
if !reflect.DeepEqual(got, tt.want) {
|
|
t.Errorf("TermFrequencies() got = %v, want %v", got, tt.want)
|
|
}
|
|
if gotLen != tt.docLen {
|
|
t.Errorf("TermFrequencies() gotLen = %v, want %v", gotLen, tt.docLen)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestBM25Indexer_BM25Logic(t *testing.T) {
|
|
indexer := NewBM25Indexer(nil)
|
|
|
|
lang := "en"
|
|
tf1 := map[string]int{"golang": 1, "programming": 1}
|
|
len1 := 2
|
|
indices1, values1 := indexer.AddDocument(lang, tf1, len1)
|
|
|
|
tf2 := map[string]int{"golang": 1, "tutorial": 1, "advanced": 1, "topics": 1}
|
|
len2 := 4
|
|
indices2, values2 := indexer.AddDocument(lang, tf2, len2)
|
|
|
|
// In BM25, same term in a shorter doc should have higher weight than in a longer doc.
|
|
var weight1, weight2 float32
|
|
for i, idx := range indices1 {
|
|
if idx == termHash("golang") {
|
|
weight1 = values1[i]
|
|
}
|
|
}
|
|
for i, idx := range indices2 {
|
|
if idx == termHash("golang") {
|
|
weight2 = values2[i]
|
|
}
|
|
}
|
|
|
|
if weight1 <= weight2 {
|
|
t.Errorf("Expected weight in shorter doc (%f) to be higher than in longer doc (%f)", weight1, weight2)
|
|
}
|
|
|
|
// Add a doc without "golang" to increase doc count; IDF should increase.
|
|
oldWeight1 := weight1
|
|
indexer.AddDocument(lang, map[string]int{"rust": 1}, 1)
|
|
indices3, values3 := indexer.AddDocument(lang, tf1, len1)
|
|
|
|
for i, idx := range indices3 {
|
|
if idx == termHash("golang") {
|
|
weight1 = values3[i]
|
|
}
|
|
}
|
|
|
|
if weight1 <= oldWeight1 {
|
|
t.Errorf("Expected weight to increase as IDF increases (more docs without the term), got %f -> %f", oldWeight1, weight1)
|
|
}
|
|
}
|
|
|
|
func TestBM25Indexer_RemoveDocument(t *testing.T) {
|
|
indexer := NewBM25Indexer(nil)
|
|
lang := "en"
|
|
term := "test"
|
|
|
|
tf, docLen, _ := indexer.TermFrequencies(lang, term)
|
|
indexer.AddDocument(lang, tf, docLen)
|
|
|
|
indexer.mu.RLock()
|
|
stats := indexer.stats["en"]
|
|
if stats.DocCount != 1 || stats.DocFreq[term] != 1 {
|
|
t.Errorf("Expected stats to be updated after add, got count=%d, freq=%d", stats.DocCount, stats.DocFreq[term])
|
|
}
|
|
indexer.mu.RUnlock()
|
|
|
|
indexer.RemoveDocument(lang, tf, docLen)
|
|
|
|
indexer.mu.RLock()
|
|
if stats.DocCount != 0 || stats.DocFreq[term] != 0 {
|
|
t.Errorf("Expected stats to be cleared after remove, got count=%d, freq=%d", stats.DocCount, stats.DocFreq[term])
|
|
}
|
|
indexer.mu.RUnlock()
|
|
}
|
|
|
|
func TestTermHash_CollisionResistance(t *testing.T) {
|
|
// Check that different terms get distinct hashes in 20-bit space (no collision in small sample).
|
|
h1 := termHash("apple")
|
|
h2 := termHash("orange")
|
|
h3 := termHash("banana")
|
|
|
|
if h1 == h2 || h2 == h3 || h1 == h3 {
|
|
t.Errorf("Detected unexpected hash collision in small sample: %d, %d, %d", h1, h2, h3)
|
|
}
|
|
|
|
if h1 > sparseDimMask {
|
|
t.Errorf("Hash %d exceeds mask %d", h1, sparseDimMask)
|
|
}
|
|
}
|