feat: add context compaction to automatically summarize old messages (#compaction) (#276)

When the input token count exceeds a configurable threshold after a
conversation round, the system asynchronously compacts older messages into a
summary. Cascading compactions reference prior summaries via <prior_context>
tags to maintain conversational continuity without duplicating content.

- Add bot_history_message_compacts table and compact_id on messages
- Add compaction_enabled, compaction_threshold, compaction_model_id to bots
- Implement compaction service (internal/compaction) with LLM summarization
- Integrate into conversation flow: replace compacted messages with summaries
  wrapped in <summary> tags during context loading
- Add REST API endpoints (GET/DELETE /bots/:bot_id/compaction/logs)
- Add frontend Compaction tab with settings and log viewer
- Wire compaction service into both dev (cmd/agent) and prod (cmd/memoh) entry points
- Update test mocks to include new GetBotByID columns
This commit is contained in:
Acbox Liu
2026-03-22 14:26:00 +08:00
committed by GitHub
parent 91e5e44509
commit de62f94315
40 changed files with 2375 additions and 197 deletions
+10 -5
View File
@@ -44,11 +44,13 @@ func (d *fakeDBTX) QueryRow(ctx context.Context, sql string, args ...any) pgx.Ro
// Column order: id, owner_user_id, display_name, avatar_url, is_active, status,
// max_context_load_time, max_context_tokens, language,
// reasoning_enabled, reasoning_effort, chat_model_id, search_provider_id, memory_provider_id,
// heartbeat_enabled, heartbeat_interval, heartbeat_prompt, metadata, created_at, updated_at.
// heartbeat_enabled, heartbeat_interval, heartbeat_prompt,
// compaction_enabled, compaction_threshold, compaction_model_id,
// metadata, created_at, updated_at.
func makeBotRow(botID, ownerUserID pgtype.UUID) *fakeRow {
return &fakeRow{
scanFunc: func(dest ...any) error {
if len(dest) < 20 {
if len(dest) < 23 {
return pgx.ErrNoRows
}
*dest[0].(*pgtype.UUID) = botID
@@ -68,9 +70,12 @@ func makeBotRow(botID, ownerUserID pgtype.UUID) *fakeRow {
*dest[14].(*bool) = false // HeartbeatEnabled
*dest[15].(*int32) = 30 // HeartbeatInterval
*dest[16].(*string) = "" // HeartbeatPrompt
*dest[17].(*[]byte) = []byte(`{}`)
*dest[18].(*pgtype.Timestamptz) = pgtype.Timestamptz{}
*dest[19].(*pgtype.Timestamptz) = pgtype.Timestamptz{}
*dest[17].(*bool) = false // CompactionEnabled
*dest[18].(*int32) = 100000 // CompactionThreshold
*dest[19].(*pgtype.UUID) = pgtype.UUID{} // CompactionModelID
*dest[20].(*[]byte) = []byte(`{}`)
*dest[21].(*pgtype.Timestamptz) = pgtype.Timestamptz{}
*dest[22].(*pgtype.Timestamptz) = pgtype.Timestamptz{}
return nil
},
}