feat: add context compaction to automatically summarize old messages (#compaction) (#276)

When input tokens exceed a configurable threshold after a conversation round,
the system asynchronously compacts older messages into a summary. Cascading
compactions reference prior summaries via <prior_context> tags to maintain
conversational continuity without duplicating content.

- Add bot_history_message_compacts table and compact_id on messages
- Add compaction_enabled, compaction_threshold, compaction_model_id to bots
- Implement compaction service (internal/compaction) with LLM summarization
- Integrate into conversation flow: replace compacted messages with summaries
  wrapped in <summary> tags during context loading
- Add REST API endpoints (GET/DELETE /bots/:bot_id/compaction/logs)
- Add frontend Compaction tab with settings and log viewer
- Wire compaction service into both dev (cmd/agent) and prod (cmd/memoh) entry points
- Update test mocks to include new GetBotByID columns
This commit is contained in:
Acbox Liu
2026-03-22 14:26:00 +08:00
committed by GitHub
parent 91e5e44509
commit de62f94315
40 changed files with 2375 additions and 197 deletions
+27 -21
View File
@@ -94,32 +94,35 @@ func (q *Queries) DeleteBotByID(ctx context.Context, id pgtype.UUID) error {
}
const getBotByID = `-- name: GetBotByID :one
SELECT id, owner_user_id, display_name, avatar_url, is_active, status, max_context_load_time, max_context_tokens, language, reasoning_enabled, reasoning_effort, chat_model_id, search_provider_id, memory_provider_id, heartbeat_enabled, heartbeat_interval, heartbeat_prompt, metadata, created_at, updated_at
SELECT id, owner_user_id, display_name, avatar_url, is_active, status, max_context_load_time, max_context_tokens, language, reasoning_enabled, reasoning_effort, chat_model_id, search_provider_id, memory_provider_id, heartbeat_enabled, heartbeat_interval, heartbeat_prompt, compaction_enabled, compaction_threshold, compaction_model_id, metadata, created_at, updated_at
FROM bots
WHERE id = $1
`
type GetBotByIDRow struct {
ID pgtype.UUID `json:"id"`
OwnerUserID pgtype.UUID `json:"owner_user_id"`
DisplayName pgtype.Text `json:"display_name"`
AvatarUrl pgtype.Text `json:"avatar_url"`
IsActive bool `json:"is_active"`
Status string `json:"status"`
MaxContextLoadTime int32 `json:"max_context_load_time"`
MaxContextTokens int32 `json:"max_context_tokens"`
Language string `json:"language"`
ReasoningEnabled bool `json:"reasoning_enabled"`
ReasoningEffort string `json:"reasoning_effort"`
ChatModelID pgtype.UUID `json:"chat_model_id"`
SearchProviderID pgtype.UUID `json:"search_provider_id"`
MemoryProviderID pgtype.UUID `json:"memory_provider_id"`
HeartbeatEnabled bool `json:"heartbeat_enabled"`
HeartbeatInterval int32 `json:"heartbeat_interval"`
HeartbeatPrompt string `json:"heartbeat_prompt"`
Metadata []byte `json:"metadata"`
CreatedAt pgtype.Timestamptz `json:"created_at"`
UpdatedAt pgtype.Timestamptz `json:"updated_at"`
ID pgtype.UUID `json:"id"`
OwnerUserID pgtype.UUID `json:"owner_user_id"`
DisplayName pgtype.Text `json:"display_name"`
AvatarUrl pgtype.Text `json:"avatar_url"`
IsActive bool `json:"is_active"`
Status string `json:"status"`
MaxContextLoadTime int32 `json:"max_context_load_time"`
MaxContextTokens int32 `json:"max_context_tokens"`
Language string `json:"language"`
ReasoningEnabled bool `json:"reasoning_enabled"`
ReasoningEffort string `json:"reasoning_effort"`
ChatModelID pgtype.UUID `json:"chat_model_id"`
SearchProviderID pgtype.UUID `json:"search_provider_id"`
MemoryProviderID pgtype.UUID `json:"memory_provider_id"`
HeartbeatEnabled bool `json:"heartbeat_enabled"`
HeartbeatInterval int32 `json:"heartbeat_interval"`
HeartbeatPrompt string `json:"heartbeat_prompt"`
CompactionEnabled bool `json:"compaction_enabled"`
CompactionThreshold int32 `json:"compaction_threshold"`
CompactionModelID pgtype.UUID `json:"compaction_model_id"`
Metadata []byte `json:"metadata"`
CreatedAt pgtype.Timestamptz `json:"created_at"`
UpdatedAt pgtype.Timestamptz `json:"updated_at"`
}
func (q *Queries) GetBotByID(ctx context.Context, id pgtype.UUID) (GetBotByIDRow, error) {
@@ -143,6 +146,9 @@ func (q *Queries) GetBotByID(ctx context.Context, id pgtype.UUID) (GetBotByIDRow
&i.HeartbeatEnabled,
&i.HeartbeatInterval,
&i.HeartbeatPrompt,
&i.CompactionEnabled,
&i.CompactionThreshold,
&i.CompactionModelID,
&i.Metadata,
&i.CreatedAt,
&i.UpdatedAt,