feat: add compaction ratio setting to control partial context compaction

Allow users to configure what percentage of older messages to compact,
keeping the most recent portion intact. The default ratio is 80%, meaning
roughly the oldest 80% of uncompacted messages (measured by estimated token
weight, not by message count) are summarized while the newest 20% remain
as-is for full-fidelity context.
This commit is contained in:
Acbox
2026-03-29 19:14:43 +08:00
parent fc1ef4ddb3
commit 0e646625bf
23 changed files with 181 additions and 37 deletions
+55 -3
View File
@@ -79,6 +79,12 @@ func (s *Service) doCompaction(ctx context.Context, logID pgtype.UUID, sessionUU
return nil
}
toCompact := splitByRatio(messages, cfg.TotalInputTokens, cfg.Ratio)
if len(toCompact) == 0 {
s.completeLog(ctx, logID, "ok", "", "", nil, pgtype.UUID{})
return nil
}
priorLogs, err := s.queries.ListCompactionLogsBySession(ctx, sessionUUID)
if err != nil {
return err
@@ -90,9 +96,9 @@ func (s *Service) doCompaction(ctx context.Context, logID pgtype.UUID, sessionUU
}
}
entries := make([]messageEntry, 0, len(messages))
messageIDs := make([]pgtype.UUID, 0, len(messages))
for _, m := range messages {
entries := make([]messageEntry, 0, len(toCompact))
messageIDs := make([]pgtype.UUID, 0, len(toCompact))
for _, m := range toCompact {
entries = append(entries, messageEntry{
Role: m.Role,
Content: extractTextContent(m.Content),
@@ -258,3 +264,49 @@ func extractTextContent(content []byte) string {
// joinTexts concatenates parts into a single string, inserting one space
// between consecutive elements. A nil or empty slice yields "".
func joinTexts(parts []string) string {
	const separator = " "
	joined := strings.Join(parts, separator)
	return joined
}
// splitByRatio returns the leading prefix of messages that should be
// compacted; the remaining tail is left as-is. It assumes messages are
// ordered oldest-first, so the prefix is the oldest part of the history.
//
// The tail budget is totalInputTokens*(100-ratio)/100 tokens. Messages are
// walked from the end toward the start while summing estimateRowTokens; the
// message at which the running total first reaches the budget becomes the
// last message of the returned prefix. The kept tail is therefore always
// estimated strictly below the budget, and the returned prefix always holds
// at least one message whenever messages is non-empty.
//
// messages is returned unchanged (everything is compacted) when:
//   - ratio <= 0 or ratio >= 100,
//   - totalInputTokens <= 0 or messages is empty,
//   - the budget rounds down to zero, or
//   - the estimated tokens of all messages together never reach the budget.
//
// The result shares its backing array with messages; no rows are copied.
func splitByRatio(messages []sqlc.ListUncompactedMessagesBySessionRow, totalInputTokens, ratio int) []sqlc.ListUncompactedMessagesBySessionRow {
	if ratio <= 0 || ratio >= 100 || totalInputTokens <= 0 || len(messages) == 0 {
		return messages
	}

	keepTokens := totalInputTokens * (100 - ratio) / 100
	if keepTokens <= 0 {
		return messages
	}

	accumulated := 0
	for i := len(messages) - 1; i >= 0; i-- {
		accumulated += estimateRowTokens(messages[i])
		if accumulated >= keepTokens {
			// i+1 is always within [1, len(messages)], so the slice
			// expression is in range and the prefix is never empty.
			return messages[:i+1]
		}
	}

	// The estimates never reached the budget: fall back to compacting all.
	return messages
}
// usagePayload is the subset of a message row's Usage JSON that
// estimateRowTokens decodes.
type usagePayload struct {
// OutputTokens is a pointer so that a missing or null "output_tokens" key
// decodes to nil and can be told apart from an explicit value.
OutputTokens *int `json:"output_tokens"`
}
// estimateRowTokens returns an approximate token count for one message row.
//
// When the row's Usage JSON decodes cleanly and carries a positive
// "output_tokens" value, that value is returned. In every other case
// (no usage, malformed JSON, missing or non-positive count) the estimate
// falls back to one token per four bytes of Content.
func estimateRowTokens(m sqlc.ListUncompactedMessagesBySessionRow) int {
	byLength := len(m.Content) / 4

	if len(m.Usage) == 0 {
		return byLength
	}

	var usage usagePayload
	if err := json.Unmarshal(m.Usage, &usage); err != nil {
		return byLength
	}

	if reported := usage.OutputTokens; reported != nil && *reported > 0 {
		return *reported
	}
	return byLength
}
+10 -8
View File
@@ -28,12 +28,14 @@ type ListLogsResponse struct {
// TriggerConfig holds the parameters needed to trigger a compaction.
type TriggerConfig struct {
BotID string
SessionID string
ModelID string
ClientType string
APIKey string //nolint:gosec // runtime credential, not a hardcoded secret
CodexAccountID string
BaseURL string
HTTPClient *http.Client
BotID string
SessionID string
ModelID string
ClientType string
APIKey string //nolint:gosec // runtime credential, not a hardcoded secret
CodexAccountID string
BaseURL string
HTTPClient *http.Client
// Ratio is the percentage of older uncompacted messages to compact.
// NOTE(review): presumably the value handed to splitByRatio, which treats
// ratio <= 0 or ratio >= 100 as "compact everything" — confirm at the caller.
Ratio int
// TotalInputTokens is the token total the keep budget is derived from.
// NOTE(review): presumably the value handed to splitByRatio, which compacts
// everything when it is <= 0 — confirm at the caller.
TotalInputTokens int
}