From 5e1de4fe7b3f62e8a47def5db97780d93e092048 Mon Sep 17 00:00:00 2001 From: Acbox Date: Thu, 19 Feb 2026 18:28:29 +0800 Subject: [PATCH] fix: include system tokens in max tokens compute --- internal/conversation/flow/resolver.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/internal/conversation/flow/resolver.go b/internal/conversation/flow/resolver.go index 589112d1..74c32995 100644 --- a/internal/conversation/flow/resolver.go +++ b/internal/conversation/flow/resolver.go @@ -252,15 +252,35 @@ func (r *Resolver) resolve(ctx context.Context, req conversation.ChatRequest) (r maxCtx := coalescePositiveInt(req.MaxContextLoadTime, botSettings.MaxContextLoadTime, defaultMaxContextMinutes) maxTokens := botSettings.MaxContextTokens + // Build non-history parts first so we can reserve their token cost before + // trimming history messages. + memoryMsg := r.loadMemoryContextMessage(ctx, req) + var overhead int + if memoryMsg != nil { + overhead += estimateMessageTokens(*memoryMsg) + } + for _, m := range req.Messages { + overhead += estimateMessageTokens(m) + } + // Reserve space for the system prompt built by the agent gateway + // (IDENTITY.md, SOUL.md, TOOLS.md, skills, boilerplate, user prompt, etc.). + const systemPromptReserve = 4096 + overhead += systemPromptReserve + + historyBudget := maxTokens - overhead + if historyBudget < 0 { + historyBudget = 0 + } + var messages []conversation.ModelMessage if !skipHistory && r.conversationSvc != nil { loaded, loadErr := r.loadMessages(ctx, req.ChatID, maxCtx) if loadErr != nil { return resolvedContext{}, loadErr } - messages = trimMessagesByTokens(loaded, maxTokens) + messages = trimMessagesByTokens(loaded, historyBudget) } - if memoryMsg := r.loadMemoryContextMessage(ctx, req); memoryMsg != nil { + if memoryMsg != nil { messages = append(messages, *memoryMsg) } messages = append(messages, req.Messages...)