mirror of
https://github.com/memohai/Memoh.git
synced 2026-04-27 07:16:19 +09:00
c741f2410b
- Treat maxTokens=0 as "unconfigured/unlimited" instead of disabling trimming for any non-positive value (which masked exhausted budgets) - Set historyBudget=1 when maxTokens>0 but overhead exceeds the limit, ensuring aggressive trimming instead of no trimming - Estimate token cost for messages without usage data (len/4 fallback) so user/tool messages are not free-passed during budget accounting
167 lines
5.0 KiB
Go
167 lines
5.0 KiB
Go
package flow
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/memohai/memoh/internal/conversation"
|
|
)
|
|
|
|
func intPtr(v int) *int { return &v }
|
|
|
|
func TestTrimMessagesByTokens_DropsLeadingOrphanTool(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
messages := []messageWithUsage{
|
|
{
|
|
Message: conversation.ModelMessage{
|
|
Role: "user",
|
|
Content: conversation.NewTextContent("1111"),
|
|
},
|
|
},
|
|
{
|
|
Message: conversation.ModelMessage{
|
|
Role: "assistant",
|
|
ToolCalls: []conversation.ToolCall{
|
|
{
|
|
ID: "call-1",
|
|
Type: "function",
|
|
Function: conversation.ToolCallFunction{
|
|
Name: "calc",
|
|
Arguments: `{"x":1}`,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
UsageOutputTokens: intPtr(50),
|
|
},
|
|
{
|
|
Message: conversation.ModelMessage{
|
|
Role: "tool",
|
|
ToolCallID: "call-1",
|
|
Content: conversation.NewTextContent("2"),
|
|
},
|
|
},
|
|
{
|
|
Message: conversation.ModelMessage{
|
|
Role: "assistant",
|
|
Content: conversation.NewTextContent("done"),
|
|
},
|
|
UsageOutputTokens: intPtr(60),
|
|
},
|
|
}
|
|
|
|
// Budget 70: assistant(60) fits, adding assistant-tool-call(50) exceeds →
|
|
// cutoff lands on the tool message which must be skipped.
|
|
trimmed := trimMessagesByTokens(nil, messages, 70)
|
|
if len(trimmed) == 0 {
|
|
t.Fatal("expected non-empty trimmed messages")
|
|
}
|
|
if trimmed[0].Role == "tool" {
|
|
t.Fatal("expected first trimmed message not to be tool")
|
|
}
|
|
}
|
|
|
|
func TestTrimMessagesByTokens_KeepsToolWhenPaired(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
messages := []messageWithUsage{
|
|
{
|
|
Message: conversation.ModelMessage{
|
|
Role: "assistant",
|
|
ToolCalls: []conversation.ToolCall{
|
|
{
|
|
ID: "call-1",
|
|
Type: "function",
|
|
Function: conversation.ToolCallFunction{
|
|
Name: "calc",
|
|
Arguments: `{"x":1}`,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
UsageOutputTokens: intPtr(10),
|
|
},
|
|
{
|
|
Message: conversation.ModelMessage{
|
|
Role: "tool",
|
|
ToolCallID: "call-1",
|
|
Content: conversation.NewTextContent("2"),
|
|
},
|
|
},
|
|
}
|
|
|
|
trimmed := trimMessagesByTokens(nil, messages, 100)
|
|
if len(trimmed) != 2 {
|
|
t.Fatalf("expected 2 messages, got %d", len(trimmed))
|
|
}
|
|
if trimmed[0].Role != "assistant" || trimmed[1].Role != "tool" {
|
|
t.Fatalf("unexpected role order: %q -> %q", trimmed[0].Role, trimmed[1].Role)
|
|
}
|
|
}
|
|
|
|
func TestTrimMessagesByTokens_NoUsage_KeepsAll(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
messages := []messageWithUsage{
|
|
{Message: conversation.ModelMessage{Role: "user", Content: conversation.NewTextContent("hello")}},
|
|
{Message: conversation.ModelMessage{Role: "assistant", Content: conversation.NewTextContent("hi")}},
|
|
}
|
|
|
|
trimmed := trimMessagesByTokens(nil, messages, 10)
|
|
if len(trimmed) != 2 {
|
|
t.Fatalf("messages without outputTokens should all be kept, got %d", len(trimmed))
|
|
}
|
|
}
|
|
|
|
func TestTrimMessagesByTokens_ZeroMeansNoLimit(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
messages := []messageWithUsage{
|
|
{Message: conversation.ModelMessage{Role: "user", Content: conversation.NewTextContent("hello")}, UsageOutputTokens: intPtr(10000)},
|
|
{Message: conversation.ModelMessage{Role: "assistant", Content: conversation.NewTextContent("world")}, UsageOutputTokens: intPtr(10000)},
|
|
}
|
|
|
|
// maxTokens = 0 means "no limit configured", should keep all messages.
|
|
trimmed := trimMessagesByTokens(nil, messages, 0)
|
|
if len(trimmed) != 2 {
|
|
t.Fatalf("maxTokens=0 should keep all messages, got %d", len(trimmed))
|
|
}
|
|
}
|
|
|
|
func TestTrimMessagesByTokens_SmallBudgetTrims(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
messages := []messageWithUsage{
|
|
{Message: conversation.ModelMessage{Role: "user", Content: conversation.NewTextContent("old message")}, UsageOutputTokens: intPtr(100)},
|
|
{Message: conversation.ModelMessage{Role: "assistant", Content: conversation.NewTextContent("old reply")}, UsageOutputTokens: intPtr(200)},
|
|
{Message: conversation.ModelMessage{Role: "user", Content: conversation.NewTextContent("new message")}, UsageOutputTokens: intPtr(50)},
|
|
{Message: conversation.ModelMessage{Role: "assistant", Content: conversation.NewTextContent("new reply")}, UsageOutputTokens: intPtr(60)},
|
|
}
|
|
|
|
// Budget of 1: should trim aggressively, NOT return all messages.
|
|
trimmed := trimMessagesByTokens(nil, messages, 1)
|
|
if len(trimmed) >= len(messages) {
|
|
t.Fatalf("maxTokens=1 should trim history, but got %d messages (same as input)", len(trimmed))
|
|
}
|
|
}
|
|
|
|
func TestTrimMessagesByTokens_EstimatesFallback(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Long user message without usage data — should be estimated.
|
|
longText := make([]byte, 400)
|
|
for i := range longText {
|
|
longText[i] = 'x'
|
|
}
|
|
messages := []messageWithUsage{
|
|
{Message: conversation.ModelMessage{Role: "user", Content: conversation.NewTextContent(string(longText))}},
|
|
{Message: conversation.ModelMessage{Role: "assistant", Content: conversation.NewTextContent("ok")}, UsageOutputTokens: intPtr(10)},
|
|
}
|
|
|
|
// Budget of 50: user message is ~100 estimated tokens (400/4), should be trimmed.
|
|
trimmed := trimMessagesByTokens(nil, messages, 50)
|
|
if len(trimmed) == 2 {
|
|
t.Fatalf("expected long user message without usage to be trimmed via estimation, got %d", len(trimmed))
|
|
}
|
|
}
|