init
This commit is contained in:
@@ -0,0 +1,351 @@
|
||||
import { feature } from 'bun:bundle'
|
||||
import { markPostCompaction } from 'src/bootstrap/state.js'
|
||||
import { getSdkBetas } from '../../bootstrap/state.js'
|
||||
import type { QuerySource } from '../../constants/querySource.js'
|
||||
import type { ToolUseContext } from '../../Tool.js'
|
||||
import type { Message } from '../../types/message.js'
|
||||
import { getGlobalConfig } from '../../utils/config.js'
|
||||
import { getContextWindowForModel } from '../../utils/context.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { isEnvTruthy } from '../../utils/envUtils.js'
|
||||
import { hasExactErrorMessage } from '../../utils/errors.js'
|
||||
import type { CacheSafeParams } from '../../utils/forkedAgent.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { tokenCountWithEstimation } from '../../utils/tokens.js'
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
|
||||
import { getMaxOutputTokensForModel } from '../api/claude.js'
|
||||
import { notifyCompaction } from '../api/promptCacheBreakDetection.js'
|
||||
import { setLastSummarizedMessageId } from '../SessionMemory/sessionMemoryUtils.js'
|
||||
import {
|
||||
type CompactionResult,
|
||||
compactConversation,
|
||||
ERROR_MESSAGE_USER_ABORT,
|
||||
type RecompactionInfo,
|
||||
} from './compact.js'
|
||||
import { runPostCompactCleanup } from './postCompactCleanup.js'
|
||||
import { trySessionMemoryCompaction } from './sessionMemoryCompact.js'
|
||||
|
||||
/**
 * Output tokens reserved for the summary generated during compaction.
 * Based on p99.99 of compact summary output being 17,387 tokens.
 */
const MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20_000
|
||||
|
||||
/**
 * Computes how much of the model's context window is usable for conversation
 * once room for the compaction summary has been set aside.
 *
 * The reservation is the smaller of the model's max output tokens and
 * MAX_OUTPUT_TOKENS_FOR_SUMMARY. The window can additionally be capped (never
 * raised) through the CLAUDE_CODE_AUTO_COMPACT_WINDOW environment variable.
 *
 * @param model - Model identifier used to look up window and output limits.
 * @returns The (possibly capped) context window minus the summary
 *   reservation. The result is negative when the cap is smaller than the
 *   reservation.
 */
export function getEffectiveContextWindowSize(model: string): number {
  const reservedTokensForSummary = Math.min(
    getMaxOutputTokensForModel(model),
    MAX_OUTPUT_TOKENS_FOR_SUMMARY,
  )
  let contextWindow = getContextWindowForModel(model, getSdkBetas())

  // Accept only a plain positive integer. parseInt on its own reads "200k" or
  // "200,000" as 200 and "1e5" as 1, which would silently shrink the window
  // to almost nothing; malformed values are ignored instead.
  const autoCompactWindow = process.env.CLAUDE_CODE_AUTO_COMPACT_WINDOW?.trim()
  if (autoCompactWindow && /^\d+$/.test(autoCompactWindow)) {
    const parsed = Number.parseInt(autoCompactWindow, 10)
    if (parsed > 0) {
      contextWindow = Math.min(contextWindow, parsed)
    }
  }

  return contextWindow - reservedTokensForSummary
}
|
||||
|
||||
/**
 * Bookkeeping the caller threads into autoCompactIfNeeded across query loop
 * iterations.
 */
export type AutoCompactTrackingState = {
  /** True once a compaction has happened; marks a later one as a recompaction. */
  compacted: boolean
  /** Reported to compaction as the number of turns since the previous compact. */
  turnCounter: number
  /** Unique ID per turn; reported as the previous compact's turn ID. */
  turnId: string
  /**
   * Consecutive autocompact failures. Reset on success.
   * Used as a circuit breaker to stop retrying when the context is
   * irrecoverably over the limit (e.g., prompt_too_long).
   */
  consecutiveFailures?: number
}
|
||||
|
||||
/** Headroom subtracted from the effective context window to get the auto-compact threshold. */
export const AUTOCOMPACT_BUFFER_TOKENS = 13_000
/** Distance below the active threshold at which the warning state turns on. */
export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
/** Distance below the active threshold at which the error state turns on. */
export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
/** Headroom subtracted from the effective context window to get the default blocking limit. */
export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000
|
||||
|
||||
/**
 * Stop trying autocompact after this many consecutive failures.
 * BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
 * in a single session, wasting ~250K API calls/day globally.
 */
const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3
|
||||
|
||||
/**
 * Returns the token count at which auto-compact should fire for a model.
 *
 * By default this is the effective context window minus
 * AUTOCOMPACT_BUFFER_TOKENS. CLAUDE_AUTOCOMPACT_PCT_OVERRIDE, when set to a
 * number in (0, 100], is applied as a percentage of the effective window; the
 * override can only lower the threshold, never raise it above the default.
 *
 * @param model - Model identifier passed to getEffectiveContextWindowSize.
 * @returns The auto-compact threshold in tokens.
 */
export function getAutoCompactThreshold(model: string): number {
  const effectiveContextWindow = getEffectiveContextWindowSize(model)
  const autocompactThreshold =
    effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS

  // Override for easier testing of autocompact
  const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE
  if (envPercent) {
    const parsed = Number.parseFloat(envPercent)
    if (!Number.isNaN(parsed) && parsed > 0 && parsed <= 100) {
      const percentageThreshold = Math.floor(
        effectiveContextWindow * (parsed / 100),
      )
      return Math.min(percentageThreshold, autocompactThreshold)
    }
  }

  return autocompactThreshold
}
|
||||
|
||||
/**
 * Classifies current token usage against the warning, error, auto-compact
 * and blocking thresholds for a model.
 *
 * The "active" threshold is the auto-compact threshold when auto-compact is
 * enabled, otherwise the effective context window. Warning and error levels
 * sit a fixed buffer below it. The blocking limit is the effective context
 * window minus MANUAL_COMPACT_BUFFER_TOKENS unless
 * CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE holds a positive integer.
 *
 * @param tokenUsage - Current token count of the conversation.
 * @param model - Model identifier used to look up the thresholds.
 * @returns `percentLeft` as a whole number in 0..100 relative to the active
 *   threshold, plus one flag per threshold that `tokenUsage` has reached.
 */
export function calculateTokenWarningState(
  tokenUsage: number,
  model: string,
): {
  percentLeft: number
  isAboveWarningThreshold: boolean
  isAboveErrorThreshold: boolean
  isAboveAutoCompactThreshold: boolean
  isAtBlockingLimit: boolean
} {
  const autoCompactEnabled = isAutoCompactEnabled()
  const autoCompactThreshold = getAutoCompactThreshold(model)
  const effectiveContextWindow = getEffectiveContextWindowSize(model)
  const threshold = autoCompactEnabled
    ? autoCompactThreshold
    : effectiveContextWindow

  // A non-positive threshold (possible when the window is overridden to a
  // very small value) would make the ratio NaN or flip its sign and report
  // more than 100% left. Treat it as no headroom, and clamp to 0..100 so a
  // negative usage estimate cannot exceed 100 either.
  const percentLeft =
    threshold > 0
      ? Math.min(
          100,
          Math.max(
            0,
            Math.round(((threshold - tokenUsage) / threshold) * 100),
          ),
        )
      : 0

  const warningThreshold = threshold - WARNING_THRESHOLD_BUFFER_TOKENS
  const errorThreshold = threshold - ERROR_THRESHOLD_BUFFER_TOKENS

  const isAboveWarningThreshold = tokenUsage >= warningThreshold
  const isAboveErrorThreshold = tokenUsage >= errorThreshold

  const isAboveAutoCompactThreshold =
    autoCompactEnabled && tokenUsage >= autoCompactThreshold

  const defaultBlockingLimit =
    effectiveContextWindow - MANUAL_COMPACT_BUFFER_TOKENS

  // Allow override for testing
  const blockingLimitOverride = process.env.CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE
  const parsedOverride = blockingLimitOverride
    ? Number.parseInt(blockingLimitOverride, 10)
    : Number.NaN
  const blockingLimit =
    !Number.isNaN(parsedOverride) && parsedOverride > 0
      ? parsedOverride
      : defaultBlockingLimit

  const isAtBlockingLimit = tokenUsage >= blockingLimit

  return {
    percentLeft,
    isAboveWarningThreshold,
    isAboveErrorThreshold,
    isAboveAutoCompactThreshold,
    isAtBlockingLimit,
  }
}
|
||||
|
||||
/**
 * Reports whether automatic compaction is currently allowed.
 *
 * Environment switches take precedence over the stored setting: DISABLE_COMPACT
 * and DISABLE_AUTO_COMPACT each force `false` when truthy. Otherwise the
 * `autoCompactEnabled` value from the global config is returned.
 */
export function isAutoCompactEnabled(): boolean {
  if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
    return false
  }
  // Allow disabling just auto-compact (keeps manual /compact working)
  if (isEnvTruthy(process.env.DISABLE_AUTO_COMPACT)) {
    return false
  }
  // Check if user has disabled auto-compact in their settings
  const userConfig = getGlobalConfig()
  return userConfig.autoCompactEnabled
}
|
||||
|
||||
/**
 * Decides whether proactive auto-compaction should run for this conversation.
 *
 * Returns `false` without counting tokens when the query comes from a source
 * that must not recurse into compaction, when auto-compact is disabled, or
 * when a feature-gated mode (reactive-only, context collapse) suppresses it.
 * Otherwise the estimated token count, less `snipTokensFreed`, is compared
 * against the auto-compact threshold for `model`.
 *
 * @param messages - Conversation messages to estimate the token count from.
 * @param model - Model identifier used to look up the threshold.
 * @param querySource - Origin of the query; some sources never auto-compact.
 * @param snipTokensFreed - Tokens already freed by snip, subtracted from the
 *   estimate. Defaults to 0.
 * @returns Whether the adjusted token count has reached the threshold.
 */
export async function shouldAutoCompact(
  messages: Message[],
  model: string,
  querySource?: QuerySource,
  // Snip removes messages but the surviving assistant's usage still reflects
  // pre-snip context, so tokenCountWithEstimation can't see the savings.
  // Subtract the rough-delta that snip already computed.
  snipTokensFreed = 0,
): Promise<boolean> {
  // Recursion guards. session_memory and compact are forked agents that
  // would deadlock.
  if (querySource === 'session_memory' || querySource === 'compact') {
    return false
  }
  // marble_origami is the ctx-agent — if ITS context blows up and
  // autocompact fires, runPostCompactCleanup calls resetContextCollapse()
  // which destroys the MAIN thread's committed log (module-level state
  // shared across forks). Inside feature() so the string DCEs from
  // external builds (it's in excluded-strings.txt).
  if (feature('CONTEXT_COLLAPSE')) {
    if (querySource === 'marble_origami') {
      return false
    }
  }

  if (!isAutoCompactEnabled()) {
    return false
  }

  // Reactive-only mode: suppress proactive autocompact, let reactive compact
  // catch the API's prompt-too-long. feature() wrapper keeps the flag string
  // out of external builds (REACTIVE_COMPACT is ant-only).
  // Note: returning false here also means autoCompactIfNeeded never reaches
  // trySessionMemoryCompaction in the query loop — the /compact call site
  // still tries session memory first. Revisit if reactive-only graduates.
  if (feature('REACTIVE_COMPACT')) {
    if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) {
      return false
    }
  }

  // Context-collapse mode: same suppression. Collapse IS the context
  // management system when it's on — the 90% commit / 95% blocking-spawn
  // flow owns the headroom problem. Autocompact firing at effective-13k
  // (~93% of effective) sits right between collapse's commit-start (90%)
  // and blocking (95%), so it would race collapse and usually win, nuking
  // granular context that collapse was about to save. Gating here rather
  // than in isAutoCompactEnabled() keeps reactiveCompact alive as the 413
  // fallback (it consults isAutoCompactEnabled directly) and leaves
  // sessionMemory + manual /compact working.
  //
  // Consult isContextCollapseEnabled (not the raw gate) so the
  // CLAUDE_CONTEXT_COLLAPSE env override is honored here too. require()
  // inside the block breaks the init-time cycle (this file exports
  // getEffectiveContextWindowSize which collapse's index imports).
  if (feature('CONTEXT_COLLAPSE')) {
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { isContextCollapseEnabled } =
      require('../contextCollapse/index.js') as typeof import('../contextCollapse/index.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    if (isContextCollapseEnabled()) {
      return false
    }
  }

  const tokenCount = tokenCountWithEstimation(messages) - snipTokensFreed
  // threshold and effectiveWindow are computed here only for the debug line.
  const threshold = getAutoCompactThreshold(model)
  const effectiveWindow = getEffectiveContextWindowSize(model)

  logForDebugging(
    `autocompact: tokens=${tokenCount} threshold=${threshold} effectiveWindow=${effectiveWindow}${snipTokensFreed > 0 ? ` snipFreed=${snipTokensFreed}` : ''}`,
  )

  const { isAboveAutoCompactThreshold } = calculateTokenWarningState(
    tokenCount,
    model,
  )

  return isAboveAutoCompactThreshold
}
|
||||
|
||||
/**
 * Runs auto-compaction when the conversation has reached its threshold.
 *
 * Order of operations:
 *   1. Bail out if DISABLE_COMPACT is set or the failure circuit breaker has
 *      tripped.
 *   2. Ask shouldAutoCompact whether compaction is due.
 *   3. Try session memory compaction first; if it yields a result, use it.
 *   4. Otherwise fall back to compactConversation.
 *
 * @param messages - Current conversation messages.
 * @param toolUseContext - Supplies the main loop model and the agent ID.
 * @param cacheSafeParams - Passed through to compactConversation.
 * @param querySource - Origin of the query, forwarded to the helpers.
 * @param tracking - State from earlier iterations (recompaction info and the
 *   consecutive failure count).
 * @param snipTokensFreed - Forwarded to shouldAutoCompact.
 * @returns `wasCompacted` plus, on success, the compaction result and a
 *   failure count of 0; on a failed compaction, the incremented failure
 *   count. Early exits return only `wasCompacted: false`.
 */
export async function autoCompactIfNeeded(
  messages: Message[],
  toolUseContext: ToolUseContext,
  cacheSafeParams: CacheSafeParams,
  querySource?: QuerySource,
  tracking?: AutoCompactTrackingState,
  snipTokensFreed?: number,
): Promise<{
  wasCompacted: boolean
  compactionResult?: CompactionResult
  consecutiveFailures?: number
}> {
  if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
    return { wasCompacted: false }
  }

  // Circuit breaker: stop retrying after N consecutive failures.
  // Without this, sessions where context is irrecoverably over the limit
  // hammer the API with doomed compaction attempts on every turn.
  if (
    tracking?.consecutiveFailures !== undefined &&
    tracking.consecutiveFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES
  ) {
    return { wasCompacted: false }
  }

  const model = toolUseContext.options.mainLoopModel
  const shouldCompact = await shouldAutoCompact(
    messages,
    model,
    querySource,
    snipTokensFreed,
  )

  if (!shouldCompact) {
    return { wasCompacted: false }
  }

  const recompactionInfo: RecompactionInfo = {
    isRecompactionInChain: tracking?.compacted === true,
    turnsSincePreviousCompact: tracking?.turnCounter ?? -1,
    previousCompactTurnId: tracking?.turnId,
    autoCompactThreshold: getAutoCompactThreshold(model),
    querySource,
  }

  // EXPERIMENT: Try session memory compaction first
  const sessionMemoryResult = await trySessionMemoryCompaction(
    messages,
    toolUseContext.agentId,
    recompactionInfo.autoCompactThreshold,
  )
  if (sessionMemoryResult) {
    // Reset lastSummarizedMessageId since session memory compaction prunes messages
    // and the old message UUID will no longer exist after the REPL replaces messages
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)
    // Reset cache read baseline so the post-compact drop isn't flagged as a
    // break. compactConversation does this internally; SM-compact doesn't.
    // BQ 2026-03-01: missing this made 20% of tengu_prompt_cache_break events
    // false positives (systemPromptChanged=true, timeSinceLastAssistantMsg=-1).
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      notifyCompaction(querySource ?? 'compact', toolUseContext.agentId)
    }
    markPostCompaction()
    return {
      wasCompacted: true,
      compactionResult: sessionMemoryResult,
      // Reset failure count on success, same as the legacy path below, so a
      // successful session-memory compaction also clears the circuit breaker.
      consecutiveFailures: 0,
    }
  }

  try {
    const compactionResult = await compactConversation(
      messages,
      toolUseContext,
      cacheSafeParams,
      true, // Suppress user questions for autocompact
      undefined, // No custom instructions for autocompact
      true, // isAutoCompact
      recompactionInfo,
    )

    // Reset lastSummarizedMessageId since legacy compaction replaces all messages
    // and the old message UUID will no longer exist in the new messages array
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)

    return {
      wasCompacted: true,
      compactionResult,
      // Reset failure count on success
      consecutiveFailures: 0,
    }
  } catch (error) {
    // User aborts are expected, so they are not logged as errors.
    // NOTE(review): an abort still increments the failure count below and so
    // counts toward the circuit breaker — confirm that this is intended.
    if (!hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT)) {
      logError(error)
    }
    // Increment consecutive failure count for circuit breaker.
    // The caller threads this through autoCompactTracking so the
    // next query loop iteration can skip futile retry attempts.
    const prevFailures = tracking?.consecutiveFailures ?? 0
    const nextFailures = prevFailures + 1
    if (nextFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES) {
      logForDebugging(
        `autocompact: circuit breaker tripped after ${nextFailures} consecutive failures — skipping future attempts this session`,
        { level: 'warn' },
      )
    }
    return { wasCompacted: false, consecutiveFailures: nextFailures }
  }
}
|
||||
Reference in New Issue
Block a user