init

2026-04-25 06:45:36 +09:00
commit e77acee8ba
1903 changed files with 513282 additions and 0 deletions
@@ -0,0 +1,301 @@
+/**
+ * Query profiling utility for measuring and reporting time spent in the query
+ * pipeline from user input to first token arrival. Enable by setting CLAUDE_CODE_PROFILE_QUERY=1
+ *
+ * Uses Node.js built-in performance hooks API for standard timing measurement.
+ * Tracks each query session with detailed checkpoints for identifying bottlenecks.
+ *
+ * Checkpoints tracked (in order):
+ * - query_user_input_received: Start of profiling
+ * - query_context_loading_start/end: Loading system prompts and contexts
+ * - query_query_start: Entry to query call from REPL
+ * - query_fn_entry: Entry to query() function
+ * - query_microcompact_start/end: Microcompaction of messages
+ * - query_autocompact_start/end: Autocompaction check
+ * - query_setup_start/end: StreamingToolExecutor and model setup
+ * - query_api_loop_start: Start of API retry loop
+ * - query_api_streaming_start: Start of streaming API call
+ * - query_tool_schema_build_start/end: Building tool schemas
+ * - query_message_normalization_start/end: Normalizing messages
+ * - query_client_creation_start/end: Creating Anthropic client
+ * - query_api_request_sent: HTTP request dispatched (before await, inside retry body)
+ * - query_response_headers_received: .withResponse() resolved (headers arrived)
+ * - query_first_chunk_received: First streaming chunk received (TTFT)
+ * - query_api_streaming_end: Streaming complete
+ * - query_tool_execution_start/end: Tool execution
+ * - query_recursive_call: Before recursive query call
+ * - query_end: End of query
+ */
+
+import { logForDebugging } from './debug.js'
+import { isEnvTruthy } from './envUtils.js'
+import { formatMs, formatTimelineLine, getPerformance } from './profilerBase.js'
+
+// Module-level state - initialized once when the module loads
+// eslint-disable-next-line custom-rules/no-process-env-top-level
+const ENABLED = isEnvTruthy(process.env.CLAUDE_CODE_PROFILE_QUERY)
+
+// Track memory snapshots separately (perf_hooks doesn't track memory)
+const memorySnapshots = new Map<string, NodeJS.MemoryUsage>()
+
+// Track query count for reporting
+let queryCount = 0
+
+// Track first token received time separately for summary
+let firstTokenTime: number | null = null
+
+/**
+ * Start profiling a new query session
+ */
+export function startQueryProfile(): void {
+  if (!ENABLED) return
+
+  const perf = getPerformance()
+
+  // Clear previous marks and memory snapshots
+  perf.clearMarks()
+  memorySnapshots.clear()
+  firstTokenTime = null
+
+  queryCount++
+
+  // Record the start checkpoint
+  queryCheckpoint('query_user_input_received')
+}
+
+/**
+ * Record a checkpoint with the given name
+ */
+export function queryCheckpoint(name: string): void {
+  if (!ENABLED) return
+
+  const perf = getPerformance()
+  perf.mark(name)
+  memorySnapshots.set(name, process.memoryUsage())
+
+  // Track first token specially
+  if (name === 'query_first_chunk_received' && firstTokenTime === null) {
+    const marks = perf.getEntriesByType('mark')
+    if (marks.length > 0) {
+      const lastMark = marks[marks.length - 1]
+      firstTokenTime = lastMark?.startTime ?? 0
+    }
+  }
+}
+
+/**
+ * End the current query profiling session
+ */
+export function endQueryProfile(): void {
+  if (!ENABLED) return
+
+  queryCheckpoint('query_profile_end')
+}
+
+/**
+ * Identify slow operations (> 100ms delta)
+ */
+function getSlowWarning(deltaMs: number, name: string): string {
+  // Don't flag the first checkpoint as slow - it measures time from process start,
+  // not actual processing overhead
+  if (name === 'query_user_input_received') {
+    return ''
+  }
+
+  if (deltaMs > 1000) {
+    return ` ⚠️  VERY SLOW`
+  }
+  if (deltaMs > 100) {
+    return ` ⚠️  SLOW`
+  }
+
+  // Specific warnings for known bottlenecks
+  if (name.includes('git_status') && deltaMs > 50) {
+    return ' ⚠️  git status'
+  }
+  if (name.includes('tool_schema') && deltaMs > 50) {
+    return ' ⚠️  tool schemas'
+  }
+  if (name.includes('client_creation') && deltaMs > 50) {
+    return ' ⚠️  client creation'
+  }
+
+  return ''
+}
+
+/**
+ * Get a formatted report of all checkpoints for the current/last query
+ */
+function getQueryProfileReport(): string {
+  if (!ENABLED) {
+    return 'Query profiling not enabled (set CLAUDE_CODE_PROFILE_QUERY=1)'
+  }
+
+  const perf = getPerformance()
+  const marks = perf.getEntriesByType('mark')
+  if (marks.length === 0) {
+    return 'No query profiling checkpoints recorded'
+  }
+
+  const lines: string[] = []
+  lines.push('='.repeat(80))
+  lines.push(`QUERY PROFILING REPORT - Query #${queryCount}`)
+  lines.push('='.repeat(80))
+  lines.push('')
+
+  // Use first mark as baseline (query start time) to show relative times
+  const baselineTime = marks[0]?.startTime ?? 0
+  let prevTime = baselineTime
+  let apiRequestSentTime = 0
+  let firstChunkTime = 0
+
+  for (const mark of marks) {
+    const relativeTime = mark.startTime - baselineTime
+    const deltaMs = mark.startTime - prevTime
+    lines.push(
+      formatTimelineLine(
+        relativeTime,
+        deltaMs,
+        mark.name,
+        memorySnapshots.get(mark.name),
+        10,
+        9,
+        getSlowWarning(deltaMs, mark.name),
+      ),
+    )
+
+    // Track key milestones for summary (use relative times)
+    if (mark.name === 'query_api_request_sent') {
+      apiRequestSentTime = relativeTime
+    }
+    if (mark.name === 'query_first_chunk_received') {
+      firstChunkTime = relativeTime
+    }
+
+    prevTime = mark.startTime
+  }
+
+  // Calculate summary statistics (relative to baseline)
+  const lastMark = marks[marks.length - 1]
+  const totalTime = lastMark ? lastMark.startTime - baselineTime : 0
+
+  lines.push('')
+  lines.push('-'.repeat(80))
+
+  if (firstChunkTime > 0) {
+    const preRequestOverhead = apiRequestSentTime
+    const networkLatency = firstChunkTime - apiRequestSentTime
+    const preRequestPercent = (
+      (preRequestOverhead / firstChunkTime) *
+      100
+    ).toFixed(1)
+    const networkPercent = ((networkLatency / firstChunkTime) * 100).toFixed(1)
+
+    lines.push(`Total TTFT: ${formatMs(firstChunkTime)}ms`)
+    lines.push(
+      `  - Pre-request overhead: ${formatMs(preRequestOverhead)}ms (${preRequestPercent}%)`,
+    )
+    lines.push(
+      `  - Network latency: ${formatMs(networkLatency)}ms (${networkPercent}%)`,
+    )
+  } else {
+    lines.push(`Total time: ${formatMs(totalTime)}ms`)
+  }
+
+  // Add phase summary
+  lines.push(getPhaseSummary(marks, baselineTime))
+
+  lines.push('='.repeat(80))
+
+  return lines.join('\n')
+}
+
+/**
+ * Get phase-based summary showing time spent in each major phase
+ */
+function getPhaseSummary(
+  marks: Array<{ name: string; startTime: number }>,
+  baselineTime: number,
+): string {
+  const phases: Array<{ name: string; start: string; end: string }> = [
+    {
+      name: 'Context loading',
+      start: 'query_context_loading_start',
+      end: 'query_context_loading_end',
+    },
+    {
+      name: 'Microcompact',
+      start: 'query_microcompact_start',
+      end: 'query_microcompact_end',
+    },
+    {
+      name: 'Autocompact',
+      start: 'query_autocompact_start',
+      end: 'query_autocompact_end',
+    },
+    { name: 'Query setup', start: 'query_setup_start', end: 'query_setup_end' },
+    {
+      name: 'Tool schemas',
+      start: 'query_tool_schema_build_start',
+      end: 'query_tool_schema_build_end',
+    },
+    {
+      name: 'Message normalization',
+      start: 'query_message_normalization_start',
+      end: 'query_message_normalization_end',
+    },
+    {
+      name: 'Client creation',
+      start: 'query_client_creation_start',
+      end: 'query_client_creation_end',
+    },
+    {
+      name: 'Network TTFB',
+      start: 'query_api_request_sent',
+      end: 'query_first_chunk_received',
+    },
+    {
+      name: 'Tool execution',
+      start: 'query_tool_execution_start',
+      end: 'query_tool_execution_end',
+    },
+  ]
+
+  const markMap = new Map(marks.map(m => [m.name, m.startTime - baselineTime]))
+
+  const lines: string[] = []
+  lines.push('')
+  lines.push('PHASE BREAKDOWN:')
+
+  for (const phase of phases) {
+    const startTime = markMap.get(phase.start)
+    const endTime = markMap.get(phase.end)
+
+    if (startTime !== undefined && endTime !== undefined) {
+      const duration = endTime - startTime
+      const bar = '█'.repeat(Math.min(Math.ceil(duration / 10), 50)) // 1 block per 10ms, max 50
+      lines.push(
+        `  ${phase.name.padEnd(22)} ${formatMs(duration).padStart(10)}ms ${bar}`,
+      )
+    }
+  }
+
+  // Calculate pre-API overhead (everything before api_request_sent)
+  const apiRequestSent = markMap.get('query_api_request_sent')
+  if (apiRequestSent !== undefined) {
+    lines.push('')
+    lines.push(
+      `  ${'Total pre-API overhead'.padEnd(22)} ${formatMs(apiRequestSent).padStart(10)}ms`,
+    )
+  }
+
+  return lines.join('\n')
+}
+
+/**
+ * Log the query profile report to debug output
+ */
+export function logQueryProfileReport(): void {
+  if (!ENABLED) return
+  logForDebugging(getQueryProfileReport())
+}