init

2026-04-25 06:45:36 +09:00
commit e77acee8ba
1903 changed files with 513282 additions and 0 deletions
@@ -0,0 +1,222 @@
+import type Anthropic from '@anthropic-ai/sdk'
+import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages.js'
+import {
+  getLastApiCompletionTimestamp,
+  setLastApiCompletionTimestamp,
+} from '../bootstrap/state.js'
+import { STRUCTURED_OUTPUTS_BETA_HEADER } from '../constants/betas.js'
+import type { QuerySource } from '../constants/querySource.js'
+import {
+  getAttributionHeader,
+  getCLISyspromptPrefix,
+} from '../constants/system.js'
+import { logEvent } from '../services/analytics/index.js'
+import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../services/analytics/metadata.js'
+import { getAPIMetadata } from '../services/api/claude.js'
+import { getAnthropicClient } from '../services/api/client.js'
+import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
+import { computeFingerprint } from './fingerprint.js'
+import { normalizeModelStringForAPI } from './model/model.js'
+
+type MessageParam = Anthropic.MessageParam
+type TextBlockParam = Anthropic.TextBlockParam
+type Tool = Anthropic.Tool
+type ToolChoice = Anthropic.ToolChoice
+type BetaMessage = Anthropic.Beta.Messages.BetaMessage
+type BetaJSONOutputFormat = Anthropic.Beta.Messages.BetaJSONOutputFormat
+type BetaThinkingConfigParam = Anthropic.Beta.Messages.BetaThinkingConfigParam
+
+export type SideQueryOptions = {
+  /** Model to use for the query */
+  model: string
+  /**
+   * System prompt - string or array of text blocks (will be prefixed with CLI attribution).
+   *
+   * The attribution header is always placed in its own TextBlockParam block to ensure
+   * server-side parsing correctly extracts the cc_entrypoint value without including
+   * system prompt content.
+   */
+  system?: string | TextBlockParam[]
+  /** Messages to send (supports cache_control on content blocks) */
+  messages: MessageParam[]
+  /** Optional tools (supports both standard Tool[] and BetaToolUnion[] for custom tool types) */
+  tools?: Tool[] | BetaToolUnion[]
+  /** Optional tool choice (use { type: 'tool', name: 'x' } for forced output) */
+  tool_choice?: ToolChoice
+  /** Optional JSON output format for structured responses */
+  output_format?: BetaJSONOutputFormat
+  /** Max tokens (default: 1024) */
+  max_tokens?: number
+  /** Max retries (default: 2) */
+  maxRetries?: number
+  /** Abort signal */
+  signal?: AbortSignal
+  /** Skip CLI system prompt prefix (keeps attribution header for OAuth). For internal classifiers that provide their own prompt. */
+  skipSystemPromptPrefix?: boolean
+  /** Temperature override */
+  temperature?: number
+  /** Thinking budget (enables thinking), or `false` to send `{ type: 'disabled' }`. */
+  thinking?: number | false
+  /** Stop sequences — generation stops when any of these strings is emitted */
+  stop_sequences?: string[]
+  /** Attributes this call in tengu_api_success for COGS joining against reporting.sampling_calls. */
+  querySource: QuerySource
+}
+
+/**
+ * Extract text from first user message for fingerprint computation.
+ */
+function extractFirstUserMessageText(messages: MessageParam[]): string {
+  const firstUserMessage = messages.find(m => m.role === 'user')
+  if (!firstUserMessage) return ''
+
+  const content = firstUserMessage.content
+  if (typeof content === 'string') return content
+
+  // Array of content blocks - find first text block
+  const textBlock = content.find(block => block.type === 'text')
+  return textBlock?.type === 'text' ? textBlock.text : ''
+}
+
+/**
+ * Lightweight API wrapper for "side queries" outside the main conversation loop.
+ *
+ * Use this instead of direct client.beta.messages.create() calls to ensure
+ * proper OAuth token validation with fingerprint attribution headers.
+ *
+ * This handles:
+ * - Fingerprint computation for OAuth validation
+ * - Attribution header injection
+ * - CLI system prompt prefix
+ * - Proper betas for the model
+ * - API metadata
+ * - Model string normalization (strips [1m] suffix for API)
+ *
+ * @example
+ * // Permission explainer
+ * await sideQuery({ querySource: 'permission_explainer', model, system: SYSTEM_PROMPT, messages, tools, tool_choice })
+ *
+ * @example
+ * // Session search
+ * await sideQuery({ querySource: 'session_search', model, system: SEARCH_PROMPT, messages })
+ *
+ * @example
+ * // Model validation
+ * await sideQuery({ querySource: 'model_validation', model, max_tokens: 1, messages: [{ role: 'user', content: 'Hi' }] })
+ */
+export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
+  const {
+    model,
+    system,
+    messages,
+    tools,
+    tool_choice,
+    output_format,
+    max_tokens = 1024,
+    maxRetries = 2,
+    signal,
+    skipSystemPromptPrefix,
+    temperature,
+    thinking,
+    stop_sequences,
+  } = opts
+
+  const client = await getAnthropicClient({
+    maxRetries,
+    model,
+    source: 'side_query',
+  })
+  const betas = [...getModelBetas(model)]
+  // Add structured-outputs beta if using output_format and provider supports it
+  if (
+    output_format &&
+    modelSupportsStructuredOutputs(model) &&
+    !betas.includes(STRUCTURED_OUTPUTS_BETA_HEADER)
+  ) {
+    betas.push(STRUCTURED_OUTPUTS_BETA_HEADER)
+  }
+
+  // Extract first user message text for fingerprint
+  const messageText = extractFirstUserMessageText(messages)
+
+  // Compute fingerprint for OAuth attribution
+  const fingerprint = computeFingerprint(messageText, MACRO.VERSION)
+  const attributionHeader = getAttributionHeader(fingerprint)
+
+  // Build system as array to keep attribution header in its own block
+  // (prevents server-side parsing from including system content in cc_entrypoint)
+  const systemBlocks: TextBlockParam[] = [
+    attributionHeader ? { type: 'text', text: attributionHeader } : null,
+    // Skip CLI system prompt prefix for internal classifiers that provide their own prompt
+    ...(skipSystemPromptPrefix
+      ? []
+      : [
+          {
+            type: 'text' as const,
+            text: getCLISyspromptPrefix({
+              isNonInteractive: false,
+              hasAppendSystemPrompt: false,
+            }),
+          },
+        ]),
+    ...(Array.isArray(system)
+      ? system
+      : system
+        ? [{ type: 'text' as const, text: system }]
+        : []),
+  ].filter((block): block is TextBlockParam => block !== null)
+
+  let thinkingConfig: BetaThinkingConfigParam | undefined
+  if (thinking === false) {
+    thinkingConfig = { type: 'disabled' }
+  } else if (thinking !== undefined) {
+    thinkingConfig = {
+      type: 'enabled',
+      budget_tokens: Math.min(thinking, max_tokens - 1),
+    }
+  }
+
+  const normalizedModel = normalizeModelStringForAPI(model)
+  const start = Date.now()
+  // biome-ignore lint/plugin: this IS the wrapper that handles OAuth attribution
+  const response = await client.beta.messages.create(
+    {
+      model: normalizedModel,
+      max_tokens,
+      system: systemBlocks,
+      messages,
+      ...(tools && { tools }),
+      ...(tool_choice && { tool_choice }),
+      ...(output_format && { output_config: { format: output_format } }),
+      ...(temperature !== undefined && { temperature }),
+      ...(stop_sequences && { stop_sequences }),
+      ...(thinkingConfig && { thinking: thinkingConfig }),
+      ...(betas.length > 0 && { betas }),
+      metadata: getAPIMetadata(),
+    },
+    { signal },
+  )
+
+  const requestId =
+    (response as { _request_id?: string | null })._request_id ?? undefined
+  const now = Date.now()
+  const lastCompletion = getLastApiCompletionTimestamp()
+  logEvent('tengu_api_success', {
+    requestId:
+      requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    querySource:
+      opts.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    model:
+      normalizedModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    inputTokens: response.usage.input_tokens,
+    outputTokens: response.usage.output_tokens,
+    cachedInputTokens: response.usage.cache_read_input_tokens ?? 0,
+    uncachedInputTokens: response.usage.cache_creation_input_tokens ?? 0,
+    durationMsIncludingRetries: now - start,
+    timeSinceLastApiCallMs:
+      lastCompletion !== null ? now - lastCompletion : undefined,
+  })
+  setLastApiCompletionTimestamp(now)
+
+  return response
+}