init
This commit is contained in:
@@ -0,0 +1,307 @@
|
||||
import type { LogOption, SerializedMessage } from '../types/logs.js'
|
||||
import { count } from './array.js'
|
||||
import { logForDebugging } from './debug.js'
|
||||
import { getLogDisplayTitle, logError } from './log.js'
|
||||
import { getSmallFastModel } from './model/model.js'
|
||||
import { isLiteLog, loadFullLog } from './sessionStorage.js'
|
||||
import { sideQuery } from './sideQuery.js'
|
||||
import { jsonParse } from './slowOperations.js'
|
||||
|
||||
// Limits for transcript extraction and prompt size

/** Max chars of transcript kept per session; longer transcripts are truncated. */
const MAX_TRANSCRIPT_CHARS = 2000

/**
 * Max messages scanned per session. Sessions with more messages than this
 * contribute only their first and last halves of this budget.
 */
const MAX_MESSAGES_TO_SCAN = 100

/** Max sessions listed in a single search prompt sent to the API. */
const MAX_SESSIONS_TO_SEARCH = 100
|
||||
|
||||
/**
 * System prompt for the session-search side query.
 *
 * Describes the fields each listed session may carry, the matching priority
 * (tags first), asks the model to be inclusive, and requires the reply to be a
 * bare JSON object of the form {"relevant_indices": [...]} with no markdown.
 */
const SESSION_SEARCH_SYSTEM_PROMPT = `Your goal is to find relevant sessions based on a user's search query.

You will be given a list of sessions with their metadata and a search query. Identify which sessions are most relevant to the query.

Each session may include:
- Title (display name or custom title)
- Tag (user-assigned category, shown as [tag: name] - users tag sessions with /tag command to categorize them)
- Branch (git branch name, shown as [branch: name])
- Summary (AI-generated summary)
- First message (beginning of the conversation)
- Transcript (excerpt of conversation content)

IMPORTANT: Tags are user-assigned labels that indicate the session's topic or category. If the query matches a tag exactly or partially, those sessions should be highly prioritized.

For each session, consider (in order of priority):
1. Exact tag matches (highest priority - user explicitly categorized this session)
2. Partial tag matches or tag-related terms
3. Title matches (custom titles or first message content)
4. Branch name matches
5. Summary and transcript content matches
6. Semantic similarity and related concepts

CRITICAL: Be VERY inclusive in your matching. Include sessions that:
- Contain the query term anywhere in any field
- Are semantically related to the query (e.g., "testing" matches sessions about "tests", "unit tests", "QA", etc.)
- Discuss topics that could be related to the query
- Have transcripts that mention the concept even in passing

When in doubt, INCLUDE the session. It's better to return too many results than too few. The user can easily scan through results, but missing relevant sessions is frustrating.

Return sessions ordered by relevance (most relevant first). If truly no sessions have ANY connection to the query, return an empty array - but this should be rare.

Respond with ONLY the JSON object, no markdown formatting:
{"relevant_indices": [2, 5, 0]}`
|
||||
|
||||
/**
 * Shape of the JSON object the search prompt asks the model to return.
 * Each entry is an index into the session list that was sent in the prompt,
 * ordered most relevant first.
 */
type AgenticSearchResult = {
  relevant_indices: number[]
}
|
||||
|
||||
/**
 * Extracts searchable text content from a message.
 *
 * Only 'user' and 'assistant' messages contribute text. String content is
 * returned as-is; array content is reduced to its string entries and the
 * `text` field of object entries, joined with single spaces.
 *
 * @param message - A serialized transcript message.
 * @returns The message's text, or '' when it has no usable text content.
 */
function extractMessageText(message: SerializedMessage): string {
  if (message.type !== 'user' && message.type !== 'assistant') {
    return ''
  }

  const content = 'message' in message ? message.message?.content : undefined
  if (!content) return ''
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return ''

  const texts: string[] = []
  for (const block of content) {
    if (typeof block === 'string') {
      if (block) texts.push(block)
      continue
    }
    // Messages are deserialized data: guard against null or primitive entries,
    // on which the `in` operator would throw a TypeError.
    if (
      block !== null &&
      typeof block === 'object' &&
      'text' in block &&
      typeof block.text === 'string' &&
      block.text
    ) {
      texts.push(block.text)
    }
  }
  return texts.join(' ')
}
|
||||
|
||||
/**
 * Extracts a truncated transcript from session messages.
 *
 * Scans every message when there are at most MAX_MESSAGES_TO_SCAN of them,
 * otherwise only the first and last MAX_MESSAGES_TO_SCAN / 2. The extracted
 * texts are joined, whitespace runs are collapsed to single spaces, and the
 * result is capped at MAX_TRANSCRIPT_CHARS with a trailing '…' when cut.
 *
 * NOTE(review): the cap keeps the front of the joined text, so text from the
 * tail messages only survives when the head messages are short.
 *
 * @param messages - The session's messages, in order.
 * @returns The flattened transcript excerpt, or '' when there is no text.
 */
function extractTranscript(messages: SerializedMessage[]): string {
  if (messages.length === 0) return ''

  // Take messages from start and end to get context
  const half = Math.floor(MAX_MESSAGES_TO_SCAN / 2)
  const messagesToScan =
    messages.length <= MAX_MESSAGES_TO_SCAN
      ? messages
      : [...messages.slice(0, half), ...messages.slice(-half)]

  const text = messagesToScan
    .map(extractMessageText)
    .filter(Boolean)
    .join(' ')
    .replace(/\s+/g, ' ')
    .trim()

  if (text.length <= MAX_TRANSCRIPT_CHARS) return text

  // Do not cut between the two halves of a surrogate pair: a lone high
  // surrogate is not valid text once the prompt is encoded for the API.
  let cut = MAX_TRANSCRIPT_CHARS
  const lastUnit = text.charCodeAt(cut - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut -= 1
  }
  return text.slice(0, cut) + '…'
}
|
||||
|
||||
/**
 * Checks if a log contains the query term in any searchable field.
 *
 * Matching is a plain substring test against the lowercased display title,
 * custom title, tag, git branch, summary, first prompt and, last, the
 * transcript excerpt from extractTranscript (when messages are loaded).
 *
 * @param log - The session log to inspect.
 * @param queryLower - The search term, already lowercased by the caller.
 * @returns true as soon as any field contains the term, otherwise false.
 */
function logContainsQuery(log: LogOption, queryLower: string): boolean {
  // Cheap metadata fields first; absent fields are skipped.
  const metadataFields = [
    getLogDisplayTitle(log),
    log.customTitle,
    log.tag,
    log.gitBranch,
    log.summary,
    log.firstPrompt,
  ]
  if (
    metadataFields.some(field => field?.toLowerCase().includes(queryLower))
  ) {
    return true
  }

  // Check transcript (more expensive, do last)
  if (log.messages && log.messages.length > 0) {
    return extractTranscript(log.messages).toLowerCase().includes(queryLower)
  }

  return false
}
|
||||
|
||||
/**
 * Renders one session as a single prompt entry: its index, display title, and
 * whichever of custom title, tag, branch, summary, first message (first 300
 * chars) and transcript excerpt are available.
 */
function formatSessionEntry(log: LogOption, index: number): string {
  const parts: string[] = [`${index}:`]

  // Title (display title, may be custom or from first prompt)
  const displayTitle = getLogDisplayTitle(log)
  parts.push(displayTitle)

  // Custom title if different from display title
  if (log.customTitle && log.customTitle !== displayTitle) {
    parts.push(`[custom title: ${log.customTitle}]`)
  }
  if (log.tag) {
    parts.push(`[tag: ${log.tag}]`)
  }
  if (log.gitBranch) {
    parts.push(`[branch: ${log.gitBranch}]`)
  }
  if (log.summary) {
    parts.push(`- Summary: ${log.summary}`)
  }

  // First prompt content (truncated)
  if (log.firstPrompt && log.firstPrompt !== 'No prompt') {
    parts.push(`- First message: ${log.firstPrompt.slice(0, 300)}`)
  }

  // Transcript excerpt (if messages are available)
  if (log.messages && log.messages.length > 0) {
    const transcript = extractTranscript(log.messages)
    if (transcript) {
      parts.push(`- Transcript: ${transcript}`)
    }
  }

  return parts.join(' ')
}

/**
 * Pulls the ranked session indices out of the model's reply.
 *
 * Returns undefined when the reply contains no JSON object. Otherwise returns
 * the entries of `relevant_indices` that are integers within
 * [0, sessionCount), de-duplicated, in the order the model gave them. Throws
 * whatever jsonParse throws on malformed JSON.
 */
function parseRelevantIndices(
  responseText: string,
  sessionCount: number,
): number[] | undefined {
  const jsonMatch = responseText.match(/\{[\s\S]*\}/)
  if (!jsonMatch) return undefined

  const parsed: Partial<AgenticSearchResult> | null = jsonParse(jsonMatch[0])
  const rawIndices: unknown = parsed?.relevant_indices
  if (!Array.isArray(rawIndices)) return []

  // A Set iterates in insertion order, so relevance order is preserved while
  // repeated indices collapse to their first occurrence.
  const accepted = new Set<number>()
  for (const value of rawIndices) {
    if (
      typeof value === 'number' &&
      Number.isInteger(value) &&
      value >= 0 &&
      value < sessionCount
    ) {
      accepted.add(value)
    }
  }
  return [...accepted]
}

/**
 * Performs an agentic search using Claude to find relevant sessions
 * based on semantic understanding of the query.
 *
 * Sessions containing the query as a substring are listed first, then other
 * sessions in their given order, up to MAX_SESSIONS_TO_SEARCH in total. Lite
 * logs are expanded with loadFullLog (falling back to the lite log if that
 * fails) so transcripts can be included, and the list is sent to the small
 * fast model through sideQuery.
 *
 * @param query - The user's search text; surrounding whitespace is ignored
 *   when pre-filtering.
 * @param logs - Candidate sessions.
 * @param signal - Optional abort signal, forwarded to sideQuery.
 * @returns The sessions the model selected, in the model's order and without
 *   duplicates. These are the loaded log objects, which for lite logs are the
 *   results of loadFullLog rather than the input objects. Returns [] when the
 *   query is blank, there are no logs, the reply has no text or JSON, or the
 *   query or parsing throws (the error is logged, not rethrown).
 */
export async function agenticSessionSearch(
  query: string,
  logs: LogOption[],
  signal?: AbortSignal,
): Promise<LogOption[]> {
  const trimmedQuery = query.trim()
  if (!trimmedQuery || logs.length === 0) {
    return []
  }

  // Match on the trimmed query so stray whitespace does not defeat the
  // substring pre-filter.
  const queryLower = trimmedQuery.toLowerCase()

  // Pre-filter: find sessions that contain the query term
  // This ensures we search relevant sessions, not just recent ones.
  // Single pass, so each log's transcript is scanned only once.
  const matchingLogs: LogOption[] = []
  const nonMatchingLogs: LogOption[] = []
  for (const log of logs) {
    if (logContainsQuery(log, queryLower)) {
      matchingLogs.push(log)
    } else {
      nonMatchingLogs.push(log)
    }
  }

  // Take up to MAX_SESSIONS_TO_SEARCH matching logs
  // If fewer matches, fill remaining slots with non-matching logs for context
  const logsToSearch = [...matchingLogs, ...nonMatchingLogs].slice(
    0,
    MAX_SESSIONS_TO_SEARCH,
  )

  const hasMessages = (log: LogOption): boolean =>
    (log.messages?.length ?? 0) > 0

  // Debug: log what data we have
  logForDebugging(
    `Agentic search: ${logsToSearch.length}/${logs.length} logs, query="${query}", ` +
      `matching: ${matchingLogs.length}, with messages: ${count(logsToSearch, hasMessages)}`,
  )

  // Load full logs for lite logs to get transcript content
  const logsWithTranscripts = await Promise.all(
    logsToSearch.map(async log => {
      if (!isLiteLog(log)) return log
      try {
        return await loadFullLog(log)
      } catch (error) {
        logError(error as Error)
        // If loading fails, use the lite log (no transcript)
        return log
      }
    }),
  )

  logForDebugging(
    `Agentic search: loaded ${count(logsWithTranscripts, hasMessages)}/${logsToSearch.length} logs with transcripts`,
  )

  // Build session list for the prompt with all searchable metadata
  const sessionList = logsWithTranscripts.map(formatSessionEntry).join('\n')

  const userMessage = `Sessions:
${sessionList}

Search query: "${query}"

Find the sessions that are most relevant to this query.`

  // Debug: log first part of the session list
  logForDebugging(
    `Agentic search prompt (first 500 chars): ${userMessage.slice(0, 500)}...`,
  )

  try {
    const model = getSmallFastModel()
    logForDebugging(`Agentic search using model: ${model}`)

    const response = await sideQuery({
      model,
      system: SESSION_SEARCH_SYSTEM_PROMPT,
      messages: [{ role: 'user', content: userMessage }],
      signal,
      querySource: 'session_search',
    })

    // Extract the text content from the response
    const textContent = response.content.find(block => block.type === 'text')
    if (!textContent || textContent.type !== 'text') {
      logForDebugging('No text content in agentic search response')
      return []
    }

    // Debug: log the response
    logForDebugging(`Agentic search response: ${textContent.text}`)

    // Parse and validate the JSON response
    const relevantIndices = parseRelevantIndices(
      textContent.text,
      logsWithTranscripts.length,
    )
    if (relevantIndices === undefined) {
      logForDebugging('Could not find JSON in agentic search response')
      return []
    }

    // Map indices back to logs (indices are relative to logsWithTranscripts)
    const relevantLogs: LogOption[] = []
    for (const index of relevantIndices) {
      const log = logsWithTranscripts[index]
      if (log) relevantLogs.push(log)
    }

    logForDebugging(
      `Agentic search found ${relevantLogs.length} relevant sessions`,
    )

    return relevantLogs
  } catch (error) {
    logError(error as Error)
    logForDebugging(`Agentic search error: ${error}`)
    return []
  }
}
|
||||
Reference in New Issue
Block a user