/** * Client-side secret scanner for team memory (PSR M22174). * * Scans content for credentials before upload so secrets never leave the * user's machine. Uses a curated subset of high-confidence rules from * gitleaks (https://github.com/gitleaks/gitleaks, MIT license) — only * rules with distinctive prefixes that have near-zero false-positive * rates are included. Generic keyword-context rules are omitted. * * Rule IDs and regexes sourced directly from the public gitleaks config: * https://github.com/gitleaks/gitleaks/blob/master/config/gitleaks.toml * * JS regex notes: * - gitleaks uses Go regex; inline (?i) and mode groups (?-i:...) are * not portable to JS. Affected rules are rewritten with explicit * character classes ([a-zA-Z0-9] instead of (?i)[a-z0-9]). * - Trailing boundary alternations like (?:[\x60'"\s;]|\\[nr]|$) from * Go regex are kept (JS $ matches end-of-string in default mode). */ import { capitalize } from '../../utils/stringUtils.js' type SecretRule = { /** Gitleaks rule ID (kebab-case), used in labels and analytics */ id: string /** Regex source, lazily compiled on first scan */ source: string /** Optional JS regex flags (most rules are case-sensitive by default) */ flags?: string } export type SecretMatch = { /** Gitleaks rule ID that matched (e.g., "github-pat", "aws-access-token") */ ruleId: string /** Human-readable label derived from the rule ID */ label: string } // ─── Curated rules ────────────────────────────────────────────── // High-confidence patterns from gitleaks with distinctive prefixes. // Ordered roughly by likelihood of appearing in dev-team content. // Anthropic API key prefix, assembled at runtime so the literal byte // sequence isn't present in the external bundle (excluded-strings check). // join() is not constant-folded by the minifier. const ANT_KEY_PFX = ['sk', 'ant', 'api'].join('-') const SECRET_RULES: SecretRule[] = [ // — Cloud providers — { id: 'aws-access-token', source: '\\b((?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]{16})\\b', }, { id: 'gcp-api-key', source: '\\b(AIza[\\w-]{35})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'azure-ad-client-secret', source: '(?:^|[\\\\\'"\\x60\\s>=:(,)])([a-zA-Z0-9_~.]{3}\\dQ~[a-zA-Z0-9_~.-]{31,34})(?:$|[\\\\\'"\\x60\\s<),])', }, { id: 'digitalocean-pat', source: '\\b(dop_v1_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'digitalocean-access-token', source: '\\b(doo_v1_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, // — AI APIs — { id: 'anthropic-api-key', source: `\\b(${ANT_KEY_PFX}03-[a-zA-Z0-9_\\-]{93}AA)(?:[\\x60'"\\s;]|\\\\[nr]|$)`, }, { id: 'anthropic-admin-api-key', source: '\\b(sk-ant-admin01-[a-zA-Z0-9_\\-]{93}AA)(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'openai-api-key', source: '\\b(sk-(?:proj|svcacct|admin)-(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})T3BlbkFJ(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})\\b|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'huggingface-access-token', // gitleaks: hf_(?i:[a-z]{34}) → JS: hf_[a-zA-Z]{34} source: '\\b(hf_[a-zA-Z]{34})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, // — Version control — { id: 'github-pat', source: 'ghp_[0-9a-zA-Z]{36}', }, { id: 'github-fine-grained-pat', source: 'github_pat_\\w{82}', }, { id: 'github-app-token', source: '(?:ghu|ghs)_[0-9a-zA-Z]{36}', }, { id: 'github-oauth', source: 'gho_[0-9a-zA-Z]{36}', }, { id: 'github-refresh-token', source: 'ghr_[0-9a-zA-Z]{36}', }, { id: 'gitlab-pat', source: 'glpat-[\\w-]{20}', }, { id: 'gitlab-deploy-token', source: 'gldt-[0-9a-zA-Z_\\-]{20}', }, // — Communication — { id: 'slack-bot-token', source: 'xoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*', }, { id: 'slack-user-token', source: 'xox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}', }, { id: 'slack-app-token', source: 'xapp-\\d-[A-Z0-9]+-\\d+-[a-z0-9]+', flags: 'i', }, { id: 'twilio-api-key', source: 'SK[0-9a-fA-F]{32}', }, { id: 'sendgrid-api-token', // gitleaks: SG\.(?i)[a-z0-9=_\-\.]{66} → JS: case-insensitive via flag source: '\\b(SG\\.[a-zA-Z0-9=_\\-.]{66})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, // — Dev tooling — { id: 'npm-access-token', source: '\\b(npm_[a-zA-Z0-9]{36})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'pypi-upload-token', source: 'pypi-AgEIcHlwaS5vcmc[\\w-]{50,1000}', }, { id: 'databricks-api-token', source: '\\b(dapi[a-f0-9]{32}(?:-\\d)?)(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'hashicorp-tf-api-token', // gitleaks: (?i)[a-z0-9]{14}\.(?-i:atlasv1)\.[a-z0-9\-_=]{60,70} // → JS: case-insensitive hex+alnum prefix, literal "atlasv1", case-insensitive suffix source: '[a-zA-Z0-9]{14}\\.atlasv1\\.[a-zA-Z0-9\\-_=]{60,70}', }, { id: 'pulumi-api-token', source: '\\b(pul-[a-f0-9]{40})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'postman-api-token', // gitleaks: PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34} → JS: use [a-fA-F0-9] source: '\\b(PMAK-[a-fA-F0-9]{24}-[a-fA-F0-9]{34})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, // — Observability — { id: 'grafana-api-key', source: '\\b(eyJrIjoi[A-Za-z0-9+/]{70,400}={0,3})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'grafana-cloud-api-token', source: '\\b(glc_[A-Za-z0-9+/]{32,400}={0,3})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'grafana-service-account-token', source: '\\b(glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'sentry-user-token', source: '\\b(sntryu_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'sentry-org-token', source: '\\bsntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}(?:LCJyZWdpb25fdXJs|InJlZ2lvbl91cmwi|cmVnaW9uX3VybCI6)[a-zA-Z0-9+/]{10,200}={0,2}_[a-zA-Z0-9+/]{43}', }, // — Payment / commerce — { id: 'stripe-access-token', source: '\\b((?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99})(?:[\\x60\'"\\s;]|\\\\[nr]|$)', }, { id: 'shopify-access-token', source: 'shpat_[a-fA-F0-9]{32}', }, { id: 'shopify-shared-secret', source: 'shpss_[a-fA-F0-9]{32}', }, // — Crypto — { id: 'private-key', source: '-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----[\\s\\S-]{64,}?-----END[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----', flags: 'i', }, ] // Lazily compiled pattern cache — compile once on first scan. let compiledRules: Array<{ id: string; re: RegExp }> | null = null function getCompiledRules(): Array<{ id: string; re: RegExp }> { if (compiledRules === null) { compiledRules = SECRET_RULES.map(r => ({ id: r.id, re: new RegExp(r.source, r.flags), })) } return compiledRules } /** * Convert a gitleaks rule ID (kebab-case) to a human-readable label. * e.g., "github-pat" → "GitHub PAT", "aws-access-token" → "AWS Access Token" */ function ruleIdToLabel(ruleId: string): string { // Words where the canonical capitalization differs from title case const specialCase: Record = { aws: 'AWS', gcp: 'GCP', api: 'API', pat: 'PAT', ad: 'AD', tf: 'TF', oauth: 'OAuth', npm: 'NPM', pypi: 'PyPI', jwt: 'JWT', github: 'GitHub', gitlab: 'GitLab', openai: 'OpenAI', digitalocean: 'DigitalOcean', huggingface: 'HuggingFace', hashicorp: 'HashiCorp', sendgrid: 'SendGrid', } return ruleId .split('-') .map(part => specialCase[part] ?? capitalize(part)) .join(' ') } /** * Scan a string for potential secrets. * * Returns one match per rule that fired (deduplicated by rule ID). The * actual matched text is intentionally NOT returned — we never log or * display secret values. */ export function scanForSecrets(content: string): SecretMatch[] { const matches: SecretMatch[] = [] const seen = new Set() for (const rule of getCompiledRules()) { if (seen.has(rule.id)) { continue } if (rule.re.test(content)) { seen.add(rule.id) matches.push({ ruleId: rule.id, label: ruleIdToLabel(rule.id), }) } } return matches } /** * Get a human-readable label for a gitleaks rule ID. * Falls back to kebab-to-Title conversion for unknown IDs. */ export function getSecretLabel(ruleId: string): string { return ruleIdToLabel(ruleId) } /** * Redact any matched secrets in-place with [REDACTED]. * Unlike scanForSecrets, this returns the content with spans replaced * so the surrounding text can still be written to disk safely. */ let redactRules: RegExp[] | null = null export function redactSecrets(content: string): string { redactRules ??= SECRET_RULES.map( r => new RegExp(r.source, (r.flags ?? '').replace('g', '') + 'g'), ) for (const re of redactRules) { // Replace only the captured group, not the full match — patterns include // boundary chars (space, quote, ;) outside the group that must survive. content = content.replace(re, (match, g1) => typeof g1 === 'string' ? match.replace(g1, '[REDACTED]') : '[REDACTED]', ) } return content }