395 lines
11 KiB
TypeScript
395 lines
11 KiB
TypeScript
/**
|
|
* ANSI Parser - Semantic Action Generator
|
|
*
|
|
* A streaming parser for ANSI escape sequences that produces semantic actions.
|
|
* Uses the tokenizer for escape sequence boundary detection, then interprets
|
|
* each sequence to produce structured actions.
|
|
*
|
|
* Key design decisions:
|
|
* - Streaming: can process input incrementally
|
|
* - Semantic output: produces structured actions, not string tokens
|
|
* - Style tracking: maintains current text style state
|
|
*/
|
|
|
|
import { getGraphemeSegmenter } from '../../utils/intl.js'
|
|
import { C0 } from './ansi.js'
|
|
import { CSI, CURSOR_STYLES, ERASE_DISPLAY, ERASE_LINE_REGION } from './csi.js'
|
|
import { DEC } from './dec.js'
|
|
import { parseEsc } from './esc.js'
|
|
import { parseOSC } from './osc.js'
|
|
import { applySGR } from './sgr.js'
|
|
import { createTokenizer, type Token, type Tokenizer } from './tokenize.js'
|
|
import type { Action, Grapheme, TextStyle } from './types.js'
|
|
import { defaultStyle } from './types.js'
|
|
|
|
// =============================================================================
|
|
// Grapheme Utilities
|
|
// =============================================================================
|
|
|
|
function isEmoji(codePoint: number): boolean {
|
|
return (
|
|
(codePoint >= 0x2600 && codePoint <= 0x26ff) ||
|
|
(codePoint >= 0x2700 && codePoint <= 0x27bf) ||
|
|
(codePoint >= 0x1f300 && codePoint <= 0x1f9ff) ||
|
|
(codePoint >= 0x1fa00 && codePoint <= 0x1faff) ||
|
|
(codePoint >= 0x1f1e0 && codePoint <= 0x1f1ff)
|
|
)
|
|
}
|
|
|
|
function isEastAsianWide(codePoint: number): boolean {
|
|
return (
|
|
(codePoint >= 0x1100 && codePoint <= 0x115f) ||
|
|
(codePoint >= 0x2e80 && codePoint <= 0x9fff) ||
|
|
(codePoint >= 0xac00 && codePoint <= 0xd7a3) ||
|
|
(codePoint >= 0xf900 && codePoint <= 0xfaff) ||
|
|
(codePoint >= 0xfe10 && codePoint <= 0xfe1f) ||
|
|
(codePoint >= 0xfe30 && codePoint <= 0xfe6f) ||
|
|
(codePoint >= 0xff00 && codePoint <= 0xff60) ||
|
|
(codePoint >= 0xffe0 && codePoint <= 0xffe6) ||
|
|
(codePoint >= 0x20000 && codePoint <= 0x2fffd) ||
|
|
(codePoint >= 0x30000 && codePoint <= 0x3fffd)
|
|
)
|
|
}
|
|
|
|
function hasMultipleCodepoints(str: string): boolean {
|
|
let count = 0
|
|
for (const _ of str) {
|
|
count++
|
|
if (count > 1) return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
function graphemeWidth(grapheme: string): 1 | 2 {
|
|
if (hasMultipleCodepoints(grapheme)) return 2
|
|
const codePoint = grapheme.codePointAt(0)
|
|
if (codePoint === undefined) return 1
|
|
if (isEmoji(codePoint) || isEastAsianWide(codePoint)) return 2
|
|
return 1
|
|
}
|
|
|
|
function* segmentGraphemes(str: string): Generator<Grapheme> {
|
|
for (const { segment } of getGraphemeSegmenter().segment(str)) {
|
|
yield { value: segment, width: graphemeWidth(segment) }
|
|
}
|
|
}
|
|
|
|
// =============================================================================
|
|
// Sequence Parsing
|
|
// =============================================================================
|
|
|
|
function parseCSIParams(paramStr: string): number[] {
|
|
if (paramStr === '') return []
|
|
return paramStr.split(/[;:]/).map(s => (s === '' ? 0 : parseInt(s, 10)))
|
|
}
|
|
|
|
/** Parse a raw CSI sequence (e.g., "\x1b[31m") into an action */
|
|
function parseCSI(rawSequence: string): Action | null {
|
|
const inner = rawSequence.slice(2)
|
|
if (inner.length === 0) return null
|
|
|
|
const finalByte = inner.charCodeAt(inner.length - 1)
|
|
const beforeFinal = inner.slice(0, -1)
|
|
|
|
let privateMode = ''
|
|
let paramStr = beforeFinal
|
|
let intermediate = ''
|
|
|
|
if (beforeFinal.length > 0 && '?>='.includes(beforeFinal[0]!)) {
|
|
privateMode = beforeFinal[0]!
|
|
paramStr = beforeFinal.slice(1)
|
|
}
|
|
|
|
const intermediateMatch = paramStr.match(/([^0-9;:]+)$/)
|
|
if (intermediateMatch) {
|
|
intermediate = intermediateMatch[1]!
|
|
paramStr = paramStr.slice(0, -intermediate.length)
|
|
}
|
|
|
|
const params = parseCSIParams(paramStr)
|
|
const p0 = params[0] ?? 1
|
|
const p1 = params[1] ?? 1
|
|
|
|
// SGR (Select Graphic Rendition)
|
|
if (finalByte === CSI.SGR && privateMode === '') {
|
|
return { type: 'sgr', params: paramStr }
|
|
}
|
|
|
|
// Cursor movement
|
|
if (finalByte === CSI.CUU) {
|
|
return {
|
|
type: 'cursor',
|
|
action: { type: 'move', direction: 'up', count: p0 },
|
|
}
|
|
}
|
|
if (finalByte === CSI.CUD) {
|
|
return {
|
|
type: 'cursor',
|
|
action: { type: 'move', direction: 'down', count: p0 },
|
|
}
|
|
}
|
|
if (finalByte === CSI.CUF) {
|
|
return {
|
|
type: 'cursor',
|
|
action: { type: 'move', direction: 'forward', count: p0 },
|
|
}
|
|
}
|
|
if (finalByte === CSI.CUB) {
|
|
return {
|
|
type: 'cursor',
|
|
action: { type: 'move', direction: 'back', count: p0 },
|
|
}
|
|
}
|
|
if (finalByte === CSI.CNL) {
|
|
return { type: 'cursor', action: { type: 'nextLine', count: p0 } }
|
|
}
|
|
if (finalByte === CSI.CPL) {
|
|
return { type: 'cursor', action: { type: 'prevLine', count: p0 } }
|
|
}
|
|
if (finalByte === CSI.CHA) {
|
|
return { type: 'cursor', action: { type: 'column', col: p0 } }
|
|
}
|
|
if (finalByte === CSI.CUP || finalByte === CSI.HVP) {
|
|
return { type: 'cursor', action: { type: 'position', row: p0, col: p1 } }
|
|
}
|
|
if (finalByte === CSI.VPA) {
|
|
return { type: 'cursor', action: { type: 'row', row: p0 } }
|
|
}
|
|
|
|
// Erase
|
|
if (finalByte === CSI.ED) {
|
|
const region = ERASE_DISPLAY[params[0] ?? 0] ?? 'toEnd'
|
|
return { type: 'erase', action: { type: 'display', region } }
|
|
}
|
|
if (finalByte === CSI.EL) {
|
|
const region = ERASE_LINE_REGION[params[0] ?? 0] ?? 'toEnd'
|
|
return { type: 'erase', action: { type: 'line', region } }
|
|
}
|
|
if (finalByte === CSI.ECH) {
|
|
return { type: 'erase', action: { type: 'chars', count: p0 } }
|
|
}
|
|
|
|
// Scroll
|
|
if (finalByte === CSI.SU) {
|
|
return { type: 'scroll', action: { type: 'up', count: p0 } }
|
|
}
|
|
if (finalByte === CSI.SD) {
|
|
return { type: 'scroll', action: { type: 'down', count: p0 } }
|
|
}
|
|
if (finalByte === CSI.DECSTBM) {
|
|
return {
|
|
type: 'scroll',
|
|
action: { type: 'setRegion', top: p0, bottom: p1 },
|
|
}
|
|
}
|
|
|
|
// Cursor save/restore
|
|
if (finalByte === CSI.SCOSC) {
|
|
return { type: 'cursor', action: { type: 'save' } }
|
|
}
|
|
if (finalByte === CSI.SCORC) {
|
|
return { type: 'cursor', action: { type: 'restore' } }
|
|
}
|
|
|
|
// Cursor style
|
|
if (finalByte === CSI.DECSCUSR && intermediate === ' ') {
|
|
const styleInfo = CURSOR_STYLES[p0] ?? CURSOR_STYLES[0]!
|
|
return { type: 'cursor', action: { type: 'style', ...styleInfo } }
|
|
}
|
|
|
|
// Private modes
|
|
if (privateMode === '?' && (finalByte === CSI.SM || finalByte === CSI.RM)) {
|
|
const enabled = finalByte === CSI.SM
|
|
|
|
if (p0 === DEC.CURSOR_VISIBLE) {
|
|
return {
|
|
type: 'cursor',
|
|
action: enabled ? { type: 'show' } : { type: 'hide' },
|
|
}
|
|
}
|
|
if (p0 === DEC.ALT_SCREEN_CLEAR || p0 === DEC.ALT_SCREEN) {
|
|
return { type: 'mode', action: { type: 'alternateScreen', enabled } }
|
|
}
|
|
if (p0 === DEC.BRACKETED_PASTE) {
|
|
return { type: 'mode', action: { type: 'bracketedPaste', enabled } }
|
|
}
|
|
if (p0 === DEC.MOUSE_NORMAL) {
|
|
return {
|
|
type: 'mode',
|
|
action: { type: 'mouseTracking', mode: enabled ? 'normal' : 'off' },
|
|
}
|
|
}
|
|
if (p0 === DEC.MOUSE_BUTTON) {
|
|
return {
|
|
type: 'mode',
|
|
action: { type: 'mouseTracking', mode: enabled ? 'button' : 'off' },
|
|
}
|
|
}
|
|
if (p0 === DEC.MOUSE_ANY) {
|
|
return {
|
|
type: 'mode',
|
|
action: { type: 'mouseTracking', mode: enabled ? 'any' : 'off' },
|
|
}
|
|
}
|
|
if (p0 === DEC.FOCUS_EVENTS) {
|
|
return { type: 'mode', action: { type: 'focusEvents', enabled } }
|
|
}
|
|
}
|
|
|
|
return { type: 'unknown', sequence: rawSequence }
|
|
}
|
|
|
|
/**
|
|
* Identify the type of escape sequence from its raw form.
|
|
*/
|
|
function identifySequence(
|
|
seq: string,
|
|
): 'csi' | 'osc' | 'esc' | 'ss3' | 'unknown' {
|
|
if (seq.length < 2) return 'unknown'
|
|
if (seq.charCodeAt(0) !== C0.ESC) return 'unknown'
|
|
|
|
const second = seq.charCodeAt(1)
|
|
if (second === 0x5b) return 'csi' // [
|
|
if (second === 0x5d) return 'osc' // ]
|
|
if (second === 0x4f) return 'ss3' // O
|
|
return 'esc'
|
|
}
|
|
|
|
// =============================================================================
|
|
// Main Parser
|
|
// =============================================================================
|
|
|
|
/**
|
|
* Parser class - maintains state for streaming/incremental parsing
|
|
*
|
|
* Usage:
|
|
* ```typescript
|
|
* const parser = new Parser()
|
|
* const actions1 = parser.feed('partial\x1b[')
|
|
* const actions2 = parser.feed('31mred') // state maintained internally
|
|
* ```
|
|
*/
|
|
export class Parser {
|
|
private tokenizer: Tokenizer = createTokenizer()
|
|
|
|
style: TextStyle = defaultStyle()
|
|
inLink = false
|
|
linkUrl: string | undefined
|
|
|
|
reset(): void {
|
|
this.tokenizer.reset()
|
|
this.style = defaultStyle()
|
|
this.inLink = false
|
|
this.linkUrl = undefined
|
|
}
|
|
|
|
/** Feed input and get resulting actions */
|
|
feed(input: string): Action[] {
|
|
const tokens = this.tokenizer.feed(input)
|
|
const actions: Action[] = []
|
|
|
|
for (const token of tokens) {
|
|
const tokenActions = this.processToken(token)
|
|
actions.push(...tokenActions)
|
|
}
|
|
|
|
return actions
|
|
}
|
|
|
|
private processToken(token: Token): Action[] {
|
|
switch (token.type) {
|
|
case 'text':
|
|
return this.processText(token.value)
|
|
|
|
case 'sequence':
|
|
return this.processSequence(token.value)
|
|
}
|
|
}
|
|
|
|
private processText(text: string): Action[] {
|
|
// Handle BEL characters embedded in text
|
|
const actions: Action[] = []
|
|
let current = ''
|
|
|
|
for (const char of text) {
|
|
if (char.charCodeAt(0) === C0.BEL) {
|
|
if (current) {
|
|
const graphemes = [...segmentGraphemes(current)]
|
|
if (graphemes.length > 0) {
|
|
actions.push({ type: 'text', graphemes, style: { ...this.style } })
|
|
}
|
|
current = ''
|
|
}
|
|
actions.push({ type: 'bell' })
|
|
} else {
|
|
current += char
|
|
}
|
|
}
|
|
|
|
if (current) {
|
|
const graphemes = [...segmentGraphemes(current)]
|
|
if (graphemes.length > 0) {
|
|
actions.push({ type: 'text', graphemes, style: { ...this.style } })
|
|
}
|
|
}
|
|
|
|
return actions
|
|
}
|
|
|
|
private processSequence(seq: string): Action[] {
|
|
const seqType = identifySequence(seq)
|
|
|
|
switch (seqType) {
|
|
case 'csi': {
|
|
const action = parseCSI(seq)
|
|
if (!action) return []
|
|
if (action.type === 'sgr') {
|
|
this.style = applySGR(action.params, this.style)
|
|
return []
|
|
}
|
|
return [action]
|
|
}
|
|
|
|
case 'osc': {
|
|
// Extract OSC content (between ESC ] and terminator)
|
|
let content = seq.slice(2)
|
|
// Remove terminator (BEL or ESC \)
|
|
if (content.endsWith('\x07')) {
|
|
content = content.slice(0, -1)
|
|
} else if (content.endsWith('\x1b\\')) {
|
|
content = content.slice(0, -2)
|
|
}
|
|
|
|
const action = parseOSC(content)
|
|
if (action) {
|
|
if (action.type === 'link') {
|
|
if (action.action.type === 'start') {
|
|
this.inLink = true
|
|
this.linkUrl = action.action.url
|
|
} else {
|
|
this.inLink = false
|
|
this.linkUrl = undefined
|
|
}
|
|
}
|
|
return [action]
|
|
}
|
|
return []
|
|
}
|
|
|
|
case 'esc': {
|
|
const escContent = seq.slice(1)
|
|
const action = parseEsc(escContent)
|
|
return action ? [action] : []
|
|
}
|
|
|
|
case 'ss3':
|
|
// SS3 sequences are typically cursor keys in application mode
|
|
// For output parsing, treat as unknown
|
|
return [{ type: 'unknown', sequence: seq }]
|
|
|
|
default:
|
|
return [{ type: 'unknown', sequence: seq }]
|
|
}
|
|
}
|
|
}
|