πŸ“„ File detail

ink/termio/parser.ts

🧩 .tsπŸ“ 395 linesπŸ’Ύ 11,546 bytesπŸ“ text
← Back to All Files

🎯 Use case

This file lives under β€œink/”, which covers Ink terminal UI (layouts, TTY IO, keyboard, renderer components). On the API surface it exposes Parser β€” mainly types, interfaces, or factory objects. It composes internal code from utils, ansi, csi, dec, and esc (relative imports). What the file header says: ANSI Parser - Semantic Action Generator A streaming parser for ANSI escape sequences that produces semantic actions. Uses the tokenizer for escape sequence boundary detection, then interprets each sequence to produce structured actions. Key design decisions: - Streaming: can proc.

Generated from folder role, exports, dependency roots, and inline comments β€” not hand-reviewed for every path.

🧠 Inline summary

ANSI Parser - Semantic Action Generator A streaming parser for ANSI escape sequences that produces semantic actions. Uses the tokenizer for escape sequence boundary detection, then interprets each sequence to produce structured actions. Key design decisions: - Streaming: can process input incrementally - Semantic output: produces structured actions, not string tokens - Style tracking: maintains current text style state

πŸ“€ Exports (heuristic)

  • Parser

πŸ–₯️ Source preview

/**
 * ANSI Parser - Semantic Action Generator
 *
 * A streaming parser for ANSI escape sequences that produces semantic actions.
 * Uses the tokenizer for escape sequence boundary detection, then interprets
 * each sequence to produce structured actions.
 *
 * Key design decisions:
 * - Streaming: can process input incrementally
 * - Semantic output: produces structured actions, not string tokens
 * - Style tracking: maintains current text style state
 */

import { getGraphemeSegmenter } from '../../utils/intl.js'
import { C0 } from './ansi.js'
import { CSI, CURSOR_STYLES, ERASE_DISPLAY, ERASE_LINE_REGION } from './csi.js'
import { DEC } from './dec.js'
import { parseEsc } from './esc.js'
import { parseOSC } from './osc.js'
import { applySGR } from './sgr.js'
import { createTokenizer, type Token, type Tokenizer } from './tokenize.js'
import type { Action, Grapheme, TextStyle } from './types.js'
import { defaultStyle } from './types.js'

// =============================================================================
// Grapheme Utilities
// =============================================================================

function isEmoji(codePoint: number): boolean {
  return (
    (codePoint >= 0x2600 && codePoint <= 0x26ff) ||
    (codePoint >= 0x2700 && codePoint <= 0x27bf) ||
    (codePoint >= 0x1f300 && codePoint <= 0x1f9ff) ||
    (codePoint >= 0x1fa00 && codePoint <= 0x1faff) ||
    (codePoint >= 0x1f1e0 && codePoint <= 0x1f1ff)
  )
}

function isEastAsianWide(codePoint: number): boolean {
  return (
    (codePoint >= 0x1100 && codePoint <= 0x115f) ||
    (codePoint >= 0x2e80 && codePoint <= 0x9fff) ||
    (codePoint >= 0xac00 && codePoint <= 0xd7a3) ||
    (codePoint >= 0xf900 && codePoint <= 0xfaff) ||
    (codePoint >= 0xfe10 && codePoint <= 0xfe1f) ||
    (codePoint >= 0xfe30 && codePoint <= 0xfe6f) ||
    (codePoint >= 0xff00 && codePoint <= 0xff60) ||
    (codePoint >= 0xffe0 && codePoint <= 0xffe6) ||
    (codePoint >= 0x20000 && codePoint <= 0x2fffd) ||
    (codePoint >= 0x30000 && codePoint <= 0x3fffd)
  )
}

function hasMultipleCodepoints(str: string): boolean {
  let count = 0
  for (const _ of str) {
    count++
    if (count > 1) return true
  }
  return false
}

function graphemeWidth(grapheme: string): 1 | 2 {
  if (hasMultipleCodepoints(grapheme)) return 2
  const codePoint = grapheme.codePointAt(0)
  if (codePoint === undefined) return 1
  if (isEmoji(codePoint) || isEastAsianWide(codePoint)) return 2
  return 1
}

function* segmentGraphemes(str: string): Generator<Grapheme> {
  for (const { segment } of getGraphemeSegmenter().segment(str)) {
    yield { value: segment, width: graphemeWidth(segment) }
  }
}

// =============================================================================
// Sequence Parsing
// =============================================================================

function parseCSIParams(paramStr: string): number[] {
  if (paramStr === '') return []
  return paramStr.split(/[;:]/).map(s => (s === '' ? 0 : parseInt(s, 10)))
}

/** Parse a raw CSI sequence (e.g., "\x1b[31m") into an action */
function parseCSI(rawSequence: string): Action | null {
  const inner = rawSequence.slice(2)
  if (inner.length === 0) return null

  const finalByte = inner.charCodeAt(inner.length - 1)
  const beforeFinal = inner.slice(0, -1)

  let privateMode = ''
  let paramStr = beforeFinal
  let intermediate = ''

  if (beforeFinal.length > 0 && '?>='.includes(beforeFinal[0]!)) {
    privateMode = beforeFinal[0]!
    paramStr = beforeFinal.slice(1)
  }

  const intermediateMatch = paramStr.match(/([^0-9;:]+)$/)
  if (intermediateMatch) {
    intermediate = intermediateMatch[1]!
    paramStr = paramStr.slice(0, -intermediate.length)
  }

  const params = parseCSIParams(paramStr)
  const p0 = params[0] ?? 1
  const p1 = params[1] ?? 1

  // SGR (Select Graphic Rendition)
  if (finalByte === CSI.SGR && privateMode === '') {
    return { type: 'sgr', params: paramStr }
  }

  // Cursor movement
  if (finalByte === CSI.CUU) {
    return {
      type: 'cursor',
      action: { type: 'move', direction: 'up', count: p0 },
    }
  }
  if (finalByte === CSI.CUD) {
    return {
      type: 'cursor',
      action: { type: 'move', direction: 'down', count: p0 },
    }
  }
  if (finalByte === CSI.CUF) {
    return {
      type: 'cursor',
      action: { type: 'move', direction: 'forward', count: p0 },
    }
  }
  if (finalByte === CSI.CUB) {
    return {
      type: 'cursor',
      action: { type: 'move', direction: 'back', count: p0 },
    }
  }
  if (finalByte === CSI.CNL) {
    return { type: 'cursor', action: { type: 'nextLine', count: p0 } }
  }
  if (finalByte === CSI.CPL) {
    return { type: 'cursor', action: { type: 'prevLine', count: p0 } }
  }
  if (finalByte === CSI.CHA) {
    return { type: 'cursor', action: { type: 'column', col: p0 } }
  }
  if (finalByte === CSI.CUP || finalByte === CSI.HVP) {
    return { type: 'cursor', action: { type: 'position', row: p0, col: p1 } }
  }
  if (finalByte === CSI.VPA) {
    return { type: 'cursor', action: { type: 'row', row: p0 } }
  }

  // Erase
  if (finalByte === CSI.ED) {
    const region = ERASE_DISPLAY[params[0] ?? 0] ?? 'toEnd'
    return { type: 'erase', action: { type: 'display', region } }
  }
  if (finalByte === CSI.EL) {
    const region = ERASE_LINE_REGION[params[0] ?? 0] ?? 'toEnd'
    return { type: 'erase', action: { type: 'line', region } }
  }
  if (finalByte === CSI.ECH) {
    return { type: 'erase', action: { type: 'chars', count: p0 } }
  }

  // Scroll
  if (finalByte === CSI.SU) {
    return { type: 'scroll', action: { type: 'up', count: p0 } }
  }
  if (finalByte === CSI.SD) {
    return { type: 'scroll', action: { type: 'down', count: p0 } }
  }
  if (finalByte === CSI.DECSTBM) {
    return {
      type: 'scroll',
      action: { type: 'setRegion', top: p0, bottom: p1 },
    }
  }

  // Cursor save/restore
  if (finalByte === CSI.SCOSC) {
    return { type: 'cursor', action: { type: 'save' } }
  }
  if (finalByte === CSI.SCORC) {
    return { type: 'cursor', action: { type: 'restore' } }
  }

  // Cursor style
  if (finalByte === CSI.DECSCUSR && intermediate === ' ') {
    const styleInfo = CURSOR_STYLES[p0] ?? CURSOR_STYLES[0]!
    return { type: 'cursor', action: { type: 'style', ...styleInfo } }
  }

  // Private modes
  if (privateMode === '?' && (finalByte === CSI.SM || finalByte === CSI.RM)) {
    const enabled = finalByte === CSI.SM

    if (p0 === DEC.CURSOR_VISIBLE) {
      return {
        type: 'cursor',
        action: enabled ? { type: 'show' } : { type: 'hide' },
      }
    }
    if (p0 === DEC.ALT_SCREEN_CLEAR || p0 === DEC.ALT_SCREEN) {
      return { type: 'mode', action: { type: 'alternateScreen', enabled } }
    }
    if (p0 === DEC.BRACKETED_PASTE) {
      return { type: 'mode', action: { type: 'bracketedPaste', enabled } }
    }
    if (p0 === DEC.MOUSE_NORMAL) {
      return {
        type: 'mode',
        action: { type: 'mouseTracking', mode: enabled ? 'normal' : 'off' },
      }
    }
    if (p0 === DEC.MOUSE_BUTTON) {
      return {
        type: 'mode',
        action: { type: 'mouseTracking', mode: enabled ? 'button' : 'off' },
      }
    }
    if (p0 === DEC.MOUSE_ANY) {
      return {
        type: 'mode',
        action: { type: 'mouseTracking', mode: enabled ? 'any' : 'off' },
      }
    }
    if (p0 === DEC.FOCUS_EVENTS) {
      return { type: 'mode', action: { type: 'focusEvents', enabled } }
    }
  }

  return { type: 'unknown', sequence: rawSequence }
}

/**
 * Identify the type of escape sequence from its raw form.
 */
function identifySequence(
  seq: string,
): 'csi' | 'osc' | 'esc' | 'ss3' | 'unknown' {
  if (seq.length < 2) return 'unknown'
  if (seq.charCodeAt(0) !== C0.ESC) return 'unknown'

  const second = seq.charCodeAt(1)
  if (second === 0x5b) return 'csi' // [
  if (second === 0x5d) return 'osc' // ]
  if (second === 0x4f) return 'ss3' // O
  return 'esc'
}

// =============================================================================
// Main Parser
// =============================================================================

/**
 * Parser class - maintains state for streaming/incremental parsing
 *
 * Usage:
 * ```typescript
 * const parser = new Parser()
 * const actions1 = parser.feed('partial\x1b[')
 * const actions2 = parser.feed('31mred')  // state maintained internally
 * ```
 */
export class Parser {
  private tokenizer: Tokenizer = createTokenizer()

  style: TextStyle = defaultStyle()
  inLink = false
  linkUrl: string | undefined

  reset(): void {
    this.tokenizer.reset()
    this.style = defaultStyle()
    this.inLink = false
    this.linkUrl = undefined
  }

  /** Feed input and get resulting actions */
  feed(input: string): Action[] {
    const tokens = this.tokenizer.feed(input)
    const actions: Action[] = []

    for (const token of tokens) {
      const tokenActions = this.processToken(token)
      actions.push(...tokenActions)
    }

    return actions
  }

  private processToken(token: Token): Action[] {
    switch (token.type) {
      case 'text':
        return this.processText(token.value)

      case 'sequence':
        return this.processSequence(token.value)
    }
  }

  private processText(text: string): Action[] {
    // Handle BEL characters embedded in text
    const actions: Action[] = []
    let current = ''

    for (const char of text) {
      if (char.charCodeAt(0) === C0.BEL) {
        if (current) {
          const graphemes = [...segmentGraphemes(current)]
          if (graphemes.length > 0) {
            actions.push({ type: 'text', graphemes, style: { ...this.style } })
          }
          current = ''
        }
        actions.push({ type: 'bell' })
      } else {
        current += char
      }
    }

    if (current) {
      const graphemes = [...segmentGraphemes(current)]
      if (graphemes.length > 0) {
        actions.push({ type: 'text', graphemes, style: { ...this.style } })
      }
    }

    return actions
  }

  private processSequence(seq: string): Action[] {
    const seqType = identifySequence(seq)

    switch (seqType) {
      case 'csi': {
        const action = parseCSI(seq)
        if (!action) return []
        if (action.type === 'sgr') {
          this.style = applySGR(action.params, this.style)
          return []
        }
        return [action]
      }

      case 'osc': {
        // Extract OSC content (between ESC ] and terminator)
        let content = seq.slice(2)
        // Remove terminator (BEL or ESC \)
        if (content.endsWith('\x07')) {
          content = content.slice(0, -1)
        } else if (content.endsWith('\x1b\\')) {
          content = content.slice(0, -2)
        }

        const action = parseOSC(content)
        if (action) {
          if (action.type === 'link') {
            if (action.action.type === 'start') {
              this.inLink = true
              this.linkUrl = action.action.url
            } else {
              this.inLink = false
              this.linkUrl = undefined
            }
          }
          return [action]
        }
        return []
      }

      case 'esc': {
        const escContent = seq.slice(1)
        const action = parseEsc(escContent)
        return action ? [action] : []
      }

      case 'ss3':
        // SS3 sequences are typically cursor keys in application mode
        // For output parsing, treat as unknown
        return [{ type: 'unknown', sequence: seq }]

      default:
        return [{ type: 'unknown', sequence: seq }]
    }
  }
}