File detail
utils/bash/parser.ts
🎯 Use case
This file lives under “utils/”, which covers cross-cutting helpers (shell, tempfiles, settings, messages, process input, …). On the API surface it exposes Node, ParsedCommandData, ensureInitialized, parseCommand, and PARSE_ABORTED (and more) — mainly functions, hooks, or classes. Dependencies touch bun:bundle. It composes internal code from services, debug, and bashParser (relative imports).
Generated from folder role, exports, dependency roots, and inline comments — not hand-reviewed for every path.
🧭 Inline summary
import { feature } from 'bun:bundle' import { logEvent } from '../../services/analytics/index.js' import { logForDebugging } from '../debug.js' import { ensureParserInitialized,
📤 Exports (heuristic)
Node, ParsedCommandData, ensureInitialized, parseCommand, PARSE_ABORTED, parseCommandRaw, extractCommandArguments
External import roots
Package roots taken from `from "…"` clauses (relative paths omitted).
bun:bundle
🖥️ Source preview
import { feature } from 'bun:bundle'
import { logEvent } from '../../services/analytics/index.js'
import { logForDebugging } from '../debug.js'
import {
ensureParserInitialized,
getParserModule,
type TsNode,
} from './bashParser.js'
/** Syntax-tree node type used throughout this module; an alias of `TsNode` from ./bashParser.js. */
export type Node = TsNode
/**
 * Result bundle produced by parseCommand for a successfully parsed command.
 */
export interface ParsedCommandData {
// Root of the tree returned by the parser module for `originalCommand`.
rootNode: Node
// Text of each `variable_assignment` child that precedes the command name
// (see extractEnvVars); empty when `commandNode` is not a plain `command`.
envVars: string[]
// Command node located by findCommandNode, or null when none was found.
commandNode: Node | null
// The command string exactly as it was passed to parseCommand.
originalCommand: string
}
// Commands whose string length exceeds this are never handed to the parser;
// parseCommand and parseCommandRaw return null for them.
const MAX_COMMAND_LENGTH = 10000
// Keywords that extractCommandArguments reports for a `declaration_command`
// node. A first child whose text is not in this set yields an empty result.
const DECLARATION_COMMANDS = new Set([
'export',
'declare',
'typeset',
'readonly',
'local',
'unset',
'unsetenv',
])
// Child node types that extractCommandArguments collects as arguments
// (their text is passed through stripQuotes first).
const ARGUMENT_TYPES = new Set(['word', 'string', 'raw_string', 'number'])
// Child node types at which extractCommandArguments stops collecting.
const SUBSTITUTION_TYPES = new Set([
'command_substitution',
'process_substitution',
])
// Node types that findCommandNode accepts as "the command".
const COMMAND_TYPES = new Set(['command', 'declaration_command'])
/** Set once the parser-load outcome has been reported, so it is never reported twice. */
let logged = false

/**
 * Reports whether the parser module could be loaded, the first time it is
 * called. Every later call is a no-op, whatever `success` it is given.
 *
 * @param success true when the parser module was available, false otherwise.
 */
function logLoadOnce(success: boolean): void {
  if (!logged) {
    // Flip the flag before logging so the report stays one-shot.
    logged = true
    const message = success
      ? 'tree-sitter: native module loaded'
      : 'tree-sitter: unavailable'
    logForDebugging(message)
    logEvent('tengu_tree_sitter_load', { success })
  }
}
/**
 * Awaits WASM init (Parser.init + Language.load). Must be called before
 * parseCommand/parseCommandRaw for the parser to be available. Idempotent.
 *
 * Initialization is only awaited when the TREE_SITTER_BASH or
 * TREE_SITTER_BASH_SHADOW feature is enabled; with both off this resolves
 * without doing anything.
 *
 * NOTE(review): parseCommand and parseCommandRaw also await
 * ensureParserInitialized() themselves, so the "must be called before"
 * requirement may be outdated. The WASM and idempotence details live in
 * ./bashParser.js and are not visible from this file — confirm there.
 */
export async function ensureInitialized(): Promise<void> {
// NOTE(review): feature() comes from bun:bundle and is presumably resolved
// at bundle time; keep the calls directly inside the condition — confirm.
if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
await ensureParserInitialized()
}
}
/**
 * Parses `command` and returns the tree together with the located command
 * node and its leading environment-variable assignments.
 *
 * Only the TREE_SITTER_BASH feature enables this path (the
 * TREE_SITTER_BASH_SHADOW feature is not checked here).
 *
 * @param command Shell command text to parse.
 * @returns The parsed data, or null when any of the following holds:
 *   - `command` is empty or longer than MAX_COMMAND_LENGTH;
 *   - the TREE_SITTER_BASH feature is off;
 *   - the parser module is not available;
 *   - the parser returns a falsy root node;
 *   - anything inside the try block throws.
 *   Unlike parseCommandRaw, an aborted parse is not distinguished from an
 *   unavailable parser here: both come back as null.
 */
export async function parseCommand(
command: string,
): Promise<ParsedCommandData | null> {
if (!command || command.length > MAX_COMMAND_LENGTH) return null
// Gate: ant-only until pentest. External builds fall back to legacy
// regex/shell-quote path. Guarding the whole body inside the positive
// branch lets Bun DCE the NAPI import AND keeps telemetry honest — we
// only fire tengu_tree_sitter_load when a load was genuinely attempted.
if (feature('TREE_SITTER_BASH')) {
await ensureParserInitialized()
const mod = getParserModule()
// Reported once per process: was the parser module available?
logLoadOnce(mod !== null)
if (!mod) return null
try {
const rootNode = mod.parse(command)
if (!rootNode) return null
// Root call: there is no parent node to pass.
const commandNode = findCommandNode(rootNode, null)
const envVars = extractEnvVars(commandNode)
return { rootNode, envVars, commandNode, originalCommand: command }
} catch {
// Any failure while parsing or walking the tree is reported as "no result".
return null
}
}
return null
}
/**
 * SECURITY: Sentinel for "parser was loaded and attempted, but aborted"
 * (timeout / node budget / Rust panic). Distinct from `null` (module not
 * loaded). Adversarial input can trigger abort under MAX_COMMAND_LENGTH:
 * `(( a[0][0]... ))` with ~2800 subscripts hits PARSE_TIMEOUT_MICROS.
 * Callers MUST treat this as fail-closed (too-complex), NOT route to legacy.
 *
 * Returned by parseCommandRaw when the parser module is present but its
 * `parse` call returns null or throws.
 */
export const PARSE_ABORTED = Symbol('parse-aborted')
/**
 * Raw parse — skips findCommandNode/extractEnvVars which the security
 * walker in ast.ts doesn't use. Saves one tree walk per bash command.
 *
 * Runs when either the TREE_SITTER_BASH or the TREE_SITTER_BASH_SHADOW
 * feature is enabled.
 *
 * Returns:
 * - Node: parse succeeded
 * - null: module not loaded / feature off / empty / over-length
 * - PARSE_ABORTED: module loaded but parse failed (timeout/panic)
 *
 * Each PARSE_ABORTED result also emits a `tengu_tree_sitter_parse_abort`
 * event carrying the command length and whether the parser threw
 * (`panic: true`) or returned null (`panic: false`).
 *
 * @param command Shell command text to parse.
 */
export async function parseCommandRaw(
command: string,
): Promise<Node | null | typeof PARSE_ABORTED> {
if (!command || command.length > MAX_COMMAND_LENGTH) return null
if (feature('TREE_SITTER_BASH') || feature('TREE_SITTER_BASH_SHADOW')) {
await ensureParserInitialized()
const mod = getParserModule()
// Reported once per process: was the parser module available?
logLoadOnce(mod !== null)
if (!mod) return null
try {
const result = mod.parse(command)
// SECURITY: Module loaded; null here = timeout/node-budget abort in
// bashParser.ts (PARSE_TIMEOUT_MS=50, MAX_NODES=50_000).
// Previously collapsed into `return null` → parse-unavailable → legacy
// path, which lacks EVAL_LIKE_BUILTINS — `trap`, `enable`, `hash` leaked.
// NOTE(review): the PARSE_ABORTED doc names this limit
// PARSE_TIMEOUT_MICROS; confirm which name/unit bashParser.ts uses.
if (result === null) {
logEvent('tengu_tree_sitter_parse_abort', {
cmdLength: command.length,
panic: false,
})
return PARSE_ABORTED
}
return result
} catch {
// A throw from parse() is reported as an abort with panic: true.
logEvent('tengu_tree_sitter_parse_abort', {
cmdLength: command.length,
panic: true,
})
return PARSE_ABORTED
}
}
return null
}
/**
 * Locates the command node reachable from `node`.
 *
 * Resolution rules, applied in this order:
 *  1. A node whose type is in COMMAND_TYPES is itself the answer.
 *  2. A `redirected_statement` resolves to its first direct child with a
 *     command type, or null. Deeper levels are not searched.
 *  3. A `variable_assignment` with a parent resolves to the first
 *     command-typed sibling that starts after it, or null.
 *  4. Everything else — `pipeline` included — is searched depth-first in
 *     child order, and the first hit wins.
 *
 * @param node   Node to inspect.
 * @param parent Parent of `node`; null for the initial call.
 * @returns The command node, or null when none is found.
 */
function findCommandNode(node: Node, parent: Node | null): Node | null {
  const { type: kind, children: kids } = node

  if (COMMAND_TYPES.has(kind)) {
    return node
  }

  if (kind === 'redirected_statement') {
    const inner = kids.find(kid => COMMAND_TYPES.has(kid.type))
    return inner ?? null
  }

  if (kind === 'variable_assignment' && parent) {
    // Assignment prefix: the command is a later sibling of the assignment.
    const following = parent.children.find(
      sibling =>
        COMMAND_TYPES.has(sibling.type) &&
        sibling.startIndex > node.startIndex,
    )
    return following ?? null
  }

  // Generic descent (also covers pipelines, whose first stage may itself be
  // a redirected statement).
  for (const kid of kids) {
    const hit = findCommandNode(kid, node)
    if (hit) {
      return hit
    }
  }
  return null
}
/**
 * Collects the text of the `variable_assignment` children that appear before
 * the command name of a `command` node.
 *
 * Scanning stops at the first `command_name` or `word` child. Children of any
 * other type that appear before that point are ignored.
 *
 * @param commandNode Node to inspect; may be null.
 * @returns The assignment texts in source order, or an empty array when
 *   `commandNode` is null or is not of type `command`.
 */
function extractEnvVars(commandNode: Node | null): string[] {
  if (!commandNode) return []
  if (commandNode.type !== 'command') return []

  const kids = commandNode.children
  // Index of the child at which the command name begins (-1: none present).
  const nameAt = kids.findIndex(
    kid => kid.type === 'command_name' || kid.type === 'word',
  )
  const prefix = nameAt === -1 ? kids : kids.slice(0, nameAt)

  return prefix
    .filter(kid => kid.type === 'variable_assignment')
    .map(kid => kid.text)
}
/**
 * Flattens a command node into a list made of its command name followed by
 * its simple arguments.
 *
 * - For a `declaration_command`, the result is just the keyword (the first
 *   child's text) when that keyword is in DECLARATION_COMMANDS, and an empty
 *   array otherwise.
 * - For any other node, `variable_assignment` children are skipped. The
 *   command name (a `command_name` child, or the first `word` seen before any
 *   name) is pushed verbatim. Children whose type is in ARGUMENT_TYPES are
 *   pushed with one layer of matching surrounding quotes removed. Collection
 *   stops at the first child whose type is in SUBSTITUTION_TYPES. Children of
 *   any other type are ignored.
 *
 * @param commandNode Command node to flatten.
 * @returns Command name and arguments in source order.
 */
export function extractCommandArguments(commandNode: Node): string[] {
  if (commandNode.type === 'declaration_command') {
    const keyword = commandNode.children[0]
    if (keyword && DECLARATION_COMMANDS.has(keyword.text)) {
      return [keyword.text]
    }
    return []
  }

  const collected: string[] = []
  let sawName = false

  for (const part of commandNode.children) {
    const kind = part.type
    if (kind === 'variable_assignment') continue

    const isName = kind === 'command_name' || (!sawName && kind === 'word')
    if (isName) {
      sawName = true
      collected.push(part.text)
    } else if (ARGUMENT_TYPES.has(kind)) {
      collected.push(stripQuotes(part.text))
    } else if (SUBSTITUTION_TYPES.has(kind)) {
      // A substitution ends the run of statically known arguments.
      break
    }
  }

  return collected
}
/**
 * Removes one layer of matching surrounding quotes from `text`.
 *
 * Quotes are stripped only when the text is at least two characters long and
 * both its first and last characters are the same quote character (`"` or
 * `'`). Anything else is returned unchanged.
 *
 * @param text Raw token text.
 * @returns The text without its enclosing quote pair, or `text` itself.
 */
function stripQuotes(text: string): string {
  if (text.length < 2) return text

  const opener = text[0]
  const closer = text[text.length - 1]
  const isQuoteChar = opener === '"' || opener === "'"

  if (isQuoteChar && opener === closer) {
    return text.slice(1, -1)
  }
  return text
}