π File detail
utils/powershell/parser.ts
π― Use case
This file lives under βutils/β, which covers cross-cutting helpers (shell, tempfiles, settings, messages, process input, β¦). On the API surface it exposes CommandElementChild, ParsedCommandElement, ParsedPowerShellCommand, RawCommandElement, and RawRedirection (and more) β mainly functions, hooks, or classes. Dependencies touch child processes. It composes internal code from debug, memoize, shell, and slowOperations (relative imports). What the file header says: --------------------------------------------------------------------------- Public types describing the parsed output returned to callers. These map to System.Management.Automation.Language AST classes. Raw internal types (RawParsedOutput etc.) are defined further below. --------.
Generated from folder role, exports, dependency roots, and inline comments β not hand-reviewed for every path.
π§ Inline summary
--------------------------------------------------------------------------- Public types describing the parsed output returned to callers. These map to System.Management.Automation.Language AST classes. Raw internal types (RawParsedOutput etc.) are defined further below. ---------------------------------------------------------------------------
π€ Exports (heuristic)
CommandElementChildParsedCommandElementParsedPowerShellCommandRawCommandElementRawRedirectionRawPipelineElementRawStatementPARSE_SCRIPT_BODYWINDOWS_MAX_COMMAND_LENGTHMAX_COMMAND_LENGTHmapStatementTypemapElementTypeclassifyCommandNamestripModulePrefixtransformCommandAsttransformExpressionElementtransformRedirectiontransformStatementCOMMON_ALIASESgetAllCommandNamesgetAllCommandsgetAllRedirectionsgetVariablesByScopehasCommandNamedhasDirectoryChangeisSingleCommandcommandHasArgPS_TOKENIZER_DASH_CHARSisPowerShellParametercommandHasArgAbbreviationgetPipelineSegmentsisNullRedirectionTargetgetFileRedirectionsderiveSecurityFlagsparsePowerShellCommandCached
π External import roots
Package roots from from "β¦" (relative paths omitted).
execa
π₯οΈ Source preview
import { execa } from 'execa'
import { logForDebugging } from '../debug.js'
import { memoizeWithLRU } from '../memoize.js'
import { getCachedPowerShellPath } from '../shell/powershellDetection.js'
import { jsonParse } from '../slowOperations.js'
// ---------------------------------------------------------------------------
// Public types describing the parsed output returned to callers.
// These map to System.Management.Automation.Language AST classes.
// Raw internal types (RawParsedOutput etc.) are defined further below.
// ---------------------------------------------------------------------------
/**
* The PowerShell AST element type for pipeline elements.
* Maps directly to CommandBaseAst derivatives in System.Management.Automation.Language.
*/
type PipelineElementType =
| 'CommandAst'
| 'CommandExpressionAst'
| 'ParenExpressionAst'
/**
* The AST node type for individual command elements (arguments, expressions).
* Used to classify each element during the AST walk so TypeScript can derive
* security flags without extra Find-AstNodes calls in PowerShell.
*/
type CommandElementType =
| 'ScriptBlock'
| 'SubExpression'
| 'ExpandableString'
| 'MemberInvocation'
| 'Variable'
| 'StringConstant'
| 'Parameter'
| 'Other'
/**
* A child node of a command element (one level deep). Populated for
* CommandParameterAst β .Argument (colon-bound parameters like
* `-InputObject:$env:SECRET`). Consumers check `child.type` to classify
* the bound value (Variable, StringConstant, Other) without parsing text.
*/
export type CommandElementChild = {
type: CommandElementType
text: string
}
/**
* The PowerShell AST statement type.
* Maps directly to StatementAst derivatives in System.Management.Automation.Language.
*/
type StatementType =
| 'PipelineAst'
| 'PipelineChainAst'
| 'AssignmentStatementAst'
| 'IfStatementAst'
| 'ForStatementAst'
| 'ForEachStatementAst'
| 'WhileStatementAst'
| 'DoWhileStatementAst'
| 'DoUntilStatementAst'
| 'SwitchStatementAst'
| 'TryStatementAst'
| 'TrapStatementAst'
| 'FunctionDefinitionAst'
| 'DataStatementAst'
| 'UnknownStatementAst'
/**
* A command invocation within a pipeline segment.
*/
export type ParsedCommandElement = {
/** The command/cmdlet name (e.g., "Get-ChildItem", "git") */
name: string
/** The command name type: cmdlet, application (exe), or unknown */
nameType: 'cmdlet' | 'application' | 'unknown'
/** The AST element type from PowerShell's parser */
elementType: PipelineElementType
/** All arguments as strings (includes flags like "-Recurse") */
args: string[]
/** The full text of this command element */
text: string
/** AST node types for each element in this command (arguments, expressions, etc.) */
elementTypes?: CommandElementType[]
/**
* Child nodes of each argument, aligned with `args[]` (so
* `children[i]` β `args[i]` β `elementTypes[i+1]`). Only populated for
* Parameter elements with a colon-bound argument. Undefined for elements
* with no children. Lets consumers check `children[i].some(c => c.type
* !== 'StringConstant')` instead of parsing the arg text for `:` + `$`.
*/
children?: (CommandElementChild[] | undefined)[]
/** Redirections on this command element (from nested commands in && / || chains) */
redirections?: ParsedRedirection[]
}
/**
* A redirection found in the command.
*/
type ParsedRedirection = {
/** The redirection operator */
operator: '>' | '>>' | '2>' | '2>>' | '*>' | '*>>' | '2>&1'
/** The target (file path or stream number) */
target: string
/** Whether this is a merging redirection like 2>&1 */
isMerging: boolean
}
/**
* A parsed statement from PowerShell.
* Can be a pipeline, assignment, control flow statement, etc.
*/
type ParsedStatement = {
/** The AST statement type from PowerShell's parser */
statementType: StatementType
/** Individual commands in this statement (for pipelines) */
commands: ParsedCommandElement[]
/** Redirections on this statement */
redirections: ParsedRedirection[]
/** Full text of the statement */
text: string
/**
* For control flow statements (if, for, foreach, while, try, etc.),
* commands found recursively inside the body blocks.
* Uses FindAll() to extract ALL nested CommandAst nodes at any depth.
*/
nestedCommands?: ParsedCommandElement[]
/**
* Security-relevant AST patterns found via FindAll() on the entire statement,
* regardless of statement type. This catches patterns that elementTypes may
* miss (e.g. member invocations inside assignments, subexpressions in
* non-pipeline statements). Computed in the PS1 script using instanceof
* checks against the PowerShell AST type system.
*/
securityPatterns?: {
hasMemberInvocations?: boolean
hasSubExpressions?: boolean
hasExpandableStrings?: boolean
hasScriptBlocks?: boolean
}
}
/**
* A variable reference found in the command.
*/
type ParsedVariable = {
/** The variable path (e.g., "HOME", "env:PATH", "global:x") */
path: string
/** Whether this variable uses splatting (@var instead of $var) */
isSplatted: boolean
}
/**
* A parse error from PowerShell's parser.
*/
type ParseError = {
message: string
errorId: string
}
/**
* The complete parsed result from the PowerShell AST parser.
*/
export type ParsedPowerShellCommand = {
/** Whether the command parsed successfully (no syntax errors) */
valid: boolean
/** Parse errors, if any */
errors: ParseError[]
/** Top-level statements, separated by ; or newlines */
statements: ParsedStatement[]
/** All variable references found */
variables: ParsedVariable[]
/** Whether the token stream contains a stop-parsing (--%) token */
hasStopParsing: boolean
/** The original command text */
originalCommand: string
/**
* All .NET type literals found anywhere in the AST (TypeExpressionAst +
* TypeConstraintAst). TypeName.FullName β the literal text as written, NOT
* the resolved .NET type (e.g. [int] β "int", not "System.Int32").
* Consumed by the CLM-allowlist check in powershellSecurity.ts.
*/
typeLiterals?: string[]
/**
* Whether the command contains `using module` or `using assembly` statements.
* These load external code (modules/assemblies) and execute their top-level
* script body or module initializers. The using statement is a sibling of
* the named blocks on ScriptBlockAst, not a child, so it is not visible
* to Process-BlockStatements or any downstream command walker.
*/
hasUsingStatements?: boolean
/**
* Whether the command contains `#Requires` directives (ScriptRequirements).
* `#Requires -Modules <name>` triggers module loading from PSModulePath.
*/
hasScriptRequirements?: boolean
}
// ---------------------------------------------------------------------------
// Default 5s is fine for interactive use (warm pwsh spawn is ~450ms). Windows
// CI under Defender/AMSI load can exceed 5s on consecutive spawns even after
// CAN_SPAWN_PARSE_SCRIPT() warms the JIT (run 23574701241 windows-shard-5:
// attackVectors F1 hit 2Γ5s timeout β valid:false β 'ask' instead of 'deny').
// Override via env for tests. Read inside parsePowerShellCommandImpl, not
// top-level, per CLAUDE.md (globalSettings.env ordering).
const DEFAULT_PARSE_TIMEOUT_MS = 5_000
function getParseTimeoutMs(): number {
const env = process.env.CLAUDE_CODE_PWSH_PARSE_TIMEOUT_MS
if (env) {
const parsed = parseInt(env, 10)
if (!isNaN(parsed) && parsed > 0) return parsed
}
return DEFAULT_PARSE_TIMEOUT_MS
}
// MAX_COMMAND_LENGTH is derived from PARSE_SCRIPT_BODY.length below (after the
// script body is defined) so it cannot go stale as the script grows.
/**
* The PowerShell parse script inlined as a string constant.
* This avoids needing to read from disk at runtime (the file may not exist
* in bundled builds). The script uses the native PowerShell AST parser to
* analyze a command and output structured JSON.
*/
// Raw types describing PS script JSON output (exported for testing)
export type RawCommandElement = {
type: string // .GetType().Name e.g. "StringConstantExpressionAst"
text: string // .Extent.Text
value?: string // .Value if available (resolves backtick escapes)
expressionType?: string // .Expression.GetType().Name for CommandExpressionAst
children?: { type: string; text: string }[] // CommandParameterAst.Argument, one level
}
export type RawRedirection = {
type: string // "FileRedirectionAst" or "MergingRedirectionAst"
append?: boolean // .Append (FileRedirectionAst only)
fromStream?: string // .FromStream.ToString() e.g. "Output", "Error", "All"
locationText?: string // .Location.Extent.Text (FileRedirectionAst only)
}
export type RawPipelineElement = {
type: string // .GetType().Name e.g. "CommandAst", "CommandExpressionAst"
text: string // .Extent.Text
commandElements?: RawCommandElement[]
redirections?: RawRedirection[]
expressionType?: string // for CommandExpressionAst: .Expression.GetType().Name
}
export type RawStatement = {
type: string // .GetType().Name e.g. "PipelineAst", "IfStatementAst", "TrapStatementAst"
text: string // .Extent.Text
elements?: RawPipelineElement[] // for PipelineAst: the pipeline elements
nestedCommands?: RawPipelineElement[] // commands found via FindAll (all statement types)
redirections?: RawRedirection[] // FileRedirectionAst found via FindAll (non-PipelineAst only)
securityPatterns?: {
// Security-relevant AST node types found via FindAll on the statement
hasMemberInvocations?: boolean
hasSubExpressions?: boolean
hasExpandableStrings?: boolean
hasScriptBlocks?: boolean
}
}
type RawParsedOutput = {
valid: boolean
errors: { message: string; errorId: string }[]
statements: RawStatement[]
variables: { path: string; isSplatted: boolean }[]
hasStopParsing: boolean
originalCommand: string
typeLiterals?: string[]
hasUsingStatements?: boolean
hasScriptRequirements?: boolean
}
// This is the canonical copy of the parse script. There is no separate .ps1 file.
/**
* The core parse logic.
* The command is passed via Base64-encoded $EncodedCommand variable
* to avoid here-string injection attacks.
*
* SECURITY β top-level ParamBlock: ScriptBlockAst.ParamBlock is a SIBLING of
* the named blocks (Begin/Process/End/Clean/DynamicParam), not nested inside
* them, so Process-BlockStatements never reaches it. Commands inside param()
* default-value expressions and attribute arguments (e.g. [ValidateScript({...})])
* were invisible to every downstream check. PoC:
* param($x = (Remove-Item /)); Get-Process β only Get-Process surfaced
* param([ValidateScript({rm /;$true})]$x='t') β rm invisible, runs on bind
* Function-level param() IS covered: FindAll on the FunctionDefinitionAst
* statement recurses into its descendants. The gap was only the script-level
* ParamBlock. ParamBlockAst has .Parameters (not .Statements) so we FindAll
* on it directly rather than reusing Process-BlockStatements. We only emit a
* statement if there is something to report, to avoid noise for plain
* param($x) declarations. (Kept compact in-script to preserve argv budget.)
*/
/**
* PS1 parse script. Comments live here (not inline) β every char inside the
* backticks eats into WINDOWS_MAX_COMMAND_LENGTH (argv budget).
*
* Structure:
* - Get-RawCommandElements: extract CommandAst element data (type, text, value,
* expressionType, children for colon-bound param .Argument)
* - Get-RawRedirections: extract FileRedirectionAst operator+target
* - Get-SecurityPatterns: FindAll for security flags (hasSubExpressions via
* Sub/Array/ParenExpressionAst, hasScriptBlocks, etc.)
* - Type literals: emit TypeExpressionAst names for CLM allowlist check
* - --% token: PS7 MinusMinus, PS5.1 Generic kind
* - CommandExpressionAst.Redirections: inherits from CommandBaseAst β
* `1 > /tmp/x` statement has FileRedirectionAst that element-iteration misses
* - Nested commands: FindAll for ALL statement types (if/for/foreach/while/
* switch/try/function/assignment/PipelineChainAst) β skip direct pipeline
* elements already in the loop
*/
// exported for testing
export const PARSE_SCRIPT_BODY = `
if (-not $EncodedCommand) {
Write-Output '{"valid":false,"errors":[{"message":"No command provided","errorId":"NoInput"}],"statements":[],"variables":[],"hasStopParsing":false,"originalCommand":""}'
exit 0
}
$Command = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($EncodedCommand))
$tokens = $null
$parseErrors = $null
$ast = [System.Management.Automation.Language.Parser]::ParseInput(
$Command,
[ref]$tokens,
[ref]$parseErrors
)
$allVariables = [System.Collections.ArrayList]::new()
function Get-RawCommandElements {
param([System.Management.Automation.Language.CommandAst]$CmdAst)
$elems = [System.Collections.ArrayList]::new()
foreach ($ce in $CmdAst.CommandElements) {
$ceData = @{ type = $ce.GetType().Name; text = $ce.Extent.Text }
if ($ce.PSObject.Properties['Value'] -and $null -ne $ce.Value -and $ce.Value -is [string]) {
$ceData.value = $ce.Value
}
if ($ce -is [System.Management.Automation.Language.CommandExpressionAst]) {
$ceData.expressionType = $ce.Expression.GetType().Name
}
$a=$ce.Argument;if($a){$ceData.children=@(@{type=$a.GetType().Name;text=$a.Extent.Text})}
[void]$elems.Add($ceData)
}
return $elems
}
function Get-RawRedirections {
param($Redirections)
$result = [System.Collections.ArrayList]::new()
foreach ($redir in $Redirections) {
$redirData = @{ type = $redir.GetType().Name }
if ($redir -is [System.Management.Automation.Language.FileRedirectionAst]) {
$redirData.append = [bool]$redir.Append
$redirData.fromStream = $redir.FromStream.ToString()
$redirData.locationText = $redir.Location.Extent.Text
}
[void]$result.Add($redirData)
}
return $result
}
function Get-SecurityPatterns($A) {
$p = @{}
foreach ($n in $A.FindAll({ param($x)
$x -is [System.Management.Automation.Language.MemberExpressionAst] -or
$x -is [System.Management.Automation.Language.SubExpressionAst] -or
$x -is [System.Management.Automation.Language.ArrayExpressionAst] -or
$x -is [System.Management.Automation.Language.ExpandableStringExpressionAst] -or
$x -is [System.Management.Automation.Language.ScriptBlockExpressionAst] -or
$x -is [System.Management.Automation.Language.ParenExpressionAst]
}, $true)) { switch ($n.GetType().Name) {
'InvokeMemberExpressionAst' { $p.hasMemberInvocations = $true }
'MemberExpressionAst' { $p.hasMemberInvocations = $true }
'SubExpressionAst' { $p.hasSubExpressions = $true }
'ArrayExpressionAst' { $p.hasSubExpressions = $true }
'ParenExpressionAst' { $p.hasSubExpressions = $true }
'ExpandableStringExpressionAst' { $p.hasExpandableStrings = $true }
'ScriptBlockExpressionAst' { $p.hasScriptBlocks = $true }
}}
if ($p.Count -gt 0) { return $p }
return $null
}
$varExprs = $ast.FindAll({ param($node) $node -is [System.Management.Automation.Language.VariableExpressionAst] }, $true)
foreach ($v in $varExprs) {
[void]$allVariables.Add(@{
path = $v.VariablePath.ToString()
isSplatted = [bool]$v.Splatted
})
}
$typeLiterals = [System.Collections.ArrayList]::new()
foreach ($t in $ast.FindAll({ param($n)
$n -is [System.Management.Automation.Language.TypeExpressionAst] -or
$n -is [System.Management.Automation.Language.TypeConstraintAst]
}, $true)) { [void]$typeLiterals.Add($t.TypeName.FullName) }
$hasStopParsing = $false
$tk = [System.Management.Automation.Language.TokenKind]
foreach ($tok in $tokens) {
if ($tok.Kind -eq $tk::MinusMinus) { $hasStopParsing = $true; break }
if ($tok.Kind -eq $tk::Generic -and ($tok.Text -replace '[\u2013\u2014\u2015]','-') -eq '--%') {
$hasStopParsing = $true; break
}
}
$statements = [System.Collections.ArrayList]::new()
function Process-BlockStatements {
param($Block)
if (-not $Block) { return }
foreach ($stmt in $Block.Statements) {
$statement = @{
type = $stmt.GetType().Name
text = $stmt.Extent.Text
}
if ($stmt -is [System.Management.Automation.Language.PipelineAst]) {
$elements = [System.Collections.ArrayList]::new()
foreach ($element in $stmt.PipelineElements) {
$elemData = @{
type = $element.GetType().Name
text = $element.Extent.Text
}
if ($element -is [System.Management.Automation.Language.CommandAst]) {
$elemData.commandElements = @(Get-RawCommandElements -CmdAst $element)
$elemData.redirections = @(Get-RawRedirections -Redirections $element.Redirections)
} elseif ($element -is [System.Management.Automation.Language.CommandExpressionAst]) {
$elemData.expressionType = $element.Expression.GetType().Name
$elemData.redirections = @(Get-RawRedirections -Redirections $element.Redirections)
}
[void]$elements.Add($elemData)
}
$statement.elements = @($elements)
$allNestedCmds = $stmt.FindAll(
{ param($node) $node -is [System.Management.Automation.Language.CommandAst] },
$true
)
$nestedCmds = [System.Collections.ArrayList]::new()
foreach ($cmd in $allNestedCmds) {
if ($cmd.Parent -eq $stmt) { continue }
$nested = @{
type = $cmd.GetType().Name
text = $cmd.Extent.Text
commandElements = @(Get-RawCommandElements -CmdAst $cmd)
redirections = @(Get-RawRedirections -Redirections $cmd.Redirections)
}
[void]$nestedCmds.Add($nested)
}
if ($nestedCmds.Count -gt 0) {
$statement.nestedCommands = @($nestedCmds)
}
$r = $stmt.FindAll({param($n) $n -is [System.Management.Automation.Language.FileRedirectionAst]}, $true)
if ($r.Count -gt 0) {
$rr = @(Get-RawRedirections -Redirections $r)
$statement.redirections = if ($statement.redirections) { @($statement.redirections) + $rr } else { $rr }
}
} else {
$nestedCmdAsts = $stmt.FindAll(
{ param($node) $node -is [System.Management.Automation.Language.CommandAst] },
$true
)
$nested = [System.Collections.ArrayList]::new()
foreach ($cmd in $nestedCmdAsts) {
[void]$nested.Add(@{
type = 'CommandAst'
text = $cmd.Extent.Text
commandElements = @(Get-RawCommandElements -CmdAst $cmd)
redirections = @(Get-RawRedirections -Redirections $cmd.Redirections)
})
}
if ($nested.Count -gt 0) {
$statement.nestedCommands = @($nested)
}
$r = $stmt.FindAll({param($n) $n -is [System.Management.Automation.Language.FileRedirectionAst]}, $true)
if ($r.Count -gt 0) { $statement.redirections = @(Get-RawRedirections -Redirections $r) }
}
$sp = Get-SecurityPatterns $stmt
if ($sp) { $statement.securityPatterns = $sp }
[void]$statements.Add($statement)
}
if ($Block.Traps) {
foreach ($trap in $Block.Traps) {
$statement = @{
type = 'TrapStatementAst'
text = $trap.Extent.Text
}
$nestedCmdAsts = $trap.FindAll(
{ param($node) $node -is [System.Management.Automation.Language.CommandAst] },
$true
)
$nestedCmds = [System.Collections.ArrayList]::new()
foreach ($cmd in $nestedCmdAsts) {
$nested = @{
type = $cmd.GetType().Name
text = $cmd.Extent.Text
commandElements = @(Get-RawCommandElements -CmdAst $cmd)
redirections = @(Get-RawRedirections -Redirections $cmd.Redirections)
}
[void]$nestedCmds.Add($nested)
}
if ($nestedCmds.Count -gt 0) {
$statement.nestedCommands = @($nestedCmds)
}
$r = $trap.FindAll({param($n) $n -is [System.Management.Automation.Language.FileRedirectionAst]}, $true)
if ($r.Count -gt 0) { $statement.redirections = @(Get-RawRedirections -Redirections $r) }
$sp = Get-SecurityPatterns $trap
if ($sp) { $statement.securityPatterns = $sp }
[void]$statements.Add($statement)
}
}
}
Process-BlockStatements -Block $ast.BeginBlock
Process-BlockStatements -Block $ast.ProcessBlock
Process-BlockStatements -Block $ast.EndBlock
Process-BlockStatements -Block $ast.CleanBlock
Process-BlockStatements -Block $ast.DynamicParamBlock
if ($ast.ParamBlock) {
$pb = $ast.ParamBlock
$pn = [System.Collections.ArrayList]::new()
foreach ($c in $pb.FindAll({param($n) $n -is [System.Management.Automation.Language.CommandAst]}, $true)) {
[void]$pn.Add(@{type='CommandAst';text=$c.Extent.Text;commandElements=@(Get-RawCommandElements -CmdAst $c);redirections=@(Get-RawRedirections -Redirections $c.Redirections)})
}
$pr = $pb.FindAll({param($n) $n -is [System.Management.Automation.Language.FileRedirectionAst]}, $true)
$ps = Get-SecurityPatterns $pb
if ($pn.Count -gt 0 -or $pr.Count -gt 0 -or $ps) {
$st = @{type='ParamBlockAst';text=$pb.Extent.Text}
if ($pn.Count -gt 0) { $st.nestedCommands = @($pn) }
if ($pr.Count -gt 0) { $st.redirections = @(Get-RawRedirections -Redirections $pr) }
if ($ps) { $st.securityPatterns = $ps }
[void]$statements.Add($st)
}
}
$hasUsingStatements = $ast.UsingStatements -and $ast.UsingStatements.Count -gt 0
$hasScriptRequirements = $ast.ScriptRequirements -ne $null
$output = @{
valid = ($parseErrors.Count -eq 0)
errors = @($parseErrors | ForEach-Object {
@{
message = $_.Message
errorId = $_.ErrorId
}
})
statements = @($statements)
variables = @($allVariables)
hasStopParsing = $hasStopParsing
originalCommand = $Command
typeLiterals = @($typeLiterals)
hasUsingStatements = [bool]$hasUsingStatements
hasScriptRequirements = [bool]$hasScriptRequirements
}
$output | ConvertTo-Json -Depth 10 -Compress
`
// ---------------------------------------------------------------------------
// Windows CreateProcess has a 32,767 char command-line limit. The encoding
// chain is:
// command (N UTF-8 bytes) β Base64 (~4N/3 chars) β $EncodedCommand = '...'\n
// β full script (wrapper + PARSE_SCRIPT_BODY) β UTF-16LE (2Γ bytes)
// β Base64 (4/3Γ chars) β -EncodedCommand argv
// Final cmdline β argv_overhead + (wrapper + 4N/3 + body) Γ 8/3
//
// Solving for N (UTF-8 bytes) with a 32,767 cap:
// script_budget = (32767 - argv_overhead) Γ 3/8
// cmd_b64_budget = script_budget - PARSE_SCRIPT_BODY.length - wrapper
// N = cmd_b64_budget Γ 3/4 - safety_margin
//
// SECURITY: N is a UTF-8 BYTE budget, not a UTF-16 code-unit budget. The
// length gate MUST measure Buffer.byteLength(command, 'utf8'), not
// command.length. A BMP character in U+0800βU+FFFF (CJK ideographs, most
// non-Latin scripts) is 1 UTF-16 code unit but 3 UTF-8 bytes. With
// PARSE_SCRIPT_BODY β 10.6K, N β 1,092 bytes. Comparing against .length
// permits a 1,092-code-unit pure-CJK command (β3,276 UTF-8 bytes) β inner
// base64 β 4,368 chars β final argv β 40K chars, overflowing 32,767 by
// ~7.4K. CreateProcess fails β valid:false β parse-fail degradation (deny
// rules silently downgrade to ask). Finding #36.
//
// COMPUTED from PARSE_SCRIPT_BODY.length so it cannot drift. The prior
// hardcoded value (4,500) was derived from a ~6K body estimate; the body is
// actually ~11K chars, so the real ceiling was ~1,850. Commands in the
// 1,850β4,500 range passed this gate but then failed CreateProcess on
// Windows, returning valid=false and skipping all AST-based security checks.
//
// Unix argv limits are typically 2MB+ (ARG_MAX) with ~128KB per-argument
// limit (MAX_ARG_STRLEN on Linux; macOS has no per-arg limit below ARG_MAX).
// At MAX=4,500 the -EncodedCommand argument is ~45KB β well under either.
// Applying the Windows-derived limit on Unix would REGRESS: commands in the
// ~1Kβ4.5K range previously parsed successfully and reached the sub-command
// deny loop at powershellPermissions.ts; rejecting them pre-spawn degrades
// user-configured deny rules from denyβask for compound commands with a
// denied cmdlet buried mid-script. So the Windows limit is platform-gated.
//
// If the Windows limit becomes too restrictive, switch to -File with a temp
// file for large inputs.
// ---------------------------------------------------------------------------
const WINDOWS_ARGV_CAP = 32_767
// pwsh path + " -NoProfile -NonInteractive -NoLogo -EncodedCommand " +
// argv quoting. A long Windows pwsh path (C:\Program Files\PowerShell\7\
// pwsh.exe) + flags is ~95 chars; 200 leaves headroom for unusual installs.
const FIXED_ARGV_OVERHEAD = 200
// "$EncodedCommand = '" + "'\n" wrapper around the user command's base64
const ENCODED_CMD_WRAPPER = `$EncodedCommand = ''\n`.length
// Margin for base64 padding rounding (β€4 chars at each of 2 levels) and minor
// estimation drift. Multibyte expansion is NOT absorbed here β the gate
// measures actual UTF-8 bytes (Buffer.byteLength), not code units.
const SAFETY_MARGIN = 100
const SCRIPT_CHARS_BUDGET = ((WINDOWS_ARGV_CAP - FIXED_ARGV_OVERHEAD) * 3) / 8
const CMD_B64_BUDGET =
SCRIPT_CHARS_BUDGET - PARSE_SCRIPT_BODY.length - ENCODED_CMD_WRAPPER
// Exported for drift-guard tests (the drift-prone value is the Windows one).
// Unit: UTF-8 BYTES. Compare against Buffer.byteLength, not .length.
export const WINDOWS_MAX_COMMAND_LENGTH = Math.max(
0,
Math.floor((CMD_B64_BUDGET * 3) / 4) - SAFETY_MARGIN,
)
// Pre-existing value, known to work on Unix. See comment above re: why the
// Windows derivation must NOT be applied here. Unit: UTF-8 BYTES β for ASCII
// commands (the common case) bytes==chars so no regression; for multibyte
// commands this is slightly tighter but still far below Unix ARG_MAX (~128KB
// per-arg), so the argv spawn cannot overflow.
const UNIX_MAX_COMMAND_LENGTH = 4_500
// Unit: UTF-8 BYTES (see SECURITY note above).
export const MAX_COMMAND_LENGTH =
process.platform === 'win32'
? WINDOWS_MAX_COMMAND_LENGTH
: UNIX_MAX_COMMAND_LENGTH
const INVALID_RESULT_BASE: Omit<
ParsedPowerShellCommand,
'errors' | 'originalCommand'
> = {
valid: false,
statements: [],
variables: [],
hasStopParsing: false,
}
function makeInvalidResult(
command: string,
message: string,
errorId: string,
): ParsedPowerShellCommand {
return {
...INVALID_RESULT_BASE,
errors: [{ message, errorId }],
originalCommand: command,
}
}
/**
* Base64-encode a string as UTF-16LE, which is the encoding required by
* PowerShell's -EncodedCommand parameter.
*/
function toUtf16LeBase64(text: string): string {
if (typeof Buffer !== 'undefined') {
return Buffer.from(text, 'utf16le').toString('base64')
}
// Fallback for non-Node environments
const bytes: number[] = []
for (let i = 0; i < text.length; i++) {
const code = text.charCodeAt(i)
bytes.push(code & 0xff, (code >> 8) & 0xff)
}
return btoa(bytes.map(b => String.fromCharCode(b)).join(''))
}
/**
* Build the full PowerShell script that parses a command.
* The user command is Base64-encoded (UTF-8) and embedded in a variable
* to prevent injection attacks.
*/
function buildParseScript(command: string): string {
const encoded =
typeof Buffer !== 'undefined'
? Buffer.from(command, 'utf8').toString('base64')
: btoa(
new TextEncoder()
.encode(command)
.reduce((s, b) => s + String.fromCharCode(b), ''),
)
return `$EncodedCommand = '${encoded}'\n${PARSE_SCRIPT_BODY}`
}
/**
* Ensure a value is an array. PowerShell 5.1's ConvertTo-Json may unwrap
* single-element arrays into plain objects.
*/
function ensureArray<T>(value: T | T[] | undefined | null): T[] {
if (value === undefined || value === null) {
return []
}
return Array.isArray(value) ? value : [value]
}
/** Map raw .NET AST type name to our StatementType union */
// exported for testing
export function mapStatementType(rawType: string): StatementType {
switch (rawType) {
case 'PipelineAst':
return 'PipelineAst'
case 'PipelineChainAst':
return 'PipelineChainAst'
case 'AssignmentStatementAst':
return 'AssignmentStatementAst'
case 'IfStatementAst':
return 'IfStatementAst'
case 'ForStatementAst':
return 'ForStatementAst'
case 'ForEachStatementAst':
return 'ForEachStatementAst'
case 'WhileStatementAst':
return 'WhileStatementAst'
case 'DoWhileStatementAst':
return 'DoWhileStatementAst'
case 'DoUntilStatementAst':
return 'DoUntilStatementAst'
case 'SwitchStatementAst':
return 'SwitchStatementAst'
case 'TryStatementAst':
return 'TryStatementAst'
case 'TrapStatementAst':
return 'TrapStatementAst'
case 'FunctionDefinitionAst':
return 'FunctionDefinitionAst'
case 'DataStatementAst':
return 'DataStatementAst'
default:
return 'UnknownStatementAst'
}
}
/** Map raw .NET AST type name to our CommandElementType union */
// exported for testing
export function mapElementType(
rawType: string,
expressionType?: string,
): CommandElementType {
switch (rawType) {
case 'ScriptBlockExpressionAst':
return 'ScriptBlock'
case 'SubExpressionAst':
case 'ArrayExpressionAst':
// SECURITY: ArrayExpressionAst (@()) is a sibling of SubExpressionAst,
// not a subclass. Both evaluate arbitrary pipelines with side effects:
// Get-ChildItem @(Remove-Item ./data) runs Remove-Item inside @().
// Map both to SubExpression so hasSubExpressions fires and isReadOnlyCommand
// rejects (it doesn't check nestedCommands, only pipeline.commands[]).
return 'SubExpression'
case 'ExpandableStringExpressionAst':
return 'ExpandableString'
case 'InvokeMemberExpressionAst':
case 'MemberExpressionAst':
return 'MemberInvocation'
case 'VariableExpressionAst':
return 'Variable'
case 'StringConstantExpressionAst':
case 'ConstantExpressionAst':
// ConstantExpressionAst covers numeric literals (5, 3.14). For
// permission purposes a numeric literal is as safe as a string
// literal β it's an inert value, not code. Without this mapping,
// `-Seconds:5` produced children[0].type='Other' and consumers
// checking `children.some(c => c.type !== 'StringConstant')` would
// false-positive ask on harmless numeric args.
return 'StringConstant'
case 'CommandParameterAst':
return 'Parameter'
case 'ParenExpressionAst':
return 'SubExpression'
case 'CommandExpressionAst':
// Delegate to the wrapped expression type so we catch SubExpressionAst,
// ExpandableStringExpressionAst, ScriptBlockExpressionAst, etc.
// without maintaining a manual list. Falls through to 'Other' if the
// inner type is unrecognised.
if (expressionType) {
return mapElementType(expressionType)
}
return 'Other'
default:
return 'Other'
}
}
/** Classify command name as cmdlet, application, or unknown */
// exported for testing
export function classifyCommandName(
name: string,
): 'cmdlet' | 'application' | 'unknown' {
if (/^[A-Za-z]+-[A-Za-z][A-Za-z0-9_]*$/.test(name)) {
return 'cmdlet'
}
if (/[.\\/]/.test(name)) {
return 'application'
}
return 'unknown'
}
/** Strip module prefix from command name (e.g. "Microsoft.PowerShell.Utility\\Invoke-Expression" -> "Invoke-Expression") */
// exported for testing
export function stripModulePrefix(name: string): string {
const idx = name.lastIndexOf('\\')
if (idx < 0) return name
// Don't strip file paths: drive letters (C:\...), UNC paths (\\server\...), or relative paths (.\, ..\)
if (
/^[A-Za-z]:/.test(name) ||
name.startsWith('\\\\') ||
name.startsWith('.\\') ||
name.startsWith('..\\')
)
return name
return name.substring(idx + 1)
}
/** Transform a raw CommandAst pipeline element into ParsedCommandElement */
// exported for testing
export function transformCommandAst(
raw: RawPipelineElement,
): ParsedCommandElement {
const cmdElements = ensureArray(raw.commandElements)
let name = ''
const args: string[] = []
const elementTypes: CommandElementType[] = []
const children: (CommandElementChild[] | undefined)[] = []
let hasChildren = false
// SECURITY: nameType MUST be computed from the raw name (before
// stripModulePrefix). classifyCommandName('scripts\\Get-Process') returns
// 'application' (contains \\) β the correct answer, since PowerShell resolves
// this as a file path. After stripping it becomes 'Get-Process' which
// classifies as 'cmdlet' β wrong, and allowlist checks would trust it.
// Auto-allow paths gate on nameType !== 'application' to catch this.
// name (stripped) is still used for deny-rule matching symmetry, which is
// fail-safe: deny rules over-match (Module\\Remove-Item still hits a
// Remove-Item deny), allow rules are separately gated by nameType.
let nameType: 'cmdlet' | 'application' | 'unknown' = 'unknown'
if (cmdElements.length > 0) {
const first = cmdElements[0]!
// SECURITY: only trust .value for string-literal element types with a
// string-typed value. Numeric ConstantExpressionAst (e.g. `& 1`) emits an
// integer .value that crashes stripModulePrefix() β parser falls through
// to passthrough. For non-string-literal or non-string .value, use .text.
const isFirstStringLiteral =
first.type === 'StringConstantExpressionAst' ||
first.type === 'ExpandableStringExpressionAst'
const rawNameUnstripped =
isFirstStringLiteral && typeof first.value === 'string'
? first.value
: first.text
// SECURITY: strip surrounding quotes from the command name. When .value is
// unavailable (no StaticType on the raw node), .text preserves quotes β
// `& 'Invoke-Expression' 'x'` yields "'Invoke-Expression'". Stripping here
// at the source means every downstream reader of element.name (deny-rule
// matching, GIT_SAFETY_WRITE_CMDLETS lookup, resolveToCanonical, etc.)
// sees the bare cmdlet name. No-op when .value already stripped.
const rawName = rawNameUnstripped.replace(/^['"]|['"]$/g, '')
// SECURITY: PowerShell built-in cmdlet names are ASCII-only. Non-ASCII
// characters in cmdlet position are inherently suspicious β .NET
// OrdinalIgnoreCase folds U+017F (ΕΏ) β S and U+0131 (Δ±) β I per
// UnicodeData.txt SimpleUppercaseMapping, so PowerShell resolves
// `ΕΏtart-proceΕΏΕΏ` β Start-Process at runtime. JS .toLowerCase() does NOT
// fold these (ΕΏ is already lowercase), so every downstream name
// comparison (NEVER_SUGGEST, deny-rule strEquals, resolveToCanonical,
// security validators) misses. Force 'application' to gate auto-allow
// (blocks at the nameType !== 'application' checks). Finding #31.
// Verified on Windows (pwsh 7.x, 2026-03): ΕΏtart-proceΕΏΕΏ does NOT resolve.
// Retained as defense-in-depth against future .NET/PS behavior changes
// or module-provided command resolution hooks.
if (/[\u0080-\uFFFF]/.test(rawName)) {
nameType = 'application'
} else {
nameType = classifyCommandName(rawName)
}
name = stripModulePrefix(rawName)
elementTypes.push(mapElementType(first.type, first.expressionType))
for (let i = 1; i < cmdElements.length; i++) {
const ce = cmdElements[i]!
// Use resolved .value for string constants (strips quotes, resolves
// backtick escapes like `n -> newline) but keep raw .text for parameters
// (where .value loses the dash prefix, e.g. '-Path' -> 'Path'),
// variables, and other non-string types.
const isStringLiteral =
ce.type === 'StringConstantExpressionAst' ||
ce.type === 'ExpandableStringExpressionAst'
args.push(isStringLiteral && ce.value != null ? ce.value : ce.text)
elementTypes.push(mapElementType(ce.type, ce.expressionType))
// Map raw children (CommandParameterAst.Argument) through
// mapElementType so consumers see 'Variable', 'StringConstant', etc.
const rawChildren = ensureArray(ce.children)
if (rawChildren.length > 0) {
hasChildren = true
children.push(
rawChildren.map(c => ({
type: mapElementType(c.type),
text: c.text,
})),
)
} else {
children.push(undefined)
}
}
}
const result: ParsedCommandElement = {
name,
nameType,
elementType: 'CommandAst',
args,
text: raw.text,
elementTypes,
...(hasChildren ? { children } : {}),
}
// Preserve redirections from nested commands (e.g., in && / || chains)
const rawRedirs = ensureArray(raw.redirections)
if (rawRedirs.length > 0) {
result.redirections = rawRedirs.map(transformRedirection)
}
return result
}
/** Transform a non-CommandAst pipeline element into ParsedCommandElement */
// exported for testing
export function transformExpressionElement(
raw: RawPipelineElement,
): ParsedCommandElement {
const elementType: PipelineElementType =
raw.type === 'ParenExpressionAst'
? 'ParenExpressionAst'
: 'CommandExpressionAst'
const elementTypes: CommandElementType[] = [
mapElementType(raw.type, raw.expressionType),
]
return {
name: raw.text,
nameType: 'unknown',
elementType,
args: [],
text: raw.text,
elementTypes,
}
}
/** Map raw redirection to ParsedRedirection */
// exported for testing
export function transformRedirection(raw: RawRedirection): ParsedRedirection {
if (raw.type === 'MergingRedirectionAst') {
return { operator: '2>&1', target: '', isMerging: true }
}
const append = raw.append ?? false
const fromStream = raw.fromStream ?? 'Output'
let operator: ParsedRedirection['operator']
if (append) {
switch (fromStream) {
case 'Error':
operator = '2>>'
break
case 'All':
operator = '*>>'
break
default:
operator = '>>'
break
}
} else {
switch (fromStream) {
case 'Error':
operator = '2>'
break
case 'All':
operator = '*>'
break
default:
operator = '>'
break
}
}
return { operator, target: raw.locationText ?? '', isMerging: false }
}
/** Transform a raw statement into ParsedStatement */
// exported for testing
export function transformStatement(raw: RawStatement): ParsedStatement {
const statementType = mapStatementType(raw.type)
const commands: ParsedCommandElement[] = []
const redirections: ParsedRedirection[] = []
if (raw.elements) {
// PipelineAst: walk pipeline elements
for (const elem of ensureArray(raw.elements)) {
if (elem.type === 'CommandAst') {
commands.push(transformCommandAst(elem))
for (const redir of ensureArray(elem.redirections)) {
redirections.push(transformRedirection(redir))
}
} else {
commands.push(transformExpressionElement(elem))
// SECURITY: CommandExpressionAst also carries .Redirections (inherited
// from CommandBaseAst). `1 > /tmp/evil.txt` is a CommandExpressionAst
// with a FileRedirectionAst. Must extract here or getFileRedirections()
// misses it and compound commands like `Get-ChildItem; 1 > /tmp/x`
// auto-allow at step 5 (only Get-ChildItem is checked).
for (const redir of ensureArray(elem.redirections)) {
redirections.push(transformRedirection(redir))
}
}
}
// SECURITY: The PS1 PipelineAst branch does a deep FindAll for
// FileRedirectionAst to catch redirections hidden inside:
// - colon-bound ParenExpressionAst args: -Name:('payload' > file)
// - hashtable value statements: @{k='payload' > ~/.bashrc}
// Both are invisible at the element level β the redirection's parent
// is a child of CommandParameterAst / CommandExpressionAst, not a
// separate pipeline element. Merge into statement-level redirections.
//
// The FindAll ALSO re-discovers direct-element redirections already
// captured in the per-element loop above. Dedupe by (operator, target)
// so tests and consumers see the real count.
const seen = new Set(redirections.map(r => `${r.operator}\0${r.target}`))
for (const redir of ensureArray(raw.redirections)) {
const r = transformRedirection(redir)
const key = `${r.operator}\0${r.target}`
if (!seen.has(key)) {
seen.add(key)
redirections.push(r)
}
}
} else {
// Non-pipeline statement: add synthetic command entry with full text
commands.push({
name: raw.text,
nameType: 'unknown',
elementType: 'CommandExpressionAst',
args: [],
text: raw.text,
})
// SECURITY: The PS1 else-branch does a direct recursive FindAll on
// FileRedirectionAst to catch expression redirections inside control flow
// (if/for/foreach/while/switch/try/trap/&& and ||). The CommandAst FindAll
// above CANNOT see these: in if ($x) { 1 > /tmp/evil }, the literal 1 with
// its attached redirection is a CommandExpressionAst β a SIBLING of
// CommandAst in the type hierarchy, not a subclass. So nestedCommands never
// contains it, and without this hoist the redirection is invisible to
// getFileRedirections β step 4.6 misses it β compound commands like
// `Get-Process && 1 > /tmp/evil` auto-allow at step 5 (only Get-Process
// is checked, allowlisted).
//
// Finding FileRedirectionAst DIRECTLY (rather than finding CommandExpressionAst
// and extracting .Redirections) is both simpler and more robust: it catches
// redirections on any node type, including ones we don't know about yet.
//
// Double-counts redirections already on nested CommandAst commands (those are
// extracted at line ~395 into nestedCommands[i].redirections AND found again
// here). Harmless: step 4.6 only checks fileRedirections.length > 0, not
// the exact count. No code does arithmetic on redirection counts.
//
// PS1 SIZE NOTE: The full rationale lives here (TS), not in the PS1 script,
// because PS1 comments bloat the -EncodedCommand payload and push the
// Windows CreateProcess 32K limit. Keep PS1 comments terse; point them here.
for (const redir of ensureArray(raw.redirections)) {
redirections.push(transformRedirection(redir))
}
}
let nestedCommands: ParsedCommandElement[] | undefined
const rawNested = ensureArray(raw.nestedCommands)
if (rawNested.length > 0) {
nestedCommands = rawNested.map(transformCommandAst)
}
const result: ParsedStatement = {
statementType,
commands,
redirections,
text: raw.text,
nestedCommands,
}
if (raw.securityPatterns) {
result.securityPatterns = raw.securityPatterns
}
return result
}
/** Transform the complete raw PS output into ParsedPowerShellCommand */
function transformRawOutput(raw: RawParsedOutput): ParsedPowerShellCommand {
const result: ParsedPowerShellCommand = {
valid: raw.valid,
errors: ensureArray(raw.errors),
statements: ensureArray(raw.statements).map(transformStatement),
variables: ensureArray(raw.variables),
hasStopParsing: raw.hasStopParsing,
originalCommand: raw.originalCommand,
}
const tl = ensureArray(raw.typeLiterals)
if (tl.length > 0) {
result.typeLiterals = tl
}
if (raw.hasUsingStatements) {
result.hasUsingStatements = true
}
if (raw.hasScriptRequirements) {
result.hasScriptRequirements = true
}
return result
}
/**
* Parse a PowerShell command using the native AST parser.
* Spawns pwsh to parse the command and returns structured results.
* Results are memoized by command string.
*
* @param command - The PowerShell command to parse
* @returns Parsed command structure, or a result with valid=false on failure
*/
async function parsePowerShellCommandImpl(
command: string,
): Promise<ParsedPowerShellCommand> {
// SECURITY: MAX_COMMAND_LENGTH is a UTF-8 BYTE budget (see derivation at the
// constant definition). command.length counts UTF-16 code units; a CJK
// character is 1 code unit but 3 UTF-8 bytes, so .length under-reports by
// up to 3Γ and allows argv overflow on Windows β CreateProcess fails β
// valid:false β deny rules degrade to ask. Finding #36.
const commandBytes = Buffer.byteLength(command, 'utf8')
if (commandBytes > MAX_COMMAND_LENGTH) {
logForDebugging(
`PowerShell parser: command too long (${commandBytes} bytes, max ${MAX_COMMAND_LENGTH})`,
)
return makeInvalidResult(
command,
`Command too long for parsing (${commandBytes} bytes). Maximum supported length is ${MAX_COMMAND_LENGTH} bytes.`,
'CommandTooLong',
)
}
const pwshPath = await getCachedPowerShellPath()
if (!pwshPath) {
return makeInvalidResult(
command,
'PowerShell is not available',
'NoPowerShell',
)
}
const script = buildParseScript(command)
// Pass the script to PowerShell via -EncodedCommand.
// -EncodedCommand takes a Base64-encoded UTF-16LE string and executes it,
// which avoids: (1) stdin interactive-mode issues where -File - produces
// PS prompts and ANSI escapes in stdout, (2) command-line escaping issues,
// (3) temp files. The script itself is large but well within OS arg limits
// (Windows: 32K chars, Unix: typically 2MB+).
const encodedScript = toUtf16LeBase64(script)
const args = [
'-NoProfile',
'-NonInteractive',
'-NoLogo',
'-EncodedCommand',
encodedScript,
]
// Spawn pwsh with one retry on timeout. On loaded CI runners (Windows
// especially), pwsh spawn + .NET JIT + ParseInput occasionally exceeds 5s
// even after CAN_SPAWN_PARSE_SCRIPT() warms the JIT. execa kills the process
// but exitCode is undefined, which the old code reported as the misleading
// "pwsh exited with code 1:" with empty stderr. A single retry absorbs
// transient load spikes; a double timeout is reported as PwshTimeout.
const parseTimeoutMs = getParseTimeoutMs()
let stdout = ''
let stderr = ''
let code: number | null = null
let timedOut = false
for (let attempt = 0; attempt < 2; attempt++) {
try {
const result = await execa(pwshPath, args, {
timeout: parseTimeoutMs,
reject: false,
})
stdout = result.stdout
stderr = result.stderr
timedOut = result.timedOut
code = result.failed ? (result.exitCode ?? 1) : 0
} catch (e: unknown) {
logForDebugging(
`PowerShell parser: failed to spawn pwsh: ${e instanceof Error ? e.message : e}`,
)
return makeInvalidResult(
command,
`Failed to spawn PowerShell: ${e instanceof Error ? e.message : e}`,
'PwshSpawnError',
)
}
if (!timedOut) break
logForDebugging(
`PowerShell parser: pwsh timed out after ${parseTimeoutMs}ms (attempt ${attempt + 1})`,
)
}
if (timedOut) {
return makeInvalidResult(
command,
`pwsh timed out after ${parseTimeoutMs}ms (2 attempts)`,
'PwshTimeout',
)
}
if (code !== 0) {
logForDebugging(
`PowerShell parser: pwsh exited with code ${code}, stderr: ${stderr}`,
)
return makeInvalidResult(
command,
`pwsh exited with code ${code}: ${stderr}`,
'PwshError',
)
}
const trimmed = stdout.trim()
if (!trimmed) {
logForDebugging('PowerShell parser: empty stdout from pwsh')
return makeInvalidResult(
command,
'No output from PowerShell parser',
'EmptyOutput',
)
}
try {
const raw = jsonParse(trimmed) as RawParsedOutput
return transformRawOutput(raw)
} catch {
logForDebugging(
`PowerShell parser: invalid JSON output: ${trimmed.slice(0, 200)}`,
)
return makeInvalidResult(
command,
'Invalid JSON from PowerShell parser',
'InvalidJson',
)
}
}
// Error IDs from makeInvalidResult that represent transient process failures.
// These should be evicted from the cache so subsequent calls can retry.
// Deterministic failures (CommandTooLong, syntax errors from successful parses)
// should stay cached since retrying would produce the same result.
const TRANSIENT_ERROR_IDS = new Set([
'PwshSpawnError',
'PwshError',
'PwshTimeout',
'EmptyOutput',
'InvalidJson',
])
const parsePowerShellCommandCached = memoizeWithLRU(
(command: string) => {
const promise = parsePowerShellCommandImpl(command)
// Evict transient failures after resolution so they can be retried.
// The current caller still receives the cached promise for this call,
// ensuring concurrent callers share the same result.
void promise.then(result => {
if (
!result.valid &&
TRANSIENT_ERROR_IDS.has(result.errors[0]?.errorId ?? '')
) {
parsePowerShellCommandCached.cache.delete(command)
}
})
return promise
},
(command: string) => command,
256,
)
export { parsePowerShellCommandCached as parsePowerShellCommand }
// ---------------------------------------------------------------------------
// Analysis helpers β derived from the parsed AST structure.
// ---------------------------------------------------------------------------
/**
* Security-relevant flags derived from the parsed AST.
*/
type SecurityFlags = {
/** Contains $(...) subexpression */
hasSubExpressions: boolean
/** Contains { ... } script block expressions */
hasScriptBlocks: boolean
/** Contains @variable splatting */
hasSplatting: boolean
/** Contains expandable strings with embedded expressions ("...$()...") */
hasExpandableStrings: boolean
/** Contains .NET method invocations ([Type]::Method or $obj.Method()) */
hasMemberInvocations: boolean
/** Contains variable assignments ($x = ...) */
hasAssignments: boolean
/** Uses stop-parsing token (--%) */
hasStopParsing: boolean
}
/**
* Common PowerShell aliases mapped to their canonical cmdlet names.
* Uses Object.create(null) to prevent prototype-chain pollution β attacker-controlled
* command names like 'constructor' or '__proto__' must return undefined, not inherited
* Object.prototype properties.
*/
export const COMMON_ALIASES: Record<string, string> = Object.assign(
Object.create(null) as Record<string, string>,
{
// Directory listing
ls: 'Get-ChildItem',
dir: 'Get-ChildItem',
gci: 'Get-ChildItem',
// Content
cat: 'Get-Content',
type: 'Get-Content',
gc: 'Get-Content',
// Navigation
cd: 'Set-Location',
sl: 'Set-Location',
chdir: 'Set-Location',
pushd: 'Push-Location',
popd: 'Pop-Location',
pwd: 'Get-Location',
gl: 'Get-Location',
// Items
gi: 'Get-Item',
gp: 'Get-ItemProperty',
ni: 'New-Item',
mkdir: 'New-Item',
// `md` is PowerShell's built-in alias for `mkdir`. resolveToCanonical is
// single-hop (no mdβmkdirβNew-Item chaining), so it needs its own entry
// or `md /etc/x` falls through while `mkdir /etc/x` is caught.
md: 'New-Item',
ri: 'Remove-Item',
del: 'Remove-Item',
rd: 'Remove-Item',
rmdir: 'Remove-Item',
rm: 'Remove-Item',
erase: 'Remove-Item',
mi: 'Move-Item',
mv: 'Move-Item',
move: 'Move-Item',
ci: 'Copy-Item',
cp: 'Copy-Item',
copy: 'Copy-Item',
cpi: 'Copy-Item',
si: 'Set-Item',
rni: 'Rename-Item',
ren: 'Rename-Item',
// Process
ps: 'Get-Process',
gps: 'Get-Process',
kill: 'Stop-Process',
spps: 'Stop-Process',
start: 'Start-Process',
saps: 'Start-Process',
sajb: 'Start-Job',
ipmo: 'Import-Module',
// Output
echo: 'Write-Output',
write: 'Write-Output',
sleep: 'Start-Sleep',
// Help
help: 'Get-Help',
man: 'Get-Help',
gcm: 'Get-Command',
// Service
gsv: 'Get-Service',
// Variables
gv: 'Get-Variable',
sv: 'Set-Variable',
// History
h: 'Get-History',
history: 'Get-History',
// Invoke
iex: 'Invoke-Expression',
iwr: 'Invoke-WebRequest',
irm: 'Invoke-RestMethod',
icm: 'Invoke-Command',
ii: 'Invoke-Item',
// PSSession β remote code execution surface
nsn: 'New-PSSession',
etsn: 'Enter-PSSession',
exsn: 'Exit-PSSession',
gsn: 'Get-PSSession',
rsn: 'Remove-PSSession',
// Misc
cls: 'Clear-Host',
clear: 'Clear-Host',
select: 'Select-Object',
where: 'Where-Object',
foreach: 'ForEach-Object',
'%': 'ForEach-Object',
'?': 'Where-Object',
measure: 'Measure-Object',
ft: 'Format-Table',
fl: 'Format-List',
fw: 'Format-Wide',
oh: 'Out-Host',
ogv: 'Out-GridView',
// SECURITY: The following aliases are deliberately omitted because PS Core 6+
// removed them (they collide with native executables). Our allowlist logic
// resolves aliases BEFORE checking safety β if we map 'sort' β 'Sort-Object'
// but PowerShell 7/Windows actually runs sort.exe, we'd auto-allow the wrong
// program.
// 'sc' β sc.exe (Service Controller) β e.g. `sc config Svc binpath= ...`
// 'sort' β sort.exe β e.g. `sort /O C:\evil.txt` (arbitrary file write)
// 'curl' β curl.exe (shipped with Windows 10 1803+)
// 'wget' β wget.exe (if installed)
// Prefer to leave ambiguous aliases unmapped β users can write the full name.
// If adding aliases that resolve to SAFE_OUTPUT_CMDLETS or
// ACCEPT_EDITS_ALLOWED_CMDLETS, verify no native .exe collision on PS Core.
ac: 'Add-Content',
clc: 'Clear-Content',
// Write/export: tee-object/export-csv are in
// CMDLET_PATH_CONFIG so path-level Edit denies fire on the full cmdlet name,
// but PowerShell's built-in aliases fell through to ask-then-approve because
// resolveToCanonical couldn't resolve them). Neither tee-object nor
// export-csv is in SAFE_OUTPUT_CMDLETS or ACCEPT_EDITS_ALLOWED_CMDLETS, so
// the native-exe collision warning above doesn't apply β on Linux PS Core
// where `tee` runs /usr/bin/tee, that binary also writes to its positional
// file arg and we correctly extract+check it.
tee: 'Tee-Object',
epcsv: 'Export-Csv',
sp: 'Set-ItemProperty',
rp: 'Remove-ItemProperty',
cli: 'Clear-Item',
epal: 'Export-Alias',
// Text search
sls: 'Select-String',
},
)
const DIRECTORY_CHANGE_CMDLETS = new Set([
'set-location',
'push-location',
'pop-location',
])
const DIRECTORY_CHANGE_ALIASES = new Set(['cd', 'sl', 'chdir', 'pushd', 'popd'])
/**
* Get all command names across all statements, pipeline segments, and nested commands.
* Returns lowercased names for case-insensitive comparison.
*/
// exported for testing
export function getAllCommandNames(parsed: ParsedPowerShellCommand): string[] {
const names: string[] = []
for (const statement of parsed.statements) {
for (const cmd of statement.commands) {
names.push(cmd.name.toLowerCase())
}
if (statement.nestedCommands) {
for (const cmd of statement.nestedCommands) {
names.push(cmd.name.toLowerCase())
}
}
}
return names
}
/**
* Get all pipeline segments as flat list of commands.
* Useful for checking each command independently.
*/
export function getAllCommands(
parsed: ParsedPowerShellCommand,
): ParsedCommandElement[] {
const commands: ParsedCommandElement[] = []
for (const statement of parsed.statements) {
for (const cmd of statement.commands) {
commands.push(cmd)
}
if (statement.nestedCommands) {
for (const cmd of statement.nestedCommands) {
commands.push(cmd)
}
}
}
return commands
}
/**
* Get all redirections across all statements.
*/
// exported for testing
export function getAllRedirections(
parsed: ParsedPowerShellCommand,
): ParsedRedirection[] {
const redirections: ParsedRedirection[] = []
for (const statement of parsed.statements) {
for (const redir of statement.redirections) {
redirections.push(redir)
}
// Include redirections from nested commands (e.g., from && / || chains)
if (statement.nestedCommands) {
for (const cmd of statement.nestedCommands) {
if (cmd.redirections) {
for (const redir of cmd.redirections) {
redirections.push(redir)
}
}
}
}
}
return redirections
}
/**
* Get all variables, optionally filtered by scope (e.g., 'env').
* Variable paths in PowerShell can have scopes like "env:PATH", "global:x".
*/
export function getVariablesByScope(
parsed: ParsedPowerShellCommand,
scope: string,
): ParsedVariable[] {
const prefix = scope.toLowerCase() + ':'
return parsed.variables.filter(v => v.path.toLowerCase().startsWith(prefix))
}
/**
* Check if any command in the parsed result matches a given name (case-insensitive).
* Handles common aliases too.
*/
export function hasCommandNamed(
parsed: ParsedPowerShellCommand,
name: string,
): boolean {
const lowerName = name.toLowerCase()
const canonicalFromAlias = COMMON_ALIASES[lowerName]?.toLowerCase()
for (const cmdName of getAllCommandNames(parsed)) {
if (cmdName === lowerName) {
return true
}
// Check if the command is an alias that resolves to the requested name
const canonical = COMMON_ALIASES[cmdName]?.toLowerCase()
if (canonical === lowerName) {
return true
}
// Check if the requested name is an alias and the command is its canonical form
if (canonicalFromAlias && cmdName === canonicalFromAlias) {
return true
}
// Check if both resolve to the same canonical cmdlet (alias-to-alias match)
if (canonical && canonicalFromAlias && canonical === canonicalFromAlias) {
return true
}
}
return false
}
/**
* Check if the command contains any directory-changing commands.
* (Set-Location, cd, sl, chdir, Push-Location, pushd, Pop-Location, popd)
*/
// exported for testing
export function hasDirectoryChange(parsed: ParsedPowerShellCommand): boolean {
for (const cmdName of getAllCommandNames(parsed)) {
if (
DIRECTORY_CHANGE_CMDLETS.has(cmdName) ||
DIRECTORY_CHANGE_ALIASES.has(cmdName)
) {
return true
}
}
return false
}
/**
* Check if the command is a single simple command (no pipes, no semicolons, no operators).
*/
// exported for testing
export function isSingleCommand(parsed: ParsedPowerShellCommand): boolean {
const stmt = parsed.statements[0]
return (
parsed.statements.length === 1 &&
stmt !== undefined &&
stmt.commands.length === 1 &&
(!stmt.nestedCommands || stmt.nestedCommands.length === 0)
)
}
/**
* Check if a specific command has a given argument/flag (case-insensitive).
* Useful for checking "-EncodedCommand", "-Recurse", etc.
*/
export function commandHasArg(
command: ParsedCommandElement,
arg: string,
): boolean {
const lowerArg = arg.toLowerCase()
return command.args.some(a => a.toLowerCase() === lowerArg)
}
/**
* Tokenizer-level dash characters that PowerShell's parser accepts as
* parameter prefixes. SpecialCharacters.IsDash (CharTraits.cs) accepts exactly
* these four: ASCII hyphen-minus, en-dash, em-dash, horizontal bar. These are
* tokenizer-level β they apply to ALL cmdlet parameters, not just argv to
* powershell.exe (contrast with `/` which is an argv-parser quirk of
* powershell.exe 5.1 only; see PS_ALT_PARAM_PREFIXES in powershellSecurity.ts).
*
* Extent.Text preserves the raw character; transformCommandAst uses ce.text
* for CommandParameterAst elements, so these reach callers unchanged.
*/
export const PS_TOKENIZER_DASH_CHARS = new Set([
'-', // U+002D hyphen-minus (ASCII)
'\u2013', // en-dash
'\u2014', // em-dash
'\u2015', // horizontal bar
])
/**
* Determines if an argument is a PowerShell parameter (flag), using the AST
* element type as ground truth when available.
*
* The parser maps CommandParameterAst β 'Parameter' regardless of which dash
* character the user typed β PowerShell's tokenizer handles that. So when
* elementType is available, it's authoritative:
* - 'Parameter' β true (covers `-Path`, `βPath`, `βPath`, `βPath`)
* - anything else β false (a quoted "-Path" is StringConstant, not a param)
*
* When elementType is unavailable (backward compat / no AST detail), fall back
* to a char check against PS_TOKENIZER_DASH_CHARS.
*/
export function isPowerShellParameter(
arg: string,
elementType?: CommandElementType,
): boolean {
if (elementType !== undefined) {
return elementType === 'Parameter'
}
return arg.length > 0 && PS_TOKENIZER_DASH_CHARS.has(arg[0]!)
}
/**
* Check if any argument on a command is an unambiguous abbreviation of a PowerShell parameter.
* PowerShell allows parameter abbreviation as long as the prefix is unambiguous.
* The minPrefix is the shortest unambiguous prefix for the parameter.
* For example, minPrefix '-en' for fullParam '-encodedcommand' matches '-en', '-enc', '-enco', etc.
*/
export function commandHasArgAbbreviation(
command: ParsedCommandElement,
fullParam: string,
minPrefix: string,
): boolean {
const lowerFull = fullParam.toLowerCase()
const lowerMin = minPrefix.toLowerCase()
return command.args.some(a => {
// Strip colon-bound value (e.g., -en:base64value -> -en)
const colonIndex = a.indexOf(':', 1)
const paramPart = colonIndex > 0 ? a.slice(0, colonIndex) : a
// Strip backtick escapes β PowerShell resolves `-Member`Name` to
// `-MemberName` but Extent.Text preserves the backtick, causing
// prefix-comparison misses on the raw text.
const lower = paramPart.replace(/`/g, '').toLowerCase()
return (
lower.startsWith(lowerMin) &&
lowerFull.startsWith(lower) &&
lower.length <= lowerFull.length
)
})
}
/**
* Split a parsed command into its pipeline segments for per-segment permission checking.
* Returns each pipeline's commands separately.
*/
export function getPipelineSegments(
parsed: ParsedPowerShellCommand,
): ParsedStatement[] {
return parsed.statements
}
/**
* True if a redirection target is PowerShell's `$null` automatic variable.
* `> $null` discards output (like /dev/null) β not a filesystem write.
* `$null` cannot be reassigned, so this is safe to treat as a no-op sink.
* `${null}` is the same automatic variable via curly-brace syntax. Spaces
* inside the braces (`${ null }`) name a different variable, so no regex.
*/
export function isNullRedirectionTarget(target: string): boolean {
const t = target.trim().toLowerCase()
return t === '$null' || t === '${null}'
}
/**
* Get output redirections (file redirections, not merging redirections).
* Returns only redirections that write to files.
*/
// exported for testing
export function getFileRedirections(
parsed: ParsedPowerShellCommand,
): ParsedRedirection[] {
return getAllRedirections(parsed).filter(
r => !r.isMerging && !isNullRedirectionTarget(r.target),
)
}
/**
* Derive security-relevant flags from the parsed command structure.
* This replaces the previous approach of computing flags in PowerShell via
* separate Find-AstNodes calls. Instead, the PS1 script tags each element
* with its AST node type, and this function walks those types.
*/
// exported for testing
export function deriveSecurityFlags(
parsed: ParsedPowerShellCommand,
): SecurityFlags {
const flags: SecurityFlags = {
hasSubExpressions: false,
hasScriptBlocks: false,
hasSplatting: false,
hasExpandableStrings: false,
hasMemberInvocations: false,
hasAssignments: false,
hasStopParsing: parsed.hasStopParsing,
}
function checkElements(cmd: ParsedCommandElement): void {
if (!cmd.elementTypes) {
return
}
for (const et of cmd.elementTypes) {
switch (et) {
case 'ScriptBlock':
flags.hasScriptBlocks = true
break
case 'SubExpression':
flags.hasSubExpressions = true
break
case 'ExpandableString':
flags.hasExpandableStrings = true
break
case 'MemberInvocation':
flags.hasMemberInvocations = true
break
}
}
}
for (const stmt of parsed.statements) {
if (stmt.statementType === 'AssignmentStatementAst') {
flags.hasAssignments = true
}
for (const cmd of stmt.commands) {
checkElements(cmd)
}
if (stmt.nestedCommands) {
for (const cmd of stmt.nestedCommands) {
checkElements(cmd)
}
}
// securityPatterns provides a belt-and-suspenders check that catches
// patterns elementTypes may miss (e.g. member invocations inside
// assignments, subexpressions in non-pipeline statements).
if (stmt.securityPatterns) {
if (stmt.securityPatterns.hasMemberInvocations) {
flags.hasMemberInvocations = true
}
if (stmt.securityPatterns.hasSubExpressions) {
flags.hasSubExpressions = true
}
if (stmt.securityPatterns.hasExpandableStrings) {
flags.hasExpandableStrings = true
}
if (stmt.securityPatterns.hasScriptBlocks) {
flags.hasScriptBlocks = true
}
}
}
for (const v of parsed.variables) {
if (v.isSplatted) {
flags.hasSplatting = true
break
}
}
return flags
}
// Raw types exported for testing (function exports are inline above)