📄 File detail
tools/BashTool/sedValidation.ts
🎯 Use case
This module implements the “BashTool” tool (Bash) — something the model can call at runtime alongside other agent tools. On the API surface it exposes isLinePrintingCommand, isPrintCommand, sedCommandIsAllowedByAllowlist, hasFileArgs, and extractSedExpressions (and more) — mainly functions, hooks, or classes. It composes internal code from Tool and utils (relative imports).
Generated from folder role, exports, dependency roots, and inline comments — not hand-reviewed for every path.
🧠 Inline summary
import type { ToolPermissionContext } from '../../Tool.js' import { splitCommand_DEPRECATED } from '../../utils/bash/commands.js' import { tryParseShellCommand } from '../../utils/bash/shellQuote.js' import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'
📤 Exports (heuristic)
isLinePrintingCommand, isPrintCommand, sedCommandIsAllowedByAllowlist, hasFileArgs, extractSedExpressions, checkSedConstraints
🖥️ Source preview
import type { ToolPermissionContext } from '../../Tool.js'
import { splitCommand_DEPRECATED } from '../../utils/bash/commands.js'
import { tryParseShellCommand } from '../../utils/bash/shellQuote.js'
import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'
/**
 * Helper: confirm that every flag appears in an allowlist.
 *
 * A short-option cluster (one leading dash, more than two characters, e.g.
 * `-nE`) is expanded letter by letter and each `-<letter>` must be allowed.
 * Every other value (a lone short flag, a long `--flag`, or any other token)
 * must appear in the allowlist verbatim.
 *
 * @param flags Flags to validate
 * @param allowedFlags Allowed single-character and long flags
 * @returns true when every flag is allowed, false as soon as one is not
 */
function validateFlagsAgainstAllowlist(
  flags: string[],
  allowedFlags: string[],
): boolean {
  for (const candidate of flags) {
    const isShortCluster =
      candidate.startsWith('-') &&
      !candidate.startsWith('--') &&
      candidate.length > 2

    if (!isShortCluster) {
      // Lone short flag or long flag: needs an exact allowlist entry.
      if (allowedFlags.includes(candidate)) continue
      return false
    }

    // Cluster such as -nE: every letter after the dash must be allowed on its own.
    const letters = candidate.slice(1).split('')
    if (letters.some(letter => !allowedFlags.includes('-' + letter))) {
      return false
    }
  }
  return true
}
/**
 * Pattern 1: recognise a pure line-printing sed invocation.
 *
 * The command must start with `sed`, must parse as shell, may only carry the
 * flags -n/--quiet/--silent, -E/--regexp-extended, -r, -z/--zero-terminated
 * and --posix (alone or clustered), and MUST include the -n behaviour.
 * Every expression, split on `;`, must be an allowed print command
 * (e.g. `sed -n '1p;2p;3p'`). File arguments are permitted for this pattern.
 *
 * @param command Full sed command line
 * @param expressions Sed expressions already extracted from the command
 * @returns true only when all of the conditions above hold
 * @internal Exported for testing
 */
export function isLinePrintingCommand(
  command: string,
  expressions: string[],
): boolean {
  const leadingSed = command.match(/^\s*sed\s+/)
  if (!leadingSed) return false

  const parseOutcome = tryParseShellCommand(
    command.slice(leadingSed[0].length),
  )
  if (!parseOutcome.success) return false

  // Every string token that begins with a dash is treated as a flag; the bare
  // `--` separator is not.
  const dashArgs: string[] = []
  for (const token of parseOutcome.tokens) {
    if (typeof token !== 'string') continue
    if (token === '--' || !token.startsWith('-')) continue
    dashArgs.push(token)
  }

  // Only -n, -E, -r, -z, --posix and their long forms are acceptable here.
  const permittedFlags = [
    '-n',
    '--quiet',
    '--silent',
    '-E',
    '--regexp-extended',
    '-r',
    '-z',
    '--zero-terminated',
    '--posix',
  ]
  if (!validateFlagsAgainstAllowlist(dashArgs, permittedFlags)) {
    return false
  }

  // -n is mandatory: either spelled out or as part of a short cluster.
  const suppressesAutoPrint = dashArgs.some(
    flag =>
      flag === '-n' ||
      flag === '--quiet' ||
      flag === '--silent' ||
      (flag.startsWith('-') && !flag.startsWith('--') && flag.includes('n')),
  )
  if (!suppressesAutoPrint || expressions.length === 0) {
    return false
  }

  // Strict allowlist: each semicolon-separated piece must be a print command.
  return expressions.every(expr =>
    expr.split(';').every(piece => isPrintCommand(piece.trim())),
  )
}
/**
 * Helper: decide whether one sed command is an allowed print command.
 *
 * STRICT ALLOWLIST - exactly these shapes are accepted:
 * - `p`     (print every line)
 * - `Np`    (print line N, N being digits)
 * - `N,Mp`  (print lines N through M)
 * Everything else, including the empty string and w/W/e/E commands, is rejected.
 *
 * @param cmd A single, already-trimmed sed command
 * @returns true when the command has one of the shapes above
 * @internal Exported for testing
 */
export function isPrintCommand(cmd: string): boolean {
  // Matches: p, 1p, 123p, 1,5p, 10,200p - and nothing else.
  const printOnly = /^(?:\d+|\d+,\d+)?p$/
  return cmd ? printOnly.test(cmd) : false
}
/**
 * Pattern 2: recognise a single, plain substitution command.
 *
 * Accepts `sed 's/pattern/replacement/flags'` where the delimiter is `/` and
 * the trailing flags are drawn from g, p, i, I, m, M plus at most one digit 1-9.
 * Command-line flags are limited to -E, --regexp-extended, -r and --posix;
 * when `allowFileWrites` is true, -i / --in-place and file arguments are
 * accepted as well. When it is false (the default) any file argument causes
 * rejection.
 *
 * @param command Full sed command line
 * @param expressions Sed expressions already extracted from the command
 * @param hasFileArguments Whether the command names input files
 * @param options.allowFileWrites Permit in-place editing (-i) and file arguments
 * @returns true only when the command is exactly one allowed substitution
 */
function isSubstitutionCommand(
  command: string,
  expressions: string[],
  hasFileArguments: boolean,
  options?: { allowFileWrites?: boolean },
): boolean {
  const writesPermitted = options?.allowFileWrites ?? false

  // Read-only mode: the substitution may only operate on stdin.
  if (hasFileArguments && !writesPermitted) {
    return false
  }

  const leadingSed = command.match(/^\s*sed\s+/)
  if (!leadingSed) return false

  const parseOutcome = tryParseShellCommand(
    command.slice(leadingSed[0].length),
  )
  if (!parseOutcome.success) return false

  // Every string token that begins with a dash (other than `--`) is a flag.
  const dashArgs: string[] = []
  for (const token of parseOutcome.tokens) {
    if (typeof token !== 'string') continue
    if (token === '--' || !token.startsWith('-')) continue
    dashArgs.push(token)
  }

  // Base flags valid in both modes; in-place editing only when writes are allowed.
  const permittedFlags = ['-E', '--regexp-extended', '-r', '--posix']
  if (writesPermitted) {
    permittedFlags.push('-i', '--in-place')
  }
  if (!validateFlagsAgainstAllowlist(dashArgs, permittedFlags)) {
    return false
  }

  // Exactly one expression is required.
  if (expressions.length !== 1) {
    return false
  }
  const script = expressions[0]!.trim()

  // STRICT ALLOWLIST: must be an `s` command (rejects standalone e, w file, ...).
  if (!script.startsWith('s')) {
    return false
  }

  // Only `/` is accepted as the delimiter.
  const afterOpeningSlash = script.match(/^s\/(.*?)$/)
  if (!afterOpeningSlash) {
    return false
  }
  const body = afterOpeningSlash[1]!

  // Count unescaped `/` characters and remember where the last one sits.
  let slashCount = 0
  let finalSlashAt = -1
  for (let pos = 0; pos < body.length; pos++) {
    const ch = body[pos]
    if (ch === '\\') {
      // Skip the escaped character (this bump plus the loop increment).
      pos++
      continue
    }
    if (ch === '/') {
      slashCount++
      finalSlashAt = pos
    }
  }

  // pattern/replacement/ -> exactly two delimiters after the opening one.
  if (slashCount !== 2) {
    return false
  }

  // Whatever follows the closing delimiter is the flag string:
  // only g, p, i, I, m, M and optionally ONE digit 1-9 are allowed.
  const trailingFlags = body.slice(finalSlashAt + 1)
  return /^[gpimIM]*[1-9]?[gpimIM]*$/.test(trailingFlags)
}
/**
 * Decide whether a sed command is covered by the allowlist.
 *
 * The command must match the line-printing pattern (Pattern 1) or the
 * substitution pattern (Pattern 2), and then still pass the denylist check
 * as defense in depth. A command whose expressions cannot be extracted is
 * treated as not allowed.
 *
 * @param command The sed command to check
 * @param options.allowFileWrites When true, only the substitution pattern is
 *   considered, and it may use -i and file arguments
 * @returns true if the command matches an allowlist pattern and passes the
 *   denylist check, false otherwise
 */
export function sedCommandIsAllowedByAllowlist(
  command: string,
  options?: { allowFileWrites?: boolean },
): boolean {
  const writesPermitted = options?.allowFileWrites ?? false

  // Pull out the sed scripts; any extraction failure means "not allowed".
  let expressions: string[]
  try {
    expressions = extractSedExpressions(command)
  } catch {
    return false
  }

  const fileArgsPresent = hasFileArgs(command)

  // With file writes enabled only substitutions are considered (line printing
  // never needs to write); in read-only mode both patterns are evaluated.
  const matchesLinePrinting = writesPermitted
    ? false
    : isLinePrintingCommand(command, expressions)
  const matchesSubstitution = isSubstitutionCommand(
    command,
    expressions,
    fileArgsPresent,
    writesPermitted ? { allowFileWrites: true } : undefined,
  )

  if (!(matchesLinePrinting || matchesSubstitution)) {
    return false
  }

  // Semicolons separate print commands in Pattern 1 but are never allowed in
  // a substitution.
  if (matchesSubstitution && expressions.some(expr => expr.includes(';'))) {
    return false
  }

  // Defense-in-depth: an allowlist match must still survive the denylist.
  return !expressions.some(expr => containsDangerousOperations(expr))
}
/**
 * Report whether a sed command names input files (rather than reading stdin only).
 *
 * Returns false when the command does not start with `sed`. Returns true -
 * the cautious answer - when the arguments cannot be parsed, when a glob
 * token is present, or when scanning throws.
 *
 * @param command Full sed command line
 * @returns true if file arguments are (or may be) present
 * @internal Exported for testing
 */
export function hasFileArgs(command: string): boolean {
  const leadingSed = command.match(/^\s*sed\s+/)
  if (!leadingSed) return false

  const parseOutcome = tryParseShellCommand(
    command.slice(leadingSed[0].length),
  )
  if (!parseOutcome.success) return true
  const tokens = parseOutcome.tokens

  try {
    let positionalCount = 0
    let sawExpressionFlag = false
    let cursor = 0

    while (cursor < tokens.length) {
      const token = tokens[cursor]
      cursor += 1 // cursor now points at the token after this one

      // A glob pattern (like *.log) always counts as a file argument.
      if (
        typeof token === 'object' &&
        token !== null &&
        'op' in token &&
        token.op === 'glob'
      ) {
        return true
      }

      // Remaining non-string tokens carry no file information.
      if (typeof token !== 'string') continue

      // -e / --expression followed by its script: consume both tokens.
      const isExpressionFlag = token === '-e' || token === '--expression'
      if (isExpressionFlag && cursor < tokens.length) {
        sawExpressionFlag = true
        cursor += 1
        continue
      }

      // --expression=value, and the non-standard -e=value (defense in depth).
      if (token.startsWith('--expression=') || token.startsWith('-e=')) {
        sawExpressionFlag = true
        continue
      }

      // Any other flag is ignored here.
      if (token.startsWith('-')) continue

      positionalCount += 1
      // With -e in play every positional is a file; without it the first
      // positional is the script, so a second one is needed to have a file.
      if (sawExpressionFlag || positionalCount > 1) {
        return true
      }
    }
    return false
  } catch {
    return true // Assume dangerous if scanning fails
  }
}
/**
 * Collect the sed scripts from a command line, skipping flags and filenames.
 *
 * Scripts come from `-e <script>`, `--expression <script>`,
 * `--expression=<script>`, the non-standard `-e=<script>`, or - when none of
 * those has been seen - the first non-flag argument. Scanning stops at the
 * first argument that must be a filename.
 *
 * @param command Full sed command
 * @returns The sed expressions to inspect; empty if the command does not start with `sed`
 * @throws Error on a dangerous flag combination (-ew, -eW, -ee, -we, -wE),
 *   on malformed shell syntax, or if scanning the tokens fails
 * @internal Exported for testing
 */
export function extractSedExpressions(command: string): string[] {
  const collected: string[] = []

  // Strip the leading "sed " (plus surrounding whitespace) from the command.
  const leadingSed = command.match(/^\s*sed\s+/)
  if (!leadingSed) return collected
  const argText = command.slice(leadingSed[0].length)

  // Combined -e/-w clusters followed by dangerous commands are refused outright.
  const hasRiskyCluster = /-e[wWe]/.test(argText) || /-w[eE]/.test(argText)
  if (hasRiskyCluster) {
    throw new Error('Dangerous flag combination detected')
  }

  // Tokenise the arguments with the shell parser.
  const parseOutcome = tryParseShellCommand(argText)
  if (!parseOutcome.success) {
    // Malformed shell syntax - the caller is expected to catch this.
    throw new Error(`Malformed shell syntax: ${parseOutcome.error}`)
  }
  const tokens = parseOutcome.tokens

  try {
    // True once a script source is fixed: either an -e style flag was seen or
    // the first positional argument was taken as the script.
    let scriptSourceSeen = false
    let cursor = 0

    while (cursor < tokens.length) {
      const token = tokens[cursor]
      cursor += 1 // cursor now points at the token after this one

      // Control operators and other non-string tokens are irrelevant.
      if (typeof token !== 'string') continue

      // -e / --expression with a following token.
      if ((token === '-e' || token === '--expression') && cursor < tokens.length) {
        scriptSourceSeen = true
        const following = tokens[cursor]
        if (typeof following === 'string') {
          collected.push(following)
          cursor += 1 // the script has been consumed
        }
        continue
      }

      // --expression=value
      if (token.startsWith('--expression=')) {
        scriptSourceSeen = true
        collected.push(token.slice('--expression='.length))
        continue
      }

      // -e=value (non-standard but defense in depth)
      if (token.startsWith('-e=')) {
        scriptSourceSeen = true
        collected.push(token.slice('-e='.length))
        continue
      }

      // Any other flag is skipped.
      if (token.startsWith('-')) continue

      // Anything positional after a script source has been fixed is a filename.
      if (scriptSourceSeen) break

      // No -e flags so far: the first positional argument is the script.
      collected.push(token)
      scriptSourceSeen = true
    }
  } catch (error) {
    // A failure while scanning means the command must be treated as unsafe.
    throw new Error(
      `Failed to parse sed command: ${error instanceof Error ? error.message : 'Unknown error'}`,
    )
  }

  return collected
}
/**
 * Denylist check for a single sed expression.
 *
 * Deliberately conservative: any construct that is either known to be
 * dangerous (write `w`/`W`, execute `e`) or too unusual to reason about is
 * reported as dangerous. An empty (or whitespace-only) expression is safe.
 *
 * @param expression Single sed expression (without quotes)
 * @returns true if dangerous, false if safe
 */
function containsDangerousOperations(expression: string): boolean {
  const script = expression.trim()
  if (script.length === 0) return false

  const hitsAny = (patterns: readonly RegExp[]): boolean =>
    patterns.some(pattern => pattern.test(script))

  // ---- Conservative rejections: when in doubt, treat as unsafe ----

  // Only characters in 0x01-0x7F are accepted. This rules out look-alike
  // letters such as a fullwidth w, a small-capital w or a w carrying a
  // combining tilde.
  // eslint-disable-next-line no-control-regex
  const outsideAscii = /[^\x01-\x7F]/
  if (outsideAscii.test(script)) return true

  // Blocks ({ ... }) and multi-line scripts are too complex to analyse.
  if (['{', '}', '\n'].some(token => script.includes(token))) return true

  // A '#' is only tolerated as an `s` delimiter (s#pattern#replacement#);
  // otherwise it starts a comment. Only the first '#' is inspected.
  const firstHash = script.indexOf('#')
  if (firstHash !== -1) {
    const followsS = firstHash > 0 && script[firstHash - 1] === 's'
    if (!followsS) return true
  }

  const suspiciousSyntax: readonly RegExp[] = [
    /^!/, // negation at the start: !/pattern/
    /[/\d$]!/, // negation after an address: /pattern/!, 1,10!, $!
    /\d\s*~\s*\d|,\s*~\s*\d|\$\s*~\s*\d/, // GNU step addresses using '~'
    /^,/, // bare leading comma (shorthand for the 1,$ range)
    /,\s*[+-]/, // GNU offset addresses: ,+N / ,-N
    /s\\/, // substitution using backslash as the delimiter
    /\\[|#%@]/, // backslash introducing an alternate delimiter
    /\\\/.*[wW]/, // escaped slash later followed by w/W
    /\/[^/]*\s+[wWeE]/, // slash, text, whitespace, then w/W/e/E
  ]
  if (hitsAny(suspiciousSyntax)) return true

  // s/... must have the plain shape s/a/b/flags with no further slashes
  // (rejects s/foobareoutput.txt and s/foo/bar//w alike).
  const usesSlashDelimiter = /^s\//.test(script)
  if (usesSlashDelimiter && !/^s\/[^/]*\/[^/]*\/[^/]*$/.test(script)) {
    return true
  }

  // PARANOID: an `s` command ending in w/W/e/E must still look like a
  // well-formed substitution (any delimiter) whose tail has none of them.
  if (/^s./.test(script) && /[wWeE]$/.test(script)) {
    if (!/^s([^\\\n]).*?\1.*?\1[^wWeE]*$/.test(script)) return true
  }

  // ---- Write commands: [address]w file / [address]W file ----
  // Kept as simple anchored patterns to avoid exponential backtracking.
  const writeCommands: readonly RegExp[] = [
    /^[wW]\s*\S+/, // at start: w file
    /^\d+\s*[wW]\s*\S+/, // after line number: 1w file or 1 w file
    /^\$\s*[wW]\s*\S+/, // after $: $w file or $ w file
    /^\/[^/]*\/[IMim]*\s*[wW]\s*\S+/, // after pattern: /pattern/w file
    /^\d+,\d+\s*[wW]\s*\S+/, // after range: 1,10w file
    /^\d+,\$\s*[wW]\s*\S+/, // after range: 1,$w file
    /^\/[^/]*\/[IMim]*,\/[^/]*\/[IMim]*\s*[wW]\s*\S+/, // after pattern range: /s/,/e/w file
  ]
  if (hitsAny(writeCommands)) return true

  // ---- Execute commands: [address]e [command] ----
  const executeCommands: readonly RegExp[] = [
    /^e/, // at start: e cmd
    /^\d+\s*e/, // after line number: 1e or 1 e
    /^\$\s*e/, // after $: $e or $ e
    /^\/[^/]*\/[IMim]*\s*e/, // after pattern: /pattern/e
    /^\d+,\d+\s*e/, // after range: 1,10e
    /^\d+,\$\s*e/, // after range: 1,$e
    /^\/[^/]*\/[IMim]*,\/[^/]*\/[IMim]*\s*e/, // after pattern range: /s/,/e/e
  ]
  if (hitsAny(executeCommands)) return true

  // ---- Substitution flags: s<d>pattern<d>replacement<d>flags ----
  // Any character except backslash and newline may serve as the delimiter.
  const substitution = /s([^\\\n]).*?\1.*?\1(.*?)$/.exec(script)
  if (substitution) {
    const trailingFlags = substitution[2] || ''
    // w/W write to a file, e/E execute - none may appear in the flags.
    if (['w', 'W', 'e', 'E'].some(letter => trailingFlags.includes(letter))) {
      return true
    }
  }

  // ---- y (transliterate) ----
  // PARANOID: once something shaped like a y command is present, any
  // w/W/e/E anywhere in the expression is treated as suspicious.
  if (/y([^\\\n])/.test(script) && /[wWeE]/.test(script)) {
    return true
  }

  return false
}
/**
 * Cross-cutting validation step for sed commands.
 *
 * The input is split into sub-commands; each one whose first word is `sed`
 * is run through the sed allowlist. The first sed sub-command that is not
 * allowed produces an 'ask' result. If there are no sed sub-commands, or all
 * of them are allowed, the result is 'passthrough'.
 *
 * In 'acceptEdits' mode the allowlist is evaluated with file writes enabled
 * (the -i flag); dangerous operations are still blocked in that mode.
 *
 * @param input - Object containing the command string
 * @param toolPermissionContext - Context whose `mode` selects whether file writes are allowed
 * @returns
 * - 'ask' if any sed sub-command is not allowed by the allowlist
 * - 'passthrough' if no sed commands are present or all are allowed
 */
export function checkSedConstraints(
  input: { command: string },
  toolPermissionContext: ToolPermissionContext,
): PermissionResult {
  const needsApproval = splitCommand_DEPRECATED(input.command).some(raw => {
    const candidate = raw.trim()

    // Only sub-commands whose first word is exactly `sed` are inspected.
    const [program] = candidate.split(/\s+/)
    if (program !== 'sed') {
      return false
    }

    // acceptEdits mode permits in-place edits (-i); everything else stays strict.
    const allowFileWrites = toolPermissionContext.mode === 'acceptEdits'
    return !sedCommandIsAllowedByAllowlist(candidate, { allowFileWrites })
  })

  if (needsApproval) {
    return {
      behavior: 'ask',
      message:
        'sed command requires approval (contains potentially dangerous operations)',
      decisionReason: {
        type: 'other',
        reason:
          'sed command contains operations that require explicit approval (e.g., write commands, execute commands)',
      },
    }
  }

  // No dangerous sed commands found (or no sed commands at all).
  return {
    behavior: 'passthrough',
    message: 'No dangerous sed operations detected',
  }
}