📄 File detail
utils/gitDiff.ts
🎯 Use case
This file lives under “utils/”, which covers cross-cutting helpers (shell, tempfiles, settings, messages, process input, …). On the API surface it exposes GitDiffStats, PerFileStats, GitDiffResult, fetchGitDiff, and fetchGitDiffHunks (and more) — mainly functions, hooks, or classes. Dependencies touch text diffing, Node filesystem, and Node path helpers. It composes internal code from cwd, detectRepository, execFileNoThrow, file, and git (relative imports).
Generated from folder role, exports, dependency roots, and inline comments — not hand-reviewed for every path.
🧠 Inline summary
import type { StructuredPatchHunk } from 'diff' import { access, readFile } from 'fs/promises' import { dirname, join, relative, sep } from 'path' import { getCwd } from './cwd.js' import { getCachedRepository } from './detectRepository.js'
📤 Exports (heuristic)
GitDiffStats, PerFileStats, GitDiffResult, fetchGitDiff, fetchGitDiffHunks, NumstatResult, parseGitNumstat, parseGitDiff, parseShortstat, ToolUseDiff, fetchSingleFileGitDiff
📦 External import roots
Package roots from from "…" (relative paths omitted).
diff, fs, path
🖥️ Source preview
import type { StructuredPatchHunk } from 'diff'
import { access, readFile } from 'fs/promises'
import { dirname, join, relative, sep } from 'path'
import { getCwd } from './cwd.js'
import { getCachedRepository } from './detectRepository.js'
import { execFileNoThrow, execFileNoThrowWithCwd } from './execFileNoThrow.js'
import { isFileWithinReadSizeLimit } from './file.js'
import {
findGitRoot,
getDefaultBranch,
getGitDir,
getIsGit,
gitExe,
} from './git.js'
/**
 * Aggregate totals for a diff, as produced by parseGitNumstat() and
 * parseShortstat().
 */
export type GitDiffStats = {
// Number of changed files; fetchGitDiff() also adds the untracked files it lists.
filesCount: number
// Sum of added lines across files (binary files contribute 0 in parseGitNumstat).
linesAdded: number
// Sum of removed lines across files (binary files contribute 0 in parseGitNumstat).
linesRemoved: number
}
/**
 * Line counts for a single file in the diff.
 */
export type PerFileStats = {
// Lines added in this file; 0 for binary and untracked entries.
added: number
// Lines removed in this file; 0 for binary and untracked entries.
removed: number
// True when numstat reported '-' for either count.
isBinary: boolean
// Set to true by fetchUntrackedFiles() for files git does not track yet.
isUntracked?: boolean
}
/**
 * Result of fetchGitDiff().
 */
export type GitDiffResult = {
// Aggregate totals for the whole diff.
stats: GitDiffStats
// Per-file counts keyed by file path; may hold fewer entries than stats.filesCount.
perFileStats: Map<string, PerFileStats>
// Hunks keyed by file path. fetchGitDiff() always returns an empty map here;
// hunks are loaded separately through fetchGitDiffHunks().
hunks: Map<string, StructuredPatchHunk[]>
}
// Passed as the `timeout` option to every git invocation made for the
// working-tree diff (shortstat, numstat, full diff, ls-files).
const GIT_TIMEOUT_MS = 5000
// Cap on per-file entries: parseGitNumstat() stores at most this many files,
// parseGitDiff() stops after this many, and untracked files only fill the
// slots that remain.
const MAX_FILES = 50
// Compared against the string length of a single file's diff text in
// parseGitDiff(), and handed to isFileWithinReadSizeLimit() before reading
// an untracked file.
const MAX_DIFF_SIZE_BYTES = 1_000_000 // 1 MB - skip files larger than this
// Maximum number of hunk lines retained per file by parseGitDiff().
const MAX_LINES_PER_FILE = 400 // GitHub's auto-load limit
// Threshold on the shortstat file count above which fetchGitDiff() returns
// totals only.
const MAX_FILES_FOR_DETAILS = 500 // Skip per-file details if more files than this
/**
 * Compute diff statistics for the working tree relative to HEAD.
 *
 * Resolves to null when the current directory is not a git repository, when
 * the numstat command fails, or while a merge / rebase / cherry-pick / revert
 * is in progress (the working tree then holds incoming changes rather than
 * edits the user made on purpose).
 *
 * The `hunks` map of the result is always empty: hunks are loaded on demand
 * by fetchGitDiffHunks() so that polling never pays for a full `git diff`.
 */
export async function fetchGitDiff(): Promise<GitDiffResult | null> {
  if (!(await getIsGit())) {
    return null
  }
  if (await isInTransientGitState()) {
    return null
  }

  // Both probes are `git diff HEAD <flag>` with identical options.
  const runDiffAgainstHead = (flag: string) =>
    execFileNoThrow(gitExe(), ['--no-optional-locks', 'diff', 'HEAD', flag], {
      timeout: GIT_TIMEOUT_MS,
      preserveOutputOnError: false,
    })

  // Cheap probe first: --shortstat yields totals without any per-file output,
  // which lets us bail out early on enormous diffs (e.g. jj workspaces).
  const shortstat = await runDiffAgainstHead('--shortstat')
  if (shortstat.code === 0) {
    const totals = parseShortstat(shortstat.stdout)
    if (totals !== null && totals.filesCount > MAX_FILES_FOR_DETAILS) {
      // Accurate totals, but no per-file details for very large diffs.
      return { stats: totals, perFileStats: new Map(), hunks: new Map() }
    }
  }

  // Per-file counts for all uncommitted changes against HEAD.
  const numstat = await runDiffAgainstHead('--numstat')
  if (numstat.code !== 0) {
    return null
  }
  const { stats, perFileStats } = parseGitNumstat(numstat.stdout)

  // Untracked files only fill whatever room the tracked files left over;
  // just their names are recorded, their content is never read.
  const freeSlots = MAX_FILES - perFileStats.size
  const untracked = freeSlots > 0 ? await fetchUntrackedFiles(freeSlots) : null
  if (untracked) {
    stats.filesCount += untracked.size
    untracked.forEach((fileStats, path) => {
      perFileStats.set(path, fileStats)
    })
  }

  return { stats, perFileStats, hunks: new Map() }
}
/**
 * Load the hunks of `git diff HEAD` on demand (used by DiffDialog).
 *
 * Kept apart from fetchGitDiff() so that the expensive full diff is not
 * executed on every poll. Resolves to an empty map when not inside a git
 * repository, during a transient git state, or when the git command fails.
 */
export async function fetchGitDiffHunks(): Promise<
  Map<string, StructuredPatchHunk[]>
> {
  const insideRepo = await getIsGit()
  if (!insideRepo || (await isInTransientGitState())) {
    return new Map()
  }

  const outcome = await execFileNoThrow(
    gitExe(),
    ['--no-optional-locks', 'diff', 'HEAD'],
    { timeout: GIT_TIMEOUT_MS, preserveOutputOnError: false },
  )
  return outcome.code === 0 ? parseGitDiff(outcome.stdout) : new Map()
}
/**
 * Return value of parseGitNumstat().
 */
export type NumstatResult = {
// Totals over every valid numstat line.
stats: GitDiffStats
// Per-file counts keyed by file path; capped at MAX_FILES entries.
perFileStats: Map<string, PerFileStats>
}
/**
 * Parse `git diff --numstat` output into aggregate and per-file statistics.
 *
 * Each line has the form `<added>\t<removed>\t<filename>`. Binary files
 * report `-` for the counts and contribute zero lines to the totals. Lines
 * with fewer than three tab-separated fields are ignored; any extra tabs are
 * treated as part of the filename.
 *
 * The totals always cover every valid line, but only the first `maxFiles`
 * files are stored in `perFileStats`.
 *
 * @param stdout Raw stdout of `git diff --numstat`.
 * @param maxFiles Maximum number of per-file entries to keep
 *   (defaults to MAX_FILES).
 * @returns Totals plus the capped per-file map, keyed by the path exactly as
 *   printed by git.
 */
export function parseGitNumstat(
  stdout: string,
  maxFiles: number = MAX_FILES,
): NumstatResult {
  let linesAdded = 0
  let linesRemoved = 0
  let filesCount = 0
  const perFileStats = new Map<string, PerFileStats>()

  // The output is deliberately NOT trimmed as a whole: trailing whitespace on
  // the last line can be part of a filename. Blank lines are skipped instead.
  for (const line of stdout.split('\n')) {
    if (!line) continue

    const [addStr, remStr, ...nameParts] = line.split('\t')
    if (
      addStr === undefined ||
      remStr === undefined ||
      nameParts.length === 0
    ) {
      continue
    }
    filesCount++

    // A filename may itself contain tabs, so re-join everything after the
    // two count columns.
    const filePath = nameParts.join('\t')
    const isBinary = addStr === '-' || remStr === '-'
    const fileAdded = isBinary ? 0 : parseInt(addStr, 10) || 0
    const fileRemoved = isBinary ? 0 : parseInt(remStr, 10) || 0
    linesAdded += fileAdded
    linesRemoved += fileRemoved

    if (perFileStats.size < maxFiles) {
      perFileStats.set(filePath, {
        added: fileAdded,
        removed: fileRemoved,
        isBinary,
      })
    }
  }

  return {
    stats: { filesCount, linesAdded, linesRemoved },
    perFileStats,
  }
}
/**
 * Parse unified diff output into per-file hunks.
 *
 * The output is split on "diff --git " and each file section is parsed
 * independently. Sections whose header line cannot be parsed, and sections
 * that contain no hunk (e.g. binary or mode-only changes), are left out of
 * the result.
 *
 * Limits (each can be overridden through `limits`):
 * - maxFiles: stop once this many files are in the result (MAX_FILES)
 * - maxDiffSizeBytes: sections whose text length exceeds this are skipped
 *   entirely (MAX_DIFF_SIZE_BYTES; compared against the string length)
 * - maxLinesPerFile: at most this many hunk lines are kept per file
 *   (MAX_LINES_PER_FILE)
 *
 * @param stdout Raw stdout of `git diff`.
 * @param limits Optional overrides for the limits above.
 * @returns Map from file path (the `b/` side of the header) to its hunks.
 */
export function parseGitDiff(
  stdout: string,
  limits: {
    maxFiles?: number
    maxDiffSizeBytes?: number
    maxLinesPerFile?: number
  } = {},
): Map<string, StructuredPatchHunk[]> {
  const {
    maxFiles = MAX_FILES,
    maxDiffSizeBytes = MAX_DIFF_SIZE_BYTES,
    maxLinesPerFile = MAX_LINES_PER_FILE,
  } = limits

  const result = new Map<string, StructuredPatchHunk[]>()
  if (!stdout.trim()) return result

  // Hunk header: @@ -oldStart,oldLines +newStart,newLines @@
  const hunkHeader = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/

  const fileDiffs = stdout.split(/^diff --git /m).filter(Boolean)
  for (const fileDiff of fileDiffs) {
    if (result.size >= maxFiles) break
    if (fileDiff.length > maxDiffSizeBytes) continue

    const lines = fileDiff.split('\n')
    // Every section ends with a newline, so split() leaves one trailing ''.
    // Drop it; otherwise it would be appended to the last hunk as a phantom
    // blank line.
    if (lines.length > 1 && lines[lines.length - 1] === '') {
      lines.pop()
    }

    // First line of a section: "a/path/to/file b/path/to/file"
    const headerMatch = lines[0]?.match(/^a\/(.+?) b\/(.+)$/)
    if (!headerMatch) continue
    const filePath = headerMatch[2] ?? headerMatch[1] ?? ''

    const fileHunks: StructuredPatchHunk[] = []
    let currentHunk: StructuredPatchHunk | null = null
    let lineCount = 0

    for (let i = 1; i < lines.length; i++) {
      const line = lines[i] ?? ''

      const hunkMatch = line.match(hunkHeader)
      if (hunkMatch) {
        if (currentHunk) {
          fileHunks.push(currentHunk)
        }
        currentHunk = {
          oldStart: parseInt(hunkMatch[1] ?? '0', 10),
          oldLines: parseInt(hunkMatch[2] ?? '1', 10),
          newStart: parseInt(hunkMatch[3] ?? '0', 10),
          newLines: parseInt(hunkMatch[4] ?? '1', 10),
          lines: [],
        }
        continue
      }

      // Everything before the first hunk is file metadata (index, ---, +++,
      // mode changes, "Binary files ..."). Metadata is only recognised here:
      // inside a hunk, a line such as "--- old" or "+++x" is real content
      // (a removed "-- old" / an added "++x") and must be kept.
      if (!currentHunk) continue

      if (
        line.startsWith('+') ||
        line.startsWith('-') ||
        line.startsWith(' ') ||
        line === ''
      ) {
        // Past the per-file limit, keep scanning (for later hunk headers)
        // but stop collecting lines.
        if (lineCount >= maxLinesPerFile) continue

        // Force a flat string copy to break V8 sliced-string references:
        // lines produced by split() may reference the parent string and keep
        // the whole diff (~MBs) alive for as long as any line is retained.
        // '' + line forces a fresh allocation, unlike slice(0).
        currentHunk.lines.push('' + line)
        lineCount++
      }
    }

    if (currentHunk) {
      fileHunks.push(currentHunk)
    }
    if (fileHunks.length > 0) {
      result.set(filePath, fileHunks)
    }
  }

  return result
}
/**
 * Report whether the repository is in the middle of a merge, rebase,
 * cherry-pick or revert.
 *
 * Diff calculation is skipped in these states because the working tree then
 * contains incoming changes rather than intentional edits. Detection relies
 * on fs.access against the marker files inside the git directory, so no
 * process is spawned. Resolves to false when no git directory is found.
 */
async function isInTransientGitState(): Promise<boolean> {
  const gitDir = await getGitDir(getCwd())
  if (!gitDir) {
    return false
  }

  // Resolves to true when the named marker file is accessible.
  const markerExists = async (name: string): Promise<boolean> => {
    const target = join(gitDir, name)
    try {
      await access(target)
      return true
    } catch {
      return false
    }
  }

  const checks = await Promise.all(
    ['MERGE_HEAD', 'REBASE_HEAD', 'CHERRY_PICK_HEAD', 'REVERT_HEAD'].map(
      name => markerExists(name),
    ),
  )
  return checks.includes(true)
}
/**
 * List untracked files (gitignored files excluded) without reading them.
 *
 * Only the names are returned; every entry has zero line counts and
 * `isUntracked: true`, so callers can display them with a note to stage them.
 *
 * @param maxFiles Maximum number of untracked files to include
 * @returns Map keyed by path as printed by `git ls-files`, or null when the
 *   command fails or reports no untracked files.
 */
async function fetchUntrackedFiles(
  maxFiles: number,
): Promise<Map<string, PerFileStats> | null> {
  const { stdout, code } = await execFileNoThrow(
    gitExe(),
    ['--no-optional-locks', 'ls-files', '--others', '--exclude-standard'],
    { timeout: GIT_TIMEOUT_MS, preserveOutputOnError: false },
  )
  if (code !== 0) return null

  // Split without trimming the whole output: leading/trailing whitespace can
  // be part of the first/last filename. Blank lines are dropped by the filter.
  const untrackedPaths = stdout.split('\n').filter(Boolean)
  if (untrackedPaths.length === 0) return null

  const perFileStats = new Map<string, PerFileStats>()
  for (const filePath of untrackedPaths.slice(0, maxFiles)) {
    perFileStats.set(filePath, {
      added: 0,
      removed: 0,
      isBinary: false,
      isUntracked: true,
    })
  }
  return perFileStats
}
/**
 * Turn `git diff --shortstat` output into totals.
 * Example input: " 1648 files changed, 52341 insertions(+), 8123 deletions(-)"
 *
 * The insertions and deletions clauses are each optional; a missing clause
 * counts as 0. Because shortstat output is a single summary line, this works
 * as a cheap probe before running more expensive diff commands.
 *
 * @returns The parsed totals, or null when no "N file(s) changed" summary is
 *   present in `stdout`.
 */
export function parseShortstat(stdout: string): GitDiffStats | null {
  const summaryPattern =
    /(\d+)\s+files?\s+changed(?:,\s+(\d+)\s+insertions?\(\+\))?(?:,\s+(\d+)\s+deletions?\(-\))?/
  const found = summaryPattern.exec(stdout)
  if (found === null) {
    return null
  }

  // An unmatched optional group is undefined and counts as zero.
  const toCount = (digits: string | undefined): number =>
    parseInt(digits ?? '0', 10)

  const [, files, insertions, deletions] = found
  return {
    filesCount: toCount(files),
    linesAdded: toCount(insertions),
    linesRemoved: toCount(deletions),
  }
}
// Passed as the `timeout` option to each git command run for a single-file
// diff (ls-files, merge-base, diff).
const SINGLE_FILE_DIFF_TIMEOUT_MS = 3000
/**
 * Diff of a single file, as returned by fetchSingleFileGitDiff().
 */
export type ToolUseDiff = {
// Path relative to the git root, using '/' separators.
filename: string
// 'modified' for files tracked by git, 'added' for untracked files.
status: 'modified' | 'added'
// Number of added lines in the patch.
additions: number
// Number of removed lines in the patch (always 0 for untracked files).
deletions: number
// additions + deletions.
changes: number
// Hunk text only, starting at the first "@@" line (no file headers).
patch: string
/** GitHub "owner/repo" when available (null for non-github.com or unknown repos) */
repository: string | null
}
/**
 * Fetch a structured diff for a single file against the merge base with the
 * default branch. This produces a PR-like diff showing all changes since the
 * branch diverged, and falls back to diffing against HEAD if the merge base
 * cannot be determined (e.g., on the default branch itself).
 *
 * For files that `git ls-files --error-unmatch` does not report as tracked,
 * a synthetic diff showing the whole file as additions is generated instead.
 *
 * @param absoluteFilePath Absolute path of the file to diff.
 * @returns The diff, or null when the file is not inside a git repository,
 *   the diff command fails or prints nothing (no changes), or the synthetic
 *   diff cannot be produced.
 */
export async function fetchSingleFileGitDiff(
  absoluteFilePath: string,
): Promise<ToolUseDiff | null> {
  const gitRoot = findGitRoot(dirname(absoluteFilePath))
  if (!gitRoot) return null

  // Git expects repo-relative paths with forward slashes on every platform.
  const gitPath = relative(gitRoot, absoluteFilePath).split(sep).join('/')
  const repository = getCachedRepository()

  // Check if the file is tracked by git. The '--' separator keeps a path that
  // starts with '-' from being parsed as an option (the diff call below
  // already does the same).
  const { code: lsFilesCode } = await execFileNoThrowWithCwd(
    gitExe(),
    ['--no-optional-locks', 'ls-files', '--error-unmatch', '--', gitPath],
    { cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
  )

  if (lsFilesCode === 0) {
    // File is tracked - diff against merge base for PR-like view
    const diffRef = await getDiffRef(gitRoot)
    const { stdout, code } = await execFileNoThrowWithCwd(
      gitExe(),
      ['--no-optional-locks', 'diff', diffRef, '--', gitPath],
      { cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
    )
    if (code !== 0) return null
    // Empty output means the file does not differ from the reference.
    if (!stdout) return null
    return {
      ...parseRawDiffToToolUseDiff(gitPath, stdout, 'modified'),
      repository,
    }
  }

  // File is untracked - generate synthetic diff
  const syntheticDiff = await generateSyntheticDiff(gitPath, absoluteFilePath)
  if (!syntheticDiff) return null
  return { ...syntheticDiff, repository }
}
/**
 * Parse raw unified diff output into the structured ToolUseDiff format.
 *
 * Only hunk content is kept as the patch: everything from an "@@" line up to
 * the end of the diff or the next "diff --git" header. Inside hunks, every
 * line starting with '+' is counted as an addition and every line starting
 * with '-' as a deletion. That includes content lines such as "+++x" (an
 * added "++x") or "--- note" (a removed "-- note"), which are NOT file
 * headers when they appear inside a hunk.
 *
 * @param filename Path reported in the result.
 * @param rawDiff Raw stdout of `git diff` for the file.
 * @param status Status to report in the result.
 * @returns The diff without the `repository` field.
 */
function parseRawDiffToToolUseDiff(
  filename: string,
  rawDiff: string,
  status: 'modified' | 'added',
): Omit<ToolUseDiff, 'repository'> {
  const patchLines: string[] = []
  let inHunks = false
  let additions = 0
  let deletions = 0

  for (const line of rawDiff.split('\n')) {
    if (line.startsWith('@@')) {
      inHunks = true
    } else if (line.startsWith('diff --git ')) {
      // A new file section starts (hunk lines always begin with '+', '-',
      // ' ' or '\', so this cannot be content). Its ---/+++ headers must be
      // neither counted nor copied into the patch.
      inHunks = false
    }
    if (!inHunks) continue

    patchLines.push(line)
    if (line.startsWith('+')) {
      additions++
    } else if (line.startsWith('-')) {
      deletions++
    }
  }

  return {
    filename,
    status,
    additions,
    deletions,
    changes: additions + deletions,
    patch: patchLines.join('\n'),
  }
}
/**
 * Pick the ref a PR-like diff should be computed against.
 *
 * The base branch is taken from the CLAUDE_CODE_BASE_REF environment variable
 * when it is set to a non-empty value (e.g. by CCR managed containers) and
 * from getDefaultBranch() otherwise. The result is the merge base of HEAD
 * and that branch, or 'HEAD' when `git merge-base` fails or prints nothing.
 *
 * @param gitRoot Directory in which the git command is executed.
 */
async function getDiffRef(gitRoot: string): Promise<string> {
  const envRef = process.env.CLAUDE_CODE_BASE_REF
  const baseBranch = envRef ? envRef : await getDefaultBranch()

  const mergeBaseRun = await execFileNoThrowWithCwd(
    gitExe(),
    ['--no-optional-locks', 'merge-base', 'HEAD', baseBranch],
    { cwd: gitRoot, timeout: SINGLE_FILE_DIFF_TIMEOUT_MS },
  )
  if (mergeBaseRun.code !== 0) {
    return 'HEAD'
  }

  const mergeBase = mergeBaseRun.stdout.trim()
  return mergeBase === '' ? 'HEAD' : mergeBase
}
/**
 * Build an "everything added" diff for a file git does not track.
 *
 * The file is read as UTF-8 and every line is emitted with a '+' prefix under
 * a single "@@ -0,0 +1,N @@" header.
 *
 * @param gitPath Path reported as `filename` in the result.
 * @param absoluteFilePath Location the content is read from.
 * @returns The synthetic diff, or null when the size check rejects the file
 *   or anything throws while checking or reading it.
 */
async function generateSyntheticDiff(
  gitPath: string,
  absoluteFilePath: string,
): Promise<Omit<ToolUseDiff, 'repository'> | null> {
  try {
    const withinLimit = isFileWithinReadSizeLimit(
      absoluteFilePath,
      MAX_DIFF_SIZE_BYTES,
    )
    if (!withinLimit) {
      return null
    }

    const text = await readFile(absoluteFilePath, 'utf-8')
    const fileLines = text.split('\n')
    // A file ending in a newline yields one trailing '' from split(); it is
    // not a real line.
    if (fileLines[fileLines.length - 1] === '') {
      fileLines.pop()
    }

    const total = fileLines.length
    const body = fileLines.map(fileLine => `+${fileLine}`).join('\n')
    return {
      filename: gitPath,
      status: 'added',
      additions: total,
      deletions: 0,
      changes: total,
      patch: `@@ -0,0 +1,${total} @@\n${body}`,
    }
  } catch {
    return null
  }
}