π File detail
utils/claudemd.ts
π― Use case
This file lives under βutils/β, which covers cross-cutting helpers (shell, tempfiles, settings, messages, process input, β¦). On the API surface it exposes MAX_MEMORY_CHARACTER_COUNT, MemoryFileInfo, stripHtmlComments, processMemoryFile, and processMdRules (and more) β mainly functions, hooks, or classes. Dependencies touch bun:bundle, ignore, lodash-es, and marked. It composes internal code from bootstrap, memdir, services, config, and debug (relative imports). What the file header says: Files are loaded in the following order: 1. Managed memory (eg. /etc/claude-code/CLAUDE.md) - Global instructions for all users 2. User memory (~/.claude/CLAUDE.md) - Private global instructions for all projects 3. Project memory (CLAUDE.md, .claude/CLAUDE.md, and .claude/rules/*.
Generated from folder role, exports, dependency roots, and inline comments β not hand-reviewed for every path.
π§ Inline summary
Files are loaded in the following order: 1. Managed memory (eg. /etc/claude-code/CLAUDE.md) - Global instructions for all users 2. User memory (~/.claude/CLAUDE.md) - Private global instructions for all projects 3. Project memory (CLAUDE.md, .claude/CLAUDE.md, and .claude/rules/*.md in project roots) - Instructions checked into the codebase 4. Local memory (CLAUDE.local.md in project roots) - Private project-specific instructions Files are loaded in reverse order of priority, i.e. the latest files are highest priority with the model paying more attention to them. File discovery: - User memory is loaded from the user's home directory - Project and Local files are discovered by traversing from the current directory up to root - Files closer to the current directory have higher priority (loaded later) - CLAUDE.md, .claude/CLAUDE.md, and all .md files in .claude/rules/ are checked in each directory for Project memory Memory @include directive: - Memory files can include other files using @ notation - Syntax: @path, @./relative/path, @~/home/path, or @/absolute/path - @path (without prefix) is treated as a relative path (same as @./path) - Works in leaf text nodes only (not inside c
π€ Exports (heuristic)
MAX_MEMORY_CHARACTER_COUNTMemoryFileInfostripHtmlCommentsprocessMemoryFileprocessMdRulesgetMemoryFilesclearMemoryFileCachesresetGetMemoryFilesCachegetLargeMemoryFilesfilterInjectedMemoryFilesgetClaudeMdsgetManagedAndUserConditionalRulesgetMemoryFilesForNestedDirectorygetConditionalRulesForCwdLevelDirectoryprocessConditionedMdRulesExternalClaudeMdIncludegetExternalClaudeMdIncludeshasExternalClaudeMdIncludesshouldShowClaudeMdExternalIncludesWarningisMemoryFilePathgetAllMemoryFilePaths
π External import roots
Package roots from from "β¦" (relative paths omitted).
bun:bundleignorelodash-esmarkedpathpicomatchsrc
π₯οΈ Source preview
/**
* Files are loaded in the following order:
*
* 1. Managed memory (eg. /etc/claude-code/CLAUDE.md) - Global instructions for all users
* 2. User memory (~/.claude/CLAUDE.md) - Private global instructions for all projects
* 3. Project memory (CLAUDE.md, .claude/CLAUDE.md, and .claude/rules/*.md in project roots) - Instructions checked into the codebase
* 4. Local memory (CLAUDE.local.md in project roots) - Private project-specific instructions
*
* Files are loaded in reverse order of priority, i.e. the latest files are highest priority
* with the model paying more attention to them.
*
* File discovery:
* - User memory is loaded from the user's home directory
* - Project and Local files are discovered by traversing from the current directory up to root
* - Files closer to the current directory have higher priority (loaded later)
* - CLAUDE.md, .claude/CLAUDE.md, and all .md files in .claude/rules/ are checked in each directory for Project memory
*
* Memory @include directive:
* - Memory files can include other files using @ notation
* - Syntax: @path, @./relative/path, @~/home/path, or @/absolute/path
* - @path (without prefix) is treated as a relative path (same as @./path)
* - Works in leaf text nodes only (not inside code blocks or code strings)
* - Included files are added as separate entries before the including file
* - Circular references are prevented by tracking processed files
* - Non-existent files are silently ignored
*/
import { feature } from 'bun:bundle'
import ignore from 'ignore'
import memoize from 'lodash-es/memoize.js'
import { Lexer } from 'marked'
import {
basename,
dirname,
extname,
isAbsolute,
join,
parse,
relative,
sep,
} from 'path'
import picomatch from 'picomatch'
import { logEvent } from 'src/services/analytics/index.js'
import {
getAdditionalDirectoriesForClaudeMd,
getOriginalCwd,
} from '../bootstrap/state.js'
import { truncateEntrypointContent } from '../memdir/memdir.js'
import { getAutoMemEntrypoint, isAutoMemoryEnabled } from '../memdir/paths.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
import {
getCurrentProjectConfig,
getManagedClaudeRulesDir,
getMemoryPath,
getUserClaudeRulesDir,
} from './config.js'
import { logForDebugging } from './debug.js'
import { logForDiagnosticsNoPII } from './diagLogs.js'
import { getClaudeConfigHomeDir, isEnvTruthy } from './envUtils.js'
import { getErrnoCode } from './errors.js'
import { normalizePathForComparison } from './file.js'
import { cacheKeys, type FileStateCache } from './fileStateCache.js'
import {
parseFrontmatter,
splitPathInFrontmatter,
} from './frontmatterParser.js'
import { getFsImplementation, safeResolvePath } from './fsOperations.js'
import { findCanonicalGitRoot, findGitRoot } from './git.js'
import {
executeInstructionsLoadedHooks,
hasInstructionsLoadedHook,
type InstructionsLoadReason,
type InstructionsMemoryType,
} from './hooks.js'
import type { MemoryType } from './memory/types.js'
import { expandPath } from './path.js'
import { pathInWorkingPath } from './permissions/filesystem.js'
import { isSettingSourceEnabled } from './settings/constants.js'
import { getInitialSettings } from './settings/settings.js'
/* eslint-disable @typescript-eslint/no-require-imports */
const teamMemPaths = feature('TEAMMEM')
? (require('../memdir/teamMemPaths.js') as typeof import('../memdir/teamMemPaths.js'))
: null
/* eslint-enable @typescript-eslint/no-require-imports */
let hasLoggedInitialLoad = false
const MEMORY_INSTRUCTION_PROMPT =
'Codebase and user instructions are shown below. Be sure to adhere to these instructions. IMPORTANT: These instructions OVERRIDE any default behavior and you MUST follow them exactly as written.'
// Recommended max character count for a memory file
export const MAX_MEMORY_CHARACTER_COUNT = 40000
// File extensions that are allowed for @include directives
// This prevents binary files (images, PDFs, etc.) from being loaded into memory
const TEXT_FILE_EXTENSIONS = new Set([
// Markdown and text
'.md',
'.txt',
'.text',
// Data formats
'.json',
'.yaml',
'.yml',
'.toml',
'.xml',
'.csv',
// Web
'.html',
'.htm',
'.css',
'.scss',
'.sass',
'.less',
// JavaScript/TypeScript
'.js',
'.ts',
'.tsx',
'.jsx',
'.mjs',
'.cjs',
'.mts',
'.cts',
// Python
'.py',
'.pyi',
'.pyw',
// Ruby
'.rb',
'.erb',
'.rake',
// Go
'.go',
// Rust
'.rs',
// Java/Kotlin/Scala
'.java',
'.kt',
'.kts',
'.scala',
// C/C++
'.c',
'.cpp',
'.cc',
'.cxx',
'.h',
'.hpp',
'.hxx',
// C#
'.cs',
// Swift
'.swift',
// Shell
'.sh',
'.bash',
'.zsh',
'.fish',
'.ps1',
'.bat',
'.cmd',
// Config
'.env',
'.ini',
'.cfg',
'.conf',
'.config',
'.properties',
// Database
'.sql',
'.graphql',
'.gql',
// Protocol
'.proto',
// Frontend frameworks
'.vue',
'.svelte',
'.astro',
// Templating
'.ejs',
'.hbs',
'.pug',
'.jade',
// Other languages
'.php',
'.pl',
'.pm',
'.lua',
'.r',
'.R',
'.dart',
'.ex',
'.exs',
'.erl',
'.hrl',
'.clj',
'.cljs',
'.cljc',
'.edn',
'.hs',
'.lhs',
'.elm',
'.ml',
'.mli',
'.f',
'.f90',
'.f95',
'.for',
// Build files
'.cmake',
'.make',
'.makefile',
'.gradle',
'.sbt',
// Documentation
'.rst',
'.adoc',
'.asciidoc',
'.org',
'.tex',
'.latex',
// Lock files (often text-based)
'.lock',
// Misc
'.log',
'.diff',
'.patch',
])
export type MemoryFileInfo = {
path: string
type: MemoryType
content: string
parent?: string // Path of the file that included this one
globs?: string[] // Glob patterns for file paths this rule applies to
// True when auto-injection transformed `content` (stripped HTML comments,
// stripped frontmatter, truncated MEMORY.md) such that it no longer matches
// the bytes on disk. When set, `rawContent` holds the unmodified disk bytes
// so callers can cache a `isPartialView` readFileState entry β presence in
// cache provides dedup + change detection, but Edit/Write still require an
// explicit Read before proceeding.
contentDiffersFromDisk?: boolean
rawContent?: string
}
function pathInOriginalCwd(path: string): boolean {
return pathInWorkingPath(path, getOriginalCwd())
}
/**
* Parses raw content to extract both content and glob patterns from frontmatter
* @param rawContent Raw file content with frontmatter
* @returns Object with content and globs (undefined if no paths or match-all pattern)
*/
function parseFrontmatterPaths(rawContent: string): {
content: string
paths?: string[]
} {
const { frontmatter, content } = parseFrontmatter(rawContent)
if (!frontmatter.paths) {
return { content }
}
const patterns = splitPathInFrontmatter(frontmatter.paths)
.map(pattern => {
// Remove /** suffix - ignore library treats 'path' as matching both
// the path itself and everything inside it
return pattern.endsWith('/**') ? pattern.slice(0, -3) : pattern
})
.filter((p: string) => p.length > 0)
// If all patterns are ** (match-all), treat as no globs (undefined)
// This means the file applies to all paths
if (patterns.length === 0 || patterns.every((p: string) => p === '**')) {
return { content }
}
return { content, paths: patterns }
}
/**
* Strip block-level HTML comments (<!-- ... -->) from markdown content.
*
* Uses the marked lexer to identify comments at the block level only, so
* comments inside inline code spans and fenced code blocks are preserved.
* Inline HTML comments inside a paragraph are also left intact; the intended
* use case is authorial notes that occupy their own lines.
*
* Unclosed comments (`<!--` with no matching `-->`) are left in place so a
* typo doesn't silently swallow the rest of the file.
*/
export function stripHtmlComments(content: string): {
content: string
stripped: boolean
} {
if (!content.includes('<!--')) {
return { content, stripped: false }
}
// gfm:false is fine here β html-block detection is a CommonMark rule.
return stripHtmlCommentsFromTokens(new Lexer({ gfm: false }).lex(content))
}
function stripHtmlCommentsFromTokens(tokens: ReturnType<Lexer['lex']>): {
content: string
stripped: boolean
} {
let result = ''
let stripped = false
// A well-formed HTML comment span. Non-greedy so multiple comments on the
// same line are matched independently; [\s\S] to span newlines.
const commentSpan = /<!--[\s\S]*?-->/g
for (const token of tokens) {
if (token.type === 'html') {
const trimmed = token.raw.trimStart()
if (trimmed.startsWith('<!--') && trimmed.includes('-->')) {
// Per CommonMark, a type-2 HTML block ends at the *line* containing
// `-->`, so text after `-->` on that line is part of this token.
// Strip only the comment spans and keep any residual content.
const residue = token.raw.replace(commentSpan, '')
stripped = true
if (residue.trim().length > 0) {
// Residual content exists (e.g. `<!-- note --> Use bun`): keep it.
result += residue
}
continue
}
}
result += token.raw
}
return { content: result, stripped }
}
/**
* Parses raw memory file content into a MemoryFileInfo. Pure function β no I/O.
*
* When includeBasePath is given, @include paths are resolved in the same lex
* pass and returned alongside the parsed file (so processMemoryFile doesn't
* need to lex the same content a second time).
*/
function parseMemoryFileContent(
rawContent: string,
filePath: string,
type: MemoryType,
includeBasePath?: string,
): { info: MemoryFileInfo | null; includePaths: string[] } {
// Skip non-text files to prevent loading binary data (images, PDFs, etc.) into memory
const ext = extname(filePath).toLowerCase()
if (ext && !TEXT_FILE_EXTENSIONS.has(ext)) {
logForDebugging(`Skipping non-text file in @include: ${filePath}`)
return { info: null, includePaths: [] }
}
const { content: withoutFrontmatter, paths } =
parseFrontmatterPaths(rawContent)
// Lex once so strip and @include-extract share the same tokens. gfm:false
// is required by extract (so ~/path doesn't tokenize as strikethrough) and
// doesn't affect strip (html blocks are a CommonMark rule).
const hasComment = withoutFrontmatter.includes('<!--')
const tokens =
hasComment || includeBasePath !== undefined
? new Lexer({ gfm: false }).lex(withoutFrontmatter)
: undefined
// Only rebuild via tokens when a comment actually needs stripping β
// marked normalises \r\n during lex, so round-tripping a CRLF file
// through token.raw would spuriously flip contentDiffersFromDisk.
const strippedContent =
hasComment && tokens
? stripHtmlCommentsFromTokens(tokens).content
: withoutFrontmatter
const includePaths =
tokens && includeBasePath !== undefined
? extractIncludePathsFromTokens(tokens, includeBasePath)
: []
// Truncate MEMORY.md entrypoints to the line AND byte caps
let finalContent = strippedContent
if (type === 'AutoMem' || type === 'TeamMem') {
finalContent = truncateEntrypointContent(strippedContent).content
}
// Covers frontmatter strip, HTML comment strip, and MEMORY.md truncation
const contentDiffersFromDisk = finalContent !== rawContent
return {
info: {
path: filePath,
type,
content: finalContent,
globs: paths,
contentDiffersFromDisk,
rawContent: contentDiffersFromDisk ? rawContent : undefined,
},
includePaths,
}
}
function handleMemoryFileReadError(error: unknown, filePath: string): void {
const code = getErrnoCode(error)
// ENOENT = file doesn't exist, EISDIR = is a directory β both expected
if (code === 'ENOENT' || code === 'EISDIR') {
return
}
// Log permission errors (EACCES) as they're actionable
if (code === 'EACCES') {
// Don't log the full file path to avoid PII/security issues
logEvent('tengu_claude_md_permission_error', {
is_access_error: 1,
has_home_dir: filePath.includes(getClaudeConfigHomeDir()) ? 1 : 0,
})
}
}
/**
* Used by processMemoryFile β getMemoryFiles so the event loop stays
* responsive during the directory walk (many readFile attempts, most
* ENOENT). When includeBasePath is given, @include paths are resolved in
* the same lex pass and returned alongside the parsed file.
*/
async function safelyReadMemoryFileAsync(
filePath: string,
type: MemoryType,
includeBasePath?: string,
): Promise<{ info: MemoryFileInfo | null; includePaths: string[] }> {
try {
const fs = getFsImplementation()
const rawContent = await fs.readFile(filePath, { encoding: 'utf-8' })
return parseMemoryFileContent(rawContent, filePath, type, includeBasePath)
} catch (error) {
handleMemoryFileReadError(error, filePath)
return { info: null, includePaths: [] }
}
}
type MarkdownToken = {
type: string
text?: string
href?: string
tokens?: MarkdownToken[]
raw?: string
items?: MarkdownToken[]
}
// Extract @path include references from pre-lexed tokens and resolve to
// absolute paths. Skips html tokens so @paths inside block comments are
// ignored β the caller may pass pre-strip tokens.
function extractIncludePathsFromTokens(
tokens: ReturnType<Lexer['lex']>,
basePath: string,
): string[] {
const absolutePaths = new Set<string>()
// Extract @paths from a text string and add resolved paths to absolutePaths.
function extractPathsFromText(textContent: string) {
const includeRegex = /(?:^|\s)@((?:[^\s\\]|\\ )+)/g
let match
while ((match = includeRegex.exec(textContent)) !== null) {
let path = match[1]
if (!path) continue
// Strip fragment identifiers (#heading, #section-name, etc.)
const hashIndex = path.indexOf('#')
if (hashIndex !== -1) {
path = path.substring(0, hashIndex)
}
if (!path) continue
// Unescape the spaces in the path
path = path.replace(/\\ /g, ' ')
// Accept @path, @./path, @~/path, or @/path
if (path) {
const isValidPath =
path.startsWith('./') ||
path.startsWith('~/') ||
(path.startsWith('/') && path !== '/') ||
(!path.startsWith('@') &&
!path.match(/^[#%^&*()]+/) &&
path.match(/^[a-zA-Z0-9._-]/))
if (isValidPath) {
const resolvedPath = expandPath(path, dirname(basePath))
absolutePaths.add(resolvedPath)
}
}
}
}
// Recursively process elements to find text nodes
function processElements(elements: MarkdownToken[]) {
for (const element of elements) {
if (element.type === 'code' || element.type === 'codespan') {
continue
}
// For html tokens that contain comments, strip the comment spans and
// check the residual for @paths (e.g. `<!-- note --> @./file.md`).
// Other html tokens (non-comment tags) are skipped entirely.
if (element.type === 'html') {
const raw = element.raw || ''
const trimmed = raw.trimStart()
if (trimmed.startsWith('<!--') && trimmed.includes('-->')) {
const commentSpan = /<!--[\s\S]*?-->/g
const residue = raw.replace(commentSpan, '')
if (residue.trim().length > 0) {
extractPathsFromText(residue)
}
}
continue
}
// Process text nodes
if (element.type === 'text') {
extractPathsFromText(element.text || '')
}
// Recurse into children tokens
if (element.tokens) {
processElements(element.tokens)
}
// Special handling for list structures
if (element.items) {
processElements(element.items)
}
}
}
processElements(tokens as MarkdownToken[])
return [...absolutePaths]
}
const MAX_INCLUDE_DEPTH = 5
/**
* Checks whether a CLAUDE.md file path is excluded by the claudeMdExcludes setting.
* Only applies to User, Project, and Local memory types.
* Managed, AutoMem, and TeamMem types are never excluded.
*
* Matches both the original path and the realpath-resolved path to handle symlinks
* (e.g., /tmp -> /private/tmp on macOS).
*/
function isClaudeMdExcluded(filePath: string, type: MemoryType): boolean {
if (type !== 'User' && type !== 'Project' && type !== 'Local') {
return false
}
const patterns = getInitialSettings().claudeMdExcludes
if (!patterns || patterns.length === 0) {
return false
}
const matchOpts = { dot: true }
const normalizedPath = filePath.replaceAll('\\', '/')
// Build an expanded pattern list that includes realpath-resolved versions of
// absolute patterns. This handles symlinks like /tmp -> /private/tmp on macOS:
// the user writes "/tmp/project/CLAUDE.md" in their exclude, but the system
// resolves the CWD to "/private/tmp/project/...", so the file path uses the
// real path. By resolving the patterns too, both sides match.
const expandedPatterns = resolveExcludePatterns(patterns).filter(
p => p.length > 0,
)
if (expandedPatterns.length === 0) {
return false
}
return picomatch.isMatch(normalizedPath, expandedPatterns, matchOpts)
}
/**
* Expands exclude patterns by resolving symlinks in absolute path prefixes.
* For each absolute pattern (starting with /), tries to resolve the longest
* existing directory prefix via realpathSync and adds the resolved version.
* Glob patterns (containing *) have their static prefix resolved.
*/
function resolveExcludePatterns(patterns: string[]): string[] {
const fs = getFsImplementation()
const expanded: string[] = patterns.map(p => p.replaceAll('\\', '/'))
for (const normalized of expanded) {
// Only resolve absolute patterns β glob-only patterns like "**/*.md" don't have
// a filesystem prefix to resolve
if (!normalized.startsWith('/')) {
continue
}
// Find the static prefix before any glob characters
const globStart = normalized.search(/[*?{[]/)
const staticPrefix =
globStart === -1 ? normalized : normalized.slice(0, globStart)
const dirToResolve = dirname(staticPrefix)
try {
// sync IO: called from sync context (isClaudeMdExcluded -> processMemoryFile -> getMemoryFiles)
const resolvedDir = fs.realpathSync(dirToResolve).replaceAll('\\', '/')
if (resolvedDir !== dirToResolve) {
const resolvedPattern =
resolvedDir + normalized.slice(dirToResolve.length)
expanded.push(resolvedPattern)
}
} catch {
// Directory doesn't exist; skip resolution for this pattern
}
}
return expanded
}
/**
* Recursively processes a memory file and all its @include references
* Returns an array of MemoryFileInfo objects with includes first, then main file
*/
export async function processMemoryFile(
filePath: string,
type: MemoryType,
processedPaths: Set<string>,
includeExternal: boolean,
depth: number = 0,
parent?: string,
): Promise<MemoryFileInfo[]> {
// Skip if already processed or max depth exceeded.
// Normalize paths for comparison to handle Windows drive letter casing
// differences (e.g., C:\Users vs c:\Users).
const normalizedPath = normalizePathForComparison(filePath)
if (processedPaths.has(normalizedPath) || depth >= MAX_INCLUDE_DEPTH) {
return []
}
// Skip if path is excluded by claudeMdExcludes setting
if (isClaudeMdExcluded(filePath, type)) {
return []
}
// Resolve symlink path early for @import resolution
const { resolvedPath, isSymlink } = safeResolvePath(
getFsImplementation(),
filePath,
)
processedPaths.add(normalizedPath)
if (isSymlink) {
processedPaths.add(normalizePathForComparison(resolvedPath))
}
const { info: memoryFile, includePaths: resolvedIncludePaths } =
await safelyReadMemoryFileAsync(filePath, type, resolvedPath)
if (!memoryFile || !memoryFile.content.trim()) {
return []
}
// Add parent information
if (parent) {
memoryFile.parent = parent
}
const result: MemoryFileInfo[] = []
// Add the main file first (parent before children)
result.push(memoryFile)
for (const resolvedIncludePath of resolvedIncludePaths) {
const isExternal = !pathInOriginalCwd(resolvedIncludePath)
if (isExternal && !includeExternal) {
continue
}
// Recursively process included files with this file as parent
const includedFiles = await processMemoryFile(
resolvedIncludePath,
type,
processedPaths,
includeExternal,
depth + 1,
filePath, // Pass current file as parent
)
result.push(...includedFiles)
}
return result
}
/**
* Processes all .md files in the .claude/rules/ directory and its subdirectories
* @param rulesDir The path to the rules directory
* @param type Type of memory file (User, Project, Local)
* @param processedPaths Set of already processed file paths
* @param includeExternal Whether to include external files
* @param conditionalRule If true, only include files with frontmatter paths; if false, only include files without frontmatter paths
* @param visitedDirs Set of already visited directory real paths (for cycle detection)
* @returns Array of MemoryFileInfo objects
*/
export async function processMdRules({
rulesDir,
type,
processedPaths,
includeExternal,
conditionalRule,
visitedDirs = new Set(),
}: {
rulesDir: string
type: MemoryType
processedPaths: Set<string>
includeExternal: boolean
conditionalRule: boolean
visitedDirs?: Set<string>
}): Promise<MemoryFileInfo[]> {
if (visitedDirs.has(rulesDir)) {
return []
}
try {
const fs = getFsImplementation()
const { resolvedPath: resolvedRulesDir, isSymlink } = safeResolvePath(
fs,
rulesDir,
)
visitedDirs.add(rulesDir)
if (isSymlink) {
visitedDirs.add(resolvedRulesDir)
}
const result: MemoryFileInfo[] = []
let entries: import('fs').Dirent[]
try {
entries = await fs.readdir(resolvedRulesDir)
} catch (e: unknown) {
const code = getErrnoCode(e)
if (code === 'ENOENT' || code === 'EACCES' || code === 'ENOTDIR') {
return []
}
throw e
}
for (const entry of entries) {
const entryPath = join(rulesDir, entry.name)
const { resolvedPath: resolvedEntryPath, isSymlink } = safeResolvePath(
fs,
entryPath,
)
// Use Dirent methods for non-symlinks to avoid extra stat calls.
// For symlinks, we need stat to determine what the target is.
const stats = isSymlink ? await fs.stat(resolvedEntryPath) : null
const isDirectory = stats ? stats.isDirectory() : entry.isDirectory()
const isFile = stats ? stats.isFile() : entry.isFile()
if (isDirectory) {
result.push(
...(await processMdRules({
rulesDir: resolvedEntryPath,
type,
processedPaths,
includeExternal,
conditionalRule,
visitedDirs,
})),
)
} else if (isFile && entry.name.endsWith('.md')) {
const files = await processMemoryFile(
resolvedEntryPath,
type,
processedPaths,
includeExternal,
)
result.push(
...files.filter(f => (conditionalRule ? f.globs : !f.globs)),
)
}
}
return result
} catch (error) {
if (error instanceof Error && error.message.includes('EACCES')) {
logEvent('tengu_claude_rules_md_permission_error', {
is_access_error: 1,
has_home_dir: rulesDir.includes(getClaudeConfigHomeDir()) ? 1 : 0,
})
}
return []
}
}
export const getMemoryFiles = memoize(
async (forceIncludeExternal: boolean = false): Promise<MemoryFileInfo[]> => {
const startTime = Date.now()
logForDiagnosticsNoPII('info', 'memory_files_started')
const result: MemoryFileInfo[] = []
const processedPaths = new Set<string>()
const config = getCurrentProjectConfig()
const includeExternal =
forceIncludeExternal ||
config.hasClaudeMdExternalIncludesApproved ||
false
// Process Managed file first (always loaded - policy settings)
const managedClaudeMd = getMemoryPath('Managed')
result.push(
...(await processMemoryFile(
managedClaudeMd,
'Managed',
processedPaths,
includeExternal,
)),
)
// Process Managed .claude/rules/*.md files
const managedClaudeRulesDir = getManagedClaudeRulesDir()
result.push(
...(await processMdRules({
rulesDir: managedClaudeRulesDir,
type: 'Managed',
processedPaths,
includeExternal,
conditionalRule: false,
})),
)
// Process User file (only if userSettings is enabled)
if (isSettingSourceEnabled('userSettings')) {
const userClaudeMd = getMemoryPath('User')
result.push(
...(await processMemoryFile(
userClaudeMd,
'User',
processedPaths,
true, // User memory can always include external files
)),
)
// Process User ~/.claude/rules/*.md files
const userClaudeRulesDir = getUserClaudeRulesDir()
result.push(
...(await processMdRules({
rulesDir: userClaudeRulesDir,
type: 'User',
processedPaths,
includeExternal: true,
conditionalRule: false,
})),
)
}
// Then process Project and Local files
const dirs: string[] = []
const originalCwd = getOriginalCwd()
let currentDir = originalCwd
while (currentDir !== parse(currentDir).root) {
dirs.push(currentDir)
currentDir = dirname(currentDir)
}
// When running from a git worktree nested inside its main repo (e.g.,
// .claude/worktrees/<name>/ from `claude -w`), the upward walk passes
// through both the worktree root and the main repo root. Both contain
// checked-in files like CLAUDE.md and .claude/rules/*.md, so the same
// content gets loaded twice. Skip Project-type (checked-in) files from
// directories above the worktree but within the main repo β the worktree
// already has its own checkout. CLAUDE.local.md is gitignored so it only
// exists in the main repo and is still loaded.
// See: https://github.com/anthropics/claude-code/issues/29599
const gitRoot = findGitRoot(originalCwd)
const canonicalRoot = findCanonicalGitRoot(originalCwd)
const isNestedWorktree =
gitRoot !== null &&
canonicalRoot !== null &&
normalizePathForComparison(gitRoot) !==
normalizePathForComparison(canonicalRoot) &&
pathInWorkingPath(gitRoot, canonicalRoot)
// Process from root downward to CWD
for (const dir of dirs.reverse()) {
// In a nested worktree, skip checked-in files from the main repo's
// working tree (dirs inside canonicalRoot but outside the worktree).
const skipProject =
isNestedWorktree &&
pathInWorkingPath(dir, canonicalRoot) &&
!pathInWorkingPath(dir, gitRoot)
// Try reading CLAUDE.md (Project) - only if projectSettings is enabled
if (isSettingSourceEnabled('projectSettings') && !skipProject) {
const projectPath = join(dir, 'CLAUDE.md')
result.push(
...(await processMemoryFile(
projectPath,
'Project',
processedPaths,
includeExternal,
)),
)
// Try reading .claude/CLAUDE.md (Project)
const dotClaudePath = join(dir, '.claude', 'CLAUDE.md')
result.push(
...(await processMemoryFile(
dotClaudePath,
'Project',
processedPaths,
includeExternal,
)),
)
// Try reading .claude/rules/*.md files (Project)
const rulesDir = join(dir, '.claude', 'rules')
result.push(
...(await processMdRules({
rulesDir,
type: 'Project',
processedPaths,
includeExternal,
conditionalRule: false,
})),
)
}
// Try reading CLAUDE.local.md (Local) - only if localSettings is enabled
if (isSettingSourceEnabled('localSettings')) {
const localPath = join(dir, 'CLAUDE.local.md')
result.push(
...(await processMemoryFile(
localPath,
'Local',
processedPaths,
includeExternal,
)),
)
}
}
// Process CLAUDE.md from additional directories (--add-dir) if env var is enabled
// This is controlled by CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD and defaults to off
// Note: we don't check isSettingSourceEnabled('projectSettings') here because --add-dir
// is an explicit user action and the SDK defaults settingSources to [] when not specified
if (isEnvTruthy(process.env.CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD)) {
const additionalDirs = getAdditionalDirectoriesForClaudeMd()
for (const dir of additionalDirs) {
// Try reading CLAUDE.md from the additional directory
const projectPath = join(dir, 'CLAUDE.md')
result.push(
...(await processMemoryFile(
projectPath,
'Project',
processedPaths,
includeExternal,
)),
)
// Try reading .claude/CLAUDE.md from the additional directory
const dotClaudePath = join(dir, '.claude', 'CLAUDE.md')
result.push(
...(await processMemoryFile(
dotClaudePath,
'Project',
processedPaths,
includeExternal,
)),
)
// Try reading .claude/rules/*.md files from the additional directory
const rulesDir = join(dir, '.claude', 'rules')
result.push(
...(await processMdRules({
rulesDir,
type: 'Project',
processedPaths,
includeExternal,
conditionalRule: false,
})),
)
}
}
// Memdir entrypoint (memory.md) - only if feature is on and file exists
if (isAutoMemoryEnabled()) {
const { info: memdirEntry } = await safelyReadMemoryFileAsync(
getAutoMemEntrypoint(),
'AutoMem',
)
if (memdirEntry) {
const normalizedPath = normalizePathForComparison(memdirEntry.path)
if (!processedPaths.has(normalizedPath)) {
processedPaths.add(normalizedPath)
result.push(memdirEntry)
}
}
}
// Team memory entrypoint - only if feature is on and file exists
if (feature('TEAMMEM') && teamMemPaths!.isTeamMemoryEnabled()) {
const { info: teamMemEntry } = await safelyReadMemoryFileAsync(
teamMemPaths!.getTeamMemEntrypoint(),
'TeamMem',
)
if (teamMemEntry) {
const normalizedPath = normalizePathForComparison(teamMemEntry.path)
if (!processedPaths.has(normalizedPath)) {
processedPaths.add(normalizedPath)
result.push(teamMemEntry)
}
}
}
const totalContentLength = result.reduce(
(sum, f) => sum + f.content.length,
0,
)
logForDiagnosticsNoPII('info', 'memory_files_completed', {
duration_ms: Date.now() - startTime,
file_count: result.length,
total_content_length: totalContentLength,
})
const typeCounts: Record<string, number> = {}
for (const f of result) {
typeCounts[f.type] = (typeCounts[f.type] ?? 0) + 1
}
if (!hasLoggedInitialLoad) {
hasLoggedInitialLoad = true
logEvent('tengu_claudemd__initial_load', {
file_count: result.length,
total_content_length: totalContentLength,
user_count: typeCounts['User'] ?? 0,
project_count: typeCounts['Project'] ?? 0,
local_count: typeCounts['Local'] ?? 0,
managed_count: typeCounts['Managed'] ?? 0,
automem_count: typeCounts['AutoMem'] ?? 0,
...(feature('TEAMMEM')
? { teammem_count: typeCounts['TeamMem'] ?? 0 }
: {}),
duration_ms: Date.now() - startTime,
})
}
// Fire InstructionsLoaded hook for each instruction file loaded
// (fire-and-forget, audit/observability only).
// AutoMem/TeamMem are intentionally excluded β they're a separate
// memory system, not "instructions" in the CLAUDE.md/rules sense.
// Gated on !forceIncludeExternal: the forceIncludeExternal=true variant
// is only used by getExternalClaudeMdIncludes() for approval checks, not
// for building context β firing the hook there would double-fire on startup.
// The one-shot flag is consumed on every !forceIncludeExternal cache miss
// (NOT gated on hasInstructionsLoadedHook) so the flag is released even
// when no hook is configured β otherwise a mid-session hook registration
// followed by a direct .cache.clear() would spuriously fire with a stale
// 'session_start' reason.
if (!forceIncludeExternal) {
const eagerLoadReason = consumeNextEagerLoadReason()
if (eagerLoadReason !== undefined && hasInstructionsLoadedHook()) {
for (const file of result) {
if (!isInstructionsMemoryType(file.type)) continue
const loadReason = file.parent ? 'include' : eagerLoadReason
void executeInstructionsLoadedHooks(
file.path,
file.type,
loadReason,
{
globs: file.globs,
parentFilePath: file.parent,
},
)
}
}
}
return result
},
)
function isInstructionsMemoryType(
type: MemoryType,
): type is InstructionsMemoryType {
return (
type === 'User' ||
type === 'Project' ||
type === 'Local' ||
type === 'Managed'
)
}
// Load reason to report for top-level (non-included) files on the next eager
// getMemoryFiles() pass. Set to 'compact' by resetGetMemoryFilesCache when
// compaction clears the cache, so the InstructionsLoaded hook reports the
// reload correctly instead of misreporting it as 'session_start'. One-shot:
// reset to 'session_start' after being read.
let nextEagerLoadReason: InstructionsLoadReason = 'session_start'
// Whether the InstructionsLoaded hook should fire on the next cache miss.
// true initially (for session_start), consumed after firing, re-enabled only
// by resetGetMemoryFilesCache(). Callers that only need cache invalidation
// for correctness (e.g. worktree enter/exit, settings sync, /memory dialog)
// should use clearMemoryFileCaches() instead to avoid spurious hook fires.
let shouldFireHook = true
function consumeNextEagerLoadReason(): InstructionsLoadReason | undefined {
if (!shouldFireHook) return undefined
shouldFireHook = false
const reason = nextEagerLoadReason
nextEagerLoadReason = 'session_start'
return reason
}
/**
* Clears the getMemoryFiles memoize cache
* without firing the InstructionsLoaded hook.
*
* Use this for cache invalidation that is purely for correctness (e.g.
* worktree enter/exit, settings sync, /memory dialog). For events that
* represent instructions actually being reloaded into context (e.g.
* compaction), use resetGetMemoryFilesCache() instead.
*/
export function clearMemoryFileCaches(): void {
// ?.cache because tests spyOn this, which replaces the memoize wrapper.
getMemoryFiles.cache?.clear?.()
}
export function resetGetMemoryFilesCache(
reason: InstructionsLoadReason = 'session_start',
): void {
nextEagerLoadReason = reason
shouldFireHook = true
clearMemoryFileCaches()
}
export function getLargeMemoryFiles(files: MemoryFileInfo[]): MemoryFileInfo[] {
return files.filter(f => f.content.length > MAX_MEMORY_CHARACTER_COUNT)
}
/**
* When tengu_moth_copse is on, the findRelevantMemories prefetch surfaces
* memory files via attachments, so the MEMORY.md index is no longer injected
* into the system prompt. Callsites that care about "what's actually in
* context" (context builder, /context viz) should filter through this.
*/
export function filterInjectedMemoryFiles(
files: MemoryFileInfo[],
): MemoryFileInfo[] {
const skipMemoryIndex = getFeatureValue_CACHED_MAY_BE_STALE(
'tengu_moth_copse',
false,
)
if (!skipMemoryIndex) return files
return files.filter(f => f.type !== 'AutoMem' && f.type !== 'TeamMem')
}
export const getClaudeMds = (
memoryFiles: MemoryFileInfo[],
filter?: (type: MemoryType) => boolean,
): string => {
const memories: string[] = []
const skipProjectLevel = getFeatureValue_CACHED_MAY_BE_STALE(
'tengu_paper_halyard',
false,
)
for (const file of memoryFiles) {
if (filter && !filter(file.type)) continue
if (skipProjectLevel && (file.type === 'Project' || file.type === 'Local'))
continue
if (file.content) {
const description =
file.type === 'Project'
? ' (project instructions, checked into the codebase)'
: file.type === 'Local'
? " (user's private project instructions, not checked in)"
: feature('TEAMMEM') && file.type === 'TeamMem'
? ' (shared team memory, synced across the organization)'
: file.type === 'AutoMem'
? " (user's auto-memory, persists across conversations)"
: " (user's private global instructions for all projects)"
const content = file.content.trim()
if (feature('TEAMMEM') && file.type === 'TeamMem') {
memories.push(
`Contents of ${file.path}${description}:\n\n<team-memory-content source="shared">\n${content}\n</team-memory-content>`,
)
} else {
memories.push(`Contents of ${file.path}${description}:\n\n${content}`)
}
}
}
if (memories.length === 0) {
return ''
}
return `${MEMORY_INSTRUCTION_PROMPT}\n\n${memories.join('\n\n')}`
}
/**
* Gets managed and user conditional rules that match the target path.
* This is the first phase of nested memory loading.
*
* @param targetPath The target file path to match against glob patterns
* @param processedPaths Set of already processed file paths (will be mutated)
* @returns Array of MemoryFileInfo objects for matching conditional rules
*/
export async function getManagedAndUserConditionalRules(
targetPath: string,
processedPaths: Set<string>,
): Promise<MemoryFileInfo[]> {
const result: MemoryFileInfo[] = []
// Process Managed conditional .claude/rules/*.md files
const managedClaudeRulesDir = getManagedClaudeRulesDir()
result.push(
...(await processConditionedMdRules(
targetPath,
managedClaudeRulesDir,
'Managed',
processedPaths,
false,
)),
)
if (isSettingSourceEnabled('userSettings')) {
// Process User conditional .claude/rules/*.md files
const userClaudeRulesDir = getUserClaudeRulesDir()
result.push(
...(await processConditionedMdRules(
targetPath,
userClaudeRulesDir,
'User',
processedPaths,
true,
)),
)
}
return result
}
/**
* Gets memory files for a single nested directory (between CWD and target).
* Loads CLAUDE.md, unconditional rules, and conditional rules for that directory.
*
* @param dir The directory to process
* @param targetPath The target file path (for conditional rule matching)
* @param processedPaths Set of already processed file paths (will be mutated)
* @returns Array of MemoryFileInfo objects
*/
export async function getMemoryFilesForNestedDirectory(
dir: string,
targetPath: string,
processedPaths: Set<string>,
): Promise<MemoryFileInfo[]> {
const result: MemoryFileInfo[] = []
// Process project memory files (CLAUDE.md and .claude/CLAUDE.md)
if (isSettingSourceEnabled('projectSettings')) {
const projectPath = join(dir, 'CLAUDE.md')
result.push(
...(await processMemoryFile(
projectPath,
'Project',
processedPaths,
false,
)),
)
const dotClaudePath = join(dir, '.claude', 'CLAUDE.md')
result.push(
...(await processMemoryFile(
dotClaudePath,
'Project',
processedPaths,
false,
)),
)
}
// Process local memory file (CLAUDE.local.md)
if (isSettingSourceEnabled('localSettings')) {
const localPath = join(dir, 'CLAUDE.local.md')
result.push(
...(await processMemoryFile(localPath, 'Local', processedPaths, false)),
)
}
const rulesDir = join(dir, '.claude', 'rules')
// Process project unconditional .claude/rules/*.md files, which were not eagerly loaded
// Use a separate processedPaths set to avoid marking conditional rule files as processed
const unconditionalProcessedPaths = new Set(processedPaths)
result.push(
...(await processMdRules({
rulesDir,
type: 'Project',
processedPaths: unconditionalProcessedPaths,
includeExternal: false,
conditionalRule: false,
})),
)
// Process project conditional .claude/rules/*.md files
result.push(
...(await processConditionedMdRules(
targetPath,
rulesDir,
'Project',
processedPaths,
false,
)),
)
// processedPaths must be seeded with unconditional paths for subsequent directories
for (const path of unconditionalProcessedPaths) {
processedPaths.add(path)
}
return result
}
/**
* Gets conditional rules for a CWD-level directory (from root up to CWD).
* Only processes conditional rules since unconditional rules are already loaded eagerly.
*
* @param dir The directory to process
* @param targetPath The target file path (for conditional rule matching)
* @param processedPaths Set of already processed file paths (will be mutated)
* @returns Array of MemoryFileInfo objects
*/
export async function getConditionalRulesForCwdLevelDirectory(
dir: string,
targetPath: string,
processedPaths: Set<string>,
): Promise<MemoryFileInfo[]> {
const rulesDir = join(dir, '.claude', 'rules')
return processConditionedMdRules(
targetPath,
rulesDir,
'Project',
processedPaths,
false,
)
}
/**
* Processes all .md files in the .claude/rules/ directory and its subdirectories,
* filtering to only include files with frontmatter paths that match the target path
* @param targetPath The file path to match against frontmatter glob patterns
* @param rulesDir The path to the rules directory
* @param type Type of memory file (User, Project, Local)
* @param processedPaths Set of already processed file paths
* @param includeExternal Whether to include external files
* @returns Array of MemoryFileInfo objects that match the target path
*/
export async function processConditionedMdRules(
targetPath: string,
rulesDir: string,
type: MemoryType,
processedPaths: Set<string>,
includeExternal: boolean,
): Promise<MemoryFileInfo[]> {
const conditionedRuleMdFiles = await processMdRules({
rulesDir,
type,
processedPaths,
includeExternal,
conditionalRule: true,
})
// Filter to only include files whose globs patterns match the targetPath
return conditionedRuleMdFiles.filter(file => {
if (!file.globs || file.globs.length === 0) {
return false
}
// For Project rules: glob patterns are relative to the directory containing .claude
// For Managed/User rules: glob patterns are relative to the original CWD
const baseDir =
type === 'Project'
? dirname(dirname(rulesDir)) // Parent of .claude
: getOriginalCwd() // Project root for managed/user rules
const relativePath = isAbsolute(targetPath)
? relative(baseDir, targetPath)
: targetPath
// ignore() throws on empty strings, paths escaping the base (../),
// and absolute paths (Windows cross-drive relative() returns absolute).
// Files outside baseDir can't match baseDir-relative globs anyway.
if (
!relativePath ||
relativePath.startsWith('..') ||
isAbsolute(relativePath)
) {
return false
}
return ignore().add(file.globs).ignores(relativePath)
})
}
export type ExternalClaudeMdInclude = {
path: string
parent: string
}
export function getExternalClaudeMdIncludes(
files: MemoryFileInfo[],
): ExternalClaudeMdInclude[] {
const externals: ExternalClaudeMdInclude[] = []
for (const file of files) {
if (file.type !== 'User' && file.parent && !pathInOriginalCwd(file.path)) {
externals.push({ path: file.path, parent: file.parent })
}
}
return externals
}
export function hasExternalClaudeMdIncludes(files: MemoryFileInfo[]): boolean {
return getExternalClaudeMdIncludes(files).length > 0
}
export async function shouldShowClaudeMdExternalIncludesWarning(): Promise<boolean> {
const config = getCurrentProjectConfig()
if (
config.hasClaudeMdExternalIncludesApproved ||
config.hasClaudeMdExternalIncludesWarningShown
) {
return false
}
return hasExternalClaudeMdIncludes(await getMemoryFiles(true))
}
/**
* Check if a file path is a memory file (CLAUDE.md, CLAUDE.local.md, or .claude/rules/*.md)
*/
export function isMemoryFilePath(filePath: string): boolean {
const name = basename(filePath)
// CLAUDE.md or CLAUDE.local.md anywhere
if (name === 'CLAUDE.md' || name === 'CLAUDE.local.md') {
return true
}
// .md files in .claude/rules/ directories
if (
name.endsWith('.md') &&
filePath.includes(`${sep}.claude${sep}rules${sep}`)
) {
return true
}
return false
}
/**
* Get all memory file paths from both standard discovery and readFileState.
* Combines:
* - getMemoryFiles() paths (CWD upward to root)
* - readFileState paths matching memory patterns (includes child directories)
*/
export function getAllMemoryFilePaths(
files: MemoryFileInfo[],
readFileState: FileStateCache,
): string[] {
const paths = new Set<string>()
for (const file of files) {
if (file.content.trim().length > 0) {
paths.add(file.path)
}
}
// Add memory files from readFileState (includes child directories)
for (const filePath of cacheKeys(readFileState)) {
if (isMemoryFilePath(filePath)) {
paths.add(filePath)
}
}
return Array.from(paths)
}