π File detail
services/api/promptCacheBreakDetection.ts
π― Use case
This file lives under βservices/β, which covers long-lived services (LSP, MCP, OAuth, tool execution, memory, compaction, voice, settings sync, β¦). On the API surface it exposes CACHE_TTL_1HOUR_MS, PromptStateSnapshot, recordPromptState, checkResponseForCacheBreak, and notifyCacheDeletion (and more) β mainly functions, hooks, or classes. Dependencies touch @anthropic-ai, text diffing, Node filesystem, and Node path helpers. It composes internal code from constants and analytics (relative imports).
Generated from folder role, exports, dependency roots, and inline comments β not hand-reviewed for every path.
π§ Inline summary
import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs' import { createPatch } from 'diff' import { mkdir, writeFile } from 'fs/promises' import { join } from 'path'
π€ Exports (heuristic)
CACHE_TTL_1HOUR_MSPromptStateSnapshotrecordPromptStatecheckResponseForCacheBreaknotifyCacheDeletionnotifyCompactioncleanupAgentTrackingresetPromptCacheBreakDetection
π External import roots
Package roots from from "β¦" (relative paths omitted).
@anthropic-aidifffspathsrc
π₯οΈ Source preview
import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
import { createPatch } from 'diff'
import { mkdir, writeFile } from 'fs/promises'
import { join } from 'path'
import type { AgentId } from 'src/types/ids.js'
import type { Message } from 'src/types/message.js'
import { logForDebugging } from 'src/utils/debug.js'
import { djb2Hash } from 'src/utils/hash.js'
import { logError } from 'src/utils/log.js'
import { getClaudeTempDir } from 'src/utils/permissions/filesystem.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import type { QuerySource } from '../../constants/querySource.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
function getCacheBreakDiffPath(): string {
const chars = 'abcdefghijklmnopqrstuvwxyz0123456789'
let suffix = ''
for (let i = 0; i < 4; i++) {
suffix += chars[Math.floor(Math.random() * chars.length)]
}
return join(getClaudeTempDir(), `cache-break-${suffix}.diff`)
}
type PreviousState = {
systemHash: number
toolsHash: number
/** Hash of system blocks WITH cache_control intact. Catches scope/TTL flips
* (globalβorg, 1hβ5m) that stripCacheControl erases from systemHash. */
cacheControlHash: number
toolNames: string[]
/** Per-tool schema hash. Diffed to name which tool's description changed
* when toolSchemasChanged but added=removed=0 (77% of tool breaks per
* BQ 2026-03-22). AgentTool/SkillTool embed dynamic agent/command lists. */
perToolHashes: Record<string, number>
systemCharCount: number
model: string
fastMode: boolean
/** 'tool_based' | 'system_prompt' | 'none' β flips when MCP tools are
* discovered/removed. */
globalCacheStrategy: string
/** Sorted beta header list. Diffed to show which headers were added/removed. */
betas: string[]
/** AFK_MODE_BETA_HEADER presence β should NOT break cache anymore
* (sticky-on latched in claude.ts). Tracked to verify the fix. */
autoModeActive: boolean
/** Overage state flip β should NOT break cache anymore (eligibility is
* latched session-stable in should1hCacheTTL). Tracked to verify the fix. */
isUsingOverage: boolean
/** Cache-editing beta header presence β should NOT break cache anymore
* (sticky-on latched in claude.ts). Tracked to verify the fix. */
cachedMCEnabled: boolean
/** Resolved effort (env β options β model default). Goes into output_config
* or anthropic_internal.effort_override. */
effortValue: string
/** Hash of getExtraBodyParams() β catches CLAUDE_CODE_EXTRA_BODY and
* anthropic_internal changes. */
extraBodyHash: number
callCount: number
pendingChanges: PendingChanges | null
prevCacheReadTokens: number | null
/** Set when cached microcompact sends cache_edits deletions. Cache reads
* will legitimately drop β this is expected, not a break. */
cacheDeletionsPending: boolean
buildDiffableContent: () => string
}
type PendingChanges = {
systemPromptChanged: boolean
toolSchemasChanged: boolean
modelChanged: boolean
fastModeChanged: boolean
cacheControlChanged: boolean
globalCacheStrategyChanged: boolean
betasChanged: boolean
autoModeChanged: boolean
overageChanged: boolean
cachedMCChanged: boolean
effortChanged: boolean
extraBodyChanged: boolean
addedToolCount: number
removedToolCount: number
systemCharDelta: number
addedTools: string[]
removedTools: string[]
changedToolSchemas: string[]
previousModel: string
newModel: string
prevGlobalCacheStrategy: string
newGlobalCacheStrategy: string
addedBetas: string[]
removedBetas: string[]
prevEffortValue: string
newEffortValue: string
buildPrevDiffableContent: () => string
}
const previousStateBySource = new Map<string, PreviousState>()
// Cap the number of tracked sources to prevent unbounded memory growth.
// Each entry stores a ~300KB+ diffableContent string (serialized system prompt
// + tool schemas). Without a cap, spawning many subagents (each with a unique
// agentId key) causes the map to grow indefinitely.
const MAX_TRACKED_SOURCES = 10
const TRACKED_SOURCE_PREFIXES = [
'repl_main_thread',
'sdk',
'agent:custom',
'agent:default',
'agent:builtin',
]
// Minimum absolute token drop required to trigger a cache break warning.
// Small drops (e.g., a few thousand tokens) can happen due to normal variation
// and aren't worth alerting on.
const MIN_CACHE_MISS_TOKENS = 2_000
// Anthropic's server-side prompt cache TTL thresholds to test.
// Cache breaks after these durations are likely due to TTL expiration
// rather than client-side changes.
const CACHE_TTL_5MIN_MS = 5 * 60 * 1000
export const CACHE_TTL_1HOUR_MS = 60 * 60 * 1000
// Models to exclude from cache break detection (e.g., haiku has different caching behavior)
function isExcludedModel(model: string): boolean {
return model.includes('haiku')
}
/**
* Returns the tracking key for a querySource, or null if untracked.
* Compact shares the same server-side cache as repl_main_thread
* (same cacheSafeParams), so they share tracking state.
*
* For subagents with a tracked querySource, uses the unique agentId to
* isolate tracking state. This prevents false positive cache break
* notifications when multiple instances of the same agent type run
* concurrently.
*
* Untracked sources (speculation, session_memory, prompt_suggestion, etc.)
* are short-lived forked agents where cache break detection provides no
* value β they run 1-3 turns with a fresh agentId each time, so there's
* nothing meaningful to compare against. Their cache metrics are still
* logged via tengu_api_success for analytics.
*/
function getTrackingKey(
querySource: QuerySource,
agentId?: AgentId,
): string | null {
if (querySource === 'compact') return 'repl_main_thread'
for (const prefix of TRACKED_SOURCE_PREFIXES) {
if (querySource.startsWith(prefix)) return agentId || querySource
}
return null
}
function stripCacheControl(
items: ReadonlyArray<Record<string, unknown>>,
): unknown[] {
return items.map(item => {
if (!('cache_control' in item)) return item
const { cache_control: _, ...rest } = item
return rest
})
}
function computeHash(data: unknown): number {
const str = jsonStringify(data)
if (typeof Bun !== 'undefined') {
const hash = Bun.hash(str)
// Bun.hash can return bigint for large inputs; convert to number safely
return typeof hash === 'bigint' ? Number(hash & 0xffffffffn) : hash
}
// Fallback for non-Bun runtimes (e.g. Node.js via npm global install)
return djb2Hash(str)
}
/** MCP tool names are user-controlled (server config) and may leak filepaths.
* Collapse them to 'mcp'; built-in names are a fixed vocabulary. */
function sanitizeToolName(name: string): string {
return name.startsWith('mcp__') ? 'mcp' : name
}
function computePerToolHashes(
strippedTools: ReadonlyArray<unknown>,
names: string[],
): Record<string, number> {
const hashes: Record<string, number> = {}
for (let i = 0; i < strippedTools.length; i++) {
hashes[names[i] ?? `__idx_${i}`] = computeHash(strippedTools[i])
}
return hashes
}
function getSystemCharCount(system: TextBlockParam[]): number {
let total = 0
for (const block of system) {
total += block.text.length
}
return total
}
function buildDiffableContent(
system: TextBlockParam[],
tools: BetaToolUnion[],
model: string,
): string {
const systemText = system.map(b => b.text).join('\n\n')
const toolDetails = tools
.map(t => {
if (!('name' in t)) return 'unknown'
const desc = 'description' in t ? t.description : ''
const schema = 'input_schema' in t ? jsonStringify(t.input_schema) : ''
return `${t.name}\n description: ${desc}\n input_schema: ${schema}`
})
.sort()
.join('\n\n')
return `Model: ${model}\n\n=== System Prompt ===\n\n${systemText}\n\n=== Tools (${tools.length}) ===\n\n${toolDetails}\n`
}
/** Extended tracking snapshot β everything that could affect the server-side
* cache key that we can observe from the client. All fields are optional so
* the call site can add incrementally; undefined fields compare as stable. */
export type PromptStateSnapshot = {
system: TextBlockParam[]
toolSchemas: BetaToolUnion[]
querySource: QuerySource
model: string
agentId?: AgentId
fastMode?: boolean
globalCacheStrategy?: string
betas?: readonly string[]
autoModeActive?: boolean
isUsingOverage?: boolean
cachedMCEnabled?: boolean
effortValue?: string | number
extraBodyParams?: unknown
}
/**
* Phase 1 (pre-call): Record the current prompt/tool state and detect what changed.
* Does NOT fire events β just stores pending changes for phase 2 to use.
*/
export function recordPromptState(snapshot: PromptStateSnapshot): void {
try {
const {
system,
toolSchemas,
querySource,
model,
agentId,
fastMode,
globalCacheStrategy = '',
betas = [],
autoModeActive = false,
isUsingOverage = false,
cachedMCEnabled = false,
effortValue,
extraBodyParams,
} = snapshot
const key = getTrackingKey(querySource, agentId)
if (!key) return
const strippedSystem = stripCacheControl(
system as unknown as ReadonlyArray<Record<string, unknown>>,
)
const strippedTools = stripCacheControl(
toolSchemas as unknown as ReadonlyArray<Record<string, unknown>>,
)
const systemHash = computeHash(strippedSystem)
const toolsHash = computeHash(strippedTools)
// Hash the full system array INCLUDING cache_control β this catches
// scope flips (globalβorg/none) and TTL flips (1hβ5m) that the stripped
// hash can't see because the text content is identical.
const cacheControlHash = computeHash(
system.map(b => ('cache_control' in b ? b.cache_control : null)),
)
const toolNames = toolSchemas.map(t => ('name' in t ? t.name : 'unknown'))
// Only compute per-tool hashes when the aggregate changed β common case
// (tools unchanged) skips N extra jsonStringify calls.
const computeToolHashes = () =>
computePerToolHashes(strippedTools, toolNames)
const systemCharCount = getSystemCharCount(system)
const lazyDiffableContent = () =>
buildDiffableContent(system, toolSchemas, model)
const isFastMode = fastMode ?? false
const sortedBetas = [...betas].sort()
const effortStr = effortValue === undefined ? '' : String(effortValue)
const extraBodyHash =
extraBodyParams === undefined ? 0 : computeHash(extraBodyParams)
const prev = previousStateBySource.get(key)
if (!prev) {
// Evict oldest entries if map is at capacity
while (previousStateBySource.size >= MAX_TRACKED_SOURCES) {
const oldest = previousStateBySource.keys().next().value
if (oldest !== undefined) previousStateBySource.delete(oldest)
}
previousStateBySource.set(key, {
systemHash,
toolsHash,
cacheControlHash,
toolNames,
systemCharCount,
model,
fastMode: isFastMode,
globalCacheStrategy,
betas: sortedBetas,
autoModeActive,
isUsingOverage,
cachedMCEnabled,
effortValue: effortStr,
extraBodyHash,
callCount: 1,
pendingChanges: null,
prevCacheReadTokens: null,
cacheDeletionsPending: false,
buildDiffableContent: lazyDiffableContent,
perToolHashes: computeToolHashes(),
})
return
}
prev.callCount++
const systemPromptChanged = systemHash !== prev.systemHash
const toolSchemasChanged = toolsHash !== prev.toolsHash
const modelChanged = model !== prev.model
const fastModeChanged = isFastMode !== prev.fastMode
const cacheControlChanged = cacheControlHash !== prev.cacheControlHash
const globalCacheStrategyChanged =
globalCacheStrategy !== prev.globalCacheStrategy
const betasChanged =
sortedBetas.length !== prev.betas.length ||
sortedBetas.some((b, i) => b !== prev.betas[i])
const autoModeChanged = autoModeActive !== prev.autoModeActive
const overageChanged = isUsingOverage !== prev.isUsingOverage
const cachedMCChanged = cachedMCEnabled !== prev.cachedMCEnabled
const effortChanged = effortStr !== prev.effortValue
const extraBodyChanged = extraBodyHash !== prev.extraBodyHash
if (
systemPromptChanged ||
toolSchemasChanged ||
modelChanged ||
fastModeChanged ||
cacheControlChanged ||
globalCacheStrategyChanged ||
betasChanged ||
autoModeChanged ||
overageChanged ||
cachedMCChanged ||
effortChanged ||
extraBodyChanged
) {
const prevToolSet = new Set(prev.toolNames)
const newToolSet = new Set(toolNames)
const prevBetaSet = new Set(prev.betas)
const newBetaSet = new Set(sortedBetas)
const addedTools = toolNames.filter(n => !prevToolSet.has(n))
const removedTools = prev.toolNames.filter(n => !newToolSet.has(n))
const changedToolSchemas: string[] = []
if (toolSchemasChanged) {
const newHashes = computeToolHashes()
for (const name of toolNames) {
if (!prevToolSet.has(name)) continue
if (newHashes[name] !== prev.perToolHashes[name]) {
changedToolSchemas.push(name)
}
}
prev.perToolHashes = newHashes
}
prev.pendingChanges = {
systemPromptChanged,
toolSchemasChanged,
modelChanged,
fastModeChanged,
cacheControlChanged,
globalCacheStrategyChanged,
betasChanged,
autoModeChanged,
overageChanged,
cachedMCChanged,
effortChanged,
extraBodyChanged,
addedToolCount: addedTools.length,
removedToolCount: removedTools.length,
addedTools,
removedTools,
changedToolSchemas,
systemCharDelta: systemCharCount - prev.systemCharCount,
previousModel: prev.model,
newModel: model,
prevGlobalCacheStrategy: prev.globalCacheStrategy,
newGlobalCacheStrategy: globalCacheStrategy,
addedBetas: sortedBetas.filter(b => !prevBetaSet.has(b)),
removedBetas: prev.betas.filter(b => !newBetaSet.has(b)),
prevEffortValue: prev.effortValue,
newEffortValue: effortStr,
buildPrevDiffableContent: prev.buildDiffableContent,
}
} else {
prev.pendingChanges = null
}
prev.systemHash = systemHash
prev.toolsHash = toolsHash
prev.cacheControlHash = cacheControlHash
prev.toolNames = toolNames
prev.systemCharCount = systemCharCount
prev.model = model
prev.fastMode = isFastMode
prev.globalCacheStrategy = globalCacheStrategy
prev.betas = sortedBetas
prev.autoModeActive = autoModeActive
prev.isUsingOverage = isUsingOverage
prev.cachedMCEnabled = cachedMCEnabled
prev.effortValue = effortStr
prev.extraBodyHash = extraBodyHash
prev.buildDiffableContent = lazyDiffableContent
} catch (e: unknown) {
logError(e)
}
}
/**
* Phase 2 (post-call): Check the API response's cache tokens to determine
* if a cache break actually occurred. If it did, use the pending changes
* from phase 1 to explain why.
*/
export async function checkResponseForCacheBreak(
querySource: QuerySource,
cacheReadTokens: number,
cacheCreationTokens: number,
messages: Message[],
agentId?: AgentId,
requestId?: string | null,
): Promise<void> {
try {
const key = getTrackingKey(querySource, agentId)
if (!key) return
const state = previousStateBySource.get(key)
if (!state) return
// Skip excluded models (e.g., haiku has different caching behavior)
if (isExcludedModel(state.model)) return
const prevCacheRead = state.prevCacheReadTokens
state.prevCacheReadTokens = cacheReadTokens
// Calculate time since last call for TTL detection by finding the most recent
// assistant message timestamp in the messages array (before the current response)
const lastAssistantMessage = messages.findLast(m => m.type === 'assistant')
const timeSinceLastAssistantMsg = lastAssistantMessage
? Date.now() - new Date(lastAssistantMessage.timestamp).getTime()
: null
// Skip the first call β no previous value to compare against
if (prevCacheRead === null) return
const changes = state.pendingChanges
// Cache deletions via cached microcompact intentionally reduce the cached
// prefix. The drop in cache read tokens is expected β reset the baseline
// so we don't false-positive on the next call.
if (state.cacheDeletionsPending) {
state.cacheDeletionsPending = false
logForDebugging(
`[PROMPT CACHE] cache deletion applied, cache read: ${prevCacheRead} β ${cacheReadTokens} (expected drop)`,
)
// Don't flag as a break β the remaining state is still valid
state.pendingChanges = null
return
}
// Detect a cache break: cache read dropped >5% from previous AND
// the absolute drop exceeds the minimum threshold.
const tokenDrop = prevCacheRead - cacheReadTokens
if (
cacheReadTokens >= prevCacheRead * 0.95 ||
tokenDrop < MIN_CACHE_MISS_TOKENS
) {
state.pendingChanges = null
return
}
// Build explanation from pending changes (if any)
const parts: string[] = []
if (changes) {
if (changes.modelChanged) {
parts.push(
`model changed (${changes.previousModel} β ${changes.newModel})`,
)
}
if (changes.systemPromptChanged) {
const charDelta = changes.systemCharDelta
const charInfo =
charDelta === 0
? ''
: charDelta > 0
? ` (+${charDelta} chars)`
: ` (${charDelta} chars)`
parts.push(`system prompt changed${charInfo}`)
}
if (changes.toolSchemasChanged) {
const toolDiff =
changes.addedToolCount > 0 || changes.removedToolCount > 0
? ` (+${changes.addedToolCount}/-${changes.removedToolCount} tools)`
: ' (tool prompt/schema changed, same tool set)'
parts.push(`tools changed${toolDiff}`)
}
if (changes.fastModeChanged) {
parts.push('fast mode toggled')
}
if (changes.globalCacheStrategyChanged) {
parts.push(
`global cache strategy changed (${changes.prevGlobalCacheStrategy || 'none'} β ${changes.newGlobalCacheStrategy || 'none'})`,
)
}
if (
changes.cacheControlChanged &&
!changes.globalCacheStrategyChanged &&
!changes.systemPromptChanged
) {
// Only report as standalone cause if nothing else explains it β
// otherwise the scope/TTL flip is a consequence, not the root cause.
parts.push('cache_control changed (scope or TTL)')
}
if (changes.betasChanged) {
const added = changes.addedBetas.length
? `+${changes.addedBetas.join(',')}`
: ''
const removed = changes.removedBetas.length
? `-${changes.removedBetas.join(',')}`
: ''
const diff = [added, removed].filter(Boolean).join(' ')
parts.push(`betas changed${diff ? ` (${diff})` : ''}`)
}
if (changes.autoModeChanged) {
parts.push('auto mode toggled')
}
if (changes.overageChanged) {
parts.push('overage state changed (TTL latched, no flip)')
}
if (changes.cachedMCChanged) {
parts.push('cached microcompact toggled')
}
if (changes.effortChanged) {
parts.push(
`effort changed (${changes.prevEffortValue || 'default'} β ${changes.newEffortValue || 'default'})`,
)
}
if (changes.extraBodyChanged) {
parts.push('extra body params changed')
}
}
// Check if time gap suggests TTL expiration
const lastAssistantMsgOver5minAgo =
timeSinceLastAssistantMsg !== null &&
timeSinceLastAssistantMsg > CACHE_TTL_5MIN_MS
const lastAssistantMsgOver1hAgo =
timeSinceLastAssistantMsg !== null &&
timeSinceLastAssistantMsg > CACHE_TTL_1HOUR_MS
// Post PR #19823 BQ analysis (bq-queries/prompt-caching/cache_break_pr19823_analysis.sql):
// when all client-side flags are false and the gap is under TTL, ~90% of breaks
// are server-side routing/eviction or billed/inference disagreement. Label
// accordingly instead of implying a CC bug hunt.
let reason: string
if (parts.length > 0) {
reason = parts.join(', ')
} else if (lastAssistantMsgOver1hAgo) {
reason = 'possible 1h TTL expiry (prompt unchanged)'
} else if (lastAssistantMsgOver5minAgo) {
reason = 'possible 5min TTL expiry (prompt unchanged)'
} else if (timeSinceLastAssistantMsg !== null) {
reason = 'likely server-side (prompt unchanged, <5min gap)'
} else {
reason = 'unknown cause'
}
logEvent('tengu_prompt_cache_break', {
systemPromptChanged: changes?.systemPromptChanged ?? false,
toolSchemasChanged: changes?.toolSchemasChanged ?? false,
modelChanged: changes?.modelChanged ?? false,
fastModeChanged: changes?.fastModeChanged ?? false,
cacheControlChanged: changes?.cacheControlChanged ?? false,
globalCacheStrategyChanged: changes?.globalCacheStrategyChanged ?? false,
betasChanged: changes?.betasChanged ?? false,
autoModeChanged: changes?.autoModeChanged ?? false,
overageChanged: changes?.overageChanged ?? false,
cachedMCChanged: changes?.cachedMCChanged ?? false,
effortChanged: changes?.effortChanged ?? false,
extraBodyChanged: changes?.extraBodyChanged ?? false,
addedToolCount: changes?.addedToolCount ?? 0,
removedToolCount: changes?.removedToolCount ?? 0,
systemCharDelta: changes?.systemCharDelta ?? 0,
// Tool names are sanitized: built-in names are a fixed vocabulary,
// MCP tools collapse to 'mcp' (user-configured, could leak paths).
addedTools: (changes?.addedTools ?? [])
.map(sanitizeToolName)
.join(
',',
) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
removedTools: (changes?.removedTools ?? [])
.map(sanitizeToolName)
.join(
',',
) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
changedToolSchemas: (changes?.changedToolSchemas ?? [])
.map(sanitizeToolName)
.join(
',',
) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
// Beta header names and cache strategy are fixed enum-like values,
// not code or filepaths. requestId is an opaque server-generated ID.
addedBetas: (changes?.addedBetas ?? []).join(
',',
) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
removedBetas: (changes?.removedBetas ?? []).join(
',',
) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
prevGlobalCacheStrategy: (changes?.prevGlobalCacheStrategy ??
'') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
newGlobalCacheStrategy: (changes?.newGlobalCacheStrategy ??
'') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
callNumber: state.callCount,
prevCacheReadTokens: prevCacheRead,
cacheReadTokens,
cacheCreationTokens,
timeSinceLastAssistantMsg: timeSinceLastAssistantMsg ?? -1,
lastAssistantMsgOver5minAgo,
lastAssistantMsgOver1hAgo,
requestId: (requestId ??
'') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
// Write diff file for ant debugging via --debug. The path is included in
// the summary log so ants can find it (DevBar UI removed β event data
// flows reliably to BQ for analytics).
let diffPath: string | undefined
if (changes?.buildPrevDiffableContent) {
diffPath = await writeCacheBreakDiff(
changes.buildPrevDiffableContent(),
state.buildDiffableContent(),
)
}
const diffSuffix = diffPath ? `, diff: ${diffPath}` : ''
const summary = `[PROMPT CACHE BREAK] ${reason} [source=${querySource}, call #${state.callCount}, cache read: ${prevCacheRead} β ${cacheReadTokens}, creation: ${cacheCreationTokens}${diffSuffix}]`
logForDebugging(summary, { level: 'warn' })
state.pendingChanges = null
} catch (e: unknown) {
logError(e)
}
}
/**
* Call when cached microcompact sends cache_edits deletions.
* The next API response will have lower cache read tokens β that's
* expected, not a cache break.
*/
export function notifyCacheDeletion(
querySource: QuerySource,
agentId?: AgentId,
): void {
const key = getTrackingKey(querySource, agentId)
const state = key ? previousStateBySource.get(key) : undefined
if (state) {
state.cacheDeletionsPending = true
}
}
/**
* Call after compaction to reset the cache read baseline.
* Compaction legitimately reduces message count, so cache read tokens
* will naturally drop on the next call β that's not a break.
*/
export function notifyCompaction(
querySource: QuerySource,
agentId?: AgentId,
): void {
const key = getTrackingKey(querySource, agentId)
const state = key ? previousStateBySource.get(key) : undefined
if (state) {
state.prevCacheReadTokens = null
}
}
export function cleanupAgentTracking(agentId: AgentId): void {
previousStateBySource.delete(agentId)
}
export function resetPromptCacheBreakDetection(): void {
previousStateBySource.clear()
}
async function writeCacheBreakDiff(
prevContent: string,
newContent: string,
): Promise<string | undefined> {
try {
const diffPath = getCacheBreakDiffPath()
await mkdir(getClaudeTempDir(), { recursive: true })
const patch = createPatch(
'prompt-state',
prevContent,
newContent,
'before',
'after',
)
await writeFile(diffPath, patch)
return diffPath
} catch {
return undefined
}
}