π File detail
services/api/withRetry.ts
π― Use case
This file lives under `services/`, which covers long-lived services (LSP, MCP, OAuth, tool execution, memory, compaction, voice, settings sync, …). On the API surface it exposes BASE_DELAY_MS, RetryContext, CannotRetryError, FallbackTriggeredError, and getRetryDelay (and more) — mainly functions, hooks, or classes. Dependencies touch bun:bundle, @anthropic-ai, and src. It composes internal code from utils, analytics, rateLimitMocking, errors, and errorUtils (relative imports).
Generated from folder role, exports, dependency roots, and inline comments β not hand-reviewed for every path.
π§ Inline summary
import { feature } from 'bun:bundle' import type Anthropic from '@anthropic-ai/sdk' import { APIConnectionError, APIError,
π€ Exports (heuristic)
BASE_DELAY_MS, RetryContext, CannotRetryError, FallbackTriggeredError, getRetryDelay, parseMaxTokensContextOverflowError, is529Error, getDefaultMaxRetries
π External import roots
Package roots taken from `from "…"` import specifiers (relative paths omitted).
bun:bundle, @anthropic-ai, src
π₯οΈ Source preview
import { feature } from 'bun:bundle'
import type Anthropic from '@anthropic-ai/sdk'
import {
APIConnectionError,
APIError,
APIUserAbortError,
} from '@anthropic-ai/sdk'
import type { QuerySource } from 'src/constants/querySource.js'
import type { SystemAPIErrorMessage } from 'src/types/message.js'
import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
import { logForDebugging } from 'src/utils/debug.js'
import { logError } from 'src/utils/log.js'
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
import {
clearApiKeyHelperCache,
clearAwsCredentialsCache,
clearGcpCredentialsCache,
getClaudeAIOAuthTokens,
handleOAuth401Error,
isClaudeAISubscriber,
isEnterpriseSubscriber,
} from '../../utils/auth.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import {
type CooldownReason,
handleFastModeOverageRejection,
handleFastModeRejectedByAPI,
isFastModeCooldown,
isFastModeEnabled,
triggerFastModeCooldown,
} from '../../utils/fastMode.js'
import { isNonCustomOpusModel } from '../../utils/model/model.js'
import { disableKeepAlive } from '../../utils/proxy.js'
import { sleep } from '../../utils/sleep.js'
import type { ThinkingConfig } from '../../utils/thinking.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../analytics/index.js'
import {
checkMockRateLimitError,
isMockRateLimitError,
} from '../rateLimitMocking.js'
import { REPEATED_529_ERROR_MESSAGE } from './errors.js'
import { extractConnectionErrorDetails } from './errorUtils.js'
// Factory for the error thrown when a caller's AbortSignal fires mid-sleep.
const abortError = () => new APIUserAbortError()
// Retry attempts when CLAUDE_CODE_MAX_RETRIES is unset (loop runs maxRetries + 1 attempts).
const DEFAULT_MAX_RETRIES = 10
// Never shrink max_tokens below this floor when recovering from a context-overflow 400.
const FLOOR_OUTPUT_TOKENS = 3000
// Consecutive 529s tolerated before model fallback / hard failure.
const MAX_529_RETRIES = 3
// Base for exponential backoff: delay doubles per attempt, plus up to 25% jitter.
export const BASE_DELAY_MS = 500
// Foreground query sources where the user IS blocking on the result — these
// retry on 529. Everything else (summaries, titles, suggestions, classifiers)
// bails immediately: during a capacity cascade each retry is 3-10x gateway
// amplification, and the user never sees those fail anyway. New sources
// default to no-retry — add here only if the user is waiting on the result.
const FOREGROUND_529_RETRY_SOURCES = new Set<QuerySource>([
  'repl_main_thread',
  'repl_main_thread:outputStyle:custom',
  'repl_main_thread:outputStyle:Explanatory',
  'repl_main_thread:outputStyle:Learning',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
  'compact',
  'hook_agent',
  'hook_prompt',
  'verification_agent',
  'side_question',
  // Security classifiers — must complete for auto-mode correctness.
  // yoloClassifier.ts uses 'auto_mode' (not 'yolo_classifier' — that's
  // type-only). bash_classifier is ant-only; feature-gate so the string
  // tree-shakes out of external builds (excluded-strings.txt).
  'auto_mode',
  ...(feature('BASH_CLASSIFIER') ? (['bash_classifier'] as const) : []),
])
/**
 * Whether a 529 (overloaded) error from this query source should be retried.
 * Untagged call paths (undefined source) retry — the conservative default.
 */
function shouldRetry529(querySource: QuerySource | undefined): boolean {
  if (querySource === undefined) {
    return true
  }
  return FOREGROUND_529_RETRY_SOURCES.has(querySource)
}
// CLAUDE_CODE_UNATTENDED_RETRY: for unattended sessions (ant-only). Retries 429/529
// indefinitely with higher backoff and periodic keep-alive yields so the host
// environment does not mark the session idle mid-wait.
// TODO(ANT-344): the keep-alive via SystemAPIErrorMessage yields is a stopgap
// until there's a dedicated keep-alive channel.
// Backoff ceiling between persistent-mode attempts (5 minutes).
const PERSISTENT_MAX_BACKOFF_MS = 5 * 60 * 1000
// Hard cap on any single persistent-mode wait, even a server-directed one (6 hours).
const PERSISTENT_RESET_CAP_MS = 6 * 60 * 60 * 1000
// Chunk size for long sleeps so keep-alive yields reach stdout periodically.
const HEARTBEAT_INTERVAL_MS = 30_000
/**
 * True when the ant-only unattended-retry mode is both compiled in
 * (UNATTENDED_RETRY feature) and switched on via the environment.
 */
function isPersistentRetryEnabled(): boolean {
  if (!feature('UNATTENDED_RETRY')) {
    return false
  }
  return isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)
}
/** A 529 (overloaded) or 429 (rate limit) — the errors persistent mode retries forever. */
function isTransientCapacityError(error: unknown): boolean {
  if (is529Error(error)) {
    return true
  }
  return error instanceof APIError && error.status === 429
}
/**
 * Connection error caused by a dead keep-alive socket (ECONNRESET/EPIPE);
 * the retry loop reconnects with pooling disabled when it sees one.
 */
function isStaleConnectionError(error: unknown): boolean {
  if (!(error instanceof APIConnectionError)) {
    return false
  }
  const code = extractConnectionErrorDetails(error)?.code
  return code === 'ECONNRESET' || code === 'EPIPE'
}
/**
 * Mutable per-call state threaded through every retry attempt. withRetry
 * adjusts these fields between attempts (dropping fastMode after a rejection,
 * setting maxTokensOverride after a context-overflow 400).
 */
export interface RetryContext {
  // Set after a max_tokens context-overflow 400 to shrink the next request.
  maxTokensOverride?: number
  model: string
  thinkingConfig: ThinkingConfig
  // Present only when fast mode was enabled at loop start; may be flipped to
  // false mid-loop when the API rejects or rate-limits fast mode.
  fastMode?: boolean
}
/** Configuration for withRetry. */
interface RetryOptions {
  // Overrides getDefaultMaxRetries() when set.
  maxRetries?: number
  model: string
  // When set, repeated 529s throw FallbackTriggeredError instead of failing.
  fallbackModel?: string
  thinkingConfig: ThinkingConfig
  fastMode?: boolean
  // Aborts sleeps and attempts with APIUserAbortError.
  signal?: AbortSignal
  // Gates 529 retry behavior; see FOREGROUND_529_RETRY_SOURCES.
  querySource?: QuerySource
  /**
   * Pre-seed the consecutive 529 counter. Used when this retry loop is a
   * non-streaming fallback after a streaming 529 — the streaming 529 should
   * count toward MAX_529_RETRIES so total 529s-before-fallback is consistent
   * regardless of which request mode hit the overload.
   */
  initialConsecutive529Errors?: number
}
/**
 * Terminal failure: the wrapped error is not retryable or retries were
 * exhausted. Carries the RetryContext at failure time so callers can inspect
 * the model / fast-mode state that produced it.
 */
export class CannotRetryError extends Error {
  constructor(
    public readonly originalError: unknown,
    public readonly retryContext: RetryContext,
  ) {
    const message = errorMessage(originalError)
    super(message)
    // NOTE(review): name is 'RetryError', not 'CannotRetryError' — possibly a
    // historical artifact. Confirm no caller matches on error.name before
    // aligning it with the class name.
    this.name = 'RetryError'
    // Preserve the original stack trace if available
    if (originalError instanceof Error && originalError.stack) {
      this.stack = originalError.stack
    }
  }
}
/**
 * Signal (not a failure per se) that repeated 529s on the primary model
 * triggered a switch to the configured fallback model. Callers catch this
 * and re-issue the request against fallbackModel.
 */
export class FallbackTriggeredError extends Error {
  constructor(
    public readonly originalModel: string,
    public readonly fallbackModel: string,
  ) {
    const description = `Model fallback triggered: ${originalModel} -> ${fallbackModel}`
    super(description)
    this.name = 'FallbackTriggeredError'
  }
}
/**
 * Run `operation` against the API with retry, exponential backoff,
 * auth-refresh, fast-mode fallback and model-fallback handling.
 *
 * Yields SystemAPIErrorMessage values before each retry sleep (surfaced to
 * the user / stdout) and returns the operation's result on success.
 *
 * @param getClient Lazily constructs (or reconstructs) the API client; called
 *   on the first attempt and again after auth or stale-connection errors.
 * @param operation The request to run; receives the client, the 1-based
 *   attempt number, and the mutable RetryContext.
 * @param options Retry knobs — see RetryOptions.
 * @throws APIUserAbortError when options.signal aborts.
 * @throws FallbackTriggeredError after MAX_529_RETRIES consecutive 529s when
 *   options.fallbackModel is set.
 * @throws CannotRetryError when the error is not retryable or retries are
 *   exhausted (except in persistent mode, which loops indefinitely on
 *   transient capacity errors).
 */
export async function* withRetry<T>(
  getClient: () => Promise<Anthropic>,
  operation: (
    client: Anthropic,
    attempt: number,
    context: RetryContext,
  ) => Promise<T>,
  options: RetryOptions,
): AsyncGenerator<SystemAPIErrorMessage, T> {
  const maxRetries = getMaxRetries(options)
  // Mutated across attempts (fastMode downgrade, maxTokensOverride) and
  // passed to every operation() call.
  const retryContext: RetryContext = {
    model: options.model,
    thinkingConfig: options.thinkingConfig,
    ...(isFastModeEnabled() && { fastMode: options.fastMode }),
  }
  let client: Anthropic | null = null
  let consecutive529Errors = options.initialConsecutive529Errors ?? 0
  let lastError: unknown
  // Persistent mode clamps `attempt` below so the for-loop never exits;
  // persistentAttempt keeps growing and drives backoff/telemetry instead.
  let persistentAttempt = 0
  for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
    if (options.signal?.aborted) {
      throw new APIUserAbortError()
    }
    // Capture whether fast mode is active before this attempt
    // (fallback may change the state mid-loop)
    const wasFastModeActive = isFastModeEnabled()
      ? retryContext.fastMode && !isFastModeCooldown()
      : false
    try {
      // Check for mock rate limits (used by /mock-limits command for Ant employees)
      if (process.env.USER_TYPE === 'ant') {
        const mockError = checkMockRateLimitError(
          retryContext.model,
          wasFastModeActive,
        )
        if (mockError) {
          throw mockError
        }
      }
      // Get a fresh client instance on first attempt or after authentication errors
      // - 401 for first-party API authentication failures
      // - 403 "OAuth token has been revoked" (another process refreshed the token)
      // - Bedrock-specific auth errors (403 or CredentialsProviderError)
      // - Vertex-specific auth errors (credential refresh failures, 401)
      // - ECONNRESET/EPIPE: stale keep-alive socket; disable pooling and reconnect
      const isStaleConnection = isStaleConnectionError(lastError)
      if (
        isStaleConnection &&
        getFeatureValue_CACHED_MAY_BE_STALE(
          'tengu_disable_keepalive_on_econnreset',
          false,
        )
      ) {
        logForDebugging(
          'Stale connection (ECONNRESET/EPIPE) β disabling keep-alive for retry',
        )
        disableKeepAlive()
      }
      if (
        client === null ||
        (lastError instanceof APIError && lastError.status === 401) ||
        isOAuthTokenRevokedError(lastError) ||
        isBedrockAuthError(lastError) ||
        isVertexAuthError(lastError) ||
        isStaleConnection
      ) {
        // On 401 "token expired" or 403 "token revoked", force a token refresh
        if (
          (lastError instanceof APIError && lastError.status === 401) ||
          isOAuthTokenRevokedError(lastError)
        ) {
          const failedAccessToken = getClaudeAIOAuthTokens()?.accessToken
          if (failedAccessToken) {
            await handleOAuth401Error(failedAccessToken)
          }
        }
        client = await getClient()
      }
      return await operation(client, attempt, retryContext)
    } catch (error) {
      lastError = error
      logForDebugging(
        `API error (attempt ${attempt}/${maxRetries + 1}): ${error instanceof APIError ? `${error.status} ${error.message}` : errorMessage(error)}`,
        { level: 'error' },
      )
      // Fast mode fallback: on 429/529, either wait and retry (short delays)
      // or fall back to standard speed (long delays) to avoid cache thrashing.
      // Skip in persistent mode: the short-retry path below loops with fast
      // mode still active, so its `continue` never reaches the attempt clamp
      // and the for-loop terminates. Persistent sessions want the chunked
      // keep-alive path instead of fast-mode cache-preservation anyway.
      if (
        wasFastModeActive &&
        !isPersistentRetryEnabled() &&
        error instanceof APIError &&
        (error.status === 429 || is529Error(error))
      ) {
        // If the 429 is specifically because extra usage (overage) is not
        // available, permanently disable fast mode with a specific message.
        const overageReason = error.headers?.get(
          'anthropic-ratelimit-unified-overage-disabled-reason',
        )
        if (overageReason !== null && overageReason !== undefined) {
          handleFastModeOverageRejection(overageReason)
          retryContext.fastMode = false
          continue
        }
        const retryAfterMs = getRetryAfterMs(error)
        if (retryAfterMs !== null && retryAfterMs < SHORT_RETRY_THRESHOLD_MS) {
          // Short retry-after: wait and retry with fast mode still active
          // to preserve prompt cache (same model name on retry).
          await sleep(retryAfterMs, options.signal, { abortError })
          continue
        }
        // Long or unknown retry-after: enter cooldown (switches to standard
        // speed model), with a minimum floor to avoid flip-flopping.
        const cooldownMs = Math.max(
          retryAfterMs ?? DEFAULT_FAST_MODE_FALLBACK_HOLD_MS,
          MIN_COOLDOWN_MS,
        )
        const cooldownReason: CooldownReason = is529Error(error)
          ? 'overloaded'
          : 'rate_limit'
        triggerFastModeCooldown(Date.now() + cooldownMs, cooldownReason)
        if (isFastModeEnabled()) {
          retryContext.fastMode = false
        }
        continue
      }
      // Fast mode fallback: if the API rejects the fast mode parameter
      // (e.g., org doesn't have fast mode enabled), permanently disable fast
      // mode and retry at standard speed.
      if (wasFastModeActive && isFastModeNotEnabledError(error)) {
        handleFastModeRejectedByAPI()
        retryContext.fastMode = false
        continue
      }
      // Non-foreground sources bail immediately on 529 — no retry amplification
      // during capacity cascades. User never sees these fail.
      if (is529Error(error) && !shouldRetry529(options.querySource)) {
        logEvent('tengu_api_529_background_dropped', {
          query_source:
            options.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        })
        throw new CannotRetryError(error, retryContext)
      }
      // Track consecutive 529 errors
      if (
        is529Error(error) &&
        // If FALLBACK_FOR_ALL_PRIMARY_MODELS is not set, fall through only if the primary model is a non-custom Opus model.
        // TODO: Revisit if the isNonCustomOpusModel check should still exist, or if isNonCustomOpusModel is a stale artifact of when Claude Code was hardcoded on Opus.
        (process.env.FALLBACK_FOR_ALL_PRIMARY_MODELS ||
          (!isClaudeAISubscriber() && isNonCustomOpusModel(options.model)))
      ) {
        consecutive529Errors++
        if (consecutive529Errors >= MAX_529_RETRIES) {
          // Check if fallback model is specified
          if (options.fallbackModel) {
            logEvent('tengu_api_opus_fallback_triggered', {
              original_model:
                options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
              fallback_model:
                options.fallbackModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
              provider: getAPIProviderForStatsig(),
            })
            // Throw special error to indicate fallback was triggered
            throw new FallbackTriggeredError(
              options.model,
              options.fallbackModel,
            )
          }
          if (
            process.env.USER_TYPE === 'external' &&
            !process.env.IS_SANDBOX &&
            !isPersistentRetryEnabled()
          ) {
            logEvent('tengu_api_custom_529_overloaded_error', {})
            throw new CannotRetryError(
              new Error(REPEATED_529_ERROR_MESSAGE),
              retryContext,
            )
          }
        }
      }
      // Only retry if the error indicates we should
      const persistent =
        isPersistentRetryEnabled() && isTransientCapacityError(error)
      if (attempt > maxRetries && !persistent) {
        throw new CannotRetryError(error, retryContext)
      }
      // AWS/GCP errors aren't always APIError, but can be retried
      const handledCloudAuthError =
        handleAwsCredentialError(error) || handleGcpCredentialError(error)
      if (
        !handledCloudAuthError &&
        (!(error instanceof APIError) || !shouldRetry(error))
      ) {
        throw new CannotRetryError(error, retryContext)
      }
      // Handle max tokens context overflow errors by adjusting max_tokens for the next attempt
      // NOTE: With extended-context-window beta, this 400 error should not occur.
      // The API now returns 'model_context_window_exceeded' stop_reason instead.
      // Keeping for backward compatibility.
      if (error instanceof APIError) {
        const overflowData = parseMaxTokensContextOverflowError(error)
        if (overflowData) {
          const { inputTokens, contextLimit } = overflowData
          const safetyBuffer = 1000
          const availableContext = Math.max(
            0,
            contextLimit - inputTokens - safetyBuffer,
          )
          if (availableContext < FLOOR_OUTPUT_TOKENS) {
            logError(
              new Error(
                `availableContext ${availableContext} is less than FLOOR_OUTPUT_TOKENS ${FLOOR_OUTPUT_TOKENS}`,
              ),
            )
            throw error
          }
          // Ensure we have enough tokens for thinking + at least 1 output token
          const minRequired =
            (retryContext.thinkingConfig.type === 'enabled'
              ? retryContext.thinkingConfig.budgetTokens
              : 0) + 1
          const adjustedMaxTokens = Math.max(
            FLOOR_OUTPUT_TOKENS,
            availableContext,
            minRequired,
          )
          retryContext.maxTokensOverride = adjustedMaxTokens
          logEvent('tengu_max_tokens_context_overflow_adjustment', {
            inputTokens,
            contextLimit,
            adjustedMaxTokens,
            attempt,
          })
          continue
        }
      }
      // For other errors, proceed with normal retry logic
      // Get retry-after header if available
      const retryAfter = getRetryAfter(error)
      let delayMs: number
      if (persistent && error instanceof APIError && error.status === 429) {
        persistentAttempt++
        // Window-based limits (e.g. 5hr Max/Pro) include a reset timestamp.
        // Wait until reset rather than polling every 5 min uselessly.
        const resetDelay = getRateLimitResetDelayMs(error)
        delayMs =
          resetDelay ??
          Math.min(
            getRetryDelay(
              persistentAttempt,
              retryAfter,
              PERSISTENT_MAX_BACKOFF_MS,
            ),
            PERSISTENT_RESET_CAP_MS,
          )
      } else if (persistent) {
        persistentAttempt++
        // Retry-After is a server directive and bypasses maxDelayMs inside
        // getRetryDelay (intentional — honoring it is correct). Cap at the
        // 6hr reset-cap here so a pathological header can't wait unbounded.
        delayMs = Math.min(
          getRetryDelay(
            persistentAttempt,
            retryAfter,
            PERSISTENT_MAX_BACKOFF_MS,
          ),
          PERSISTENT_RESET_CAP_MS,
        )
      } else {
        delayMs = getRetryDelay(attempt, retryAfter)
      }
      // In persistent mode the for-loop `attempt` is clamped at maxRetries+1;
      // use persistentAttempt for telemetry/yields so they show the true count.
      const reportedAttempt = persistent ? persistentAttempt : attempt
      logEvent('tengu_api_retry', {
        attempt: reportedAttempt,
        delayMs: delayMs,
        error: (error as APIError)
          .message as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        status: (error as APIError).status,
        provider: getAPIProviderForStatsig(),
      })
      if (persistent) {
        if (delayMs > 60_000) {
          logEvent('tengu_api_persistent_retry_wait', {
            status: (error as APIError).status,
            delayMs,
            attempt: reportedAttempt,
            provider: getAPIProviderForStatsig(),
          })
        }
        // Chunk long sleeps so the host sees periodic stdout activity and
        // does not mark the session idle. Each yield surfaces as
        // {type:'system', subtype:'api_retry'} on stdout via QueryEngine.
        let remaining = delayMs
        while (remaining > 0) {
          if (options.signal?.aborted) throw new APIUserAbortError()
          if (error instanceof APIError) {
            yield createSystemAPIErrorMessage(
              error,
              remaining,
              reportedAttempt,
              maxRetries,
            )
          }
          const chunk = Math.min(remaining, HEARTBEAT_INTERVAL_MS)
          await sleep(chunk, options.signal, { abortError })
          remaining -= chunk
        }
        // Clamp so the for-loop never terminates. Backoff uses the separate
        // persistentAttempt counter which keeps growing to the 5-min cap.
        if (attempt >= maxRetries) attempt = maxRetries
      } else {
        if (error instanceof APIError) {
          yield createSystemAPIErrorMessage(error, delayMs, attempt, maxRetries)
        }
        await sleep(delayMs, options.signal, { abortError })
      }
    }
  }
  // Only reachable when the loop exhausts all attempts without a terminal
  // throw inside the catch (e.g. NaN maxRetries skips the loop entirely).
  throw new CannotRetryError(lastError, retryContext)
}
/**
 * Extract the raw retry-after header value from an error, or null.
 * Two header shapes appear in practice: a plain record on some transport
 * errors, and a fetch Headers object on SDK APIErrors.
 */
function getRetryAfter(error: unknown): string | null {
  const record = (error as { headers?: { 'retry-after'?: string } }).headers
  const direct = record?.['retry-after']
  if (direct) {
    return direct
  }
  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  const viaGet = (error as { headers?: Headers }).headers?.get?.('retry-after')
  return viaGet ?? null
}
/**
 * Compute the delay before the next retry attempt, in milliseconds.
 *
 * A server-provided Retry-After header (delay-seconds form) is honored
 * verbatim and deliberately bypasses maxDelayMs — callers that need a hard
 * cap (persistent mode) apply it themselves. Note Retry-After may also be an
 * HTTP-date per RFC 9110; that form fails parseInt and falls through to
 * backoff.
 *
 * @param attempt 1-based attempt number driving exponential backoff.
 * @param retryAfterHeader raw retry-after header value, if any.
 * @param maxDelayMs ceiling for the computed base delay (not the header).
 * @returns delay in ms: header seconds * 1000, or capped exponential backoff
 *   (BASE_DELAY_MS * 2^(attempt-1)) plus up to 25% jitter.
 */
export function getRetryDelay(
  attempt: number,
  retryAfterHeader?: string | null,
  maxDelayMs = 32000,
): number {
  if (retryAfterHeader) {
    const seconds = parseInt(retryAfterHeader, 10)
    // Guard against negative values: previously a header like "-5" produced
    // a negative delay. Non-numeric (HTTP-date) headers parse to NaN and
    // fall through to backoff.
    if (!isNaN(seconds) && seconds >= 0) {
      return seconds * 1000
    }
  }
  const baseDelay = Math.min(
    BASE_DELAY_MS * Math.pow(2, attempt - 1),
    maxDelayMs,
  )
  // Up to 25% jitter to de-synchronize clients retrying in lockstep.
  const jitter = Math.random() * 0.25 * baseDelay
  return baseDelay + jitter
}
/**
 * Parse a 400 "input length and max_tokens exceed context limit" error into
 * its token counts, or undefined when the error is anything else.
 */
export function parseMaxTokensContextOverflowError(error: APIError):
  | {
      inputTokens: number
      maxTokens: number
      contextLimit: number
    }
  | undefined {
  // Only a 400 carrying the known overflow wording is parseable.
  if (error.status !== 400 || !error.message) {
    return undefined
  }
  if (
    !error.message.includes(
      'input length and `max_tokens` exceed context limit',
    )
  ) {
    return undefined
  }
  // Example format: "input length and `max_tokens` exceed context limit: 188059 + 20000 > 200000"
  const match = error.message.match(
    /input length and `max_tokens` exceed context limit: (\d+) \+ (\d+) > (\d+)/,
  )
  if (!match || match.length !== 4) {
    return undefined
  }
  const [, inputStr, maxStr, limitStr] = match
  if (!inputStr || !maxStr || !limitStr) {
    // Defensive: \d+ groups should always capture; log if the regex contract breaks.
    logError(
      new Error(
        'Unable to parse max_tokens from max_tokens exceed context limit error message',
      ),
    )
    return undefined
  }
  const inputTokens = parseInt(inputStr, 10)
  const maxTokens = parseInt(maxStr, 10)
  const contextLimit = parseInt(limitStr, 10)
  if ([inputTokens, maxTokens, contextLimit].some(Number.isNaN)) {
    return undefined
  }
  return { inputTokens, maxTokens, contextLimit }
}
// TODO: Replace with a response header check once the API adds a dedicated
// header for fast-mode rejection (e.g., x-fast-mode-rejected). String-matching
// the error message is fragile and will break if the API wording changes.
function isFastModeNotEnabledError(error: unknown): boolean {
  if (!(error instanceof APIError) || error.status !== 400) {
    return false
  }
  return error.message?.includes('Fast mode is not enabled') ?? false
}
/**
 * True for 529 (overloaded) errors. The SDK sometimes fails to propagate the
 * 529 status during streaming, so the overloaded_error payload in the message
 * is matched as a fallback.
 */
export function is529Error(error: unknown): boolean {
  if (!(error instanceof APIError)) {
    return false
  }
  if (error.status === 529) {
    return true
  }
  return error.message?.includes('"type":"overloaded_error"') ?? false
}
/** 403 whose message says the OAuth token was revoked (another process refreshed it). */
function isOAuthTokenRevokedError(error: unknown): boolean {
  if (!(error instanceof APIError) || error.status !== 403) {
    return false
  }
  return error.message?.includes('OAuth token has been revoked') ?? false
}
/** Bedrock-mode auth failure: credentials-provider rejection or a generic 403. */
function isBedrockAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    return false
  }
  // AWS libs reject without an API call if .aws holds a past Expiration value
  // otherwise, API calls that receive expired tokens give generic 403
  // "The security token included in the request is invalid"
  if (isAwsCredentialsProviderError(error)) {
    return true
  }
  return error instanceof APIError && error.status === 403
}
/**
 * Clear AWS auth caches if appropriate.
 * @returns true if action was taken.
 */
function handleAwsCredentialError(error: unknown): boolean {
  if (!isBedrockAuthError(error)) {
    return false
  }
  clearAwsCredentialsCache()
  return true
}
// google-auth-library throws plain Error (no typed name like AWS's
// CredentialsProviderError). Match common SDK-level credential-failure messages.
function isGoogleAuthLibraryCredentialError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false
  }
  const msg = error.message
  const credentialFailureMarkers = [
    'Could not load the default credentials',
    'Could not refresh access token',
    'invalid_grant',
  ]
  return credentialFailureMarkers.some(marker => msg.includes(marker))
}
/** Vertex-mode auth failure: SDK-level credential error or a server-side 401. */
function isVertexAuthError(error: unknown): boolean {
  if (!isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    return false
  }
  // SDK-level: google-auth-library fails in prepareOptions() before the HTTP call
  if (isGoogleAuthLibraryCredentialError(error)) {
    return true
  }
  // Server-side: Vertex returns 401 for expired/invalid tokens
  return error instanceof APIError && error.status === 401
}
/**
 * Clear GCP auth caches if appropriate.
 * @returns true if action was taken.
 */
function handleGcpCredentialError(error: unknown): boolean {
  if (!isVertexAuthError(error)) {
    return false
  }
  clearGcpCredentialsCache()
  return true
}
/**
 * Decide whether an APIError is retryable at all (the delay is computed
 * separately by getRetryDelay). The check order is the contract: mock errors
 * and persistent/CCR-mode overrides come before the server's x-should-retry
 * directive, which itself comes before the status-code fallbacks.
 */
function shouldRetry(error: APIError): boolean {
  // Never retry mock errors - they're from /mock-limits command for testing
  if (isMockRateLimitError(error)) {
    return false
  }
  // Persistent mode: 429/529 always retryable, bypass subscriber gates and
  // x-should-retry header.
  if (isPersistentRetryEnabled() && isTransientCapacityError(error)) {
    return true
  }
  // CCR mode: auth is via infrastructure-provided JWTs, so a 401/403 is a
  // transient blip (auth service flap, network hiccup) rather than bad
  // credentials. Bypass x-should-retry:false — the server assumes we'd retry
  // the same bad key, but our key is fine.
  if (
    isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) &&
    (error.status === 401 || error.status === 403)
  ) {
    return true
  }
  // Check for overloaded errors first by examining the message content
  // The SDK sometimes fails to properly pass the 529 status code during streaming,
  // so we need to check the error message directly
  if (error.message?.includes('"type":"overloaded_error"')) {
    return true
  }
  // Check for max tokens context overflow errors that we can handle
  if (parseMaxTokensContextOverflowError(error)) {
    return true
  }
  // Note this is not a standard header.
  const shouldRetryHeader = error.headers?.get('x-should-retry')
  // If the server explicitly says whether or not to retry, obey.
  // For Max and Pro users, should-retry is true, but in several hours, so we shouldn't.
  // Enterprise users can retry because they typically use PAYG instead of rate limits.
  if (
    shouldRetryHeader === 'true' &&
    (!isClaudeAISubscriber() || isEnterpriseSubscriber())
  ) {
    return true
  }
  // Ants can ignore x-should-retry: false for 5xx server errors only.
  // For other status codes (401, 403, 400, 429, etc.), respect the header.
  if (shouldRetryHeader === 'false') {
    const is5xxError = error.status !== undefined && error.status >= 500
    if (!(process.env.USER_TYPE === 'ant' && is5xxError)) {
      return false
    }
  }
  if (error instanceof APIConnectionError) {
    return true
  }
  if (!error.status) return false
  // Retry on request timeouts.
  if (error.status === 408) return true
  // Retry on lock timeouts.
  if (error.status === 409) return true
  // Retry on rate limits, but not for ClaudeAI Subscription users
  // Enterprise users can retry because they typically use PAYG instead of rate limits
  if (error.status === 429) {
    return !isClaudeAISubscriber() || isEnterpriseSubscriber()
  }
  // Clear API key cache on 401 and allow retry.
  // OAuth token handling is done in the main retry loop via handleOAuth401Error.
  if (error.status === 401) {
    clearApiKeyHelperCache()
    return true
  }
  // Retry on 403 "token revoked" (same refresh logic as 401, see above)
  if (isOAuthTokenRevokedError(error)) {
    return true
  }
  // Retry internal errors.
  if (error.status && error.status >= 500) return true
  return false
}
/**
 * Max retry count: CLAUDE_CODE_MAX_RETRIES when set to a valid non-negative
 * integer, otherwise DEFAULT_MAX_RETRIES.
 *
 * The parse is guarded: previously a non-numeric value returned NaN, which
 * made withRetry's `attempt <= maxRetries + 1` condition false on the first
 * iteration — the loop never ran and the call failed immediately with
 * CannotRetryError(undefined).
 */
export function getDefaultMaxRetries(): number {
  const raw = process.env.CLAUDE_CODE_MAX_RETRIES
  if (raw) {
    const parsed = parseInt(raw, 10)
    if (!isNaN(parsed) && parsed >= 0) {
      return parsed
    }
  }
  return DEFAULT_MAX_RETRIES
}
/** Per-call max retries: the explicit option wins over the env/default value. */
function getMaxRetries(options: RetryOptions): number {
  const { maxRetries } = options
  return maxRetries ?? getDefaultMaxRetries()
}
// Cooldown length when a fast-mode 429/529 carries no usable retry-after.
const DEFAULT_FAST_MODE_FALLBACK_HOLD_MS = 30 * 60 * 1000 // 30 minutes
// Retry-after below this keeps fast mode active (preserves prompt cache).
const SHORT_RETRY_THRESHOLD_MS = 20 * 1000 // 20 seconds
// Floor on fast-mode cooldown to avoid flip-flopping between speeds.
const MIN_COOLDOWN_MS = 10 * 60 * 1000 // 10 minutes
function getRetryAfterMs(error: APIError): number | null {
const retryAfter = getRetryAfter(error)
if (retryAfter) {
const seconds = parseInt(retryAfter, 10)
if (!isNaN(seconds)) {
return seconds * 1000
}
}
return null
}
/**
 * Milliseconds until the unified rate-limit window resets, from the
 * anthropic-ratelimit-unified-reset header (unix seconds). Null when the
 * header is missing, non-numeric, or already in the past; capped at the
 * persistent-mode 6hr ceiling.
 */
function getRateLimitResetDelayMs(error: APIError): number | null {
  const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset')
  if (!resetHeader) {
    return null
  }
  const resetUnixSec = Number(resetHeader)
  if (!Number.isFinite(resetUnixSec)) {
    return null
  }
  const waitMs = resetUnixSec * 1000 - Date.now()
  return waitMs > 0 ? Math.min(waitMs, PERSISTENT_RESET_CAP_MS) : null
}