File detail
bridge/bridgeMain.ts
Use case
This file lives under "bridge/", which covers the bridge between the UI/shell and the agent (IPC, REPL hooks, permissions, session glue). On the API surface it exposes BackoffConfig, runBridgeLoop, isConnectionError, isServerError, and ParsedArgs (and more) — mainly functions, hooks, or classes. Dependencies touch bun:bundle, crypto, Node OS/process metadata, and Node path helpers. It composes internal code from constants, services, utils, bridgeApi, and bridgeStatusUtil (relative imports).
Generated from folder role, exports, dependency roots, and inline comments β not hand-reviewed for every path.
Inline summary
import { feature } from 'bun:bundle' import { randomUUID } from 'crypto' import { hostname, tmpdir } from 'os' import { basename, join, resolve } from 'path' import { getRemoteSessionUrl } from '../constants/product.js'
Exports (heuristic)
BackoffConfig, runBridgeLoop, isConnectionError, isServerError, ParsedArgs, parseArgs, bridgeMain, BridgeHeadlessPermanentError, HeadlessBridgeOpts, runBridgeHeadless
External import roots
Package roots from `from "…"` specifiers (relative paths omitted).
bun:bundle, crypto, os, path
Source preview
import { feature } from 'bun:bundle'
import { randomUUID } from 'crypto'
import { hostname, tmpdir } from 'os'
import { basename, join, resolve } from 'path'
import { getRemoteSessionUrl } from '../constants/product.js'
import { shutdownDatadog } from '../services/analytics/datadog.js'
import { shutdown1PEventLogging } from '../services/analytics/firstPartyEventLogger.js'
import { checkGate_CACHED_OR_BLOCKING } from '../services/analytics/growthbook.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
logEventAsync,
} from '../services/analytics/index.js'
import { isInBundledMode } from '../utils/bundledMode.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
import { errorMessage } from '../utils/errors.js'
import { truncateToWidth } from '../utils/format.js'
import { logError } from '../utils/log.js'
import { sleep } from '../utils/sleep.js'
import { createAgentWorktree, removeAgentWorktree } from '../utils/worktree.js'
import {
BridgeFatalError,
createBridgeApiClient,
isExpiredErrorType,
isSuppressible403,
validateBridgeId,
} from './bridgeApi.js'
import { formatDuration } from './bridgeStatusUtil.js'
import { createBridgeLogger } from './bridgeUI.js'
import { createCapacityWake } from './capacityWake.js'
import { describeAxiosError } from './debugUtils.js'
import { createTokenRefreshScheduler } from './jwtUtils.js'
import { getPollIntervalConfig } from './pollConfig.js'
import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
import { createSessionSpawner, safeFilenameId } from './sessionRunner.js'
import { getTrustedDeviceToken } from './trustedDevice.js'
import {
BRIDGE_LOGIN_ERROR,
type BridgeApiClient,
type BridgeConfig,
type BridgeLogger,
DEFAULT_SESSION_TIMEOUT_MS,
type SessionDoneStatus,
type SessionHandle,
type SessionSpawner,
type SessionSpawnOpts,
type SpawnMode,
} from './types.js'
import {
buildCCRv2SdkUrl,
buildSdkUrl,
decodeWorkSecret,
registerWorker,
sameSessionId,
} from './workSecret.js'
/**
 * Backoff tuning for the bridge poll loop. Connection errors (conn*) and
 * other errors (general*) carry separate budgets; the *GiveUpMs values
 * bound how long sustained errors are tolerated before giving up.
 */
export type BackoffConfig = {
  /** Initial delay after a connection error (ms). */
  connInitialMs: number
  /** Cap on the connection-error backoff delay (ms). */
  connCapMs: number
  /** Give up once connection errors have persisted this long (ms). */
  connGiveUpMs: number
  /** Initial delay after a non-connection error (ms). */
  generalInitialMs: number
  /** Cap on the general-error backoff delay (ms). */
  generalCapMs: number
  /** Give up once general errors have persisted this long (ms). */
  generalGiveUpMs: number
  /** SIGTERM→SIGKILL grace period on shutdown. Default 30s. */
  shutdownGraceMs?: number
  /** stopWorkWithRetry base delay (1s/2s/4s backoff). Default 1000ms. */
  stopWorkBaseDelayMs?: number
}
/** Defaults used when runBridgeLoop is called without an explicit BackoffConfig. */
const DEFAULT_BACKOFF: BackoffConfig = {
  connInitialMs: 2_000,
  connCapMs: 120_000, // 2 minutes
  connGiveUpMs: 600_000, // 10 minutes
  generalInitialMs: 500,
  generalCapMs: 30_000, // 30 seconds
  generalGiveUpMs: 600_000, // 10 minutes
  // shutdownGraceMs / stopWorkBaseDelayMs intentionally omitted — their
  // documented defaults (30s / 1000ms) are presumably applied at the use
  // sites, which are not visible in this chunk. TODO confirm.
}
/** Status update interval for the live display (ms). */
const STATUS_UPDATE_INTERVAL_MS = 1_000
// Default session cap for multi-session spawn modes. Consumer is not in
// this view — presumably the --spawn arg default in parseArgs; verify.
const SPAWN_SESSIONS_DEFAULT = 32
/**
* GrowthBook gate for multi-session spawn modes (--spawn / --capacity / --create-session-in-dir).
* Sibling of tengu_ccr_bridge_multi_environment (multiple envs per host:dir) β
* this one enables multiple sessions per environment.
* Rollout staged via targeting rules: ants first, then gradual external.
*
* Uses the blocking gate check so a stale disk-cache miss doesn't unfairly
* deny access. The fast path (cache has true) is still instant; only the
* cold-start path awaits the server fetch, and that fetch also seeds the
* disk cache for next time.
*/
async function isMultiSessionSpawnEnabled(): Promise<boolean> {
  const enabled = await checkGate_CACHED_OR_BLOCKING(
    'tengu_ccr_bridge_multi_session',
  )
  return enabled
}
/**
* Returns the threshold for detecting system sleep/wake in the poll loop.
* Must exceed the max backoff cap β otherwise normal backoff delays trigger
* false sleep detection (resetting the error budget indefinitely). Using
* 2Γ the connection backoff cap, matching the pattern in WebSocketTransport
* and replBridge.
*/
function pollSleepDetectionThresholdMs(backoff: BackoffConfig): number {
  // 2× the connection backoff cap: a gap longer than any legitimate backoff
  // delay, so only real clock jumps (system sleep) cross it.
  const sleepDetectionMultiplier = 2
  return sleepDetectionMultiplier * backoff.connCapMs
}
/**
* Returns the args that must precede CLI flags when spawning a child claude
* process. In compiled binaries, process.execPath is the claude binary itself
* and args go directly to it. In npm installs (node running cli.js),
* process.execPath is the node runtime β the child spawn must pass the script
* path as the first arg, otherwise node interprets --sdk-url as a node option
* and exits with "bad option: --sdk-url". See anthropics/claude-code#28334.
*/
function spawnScriptArgs(): string[] {
  const scriptPath = process.argv[1]
  // Compiled binary (or no script path available): child args go straight
  // to the executable.
  if (isInBundledMode() || !scriptPath) {
    return []
  }
  // npm install: node is the executable, so the script path must precede
  // any CLI flags in the child spawn.
  return [scriptPath]
}
/**
 * Spawn a session via the given spawner. A throwing spawn is converted into
 * its error-message string (after logging) rather than propagating, so the
 * caller can branch on handle-vs-string.
 */
function safeSpawn(
  spawner: SessionSpawner,
  opts: SessionSpawnOpts,
  dir: string,
): SessionHandle | string {
  let handle: SessionHandle
  try {
    handle = spawner.spawn(opts, dir)
  } catch (err) {
    const message = errorMessage(err)
    logError(new Error(`Session spawn failed: ${message}`))
    return message
  }
  return handle
}
export async function runBridgeLoop(
config: BridgeConfig,
environmentId: string,
environmentSecret: string,
api: BridgeApiClient,
spawner: SessionSpawner,
logger: BridgeLogger,
signal: AbortSignal,
backoffConfig: BackoffConfig = DEFAULT_BACKOFF,
initialSessionId?: string,
getAccessToken?: () => string | undefined | Promise<string | undefined>,
): Promise<void> {
// Local abort controller so that onSessionDone can stop the poll loop.
// Linked to the incoming signal so external aborts also work.
const controller = new AbortController()
if (signal.aborted) {
controller.abort()
} else {
signal.addEventListener('abort', () => controller.abort(), { once: true })
}
const loopSignal = controller.signal
const activeSessions = new Map<string, SessionHandle>()
const sessionStartTimes = new Map<string, number>()
const sessionWorkIds = new Map<string, string>()
// Compat-surface ID (session_*) computed once at spawn and cached so
// cleanup and status-update ticks use the same key regardless of whether
// the tengu_bridge_repl_v2_cse_shim_enabled gate flips mid-session.
const sessionCompatIds = new Map<string, string>()
// Session ingress JWTs for heartbeat auth, keyed by sessionId.
// Stored separately from handle.accessToken because the token refresh
// scheduler overwrites that field with the OAuth token (~3h55m in).
const sessionIngressTokens = new Map<string, string>()
const sessionTimers = new Map<string, ReturnType<typeof setTimeout>>()
const completedWorkIds = new Set<string>()
const sessionWorktrees = new Map<
string,
{
worktreePath: string
worktreeBranch?: string
gitRoot?: string
hookBased?: boolean
}
>()
// Track sessions killed by the timeout watchdog so onSessionDone can
// distinguish them from server-initiated or shutdown interrupts.
const timedOutSessions = new Set<string>()
// Sessions that already have a title (server-set or bridge-derived) so
// onFirstUserMessage doesn't clobber a user-assigned --name / web rename.
// Keyed by compatSessionId to match logger.setSessionTitle's key.
const titledSessions = new Set<string>()
// Signal to wake the at-capacity sleep early when a session completes,
// so the bridge can immediately accept new work.
const capacityWake = createCapacityWake(loopSignal)
/**
 * Heartbeat all active work items.
 * Returns 'ok' if at least one heartbeat succeeded, 'auth_failed' if any
 * got a 401/403 (JWT expired — re-queued via reconnectSession so the next
 * poll delivers fresh work), 'fatal' if any got another BridgeFatalError
 * status (404/410: environment expired/deleted, retrying is pointless),
 * or 'failed' if all failed for other reasons.
 */
async function heartbeatActiveWorkItems(): Promise<
  'ok' | 'auth_failed' | 'fatal' | 'failed'
> {
  let anySuccess = false
  let anyFatal = false
  const authFailedSessions: string[] = []
  for (const [sessionId] of activeSessions) {
    const workId = sessionWorkIds.get(sessionId)
    const ingressToken = sessionIngressTokens.get(sessionId)
    // Sessions with no work id or ingress token cannot be heartbeated; skip.
    if (!workId || !ingressToken) {
      continue
    }
    try {
      await api.heartbeatWork(environmentId, workId, ingressToken)
      anySuccess = true
    } catch (err) {
      logForDebugging(
        `[bridge:heartbeat] Failed for sessionId=${sessionId} workId=${workId}: ${errorMessage(err)}`,
      )
      // Non-BridgeFatalError failures (e.g. transient network errors) are
      // only logged — the next heartbeat cycle retries them.
      if (err instanceof BridgeFatalError) {
        logEvent('tengu_bridge_heartbeat_error', {
          status:
            err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          error_type: (err.status === 401 || err.status === 403
            ? 'auth_failed'
            : 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        })
        if (err.status === 401 || err.status === 403) {
          authFailedSessions.push(sessionId)
        } else {
          // 404/410 = environment expired or deleted — no point retrying
          anyFatal = true
        }
      }
    }
  }
  // JWT expired → trigger server-side re-dispatch. Without this, work stays
  // ACK'd out of the Redis PEL and poll returns empty forever (CC-1263).
  // The existingHandle path below delivers the fresh token to the child.
  // sessionId is already in the format /bridge/reconnect expects: it comes
  // from work.data.id, which matches the server's EnvironmentInstance store
  // (cse_* under the compat gate, session_* otherwise).
  for (const sessionId of authFailedSessions) {
    logger.logVerbose(
      `Session ${sessionId} token expired β re-queuing via bridge/reconnect`,
    )
    try {
      await api.reconnectSession(environmentId, sessionId)
      logForDebugging(
        `[bridge:heartbeat] Re-queued sessionId=${sessionId} via bridge/reconnect`,
      )
    } catch (err) {
      // Reconnect failure is logged but not escalated: the server may still
      // re-deliver on lease expiry.
      logger.logError(
        `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
      )
      logForDebugging(
        `[bridge:heartbeat] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
        { level: 'error' },
      )
    }
  }
  // Severity order: fatal > auth_failed > ok > failed.
  if (anyFatal) {
    return 'fatal'
  }
  if (authFailedSessions.length > 0) {
    return 'auth_failed'
  }
  return anySuccess ? 'ok' : 'failed'
}
// Sessions spawned with CCR v2 env vars. v2 children cannot use OAuth
// tokens (CCR worker endpoints validate the JWT's session_id claim,
// register_worker.go:32), so onRefresh triggers server re-dispatch
// instead β the next poll delivers fresh work with a new JWT via the
// existingHandle path below.
const v2Sessions = new Set<string>()
// Proactive token refresh: schedules a timer 5min before the session
// ingress JWT expires. v1 delivers OAuth directly; v2 calls
// reconnectSession to trigger server re-dispatch (CC-1263: without
// this, v2 daemon sessions silently die at ~5h since the server does
// not auto-re-dispatch ACK'd work on lease expiry).
const tokenRefresh = getAccessToken
? createTokenRefreshScheduler({
getAccessToken,
onRefresh: (sessionId, oauthToken) => {
const handle = activeSessions.get(sessionId)
if (!handle) {
return
}
if (v2Sessions.has(sessionId)) {
logger.logVerbose(
`Refreshing session ${sessionId} token via bridge/reconnect`,
)
void api
.reconnectSession(environmentId, sessionId)
.catch((err: unknown) => {
logger.logError(
`Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
)
logForDebugging(
`[bridge:token] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
{ level: 'error' },
)
})
} else {
handle.updateAccessToken(oauthToken)
}
},
label: 'bridge',
})
: null
const loopStartTime = Date.now()
// Track all in-flight cleanup promises (stopWork, worktree removal) so
// the shutdown sequence can await them before process.exit().
const pendingCleanups = new Set<Promise<unknown>>()
/** Register a cleanup promise; it removes itself once settled (either way). */
function trackCleanup(cleanup: Promise<unknown>): void {
  pendingCleanups.add(cleanup)
  const untrack = (): void => {
    pendingCleanups.delete(cleanup)
  }
  void cleanup.finally(untrack)
}
let connBackoff = 0
let generalBackoff = 0
let connErrorStart: number | null = null
let generalErrorStart: number | null = null
let lastPollErrorTime: number | null = null
let statusUpdateTimer: ReturnType<typeof setInterval> | null = null
// Set by BridgeFatalError and give-up paths so the shutdown block can
// skip the resume message (resume is impossible after env expiry/auth
// failure/sustained connection errors).
let fatalExit = false
logForDebugging(
`[bridge:work] Starting poll loop spawnMode=${config.spawnMode} maxSessions=${config.maxSessions} environmentId=${environmentId}`,
)
logForDiagnosticsNoPII('info', 'bridge_loop_started', {
max_sessions: config.maxSessions,
spawn_mode: config.spawnMode,
})
// For ant users, show where session debug logs will land so they can tail them.
// sessionRunner.ts uses the same base path. File appears once a session spawns.
if (process.env.USER_TYPE === 'ant') {
let debugGlob: string
if (config.debugFile) {
const ext = config.debugFile.lastIndexOf('.')
debugGlob =
ext > 0
? `${config.debugFile.slice(0, ext)}-*${config.debugFile.slice(ext)}`
: `${config.debugFile}-*`
} else {
debugGlob = join(tmpdir(), 'claude', 'bridge-session-*.log')
}
logger.setDebugLogPath(debugGlob)
}
logger.printBanner(config, environmentId)
// Seed the logger's session count + spawn mode before any render. Without
// this, setAttached() below renders with the logger's default sessionMax=1,
// showing "Capacity: 0/1" until the status ticker kicks in (which is gated
// by !initialSessionId and only starts after the poll loop picks up work).
logger.updateSessionCount(0, config.maxSessions, config.spawnMode)
// If an initial session was pre-created, show its URL from the start so
// the user can click through immediately (matching /remote-control behavior).
if (initialSessionId) {
logger.setAttached(initialSessionId)
}
/** Refresh the inline status display. Shows idle or active depending on state. */
function updateStatusDisplay(): void {
  // Push the session count (no-op when maxSessions === 1) so the
  // next renderStatusLine tick shows the current count.
  logger.updateSessionCount(
    activeSessions.size,
    config.maxSessions,
    config.spawnMode,
  )
  // Push per-session activity into the multi-session display.
  // Keyed by the compat ID cached at spawn, falling back to the raw id.
  for (const [sid, handle] of activeSessions) {
    const act = handle.currentActivity
    if (act) {
      logger.updateSessionActivity(sessionCompatIds.get(sid) ?? sid, act)
    }
  }
  if (activeSessions.size === 0) {
    logger.updateIdleStatus()
    return
  }
  // Show the most recently started session that is still actively working.
  // Sessions whose current activity is 'result' or 'error' are between
  // turns — the CLI emitted its result but the process stays alive waiting
  // for the next user message. Skip updating so the status line keeps
  // whatever state it had (Attached / session title).
  // NOTE(review): only the most recently *inserted* session is inspected;
  // if it is between turns, earlier sessions that are still working do not
  // get promoted into the status line — confirm this is intended.
  const [sessionId, handle] = [...activeSessions.entries()].pop()!
  const startTime = sessionStartTimes.get(sessionId)
  if (!startTime) return
  const activity = handle.currentActivity
  if (!activity || activity.type === 'result' || activity.type === 'error') {
    // Session is between turns — keep current status (Attached/titled).
    // In multi-session mode, still refresh so bullet-list activities stay current.
    if (config.maxSessions > 1) logger.refreshDisplay()
    return
  }
  const elapsed = formatDuration(Date.now() - startTime)
  // Build trail from recent tool activities (last 5)
  const trail = handle.activities
    .filter(a => a.type === 'tool_start')
    .slice(-5)
    .map(a => a.summary)
  logger.updateSessionStatus(sessionId, elapsed, activity, trail)
}
/** Start (or restart) the status display ticker. */
function startStatusUpdates(): void {
  stopStatusUpdates()
  // Render once synchronously so the first transition (e.g. Connecting →
  // Ready) shows without waiting a full interval, and so a stale timer can
  // never run concurrently with the new one.
  updateStatusDisplay()
  statusUpdateTimer = setInterval(
    updateStatusDisplay,
    STATUS_UPDATE_INTERVAL_MS,
  )
}
/** Stop the status display ticker if one is running; idempotent. */
function stopStatusUpdates(): void {
  if (statusUpdateTimer === null) {
    return
  }
  clearInterval(statusUpdateTimer)
  statusUpdateTimer = null
}
/**
 * Build the completion callback for a spawned session. The returned closure
 * tears down all per-session bookkeeping (maps, timers, token refresh),
 * reports the outcome (logs + analytics + diagnostics), notifies the server
 * (stopWork / archiveSession), removes any worktree, and decides whether the
 * bridge keeps running (multi-session) or aborts the poll loop
 * (single-session coupled lifecycle).
 */
function onSessionDone(
  sessionId: string,
  startTime: number,
  handle: SessionHandle,
): (status: SessionDoneStatus) => void {
  return (rawStatus: SessionDoneStatus): void => {
    // Capture the work id before tearing down the bookkeeping maps.
    const workId = sessionWorkIds.get(sessionId)
    activeSessions.delete(sessionId)
    sessionStartTimes.delete(sessionId)
    sessionWorkIds.delete(sessionId)
    sessionIngressTokens.delete(sessionId)
    // Fall back to the raw id when no compat ID was cached at spawn.
    const compatId = sessionCompatIds.get(sessionId) ?? sessionId
    sessionCompatIds.delete(sessionId)
    logger.removeSession(compatId)
    titledSessions.delete(compatId)
    v2Sessions.delete(sessionId)
    // Clear per-session timeout timer
    const timer = sessionTimers.get(sessionId)
    if (timer) {
      clearTimeout(timer)
      sessionTimers.delete(sessionId)
    }
    // Clear token refresh timer
    tokenRefresh?.cancel(sessionId)
    // Wake the at-capacity sleep so the bridge can accept new work immediately
    capacityWake.wake()
    // If the session was killed by the timeout watchdog, treat it as a
    // failed session (not a server/shutdown interrupt) so we still call
    // stopWork and archiveSession below.
    const wasTimedOut = timedOutSessions.delete(sessionId)
    const status: SessionDoneStatus =
      wasTimedOut && rawStatus === 'interrupted' ? 'failed' : rawStatus
    const durationMs = Date.now() - startTime
    logForDebugging(
      `[bridge:session] sessionId=${sessionId} workId=${workId ?? 'unknown'} exited status=${status} duration=${formatDuration(durationMs)}`,
    )
    logEvent('tengu_bridge_session_done', {
      status:
        status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      duration_ms: durationMs,
    })
    logForDiagnosticsNoPII('info', 'bridge_session_done', {
      status,
      duration_ms: durationMs,
    })
    // Clear the status display before printing final log
    logger.clearStatus()
    stopStatusUpdates()
    // Build error message from stderr if available
    const stderrSummary =
      handle.lastStderr.length > 0 ? handle.lastStderr.join('\n') : undefined
    let failureMessage: string | undefined
    switch (status) {
      case 'completed':
        logger.logSessionComplete(sessionId, durationMs)
        break
      case 'failed':
        // Skip failure log during shutdown — the child exits non-zero when
        // killed, which is expected and not a real failure.
        // Also skip for timeout-killed sessions — the timeout watchdog
        // already logged a clear timeout message.
        if (!wasTimedOut && !loopSignal.aborted) {
          failureMessage = stderrSummary ?? 'Process exited with error'
          logger.logSessionFailed(sessionId, failureMessage)
          logError(new Error(`Bridge session failed: ${failureMessage}`))
        }
        break
      case 'interrupted':
        logger.logVerbose(`Session ${sessionId} interrupted`)
        break
    }
    // Notify the server that this work item is done. Skip for interrupted
    // sessions — interrupts are either server-initiated (the server already
    // knows) or caused by bridge shutdown (which calls stopWork() separately).
    if (status !== 'interrupted' && workId) {
      trackCleanup(
        stopWorkWithRetry(
          api,
          environmentId,
          workId,
          logger,
          backoffConfig.stopWorkBaseDelayMs,
        ),
      )
      completedWorkIds.add(workId)
    }
    // Clean up worktree if one was created for this session
    const wt = sessionWorktrees.get(sessionId)
    if (wt) {
      sessionWorktrees.delete(sessionId)
      trackCleanup(
        removeAgentWorktree(
          wt.worktreePath,
          wt.worktreeBranch,
          wt.gitRoot,
          wt.hookBased,
        ).catch((err: unknown) =>
          logger.logVerbose(
            `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
          ),
        ),
      )
    }
    // Lifecycle decision: in multi-session mode, keep the bridge running
    // after a session completes. In single-session mode, abort the poll
    // loop so the bridge exits cleanly.
    if (status !== 'interrupted' && !loopSignal.aborted) {
      if (config.spawnMode !== 'single-session') {
        // Multi-session: archive the completed session so it doesn't linger
        // as stale in the web UI. archiveSession is idempotent (409 if already
        // archived), so double-archiving at shutdown is safe.
        // sessionId arrived as cse_* from the work poll (infrastructure-layer
        // tag). archiveSession hits /v1/sessions/{id}/archive which is the
        // compat surface and validates TagSession (session_*). Re-tag — same
        // UUID underneath.
        trackCleanup(
          api
            .archiveSession(compatId)
            .catch((err: unknown) =>
              logger.logVerbose(
                `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
              ),
            ),
        )
        logForDebugging(
          `[bridge:session] Session ${status}, returning to idle (multi-session mode)`,
        )
      } else {
        // Single-session: coupled lifecycle — tear down environment
        logForDebugging(
          `[bridge:session] Session ${status}, aborting poll loop to tear down environment`,
        )
        controller.abort()
        return
      }
    }
    // Resume the idle ticker unless the loop is shutting down.
    if (!loopSignal.aborted) {
      startStatusUpdates()
    }
  }
}
// Start the idle status display immediately β unless we have a pre-created
// session, in which case setAttached() already set up the display and the
// poll loop will start status updates when it picks up the session.
if (!initialSessionId) {
startStatusUpdates()
}
while (!loopSignal.aborted) {
// Fetched once per iteration β the GrowthBook cache refreshes every
// 5 min, so a loop running at the at-capacity rate picks up config
// changes within one sleep cycle.
const pollConfig = getPollIntervalConfig()
try {
const work = await api.pollForWork(
environmentId,
environmentSecret,
loopSignal,
pollConfig.reclaim_older_than_ms,
)
// Log reconnection if we were previously disconnected
const wasDisconnected =
connErrorStart !== null || generalErrorStart !== null
if (wasDisconnected) {
const disconnectedMs =
Date.now() - (connErrorStart ?? generalErrorStart ?? Date.now())
logger.logReconnected(disconnectedMs)
logForDebugging(
`[bridge:poll] Reconnected after ${formatDuration(disconnectedMs)}`,
)
logEvent('tengu_bridge_reconnected', {
disconnected_ms: disconnectedMs,
})
}
connBackoff = 0
generalBackoff = 0
connErrorStart = null
generalErrorStart = null
lastPollErrorTime = null
// Null response = no work available in the queue.
// Add a minimum delay to avoid hammering the server.
if (!work) {
// Use live check (not a snapshot) since sessions can end during poll.
const atCap = activeSessions.size >= config.maxSessions
if (atCap) {
const atCapMs = pollConfig.multisession_poll_interval_ms_at_capacity
// Heartbeat loops WITHOUT polling. When at-capacity polling is also
// enabled (atCapMs > 0), the loop tracks a deadline and breaks out
// to poll at that interval β heartbeat and poll compose instead of
// one suppressing the other. We break out to poll when:
// - Poll deadline reached (atCapMs > 0 only)
// - Auth fails (JWT expired β poll refreshes tokens)
// - Capacity wake fires (session ended β poll for new work)
// - Loop aborted (shutdown)
if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
logEvent('tengu_bridge_heartbeat_mode_entered', {
active_sessions: activeSessions.size,
heartbeat_interval_ms:
pollConfig.non_exclusive_heartbeat_interval_ms,
})
// Deadline computed once at entry β GB updates to atCapMs don't
// shift an in-flight deadline (next entry picks up the new value).
const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
let hbResult: 'ok' | 'auth_failed' | 'fatal' | 'failed' = 'ok'
let hbCycles = 0
while (
!loopSignal.aborted &&
activeSessions.size >= config.maxSessions &&
(pollDeadline === null || Date.now() < pollDeadline)
) {
// Re-read config each cycle so GrowthBook updates take effect
const hbConfig = getPollIntervalConfig()
if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
// Capture capacity signal BEFORE the async heartbeat call so
// a session ending during the HTTP request is caught by the
// subsequent sleep (instead of being lost to a replaced controller).
const cap = capacityWake.signal()
hbResult = await heartbeatActiveWorkItems()
if (hbResult === 'auth_failed' || hbResult === 'fatal') {
cap.cleanup()
break
}
hbCycles++
await sleep(
hbConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
cap.cleanup()
}
// Determine exit reason for telemetry
const exitReason =
hbResult === 'auth_failed' || hbResult === 'fatal'
? hbResult
: loopSignal.aborted
? 'shutdown'
: activeSessions.size < config.maxSessions
? 'capacity_changed'
: pollDeadline !== null && Date.now() >= pollDeadline
? 'poll_due'
: 'config_disabled'
logEvent('tengu_bridge_heartbeat_mode_exited', {
reason:
exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
heartbeat_cycles: hbCycles,
active_sessions: activeSessions.size,
})
if (exitReason === 'poll_due') {
// bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
// so the once-per-10min poll_due poll is invisible at counter=2.
// Log it here so verification runs see both endpoints in the debug log.
logForDebugging(
`[bridge:poll] Heartbeat poll_due after ${hbCycles} cycles β falling through to pollForWork`,
)
}
// On auth_failed or fatal, sleep before polling to avoid a tight
// poll+heartbeat loop. Auth_failed: heartbeatActiveWorkItems
// already called reconnectSession β the sleep gives the server
// time to propagate the re-queue. Fatal (404/410): may be a
// single work item GCd while the environment is still valid.
// Use atCapMs if enabled, else the heartbeat interval as a floor
// (guaranteed > 0 here) so heartbeat-only configs don't tight-loop.
if (hbResult === 'auth_failed' || hbResult === 'fatal') {
const cap = capacityWake.signal()
await sleep(
atCapMs > 0
? atCapMs
: pollConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
cap.cleanup()
}
} else if (atCapMs > 0) {
// Heartbeat disabled: slow poll as liveness signal.
const cap = capacityWake.signal()
await sleep(atCapMs, cap.signal)
cap.cleanup()
}
} else {
const interval =
activeSessions.size > 0
? pollConfig.multisession_poll_interval_ms_partial_capacity
: pollConfig.multisession_poll_interval_ms_not_at_capacity
await sleep(interval, loopSignal)
}
continue
}
// At capacity β we polled to keep the heartbeat alive, but cannot
// accept new work right now. We still enter the switch below so that
// token refreshes for existing sessions are processed (the case
// 'session' handler checks for existing sessions before the inner
// capacity guard).
const atCapacityBeforeSwitch = activeSessions.size >= config.maxSessions
// Skip work items that have already been completed and stopped.
// The server may re-deliver stale work before processing our stop
// request, which would otherwise cause a duplicate session spawn.
if (completedWorkIds.has(work.id)) {
logForDebugging(
`[bridge:work] Skipping already-completed workId=${work.id}`,
)
// Respect capacity throttle β without a sleep here, persistent stale
// redeliveries would tight-loop at poll-request speed (the !work
// branch above is the only sleep, and work != null skips it).
if (atCapacityBeforeSwitch) {
const cap = capacityWake.signal()
if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
await sleep(
pollConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
} else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
await sleep(
pollConfig.multisession_poll_interval_ms_at_capacity,
cap.signal,
)
}
cap.cleanup()
} else {
await sleep(1000, loopSignal)
}
continue
}
// Decode the work secret for session spawning and to extract the JWT
// used for the ack call below.
let secret
try {
secret = decodeWorkSecret(work.secret)
} catch (err) {
const errMsg = errorMessage(err)
logger.logError(
`Failed to decode work secret for workId=${work.id}: ${errMsg}`,
)
logEvent('tengu_bridge_work_secret_failed', {})
// Can't ack (needs the JWT we failed to decode). stopWork uses OAuth,
// so it's callable here β prevents XAUTOCLAIM from re-delivering this
// poisoned item every reclaim_older_than_ms cycle.
completedWorkIds.add(work.id)
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
work.id,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
// Respect capacity throttle before retrying β without a sleep here,
// repeated decode failures at capacity would tight-loop at
// poll-request speed (work != null skips the !work sleep above).
if (atCapacityBeforeSwitch) {
const cap = capacityWake.signal()
if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
await sleep(
pollConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
} else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
await sleep(
pollConfig.multisession_poll_interval_ms_at_capacity,
cap.signal,
)
}
cap.cleanup()
}
continue
}
// Explicitly acknowledge after committing to handle the work β NOT
// before. The at-capacity guard inside case 'session' can break
// without spawning; acking there would permanently lose the work.
// Ack failures are non-fatal: server re-delivers, and existingHandle
// / completedWorkIds paths handle the dedup.
const ackWork = async (): Promise<void> => {
logForDebugging(`[bridge:work] Acknowledging workId=${work.id}`)
try {
await api.acknowledgeWork(
environmentId,
work.id,
secret.session_ingress_token,
)
} catch (err) {
logForDebugging(
`[bridge:work] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
)
}
}
const workType: string = work.data.type
switch (work.data.type) {
case 'healthcheck':
await ackWork()
logForDebugging('[bridge:work] Healthcheck received')
logger.logVerbose('Healthcheck received')
break
case 'session': {
const sessionId = work.data.id
try {
validateBridgeId(sessionId, 'session_id')
} catch {
await ackWork()
logger.logError(`Invalid session_id received: ${sessionId}`)
break
}
// If the session is already running, deliver the fresh token so
// the child process can reconnect its WebSocket with the new
// session ingress token. This handles the case where the server
// re-dispatches work for an existing session after the WS drops.
const existingHandle = activeSessions.get(sessionId)
if (existingHandle) {
existingHandle.updateAccessToken(secret.session_ingress_token)
sessionIngressTokens.set(sessionId, secret.session_ingress_token)
sessionWorkIds.set(sessionId, work.id)
// Re-schedule next refresh from the fresh JWT's expiry. onRefresh
// branches on v2Sessions so both v1 and v2 are safe here.
tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
logForDebugging(
`[bridge:work] Updated access token for existing sessionId=${sessionId} workId=${work.id}`,
)
await ackWork()
break
}
// At capacity β token refresh for existing sessions is handled
// above, but we cannot spawn new ones. The post-switch capacity
// sleep will throttle the loop; just break here.
if (activeSessions.size >= config.maxSessions) {
logForDebugging(
`[bridge:work] At capacity (${activeSessions.size}/${config.maxSessions}), cannot spawn new session for workId=${work.id}`,
)
break
}
await ackWork()
const spawnStartTime = Date.now()
// CCR v2 path: register this bridge as the session worker, get the
// epoch, and point the child at /v1/code/sessions/{id}. The child
// already has the full v2 client (SSETransport + CCRClient) β same
// code path environment-manager launches in containers.
//
// v1 path: Session-Ingress WebSocket. Uses config.sessionIngressUrl
// (not secret.api_base_url, which may point to a remote proxy tunnel
// that doesn't know about locally-created sessions).
let sdkUrl: string
let useCcrV2 = false
let workerEpoch: number | undefined
// Server decides per-session via the work secret; env var is the
// ant-dev override (e.g. forcing v2 before the server flag is on).
if (
secret.use_code_sessions === true ||
isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
) {
sdkUrl = buildCCRv2SdkUrl(config.apiBaseUrl, sessionId)
// Retry once on transient failure (network blip, 500) before
// permanently giving up and killing the session.
for (let attempt = 1; attempt <= 2; attempt++) {
try {
workerEpoch = await registerWorker(
sdkUrl,
secret.session_ingress_token,
)
useCcrV2 = true
logForDebugging(
`[bridge:session] CCR v2: registered worker sessionId=${sessionId} epoch=${workerEpoch} attempt=${attempt}`,
)
break
} catch (err) {
const errMsg = errorMessage(err)
if (attempt < 2) {
logForDebugging(
`[bridge:session] CCR v2: registerWorker attempt ${attempt} failed, retrying: ${errMsg}`,
)
await sleep(2_000, loopSignal)
if (loopSignal.aborted) break
continue
}
logger.logError(
`CCR v2 worker registration failed for session ${sessionId}: ${errMsg}`,
)
logError(new Error(`registerWorker failed: ${errMsg}`))
completedWorkIds.add(work.id)
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
work.id,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
}
}
if (!useCcrV2) break
} else {
sdkUrl = buildSdkUrl(config.sessionIngressUrl, sessionId)
}
// In worktree mode, on-demand sessions get an isolated git worktree
// so concurrent sessions don't interfere with each other's file
// changes. The pre-created initial session (if any) runs in
// config.dir so the user's first session lands in the directory they
// invoked `rc` from β matching the old single-session UX.
// In same-dir and single-session modes, all sessions share config.dir.
// Capture spawnMode before the await below β the `w` key handler
// mutates config.spawnMode directly, and createAgentWorktree can
// take 1-2s, so reading config.spawnMode after the await can
// produce contradictory analytics (spawn_mode:'same-dir', in_worktree:true).
const spawnModeAtDecision = config.spawnMode
let sessionDir = config.dir
let worktreeCreateMs = 0
if (
spawnModeAtDecision === 'worktree' &&
(initialSessionId === undefined ||
!sameSessionId(sessionId, initialSessionId))
) {
const wtStart = Date.now()
try {
const wt = await createAgentWorktree(
`bridge-${safeFilenameId(sessionId)}`,
)
worktreeCreateMs = Date.now() - wtStart
sessionWorktrees.set(sessionId, {
worktreePath: wt.worktreePath,
worktreeBranch: wt.worktreeBranch,
gitRoot: wt.gitRoot,
hookBased: wt.hookBased,
})
sessionDir = wt.worktreePath
logForDebugging(
`[bridge:session] Created worktree for sessionId=${sessionId} at ${wt.worktreePath}`,
)
} catch (err) {
const errMsg = errorMessage(err)
logger.logError(
`Failed to create worktree for session ${sessionId}: ${errMsg}`,
)
logError(new Error(`Worktree creation failed: ${errMsg}`))
completedWorkIds.add(work.id)
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
work.id,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
break
}
}
logForDebugging(
`[bridge:session] Spawning sessionId=${sessionId} sdkUrl=${sdkUrl}`,
)
// compat-surface session_* form for logger/Sessions-API calls.
// Work poll returns cse_* under v2 compat; convert before spawn so
// the onFirstUserMessage callback can close over it.
const compatSessionId = toCompatSessionId(sessionId)
const spawnResult = safeSpawn(
spawner,
{
sessionId,
sdkUrl,
accessToken: secret.session_ingress_token,
useCcrV2,
workerEpoch,
onFirstUserMessage: text => {
// Server-set titles (--name, web rename) win. fetchSessionTitle
// runs concurrently; if it already populated titledSessions,
// skip. If it hasn't resolved yet, the derived title sticks β
// acceptable since the server had no title at spawn time.
if (titledSessions.has(compatSessionId)) return
titledSessions.add(compatSessionId)
const title = deriveSessionTitle(text)
logger.setSessionTitle(compatSessionId, title)
logForDebugging(
`[bridge:title] derived title for ${compatSessionId}: ${title}`,
)
void import('./createSession.js')
.then(({ updateBridgeSessionTitle }) =>
updateBridgeSessionTitle(compatSessionId, title, {
baseUrl: config.apiBaseUrl,
}),
)
.catch(err =>
logForDebugging(
`[bridge:title] failed to update title for ${compatSessionId}: ${err}`,
{ level: 'error' },
),
)
},
},
sessionDir,
)
if (typeof spawnResult === 'string') {
logger.logError(
`Failed to spawn session ${sessionId}: ${spawnResult}`,
)
// Clean up worktree if one was created for this session
const wt = sessionWorktrees.get(sessionId)
if (wt) {
sessionWorktrees.delete(sessionId)
trackCleanup(
removeAgentWorktree(
wt.worktreePath,
wt.worktreeBranch,
wt.gitRoot,
wt.hookBased,
).catch((err: unknown) =>
logger.logVerbose(
`Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
),
),
)
}
completedWorkIds.add(work.id)
trackCleanup(
stopWorkWithRetry(
api,
environmentId,
work.id,
logger,
backoffConfig.stopWorkBaseDelayMs,
),
)
break
}
const handle = spawnResult
const spawnDurationMs = Date.now() - spawnStartTime
logEvent('tengu_bridge_session_started', {
active_sessions: activeSessions.size,
spawn_mode:
spawnModeAtDecision as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
in_worktree: sessionWorktrees.has(sessionId),
spawn_duration_ms: spawnDurationMs,
worktree_create_ms: worktreeCreateMs,
inProtectedNamespace: isInProtectedNamespace(),
})
logForDiagnosticsNoPII('info', 'bridge_session_started', {
spawn_mode: spawnModeAtDecision,
in_worktree: sessionWorktrees.has(sessionId),
spawn_duration_ms: spawnDurationMs,
worktree_create_ms: worktreeCreateMs,
})
activeSessions.set(sessionId, handle)
sessionWorkIds.set(sessionId, work.id)
sessionIngressTokens.set(sessionId, secret.session_ingress_token)
sessionCompatIds.set(sessionId, compatSessionId)
const startTime = Date.now()
sessionStartTimes.set(sessionId, startTime)
// Use a generic prompt description since we no longer get startup_context
logger.logSessionStart(sessionId, `Session ${sessionId}`)
// Compute the actual debug file path (mirrors sessionRunner.ts logic)
const safeId = safeFilenameId(sessionId)
let sessionDebugFile: string | undefined
if (config.debugFile) {
const ext = config.debugFile.lastIndexOf('.')
if (ext > 0) {
sessionDebugFile = `${config.debugFile.slice(0, ext)}-${safeId}${config.debugFile.slice(ext)}`
} else {
sessionDebugFile = `${config.debugFile}-${safeId}`
}
} else if (config.verbose || process.env.USER_TYPE === 'ant') {
sessionDebugFile = join(
tmpdir(),
'claude',
`bridge-session-${safeId}.log`,
)
}
if (sessionDebugFile) {
logger.logVerbose(`Debug log: ${sessionDebugFile}`)
}
// Register in the sessions Map before starting status updates so the
// first render tick shows the correct count and bullet list in sync.
logger.addSession(
compatSessionId,
getRemoteSessionUrl(compatSessionId, config.sessionIngressUrl),
)
// Start live status updates and transition to "Attached" state.
startStatusUpdates()
logger.setAttached(compatSessionId)
// One-shot title fetch. If the session already has a title (set via
// --name, web rename, or /remote-control), display it and mark as
// titled so the first-user-message fallback doesn't overwrite it.
// Otherwise onFirstUserMessage derives one from the first prompt.
void fetchSessionTitle(compatSessionId, config.apiBaseUrl)
.then(title => {
if (title && activeSessions.has(sessionId)) {
titledSessions.add(compatSessionId)
logger.setSessionTitle(compatSessionId, title)
logForDebugging(
`[bridge:title] server title for ${compatSessionId}: ${title}`,
)
}
})
.catch(err =>
logForDebugging(
`[bridge:title] failed to fetch title for ${compatSessionId}: ${err}`,
{ level: 'error' },
),
)
// Start per-session timeout watchdog
const timeoutMs =
config.sessionTimeoutMs ?? DEFAULT_SESSION_TIMEOUT_MS
if (timeoutMs > 0) {
const timer = setTimeout(
onSessionTimeout,
timeoutMs,
sessionId,
timeoutMs,
logger,
timedOutSessions,
handle,
)
sessionTimers.set(sessionId, timer)
}
// Schedule proactive token refresh before the JWT expires.
// onRefresh branches on v2Sessions: v1 delivers OAuth to the
// child, v2 triggers server re-dispatch via reconnectSession.
if (useCcrV2) {
v2Sessions.add(sessionId)
}
tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
void handle.done.then(onSessionDone(sessionId, startTime, handle))
break
}
default:
await ackWork()
// Gracefully ignore unknown work types. The backend may send new
// types before the bridge client is updated.
logForDebugging(
`[bridge:work] Unknown work type: ${workType}, skipping`,
)
break
}
// When at capacity, throttle the loop. The switch above still runs so
// existing-session token refreshes are processed, but we sleep here
// to avoid busy-looping. Include the capacity wake signal so the
// sleep is interrupted immediately when a session completes.
if (atCapacityBeforeSwitch) {
const cap = capacityWake.signal()
if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
await sleep(
pollConfig.non_exclusive_heartbeat_interval_ms,
cap.signal,
)
} else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
await sleep(
pollConfig.multisession_poll_interval_ms_at_capacity,
cap.signal,
)
}
cap.cleanup()
}
} catch (err) {
if (loopSignal.aborted) {
break
}
// Fatal errors (401/403) β no point retrying, auth won't fix itself
if (err instanceof BridgeFatalError) {
fatalExit = true
// Server-enforced expiry gets a clean status message, not an error
if (isExpiredErrorType(err.errorType)) {
logger.logStatus(err.message)
} else if (isSuppressible403(err)) {
// Cosmetic 403 errors (e.g., external_poll_sessions scope,
// environments:manage permission) β don't show to user
logForDebugging(`[bridge:work] Suppressed 403 error: ${err.message}`)
} else {
logger.logError(err.message)
logError(err)
}
logEvent('tengu_bridge_fatal_error', {
status: err.status,
error_type:
err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
})
logForDiagnosticsNoPII(
isExpiredErrorType(err.errorType) ? 'info' : 'error',
'bridge_fatal_error',
{ status: err.status, error_type: err.errorType },
)
break
}
const errMsg = describeAxiosError(err)
if (isConnectionError(err) || isServerError(err)) {
const now = Date.now()
// Detect system sleep/wake: if the gap since the last poll error
// greatly exceeds the expected backoff, the machine likely slept.
// Reset error tracking so the bridge retries with a fresh budget.
if (
lastPollErrorTime !== null &&
now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
) {
logForDebugging(
`[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
)
logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
gapMs: now - lastPollErrorTime,
})
connErrorStart = null
connBackoff = 0
generalErrorStart = null
generalBackoff = 0
}
lastPollErrorTime = now
if (!connErrorStart) {
connErrorStart = now
}
const elapsed = now - connErrorStart
if (elapsed >= backoffConfig.connGiveUpMs) {
logger.logError(
`Server unreachable for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
)
logEvent('tengu_bridge_poll_give_up', {
error_type:
'connection' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
elapsed_ms: elapsed,
})
logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
error_type: 'connection',
elapsed_ms: elapsed,
})
fatalExit = true
break
}
// Reset the other track when switching error types
generalErrorStart = null
generalBackoff = 0
connBackoff = connBackoff
? Math.min(connBackoff * 2, backoffConfig.connCapMs)
: backoffConfig.connInitialMs
const delay = addJitter(connBackoff)
logger.logVerbose(
`Connection error, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
)
logger.updateReconnectingStatus(
formatDelay(delay),
formatDuration(elapsed),
)
// The poll_due heartbeat-loop exit leaves a healthy lease exposed to
// this backoff path. Heartbeat before each sleep so /poll outages
// (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced
// to avoid) don't kill the 300s lease TTL. No-op when activeSessions
// is empty or heartbeat is disabled.
if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
}
await sleep(delay, loopSignal)
} else {
const now = Date.now()
// Sleep detection for general errors (same logic as connection errors)
if (
lastPollErrorTime !== null &&
now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
) {
logForDebugging(
`[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
)
logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
gapMs: now - lastPollErrorTime,
})
connErrorStart = null
connBackoff = 0
generalErrorStart = null
generalBackoff = 0
}
lastPollErrorTime = now
if (!generalErrorStart) {
generalErrorStart = now
}
const elapsed = now - generalErrorStart
if (elapsed >= backoffConfig.generalGiveUpMs) {
logger.logError(
`Persistent errors for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
)
logEvent('tengu_bridge_poll_give_up', {
error_type:
'general' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
elapsed_ms: elapsed,
})
logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
error_type: 'general',
elapsed_ms: elapsed,
})
fatalExit = true
break
}
// Reset the other track when switching error types
connErrorStart = null
connBackoff = 0
generalBackoff = generalBackoff
? Math.min(generalBackoff * 2, backoffConfig.generalCapMs)
: backoffConfig.generalInitialMs
const delay = addJitter(generalBackoff)
logger.logVerbose(
`Poll failed, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
)
logger.updateReconnectingStatus(
formatDelay(delay),
formatDuration(elapsed),
)
if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
await heartbeatActiveWorkItems()
}
await sleep(delay, loopSignal)
}
}
}
// Clean up
stopStatusUpdates()
logger.clearStatus()
const loopDurationMs = Date.now() - loopStartTime
logEvent('tengu_bridge_shutdown', {
active_sessions: activeSessions.size,
loop_duration_ms: loopDurationMs,
})
logForDiagnosticsNoPII('info', 'bridge_shutdown', {
active_sessions: activeSessions.size,
loop_duration_ms: loopDurationMs,
})
// Graceful shutdown: kill active sessions, report them as interrupted,
// archive sessions, then deregister the environment so the web UI shows
// the bridge as offline.
// Collect all session IDs to archive on exit. This includes:
// 1. Active sessions (snapshot before killing β onSessionDone clears maps)
// 2. The initial auto-created session (may never have had work dispatched)
// api.archiveSession is idempotent (409 if already archived), so
// double-archiving is safe.
const sessionsToArchive = new Set(activeSessions.keys())
if (initialSessionId) {
sessionsToArchive.add(initialSessionId)
}
// Snapshot before killing β onSessionDone clears sessionCompatIds.
const compatIdSnapshot = new Map(sessionCompatIds)
if (activeSessions.size > 0) {
logForDebugging(
`[bridge:shutdown] Shutting down ${activeSessions.size} active session(s)`,
)
logger.logStatus(
`Shutting down ${activeSessions.size} active session(s)\u2026`,
)
// Snapshot work IDs before killing β onSessionDone clears the maps when
// each child exits, so we need a copy for the stopWork calls below.
const shutdownWorkIds = new Map(sessionWorkIds)
for (const [sessionId, handle] of activeSessions.entries()) {
logForDebugging(
`[bridge:shutdown] Sending SIGTERM to sessionId=${sessionId}`,
)
handle.kill()
}
const timeout = new AbortController()
await Promise.race([
Promise.allSettled([...activeSessions.values()].map(h => h.done)),
sleep(backoffConfig.shutdownGraceMs ?? 30_000, timeout.signal),
])
timeout.abort()
// SIGKILL any processes that didn't respond to SIGTERM within the grace window
for (const [sid, handle] of activeSessions.entries()) {
logForDebugging(`[bridge:shutdown] Force-killing stuck sessionId=${sid}`)
handle.forceKill()
}
// Clear any remaining session timeout and refresh timers
for (const timer of sessionTimers.values()) {
clearTimeout(timer)
}
sessionTimers.clear()
tokenRefresh?.cancelAll()
// Clean up any remaining worktrees from active sessions.
// Snapshot and clear the map first so onSessionDone (which may fire
// during the await below when handle.done resolves) won't try to
// remove the same worktrees again.
if (sessionWorktrees.size > 0) {
const remainingWorktrees = [...sessionWorktrees.values()]
sessionWorktrees.clear()
logForDebugging(
`[bridge:shutdown] Cleaning up ${remainingWorktrees.length} worktree(s)`,
)
await Promise.allSettled(
remainingWorktrees.map(wt =>
removeAgentWorktree(
wt.worktreePath,
wt.worktreeBranch,
wt.gitRoot,
wt.hookBased,
),
),
)
}
// Stop all active work items so the server knows they're done
await Promise.allSettled(
[...shutdownWorkIds.entries()].map(([sessionId, workId]) => {
return api
.stopWork(environmentId, workId, true)
.catch(err =>
logger.logVerbose(
`Failed to stop work ${workId} for session ${sessionId}: ${errorMessage(err)}`,
),
)
}),
)
}
// Ensure all in-flight cleanup (stopWork, worktree removal) from
// onSessionDone completes before deregistering β otherwise
// process.exit() can kill them mid-flight.
if (pendingCleanups.size > 0) {
await Promise.allSettled([...pendingCleanups])
}
// In single-session mode with a known session, leave the session and
// environment alive so `claude remote-control --session-id=<id>` can resume.
// The backend GCs stale environments via a 4h TTL (BRIDGE_LAST_POLL_TTL).
// Archiving the session or deregistering the environment would make the
// printed resume command a lie β deregister deletes Firestore + Redis stream.
// Skip when the loop exited fatally (env expired, auth failed, give-up) β
// resume is impossible in those cases and the message would contradict the
// error already printed.
// feature('KAIROS') gate: --session-id is ant-only; without the gate,
// revert to the pre-PR behavior (archive + deregister on every shutdown).
if (
feature('KAIROS') &&
config.spawnMode === 'single-session' &&
initialSessionId &&
!fatalExit
) {
logger.logStatus(
`Resume this session by running \`claude remote-control --continue\``,
)
logForDebugging(
`[bridge:shutdown] Skipping archive+deregister to allow resume of session ${initialSessionId}`,
)
return
}
// Archive all known sessions so they don't linger as idle/running on the
// server after the bridge goes offline.
if (sessionsToArchive.size > 0) {
logForDebugging(
`[bridge:shutdown] Archiving ${sessionsToArchive.size} session(s)`,
)
await Promise.allSettled(
[...sessionsToArchive].map(sessionId =>
api
.archiveSession(
compatIdSnapshot.get(sessionId) ?? toCompatSessionId(sessionId),
)
.catch(err =>
logger.logVerbose(
`Failed to archive session ${sessionId}: ${errorMessage(err)}`,
),
),
),
)
}
// Deregister the environment so the web UI shows the bridge as offline
// and the Redis stream is cleaned up.
try {
await api.deregisterEnvironment(environmentId)
logForDebugging(
`[bridge:shutdown] Environment deregistered, bridge offline`,
)
logger.logVerbose('Environment deregistered.')
} catch (err) {
logger.logVerbose(`Failed to deregister environment: ${errorMessage(err)}`)
}
// Clear the crash-recovery pointer β the env is gone, pointer would be
// stale. The early return above (resumable SIGINT shutdown) skips this,
// leaving the pointer as a backup for the printed --session-id hint.
const { clearBridgePointer } = await import('./bridgePointer.js')
await clearBridgePointer(config.dir)
logger.logVerbose('Environment offline.')
}
// Node.js system error codes that indicate a network-level connection
// failure (refused, reset, timed out, or unreachable host/network).
const CONNECTION_ERROR_CODES = new Set([
  'ECONNREFUSED',
  'ECONNRESET',
  'ETIMEDOUT',
  'ENETUNREACH',
  'EHOSTUNREACH',
])
/**
 * True when `err` carries one of the known network-level error codes above.
 * Used by the poll loop to route failures onto the "connection" backoff
 * track rather than the general-error track.
 */
export function isConnectionError(err: unknown): boolean {
  return (
    !!err &&
    typeof err === 'object' &&
    'code' in err &&
    typeof err.code === 'string' &&
    CONNECTION_ERROR_CODES.has(err.code)
  )
}
/** Detect HTTP 5xx errors from axios (code: 'ERR_BAD_RESPONSE'). */
export function isServerError(err: unknown): boolean {
  if (!err || typeof err !== 'object') {
    return false
  }
  if (!('code' in err)) {
    return false
  }
  return typeof err.code === 'string' && err.code === 'ERR_BAD_RESPONSE'
}
/**
 * Add ±25% uniform random jitter to a delay, clamped at zero.
 * Jitter spreads retry timing so many clients recovering from the same
 * outage don't hammer the server in lockstep.
 */
function addJitter(ms: number): number {
  const offset = ms * 0.25 * (2 * Math.random() - 1)
  return Math.max(0, ms + offset)
}
/** Render a millisecond delay for log output: "1.5s" at >= 1s, else "850ms". */
function formatDelay(ms: number): string {
  if (ms >= 1000) {
    return `${(ms / 1000).toFixed(1)}s`
  }
  return `${Math.round(ms)}ms`
}
/**
 * Retry stopWork with exponential backoff (3 attempts, 1s/2s/4s base before
 * jitter). Ensures the server learns the work item ended, preventing
 * server-side zombie work entries.
 *
 * Fatal (auth/permission) errors abort immediately — retrying cannot fix
 * them. All failures are logged; the function itself never throws.
 */
async function stopWorkWithRetry(
  api: BridgeApiClient,
  environmentId: string,
  workId: string,
  logger: BridgeLogger,
  baseDelayMs = 1000,
): Promise<void> {
  const MAX_ATTEMPTS = 3
  let attempt = 0
  while (attempt < MAX_ATTEMPTS) {
    attempt += 1
    try {
      await api.stopWork(environmentId, workId, false)
      logForDebugging(
        `[bridge:work] stopWork succeeded for workId=${workId} on attempt ${attempt}/${MAX_ATTEMPTS}`,
      )
      return
    } catch (err) {
      // Auth/permission errors won't be fixed by retrying — bail out now.
      if (err instanceof BridgeFatalError) {
        if (isSuppressible403(err)) {
          logForDebugging(
            `[bridge:work] Suppressed stopWork 403 for ${workId}: ${err.message}`,
          )
        } else {
          logger.logError(`Failed to stop work ${workId}: ${err.message}`)
        }
        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
          attempts: attempt,
          fatal: true,
        })
        return
      }
      const errMsg = errorMessage(err)
      const isLastAttempt = attempt >= MAX_ATTEMPTS
      if (isLastAttempt) {
        logger.logError(
          `Failed to stop work ${workId} after ${MAX_ATTEMPTS} attempts: ${errMsg}`,
        )
        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
          attempts: MAX_ATTEMPTS,
        })
      } else {
        // Exponential backoff with jitter: base * 2^(attempt-1).
        const delay = addJitter(baseDelayMs * 2 ** (attempt - 1))
        logger.logVerbose(
          `Failed to stop work ${workId} (attempt ${attempt}/${MAX_ATTEMPTS}), retrying in ${formatDelay(delay)}: ${errMsg}`,
        )
        await sleep(delay)
      }
    }
  }
}
/**
 * setTimeout callback fired when a session exceeds its configured lifetime.
 *
 * Logs the timeout, emits the analytics event, marks the session as failed
 * in the bridge UI, records it in `timedOutSessions`, and sends the kill
 * signal through the session handle. The marker set lets the normal
 * session-done path distinguish a timeout kill from a regular exit.
 */
function onSessionTimeout(
  sessionId: string,
  timeoutMs: number,
  logger: BridgeLogger,
  timedOutSessions: Set<string>,
  handle: SessionHandle,
): void {
  const durationLabel = formatDuration(timeoutMs)
  logForDebugging(
    `[bridge:session] sessionId=${sessionId} timed out after ${durationLabel}`,
  )
  logEvent('tengu_bridge_session_timeout', { timeout_ms: timeoutMs })
  logger.logSessionFailed(
    sessionId,
    `Session timed out after ${durationLabel}`,
  )
  // Record the timeout before killing so the exit handler sees the marker.
  timedOutSessions.add(sessionId)
  handle.kill()
}
/** Result of parsing the `claude remote-control` command-line arguments. */
export type ParsedArgs = {
  /** --verbose / -v: enable verbose output. */
  verbose: boolean
  /** Value of --sandbox / --no-sandbox (default: false). */
  sandbox: boolean
  /** --debug-file: path to write debug logs to, resolved to an absolute path. */
  debugFile?: string
  /** --session-timeout: given in seconds on the CLI, stored in milliseconds. */
  sessionTimeoutMs?: number
  /** --permission-mode: validated against PERMISSION_MODES in bridgeMain. */
  permissionMode?: string
  /** --name: session name shown in claude.ai/code. */
  name?: string
  /** Value passed to --spawn (if any); undefined if no --spawn flag was given. */
  spawnMode: SpawnMode | undefined
  /** Value passed to --capacity (if any); undefined if no --capacity flag was given. */
  capacity: number | undefined
  /** --[no-]create-session-in-dir override; undefined = use default (on). */
  createSessionInDir: boolean | undefined
  /** Resume an existing session instead of creating a new one. */
  sessionId?: string
  /** Resume the last session in this directory (reads bridge-pointer.json). */
  continueSession: boolean
  /** --help / -h: print usage and exit. */
  help: boolean
  /** Set when parsing failed; human-readable message printed to the user. */
  error?: string
}
// CLI spellings accepted by --spawn; 'session' maps to 'single-session'.
const SPAWN_FLAG_VALUES = ['session', 'same-dir', 'worktree'] as const
/**
 * Map a raw --spawn flag value to its SpawnMode. Returns the mode on
 * success, or a human-readable error message string when the value is
 * missing or unrecognized (the caller surfaces strings via makeError).
 */
function parseSpawnValue(raw: string | undefined): SpawnMode | string {
  switch (raw) {
    case 'session':
      return 'single-session'
    case 'same-dir':
    case 'worktree':
      return raw
    default:
      return `--spawn requires one of: ${SPAWN_FLAG_VALUES.join(', ')} (got: ${raw ?? '<missing>'})`
  }
}
/**
 * Parse a raw --capacity flag value.
 *
 * Returns the capacity as a number on success, or a human-readable error
 * message string when the value is missing or not a positive integer.
 *
 * Validates with a strict decimal-digits check before parseInt: parseInt
 * alone silently truncates inputs like "3.5" or "3abc" to 3, accepting
 * values the user almost certainly mistyped. Strictly-digit inputs that
 * were accepted before (e.g. "5") parse identically, so the change is
 * backward-compatible for all valid usage.
 */
function parseCapacityValue(raw: string | undefined): number | string {
  const n = raw !== undefined && /^\d+$/.test(raw) ? parseInt(raw, 10) : NaN
  if (Number.isNaN(n) || n < 1) {
    return `--capacity requires a positive integer (got: ${raw ?? '<missing>'})`
  }
  return n
}
/**
 * Parse `claude remote-control` CLI arguments into a ParsedArgs record.
 *
 * Value-taking flags accept both `--flag value` and `--flag=value` forms.
 * --session-id and --continue are only recognized when feature('KAIROS')
 * is compiled in; with the feature off they fall through to the
 * unknown-argument error. On any error this returns the flags parsed so
 * far plus an `error` message (via makeError) rather than throwing.
 */
export function parseArgs(args: string[]): ParsedArgs {
  let verbose = false
  let sandbox = false
  let debugFile: string | undefined
  let sessionTimeoutMs: number | undefined
  let permissionMode: string | undefined
  let name: string | undefined
  let help = false
  let spawnMode: SpawnMode | undefined
  let capacity: number | undefined
  let createSessionInDir: boolean | undefined
  let sessionId: string | undefined
  let continueSession = false
  for (let i = 0; i < args.length; i++) {
    const arg = args[i]!
    if (arg === '--help' || arg === '-h') {
      help = true
    } else if (arg === '--verbose' || arg === '-v') {
      verbose = true
    } else if (arg === '--sandbox') {
      sandbox = true
    } else if (arg === '--no-sandbox') {
      sandbox = false
    } else if (arg === '--debug-file' && i + 1 < args.length) {
      // Consume the following token as the value; resolve to an absolute path.
      debugFile = resolve(args[++i]!)
    } else if (arg.startsWith('--debug-file=')) {
      debugFile = resolve(arg.slice('--debug-file='.length))
    } else if (arg === '--session-timeout' && i + 1 < args.length) {
      // CLI value is in seconds; stored as milliseconds.
      sessionTimeoutMs = parseInt(args[++i]!, 10) * 1000
    } else if (arg.startsWith('--session-timeout=')) {
      sessionTimeoutMs =
        parseInt(arg.slice('--session-timeout='.length), 10) * 1000
    } else if (arg === '--permission-mode' && i + 1 < args.length) {
      permissionMode = args[++i]!
    } else if (arg.startsWith('--permission-mode=')) {
      permissionMode = arg.slice('--permission-mode='.length)
    } else if (arg === '--name' && i + 1 < args.length) {
      name = args[++i]!
    } else if (
      // KAIROS-gated resume flags start here; without the feature these
      // branches are skipped and the flags hit the unknown-argument error.
      feature('KAIROS') &&
      arg === '--session-id' &&
      i + 1 < args.length
    ) {
      sessionId = args[++i]!
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && arg.startsWith('--session-id=')) {
      sessionId = arg.slice('--session-id='.length)
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && (arg === '--continue' || arg === '-c')) {
      continueSession = true
    } else if (arg === '--spawn' || arg.startsWith('--spawn=')) {
      if (spawnMode !== undefined) {
        return makeError('--spawn may only be specified once')
      }
      const raw = arg.startsWith('--spawn=')
        ? arg.slice('--spawn='.length)
        : args[++i]
      const v = parseSpawnValue(raw)
      if (v === 'single-session' || v === 'same-dir' || v === 'worktree') {
        spawnMode = v
      } else {
        // parseSpawnValue returned an error message string.
        return makeError(v)
      }
    } else if (arg === '--capacity' || arg.startsWith('--capacity=')) {
      if (capacity !== undefined) {
        return makeError('--capacity may only be specified once')
      }
      const raw = arg.startsWith('--capacity=')
        ? arg.slice('--capacity='.length)
        : args[++i]
      const v = parseCapacityValue(raw)
      if (typeof v === 'number') capacity = v
      else return makeError(v)
    } else if (arg === '--create-session-in-dir') {
      createSessionInDir = true
    } else if (arg === '--no-create-session-in-dir') {
      createSessionInDir = false
    } else {
      return makeError(
        `Unknown argument: ${arg}\nRun 'claude remote-control --help' for usage.`,
      )
    }
  }
  // Note: gate check for --spawn/--capacity/--create-session-in-dir is in bridgeMain
  // (gate-aware error). Flag cross-validation happens here.
  // --capacity only makes sense for multi-session modes.
  if (spawnMode === 'single-session' && capacity !== undefined) {
    return makeError(
      `--capacity cannot be used with --spawn=session (single-session mode has fixed capacity 1).`,
    )
  }
  // --session-id / --continue resume a specific session on its original
  // environment; incompatible with spawn-related flags (which configure
  // fresh session creation), and mutually exclusive with each other.
  if (
    (sessionId || continueSession) &&
    (spawnMode !== undefined ||
      capacity !== undefined ||
      createSessionInDir !== undefined)
  ) {
    return makeError(
      `--session-id and --continue cannot be used with --spawn, --capacity, or --create-session-in-dir.`,
    )
  }
  if (sessionId && continueSession) {
    return makeError(`--session-id and --continue cannot be used together.`)
  }
  return {
    verbose,
    sandbox,
    debugFile,
    sessionTimeoutMs,
    permissionMode,
    name,
    spawnMode,
    capacity,
    createSessionInDir,
    sessionId,
    continueSession,
    help,
  }
  // Closure over the flag locals above: error results still carry
  // everything parsed so far, so callers can report partial state.
  function makeError(error: string): ParsedArgs {
    return {
      verbose,
      sandbox,
      debugFile,
      sessionTimeoutMs,
      permissionMode,
      name,
      spawnMode,
      capacity,
      createSessionInDir,
      sessionId,
      continueSession,
      help,
      error,
    }
  }
}
/**
 * Print `claude remote-control --help` text to stdout.
 *
 * The help body is assembled dynamically: permission modes come from a
 * lazily-imported permissions module, multi-session sections depend on the
 * async isMultiSessionSpawnEnabled() check, and the --session-id/--continue
 * lines are shown only when feature('KAIROS') is compiled in.
 */
async function printHelp(): Promise<void> {
  // Use EXTERNAL_PERMISSION_MODES for help text — internal modes (bubble)
  // are ant-only and auto is feature-gated; they're still accepted by validation.
  const { EXTERNAL_PERMISSION_MODES } = await import('../types/permissions.js')
  const modes = EXTERNAL_PERMISSION_MODES.join(', ')
  const showServer = await isMultiSessionSpawnEnabled()
  // Flag descriptions shown only when the multi-session gate is on.
  const serverOptions = showServer
    ? ` --spawn <mode> Spawn mode: same-dir, worktree, session
(default: same-dir)
 --capacity <N> Max concurrent sessions in worktree or
same-dir mode (default: ${SPAWN_SESSIONS_DEFAULT})
 --[no-]create-session-in-dir Pre-create a session in the current
directory; in worktree mode this session
stays in cwd while on-demand sessions get
isolated worktrees (default: on)
`
    : ''
  const serverDescription = showServer
    ? `
Remote Control runs as a persistent server that accepts multiple concurrent
sessions in the current directory. One session is pre-created on start so
you have somewhere to type immediately. Use --spawn=worktree to isolate
each on-demand session in its own git worktree, or --spawn=session for
the classic single-session mode (exits when that session ends). Press 'w'
during runtime to toggle between same-dir and worktree.
`
    : ''
  const serverNote = showServer
    ? ` - Worktree mode requires a git repository or WorktreeCreate/WorktreeRemove hooks
`
    : ''
  const help = `
Remote Control - Connect your local environment to claude.ai/code
USAGE
  claude remote-control [options]
OPTIONS
  --name <name> Name for the session (shown in claude.ai/code)
${
  feature('KAIROS')
    ? ` -c, --continue Resume the last session in this directory
 --session-id <id> Resume a specific session by ID (cannot be
used with spawn flags or --continue)
`
    : ''
} --permission-mode <mode> Permission mode for spawned sessions
(${modes})
 --debug-file <path> Write debug logs to file
 -v, --verbose Enable verbose output
 -h, --help Show this help
${serverOptions}
DESCRIPTION
  Remote Control allows you to control sessions on your local device from
  claude.ai/code (https://claude.ai/code). Run this command in the
  directory you want to work in, then connect from the Claude app or web.
${serverDescription}
NOTES
  - You must be logged in with a Claude account that has a subscription
  - Run \`claude\` first in the directory to accept the workspace trust dialog
${serverNote}`
  // biome-ignore lint/suspicious/noConsole: intentional help output
  console.log(help)
}
// Maximum display width for a derived session title.
const TITLE_MAX_LEN = 80
/**
 * Derive a session title from the first user message: whitespace runs
 * (including newlines/tabs, which would break the single-line status
 * display) collapse to single spaces, then the result is truncated to
 * TITLE_MAX_LEN display columns.
 */
function deriveSessionTitle(text: string): string {
  const singleLine = text.replace(/\s+/g, ' ').trim()
  return truncateToWidth(singleLine, TITLE_MAX_LEN)
}
/**
 * One-shot fetch of a session's title via GET /v1/sessions/{id}.
 *
 * Goes through `getBridgeSession` from createSession.ts (ccr-byoc headers +
 * org UUID) rather than the environments-level bridgeApi client, whose
 * headers make the Sessions API return 404. Resolves to undefined when the
 * session has no title yet (or the title is empty) — the caller then falls
 * back to deriving a title from the first user message. Fetch failures
 * reject; callers attach their own .catch.
 */
async function fetchSessionTitle(
  compatSessionId: string,
  baseUrl: string,
): Promise<string | undefined> {
  const createSession = await import('./createSession.js')
  const session = await createSession.getBridgeSession(compatSessionId, {
    baseUrl,
  })
  const title = session?.title
  return title ? title : undefined
}
/**
 * Interactive CLI entrypoint for the Remote Control bridge.
 *
 * Flow: parse flags β validate permission mode β enable config/analytics β
 * gate-check multi-session flags β verify workspace trust and auth β run
 * first-run consent + spawn-mode dialogs β resolve resume target (via
 * --continue pointer or --session-id) β register a bridge environment β
 * reconnect or pre-create a session β run the poll loop until SIGINT/SIGTERM.
 * Exits the process explicitly (the bridge bypasses init.ts shutdown).
 *
 * @param args CLI argument vector (already stripped of the subcommand).
 */
export async function bridgeMain(args: string[]): Promise<void> {
  const parsed = parseArgs(args)
  if (parsed.help) {
    await printHelp()
    return
  }
  if (parsed.error) {
    // biome-ignore lint/suspicious/noConsole: intentional error output
    console.error(`Error: ${parsed.error}`)
    // eslint-disable-next-line custom-rules/no-process-exit
    process.exit(1)
  }
  const {
    verbose,
    sandbox,
    debugFile,
    sessionTimeoutMs,
    permissionMode,
    name,
    spawnMode: parsedSpawnMode,
    capacity: parsedCapacity,
    createSessionInDir: parsedCreateSessionInDir,
    sessionId: parsedSessionId,
    continueSession,
  } = parsed
  // Mutable so --continue can set it from the pointer file. The #20460
  // resume flow below then treats it the same as an explicit --session-id.
  let resumeSessionId = parsedSessionId
  // When --continue found a pointer, this is the directory it came from
  // (may be a worktree sibling, not `dir`). On resume-flow deterministic
  // failure, clear THIS file so --continue doesn't keep hitting the same
  // dead session. Undefined for explicit --session-id (leaves pointer alone).
  let resumePointerDir: string | undefined
  const usedMultiSessionFeature =
    parsedSpawnMode !== undefined ||
    parsedCapacity !== undefined ||
    parsedCreateSessionInDir !== undefined
  // Validate permission mode early so the user gets an error before
  // the bridge starts polling for work.
  if (permissionMode !== undefined) {
    const { PERMISSION_MODES } = await import('../types/permissions.js')
    const valid: readonly string[] = PERMISSION_MODES
    if (!valid.includes(permissionMode)) {
      // biome-ignore lint/suspicious/noConsole: intentional error output
      console.error(
        `Error: Invalid permission mode '${permissionMode}'. Valid modes: ${valid.join(', ')}`,
      )
      // eslint-disable-next-line custom-rules/no-process-exit
      process.exit(1)
    }
  }
  const dir = resolve('.')
  // The bridge fast-path bypasses init.ts, so we must enable config reading
  // before any code that transitively calls getGlobalConfig()
  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
    '../utils/config.js'
  )
  enableConfigs()
  // Initialize analytics and error reporting sinks. The bridge bypasses the
  // setup() init flow, so we call initSinks() directly to attach sinks here.
  const { initSinks } = await import('../utils/sinks.js')
  initSinks()
  // Gate-aware validation: --spawn / --capacity / --create-session-in-dir require
  // the multi-session gate. parseArgs has already validated flag combinations;
  // here we only check the gate since that requires an async GrowthBook call.
  // Runs after enableConfigs() (GrowthBook cache reads global config) and after
  // initSinks() so the denial event can be enqueued.
  const multiSessionEnabled = await isMultiSessionSpawnEnabled()
  if (usedMultiSessionFeature && !multiSessionEnabled) {
    await logEventAsync('tengu_bridge_multi_session_denied', {
      used_spawn: parsedSpawnMode !== undefined,
      used_capacity: parsedCapacity !== undefined,
      used_create_session_in_dir: parsedCreateSessionInDir !== undefined,
    })
    // logEventAsync only enqueues β process.exit() discards buffered events.
    // Flush explicitly, capped at 500ms to match gracefulShutdown.ts.
    // (sleep() doesn't unref its timer, but process.exit() follows immediately
    // so the ref'd timer can't delay shutdown.)
    await Promise.race([
      Promise.all([shutdown1PEventLogging(), shutdownDatadog()]),
      sleep(500, undefined, { unref: true }),
    ]).catch(() => {})
    // biome-ignore lint/suspicious/noConsole: intentional error output
    console.error(
      'Error: Multi-session Remote Control is not enabled for your account yet.',
    )
    // eslint-disable-next-line custom-rules/no-process-exit
    process.exit(1)
  }
  // Set the bootstrap CWD so that trust checks, project config lookups, and
  // git utilities (getBranch, getRemoteUrl) resolve against the correct path.
  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
  setOriginalCwd(dir)
  setCwdState(dir)
  // The bridge bypasses main.tsx (which renders the interactive TrustDialog via showSetupScreens),
  // so we must verify trust was previously established by a normal `claude` session.
  if (!checkHasTrustDialogAccepted()) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.error(
      `Error: Workspace not trusted. Please run \`claude\` in ${dir} first to review and accept the workspace trust dialog.`,
    )
    // eslint-disable-next-line custom-rules/no-process-exit
    process.exit(1)
  }
  // Resolve auth
  const { clearOAuthTokenCache, checkAndRefreshOAuthTokenIfNeeded } =
    await import('../utils/auth.js')
  const { getBridgeAccessToken, getBridgeBaseUrl } = await import(
    './bridgeConfig.js'
  )
  const bridgeToken = getBridgeAccessToken()
  if (!bridgeToken) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.error(BRIDGE_LOGIN_ERROR)
    // eslint-disable-next-line custom-rules/no-process-exit
    process.exit(1)
  }
  // First-time remote dialog β explain what bridge does and get consent
  const {
    getGlobalConfig,
    saveGlobalConfig,
    getCurrentProjectConfig,
    saveCurrentProjectConfig,
  } = await import('../utils/config.js')
  if (!getGlobalConfig().remoteDialogSeen) {
    const readline = await import('readline')
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    })
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.log(
      '\nRemote Control lets you access this CLI session from the web (claude.ai/code)\nor the Claude app, so you can pick up where you left off on any device.\n\nYou can disconnect remote access anytime by running /remote-control again.\n',
    )
    const answer = await new Promise<string>(resolve => {
      rl.question('Enable Remote Control? (y/n) ', resolve)
    })
    rl.close()
    // Mark the dialog seen regardless of the answer so declining doesn't
    // re-prompt on the next launch.
    saveGlobalConfig(current => {
      if (current.remoteDialogSeen) return current
      return { ...current, remoteDialogSeen: true }
    })
    if (answer.toLowerCase() !== 'y' && answer.toLowerCase() !== 'yes') {
      // eslint-disable-next-line custom-rules/no-process-exit
      process.exit(0)
    }
  }
  // --continue: resolve the most recent session from the crash-recovery
  // pointer and chain into the #20460 --session-id flow. Worktree-aware:
  // checks current dir first (fast path, zero exec), then fans out to git
  // worktree siblings if that misses β the REPL bridge writes to
  // getOriginalCwd() which EnterWorktreeTool/activeWorktreeSession can
  // point at a worktree while the user's shell is at the repo root.
  // KAIROS-gated at parseArgs β continueSession is always false in external
  // builds, so this block tree-shakes.
  if (feature('KAIROS') && continueSession) {
    const { readBridgePointerAcrossWorktrees } = await import(
      './bridgePointer.js'
    )
    const found = await readBridgePointerAcrossWorktrees(dir)
    if (!found) {
      // biome-ignore lint/suspicious/noConsole: intentional error output
      console.error(
        `Error: No recent session found in this directory or its worktrees. Run \`claude remote-control\` to start a new one.`,
      )
      // eslint-disable-next-line custom-rules/no-process-exit
      process.exit(1)
    }
    const { pointer, dir: pointerDir } = found
    const ageMin = Math.round(pointer.ageMs / 60_000)
    const ageStr = ageMin < 60 ? `${ageMin}m` : `${Math.round(ageMin / 60)}h`
    const fromWt = pointerDir !== dir ? ` from worktree ${pointerDir}` : ''
    // biome-ignore lint/suspicious/noConsole: intentional info output
    console.error(
      `Resuming session ${pointer.sessionId} (${ageStr} ago)${fromWt}\u2026`,
    )
    resumeSessionId = pointer.sessionId
    // Track where the pointer came from so the #20460 exit(1) paths below
    // clear the RIGHT file on deterministic failure β otherwise --continue
    // would keep hitting the same dead session. May be a worktree sibling.
    resumePointerDir = pointerDir
  }
  // In production, baseUrl is the Anthropic API (from OAuth config).
  // CLAUDE_BRIDGE_BASE_URL overrides this for ant local dev only.
  const baseUrl = getBridgeBaseUrl()
  // For non-localhost targets, require HTTPS to protect credentials.
  if (
    baseUrl.startsWith('http://') &&
    !baseUrl.includes('localhost') &&
    !baseUrl.includes('127.0.0.1')
  ) {
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.error(
      'Error: Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
    )
    // eslint-disable-next-line custom-rules/no-process-exit
    process.exit(1)
  }
  // Session ingress URL for WebSocket connections. In production this is the
  // same as baseUrl (Envoy routes /v1/session_ingress/* to session-ingress).
  // Locally, session-ingress runs on a different port (9413) than the
  // contain-provide-api (8211), so CLAUDE_BRIDGE_SESSION_INGRESS_URL must be
  // set explicitly. Ant-only, matching CLAUDE_BRIDGE_BASE_URL.
  const sessionIngressUrl =
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      : baseUrl
  const { getBranch, getRemoteUrl, findGitRoot } = await import(
    '../utils/git.js'
  )
  // Precheck worktree availability for the first-run dialog and the `w`
  // toggle. Unconditional so we know upfront whether worktree is an option.
  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
  const worktreeAvailable = hasWorktreeCreateHook() || findGitRoot(dir) !== null
  // Load saved per-project spawn-mode preference. Gated by multiSessionEnabled
  // so a GrowthBook rollback cleanly reverts users to single-session β
  // otherwise a saved pref would silently re-enable multi-session behavior
  // (worktree isolation, 32 max sessions, w toggle) despite the gate being off.
  // Also guard against a stale worktree pref left over from when this dir WAS
  // a git repo (or the user copied config) β clear it on disk so the warning
  // doesn't repeat on every launch.
  let savedSpawnMode = multiSessionEnabled
    ? getCurrentProjectConfig().remoteControlSpawnMode
    : undefined
  if (savedSpawnMode === 'worktree' && !worktreeAvailable) {
    // biome-ignore lint/suspicious/noConsole: intentional warning output
    console.error(
      'Warning: Saved spawn mode is worktree but this directory is not a git repository. Falling back to same-dir.',
    )
    savedSpawnMode = undefined
    saveCurrentProjectConfig(current => {
      if (current.remoteControlSpawnMode === undefined) return current
      return { ...current, remoteControlSpawnMode: undefined }
    })
  }
  // First-run spawn-mode choice: ask once per project when the choice is
  // meaningful (gate on, both modes available, no explicit override, not
  // resuming). Saves to ProjectConfig so subsequent runs skip this.
  if (
    multiSessionEnabled &&
    !savedSpawnMode &&
    worktreeAvailable &&
    parsedSpawnMode === undefined &&
    !resumeSessionId &&
    process.stdin.isTTY
  ) {
    const readline = await import('readline')
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    })
    // biome-ignore lint/suspicious/noConsole: intentional dialog output
    console.log(
      `\nClaude Remote Control is launching in spawn mode which lets you create new sessions in this project from Claude Code on Web or your Mobile app. Learn more here: https://code.claude.com/docs/en/remote-control\n\n` +
        `Spawn mode for this project:\n` +
        `  [1] same-dir \u2014 sessions share the current directory (default)\n` +
        `  [2] worktree \u2014 each session gets an isolated git worktree\n\n` +
        `This can be changed later or explicitly set with --spawn=same-dir or --spawn=worktree.\n`,
    )
    const answer = await new Promise<string>(resolve => {
      rl.question('Choose [1/2] (default: 1): ', resolve)
    })
    rl.close()
    // Anything other than an exact '2' falls back to the default mode.
    const chosen: 'same-dir' | 'worktree' =
      answer.trim() === '2' ? 'worktree' : 'same-dir'
    savedSpawnMode = chosen
    logEvent('tengu_bridge_spawn_mode_chosen', {
      spawn_mode:
        chosen as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    saveCurrentProjectConfig(current => {
      if (current.remoteControlSpawnMode === chosen) return current
      return { ...current, remoteControlSpawnMode: chosen }
    })
  }
  // Determine effective spawn mode.
  // Precedence: resume > explicit --spawn > saved project pref > gate default
  //  - resuming via --continue / --session-id: always single-session (resume
  //    targets one specific session in its original directory)
  //  - explicit --spawn flag: use that value directly (does not persist)
  //  - saved ProjectConfig.remoteControlSpawnMode: set by first-run dialog or `w`
  //  - default with gate on: same-dir (persistent multi-session, shared cwd)
  //  - default with gate off: single-session (unchanged legacy behavior)
  // Track how spawn mode was determined, for rollout analytics.
  type SpawnModeSource = 'resume' | 'flag' | 'saved' | 'gate_default'
  let spawnModeSource: SpawnModeSource
  let spawnMode: SpawnMode
  if (resumeSessionId) {
    spawnMode = 'single-session'
    spawnModeSource = 'resume'
  } else if (parsedSpawnMode !== undefined) {
    spawnMode = parsedSpawnMode
    spawnModeSource = 'flag'
  } else if (savedSpawnMode !== undefined) {
    spawnMode = savedSpawnMode
    spawnModeSource = 'saved'
  } else {
    spawnMode = multiSessionEnabled ? 'same-dir' : 'single-session'
    spawnModeSource = 'gate_default'
  }
  const maxSessions =
    spawnMode === 'single-session'
      ? 1
      : (parsedCapacity ?? SPAWN_SESSIONS_DEFAULT)
  // Pre-create an empty session on start so the user has somewhere to type
  // immediately, running in the current directory (exempted from worktree
  // creation in the spawn loop). On by default; --no-create-session-in-dir
  // opts out for a pure on-demand server where every session is isolated.
  // The effectiveResumeSessionId guard at the creation site handles the
  // resume case (skip creation when resume succeeded; fall through to
  // fresh creation on env-mismatch fallback).
  const preCreateSession = parsedCreateSessionInDir ?? true
  // Without --continue: a leftover pointer means the previous run didn't
  // shut down cleanly (crash, kill -9, terminal closed). Clear it so the
  // stale env doesn't linger past its relevance. Runs in all modes
  // (clearBridgePointer is a no-op when no file exists) β covers the
  // gate-transition case where a user crashed in single-session mode then
  // starts fresh in worktree mode. Only single-session mode writes new
  // pointers.
  if (!resumeSessionId) {
    const { clearBridgePointer } = await import('./bridgePointer.js')
    await clearBridgePointer(dir)
  }
  // Worktree mode requires either git or WorktreeCreate/WorktreeRemove hooks.
  // Only reachable via explicit --spawn=worktree (default is same-dir);
  // saved worktree pref was already guarded above.
  if (spawnMode === 'worktree' && !worktreeAvailable) {
    // biome-ignore lint/suspicious/noConsole: intentional error output
    console.error(
      `Error: Worktree mode requires a git repository or WorktreeCreate hooks configured. Use --spawn=session for single-session mode.`,
    )
    // eslint-disable-next-line custom-rules/no-process-exit
    process.exit(1)
  }
  const branch = await getBranch()
  const gitRepoUrl = await getRemoteUrl()
  const machineName = hostname()
  const bridgeId = randomUUID()
  const { handleOAuth401Error } = await import('../utils/auth.js')
  const api = createBridgeApiClient({
    baseUrl,
    getAccessToken: getBridgeAccessToken,
    runnerVersion: MACRO.VERSION,
    onDebug: logForDebugging,
    onAuth401: handleOAuth401Error,
    getTrustedDeviceToken,
  })
  // When resuming a session via --session-id, fetch it to learn its
  // environment_id and reuse that for registration (idempotent on the
  // backend). Left undefined otherwise β the backend rejects
  // client-generated UUIDs and will allocate a fresh environment.
  // feature('KAIROS') gate: --session-id is ant-only; parseArgs already
  // rejects the flag when the gate is off, so resumeSessionId is always
  // undefined here in external builds β this guard is for tree-shaking.
  let reuseEnvironmentId: string | undefined
  if (feature('KAIROS') && resumeSessionId) {
    try {
      validateBridgeId(resumeSessionId, 'sessionId')
    } catch {
      // biome-ignore lint/suspicious/noConsole: intentional error output
      console.error(
        `Error: Invalid session ID "${resumeSessionId}". Session IDs must not contain unsafe characters.`,
      )
      // eslint-disable-next-line custom-rules/no-process-exit
      process.exit(1)
    }
    // Proactively refresh the OAuth token β getBridgeSession uses raw axios
    // without the withOAuthRetry 401-refresh logic. An expired-but-present
    // token would otherwise produce a misleading "not found" error.
    await checkAndRefreshOAuthTokenIfNeeded()
    clearOAuthTokenCache()
    const { getBridgeSession } = await import('./createSession.js')
    const session = await getBridgeSession(resumeSessionId, {
      baseUrl,
      getAccessToken: getBridgeAccessToken,
    })
    if (!session) {
      // Session gone on server β pointer is stale. Clear it so the user
      // isn't re-prompted next launch. (Explicit --session-id leaves the
      // pointer alone β it's an independent file they may not even have.)
      // resumePointerDir may be a worktree sibling β clear THAT file.
      if (resumePointerDir) {
        const { clearBridgePointer } = await import('./bridgePointer.js')
        await clearBridgePointer(resumePointerDir)
      }
      // biome-ignore lint/suspicious/noConsole: intentional error output
      console.error(
        `Error: Session ${resumeSessionId} not found. It may have been archived or expired, or your login may have lapsed (run \`claude /login\`).`,
      )
      // eslint-disable-next-line custom-rules/no-process-exit
      process.exit(1)
    }
    if (!session.environment_id) {
      if (resumePointerDir) {
        const { clearBridgePointer } = await import('./bridgePointer.js')
        await clearBridgePointer(resumePointerDir)
      }
      // biome-ignore lint/suspicious/noConsole: intentional error output
      console.error(
        `Error: Session ${resumeSessionId} has no environment_id. It may never have been attached to a bridge.`,
      )
      // eslint-disable-next-line custom-rules/no-process-exit
      process.exit(1)
    }
    reuseEnvironmentId = session.environment_id
    logForDebugging(
      `[bridge:init] Resuming session ${resumeSessionId} on environment ${reuseEnvironmentId}`,
    )
  }
  const config: BridgeConfig = {
    dir,
    machineName,
    branch,
    gitRepoUrl,
    maxSessions,
    spawnMode,
    verbose,
    sandbox,
    bridgeId,
    workerType: 'claude_code',
    environmentId: randomUUID(),
    reuseEnvironmentId,
    apiBaseUrl: baseUrl,
    sessionIngressUrl,
    debugFile,
    sessionTimeoutMs,
  }
  logForDebugging(
    `[bridge:init] bridgeId=${bridgeId}${reuseEnvironmentId ? ` reuseEnvironmentId=${reuseEnvironmentId}` : ''} dir=${dir} branch=${branch} gitRepoUrl=${gitRepoUrl} machine=${machineName}`,
  )
  logForDebugging(
    `[bridge:init] apiBaseUrl=${baseUrl} sessionIngressUrl=${sessionIngressUrl}`,
  )
  logForDebugging(
    `[bridge:init] sandbox=${sandbox}${debugFile ? ` debugFile=${debugFile}` : ''}`,
  )
  // Register the bridge environment before entering the poll loop.
  let environmentId: string
  let environmentSecret: string
  try {
    const reg = await api.registerBridgeEnvironment(config)
    environmentId = reg.environment_id
    environmentSecret = reg.environment_secret
  } catch (err) {
    logEvent('tengu_bridge_registration_failed', {
      status: err instanceof BridgeFatalError ? err.status : undefined,
    })
    // Registration failures are fatal β print a clean message instead of a stack trace.
    // biome-ignore lint/suspicious/noConsole: intentional console output
    console.error(
      err instanceof BridgeFatalError && err.status === 404
        ? 'Remote Control environments are not available for your account.'
        : `Error: ${errorMessage(err)}`,
    )
    // eslint-disable-next-line custom-rules/no-process-exit
    process.exit(1)
  }
  // Tracks whether the --session-id resume flow completed successfully.
  // Used below to skip fresh session creation and seed initialSessionId.
  // Cleared on env mismatch so we gracefully fall back to a new session.
  let effectiveResumeSessionId: string | undefined
  if (feature('KAIROS') && resumeSessionId) {
    if (reuseEnvironmentId && environmentId !== reuseEnvironmentId) {
      // Backend returned a different environment_id β the original env
      // expired or was reaped. Reconnect won't work against the new env
      // (session is bound to the old one). Log to sentry for visibility
      // and fall through to fresh session creation on the new env.
      logError(
        new Error(
          `Bridge resume env mismatch: requested ${reuseEnvironmentId}, backend returned ${environmentId}. Falling back to fresh session.`,
        ),
      )
      // biome-ignore lint/suspicious/noConsole: intentional warning output
      console.warn(
        `Warning: Could not resume session ${resumeSessionId} β its environment has expired. Creating a fresh session instead.`,
      )
      // Don't deregister β we're going to use this new environment.
      // effectiveResumeSessionId stays undefined β fresh session path below.
    } else {
      // Force-stop any stale worker instances for this session and re-queue
      // it so our poll loop picks it up. Must happen after registration so
      // the backend knows a live worker exists for the environment.
      //
      // The pointer stores a session_* ID but /bridge/reconnect looks
      // sessions up by their infra tag (cse_*) when ccr_v2_compat_enabled
      // is on. Try both; the conversion is a no-op if already cse_*.
      const infraResumeId = toInfraSessionId(resumeSessionId)
      const reconnectCandidates =
        infraResumeId === resumeSessionId
          ? [resumeSessionId]
          : [resumeSessionId, infraResumeId]
      let reconnected = false
      let lastReconnectErr: unknown
      for (const candidateId of reconnectCandidates) {
        try {
          await api.reconnectSession(environmentId, candidateId)
          logForDebugging(
            `[bridge:init] Session ${candidateId} re-queued via bridge/reconnect`,
          )
          effectiveResumeSessionId = resumeSessionId
          reconnected = true
          break
        } catch (err) {
          lastReconnectErr = err
          logForDebugging(
            `[bridge:init] reconnectSession(${candidateId}) failed: ${errorMessage(err)}`,
          )
        }
      }
      if (!reconnected) {
        const err = lastReconnectErr
        // Do NOT deregister on transient reconnect failure β at this point
        // environmentId IS the session's own environment. Deregistering
        // would make retry impossible. The backend's 4h TTL cleans up.
        const isFatal = err instanceof BridgeFatalError
        // Clear pointer only on fatal reconnect failure. Transient failures
        // ("try running the same command again") should keep the pointer so
        // next launch re-prompts β that IS the retry mechanism.
        if (resumePointerDir && isFatal) {
          const { clearBridgePointer } = await import('./bridgePointer.js')
          await clearBridgePointer(resumePointerDir)
        }
        // biome-ignore lint/suspicious/noConsole: intentional error output
        console.error(
          isFatal
            ? `Error: ${errorMessage(err)}`
            : `Error: Failed to reconnect session ${resumeSessionId}: ${errorMessage(err)}\nThe session may still be resumable β try running the same command again.`,
        )
        // eslint-disable-next-line custom-rules/no-process-exit
        process.exit(1)
      }
    }
  }
  logForDebugging(
    `[bridge:init] Registered, server environmentId=${environmentId}`,
  )
  const startupPollConfig = getPollIntervalConfig()
  logEvent('tengu_bridge_started', {
    max_sessions: config.maxSessions,
    has_debug_file: !!config.debugFile,
    sandbox: config.sandbox,
    verbose: config.verbose,
    heartbeat_interval_ms:
      startupPollConfig.non_exclusive_heartbeat_interval_ms,
    spawn_mode:
      config.spawnMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    spawn_mode_source:
      spawnModeSource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    multi_session_gate: multiSessionEnabled,
    pre_create_session: preCreateSession,
    worktree_available: worktreeAvailable,
  })
  logForDiagnosticsNoPII('info', 'bridge_started', {
    max_sessions: config.maxSessions,
    sandbox: config.sandbox,
    spawn_mode: config.spawnMode,
  })
  const spawner = createSessionSpawner({
    execPath: process.execPath,
    scriptArgs: spawnScriptArgs(),
    env: process.env,
    verbose,
    sandbox,
    debugFile,
    permissionMode,
    onDebug: logForDebugging,
    onActivity: (sessionId, activity) => {
      logForDebugging(
        `[bridge:activity] sessionId=${sessionId} ${activity.type} ${activity.summary}`,
      )
    },
    onPermissionRequest: (sessionId, request, _accessToken) => {
      logForDebugging(
        `[bridge:perm] sessionId=${sessionId} tool=${request.request.tool_name} request_id=${request.request_id} (not auto-approving)`,
      )
    },
  })
  const logger = createBridgeLogger({ verbose })
  const { parseGitHubRepository } = await import('../utils/detectRepository.js')
  const ownerRepo = gitRepoUrl ? parseGitHubRepository(gitRepoUrl) : null
  // Use the repo name from the parsed owner/repo, or fall back to the dir basename
  const repoName = ownerRepo ? ownerRepo.split('/').pop()! : basename(dir)
  logger.setRepoInfo(repoName, branch)
  // `w` toggle is available iff we're in a multi-session mode AND worktree
  // is a valid option. When unavailable, the mode suffix and hint are hidden.
  const toggleAvailable = spawnMode !== 'single-session' && worktreeAvailable
  if (toggleAvailable) {
    // Safe cast: spawnMode is not single-session (checked above), and the
    // saved-worktree-in-non-git guard + exit check above ensure worktree
    // is only reached when available.
    logger.setSpawnModeDisplay(spawnMode as 'same-dir' | 'worktree')
  }
  // Listen for keys: space toggles QR code, w toggles spawn mode
  const onStdinData = (data: Buffer): void => {
    if (data[0] === 0x03 || data[0] === 0x04) {
      // Ctrl+C / Ctrl+D β trigger graceful shutdown
      process.emit('SIGINT')
      return
    }
    if (data[0] === 0x20 /* space */) {
      logger.toggleQr()
      return
    }
    if (data[0] === 0x77 /* 'w' */) {
      if (!toggleAvailable) return
      const newMode: 'same-dir' | 'worktree' =
        config.spawnMode === 'same-dir' ? 'worktree' : 'same-dir'
      config.spawnMode = newMode
      logEvent('tengu_bridge_spawn_mode_toggled', {
        spawn_mode:
          newMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      logger.logStatus(
        newMode === 'worktree'
          ? 'Spawn mode: worktree (new sessions get isolated git worktrees)'
          : 'Spawn mode: same-dir (new sessions share the current directory)',
      )
      logger.setSpawnModeDisplay(newMode)
      logger.refreshDisplay()
      // Persist the toggle so the next launch keeps the chosen mode.
      saveCurrentProjectConfig(current => {
        if (current.remoteControlSpawnMode === newMode) return current
        return { ...current, remoteControlSpawnMode: newMode }
      })
      return
    }
  }
  if (process.stdin.isTTY) {
    process.stdin.setRawMode(true)
    process.stdin.resume()
    process.stdin.on('data', onStdinData)
  }
  const controller = new AbortController()
  const onSigint = (): void => {
    logForDebugging('[bridge:shutdown] SIGINT received, shutting down')
    controller.abort()
  }
  const onSigterm = (): void => {
    logForDebugging('[bridge:shutdown] SIGTERM received, shutting down')
    controller.abort()
  }
  process.on('SIGINT', onSigint)
  process.on('SIGTERM', onSigterm)
  // Auto-create an empty session so the user has somewhere to type
  // immediately (matching /remote-control behavior). Controlled by
  // preCreateSession: on by default; --no-create-session-in-dir opts out.
  // When a --session-id resume succeeded, skip creation entirely β the
  // session already exists and bridge/reconnect has re-queued it.
  // When resume was requested but failed on env mismatch, effectiveResumeSessionId
  // is undefined, so we fall through to fresh session creation (honoring the
  // "Creating a fresh session instead" warning printed above).
  let initialSessionId: string | null =
    feature('KAIROS') && effectiveResumeSessionId
      ? effectiveResumeSessionId
      : null
  if (preCreateSession && !(feature('KAIROS') && effectiveResumeSessionId)) {
    const { createBridgeSession } = await import('./createSession.js')
    try {
      initialSessionId = await createBridgeSession({
        environmentId,
        title: name,
        events: [],
        gitRepoUrl,
        branch,
        signal: controller.signal,
        baseUrl,
        getAccessToken: getBridgeAccessToken,
        permissionMode,
      })
      if (initialSessionId) {
        logForDebugging(
          `[bridge:init] Created initial session ${initialSessionId}`,
        )
      }
    } catch (err) {
      // Pre-creation is best-effort; the poll loop still serves new sessions.
      logForDebugging(
        `[bridge:init] Session creation failed (non-fatal): ${errorMessage(err)}`,
      )
    }
  }
  // Crash-recovery pointer: write immediately so kill -9 at any point
  // after this leaves a recoverable trail. Covers both fresh sessions and
  // resumed ones (so a second crash after resume is still recoverable).
  // Cleared when runBridgeLoop falls through to archive+deregister; left in
  // place on the SIGINT resumable-shutdown return (backup for when the user
  // closes the terminal before copying the printed --session-id hint).
  // Refreshed hourly so a 5h+ session that crashes still has a fresh
  // pointer (staleness checks file mtime, backend TTL is rolling-from-poll).
  let pointerRefreshTimer: ReturnType<typeof setInterval> | null = null
  // Single-session only: --continue forces single-session mode on resume,
  // so a pointer written in multi-session mode would contradict the user's
  // config when they try to resume. The resumable-shutdown path is also
  // gated to single-session (line ~1254) so the pointer would be orphaned.
  if (initialSessionId && spawnMode === 'single-session') {
    const { writeBridgePointer } = await import('./bridgePointer.js')
    const pointerPayload = {
      sessionId: initialSessionId,
      environmentId,
      source: 'standalone' as const,
    }
    await writeBridgePointer(config.dir, pointerPayload)
    pointerRefreshTimer = setInterval(
      writeBridgePointer,
      60 * 60 * 1000,
      config.dir,
      pointerPayload,
    )
    // Don't let the interval keep the process alive on its own.
    pointerRefreshTimer.unref?.()
  }
  try {
    await runBridgeLoop(
      config,
      environmentId,
      environmentSecret,
      api,
      spawner,
      logger,
      controller.signal,
      undefined,
      initialSessionId ?? undefined,
      async () => {
        // Clear the memoized OAuth token cache so we re-read from secure
        // storage, picking up tokens refreshed by child processes.
        clearOAuthTokenCache()
        // Proactively refresh the token if it's expired on disk too.
        await checkAndRefreshOAuthTokenIfNeeded()
        return getBridgeAccessToken()
      },
    )
  } finally {
    if (pointerRefreshTimer !== null) {
      clearInterval(pointerRefreshTimer)
    }
    process.off('SIGINT', onSigint)
    process.off('SIGTERM', onSigterm)
    process.stdin.off('data', onStdinData)
    if (process.stdin.isTTY) {
      process.stdin.setRawMode(false)
    }
    process.stdin.pause()
  }
  // The bridge bypasses init.ts (and its graceful shutdown handler), so we
  // must exit explicitly.
  // eslint-disable-next-line custom-rules/no-process-exit
  process.exit(0)
}
// βββ Headless bridge (daemon worker) ββββββββββββββββββββββββββββββββββββββββ
/**
 * Thrown by runBridgeHeadless for configuration issues the supervisor should
 * NOT retry (trust not accepted, worktree unavailable, http-not-https). The
 * daemon worker catches this and exits with EXIT_CODE_PERMANENT so the
 * supervisor parks the worker instead of respawning it on backoff.
 */
export class BridgeHeadlessPermanentError extends Error {
  constructor(msg: string) {
    super(msg)
    // Set a stable name so catch sites and logs can distinguish this class.
    this.name = 'BridgeHeadlessPermanentError'
  }
}
/** Caller-supplied configuration for the headless (daemon worker) bridge. */
export type HeadlessBridgeOpts = {
  /** Working directory; runBridgeHeadless chdirs here before anything else. */
  dir: string
  /** Optional title for the pre-created session. */
  name?: string
  /** Session placement: shared cwd, or one isolated git worktree per session. */
  spawnMode: 'same-dir' | 'worktree'
  /** Maximum concurrent sessions (becomes BridgeConfig.maxSessions). */
  capacity: number
  /** Permission mode forwarded to the spawner and the pre-created session. */
  permissionMode?: string
  /** Whether spawned sessions run sandboxed. */
  sandbox: boolean
  /** Optional session timeout forwarded to BridgeConfig. */
  sessionTimeoutMs?: number
  /** When true, pre-create an empty session right after registration. */
  createSessionOnStart: boolean
  /** Returns the current access token, or undefined when not logged in. */
  getAccessToken: () => string | undefined
  /** 401 handler passed to the API client β presumably refreshes the failed
   * token and resolves whether a retry should proceed (confirm in bridgeApi). */
  onAuth401: (failedToken: string) => Promise<boolean>
  /** Line-oriented logger (routed to the worker's stdout pipe). */
  log: (s: string) => void
}
/**
 * Non-interactive bridge entrypoint for the `remoteControl` daemon worker.
 *
 * Linear subset of bridgeMain(): no readline dialogs, no stdin key handlers,
 * no TUI, no process.exit(). Config comes from the caller (daemon.json), auth
 * comes via IPC (supervisor's AuthManager), logs go to the worker's stdout
 * pipe. Throws on fatal errors β the worker catches and maps permanent vs
 * transient to the right exit code.
 *
 * Resolves cleanly when `signal` aborts and the poll loop tears down.
 *
 * @param opts Caller-supplied configuration; see HeadlessBridgeOpts.
 * @param signal Abort signal that stops the poll loop for a clean shutdown.
 * @throws BridgeHeadlessPermanentError when the workspace is untrusted, the
 *   base URL is non-localhost HTTP, or worktree mode has no git/hooks support.
 * @throws Error (transient) when no token is available or registration fails.
 */
export async function runBridgeHeadless(
  opts: HeadlessBridgeOpts,
  signal: AbortSignal,
): Promise<void> {
  const { dir, log } = opts
  // Worker inherits the supervisor's CWD. chdir first so git utilities
  // (getBranch/getRemoteUrl) β which read from bootstrap CWD state set
  // below β resolve against the right repo.
  process.chdir(dir)
  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
  setOriginalCwd(dir)
  setCwdState(dir)
  // Enable config reading before anything that transitively reads config.
  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
    '../utils/config.js'
  )
  enableConfigs()
  // Attach analytics/error sinks directly (this path bypasses setup() init).
  const { initSinks } = await import('../utils/sinks.js')
  initSinks()
  if (!checkHasTrustDialogAccepted()) {
    throw new BridgeHeadlessPermanentError(
      `Workspace not trusted: ${dir}. Run \`claude\` in that directory first to accept the trust dialog.`,
    )
  }
  if (!opts.getAccessToken()) {
    // Transient β supervisor's AuthManager may pick up a token on next cycle.
    throw new Error(BRIDGE_LOGIN_ERROR)
  }
  const { getBridgeBaseUrl } = await import('./bridgeConfig.js')
  const baseUrl = getBridgeBaseUrl()
  // For non-localhost targets, require HTTPS to protect credentials.
  if (
    baseUrl.startsWith('http://') &&
    !baseUrl.includes('localhost') &&
    !baseUrl.includes('127.0.0.1')
  ) {
    throw new BridgeHeadlessPermanentError(
      'Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
    )
  }
  // Ant-only session-ingress override, mirroring bridgeMain().
  const sessionIngressUrl =
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      : baseUrl
  const { getBranch, getRemoteUrl, findGitRoot } = await import(
    '../utils/git.js'
  )
  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
  if (opts.spawnMode === 'worktree') {
    // Worktree mode needs either a git repo or WorktreeCreate hooks.
    const worktreeAvailable =
      hasWorktreeCreateHook() || findGitRoot(dir) !== null
    if (!worktreeAvailable) {
      throw new BridgeHeadlessPermanentError(
        `Worktree mode requires a git repository or WorktreeCreate hooks. Directory ${dir} has neither.`,
      )
    }
  }
  const branch = await getBranch()
  const gitRepoUrl = await getRemoteUrl()
  const machineName = hostname()
  const bridgeId = randomUUID()
  const config: BridgeConfig = {
    dir,
    machineName,
    branch,
    gitRepoUrl,
    maxSessions: opts.capacity,
    spawnMode: opts.spawnMode,
    verbose: false,
    sandbox: opts.sandbox,
    bridgeId,
    workerType: 'claude_code',
    environmentId: randomUUID(),
    apiBaseUrl: baseUrl,
    sessionIngressUrl,
    sessionTimeoutMs: opts.sessionTimeoutMs,
  }
  const api = createBridgeApiClient({
    baseUrl,
    getAccessToken: opts.getAccessToken,
    runnerVersion: MACRO.VERSION,
    onDebug: log,
    onAuth401: opts.onAuth401,
    getTrustedDeviceToken,
  })
  // Register the bridge environment before entering the poll loop.
  let environmentId: string
  let environmentSecret: string
  try {
    const reg = await api.registerBridgeEnvironment(config)
    environmentId = reg.environment_id
    environmentSecret = reg.environment_secret
  } catch (err) {
    // Transient β let supervisor backoff-retry.
    throw new Error(`Bridge registration failed: ${errorMessage(err)}`)
  }
  const spawner = createSessionSpawner({
    execPath: process.execPath,
    scriptArgs: spawnScriptArgs(),
    env: process.env,
    verbose: false,
    sandbox: opts.sandbox,
    permissionMode: opts.permissionMode,
    onDebug: log,
  })
  const logger = createHeadlessBridgeLogger(log)
  logger.printBanner(config, environmentId)
  // Optionally pre-create an empty session; failures are non-fatal β the
  // poll loop still serves on-demand session creation.
  let initialSessionId: string | undefined
  if (opts.createSessionOnStart) {
    const { createBridgeSession } = await import('./createSession.js')
    try {
      const sid = await createBridgeSession({
        environmentId,
        title: opts.name,
        events: [],
        gitRepoUrl,
        branch,
        signal,
        baseUrl,
        getAccessToken: opts.getAccessToken,
        permissionMode: opts.permissionMode,
      })
      if (sid) {
        initialSessionId = sid
        log(`created initial session ${sid}`)
      }
    } catch (err) {
      log(`session pre-creation failed (non-fatal): ${errorMessage(err)}`)
    }
  }
  await runBridgeLoop(
    config,
    environmentId,
    environmentSecret,
    api,
    spawner,
    logger,
    signal,
    undefined,
    initialSessionId,
    async () => opts.getAccessToken(),
  )
}
/**
 * Adapt the BridgeLogger interface onto a single line-oriented log sink.
 * Event-style callbacks are formatted into one-line messages; all
 * display/status-surface callbacks are no-ops (headless mode has no UI).
 */
function createHeadlessBridgeLogger(log: (s: string) => void): BridgeLogger {
  const ignore = (): void => {}
  const logger: BridgeLogger = {
    printBanner(cfg, envId) {
      log(
        `registered environmentId=${envId} dir=${cfg.dir} spawnMode=${cfg.spawnMode} capacity=${cfg.maxSessions}`,
      )
    },
    logSessionStart(id, _prompt) {
      log(`session start ${id}`)
    },
    logSessionComplete(id, ms) {
      log(`session complete ${id} (${ms}ms)`)
    },
    logSessionFailed(id, err) {
      log(`session failed ${id}: ${err}`)
    },
    logStatus: log,
    logVerbose: log,
    logError(s) {
      log(`error: ${s}`)
    },
    logReconnected(ms) {
      log(`reconnected after ${ms}ms`)
    },
    addSession(id, _url) {
      log(`session attached ${id}`)
    },
    removeSession(id) {
      log(`session detached ${id}`)
    },
    // Status-surface hooks: nothing to render headlessly.
    updateIdleStatus: ignore,
    updateReconnectingStatus: ignore,
    updateSessionStatus: ignore,
    updateSessionActivity: ignore,
    updateSessionCount: ignore,
    updateFailedStatus: ignore,
    setSpawnModeDisplay: ignore,
    setRepoInfo: ignore,
    setDebugLogPath: ignore,
    setAttached: ignore,
    setSessionTitle: ignore,
    clearStatus: ignore,
    toggleQr: ignore,
    refreshDisplay: ignore,
  }
  return logger
}