📄 File detail
utils/sessionStoragePortable.ts
🎯 Use case
This file lives under `utils/`, which covers cross-cutting helpers (shell, tempfiles, settings, messages, process input, …). On the API surface it exposes LITE_READ_BUF_SIZE, validateUuid, unescapeJsonString, extractJsonStringField, and extractLastJsonStringField (and more) — mainly functions, hooks, or classes. Dependencies touch crypto, Node filesystem, and Node path helpers. It composes internal code from envUtils, getWorktreePathsPortable, and hash (relative imports). What the file header says: Portable session storage utilities. Pure Node.js — no internal dependencies on logging, experiments, or feature flags. Shared between the CLI (src/utils/sessionStorage.ts) and the VS Code extension (packages/claude-vscode/src/common-host/sessionStorage.ts).
Generated from folder role, exports, dependency roots, and inline comments — not hand-reviewed for every path.
🧠 Inline summary
Portable session storage utilities. Pure Node.js — no internal dependencies on logging, experiments, or feature flags. Shared between the CLI (src/utils/sessionStorage.ts) and the VS Code extension (packages/claude-vscode/src/common-host/sessionStorage.ts).
📤 Exports (heuristic)
LITE_READ_BUF_SIZE, validateUuid, unescapeJsonString, extractJsonStringField, extractLastJsonStringField, extractFirstPromptFromHead, readHeadAndTail, LiteSessionFile, readSessionLite, MAX_SANITIZED_LENGTH, sanitizePath, getProjectsDir, getProjectDir, canonicalizePath, findProjectDir, resolveSessionFilePath, SKIP_PRECOMPACT_THRESHOLD, readTranscriptForLoad
📦 External import roots
Package roots taken from `from "…"` specifiers (relative paths omitted).
crypto, fs, path
🖥️ Source preview
/**
* Portable session storage utilities.
*
 * Pure Node.js — no internal dependencies on logging, experiments, or feature
* flags. Shared between the CLI (src/utils/sessionStorage.ts) and the VS Code
* extension (packages/claude-vscode/src/common-host/sessionStorage.ts).
*/
import type { UUID } from 'crypto'
import { open as fsOpen, readdir, realpath, stat } from 'fs/promises'
import { join } from 'path'
import { getClaudeConfigHomeDir } from './envUtils.js'
import { getWorktreePathsPortable } from './getWorktreePathsPortable.js'
import { djb2Hash } from './hash.js'
/** Size of the head/tail buffer for lite metadata reads. */
export const LITE_READ_BUF_SIZE = 65536
// ---------------------------------------------------------------------------
// UUID validation
// ---------------------------------------------------------------------------
/** Case-insensitive RFC-4122 shape: five hyphen-separated hex groups. */
const uuidRegex =
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
/**
 * Returns the input typed as UUID when it is a UUID-shaped string;
 * returns null for non-strings and malformed strings.
 */
export function validateUuid(maybeUuid: unknown): UUID | null {
  if (typeof maybeUuid !== 'string') return null
  if (!uuidRegex.test(maybeUuid)) return null
  return maybeUuid as UUID
}
// ---------------------------------------------------------------------------
// JSON string field extraction — no full parse, works on truncated lines
// ---------------------------------------------------------------------------
/**
 * Decode JSON escape sequences in a raw string value sliced out of JSONL text.
 *
 * Fast path: no backslash means no escapes, so the input is returned without
 * allocating. On malformed escapes (e.g. a truncated line) the raw input is
 * returned unchanged rather than throwing.
 */
export function unescapeJsonString(raw: string): string {
  const hasEscapes = raw.includes('\\')
  if (!hasEscapes) return raw
  try {
    return JSON.parse(`"${raw}"`) as string
  } catch {
    return raw
  }
}
/**
 * Extracts a simple JSON string field value from raw text without full parsing.
 * Looks for `"key":"value"` or `"key": "value"` patterns.
 * Returns the first match, or undefined if not found.
 */
export function extractJsonStringField(
  text: string,
  key: string,
): string | undefined {
  for (const pattern of [`"${key}":"`, `"${key}": "`]) {
    const idx = text.indexOf(pattern)
    if (idx < 0) continue
    const valueStart = idx + pattern.length
    // Scan for the closing quote, hopping over backslash escapes.
    for (let i = valueStart; i < text.length; i++) {
      const ch = text[i]
      if (ch === '\\') {
        i++ // skip the escaped character
      } else if (ch === '"') {
        return unescapeJsonString(text.slice(valueStart, i))
      }
    }
    // No closing quote (truncated line) — try the next pattern spelling.
  }
  return undefined
}
/**
 * Like extractJsonStringField but finds the LAST occurrence.
 * Useful for fields that are appended (customTitle, tag, etc.).
 *
 * Both the `"key":"value"` and `"key": "value"` spellings are considered, and
 * the match that occurs latest in the text wins regardless of spelling.
 * (Previously every match of the `": "` variant overwrote matches of the
 * `":"` variant even when it appeared earlier in the text, violating the
 * "last occurrence" contract when spellings were mixed.)
 *
 * Only complete (quote-terminated) values are considered; a truncated value
 * at the end of the text is ignored.
 */
export function extractLastJsonStringField(
  text: string,
  key: string,
): string | undefined {
  const patterns = [`"${key}":"`, `"${key}": "`]
  let bestIdx = -1
  let bestValue: string | undefined
  for (const pattern of patterns) {
    let searchFrom = 0
    while (true) {
      const idx = text.indexOf(pattern, searchFrom)
      if (idx < 0) break
      const valueStart = idx + pattern.length
      let i = valueStart
      let closed = false
      while (i < text.length) {
        if (text[i] === '\\') {
          i += 2 // hop over the escaped character
          continue
        }
        if (text[i] === '"') {
          closed = true
          break
        }
        i++
      }
      // Keep whichever complete match starts latest in the text.
      if (closed && idx > bestIdx) {
        bestIdx = idx
        bestValue = unescapeJsonString(text.slice(valueStart, i))
      }
      searchFrom = i + 1
    }
  }
  return bestValue
}
// ---------------------------------------------------------------------------
// First prompt extraction from head chunk
// ---------------------------------------------------------------------------
/**
 * Pattern matching auto-generated or system messages that should be skipped
 * when looking for the first meaningful user prompt. Matches anything that
 * starts with a lowercase XML-like tag (IDE context, hook output, task
 * notifications, channel messages, etc.) or a synthetic interrupt marker.
 */
const SKIP_FIRST_PROMPT_PATTERN =
  /^(?:\s*<[a-z][\w-]*[\s>]|\[Request interrupted by user[^\]]*\])/
const COMMAND_NAME_RE = /<command-name>(.*?)<\/command-name>/
/**
 * Extracts the first meaningful user prompt from a JSONL head chunk.
 *
 * Skips tool_result messages, isMeta, isCompactSummary, command-name messages,
 * and auto-generated patterns (session hooks, tick, IDE metadata, etc.).
 * Truncates to 200 chars (plus an ellipsis). Falls back to the first
 * slash-command name when no free-form prompt is found; otherwise ''.
 */
export function extractFirstPromptFromHead(head: string): string {
  let commandFallback = ''
  for (const line of head.split('\n')) {
    // Cheap substring pre-filters before paying for JSON.parse.
    if (!line.includes('"type":"user"') && !line.includes('"type": "user"')) {
      continue
    }
    if (line.includes('"tool_result"')) continue
    if (line.includes('"isMeta":true') || line.includes('"isMeta": true')) {
      continue
    }
    if (
      line.includes('"isCompactSummary":true') ||
      line.includes('"isCompactSummary": true')
    ) {
      continue
    }
    try {
      const entry = JSON.parse(line) as Record<string, unknown>
      if (entry.type !== 'user') continue
      const message = entry.message as Record<string, unknown> | undefined
      const content = message?.content
      // Normalize string content and text blocks into a flat list.
      const texts: string[] =
        typeof content === 'string'
          ? [content]
          : Array.isArray(content)
            ? (content as Record<string, unknown>[])
                .filter(b => b.type === 'text' && typeof b.text === 'string')
                .map(b => b.text as string)
            : []
      for (const raw of texts) {
        const flattened = raw.replace(/\n/g, ' ').trim()
        if (!flattened) continue
        // Skip slash-command messages but remember the first as fallback.
        const cmdMatch = COMMAND_NAME_RE.exec(flattened)
        if (cmdMatch) {
          if (!commandFallback) commandFallback = cmdMatch[1]!
          continue
        }
        // Format bash input with a ! prefix before the generic XML skip.
        const bashMatch = /<bash-input>([\s\S]*?)<\/bash-input>/.exec(flattened)
        if (bashMatch) return `! ${bashMatch[1]!.trim()}`
        if (SKIP_FIRST_PROMPT_PATTERN.test(flattened)) continue
        return flattened.length > 200
          ? flattened.slice(0, 200).trim() + '\u2026'
          : flattened
      }
    } catch {
      continue
    }
  }
  return commandFallback
}
// ---------------------------------------------------------------------------
// File I/O — read head and tail of a file
// ---------------------------------------------------------------------------
/**
 * Reads the first and last LITE_READ_BUF_SIZE bytes of a file.
 *
 * For small files where head covers tail, `tail === head`.
 * Accepts a shared Buffer to avoid per-file allocation overhead.
 * Returns `{ head: '', tail: '' }` on any error.
 */
export async function readHeadAndTail(
  filePath: string,
  fileSize: number,
  buf: Buffer,
): Promise<{ head: string; tail: string }> {
  try {
    const fh = await fsOpen(filePath, 'r')
    try {
      const { bytesRead } = await fh.read(buf, 0, LITE_READ_BUF_SIZE, 0)
      if (bytesRead === 0) return { head: '', tail: '' }
      const head = buf.toString('utf8', 0, bytesRead)
      const tailOffset = Math.max(0, fileSize - LITE_READ_BUF_SIZE)
      if (tailOffset === 0) {
        // The head read already covers the whole file.
        return { head, tail: head }
      }
      const tailRead = await fh.read(buf, 0, LITE_READ_BUF_SIZE, tailOffset)
      return { head, tail: buf.toString('utf8', 0, tailRead.bytesRead) }
    } finally {
      await fh.close()
    }
  } catch {
    return { head: '', tail: '' }
  }
}
export type LiteSessionFile = {
  mtime: number
  size: number
  head: string
  tail: string
}
/**
 * Opens a single session file, stats it, and reads head + tail in one fd.
 * Allocates its own buffer — safe for concurrent use with Promise.all.
 * Returns null on any error.
 */
export async function readSessionLite(
  filePath: string,
): Promise<LiteSessionFile | null> {
  try {
    const fh = await fsOpen(filePath, 'r')
    try {
      const info = await fh.stat()
      const scratch = Buffer.allocUnsafe(LITE_READ_BUF_SIZE)
      const { bytesRead } = await fh.read(scratch, 0, LITE_READ_BUF_SIZE, 0)
      if (bytesRead === 0) return null
      const head = scratch.toString('utf8', 0, bytesRead)
      const tailOffset = Math.max(0, info.size - LITE_READ_BUF_SIZE)
      let tail = head // small file: head covers the tail too
      if (tailOffset > 0) {
        const tailRead = await fh.read(scratch, 0, LITE_READ_BUF_SIZE, tailOffset)
        tail = scratch.toString('utf8', 0, tailRead.bytesRead)
      }
      return { mtime: info.mtime.getTime(), size: info.size, head, tail }
    } finally {
      await fh.close()
    }
  } catch {
    return null
  }
}
// ---------------------------------------------------------------------------
// Path sanitization
// ---------------------------------------------------------------------------
/**
 * Maximum length for a single filesystem path component (directory or file name).
 * Most filesystems (ext4, APFS, NTFS) limit individual components to 255 bytes.
 * We use 200 to leave room for the hash suffix and separator.
 */
export const MAX_SANITIZED_LENGTH = 200
/** Non-negative base-36 rendering of djb2 — fallback when Bun.hash is unavailable. */
function simpleHash(str: string): string {
  return Math.abs(djb2Hash(str)).toString(36)
}
/**
 * Makes a string safe for use as a directory or file name.
 * Replaces all non-alphanumeric characters with hyphens.
 * This ensures compatibility across all platforms, including Windows
 * where characters like colons are reserved.
 *
 * For deeply nested paths that would exceed filesystem limits (255 bytes),
 * truncates and appends a hash suffix for uniqueness. The hash is computed
 * from the ORIGINAL name, not the sanitized form.
 *
 * @param name - The string to make safe (e.g., '/Users/foo/my-project' or 'plugin:name:server')
 * @returns A safe name (e.g., '-Users-foo-my-project' or 'plugin-name-server')
 */
export function sanitizePath(name: string): string {
  const safe = name.replace(/[^a-zA-Z0-9]/g, '-')
  if (safe.length > MAX_SANITIZED_LENGTH) {
    const suffix =
      typeof Bun === 'undefined' ? simpleHash(name) : Bun.hash(name).toString(36)
    return `${safe.slice(0, MAX_SANITIZED_LENGTH)}-${suffix}`
  }
  return safe
}
// ---------------------------------------------------------------------------
// Project directory discovery (shared by listSessions & getSessionMessages)
// ---------------------------------------------------------------------------
/** Root folder holding per-project session directories: <config-home>/projects. */
export function getProjectsDir(): string {
  const configHome = getClaudeConfigHomeDir()
  return join(configHome, 'projects')
}
/** Session directory for a project path: <projects-dir>/<sanitized-path>. */
export function getProjectDir(projectDir: string): string {
  const dirName = sanitizePath(projectDir)
  return join(getProjectsDir(), dirName)
}
/**
 * Resolves a directory path to its canonical form using realpath + NFC
 * normalization. Falls back to NFC-only if realpath fails (e.g., the
 * directory doesn't exist yet). Ensures symlinked paths (e.g.,
 * /tmp -> /private/tmp on macOS) resolve to the same project directory.
 */
export async function canonicalizePath(dir: string): Promise<string> {
  let resolved: string
  try {
    resolved = await realpath(dir)
  } catch {
    // Path may not exist yet — canonicalize only the Unicode spelling.
    resolved = dir
  }
  return resolved.normalize('NFC')
}
/**
* Finds the project directory for a given path, tolerating hash mismatches
* for long paths (>200 chars). The CLI uses Bun.hash while the SDK under
* Node.js uses simpleHash β for paths that exceed MAX_SANITIZED_LENGTH,
* these produce different directory suffixes. This function falls back to
* prefix-based scanning when the exact match doesn't exist.
*/
export async function findProjectDir(
projectPath: string,
): Promise<string | undefined> {
const exact = getProjectDir(projectPath)
try {
await readdir(exact)
return exact
} catch {
// Exact match failed β for short paths this means no sessions exist.
// For long paths, try prefix matching to handle hash mismatches.
const sanitized = sanitizePath(projectPath)
if (sanitized.length <= MAX_SANITIZED_LENGTH) {
return undefined
}
const prefix = sanitized.slice(0, MAX_SANITIZED_LENGTH)
const projectsDir = getProjectsDir()
try {
const dirents = await readdir(projectsDir, { withFileTypes: true })
const match = dirents.find(
d => d.isDirectory() && d.name.startsWith(prefix + '-'),
)
return match ? join(projectsDir, match.name) : undefined
} catch {
return undefined
}
}
}
/**
 * Resolve a sessionId to its on-disk JSONL file path.
 *
 * When `dir` is provided: canonicalize it, look in that project's directory
 * (with findProjectDir fallback for Bun/Node hash mismatches), then fall back
 * to sibling git worktrees. `projectPath` in the result is the canonical
 * user-facing directory the file was found under.
 *
 * When `dir` is omitted: scan all project directories under ~/.claude/projects/.
 * `projectPath` is undefined in this case (no meaningful project path to report).
 *
 * Existence is checked by stat (operate-then-catch-ENOENT, no existsSync).
 * Zero-byte files are treated as not-found so callers continue searching past
 * a truncated copy to find a valid one in a sibling directory.
 *
 * `fileSize` is returned so callers (loadSessionBuffer) don't need to re-stat.
 *
 * Shared by getSessionInfoImpl and getSessionMessagesImpl — the caller
 * invokes its own reader (readSessionLite / loadSessionBuffer) on the
 * resolved path.
 */
export async function resolveSessionFilePath(
  sessionId: string,
  dir?: string,
): Promise<
  | { filePath: string; projectPath: string | undefined; fileSize: number }
  | undefined
> {
  const fileName = `${sessionId}.jsonl`
  // Stat a candidate path; zero-byte and unreadable files count as not-found.
  const sizeOf = async (candidate: string): Promise<number | undefined> => {
    try {
      const s = await stat(candidate)
      if (s.size > 0) return s.size
    } catch {
      // ENOENT/EACCES/ENOTDIR — treat as not found, keep searching
    }
    return undefined
  }
  if (!dir) {
    // No dir — scan every project directory under the projects root.
    const projectsDir = getProjectsDir()
    let names: string[]
    try {
      names = await readdir(projectsDir)
    } catch {
      return undefined
    }
    for (const name of names) {
      const filePath = join(projectsDir, name, fileName)
      const fileSize = await sizeOf(filePath)
      if (fileSize !== undefined) {
        return { filePath, projectPath: undefined, fileSize }
      }
    }
    return undefined
  }
  // Directory given: try the canonical project directory first.
  const canonical = await canonicalizePath(dir)
  const projectDir = await findProjectDir(canonical)
  if (projectDir) {
    const filePath = join(projectDir, fileName)
    const fileSize = await sizeOf(filePath)
    if (fileSize !== undefined) {
      return { filePath, projectPath: canonical, fileSize }
    }
  }
  // Worktree fallback — sessions may live under a different worktree root.
  let worktreePaths: string[] = []
  try {
    worktreePaths = await getWorktreePathsPortable(canonical)
  } catch {
    // best-effort; treat as no worktrees
  }
  for (const wt of worktreePaths) {
    if (wt === canonical) continue
    const wtProjectDir = await findProjectDir(wt)
    if (!wtProjectDir) continue
    const filePath = join(wtProjectDir, fileName)
    const fileSize = await sizeOf(filePath)
    if (fileSize !== undefined) {
      return { filePath, projectPath: wt, fileSize }
    }
  }
  return undefined
}
// ---------------------------------------------------------------------------
// Compact-boundary chunked read (shared by loadTranscriptFile & SDK getSessionMessages)
// ---------------------------------------------------------------------------
/** Chunk size for the forward transcript reader. 1 MB balances I/O calls vs buffer growth. */
const TRANSCRIPT_READ_CHUNK_SIZE = 1024 * 1024
/**
 * File size below which precompact filtering is skipped.
 * Large sessions (>5 MB) almost always have compact boundaries — they got big
 * because of many turns triggering auto-compact.
 */
export const SKIP_PRECOMPACT_THRESHOLD = 5 * 1024 * 1024
/**
 * Marker bytes searched for when locating the boundary. Allocated lazily on
 * first use rather than at module load — most sessions never resume.
 */
let _compactBoundaryMarker: Buffer | undefined
function compactBoundaryMarker(): Buffer {
  if (_compactBoundaryMarker === undefined) {
    _compactBoundaryMarker = Buffer.from('"compact_boundary"')
  }
  return _compactBoundaryMarker
}
/**
 * Confirm a byte-matched line is a real compact_boundary (the marker can also
 * appear inside user content) and report whether it carries a preservedSegment.
 * Returns null for unparseable lines and for any non-boundary entry.
 */
function parseBoundaryLine(
  line: string,
): { hasPreservedSegment: boolean } | null {
  let entry: {
    type?: string
    subtype?: string
    compactMetadata?: { preservedSegment?: unknown }
  }
  try {
    entry = JSON.parse(line)
  } catch {
    return null
  }
  const isBoundary =
    entry.type === 'system' && entry.subtype === 'compact_boundary'
  if (!isBoundary) return null
  return {
    hasPreservedSegment: Boolean(entry.compactMetadata?.preservedSegment),
  }
}
/**
 * Single forward chunked read for the --resume load path. Attr-snap lines
 * are skipped at the fd level; compact boundaries truncate in-stream. Peak
 * is the output size, not the file size.
 *
 * The surviving (last) attr-snap is appended at EOF instead of in-place;
 * restoreAttributionStateFromSnapshots only reads [length-1] so position
 * doesn't matter.
 */
type Sink = { buf: Buffer; len: number; cap: number }
/** Append src[start..end) to the sink, doubling the buffer (up to cap) on overflow. */
function sinkWrite(s: Sink, src: Buffer, start: number, end: number): void {
  const count = end - start
  if (count <= 0) return
  const needed = s.len + count
  if (needed > s.buf.length) {
    // Grow geometrically but never past the caller-declared capacity.
    const newSize = Math.min(Math.max(s.buf.length * 2, needed), s.cap)
    const grown = Buffer.allocUnsafe(newSize)
    s.buf.copy(grown, 0, 0, s.len)
    s.buf = grown
  }
  src.copy(s.buf, s.len, start, end)
  s.len += count
}
/** True when src[at..end) begins with the exact bytes of `prefix`. */
function hasPrefix(
  src: Buffer,
  prefix: Buffer,
  at: number,
  end: number,
): boolean {
  if (end - at < prefix.length) return false
  return src.subarray(at, at + prefix.length).equals(prefix)
}
/** Line-start byte signatures used to classify JSONL lines without parsing. */
const ATTR_SNAP_PREFIX = Buffer.from('{"type":"attribution-snapshot"')
const SYSTEM_PREFIX = Buffer.from('{"type":"system"')
const LF = 0x0a
const LF_BYTE = Buffer.from([LF])
const BOUNDARY_SEARCH_BOUND = 256 // marker sits ~28 bytes in; 256 is slack
/** Mutable state threaded through one readTranscriptForLoad pass. */
type LoadState = {
  out: Sink // filtered-output accumulator
  boundaryStartOffset: number // file offset of the boundary line that last reset `out`
  hasPreservedSegment: boolean // a preserved-segment boundary seen since the last truncation
  lastSnapSrc: Buffer | null // most-recent attr-snap, appended at EOF
  lastSnapLen: number // valid byte count in lastSnapSrc
  lastSnapBuf: Buffer | undefined // reusable backing storage for lastSnapSrc
  bufFileOff: number // file offset of buf[0]
  carryLen: number // bytes of trailing partial line carried to the next chunk
  carryBuf: Buffer | undefined // reusable backing storage for the carry
  straddleSnapCarryLen: number // per-chunk; reset by processStraddle
  straddleSnapTailEnd: number // chunk bytes belonging to a seam-straddling snap
}
// Line spanning the chunk seam. 0 = fall through to concat.
//
// Completes the partial line carried over from the previous chunk using the
// bytes at the start of `chunk`. Returns how many chunk bytes were consumed
// (the straddling line's tail, newline included), or 0 when the caller should
// instead concatenate carry + chunk and rescan from scratch.
function processStraddle(
  s: LoadState,
  chunk: Buffer,
  bytesRead: number,
): number {
  // Reset per-chunk straddle bookkeeping before any early return can fire.
  s.straddleSnapCarryLen = 0
  s.straddleSnapTailEnd = 0
  if (s.carryLen === 0) return 0
  const cb = s.carryBuf!
  const firstNl = chunk.indexOf(LF)
  // chunk is a reused buffer: an LF found at/after bytesRead is stale data
  // from a previous read, so treat the line as still incomplete.
  if (firstNl === -1 || firstNl >= bytesRead) return 0
  const tailEnd = firstNl + 1
  if (hasPrefix(cb, ATTR_SNAP_PREFIX, 0, s.carryLen)) {
    // Straddling line is an attribution snapshot: keep it out of the output
    // but record its halves so captureSnap can stitch it as the survivor.
    s.straddleSnapCarryLen = s.carryLen
    s.straddleSnapTailEnd = tailEnd
    s.lastSnapSrc = null
  } else if (s.carryLen < ATTR_SNAP_PREFIX.length) {
    return 0 // too short to rule out attr-snap
  } else {
    if (hasPrefix(cb, SYSTEM_PREFIX, 0, s.carryLen)) {
      // Possible compact boundary split across the seam — parse the full line.
      const hit = parseBoundaryLine(
        cb.toString('utf-8', 0, s.carryLen) +
          chunk.toString('utf-8', 0, firstNl),
      )
      if (hit?.hasPreservedSegment) {
        s.hasPreservedSegment = true
      } else if (hit) {
        // Non-preserving boundary: restart the output at this line.
        s.out.len = 0
        s.boundaryStartOffset = s.bufFileOff
        s.hasPreservedSegment = false
        s.lastSnapSrc = null
      }
    }
    // Emit carry + tail (a truncating boundary line itself stays in-stream).
    sinkWrite(s.out, cb, 0, s.carryLen)
    sinkWrite(s.out, chunk, 0, tailEnd)
  }
  // buf[0] for this chunk now sits past the consumed carry + tail bytes.
  s.bufFileOff += s.carryLen + tailEnd
  s.carryLen = 0
  return tailEnd
}
// Strip attr-snaps, truncate on boundaries. Kept lines write as runs.
//
// Walks complete lines in `buf`, copying kept lines to the sink in contiguous
// runs. Returns the span of the last in-buf attr-snap (for captureSnap) and
// the start of the trailing partial line (for captureCarry).
function scanChunkLines(
  s: LoadState,
  buf: Buffer,
  boundaryMarker: Buffer,
): { lastSnapStart: number; lastSnapEnd: number; trailStart: number } {
  let boundaryAt = buf.indexOf(boundaryMarker)
  let runStart = 0 // start of the current run of kept bytes
  let lineStart = 0
  let lastSnapStart = -1
  let lastSnapEnd = -1
  let nl = buf.indexOf(LF)
  while (nl !== -1) {
    const lineEnd = nl + 1
    // Re-seek the cached marker hit if it fell behind the current line.
    if (boundaryAt !== -1 && boundaryAt < lineStart) {
      boundaryAt = buf.indexOf(boundaryMarker, lineStart)
    }
    if (hasPrefix(buf, ATTR_SNAP_PREFIX, lineStart, lineEnd)) {
      // Attr-snap line: flush the pending run, then skip the line itself.
      sinkWrite(s.out, buf, runStart, lineStart)
      lastSnapStart = lineStart
      lastSnapEnd = lineEnd
      runStart = lineEnd
    } else if (
      boundaryAt >= lineStart &&
      boundaryAt < Math.min(lineStart + BOUNDARY_SEARCH_BOUND, lineEnd)
    ) {
      // Marker near the line head — confirm it's a real boundary by parsing.
      const hit = parseBoundaryLine(buf.toString('utf-8', lineStart, nl))
      if (hit?.hasPreservedSegment) {
        s.hasPreservedSegment = true // don't truncate; preserved msgs already in output
      } else if (hit) {
        // Non-preserving boundary: restart output from this line on and
        // forget any snapshot remembered from the discarded prefix.
        s.out.len = 0
        s.boundaryStartOffset = s.bufFileOff + lineStart
        s.hasPreservedSegment = false
        s.lastSnapSrc = null
        lastSnapStart = -1
        s.straddleSnapCarryLen = 0
        runStart = lineStart
      }
      // Advance past this marker occurrence either way.
      boundaryAt = buf.indexOf(
        boundaryMarker,
        boundaryAt + boundaryMarker.length,
      )
    }
    lineStart = lineEnd
    nl = buf.indexOf(LF, lineStart)
  }
  // Flush the final run; bytes after the last LF are the partial trail.
  sinkWrite(s.out, buf, runStart, lineStart)
  return { lastSnapStart, lastSnapEnd, trailStart: lineStart }
}
// In-buf snap wins over straddle (later in file). carryBuf still valid here.
function captureSnap(
  s: LoadState,
  buf: Buffer,
  chunk: Buffer,
  lastSnapStart: number,
  lastSnapEnd: number,
): void {
  // Reuse (or grow) the dedicated snapshot buffer.
  const ensureSnapCapacity = (bytes: number): Buffer => {
    if (s.lastSnapBuf === undefined || bytes > s.lastSnapBuf.length) {
      s.lastSnapBuf = Buffer.allocUnsafe(bytes)
    }
    return s.lastSnapBuf
  }
  if (lastSnapStart !== -1) {
    // Snapshot fully inside this chunk's buf: copy it out for EOF re-append.
    s.lastSnapLen = lastSnapEnd - lastSnapStart
    const dst = ensureSnapCapacity(s.lastSnapLen)
    buf.copy(dst, 0, lastSnapStart, lastSnapEnd)
    s.lastSnapSrc = dst
  } else if (s.straddleSnapCarryLen > 0) {
    // Snapshot straddled the seam: stitch carry head + chunk tail together.
    s.lastSnapLen = s.straddleSnapCarryLen + s.straddleSnapTailEnd
    const dst = ensureSnapCapacity(s.lastSnapLen)
    s.carryBuf!.copy(dst, 0, 0, s.straddleSnapCarryLen)
    chunk.copy(dst, s.straddleSnapCarryLen, 0, s.straddleSnapTailEnd)
    s.lastSnapSrc = dst
  }
}
/** Stash the trailing partial line (no LF yet) so the next chunk can finish it. */
function captureCarry(s: LoadState, buf: Buffer, trailStart: number): void {
  const remaining = buf.length - trailStart
  s.carryLen = remaining
  if (remaining <= 0) return
  if (s.carryBuf === undefined || s.carryBuf.length < remaining) {
    s.carryBuf = Buffer.allocUnsafe(remaining)
  }
  buf.copy(s.carryBuf, 0, trailStart, buf.length)
}
/**
 * Flush the final carry and re-append the surviving attr-snap at EOF.
 * A carry that is itself a snapshot (crash-truncated final line) becomes the
 * survivor instead of being emitted; a LF is inserted before the appended
 * snap when the output doesn't already end with one.
 */
function finalizeOutput(s: LoadState): void {
  if (s.carryLen > 0) {
    const carry = s.carryBuf!
    if (hasPrefix(carry, ATTR_SNAP_PREFIX, 0, s.carryLen)) {
      s.lastSnapSrc = carry
      s.lastSnapLen = s.carryLen
    } else {
      sinkWrite(s.out, carry, 0, s.carryLen)
    }
  }
  if (s.lastSnapSrc !== null) {
    const needsNewline = s.out.len > 0 && s.out.buf[s.out.len - 1] !== LF
    if (needsNewline) {
      sinkWrite(s.out, LF_BYTE, 0, 1)
    }
    sinkWrite(s.out, s.lastSnapSrc, 0, s.lastSnapLen)
  }
}
/**
 * Forward chunked read of a session transcript for --resume.
 *
 * Streams the file in 1 MB chunks, dropping attribution-snapshot lines and
 * restarting the output whenever a non-preserving compact boundary is hit,
 * so peak memory tracks the post-boundary output rather than the file size.
 * The last attr-snap seen is re-appended at EOF by finalizeOutput.
 */
export async function readTranscriptForLoad(
  filePath: string,
  fileSize: number,
): Promise<{
  boundaryStartOffset: number
  postBoundaryBuf: Buffer
  hasPreservedSegment: boolean
}> {
  const boundaryMarker = compactBoundaryMarker()
  const s: LoadState = {
    out: {
      // Gated callers enter with fileSize > 5MB, so min(fileSize, 8MB) lands
      // in [5, 8]MB; large boundaryless sessions (24-31MB output) take 2
      // grows. Ungated callers (attribution.ts) pass small files too — the
      // min just right-sizes the initial buf, no grows.
      buf: Buffer.allocUnsafe(Math.min(fileSize, 8 * 1024 * 1024)),
      len: 0,
      // +1: finalizeOutput may insert one LF between a non-LF-terminated
      // carry and the reordered last attr-snap (crash-truncated file).
      cap: fileSize + 1,
    },
    boundaryStartOffset: 0,
    hasPreservedSegment: false,
    lastSnapSrc: null,
    lastSnapLen: 0,
    lastSnapBuf: undefined,
    bufFileOff: 0,
    carryLen: 0,
    carryBuf: undefined,
    straddleSnapCarryLen: 0,
    straddleSnapTailEnd: 0,
  }
  const ioBuf = Buffer.allocUnsafe(TRANSCRIPT_READ_CHUNK_SIZE)
  const fd = await fsOpen(filePath, 'r')
  try {
    for (let filePos = 0; filePos < fileSize; ) {
      const want = Math.min(TRANSCRIPT_READ_CHUNK_SIZE, fileSize - filePos)
      const { bytesRead } = await fd.read(ioBuf, 0, want, filePos)
      if (bytesRead === 0) break
      filePos += bytesRead
      // Resolve any line straddling the previous chunk boundary first.
      const consumed = processStraddle(s, ioBuf, bytesRead)
      // Assemble the scan window: leftover carry (if any) + fresh bytes.
      let scanBuf: Buffer
      if (s.carryLen > 0) {
        scanBuf = Buffer.allocUnsafe(s.carryLen + (bytesRead - consumed))
        s.carryBuf!.copy(scanBuf, 0, 0, s.carryLen)
        ioBuf.copy(scanBuf, s.carryLen, consumed, bytesRead)
      } else {
        scanBuf = ioBuf.subarray(consumed, bytesRead)
      }
      const r = scanChunkLines(s, scanBuf, boundaryMarker)
      // Order matters: captureSnap reads carryBuf before captureCarry
      // overwrites it with this chunk's trailing partial line.
      captureSnap(s, scanBuf, ioBuf, r.lastSnapStart, r.lastSnapEnd)
      captureCarry(s, scanBuf, r.trailStart)
      s.bufFileOff += r.trailStart
    }
    finalizeOutput(s)
  } finally {
    await fd.close()
  }
  return {
    boundaryStartOffset: s.boundaryStartOffset,
    postBoundaryBuf: s.out.buf.subarray(0, s.out.len),
    hasPreservedSegment: s.hasPreservedSegment,
  }
}