File detail
utils/mcpOutputStorage.ts
🎯 Use case
This file lives under “utils/”, which covers cross-cutting helpers (shell, tempfiles, settings, messages, process input, …). On the API surface it exposes getFormatDescription, getLargeOutputInstructions, extensionForMimeType, isBinaryContentType, and PersistBinaryResult (and more) — mainly functions, hooks, or classes. Dependencies touch Node filesystem and Node path helpers. It composes internal code from services, errors, format, log, and toolResultStorage (relative imports).
Generated from folder role, exports, dependency roots, and inline comments — not hand-reviewed for every path.
🧠 Inline summary
import { writeFile } from 'fs/promises' import { join } from 'path' import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, logEvent,
📤 Exports (heuristic)
getFormatDescription, getLargeOutputInstructions, extensionForMimeType, isBinaryContentType, PersistBinaryResult, persistBinaryContent, getBinaryBlobSavedMessage
External import roots
Package roots taken from `from "…"` import clauses (relative paths omitted).
fs, path
🖥️ Source preview
import { writeFile } from 'fs/promises'
import { join } from 'path'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../services/analytics/index.js'
import type { MCPResultType } from '../services/mcp/client.js'
import { toError } from './errors.js'
import { formatFileSize } from './format.js'
import { logError } from './log.js'
import { ensureToolResultsDir, getToolResultsDir } from './toolResultStorage.js'
/**
 * Renders a schema value as text for embedding in a format description.
 *
 * Strings are used verbatim. Any other value is serialized with
 * JSON.stringify, because interpolating an object into a template literal
 * would produce the useless "[object Object]". Values JSON cannot represent
 * (cycles, BigInt, functions, symbols) fall back to String().
 */
function describeSchema(schema: unknown): string {
  if (typeof schema === 'string') return schema
  try {
    // JSON.stringify yields undefined at runtime for functions and symbols,
    // even though its declared return type is string.
    const json: string | undefined = JSON.stringify(schema)
    return json ?? String(schema)
  } catch {
    // Thrown for circular structures and BigInt values.
    return String(schema)
  }
}

/**
 * Generates a format description string based on the MCP result type and schema.
 *
 * @param type - The MCP result kind: 'toolResult', 'structuredContent' or 'contentArray'
 * @param schema - Optional schema. A falsy value means "no schema". A string is
 *   embedded as-is; any other value is embedded as JSON text.
 * @returns 'Plain text' for tool results; 'JSON' / 'JSON array' (optionally
 *   suffixed with the schema) for the structured kinds
 */
export function getFormatDescription(
  type: MCPResultType,
  schema?: unknown,
): string {
  switch (type) {
    case 'toolResult':
      return 'Plain text'
    case 'structuredContent':
      return schema ? `JSON with schema: ${describeSchema(schema)}` : 'JSON'
    case 'contentArray':
      return schema
        ? `JSON array with schema: ${describeSchema(schema)}`
        : 'JSON array'
  }
}
/**
 * Builds the message shown in place of an oversized tool result, pointing
 * Claude at the file where the full output was saved and spelling out how it
 * must be read.
 *
 * The message is a sequence of newline-terminated lines: an error header with
 * the size and path, the format, reading hints, and the read-everything
 * requirements. The truncation requirement has two wordings, selected by
 * whether `maxReadLength` is truthy (so `0` behaves like an omitted value).
 *
 * @param rawOutputPath - Path to the saved output file
 * @param contentLength - Length of the content in characters
 * @param formatDescription - Description of the content format
 * @param maxReadLength - Optional max chars for Read tool (for Bash output context)
 * @returns Instruction text to include in the tool result
 */
export function getLargeOutputInstructions(
  rawOutputPath: string,
  contentLength: number,
  formatDescription: string,
  maxReadLength?: number,
): string {
  const sizeLabel = contentLength.toLocaleString()

  const lines: string[] = [
    `Error: result (${sizeLabel} characters) exceeds maximum allowed tokens. Output has been saved to ${rawOutputPath}.`,
    `Format: ${formatDescription}`,
    'Use offset and limit parameters to read specific portions of the file, search within it for specific content, and jq to make structured queries.',
    'REQUIREMENTS FOR SUMMARIZATION/ANALYSIS/REVIEW:',
    `- You MUST read the content from the file at ${rawOutputPath} in sequential chunks until 100% of the content has been read.`,
  ]

  if (maxReadLength) {
    // Bash-context wording: names the truncation marker and the hard limit.
    lines.push(
      `- If you receive truncation warnings when reading the file ("[N lines truncated]"), reduce the chunk size until you have read 100% of the content without truncation ***DO NOT PROCEED UNTIL YOU HAVE DONE THIS***. Bash output is limited to ${maxReadLength.toLocaleString()} chars.`,
    )
  } else {
    lines.push(
      '- If you receive truncation warnings when reading the file, reduce the chunk size until you have read 100% of the content without truncation.',
    )
  }

  lines.push(
    '- Before producing ANY summary or analysis, you MUST explicitly describe what portion of the content you have read. ***If you did not read the entire content, you MUST explicitly state this.***',
  )

  // Every line, including the last, is newline-terminated.
  return lines.map(line => `${line}\n`).join('')
}
/**
 * Known mime types and the file extension each one is saved with.
 * Kept in a Map so lookups only ever match these exact keys.
 */
const MIME_TYPE_TO_EXTENSION: ReadonlyMap<string, string> = new Map<
  string,
  string
>([
  ['application/pdf', 'pdf'],
  ['application/json', 'json'],
  ['text/csv', 'csv'],
  ['text/plain', 'txt'],
  ['text/html', 'html'],
  ['text/markdown', 'md'],
  ['application/zip', 'zip'],
  [
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'docx',
  ],
  ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'xlsx'],
  [
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    'pptx',
  ],
  ['application/msword', 'doc'],
  ['application/vnd.ms-excel', 'xls'],
  ['audio/mpeg', 'mp3'],
  ['audio/wav', 'wav'],
  ['audio/ogg', 'ogg'],
  ['video/mp4', 'mp4'],
  ['video/webm', 'webm'],
  ['image/png', 'png'],
  ['image/jpeg', 'jpg'],
  ['image/gif', 'gif'],
  ['image/webp', 'webp'],
  ['image/svg+xml', 'svg'],
])

/**
 * Picks a file extension (without the dot) for a mime type.
 *
 * Deliberately conservative: only recognized types get a specific extension,
 * everything else (including a missing or empty mime type) becomes 'bin'.
 * The extension matters because the Read tool dispatches on it, so PDFs,
 * images, etc. need the right one.
 *
 * Parameters after ';' (charset, boundary, ...) are ignored, and matching is
 * case-insensitive and whitespace-tolerant.
 */
export function extensionForMimeType(mimeType: string | undefined): string {
  if (!mimeType) {
    return 'bin'
  }
  // Keep only the part before the first ';' — the type itself.
  const [essence = ''] = mimeType.split(';')
  const normalized = essence.trim().toLowerCase()
  return MIME_TYPE_TO_EXTENSION.get(normalized) ?? 'bin'
}
/**
 * Decides, from a content-type header value, whether the payload should be
 * treated as binary (saved to disk) rather than placed in the model context.
 *
 * An empty header is reported as non-binary. Otherwise the parameters after
 * ';' are dropped and the type is compared case-insensitively. These count as
 * text: text/*, application/json and any "+json" type, application/xml and
 * any "+xml" type, anything starting with application/javascript, and
 * application/x-www-form-urlencoded. Everything else is binary.
 */
export function isBinaryContentType(contentType: string): boolean {
  if (!contentType) {
    return false
  }

  const [head = ''] = contentType.split(';')
  const essence = head.trim().toLowerCase()

  // JSON and XML are matched by exact name or structured suffix, never by
  // substring, so that 'openxmlformats' types (docx/xlsx) remain binary.
  const looksTextual =
    essence.startsWith('text/') ||
    essence === 'application/json' ||
    essence.endsWith('+json') ||
    essence === 'application/xml' ||
    essence.endsWith('+xml') ||
    essence.startsWith('application/javascript') ||
    essence === 'application/x-www-form-urlencoded'

  return !looksTextual
}
/**
 * Outcome of persistBinaryContent.
 *
 * Success: `filepath` is where the bytes were written, `size` is the byte
 * count, and `ext` is the extension chosen for the file.
 * Failure: `error` carries the message of the error that was caught.
 * Distinguish the two by checking for the `error` property.
 */
export type PersistBinaryResult =
| { filepath: string; size: number; ext: string }
| { error: string }
/**
 * Write raw binary bytes to the tool-results directory with a mime-derived
 * extension. Unlike persistToolResult (which stringifies), this writes the
 * bytes as-is so the resulting file can be opened with native tools (Read
 * for PDFs, pandas for xlsx, etc.).
 *
 * Failures are reported through the return value rather than by rejecting:
 * preparing the directory, resolving the path and writing the file all run
 * inside one try block, and any error is logged and returned as `{ error }`.
 *
 * @param bytes - Raw content to write unchanged
 * @param mimeType - Mime type used to pick the file extension ('bin' if unknown)
 * @param persistId - Base name of the file; the file is `<persistId>.<ext>`
 * @returns `{ filepath, size, ext }` on success, `{ error }` on failure
 */
export async function persistBinaryContent(
  bytes: Buffer,
  mimeType: string | undefined,
  persistId: string,
): Promise<PersistBinaryResult> {
  const ext = extensionForMimeType(mimeType)
  let filepath: string
  try {
    // Directory preparation sits inside the try so that a failure here is
    // surfaced as `{ error }`, the same as a failed write, instead of
    // rejecting the returned promise.
    await ensureToolResultsDir()
    filepath = join(getToolResultsDir(), `${persistId}.${ext}`)
    await writeFile(filepath, bytes)
  } catch (error) {
    const err = toError(error)
    logError(err)
    return { error: err.message }
  }
  // ext comes from extensionForMimeType's fixed vocabulary. mimeType is
  // forwarded as received — NOTE(review): assumed to be a plain mime label,
  // not a path or code; confirm against callers.
  logEvent('tengu_binary_content_persisted', {
    mimeType: (mimeType ??
      'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    sizeBytes: bytes.length,
    ext: ext as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })
  return { filepath, size: bytes.length, ext }
}
/**
 * Composes the one-line notice telling Claude where binary content was saved.
 * It only states the path — no prescriptive hint, since what the model can
 * actually do with the file depends on provider/tooling.
 *
 * @param filepath - Where the content was written
 * @param mimeType - Mime type to report; empty or missing shows 'unknown type'
 * @param size - Size passed to formatFileSize for display
 * @param sourceDescription - Text placed directly before "Binary content",
 *   with no separator added, so include any trailing punctuation or space
 * @returns The assembled message
 */
export function getBinaryBlobSavedMessage(
  filepath: string,
  mimeType: string | undefined,
  size: number,
  sourceDescription: string,
): string {
  const typeLabel = mimeType ? mimeType : 'unknown type'
  const details = `${typeLabel}, ${formatFileSize(size)}`
  return sourceDescription + 'Binary content (' + details + ') saved to ' + filepath
}