πŸ“„ File detail

utils/plugins/fetchTelemetry.ts

🧩 .tsπŸ“ 136 linesπŸ’Ύ 4,922 bytesπŸ“ text
← Back to All Files

🎯 Use case

This file lives under β€œutils/”, which covers cross-cutting helpers (shell, tempfiles, settings, messages, process input, …). On the API surface it exposes PluginFetchSource, PluginFetchOutcome, logPluginFetch, and classifyFetchError β€” mainly functions, hooks, or classes. It composes internal code from services and officialMarketplace (relative imports). What the file header says: Telemetry for plugin/marketplace fetches that hit the network. Added for inc-5046 (GitHub complained about claude-plugins-official load). Before this, fetch operations only had logForDebugging β€” no way to measure actual network volume. This surfaces what's hitting GitHub vs GCS v.

Generated from folder role, exports, dependency roots, and inline comments β€” not hand-reviewed for every path.

🧠 Inline summary

Telemetry for plugin/marketplace fetches that hit the network. Added for inc-5046 (GitHub complained about claude-plugins-official load). Before this, fetch operations only had logForDebugging β€” no way to measure actual network volume. This surfaces what's hitting GitHub vs GCS vs user-hosted so we can see the GCS migration take effect and catch future hot-path regressions before GitHub emails us again. Volume: these fire at startup (install-counts 24h-TTL) and on explicit user action (install/update). NOT per-interaction. Similar envelope to tengu_binary_download_*.

πŸ“€ Exports (heuristic)

  • PluginFetchSource
  • PluginFetchOutcome
  • logPluginFetch
  • classifyFetchError

πŸ–₯️ Source preview

/**
 * Telemetry for plugin/marketplace fetches that hit the network.
 *
 * Added for inc-5046 (GitHub complained about claude-plugins-official load).
 * Before this, fetch operations only had logForDebugging β€” no way to measure
 * actual network volume. This surfaces what's hitting GitHub vs GCS vs
 * user-hosted so we can see the GCS migration take effect and catch future
 * hot-path regressions before GitHub emails us again.
 *
 * Volume: these fire at startup (install-counts 24h-TTL)
 * and on explicit user action (install/update). NOT per-interaction. Similar
 * envelope to tengu_binary_download_*.
 */

import {
  logEvent,
  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS as SafeString,
} from '../../services/analytics/index.js'
import { OFFICIAL_MARKETPLACE_NAME } from './officialMarketplace.js'

export type PluginFetchSource =
  | 'install_counts'
  | 'marketplace_clone'
  | 'marketplace_pull'
  | 'marketplace_url'
  | 'plugin_clone'
  | 'mcpb'

export type PluginFetchOutcome = 'success' | 'failure' | 'cache_hit'

// Allowlist of public hosts we report by name. Anything else (enterprise
// git, self-hosted, internal) is bucketed as 'other' β€” we don't want
// internal hostnames (git.mycorp.internal) landing in telemetry. Bounded
// cardinality also keeps the dashboard host-breakdown tractable.
const KNOWN_PUBLIC_HOSTS = new Set([
  'github.com',
  'raw.githubusercontent.com',
  'objects.githubusercontent.com',
  'gist.githubusercontent.com',
  'gitlab.com',
  'bitbucket.org',
  'codeberg.org',
  'dev.azure.com',
  'ssh.dev.azure.com',
  'storage.googleapis.com', // GCS β€” where Dickson's migration points
])

/**
 * Extract hostname from a URL or git spec and bucket to the allowlist.
 * Handles `https://host/...`, `git@host:path`, `ssh://host/...`.
 * Returns a known public host, 'other' (parseable but not allowlisted β€”
 * don't leak private hostnames), or 'unknown' (unparseable / local path).
 */
function extractHost(urlOrSpec: string): string {
  let host: string
  const scpMatch = /^[^@/]+@([^:/]+):/.exec(urlOrSpec)
  if (scpMatch) {
    host = scpMatch[1]!
  } else {
    try {
      host = new URL(urlOrSpec).hostname
    } catch {
      return 'unknown'
    }
  }
  const normalized = host.toLowerCase()
  return KNOWN_PUBLIC_HOSTS.has(normalized) ? normalized : 'other'
}

/**
 * True if the URL/spec points at anthropics/claude-plugins-official β€” the
 * repo GitHub complained about. Lets the dashboard separate "our problem"
 * traffic from user-configured marketplaces.
 */
function isOfficialRepo(urlOrSpec: string): boolean {
  return urlOrSpec.includes(`anthropics/${OFFICIAL_MARKETPLACE_NAME}`)
}

export function logPluginFetch(
  source: PluginFetchSource,
  urlOrSpec: string | undefined,
  outcome: PluginFetchOutcome,
  durationMs: number,
  errorKind?: string,
): void {
  // String values are bounded enums / hostname-only β€” no code, no paths,
  // no raw error messages. Same privacy envelope as tengu_web_fetch_host.
  logEvent('tengu_plugin_remote_fetch', {
    source: source as SafeString,
    host: (urlOrSpec ? extractHost(urlOrSpec) : 'unknown') as SafeString,
    is_official: urlOrSpec ? isOfficialRepo(urlOrSpec) : false,
    outcome: outcome as SafeString,
    duration_ms: Math.round(durationMs),
    ...(errorKind && { error_kind: errorKind as SafeString }),
  })
}

/**
 * Classify an error into a stable bucket for the error_kind field. Keeps
 * cardinality bounded β€” raw error messages would explode dashboard grouping.
 *
 * Handles both axios Error objects (Node.js error codes like ENOTFOUND) and
 * git stderr strings (human phrases like "Could not resolve host"). DNS
 * checked BEFORE timeout because gitClone's error enhancement at
 * marketplaceManager.ts:~950 rewrites DNS failures to include the word
 * "timeout" β€” ordering the other way would misclassify git DNS as timeout.
 */
export function classifyFetchError(error: unknown): string {
  const msg = String((error as { message?: unknown })?.message ?? error)
  if (
    /ENOTFOUND|ECONNREFUSED|EAI_AGAIN|Could not resolve host|Connection refused/i.test(
      msg,
    )
  ) {
    return 'dns_or_refused'
  }
  if (/ETIMEDOUT|timed out|timeout/i.test(msg)) return 'timeout'
  if (
    /ECONNRESET|socket hang up|Connection reset by peer|remote end hung up/i.test(
      msg,
    )
  ) {
    return 'conn_reset'
  }
  if (/403|401|authentication|permission denied/i.test(msg)) return 'auth'
  if (/404|not found|repository not found/i.test(msg)) return 'not_found'
  if (/certificate|SSL|TLS|unable to get local issuer/i.test(msg)) return 'tls'
  // Schema validation throws "Invalid response format" (install_counts) β€”
  // distinguish from true unknowns so the dashboard can
  // see "server sent garbage" separately.
  if (/Invalid response format|Invalid marketplace schema/i.test(msg)) {
    return 'invalid_schema'
  }
  return 'other'
}