π File detail
services/voiceKeyterms.ts
π― Use case
This file lives under βservices/β, which covers long-lived services (LSP, MCP, OAuth, tool execution, memory, compaction, voice, settings sync, β¦). On the API surface it exposes splitIdentifier and getVoiceKeyterms β mainly functions, hooks, or classes. Dependencies touch Node path helpers. It composes internal code from bootstrap and utils (relative imports). What the file header says: Voice keyterms for improving STT accuracy in the voice_stream endpoint. Provides domain-specific vocabulary hints (Deepgram "keywords") so the STT engine correctly recognises coding terminology, project names, and branch names that would otherwise be misheard.
Generated from folder role, exports, dependency roots, and inline comments β not hand-reviewed for every path.
π§ Inline summary
Voice keyterms for improving STT accuracy in the voice_stream endpoint. Provides domain-specific vocabulary hints (Deepgram "keywords") so the STT engine correctly recognises coding terminology, project names, and branch names that would otherwise be misheard.
π€ Exports (heuristic)
splitIdentifiergetVoiceKeyterms
π External import roots
Package roots from from "β¦" (relative paths omitted).
path
π₯οΈ Source preview
// Voice keyterms for improving STT accuracy in the voice_stream endpoint.
//
// Provides domain-specific vocabulary hints (Deepgram "keywords") so the STT
// engine correctly recognises coding terminology, project names, and branch
// names that would otherwise be misheard.
import { basename } from 'path'
import { getProjectRoot } from '../bootstrap/state.js'
import { getBranch } from '../utils/git.js'
// βββ Global keyterms ββββββββββββββββββββββββββββββββββββββββββββββββ
const GLOBAL_KEYTERMS: readonly string[] = [
// Terms Deepgram consistently mangles without keyword hints.
// Note: "Claude" and "Anthropic" are already server-side base keyterms.
// Avoid terms nobody speaks aloud as-spelled (stdout β "standard out").
'MCP',
'symlink',
'grep',
'regex',
'localhost',
'codebase',
'TypeScript',
'JSON',
'OAuth',
'webhook',
'gRPC',
'dotfiles',
'subagent',
'worktree',
]
// βββ Helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
/**
* Split an identifier (camelCase, PascalCase, kebab-case, snake_case, or
* path segments) into individual words. Fragments of 2 chars or fewer are
* discarded to avoid noise.
*/
export function splitIdentifier(name: string): string[] {
return name
.replace(/([a-z])([A-Z])/g, '$1 $2')
.split(/[-_./\s]+/)
.map(w => w.trim())
.filter(w => w.length > 2 && w.length <= 20)
}
function fileNameWords(filePath: string): string[] {
const stem = basename(filePath).replace(/\.[^.]+$/, '')
return splitIdentifier(stem)
}
// βββ Public API βββββββββββββββββββββββββββββββββββββββββββββββββββββ
const MAX_KEYTERMS = 50
/**
* Build a list of keyterms for the voice_stream STT endpoint.
*
* Combines hardcoded global coding terms with session context (project name,
* git branch, recent files) without any model calls.
*/
export async function getVoiceKeyterms(
recentFiles?: ReadonlySet<string>,
): Promise<string[]> {
const terms = new Set<string>(GLOBAL_KEYTERMS)
// Project root basename as a single term β users say "claude CLI internal"
// as a phrase, not isolated words. Keeping the whole basename lets the
// STT's keyterm boosting match the phrase regardless of separator.
try {
const projectRoot = getProjectRoot()
if (projectRoot) {
const name = basename(projectRoot)
if (name.length > 2 && name.length <= 50) {
terms.add(name)
}
}
} catch {
// getProjectRoot() may throw if not initialised yet β ignore
}
// Git branch words (e.g. "feat/voice-keyterms" β "feat", "voice", "keyterms")
try {
const branch = await getBranch()
if (branch) {
for (const word of splitIdentifier(branch)) {
terms.add(word)
}
}
} catch {
// getBranch() may fail if not in a git repo β ignore
}
// Recent file names β only scan enough to fill remaining slots
if (recentFiles) {
for (const filePath of recentFiles) {
if (terms.size >= MAX_KEYTERMS) break
for (const word of fileNameWords(filePath)) {
terms.add(word)
}
}
}
return [...terms].slice(0, MAX_KEYTERMS)
}