diff --git a/src/features/boundaries.ts b/src/features/boundaries.ts index 05f7738f..0857ffc6 100644 --- a/src/features/boundaries.ts +++ b/src/features/boundaries.ts @@ -235,30 +235,23 @@ interface EvaluateBoundariesOpts { noTests?: boolean; } -export function evaluateBoundaries( - db: BetterSqlite3Database, - boundaryConfig: BoundaryConfig | undefined, - opts: EvaluateBoundariesOpts = {}, -): { violations: BoundaryViolation[]; violationCount: number } { - if (!boundaryConfig) return { violations: [], violationCount: 0 }; - - const { valid, errors } = validateBoundaryConfig(boundaryConfig); - if (!valid) { - throw new BoundaryError(`Invalid boundary configuration: ${errors.join('; ')}`); - } - - const modules = resolveModules(boundaryConfig); - if (modules.size === 0) return { violations: [], violationCount: 0 }; - - let allRules: BoundaryRule[] = []; - if (boundaryConfig.preset) { - allRules = generatePresetRules(modules, boundaryConfig.preset); - } +function collectAllRules( + boundaryConfig: BoundaryConfig, + modules: Map, +): BoundaryRule[] { + const rules: BoundaryRule[] = boundaryConfig.preset + ? generatePresetRules(modules, boundaryConfig.preset) + : []; if (boundaryConfig.rules && Array.isArray(boundaryConfig.rules)) { - allRules = allRules.concat(boundaryConfig.rules); + return rules.concat(boundaryConfig.rules); } - if (allRules.length === 0) return { violations: [], violationCount: 0 }; + return rules; +} +function loadImportEdges( + db: BetterSqlite3Database, + opts: EvaluateBoundariesOpts, +): Array<{ source: string; target: string }> { let edges: Array<{ source: string; target: string }>; try { edges = db @@ -281,38 +274,63 @@ export function evaluateBoundaries( const scope = new Set(opts.scopeFiles); edges = edges.filter((e) => scope.has(e.source)); } + return edges; +} - const violations: BoundaryViolation[] = []; +function ruleViolated(rule: BoundaryRule, toModule: string): boolean { + if (rule.notTo?.includes(toModule)) return true; + if (rule.onlyTo && !rule.onlyTo.includes(toModule)) return true; + return false; +} - for (const edge of edges) { - const fromModule = classifyFile(edge.source, modules); - const toModule = classifyFile(edge.target, modules); +function emitEdgeViolations( + edge: { source: string; target: string }, + fromModule: string, + toModule: string, + allRules: BoundaryRule[], + violations: BoundaryViolation[], +): void { + for (const rule of allRules) { + if (rule.from !== fromModule) continue; + if (!ruleViolated(rule, toModule)) continue; + violations.push({ + rule: 'boundaries', + name: `${fromModule} -> ${toModule}`, + file: edge.source, + targetFile: edge.target, + message: rule.message || `${fromModule} must not depend on ${toModule}`, + value: 1, + threshold: 0, + }); + } +} - if (!fromModule || !toModule) continue; +export function evaluateBoundaries( + db: BetterSqlite3Database, + boundaryConfig: BoundaryConfig | undefined, + opts: EvaluateBoundariesOpts = {}, +): { violations: BoundaryViolation[]; violationCount: number } { + if (!boundaryConfig) return { violations: [], violationCount: 0 }; - for (const rule of allRules) { - if (rule.from !== fromModule) continue; + const { valid, errors } = validateBoundaryConfig(boundaryConfig); + if (!valid) { + throw new BoundaryError(`Invalid boundary configuration: ${errors.join('; ')}`); + } - let isViolation = false; + const modules = resolveModules(boundaryConfig); + if (modules.size === 0) return { violations: [], violationCount: 0 }; - if (rule.notTo?.includes(toModule)) { - isViolation = true; - } else if (rule.onlyTo && !rule.onlyTo.includes(toModule)) { - isViolation = true; - } + const allRules = collectAllRules(boundaryConfig, modules); + if (allRules.length === 0) return { violations: [], violationCount: 0 }; - if (isViolation) { - violations.push({ - rule: 'boundaries', - name: `${fromModule} -> ${toModule}`, - file: edge.source, - targetFile: edge.target, - message: rule.message || `${fromModule} must not depend on ${toModule}`, - value: 1, - threshold: 0, - }); - } - } + const edges = loadImportEdges(db, opts); + const violations: BoundaryViolation[] = []; + + for (const edge of edges) { + const fromModule = classifyFile(edge.source, modules); + const toModule = classifyFile(edge.target, modules); + if (!fromModule || !toModule) continue; + emitEdgeViolations(edge, fromModule, toModule, allRules, violations); } return { violations, violationCount: violations.length }; diff --git a/src/features/cfg.ts b/src/features/cfg.ts index 7736c741..382d32a6 100644 --- a/src/features/cfg.ts +++ b/src/features/cfg.ts @@ -18,7 +18,13 @@ import { } from '../db/index.js'; import { debug, info } from '../infrastructure/logger.js'; import { paginateResult } from '../shared/paginate.js'; -import type { BetterSqlite3Database, Definition, NodeRow, TreeSitterNode } from '../types.js'; +import type { + BetterSqlite3Database, + CfgRulesConfig, + Definition, + NodeRow, + TreeSitterNode, +} from '../types.js'; import { findNodes } from './shared/find-nodes.js'; export { _makeCfgRules as makeCfgRules, CFG_RULES }; @@ -122,9 +128,8 @@ async function initCfgParsers( let getParserFn: unknown = null; if (needsFallback) { - const { createParsers } = await import('../domain/parser.js'); - parsers = await createParsers(); const mod = await import('../domain/parser.js'); + parsers = await mod.createParsers(); getParserFn = mod.getParser; } @@ -187,7 +192,7 @@ interface VisitorCfgResult { function buildVisitorCfgMap( tree: { rootNode: TreeSitterNode } | null | undefined, - cfgRules: unknown, + cfgRules: CfgRulesConfig, symbols: FileSymbols, langId: string, ): Map | null { @@ -203,9 +208,8 @@ function buildVisitorCfgMap( if (!needsVisitor) return null; const visitor = createCfgVisitor(cfgRules); - const typedRules = cfgRules as { functionNodes: string[] }; const walkerOpts = { - functionNodeTypes: new Set(typedRules.functionNodes), + functionNodeTypes: new Set(cfgRules.functionNodes), nestingNodeTypes: new Set(), getFunctionName: (node: TreeSitterNode) => { const nameNode = node.childForFieldName?.('name'); @@ -365,79 +369,91 @@ function persistVisitorFileCfg( return count; } -export async function buildCFGData( +/** + * Build a single native bulk-insert entry for one definition. + * Returns null when the def has no CFG blocks or no associated node row. + */ +function buildNativeCfgEntry( db: BetterSqlite3Database, - fileSymbols: Map, - rootDir: string, - engineOpts?: { - nativeDb?: { bulkInsertCfg?(entries: Array>): number }; - suspendJsDb?: () => void; - resumeJsDb?: () => void; - }, -): Promise { - // Fast path: when all function/method defs already have native CFG data, - // skip WASM parser init, tree parsing, and JS visitor entirely — just persist. - const allNative = allCfgNative(fileSymbols); + def: Definition, + relPath: string, +): Record | null { + if (def.kind !== 'function' && def.kind !== 'method') return null; + if (!def.line) return null; + + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) return null; + + const cfg = def.cfg as { blocks?: CfgBuildBlock[]; edges?: CfgBuildEdge[] } | undefined; + if (!cfg?.blocks?.length) return null; + + return { + nodeId, + blocks: cfg.blocks.map((b) => ({ + index: b.index, + blockType: b.type, + startLine: b.startLine ?? undefined, + endLine: b.endLine ?? undefined, + label: b.label ?? undefined, + })), + edges: (cfg.edges || []).map((e) => ({ + sourceIndex: e.sourceIndex, + targetIndex: e.targetIndex, + kind: e.kind, + })), + }; +} - // ── Native bulk-insert fast path ────────────────────────────────────── - // The Rust bulkInsertCfg handles delete-before-insert atomically on a - // single rusqlite connection, so there is no dual-connection WAL conflict. +/** + * Native bulk-insert fast path. The Rust bulkInsertCfg handles + * delete-before-insert atomically on a single rusqlite connection, so there + * is no dual-connection WAL conflict. Returns true if this path handled the + * request (caller should return early); false to fall through to WASM/JS. + */ +function tryNativeBulkInsertCfg( + db: BetterSqlite3Database, + fileSymbols: Map, + engineOpts: + | { + nativeDb?: { bulkInsertCfg?(entries: Array>): number }; + suspendJsDb?: () => void; + resumeJsDb?: () => void; + } + | undefined, +): boolean { const nativeDb = engineOpts?.nativeDb; - if (allNative && nativeDb?.bulkInsertCfg) { - const entries: Array> = []; - for (const [relPath, symbols] of fileSymbols) { - const ext = path.extname(relPath).toLowerCase(); - if (!CFG_EXTENSIONS.has(ext)) continue; + if (!nativeDb?.bulkInsertCfg) return false; - for (const def of symbols.definitions) { - if (def.kind !== 'function' && def.kind !== 'method') continue; - if (!def.line) continue; - - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - - const cfg = def.cfg as { blocks?: CfgBuildBlock[]; edges?: CfgBuildEdge[] } | undefined; - if (!cfg?.blocks?.length) continue; - - entries.push({ - nodeId, - blocks: cfg.blocks.map((b) => ({ - index: b.index, - blockType: b.type, - startLine: b.startLine ?? undefined, - endLine: b.endLine ?? undefined, - label: b.label ?? undefined, - })), - edges: (cfg.edges || []).map((e) => ({ - sourceIndex: e.sourceIndex, - targetIndex: e.targetIndex, - kind: e.kind, - })), - }); - } - } + const entries: Array> = []; + for (const [relPath, symbols] of fileSymbols) { + const ext = path.extname(relPath).toLowerCase(); + if (!CFG_EXTENSIONS.has(ext)) continue; - if (entries.length > 0) { - let inserted = 0; - try { - engineOpts?.suspendJsDb?.(); - inserted = nativeDb.bulkInsertCfg(entries); - } finally { - engineOpts?.resumeJsDb?.(); - } - info(`CFG (native bulk): ${inserted} functions analyzed`); + for (const def of symbols.definitions) { + const entry = buildNativeCfgEntry(db, def, relPath); + if (entry) entries.push(entry); } - return; } - const extToLang = buildExtToLangMap(); - let parsers: unknown = null; - let getParserFn: unknown = null; - - if (!allNative) { - ({ parsers, getParserFn } = await initCfgParsers(fileSymbols)); + if (entries.length > 0) { + let inserted = 0; + try { + engineOpts?.suspendJsDb?.(); + inserted = nativeDb.bulkInsertCfg(entries); + } finally { + engineOpts?.resumeJsDb?.(); + } + info(`CFG (native bulk): ${inserted} functions analyzed`); } + return true; +} +interface CfgInsertStatements { + insertBlock: ReturnType; + insertEdge: ReturnType; +} + +function prepareCfgInsertStatements(db: BetterSqlite3Database): CfgInsertStatements { const insertBlock = db.prepare( `INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) VALUES (?, ?, ?, ?, ?, ?)`, @@ -446,15 +462,31 @@ export async function buildCFGData( `INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind) VALUES (?, ?, ?, ?)`, ); - let analyzed = 0; + return { insertBlock, insertEdge }; +} +/** + * Persist CFG for every CFG-eligible file inside a single transaction. + * Dispatches to native fast path or visitor path per file. + */ +function persistAllFileCfgs( + db: BetterSqlite3Database, + fileSymbols: Map, + rootDir: string, + allNative: boolean, + extToLang: Map, + parsers: unknown, + getParserFn: unknown, + stmts: CfgInsertStatements, +): number { + let analyzed = 0; const tx = db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { const ext = path.extname(relPath).toLowerCase(); if (!CFG_EXTENSIONS.has(ext)) continue; if (allNative && !symbols._tree) { - analyzed += persistNativeFileCfg(db, symbols, relPath, insertBlock, insertEdge); + analyzed += persistNativeFileCfg(db, symbols, relPath, stmts.insertBlock, stmts.insertEdge); continue; } @@ -466,13 +498,52 @@ export async function buildCFGData( extToLang, parsers, getParserFn, - insertBlock, - insertEdge, + stmts.insertBlock, + stmts.insertEdge, ); } }); - tx(); + return analyzed; +} + +export async function buildCFGData( + db: BetterSqlite3Database, + fileSymbols: Map, + rootDir: string, + engineOpts?: { + nativeDb?: { bulkInsertCfg?(entries: Array>): number }; + suspendJsDb?: () => void; + resumeJsDb?: () => void; + }, +): Promise { + // Fast path: when all function/method defs already have native CFG data, + // skip WASM parser init, tree parsing, and JS visitor entirely — just persist. + const allNative = allCfgNative(fileSymbols); + + if (allNative && tryNativeBulkInsertCfg(db, fileSymbols, engineOpts)) { + return; + } + + const extToLang = buildExtToLangMap(); + let parsers: unknown = null; + let getParserFn: unknown = null; + + if (!allNative) { + ({ parsers, getParserFn } = await initCfgParsers(fileSymbols)); + } + + const stmts = prepareCfgInsertStatements(db); + const analyzed = persistAllFileCfgs( + db, + fileSymbols, + rootDir, + allNative, + extToLang, + parsers, + getParserFn, + stmts, + ); if (analyzed > 0) { info(`CFG: ${analyzed} functions analyzed`); diff --git a/src/features/check.ts b/src/features/check.ts index 28902280..99fe6f3a 100644 --- a/src/features/check.ts +++ b/src/features/check.ts @@ -22,6 +22,29 @@ interface ParsedDiff { newFiles: Set; } +const HUNK_RE = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/; +const NEW_FILE_RE = /^\+\+\+ b\/(.+)/; + +function pushHunkRanges( + line: string, + currentFile: string, + changedRanges: Map, + oldRanges: Map, +): void { + const hunkMatch = line.match(HUNK_RE); + if (!hunkMatch) return; + const oldStart = parseInt(hunkMatch[1]!, 10); + const oldCount = parseInt(hunkMatch[2] || '1', 10); + if (oldCount > 0) { + oldRanges.get(currentFile)!.push({ start: oldStart, end: oldStart + oldCount - 1 }); + } + const newStart = parseInt(hunkMatch[3]!, 10); + const newCount = parseInt(hunkMatch[4] || '1', 10); + if (newCount > 0) { + changedRanges.get(currentFile)!.push({ start: newStart, end: newStart + newCount - 1 }); + } +} + export function parseDiffOutput(diffOutput: string): ParsedDiff { const changedRanges = new Map(); const oldRanges = new Map(); @@ -38,7 +61,7 @@ export function parseDiffOutput(diffOutput: string): ParsedDiff { prevIsDevNull = false; continue; } - const fileMatch = line.match(/^\+\+\+ b\/(.+)/); + const fileMatch = line.match(NEW_FILE_RE); if (fileMatch) { currentFile = fileMatch[1]!; if (!changedRanges.has(currentFile)) changedRanges.set(currentFile, []); @@ -47,19 +70,7 @@ export function parseDiffOutput(diffOutput: string): ParsedDiff { prevIsDevNull = false; continue; } - const hunkMatch = line.match(/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/); - if (hunkMatch && currentFile) { - const oldStart = parseInt(hunkMatch[1]!, 10); - const oldCount = parseInt(hunkMatch[2] || '1', 10); - if (oldCount > 0) { - oldRanges.get(currentFile)!.push({ start: oldStart, end: oldStart + oldCount - 1 }); - } - const newStart = parseInt(hunkMatch[3]!, 10); - const newCount = parseInt(hunkMatch[4] || '1', 10); - if (newCount > 0) { - changedRanges.get(currentFile)!.push({ start: newStart, end: newStart + newCount - 1 }); - } - } + if (currentFile) pushHunkRanges(line, currentFile, changedRanges, oldRanges); } return { changedRanges, oldRanges, newFiles }; } @@ -96,6 +107,26 @@ interface BlastRadiusResult { violations: BlastRadiusViolation[]; } +type DefRow = { + id: number; + name: string; + kind: string; + file: string; + line: number; + end_line: number | null; +}; + +function rangesOverlap(defLine: number, endLine: number, ranges: DiffRange[]): boolean { + for (const range of ranges) { + if (range.start <= endLine && range.end >= defLine) return true; + } + return false; +} + +function defEndLine(def: DefRow, nextDef: DefRow | undefined): number { + return def.end_line || (nextDef ? nextDef.line - 1 : 999999); +} + export function checkMaxBlastRadius( db: BetterSqlite3Database, changedRanges: Map, @@ -105,34 +136,18 @@ export function checkMaxBlastRadius( ): BlastRadiusResult { const violations: BlastRadiusViolation[] = []; let maxFound = 0; + const defsStmt = db.prepare( + `SELECT * FROM nodes WHERE file = ? AND kind IN ('function', 'method', 'class') ORDER BY line`, + ); for (const [file, ranges] of changedRanges) { if (noTests && isTestFile(file)) continue; - const defs = db - .prepare( - `SELECT * FROM nodes WHERE file = ? AND kind IN ('function', 'method', 'class') ORDER BY line`, - ) - .all(file) as Array<{ - id: number; - name: string; - kind: string; - file: string; - line: number; - end_line: number | null; - }>; + const defs = defsStmt.all(file) as DefRow[]; for (let i = 0; i < defs.length; i++) { const def = defs[i]!; - const nextDef = defs[i + 1]; - const endLine = def.end_line || (nextDef ? nextDef.line - 1 : 999999); - let overlaps = false; - for (const range of ranges) { - if (range.start <= endLine && range.end >= def.line) { - overlaps = true; - break; - } - } - if (!overlaps) continue; + const endLine = defEndLine(def, defs[i + 1]); + if (!rangesOverlap(def.line, endLine, ranges)) continue; const { totalDependents: totalCallers } = bfsTransitiveCallers(db, def.id, { noTests, @@ -364,11 +379,13 @@ function runPredicates( return predicates; } -const EMPTY_CHECK: CheckResult = { - predicates: [], - summary: { total: 0, passed: 0, failed: 0, changedFiles: 0, newFiles: 0 }, - passed: true, -}; +function makeEmptyCheck(): CheckResult { + return { + predicates: [], + summary: { total: 0, passed: 0, failed: 0, changedFiles: 0, newFiles: 0 }, + passed: true, + }; +} export function checkData(customDbPath: string | undefined, opts: CheckOpts = {}): CheckResult { const db = openReadonlyOrFail(customDbPath); @@ -394,10 +411,10 @@ export function checkData(customDbPath: string | undefined, opts: CheckOpts = {} return { error: `Failed to run git diff: ${(e as Error).message}` }; } - if (!diffOutput.trim()) return EMPTY_CHECK; + if (!diffOutput.trim()) return makeEmptyCheck(); const diff = parseDiffOutput(diffOutput); - if (diff.changedRanges.size === 0) return EMPTY_CHECK; + if (diff.changedRanges.size === 0) return makeEmptyCheck(); const predicates = runPredicates(db, diff, flags, repoRoot, noTests, maxDepth); diff --git a/src/features/cochange.ts b/src/features/cochange.ts index ffda28d2..2c4b9c37 100644 --- a/src/features/cochange.ts +++ b/src/features/cochange.ts @@ -137,77 +137,50 @@ export function computeCoChanges( return { pairs: results, fileCommitCounts }; } -export function analyzeCoChanges( - customDbPath?: string, - opts: { - since?: string; - minSupport?: number; - maxFilesPerCommit?: number; - full?: boolean; - } = {}, -): - | { pairsFound: number; commitsScanned: number; since: string; minSupport: number } - | { error: string } { - const dbPath = findDbPath(customDbPath); - const db = openDb(dbPath); - initSchema(db); - - const repoRoot = path.resolve(path.dirname(dbPath), '..'); - - if (!fs.existsSync(path.join(repoRoot, '.git'))) { - closeDb(db); - return { error: `Not a git repository: ${repoRoot}` }; - } - - const since = opts.since || '1 year ago'; - const minSupport = opts.minSupport ?? 3; - const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50; - - // Check for incremental state - let afterSha: string | null = null; - if (!opts.full) { - try { - const row = db - .prepare<{ value: string }>( - "SELECT value FROM co_change_meta WHERE key = 'last_analyzed_commit'", - ) - .get(); - if (row) afterSha = row.value; - } catch { - /* table may not exist yet */ - } +/** Read the SHA of the most recently analyzed commit (incremental state). */ +function loadLastAnalyzedSha(db: BetterSqlite3Database): string | null { + try { + const row = db + .prepare<{ value: string }>( + "SELECT value FROM co_change_meta WHERE key = 'last_analyzed_commit'", + ) + .get(); + return row ? row.value : null; + } catch { + /* table may not exist yet */ + return null; } +} - // If full re-scan, clear existing data - if (opts.full) { - db.exec('DELETE FROM co_changes'); - db.exec('DELETE FROM co_change_meta'); - db.exec('DELETE FROM file_commit_counts'); - } +/** Wipe all co-change tables for a full re-scan. */ +function clearCoChangeTables(db: BetterSqlite3Database): void { + db.exec('DELETE FROM co_changes'); + db.exec('DELETE FROM co_change_meta'); + db.exec('DELETE FROM file_commit_counts'); +} - // Collect known files from the graph for filtering - let knownFiles: Set | null = null; +/** Collect the set of files currently tracked by the graph for filtering. */ +function loadKnownFiles(db: BetterSqlite3Database): Set | null { try { const rows = db.prepare<{ file: string }>('SELECT DISTINCT file FROM nodes').all(); - knownFiles = new Set(rows.map((r) => r.file)); + return new Set(rows.map((r) => r.file)); } catch { /* nodes table may not exist */ + return null; } +} - const { commits } = scanGitHistory(repoRoot, { since, afterSha }); - const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, { - minSupport, - maxFilesPerCommit, - knownFiles, - }); - - // Upsert per-file commit counts so Jaccard can be recomputed from totals +/** Upsert per-file commit counts and pair counts (Jaccard recomputed later). */ +function persistCoChangeResults( + db: BetterSqlite3Database, + fileCommitCounts: Map, + coChanges: Map, +): void { const fileCountUpsert = db.prepare(` INSERT INTO file_commit_counts (file, commit_count) VALUES (?, ?) ON CONFLICT(file) DO UPDATE SET commit_count = commit_count + excluded.commit_count `); - // Upsert pair counts (accumulate commit_count, jaccard placeholder — recomputed below) const pairUpsert = db.prepare(` INSERT INTO co_changes (file_a, file_b, commit_count, jaccard, last_commit_epoch) VALUES (?, ?, ?, 0, ?) @@ -226,24 +199,31 @@ export function analyzeCoChanges( } }); insertMany(); +} - // Recompute Jaccard for all affected pairs from total file commit counts - const affectedFiles = [...fileCommitCounts.keys()]; - if (affectedFiles.length > 0) { - const ph = affectedFiles.map(() => '?').join(','); - db.prepare(` - UPDATE co_changes SET jaccard = ( - SELECT CAST(co_changes.commit_count AS REAL) / ( - COALESCE(fa.commit_count, 0) + COALESCE(fb.commit_count, 0) - co_changes.commit_count - ) - FROM file_commit_counts fa, file_commit_counts fb - WHERE fa.file = co_changes.file_a AND fb.file = co_changes.file_b +/** Recompute Jaccard for every pair touching any file in `affectedFiles`. */ +function recomputeJaccardForAffected(db: BetterSqlite3Database, affectedFiles: string[]): void { + if (affectedFiles.length === 0) return; + const ph = affectedFiles.map(() => '?').join(','); + db.prepare(` + UPDATE co_changes SET jaccard = ( + SELECT CAST(co_changes.commit_count AS REAL) / ( + COALESCE(fa.commit_count, 0) + COALESCE(fb.commit_count, 0) - co_changes.commit_count ) - WHERE file_a IN (${ph}) OR file_b IN (${ph}) - `).run(...affectedFiles, ...affectedFiles); - } + FROM file_commit_counts fa, file_commit_counts fb + WHERE fa.file = co_changes.file_a AND fb.file = co_changes.file_b + ) + WHERE file_a IN (${ph}) OR file_b IN (${ph}) + `).run(...affectedFiles, ...affectedFiles); +} - // Update metadata +/** Update co_change_meta with the latest analyzer run parameters. */ +function updateCoChangeMeta( + db: BetterSqlite3Database, + commits: CommitEntry[], + since: string, + minSupport: number, +): void { const metaUpsert = db.prepare(` INSERT INTO co_change_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value @@ -254,6 +234,49 @@ export function analyzeCoChanges( metaUpsert.run('analyzed_at', new Date().toISOString()); metaUpsert.run('since', since); metaUpsert.run('min_support', String(minSupport)); +} + +export function analyzeCoChanges( + customDbPath?: string, + opts: { + since?: string; + minSupport?: number; + maxFilesPerCommit?: number; + full?: boolean; + } = {}, +): + | { pairsFound: number; commitsScanned: number; since: string; minSupport: number } + | { error: string } { + const dbPath = findDbPath(customDbPath); + const db = openDb(dbPath); + initSchema(db); + + const repoRoot = path.resolve(path.dirname(dbPath), '..'); + + if (!fs.existsSync(path.join(repoRoot, '.git'))) { + closeDb(db); + return { error: `Not a git repository: ${repoRoot}` }; + } + + const since = opts.since || '1 year ago'; + const minSupport = opts.minSupport ?? 3; + const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50; + + const afterSha = opts.full ? null : loadLastAnalyzedSha(db); + if (opts.full) clearCoChangeTables(db); + + const knownFiles = loadKnownFiles(db); + + const { commits } = scanGitHistory(repoRoot, { since, afterSha }); + const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, { + minSupport, + maxFilesPerCommit, + knownFiles, + }); + + persistCoChangeResults(db, fileCommitCounts, coChanges); + recomputeJaccardForAffected(db, [...fileCommitCounts.keys()]); + updateCoChangeMeta(db, commits, since, minSupport); const totalPairs = db .prepare<{ cnt: number }>('SELECT COUNT(*) as cnt FROM co_changes') diff --git a/src/features/complexity.ts b/src/features/complexity.ts index 509d0347..307ed854 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -31,44 +31,36 @@ const COMPLEXITY_EXTENSIONS = buildExtensionSet(COMPLEXITY_RULES); // ─── Halstead Metrics Computation ───────────────────────────────────────── -export function computeHalsteadMetrics( - functionNode: TreeSitterNode, - language: string, -): HalsteadDerivedMetrics | null { - const rules = HALSTEAD_RULES.get(language) as HalsteadRules | undefined; - if (!rules) return null; - - const operators = new Map(); // type -> count - const operands = new Map(); // text -> count - - function walk(node: TreeSitterNode | null): void { - if (!node) return; - - // Skip type annotation subtrees - if (rules?.skipTypes.has(node.type)) return; +/** Classify a tree-sitter node as a Halstead operator or operand, + * updating the running counts. Pure helper extracted from computeHalsteadMetrics + * to keep the dispatcher thin. */ +function classifyHalsteadToken( + node: TreeSitterNode, + rules: HalsteadRules, + operators: Map, + operands: Map, +): void { + // Compound operators (non-leaf): count the node type as an operator + if (rules.compoundOperators.has(node.type)) { + operators.set(node.type, (operators.get(node.type) || 0) + 1); + } - // Compound operators (non-leaf): count the node type as an operator - if (rules?.compoundOperators.has(node.type)) { + // Leaf nodes: classify as operator or operand + if (node.childCount === 0) { + if (rules.operatorLeafTypes.has(node.type)) { operators.set(node.type, (operators.get(node.type) || 0) + 1); - } - - // Leaf nodes: classify as operator or operand - if (node.childCount === 0) { - if (rules?.operatorLeafTypes.has(node.type)) { - operators.set(node.type, (operators.get(node.type) || 0) + 1); - } else if (rules?.operandLeafTypes.has(node.type)) { - const text = node.text; - operands.set(text, (operands.get(text) || 0) + 1); - } - } - - for (let i = 0; i < node.childCount; i++) { - walk(node.child(i)); + } else if (rules.operandLeafTypes.has(node.type)) { + const text = node.text; + operands.set(text, (operands.get(text) || 0) + 1); } } +} - walk(functionNode); - +/** Build a HalsteadDerivedMetrics summary from the raw operator/operand counts. */ +function summarizeHalsteadCounts( + operators: Map, + operands: Map, +): HalsteadDerivedMetrics { const n1 = operators.size; // distinct operators const n2 = operands.size; // distinct operands let bigN1 = 0; // total operators @@ -79,7 +71,6 @@ export function computeHalsteadMetrics( const vocabulary = n1 + n2; const length = bigN1 + bigN2; - // Guard against zero const volume = vocabulary > 0 ? length * Math.log2(vocabulary) : 0; const difficulty = n2 > 0 ? (n1 / 2) * (bigN2 / n2) : 0; const effort = difficulty * volume; @@ -99,6 +90,31 @@ export function computeHalsteadMetrics( }; } +export function computeHalsteadMetrics( + functionNode: TreeSitterNode, + language: string, +): HalsteadDerivedMetrics | null { + const rules = HALSTEAD_RULES.get(language) as HalsteadRules | undefined; + if (!rules) return null; + + const operators = new Map(); // type -> count + const operands = new Map(); // text -> count + + function walk(node: TreeSitterNode | null): void { + if (!node) return; + // Skip type annotation subtrees + if (rules?.skipTypes.has(node.type)) return; + classifyHalsteadToken(node, rules as HalsteadRules, operators, operands); + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i)); + } + } + + walk(functionNode); + + return summarizeHalsteadCounts(operators, operands); +} + // ─── LOC Metrics Computation ────────────────────────────────────────────── // Delegated to ast-analysis/metrics.js; re-exported for backward compatibility. export const computeLOCMetrics = _computeLOCMetrics; @@ -535,6 +551,89 @@ function upsertAstComplexity( return 1; } +/** Decision outcome for a single definition during native bulk-row collection. + * - 'skip': the definition is legitimately ignorable (non-function, missing line, + * interface stub, unsupported language). + * - 'fallback': a genuine function body is missing precomputed complexity — + * the whole native fast path must abort to JS. + * - 'emit': the definition has complexity data and a row was (or will be) appended. */ +type NativeRowDecision = 'skip' | 'fallback' | 'emit'; + +/** Classify a definition relative to the native bulk path. Returns + * 'skip' to ignore it, 'fallback' to bail out, or 'emit' if the row should be added. */ +function classifyDefinitionForNativeBulk( + def: FileSymbols['definitions'][0], + langSupported: boolean, +): NativeRowDecision { + if (def.kind !== 'function' && def.kind !== 'method') return 'skip'; + if (!def.line) return 'skip'; + if (!def.complexity) { + // Interface/type property signatures and single-line stubs are extracted + // as methods but the native engine correctly never assigns complexity. + // Mirror the leniency in initWasmParsersIfNeeded to avoid bailing out + // of the native bulk-insert path for every TypeScript codebase (#846). + if (def.name.includes('.') || !def.endLine || def.endLine <= def.line) return 'skip'; + // Languages without complexity rules will never have data — skip them + // rather than bailing out of the entire native bulk path. + if (!langSupported) return 'skip'; + return 'fallback'; // genuine function body missing complexity — needs JS fallback + } + return 'emit'; +} + +/** Build a single native-bulk row from a definition with complexity data. */ +function buildNativeBulkRow( + nodeId: number, + def: FileSymbols['definitions'][0], +): Record { + const ch = def.complexity?.halstead; + const cl = def.complexity?.loc; + return { + nodeId, + cognitive: def.complexity?.cognitive ?? 0, + cyclomatic: def.complexity?.cyclomatic ?? 0, + maxNesting: def.complexity?.maxNesting ?? 0, + loc: cl ? cl.loc : 0, + sloc: cl ? cl.sloc : 0, + commentLines: cl ? cl.commentLines : 0, + halsteadN1: ch ? ch.n1 : 0, + halsteadN2: ch ? ch.n2 : 0, + halsteadBigN1: ch ? ch.bigN1 : 0, + halsteadBigN2: ch ? ch.bigN2 : 0, + halsteadVocabulary: ch ? ch.vocabulary : 0, + halsteadLength: ch ? ch.length : 0, + halsteadVolume: ch ? ch.volume : 0, + halsteadDifficulty: ch ? ch.difficulty : 0, + halsteadEffort: ch ? ch.effort : 0, + halsteadBugs: ch ? ch.bugs : 0, + maintainabilityIndex: def.complexity?.maintainabilityIndex ?? 0, + }; +} + +/** Try to collect a single file's definitions into native-bulk rows. + * Returns 'fallback' if any definition forces a JS fallback. */ +function collectFileBulkRows( + db: BetterSqlite3Database, + relPath: string, + symbols: FileSymbols, + rows: Array>, +): NativeRowDecision { + const ext = path.extname(relPath).toLowerCase(); + const langId = symbols._langId || ''; + const langSupported = COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId); + + for (const def of symbols.definitions) { + const decision = classifyDefinitionForNativeBulk(def, langSupported); + if (decision === 'skip') continue; + if (decision === 'fallback') return 'fallback'; + + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) continue; + rows.push(buildNativeBulkRow(nodeId, def)); + } + return 'emit'; +} + /** Collect native bulk-insert rows from precomputed complexity data. * Returns the rows array, or null if any definition is missing complexity * (signalling that JS fallback is needed). */ @@ -543,53 +642,9 @@ function collectNativeBulkRows( fileSymbols: Map, ): Array> | null { const rows: Array> = []; - for (const [relPath, symbols] of fileSymbols) { - const ext = path.extname(relPath).toLowerCase(); - const langId = symbols._langId || ''; - const langSupported = COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId); - - for (const def of symbols.definitions) { - if (def.kind !== 'function' && def.kind !== 'method') continue; - if (!def.line) continue; - // Interface/type property signatures and single-line stubs are extracted - // as methods but the native engine correctly never assigns complexity. - // Mirror the leniency in initWasmParsersIfNeeded to avoid bailing out - // of the native bulk-insert path for every TypeScript codebase (#846). - if (!def.complexity) { - if (def.name.includes('.') || !def.endLine || def.endLine <= def.line) continue; - // Languages without complexity rules will never have data — skip them - // rather than bailing out of the entire native bulk path. - if (!langSupported) continue; - return null; // genuine function body missing complexity — needs JS fallback - } - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - const ch = def.complexity.halstead; - const cl = def.complexity.loc; - rows.push({ - nodeId, - cognitive: def.complexity.cognitive ?? 0, - cyclomatic: def.complexity.cyclomatic ?? 0, - maxNesting: def.complexity.maxNesting ?? 0, - loc: cl ? cl.loc : 0, - sloc: cl ? cl.sloc : 0, - commentLines: cl ? cl.commentLines : 0, - halsteadN1: ch ? ch.n1 : 0, - halsteadN2: ch ? ch.n2 : 0, - halsteadBigN1: ch ? ch.bigN1 : 0, - halsteadBigN2: ch ? ch.bigN2 : 0, - halsteadVocabulary: ch ? ch.vocabulary : 0, - halsteadLength: ch ? ch.length : 0, - halsteadVolume: ch ? ch.volume : 0, - halsteadDifficulty: ch ? ch.difficulty : 0, - halsteadEffort: ch ? ch.effort : 0, - halsteadBugs: ch ? ch.bugs : 0, - maintainabilityIndex: def.complexity.maintainabilityIndex ?? 0, - }); - } + if (collectFileBulkRows(db, relPath, symbols, rows) === 'fallback') return null; } - return rows; } diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts index d85bcb66..804e7aa1 100644 --- a/src/features/dataflow.ts +++ b/src/features/dataflow.ts @@ -675,6 +675,51 @@ interface BfsParentEntry { expression: string; } +type DataflowNeighbor = { + id: number; + file: string; + edge_kind: string; + expression: string; +}; + +interface DataflowBfsState { + visited: Set; + parent: Map; + nextQueue: number[]; + found: boolean; +} + +/** + * Process a single neighbor in the dataflow BFS. Returns true once the target + * has been reached so the caller can stop expanding. + */ +function processDataflowNeighbor( + n: DataflowNeighbor, + currentId: number, + targetId: number, + noTests: boolean, + state: DataflowBfsState, +): boolean { + if (noTests && isTestFile(n.file)) return false; + const entry: BfsParentEntry = { + parentId: currentId, + edgeKind: n.edge_kind, + expression: n.expression, + }; + if (n.id === targetId) { + if (!state.found) { + state.found = true; + state.parent.set(n.id, entry); + } + return true; + } + if (state.visited.has(n.id)) return false; + state.visited.add(n.id); + state.parent.set(n.id, entry); + state.nextQueue.push(n.id); + return false; +} + /** BFS through dataflow edges to find a path from source to target. */ function bfsDataflowPath( db: BetterSqlite3Database, @@ -689,50 +734,28 @@ function bfsDataflowPath( WHERE d.source_id = ? AND d.kind IN ('flows_to', 'returns')`, ); - const visited = new Set([sourceId]); - const parent = new Map(); + const state: DataflowBfsState = { + visited: new Set([sourceId]), + parent: new Map(), + nextQueue: [], + found: false, + }; let queue = [sourceId]; - let found = false; for (let depth = 1; depth <= maxDepth; depth++) { - const nextQueue: number[] = []; + state.nextQueue = []; for (const currentId of queue) { - const neighbors = neighborStmt.all(currentId) as Array<{ - id: number; - file: string; - edge_kind: string; - expression: string; - }>; + const neighbors = neighborStmt.all(currentId) as DataflowNeighbor[]; for (const n of neighbors) { - if (noTests && isTestFile(n.file)) continue; - if (n.id === targetId) { - if (!found) { - found = true; - parent.set(n.id, { - parentId: currentId, - edgeKind: n.edge_kind, - expression: n.expression, - }); - } - continue; - } - if (!visited.has(n.id)) { - visited.add(n.id); - parent.set(n.id, { - parentId: currentId, - edgeKind: n.edge_kind, - expression: n.expression, - }); - nextQueue.push(n.id); - } + processDataflowNeighbor(n, currentId, targetId, noTests, state); } } - if (found) break; - queue = nextQueue; + if (state.found) break; + queue = state.nextQueue; if (queue.length === 0) break; } - return found ? parent : null; + return state.found ? state.parent : null; } /** Reconstruct a path from BFS parent map. */ diff --git a/src/features/flow.ts b/src/features/flow.ts index 18c52215..e2a4f1f2 100644 --- a/src/features/flow.ts +++ b/src/features/flow.ts @@ -133,6 +133,41 @@ interface BfsState { truncated: boolean; } +interface FlowBfsFrame { + visited: Set; + cycles: Array<{ from: string; to: string; depth: number }>; + nodeDepths: Map; + idToNode: Map; + nextFrontier: number[]; + levelNodes: NodeInfo[]; +} + +/** Process one callee row, recording cycle hits or expanding frontier. */ +function processFlowCallee( + c: CalleeRow, + fid: number, + depth: number, + noTests: boolean, + frame: FlowBfsFrame, +): void { + if (noTests && isTestFile(c.file)) return; + + if (frame.visited.has(c.id)) { + const fromNode = frame.idToNode.get(fid); + if (fromNode) { + frame.cycles.push({ from: fromNode.name, to: c.name, depth }); + } + return; + } + + frame.visited.add(c.id); + frame.nextFrontier.push(c.id); + const nodeInfo: NodeInfo = toSymbolRef(c); + frame.levelNodes.push(nodeInfo); + frame.nodeDepths.set(c.id, depth); + frame.idToNode.set(c.id, nodeInfo); +} + /** Forward BFS through callees, collecting steps, cycles, and node depth info. */ function bfsCallees( db: ReturnType, @@ -157,37 +192,26 @@ function bfsCallees( ); for (let d = 1; d <= maxDepth; d++) { - const nextFrontier: number[] = []; - const levelNodes: NodeInfo[] = []; + const frame: FlowBfsFrame = { + visited, + cycles, + nodeDepths, + idToNode, + nextFrontier: [], + levelNodes: [], + }; for (const fid of frontier) { - const callees = calleesStmt.all(fid); - - for (const c of callees) { - if (noTests && isTestFile(c.file)) continue; - - if (visited.has(c.id)) { - const fromNode = idToNode.get(fid); - if (fromNode) { - cycles.push({ from: fromNode.name, to: c.name, depth: d }); - } - continue; - } - - visited.add(c.id); - nextFrontier.push(c.id); - const nodeInfo: NodeInfo = toSymbolRef(c); - levelNodes.push(nodeInfo); - nodeDepths.set(c.id, d); - idToNode.set(c.id, nodeInfo); + for (const c of calleesStmt.all(fid)) { + processFlowCallee(c, fid, d, noTests, frame); } } - if (levelNodes.length > 0) { - steps.push({ depth: d, nodes: levelNodes }); + if (frame.levelNodes.length > 0) { + steps.push({ depth: d, nodes: frame.levelNodes }); } - frontier = nextFrontier; + frontier = frame.nextFrontier; if (frontier.length === 0) break; if (d === maxDepth && frontier.length > 0) truncated = true; } diff --git a/src/features/graph-enrichment.ts b/src/features/graph-enrichment.ts index 564cc500..886d09bf 100644 --- a/src/features/graph-enrichment.ts +++ b/src/features/graph-enrichment.ts @@ -336,13 +336,13 @@ interface FileLevelEdge { target: string; } -function prepareFileLevelData( +/** Load file-level import/call edges from the DB and optionally exclude test files. */ +function loadFileLevelEdges( db: BetterSqlite3Database, noTests: boolean, minConf: number, - cfg: PlotConfig, -): GraphData { - let edges = db +): FileLevelEdge[] { + const edges = db .prepare( ` SELECT DISTINCT n1.file AS source, n2.file AS target @@ -354,73 +354,118 @@ function prepareFileLevelData( `, ) .all(minConf); - if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target)); - - const files = new Set(); - for (const { source, target } of edges) { - files.add(source); - files.add(target); - } - - const fileIds = new Map(); - let idx = 0; - for (const f of files) fileIds.set(f, idx++); + return noTests ? edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target)) : edges; +} - // Fan-in/fan-out +/** Compute fan-in and fan-out for each file from a list of edges. */ +function computeFileFanCounts(edges: FileLevelEdge[]): { + fanInCount: Map; + fanOutCount: Map; +} { const fanInCount = new Map(); const fanOutCount = new Map(); for (const { source, target } of edges) { fanOutCount.set(source, (fanOutCount.get(source) || 0) + 1); fanInCount.set(target, (fanInCount.get(target) || 0) + 1); } + return { fanInCount, fanOutCount }; +} - // Communities via graph subsystem +/** Run Louvain community detection on the file-level graph. Returns empty map on failure. */ +function detectFileCommunities(files: Set, edges: FileLevelEdge[]): Map { const communityMap = new Map(); - if (files.size > 0) { - try { - const fileGraph = new CodeGraph(); - for (const f of files) fileGraph.addNode(f); - for (const { source, target } of edges) { - if (source !== target && !fileGraph.hasEdge(source, target)) - fileGraph.addEdge(source, target); - } - const { assignments } = louvainCommunities(fileGraph); - for (const [file, cid] of assignments) communityMap.set(file, cid); - } catch { - // ignore + if (files.size === 0) return communityMap; + try { + const fileGraph = new CodeGraph(); + for (const f of files) fileGraph.addNode(f); + for (const { source, target } of edges) { + if (source !== target && !fileGraph.hasEdge(source, target)) + fileGraph.addEdge(source, target); } + const { assignments } = louvainCommunities(fileGraph); + for (const [file, cid] of assignments) communityMap.set(file, cid); + } catch { + // louvain can fail on disconnected graphs } + return communityMap; +} - const visNodes: VisNode[] = [...files].map((f) => { - const id = fileIds.get(f)!; - const community = communityMap.get(f) ?? null; - const fanIn = fanInCount.get(f) || 0; - const fanOut = fanOutCount.get(f) || 0; - const directory = path.dirname(f); - const color: string = - cfg.colorBy === 'community' && community !== null - ? COMMUNITY_COLORS[community % COMMUNITY_COLORS.length] || '#ccc' - : cfg.nodeColors?.file || (DEFAULT_NODE_COLORS as Record).file || '#ccc'; - - return { - id, - label: path.basename(f), - title: f, - color, - kind: 'file', - role: '', - file: f, - line: 0, - community, - cognitive: null, - cyclomatic: null, - maintainabilityIndex: null, - fanIn, - fanOut, - directory, - risk: [], - }; - }); +/** Build a VisNode for a single file, applying color based on cfg.colorBy. */ +function buildFileVisNode( + file: string, + id: number, + community: number | null, + fanIn: number, + fanOut: number, + cfg: PlotConfig, +): VisNode { + const color: string = + cfg.colorBy === 'community' && community !== null + ? COMMUNITY_COLORS[community % COMMUNITY_COLORS.length] || '#ccc' + : cfg.nodeColors?.file || (DEFAULT_NODE_COLORS as Record).file || '#ccc'; + + return { + id, + label: path.basename(file), + title: file, + color, + kind: 'file', + role: '', + file, + line: 0, + community, + cognitive: null, + cyclomatic: null, + maintainabilityIndex: null, + fanIn, + fanOut, + directory: path.dirname(file), + risk: [], + }; +} + +/** Select seed node IDs for the file-level graph based on configured strategy. */ +function selectFileSeedNodes(visNodes: VisNode[], cfg: PlotConfig): (number | string)[] { + if (cfg.seedStrategy === 'top-fanin') { + const sorted = [...visNodes].sort((a, b) => b.fanIn - a.fanIn); + return sorted.slice(0, cfg.seedCount || 30).map((n) => n.id); + } + // Both 'entry' and the default fallback include every node — file-level graphs + // don't track per-file roles, so 'entry' has no meaningful filter. + return visNodes.map((n) => n.id); +} + +function prepareFileLevelData( + db: BetterSqlite3Database, + noTests: boolean, + minConf: number, + cfg: PlotConfig, +): GraphData { + const edges = loadFileLevelEdges(db, noTests, minConf); + + const files = new Set(); + for (const { source, target } of edges) { + files.add(source); + files.add(target); + } + + const fileIds = new Map(); + let idx = 0; + for (const f of files) fileIds.set(f, idx++); + + const { fanInCount, fanOutCount } = computeFileFanCounts(edges); + const communityMap = detectFileCommunities(files, edges); + + const visNodes: VisNode[] = [...files].map((f) => + buildFileVisNode( + f, + fileIds.get(f)!, + communityMap.get(f) ?? null, + fanInCount.get(f) || 0, + fanOutCount.get(f) || 0, + cfg, + ), + ); const visEdges: VisEdge[] = edges.map(({ source, target }, i) => ({ id: `e${i}`, @@ -428,17 +473,7 @@ function prepareFileLevelData( to: fileIds.get(target)!, })); - let seedNodeIds: (number | string)[]; - if (cfg.seedStrategy === 'top-fanin') { - const sorted = [...visNodes].sort((a, b) => b.fanIn - a.fanIn); - seedNodeIds = sorted.slice(0, cfg.seedCount || 30).map((n) => n.id); - } else if (cfg.seedStrategy === 'entry') { - seedNodeIds = visNodes.map((n) => n.id); - } else { - seedNodeIds = visNodes.map((n) => n.id); - } - - return { nodes: visNodes, edges: visEdges, seedNodeIds }; + return { nodes: visNodes, edges: visEdges, seedNodeIds: selectFileSeedNodes(visNodes, cfg) }; } // ─── HTML Generation (thin wrapper) ────────────────────────────────── diff --git a/src/features/owners.ts b/src/features/owners.ts index 5c278ce6..93d1259f 100644 --- a/src/features/owners.ts +++ b/src/features/owners.ts @@ -139,18 +139,25 @@ interface OwnersDataOpts { boundary?: boolean; } -export function ownersData( - customDbPath?: string, - opts: OwnersDataOpts = {}, -): { +interface OwnedSymbol { + name: string; + kind: string; + file: string; + line: number; + owners: string[]; +} + +interface OwnerBoundary { + from: OwnedSymbol; + to: OwnedSymbol; + edgeKind: string; +} + +interface OwnersDataResult { codeownersFile: string | null; files: { file: string; owners: string[] }[]; - symbols: { name: string; kind: string; file: string; line: number; owners: string[] }[]; - boundaries: { - from: { name: string; kind: string; file: string; line: number; owners: string[] }; - to: { name: string; kind: string; file: string; line: number; owners: string[] }; - edgeKind: string; - }[]; + symbols: OwnedSymbol[]; + boundaries: OwnerBoundary[]; summary: { totalFiles: number; ownedFiles: number; @@ -159,160 +166,193 @@ export function ownersData( ownerCount: number; byOwner: { owner: string; fileCount: number }[]; }; +} + +interface BetterSqlite3DatabaseLike { + prepare(sql: string): { all(...params: unknown[]): unknown[] }; + close(): void; +} + +function emptyOwnersResult(codeownersFile: string | null): OwnersDataResult { + return { + codeownersFile, + files: [], + symbols: [], + boundaries: [], + summary: { + totalFiles: 0, + ownedFiles: 0, + unownedFiles: 0, + coveragePercent: 0, + ownerCount: 0, + byOwner: [], + }, + }; +} + +/** Load all distinct files from the DB and apply test/file filters. */ +function loadFilteredFiles(db: BetterSqlite3DatabaseLike, opts: OwnersDataOpts): string[] { + let allFiles = (db.prepare('SELECT DISTINCT file FROM nodes').all() as { file: string }[]).map( + (r) => r.file, + ); + if (opts.noTests) allFiles = allFiles.filter((f) => !isTestFile(f)); + const fileFilters = normalizeFileFilter(opts.file); + if (fileFilters.length > 0) { + allFiles = allFiles.filter((f) => fileFilters.some((filter) => f.includes(filter))); + } + return allFiles; +} + +/** Build owner index (owner -> list of files) and count owned files. */ +function buildOwnerIndex(fileOwners: { file: string; owners: string[] }[]): { + ownerIndex: Map; + ownedCount: number; } { + const ownerIndex = new Map(); + let ownedCount = 0; + for (const fo of fileOwners) { + if (fo.owners.length > 0) ownedCount++; + for (const o of fo.owners) { + if (!ownerIndex.has(o)) ownerIndex.set(o, []); + ownerIndex.get(o)!.push(fo.file); + } + } + return { ownerIndex, ownedCount }; +} + +/** Load symbols restricted to the given file set, applying noTests and kind filters. */ +function loadSymbolsForFiles( + db: BetterSqlite3DatabaseLike, + fileSet: Set, + opts: OwnersDataOpts, + rules: CodeownersRule[], +): OwnedSymbol[] { + let symbols = ( + db.prepare('SELECT name, kind, file, line FROM nodes').all() as { + name: string; + kind: string; + file: string; + line: number; + }[] + ).filter((n) => fileSet.has(n.file)); + + if (opts.noTests) symbols = symbols.filter((s) => !isTestFile(s.file)); + if (opts.kind) symbols = symbols.filter((s) => s.kind === opts.kind); + + return symbols.map((s) => ({ ...s, owners: matchOwners(s.file, rules) })); +} + +interface CallEdgeRow { + id: number; + edgeKind: string; + srcName: string; + srcKind: string; + srcFile: string; + srcLine: number; + tgtName: string; + tgtKind: string; + tgtFile: string; + tgtLine: number; +} + +/** Compute cross-owner call boundaries. Returns empty array when boundary mode is off. */ +function computeOwnerBoundaries( + db: BetterSqlite3DatabaseLike, + rules: CodeownersRule[], + noTests: boolean, +): OwnerBoundary[] { + const edges = db + .prepare( + `SELECT e.id, e.kind AS edgeKind, + s.name AS srcName, s.kind AS srcKind, s.file AS srcFile, s.line AS srcLine, + t.name AS tgtName, t.kind AS tgtKind, t.file AS tgtFile, t.line AS tgtLine + FROM edges e + JOIN nodes s ON e.source_id = s.id + JOIN nodes t ON e.target_id = t.id + WHERE e.kind = 'calls'`, + ) + .all() as CallEdgeRow[]; + + const boundaries: OwnerBoundary[] = []; + for (const e of edges) { + if (noTests && (isTestFile(e.srcFile) || isTestFile(e.tgtFile))) continue; + const srcOwners = matchOwners(e.srcFile, rules); + const tgtOwners = matchOwners(e.tgtFile, rules); + // Cross-boundary: different owner sets (sort for deterministic comparison + output) + const sortedSrc = [...srcOwners].sort(); + const sortedTgt = [...tgtOwners].sort(); + const srcKey = sortedSrc.join(','); + const tgtKey = sortedTgt.join(','); + if (srcKey === tgtKey) continue; + boundaries.push({ + from: { + name: e.srcName, + kind: e.srcKind, + file: e.srcFile, + line: e.srcLine, + owners: sortedSrc, + }, + to: { name: e.tgtName, kind: e.tgtKind, file: e.tgtFile, line: e.tgtLine, owners: sortedTgt }, + edgeKind: e.edgeKind, + }); + } + return boundaries; +} + +/** Build summary stats (totals, coverage, by-owner counts). */ +function buildOwnersSummary( + totalFiles: number, + ownedCount: number, + ownerIndex: Map, +): OwnersDataResult['summary'] { + const byOwner = [...ownerIndex.entries()] + .map(([owner, files]) => ({ owner, fileCount: files.length })) + .sort((a, b) => b.fileCount - a.fileCount); + + return { + totalFiles, + ownedFiles: ownedCount, + unownedFiles: totalFiles - ownedCount, + coveragePercent: totalFiles > 0 ? Math.round((ownedCount / totalFiles) * 100) : 0, + ownerCount: ownerIndex.size, + byOwner, + }; +} + +export function ownersData(customDbPath?: string, opts: OwnersDataOpts = {}): OwnersDataResult { const db = openReadonlyOrFail(customDbPath); try { const dbPath = findDbPath(customDbPath); const repoRoot = path.resolve(path.dirname(dbPath), '..'); const parsed = parseCodeowners(repoRoot); - if (!parsed) { - return { - codeownersFile: null, - files: [], - symbols: [], - boundaries: [], - summary: { - totalFiles: 0, - ownedFiles: 0, - unownedFiles: 0, - coveragePercent: 0, - ownerCount: 0, - byOwner: [], - }, - }; - } - - // Get all distinct files from nodes - let allFiles = (db.prepare('SELECT DISTINCT file FROM nodes').all() as { file: string }[]).map( - (r) => r.file, - ); + if (!parsed) return emptyOwnersResult(null); - if (opts.noTests) allFiles = allFiles.filter((f) => !isTestFile(f)); - const fileFilters = normalizeFileFilter(opts.file); - if (fileFilters.length > 0) { - allFiles = allFiles.filter((f) => fileFilters.some((filter) => f.includes(filter))); - } - - // Map files to owners - const fileOwners = allFiles.map((file) => ({ - file, - owners: matchOwners(file, parsed.rules), - })); - - // Build owner-to-files index - const ownerIndex = new Map(); - let ownedCount = 0; - for (const fo of fileOwners) { - if (fo.owners.length > 0) ownedCount++; - for (const o of fo.owners) { - if (!ownerIndex.has(o)) ownerIndex.set(o, []); - ownerIndex.get(o)!.push(fo.file); - } - } + // Stage 1: load files and bucket them by owner + const allFiles = loadFilteredFiles(db, opts); + const fileOwners = allFiles.map((file) => ({ file, owners: matchOwners(file, parsed.rules) })); + const { ownerIndex, ownedCount } = buildOwnerIndex(fileOwners); - // Filter files if --owner specified - let filteredFiles = fileOwners; - if (opts.owner) { - filteredFiles = fileOwners.filter((fo) => fo.owners.includes(opts.owner!)); - } + // Stage 2: apply optional --owner filter + const filteredFiles = opts.owner + ? fileOwners.filter((fo) => fo.owners.includes(opts.owner!)) + : fileOwners; - // Get symbols for filtered files + // Stage 3: load symbols for filtered files const fileSet = new Set(filteredFiles.map((fo) => fo.file)); - let symbols = ( - db.prepare('SELECT name, kind, file, line FROM nodes').all() as { - name: string; - kind: string; - file: string; - line: number; - }[] - ).filter((n) => fileSet.has(n.file)); - - if (opts.noTests) symbols = symbols.filter((s) => !isTestFile(s.file)); - if (opts.kind) symbols = symbols.filter((s) => s.kind === opts.kind); - - const symbolsWithOwners = symbols.map((s) => ({ - ...s, - owners: matchOwners(s.file, parsed.rules), - })); - - // Boundary analysis — cross-owner call edges - const boundaries: { - from: { name: string; kind: string; file: string; line: number; owners: string[] }; - to: { name: string; kind: string; file: string; line: number; owners: string[] }; - edgeKind: string; - }[] = []; - if (opts.boundary) { - const edges = db - .prepare( - `SELECT e.id, e.kind AS edgeKind, - s.name AS srcName, s.kind AS srcKind, s.file AS srcFile, s.line AS srcLine, - t.name AS tgtName, t.kind AS tgtKind, t.file AS tgtFile, t.line AS tgtLine - FROM edges e - JOIN nodes s ON e.source_id = s.id - JOIN nodes t ON e.target_id = t.id - WHERE e.kind = 'calls'`, - ) - .all() as { - id: number; - edgeKind: string; - srcName: string; - srcKind: string; - srcFile: string; - srcLine: number; - tgtName: string; - tgtKind: string; - tgtFile: string; - tgtLine: number; - }[]; - - for (const e of edges) { - if (opts.noTests && (isTestFile(e.srcFile) || isTestFile(e.tgtFile))) continue; - const srcOwners = matchOwners(e.srcFile, parsed.rules); - const tgtOwners = matchOwners(e.tgtFile, parsed.rules); - // Cross-boundary: different owner sets - const srcKey = srcOwners.sort().join(','); - const tgtKey = tgtOwners.sort().join(','); - if (srcKey !== tgtKey) { - boundaries.push({ - from: { - name: e.srcName, - kind: e.srcKind, - file: e.srcFile, - line: e.srcLine, - owners: srcOwners, - }, - to: { - name: e.tgtName, - kind: e.tgtKind, - file: e.tgtFile, - line: e.tgtLine, - owners: tgtOwners, - }, - edgeKind: e.edgeKind, - }); - } - } - } + const symbolsWithOwners = loadSymbolsForFiles(db, fileSet, opts, parsed.rules); - // Summary - const byOwner = [...ownerIndex.entries()] - .map(([owner, files]) => ({ owner, fileCount: files.length })) - .sort((a, b) => b.fileCount - a.fileCount); + // Stage 4: optional boundary analysis (cross-owner call edges) + const boundaries = opts.boundary + ? computeOwnerBoundaries(db, parsed.rules, opts.noTests ?? false) + : []; return { codeownersFile: parsed.path, files: filteredFiles, symbols: symbolsWithOwners, boundaries, - summary: { - totalFiles: allFiles.length, - ownedFiles: ownedCount, - unownedFiles: allFiles.length - ownedCount, - coveragePercent: allFiles.length > 0 ? Math.round((ownedCount / allFiles.length) * 100) : 0, - ownerCount: ownerIndex.size, - byOwner, - }, + summary: buildOwnersSummary(allFiles.length, ownedCount, ownerIndex), }; } finally { db.close(); diff --git a/src/features/sequence.ts b/src/features/sequence.ts index aa891d78..db2db7fb 100644 --- a/src/features/sequence.ts +++ b/src/features/sequence.ts @@ -91,6 +91,40 @@ interface BfsResult { truncated: boolean; } +type CalleeNode = { id: number; name: string; file: string; kind: string; line: number }; + +interface BfsFrame { + visited: Set; + messages: SequenceMessage[]; + fileSet: Set; + idToNode: Map; + nextFrontier: number[]; +} + +function processCallee( + c: CalleeNode, + caller: CalleeNode, + depth: number, + noTests: boolean, + frame: BfsFrame, +): void { + if (noTests && isTestFile(c.file)) return; + + frame.fileSet.add(c.file); + frame.messages.push({ + from: caller.file, + to: c.file, + label: c.name, + type: 'call', + depth, + }); + + if (frame.visited.has(c.id)) return; + frame.visited.add(c.id); + frame.nextFrontier.push(c.id); + frame.idToNode.set(c.id, c); +} + function bfsCallees( repo: Repository, matchNode: MatchNode, @@ -101,46 +135,25 @@ function bfsCallees( let frontier = [matchNode.id]; const messages: SequenceMessage[] = []; const fileSet = new Set([matchNode.file]); - const idToNode = new Map< - number, - { id: number; name: string; file: string; kind: string; line: number } - >(); + const idToNode = new Map(); idToNode.set(matchNode.id, matchNode); let truncated = false; for (let d = 1; d <= maxDepth; d++) { - const nextFrontier: number[] = []; + const frame: BfsFrame = { visited, messages, fileSet, idToNode, nextFrontier: [] }; for (const fid of frontier) { - const callees = repo.findCallees(fid); const caller = idToNode.get(fid)!; - - for (const c of callees) { - if (noTests && isTestFile(c.file)) continue; - - fileSet.add(c.file); - messages.push({ - from: caller.file, - to: c.file, - label: c.name, - type: 'call', - depth: d, - }); - - if (visited.has(c.id)) continue; - - visited.add(c.id); - nextFrontier.push(c.id); - idToNode.set(c.id, c); + for (const c of repo.findCallees(fid)) { + processCallee(c, caller, d, noTests, frame); } } - frontier = nextFrontier; + frontier = frame.nextFrontier; if (frontier.length === 0) break; - if (d === maxDepth && frontier.length > 0) { - const hasMoreCalls = frontier.some((fid) => repo.findCallees(fid).length > 0); - if (hasMoreCalls) truncated = true; + if (d === maxDepth && frontier.some((fid) => repo.findCallees(fid).length > 0)) { + truncated = true; } } @@ -174,26 +187,16 @@ function annotateDataflow( } } -function _annotateDataflowImpl( - db: BetterSqlite3Database, +type DataflowStmts = { + getReturns: ReturnType; + getFlowsTo: ReturnType; +}; + +function appendReturnMessages( messages: SequenceMessage[], - idToNode: Map, + nodeByNameFile: Map, + stmts: DataflowStmts, ): void { - const nodeByNameFile = new Map(); - for (const n of idToNode.values()) { - nodeByNameFile.set(`${n.name}|${n.file}`, n); - } - - const getReturns = db.prepare( - `SELECT d.expression FROM dataflow d - WHERE d.source_id = ? AND d.kind = 'returns'`, - ); - const getFlowsTo = db.prepare( - `SELECT d.expression FROM dataflow d - WHERE d.target_id = ? AND d.kind = 'flows_to' - ORDER BY d.param_index`, - ); - const seenReturns = new Set(); for (const msg of [...messages]) { if (msg.type !== 'call') continue; @@ -203,40 +206,67 @@ function _annotateDataflowImpl( const returnKey = `${msg.to}->${msg.from}:${msg.label}`; if (seenReturns.has(returnKey)) continue; - const returns = getReturns.all(targetNode.id) as { expression: string }[]; - - if (returns.length > 0) { - seenReturns.add(returnKey); - const expr = returns[0]!.expression || 'result'; - messages.push({ - from: msg.to, - to: msg.from, - label: expr, - type: 'return', - depth: msg.depth, - }); - } + const returns = stmts.getReturns.all(targetNode.id) as { expression: string }[]; + if (returns.length === 0) continue; + + seenReturns.add(returnKey); + messages.push({ + from: msg.to, + to: msg.from, + label: returns[0]!.expression || 'result', + type: 'return', + depth: msg.depth, + }); } +} +function annotateCallParams( + messages: SequenceMessage[], + nodeByNameFile: Map, + stmts: DataflowStmts, +): void { for (const msg of messages) { if (msg.type !== 'call') continue; const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); if (!targetNode) continue; - const params = getFlowsTo.all(targetNode.id) as { expression: string }[]; - - if (params.length > 0) { - const paramNames = params - .map((p) => p.expression) - .filter(Boolean) - .slice(0, 3); - if (paramNames.length > 0) { - msg.label = `${msg.label}(${paramNames.join(', ')})`; - } + const params = stmts.getFlowsTo.all(targetNode.id) as { expression: string }[]; + const paramNames = params + .map((p) => p.expression) + .filter(Boolean) + .slice(0, 3); + if (paramNames.length > 0) { + msg.label = `${msg.label}(${paramNames.join(', ')})`; } } } +function _annotateDataflowImpl( + db: BetterSqlite3Database, + messages: SequenceMessage[], + idToNode: Map, +): void { + const nodeByNameFile = new Map(); + for (const n of idToNode.values()) { + nodeByNameFile.set(`${n.name}|${n.file}`, n); + } + + const stmts: DataflowStmts = { + getReturns: db.prepare( + `SELECT d.expression FROM dataflow d + WHERE d.source_id = ? AND d.kind = 'returns'`, + ), + getFlowsTo: db.prepare( + `SELECT d.expression FROM dataflow d + WHERE d.target_id = ? AND d.kind = 'flows_to' + ORDER BY d.param_index`, + ), + }; + + appendReturnMessages(messages, nodeByNameFile, stmts); + annotateCallParams(messages, nodeByNameFile, stmts); +} + interface Participant { id: string; label: string; diff --git a/src/features/structure-query.ts b/src/features/structure-query.ts index 21a9f671..952bcf1b 100644 --- a/src/features/structure-query.ts +++ b/src/features/structure-query.ts @@ -227,6 +227,96 @@ interface HotspotsDataOpts { noTests?: boolean; } +type HotspotEntry = { + name: string; + kind: string; + lineCount: number | null; + symbolCount: number | null; + importCount: number | null; + exportCount: number | null; + fanIn: number | null; + fanOut: number | null; + cohesion: number | null; + fileCount: number | null; + density: number; + coupling: number; +}; + +/** Compute density from either fileCount/symbolCount or lineCount/symbolCount. */ +function computeHotspotDensity( + symbolCount: number | null, + fileCount: number | null, + lineCount: number | null, +): number { + if ((fileCount ?? 0) > 0) return (symbolCount || 0) / (fileCount ?? 1); + if ((lineCount ?? 0) > 0) return (symbolCount || 0) / (lineCount ?? 1); + return 0; +} + +/** Map a native-engine hotspot row (camelCase keys) to the public HotspotEntry shape. */ +function mapNativeHotspotRow(r: { + name: string; + kind: string; + lineCount: number | null; + symbolCount: number | null; + importCount: number | null; + exportCount: number | null; + fanIn: number | null; + fanOut: number | null; + cohesion: number | null; + fileCount: number | null; +}): HotspotEntry { + return { + name: r.name, + kind: r.kind, + lineCount: r.lineCount, + symbolCount: r.symbolCount, + importCount: r.importCount, + exportCount: r.exportCount, + fanIn: r.fanIn, + fanOut: r.fanOut, + cohesion: r.cohesion, + fileCount: r.fileCount, + density: computeHotspotDensity(r.symbolCount, r.fileCount, r.lineCount), + coupling: (r.fanIn || 0) + (r.fanOut || 0), + }; +} + +/** Map a JS-path hotspot row (snake_case keys from SQLite) to the public HotspotEntry shape. */ +function mapJsHotspotRow(r: HotspotRow): HotspotEntry { + return { + name: r.name, + kind: r.kind, + lineCount: r.line_count, + symbolCount: r.symbol_count, + importCount: r.import_count, + exportCount: r.export_count, + fanIn: r.fan_in, + fanOut: r.fan_out, + cohesion: r.cohesion, + fileCount: r.file_count, + density: computeHotspotDensity(r.symbol_count, r.file_count, r.line_count), + coupling: (r.fan_in || 0) + (r.fan_out || 0), + }; +} + +/** ORDER BY clause for each ranking dimension (strategy pattern). */ +const HOTSPOT_ORDER_BY: Record = { + 'fan-in': 'nm.fan_in DESC NULLS LAST', + 'fan-out': 'nm.fan_out DESC NULLS LAST', + density: 'nm.symbol_count DESC NULLS LAST', + coupling: '(COALESCE(nm.fan_in, 0) + COALESCE(nm.fan_out, 0)) DESC NULLS LAST', +}; + +/** Build the JS-path SQL query for a given metric and test filter. */ +function buildHotspotQuery(metric: string, testFilter: string): string { + const orderBy = HOTSPOT_ORDER_BY[metric] ?? HOTSPOT_ORDER_BY['fan-in']; + return `SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count, + nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count + FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id + WHERE n.kind = ? ${testFilter} ORDER BY ${orderBy} LIMIT ?`; +} + export function hotspotsData( customDbPath?: string, opts: HotspotsDataOpts = {}, @@ -242,96 +332,21 @@ export function hotspotsData( const level = opts.level || 'file'; const limit = opts.limit || 10; const noTests = opts.noTests || false; - const kind = level === 'directory' ? 'directory' : 'file'; - const mapRow = (r: { - name: string; - kind: string; - lineCount: number | null; - symbolCount: number | null; - importCount: number | null; - exportCount: number | null; - fanIn: number | null; - fanOut: number | null; - cohesion: number | null; - fileCount: number | null; - }) => ({ - name: r.name, - kind: r.kind, - lineCount: r.lineCount, - symbolCount: r.symbolCount, - importCount: r.importCount, - exportCount: r.exportCount, - fanIn: r.fanIn, - fanOut: r.fanOut, - cohesion: r.cohesion, - fileCount: r.fileCount, - density: - (r.fileCount ?? 0) > 0 - ? (r.symbolCount || 0) / (r.fileCount ?? 1) - : (r.lineCount ?? 0) > 0 - ? (r.symbolCount || 0) / (r.lineCount ?? 1) - : 0, - coupling: (r.fanIn || 0) + (r.fanOut || 0), - }); - // ── Native fast path: single query instead of 4 eagerly prepared ── if (nativeDb?.getHotspots) { const rows = nativeDb.getHotspots(kind, metric, noTests, limit); - const hotspots = rows.map(mapRow); + const hotspots = rows.map(mapNativeHotspotRow); const base = { metric, level, limit, hotspots }; return paginateResult(base, 'hotspots', { limit: opts.limit, offset: opts.offset }); } // ── JS fallback ─────────────────────────────────────────────────── const testFilter = testFilterSQL('n.name', noTests && kind === 'file'); - - const HOTSPOT_QUERIES: Record = { - 'fan-in': db.prepare(` - SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count, - nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count - FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id - WHERE n.kind = ? ${testFilter} ORDER BY nm.fan_in DESC NULLS LAST LIMIT ?`), - 'fan-out': db.prepare(` - SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count, - nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count - FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id - WHERE n.kind = ? ${testFilter} ORDER BY nm.fan_out DESC NULLS LAST LIMIT ?`), - density: db.prepare(` - SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count, - nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count - FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id - WHERE n.kind = ? ${testFilter} ORDER BY nm.symbol_count DESC NULLS LAST LIMIT ?`), - coupling: db.prepare(` - SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count, - nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count - FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id - WHERE n.kind = ? ${testFilter} ORDER BY (COALESCE(nm.fan_in, 0) + COALESCE(nm.fan_out, 0)) DESC NULLS LAST LIMIT ?`), - }; - - const stmt = HOTSPOT_QUERIES[metric] ?? HOTSPOT_QUERIES['fan-in']; - const rows = stmt!.all(kind, limit); - - const hotspots = rows.map((r) => ({ - name: r.name, - kind: r.kind, - lineCount: r.line_count, - symbolCount: r.symbol_count, - importCount: r.import_count, - exportCount: r.export_count, - fanIn: r.fan_in, - fanOut: r.fan_out, - cohesion: r.cohesion, - fileCount: r.file_count, - density: - (r.file_count ?? 0) > 0 - ? (r.symbol_count || 0) / (r.file_count ?? 1) - : (r.line_count ?? 0) > 0 - ? (r.symbol_count || 0) / (r.line_count ?? 1) - : 0, - coupling: (r.fan_in || 0) + (r.fan_out || 0), - })); + const stmt = db.prepare(buildHotspotQuery(metric, testFilter)); + const rows = stmt.all(kind, limit) as HotspotRow[]; + const hotspots = rows.map(mapJsHotspotRow); const base = { metric, level, limit, hotspots }; return paginateResult(base, 'hotspots', { limit: opts.limit, offset: opts.offset }); diff --git a/src/features/structure.ts b/src/features/structure.ts index 8fe6b5a9..3e531cba 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -532,6 +532,56 @@ function batchUpdateRoles( })(); } +interface CallableNodeRow { + id: number; + name: string; + kind: string; + file: string; + fan_in: number; + fan_out: number; +} + +/** Build the activeFiles set: files with at least one callable connected to the graph. */ +function buildActiveFilesSet(rows: CallableNodeRow[]): Set { + const activeFiles = new Set(); + for (const r of rows) { + if ((r.fan_in > 0 || r.fan_out > 0) && r.kind !== 'constant') { + activeFiles.add(r.file); + } + } + return activeFiles; +} + +/** Map callable rows to classifier input objects, attaching exported/prod-fan-in/active-file metadata. */ +function buildClassifierInput( + rows: CallableNodeRow[], + exportedIds: Set, + prodFanInMap: Map, + activeFiles: Set, +): Array<{ + id: string; + name: string; + kind: string; + file: string; + fanIn: number; + fanOut: number; + isExported: boolean; + productionFanIn: number; + hasActiveFileSiblings: boolean | undefined; +}> { + return rows.map((r) => ({ + id: String(r.id), + name: r.name, + kind: r.kind, + file: r.file, + fanIn: r.fan_in, + fanOut: r.fan_out, + isExported: exportedIds.has(r.id), + productionFanIn: prodFanInMap.get(r.id) || 0, + hasActiveFileSiblings: r.kind === 'constant' ? activeFiles.has(r.file) : undefined, + })); +} + function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSummary): RoleSummary { // Leaf kinds (parameter, property) can never have callers/callees. // Classify them directly as dead-leaf without the expensive fan-in/fan-out JOINs. @@ -558,14 +608,7 @@ function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSumm ) fo ON n.id = fo.source_id WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property')`, ) - .all() as { - id: number; - name: string; - kind: string; - file: string; - fan_in: number; - fan_out: number; - }[]; + .all() as CallableNodeRow[]; if (rows.length === 0 && leafRows.length === 0) return emptySummary; @@ -629,28 +672,9 @@ function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSumm prodFanInMap.set(r.target_id, r.cnt); } - // Files with at least one callable (non-constant) connected to the graph. - // Constants in these files are likely consumed locally via identifier reference. - const activeFiles = new Set(); - for (const r of rows) { - if ((r.fan_in > 0 || r.fan_out > 0) && r.kind !== 'constant') { - activeFiles.add(r.file); - } - } - // Delegate classification to the pure-logic classifier - const classifierInput = rows.map((r) => ({ - id: String(r.id), - name: r.name, - kind: r.kind, - file: r.file, - fanIn: r.fan_in, - fanOut: r.fan_out, - isExported: exportedIds.has(r.id), - productionFanIn: prodFanInMap.get(r.id) || 0, - hasActiveFileSiblings: r.kind === 'constant' ? activeFiles.has(r.file) : undefined, - })); - + const activeFiles = buildActiveFilesSet(rows); + const classifierInput = buildClassifierInput(rows, exportedIds, prodFanInMap, activeFiles); const roleMap = classifyRoles(classifierInput); const { summary, idsByRole } = buildRoleSummary(rows, leafRows, roleMap, emptySummary); @@ -733,14 +757,7 @@ function classifyNodeRolesIncremental( WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property') AND n.file IN (${placeholders})`, ) - .all(...allAffectedFiles) as { - id: number; - name: string; - kind: string; - file: string; - fan_in: number; - fan_out: number; - }[]; + .all(...allAffectedFiles) as CallableNodeRow[]; if (rows.length === 0 && leafRows.length === 0) return emptySummary; @@ -810,25 +827,8 @@ function classifyNodeRolesIncremental( } // 5. Classify affected nodes using global medians - const activeFiles = new Set(); - for (const r of rows) { - if ((r.fan_in > 0 || r.fan_out > 0) && r.kind !== 'constant') { - activeFiles.add(r.file); - } - } - - const classifierInput = rows.map((r) => ({ - id: String(r.id), - name: r.name, - kind: r.kind, - file: r.file, - fanIn: r.fan_in, - fanOut: r.fan_out, - isExported: exportedIds.has(r.id), - productionFanIn: prodFanInMap.get(r.id) || 0, - hasActiveFileSiblings: r.kind === 'constant' ? activeFiles.has(r.file) : undefined, - })); - + const activeFiles = buildActiveFilesSet(rows); + const classifierInput = buildClassifierInput(rows, exportedIds, prodFanInMap, activeFiles); const roleMap = classifyRoles(classifierInput, globalMedians); // 6. Build summary (only for affected nodes) and update only those nodes