Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ tiers.
- `context` — inbound/outbound refs and participating flows for one symbol.
- `impact` — dependents of a target up to a configurable depth, with a risk tier.
- `detect_changes` — map an uncommitted or committed diff to affected symbols.
- `sql` — read-only SQL against the local temporal store (the `cochanges` and `symbol_summaries` tables), 5 s timeout. The node/edge graph lives in `graph.lbug` (ADR 0016) and is reached via the typed tools (`query`/`context`/`impact`) or Cypher via the MCP `sql` tool's `cypher` arg — NOT via this SQL path.
- `sql` — read-only SQL against the single-file `store.sqlite` index (ADR 0019). `nodes`, `edges`, `embeddings`, `cochanges`, `symbol_summaries`, and `store_meta` are all directly SQL-queryable (e.g. `SELECT id, name FROM nodes WHERE kind = 'Function'`; reach kind-specific fields via SQLite JSON1, `payload->>'$.field'`). 5 s timeout. The typed tools (`query`/`context`/`impact`) remain the high-level path; the `cypher` arg is reserved for community-fork graph adapters and is not supported by the default backend.

Run `codehub analyze` after pulling new commits so the index stays aligned
with the working tree. `codehub status` reports staleness.
Expand Down
147 changes: 147 additions & 0 deletions packages/analysis/src/business-logic.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import { strict as assert } from "node:assert";
import { test } from "node:test";
import {
classifyBusinessCandidate,
classifyPlumbing,
type PlumbingFeatures,
SIEVE_VALIDATED_LANGUAGES,
} from "./business-logic.js";

/** Build a feature vector with all-zero defaults, overriding what the case needs. */
function feat(over: Partial<PlumbingFeatures>): PlumbingFeatures {
return {
nSerializationCalls: 0,
nDomainSignals: 0,
nPlumbingSignals: 0,
isOrmModel: false,
...over,
};
}

// ── The domain-signal veto: any real decision forces abstain ────────────────

test("a symbol with any domain signal abstains, even amid plumbing", () => {
// The dangerous error is calling a domain rule "plumbing". A serializer call
// sitting next to a domain conditional must NOT be swept into plumbing.
const v = classifyPlumbing(
feat({ nDomainSignals: 1, nSerializationCalls: 2, nPlumbingSignals: 3 }),
);
assert.equal(v.likelyPlumbing, false);
assert.equal(v.tier, "none");
assert.equal(v.plumbingConfidence, 0);
});

// ── Tier 1: serialization-pure (precision ~1.0) ─────────────────────────────

test("a pure serializer (no domain signal) is tier-1 plumbing at 0.95", () => {
// e.g. cosmic-DDD `to_dict` / a Marshal helper.
const v = classifyPlumbing(feat({ nSerializationCalls: 1 }));
assert.equal(v.likelyPlumbing, true);
assert.equal(v.tier, "serialization-pure");
assert.equal(v.plumbingConfidence, 0.95);
});

// ── Tier 2: plumbing-no-domain (precision ~0.94) ────────────────────────────

test("plumbing signals with no domain signal, not ORM, is tier-2 plumbing at 0.90", () => {
// e.g. a DI-wiring constructor or a logging wrapper.
const v = classifyPlumbing(feat({ nPlumbingSignals: 2 }));
assert.equal(v.likelyPlumbing, true);
assert.equal(v.tier, "plumbing-no-domain");
assert.equal(v.plumbingConfidence, 0.9);
});

test("an ORM entity with plumbing signals is excluded from tier-2 (abstains)", () => {
// ORM entities carry domain methods; the rule must not sweep them up.
const v = classifyPlumbing(feat({ nPlumbingSignals: 2, isOrmModel: true }));
assert.equal(v.likelyPlumbing, false);
assert.equal(v.tier, "none");
});

// ── Abstention: no signal at all ────────────────────────────────────────────

test("a symbol with no serialization, no plumbing, no domain signal abstains", () => {
const v = classifyPlumbing(feat({}));
assert.equal(v.likelyPlumbing, false);
assert.equal(v.tier, "none");
});

// ── Regression fixtures (the iter-0 cases Laith pinned) ─────────────────────

test("regression: AbstractRepository (infra base, no domain rule) reads plumbing", () => {
// An abstract repository base: plumbing signals (persistence wiring), zero
// domain decision, and it is NOT itself an ORM-mapped entity row. Must flag
// plumbing — this inverted in an earlier iteration and is pinned here.
const v = classifyPlumbing(feat({ nPlumbingSignals: 1, nDomainSignals: 0, isOrmModel: false }));
assert.equal(v.likelyPlumbing, true);
});

test("regression: Batch.allocate (domain rule) is never called plumbing", () => {
// The canonical domain method: a conditional on available quantity + a raised
// domain exception => nDomainSignals > 0 => abstain. The sieve must never hide it.
const v = classifyPlumbing(feat({ nDomainSignals: 2, nPlumbingSignals: 0 }));
assert.equal(v.likelyPlumbing, false);
assert.equal(v.tier, "none");
});

// ── Determinism ─────────────────────────────────────────────────────────────

test("classifyPlumbing is a pure function — identical inputs, identical verdict", () => {
const f = feat({ nSerializationCalls: 1, nPlumbingSignals: 1 });
const a = classifyPlumbing(f);
const b = classifyPlumbing(f);
assert.deepEqual(a, b);
});

test("validated-language set is exactly python/java/go", () => {
assert.equal(SIEVE_VALIDATED_LANGUAGES.has("python"), true);
assert.equal(SIEVE_VALIDATED_LANGUAGES.has("java"), true);
assert.equal(SIEVE_VALIDATED_LANGUAGES.has("go"), true);
assert.equal(SIEVE_VALIDATED_LANGUAGES.has("ruby"), false);
});

// ── candidate_business: the recall-first complement of the sieve ────────────

test("candidate_business is the exact complement of likely_plumbing", () => {
// The core invariant: every symbol is either confident-plumbing or a
// candidate, never both, never neither. Spot-check across the rule surface.
for (const f of [
feat({}),
feat({ nSerializationCalls: 1 }),
feat({ nPlumbingSignals: 2 }),
feat({ nPlumbingSignals: 2, isOrmModel: true }),
feat({ nDomainSignals: 3, nPlumbingSignals: 1 }),
]) {
const sieve = classifyPlumbing(f);
const cand = classifyBusinessCandidate(f);
assert.equal(cand.candidateBusiness, !sieve.likelyPlumbing);
assert.deepEqual(cand.plumbing, sieve); // carries the verdict through verbatim
}
});

test("recall-first: a bare domain conditional stays a candidate", () => {
// A symbol with a domain decision and no plumbing must be a candidate — this
// is the recall the tag is built to protect (don't drop domain logic).
const v = classifyBusinessCandidate(feat({ nDomainSignals: 1 }));
assert.equal(v.candidateBusiness, true);
});

test("recall-first: a symbol with NO signals at all is still a candidate", () => {
// The sieve only removes CONFIDENT plumbing. An empty/unknown symbol is kept
// as a candidate rather than silently excluded — high recall by construction.
const v = classifyBusinessCandidate(feat({}));
assert.equal(v.candidateBusiness, true);
assert.equal(v.plumbing.tier, "none");
});

test("confident plumbing is NOT a candidate", () => {
// A pure serializer is removed from the candidate set.
const v = classifyBusinessCandidate(feat({ nSerializationCalls: 1 }));
assert.equal(v.candidateBusiness, false);
});

test("regression: Batch.allocate is a business candidate, AbstractRepository is not", () => {
// The two pinned cases, viewed through the candidate tag.
assert.equal(classifyBusinessCandidate(feat({ nDomainSignals: 2 })).candidateBusiness, true);
assert.equal(classifyBusinessCandidate(feat({ nPlumbingSignals: 1 })).candidateBusiness, false);
});
185 changes: 185 additions & 0 deletions packages/analysis/src/business-logic.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/**
* Deterministic business-logic / plumbing classifier for
* `@opencodehub/analysis`.
*
* This is the SIEVE half of business-logic detection: a high-precision,
* conservative rule that flags symbols which are almost certainly plumbing
* (serialization, DTO mapping, transport, DI wiring) and ABSTAINS everywhere
* else. It does NOT assert "this is business logic" — calling a real domain
* rule "plumbing" and hiding it is the costly error, so the rule is tuned for
* plumbing PRECISION and stays silent when unsure.
*
* ## Provenance
*
* The rule was distilled from a teacher/student loop: a 3-model LLM panel
* labeled ~300 symbols across 4 repos (Python / Java / Go), a shallow decision
* tree was fit, and the two cleanest, highest-precision plumbing leaves were
* lifted out as the shippable rule. Measured plumbing precision on the labeled
* corpus: 0.936 aggregate, and >= 0.85 on EVERY repo under per-repo evaluation
* (py-flask 1.00, java-petclinic 0.94, go-clean 0.92, py-cosmic-ddd 0.89). The
* full classifier (asserting business too) did not generalize cross-repo and is
* intentionally NOT shipped here — only the plumbing direction is.
*
* ## Determinism
*
* Pure function of the per-symbol feature vector — no I/O, no model, no
* randomness. The same inputs always yield the same verdict, so the result is
* safe to persist into `nodes.payload` and survives the `graphHash` byte-
* identity contract. Mirrors the `page-rank.ts` "request-time deterministic
* kernel" idiom.
*
* ## Feature binding
*
* The kernel consumes a small {@link PlumbingFeatures} struct. OCH's ingestion
* computes these from the AST at parse time (the same place
* `cyclomaticComplexity` is produced); see the companion extractor spec. The
* kernel is deliberately decoupled from HOW the features are computed so the
* rule can be unit-tested in isolation and re-tuned without touching ingestion.
*/

/**
* The minimal per-symbol feature vector the plumbing sieve needs. Every field
* is a non-negative integer count or a boolean, computable deterministically
* from the symbol's AST + its place in the file.
*/
export interface PlumbingFeatures {
/**
* Count of serialization / wire-format calls in the body: `json.dumps`,
* `model_dump`, `to_dict`, `Marshal`/`Unmarshal`, `writeValue`, `JSON.parse`,
* etc. A serializer with no domain decision is plumbing.
*/
readonly nSerializationCalls: number;
/**
* Count of POSITIVE domain-logic signals: conditionals comparing domain
* values (not None/nil/type guards), arithmetic/aggregation on domain
* quantities, raised domain exceptions, state-machine transitions. When this
* is > 0 the symbol carries a real decision and the sieve MUST abstain — the
* recall-first half (business detection) owns those.
*/
readonly nDomainSignals: number;
/**
* Count of NEGATIVE plumbing signals: raw-SQL execution, DI wiring,
* framework callbacks/registration, logging/metrics/tracing calls,
* pass-through attribute assignments.
*/
readonly nPlumbingSignals: number;
/**
* True when the symbol is (or is a method on) an ORM-mapped persistence
* entity. ORM entities frequently carry domain methods, so a symbol on one
* is NOT swept into plumbing by the sieve — the rule excludes it.
*/
readonly isOrmModel: boolean;
}

/** Advisory verdict written into `nodes.payload`. */
export interface PlumbingVerdict {
/**
* `true` only when the rule is confident the symbol is plumbing. `false`
* means ABSTAIN — NOT an assertion that the symbol is business logic. A
* consumer should treat `false` as "no signal", never as "this is business".
*/
readonly likelyPlumbing: boolean;
/**
* Confidence in [0, 1] attached to a `likelyPlumbing: true` verdict, keyed to
* the tier that fired. `0` when abstaining. Tier confidences are the measured
* per-tier precisions, rounded: 0.95 (serialization-pure) / 0.90 (standard).
*/
readonly plumbingConfidence: number;
/**
* Which rule tier fired, for auditability. `"none"` when abstaining.
* - `"serialization-pure"`: a serializer with zero domain signal (precision ~1.0).
* - `"plumbing-no-domain"`: plumbing signals present, zero domain signal,
* not an ORM entity (precision ~0.94).
*/
readonly tier: "serialization-pure" | "plumbing-no-domain" | "none";
}

const ABSTAIN: PlumbingVerdict = {
likelyPlumbing: false,
plumbingConfidence: 0,
tier: "none",
};

/**
* Classify one symbol. Two tiers, evaluated high-confidence first; both require
* ZERO domain signal so a symbol that carries any real decision always abstains.
*
* Tier 1 (conf 0.95): serialization calls present AND no domain signal.
* Tier 2 (conf 0.90): plumbing signals present AND no domain signal AND not an ORM entity.
*
* Anything else abstains. The order matters only for the reported `tier`;
* the two tiers never disagree on `likelyPlumbing`.
*/
export function classifyPlumbing(f: PlumbingFeatures): PlumbingVerdict {
// A real domain decision anywhere in the symbol vetoes the sieve outright.
if (f.nDomainSignals > 0) return ABSTAIN;

if (f.nSerializationCalls > 0) {
return { likelyPlumbing: true, plumbingConfidence: 0.95, tier: "serialization-pure" };
}

if (f.nPlumbingSignals > 0 && !f.isOrmModel) {
return { likelyPlumbing: true, plumbingConfidence: 0.9, tier: "plumbing-no-domain" };
}

return ABSTAIN;
}

/**
* The recall-first complement of the sieve: a symbol is a `candidate_business`
* unless the sieve is confident it is plumbing. This is the "look here for
* domain logic" tag the user gets at analyze time without a query, labels, or
* embeddings.
*
* ## Why subtraction, not assertion
*
* Asserting "this IS business logic" needs a trained classifier and did not
* generalize across repos (held-out F1 ~0.3). SUBTRACTING confident plumbing
* does generalize, because the plumbing sieve does. So the candidate set is
* "everything the sieve did not remove" — recall-first BY CONSTRUCTION: a
* symbol only loses the tag when we are confident it is plumbing, so real
* domain logic cannot be silently dropped.
*
* ## Measured (286 labeled symbols, Python / Java / Go)
*
* Business RECALL 0.925 (misses 6 of 80 business symbols); per-repo recall
* 0.80–1.00 (flask 1.00, java 0.96, go 0.88, cosmic 0.80). Precision 0.385 —
* the tag fires on ~67% of symbols, which is the intended recall-first trade:
* the tag is the safety net (nothing important falls out), and an optional
* embedding-derived rank orders the candidates so the most domain-like surface
* first. The tag NEVER tries to be precise on its own.
*/
export interface BusinessCandidateVerdict {
/**
* `true` when the symbol is a candidate for business logic — i.e. the sieve
* did NOT classify it as plumbing. High recall, low precision by design. A
* consumer should treat this as "worth a look", not "confirmed business".
*/
readonly candidateBusiness: boolean;
/**
* The complementary plumbing verdict that produced this tag, carried through
* for auditability so a consumer can see WHY a symbol was (or was not) a
* candidate without re-running the sieve.
*/
readonly plumbing: PlumbingVerdict;
}

/**
* Tag a symbol as a business-logic candidate. Pure complement of
* {@link classifyPlumbing}: `candidateBusiness === !likelyPlumbing`. Shares the
* exact same feature inputs so the two tags can never disagree about a symbol
* (every symbol is either confident-plumbing or a candidate, never both,
* never neither).
*/
export function classifyBusinessCandidate(f: PlumbingFeatures): BusinessCandidateVerdict {
const plumbing = classifyPlumbing(f);
return { candidateBusiness: !plumbing.likelyPlumbing, plumbing };
}

/**
* Languages the sieve is validated on. The rule's precision floor was measured
* on Python, Java, and Go corpora; calling it on other languages is allowed but
* unvalidated, so the analyze pass should gate on this set and skip the rest
* rather than emit an unbacked verdict.
*/
export const SIEVE_VALIDATED_LANGUAGES: ReadonlySet<string> = new Set(["python", "java", "go"]);
10 changes: 10 additions & 0 deletions packages/analysis/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
export type { ApiImpactFilter, ApiImpactRow } from "./api-impact.js";
export { listApiImpact, scoreRisk, worseRisk } from "./api-impact.js";
export type {
BusinessCandidateVerdict,
PlumbingFeatures,
PlumbingVerdict,
} from "./business-logic.js";
export {
classifyBusinessCandidate,
classifyPlumbing,
SIEVE_VALIDATED_LANGUAGES,
} from "./business-logic.js";
export {
type ChangePackInternal,
COST_TOKENIZER_MODEL,
Expand Down
Loading