From aa19365180f6370a78eef5595139f337aba9fcff Mon Sep 17 00:00:00 2001 From: Sayt-0 Date: Thu, 2 Jul 2026 17:36:03 +0200 Subject: [PATCH 1/4] feat(review-pr): incremental reviews of new commits since last review Re-reviews only the range from the last reviewed SHA to HEAD instead of the full PR diff, tracking state via the commit_id GitHub records on completed reviews. Falls back to a full review on force-push or rebase, base merge-ins, or any error, and dedupes findings against already-posted comments so full re-reviews never duplicate threads. Adds the incremental input (default true) to the action and the reusable workflow. --- .github/workflows/review-pr.yml | 6 + AGENTS.md | 9 + review-pr/README.md | 40 +++ review-pr/action.yml | 74 +++- review-pr/agents/pr-review.yaml | 11 +- review-pr/agents/refs/posting-format.md | 16 +- .../__tests__/dedupe-findings.test.ts | 201 +++++++++++ src/dedupe-findings/dedupe-findings.ts | 171 ++++++++++ src/dedupe-findings/index.ts | 89 +++++ .../__tests__/incremental-review.test.ts | 323 ++++++++++++++++++ .../__tests__/index.test.ts | 228 +++++++++++++ src/incremental-review/incremental-review.ts | 224 ++++++++++++ src/incremental-review/index.ts | 204 +++++++++++ tsup.config.ts | 2 + 14 files changed, 1590 insertions(+), 8 deletions(-) create mode 100644 src/dedupe-findings/__tests__/dedupe-findings.test.ts create mode 100644 src/dedupe-findings/dedupe-findings.ts create mode 100644 src/dedupe-findings/index.ts create mode 100644 src/incremental-review/__tests__/incremental-review.test.ts create mode 100644 src/incremental-review/__tests__/index.test.ts create mode 100644 src/incremental-review/incremental-review.ts create mode 100644 src/incremental-review/index.ts diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml index 7dc1e6b..50d5623 100644 --- a/.github/workflows/review-pr.yml +++ b/.github/workflows/review-pr.yml @@ -46,6 +46,11 @@ on: required: false type: string default: 'moderate' + incremental: + description: "When true (default), re-reviews only the commits pushed since the last completed docker-agent review instead of the full PR diff. Falls back to a full review when no previous review exists, the history was rewritten (force-push/rebase), or the base branch was merged in. Set to false to force full reviews." + required: false + type: boolean + default: true trigger-run-id: description: "Workflow run ID from pr-review-trigger.yml — used to download the event context artifact. When set, resolve-context job downloads the artifact and routes to the appropriate job." required: false @@ -418,6 +423,7 @@ jobs: exclude-paths: ${{ inputs.exclude-paths }} max-diff-lines: ${{ inputs.max-diff-lines }} confidence-threshold: ${{ inputs.confidence-threshold }} + incremental: ${{ inputs.incremental }} model: ${{ inputs.model }} github-token: ${{ env.GITHUB_APP_TOKEN || github.token }} anthropic-api-key: ${{ env.ANTHROPIC_API_KEY_FROM_SSM || secrets.ANTHROPIC_API_KEY }} diff --git a/AGENTS.md b/AGENTS.md index 5fc6699..402cbd9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,10 +39,18 @@ Anything else here (workflows under `.github/workflows/`, scripts, tests) exists │ │ ├── aws-credentials.ts │ │ ├── github-app.ts # Reads docker-agent-action/github-app from Secrets Manager; exports GITHUB_APP_TOKEN (a PAT) + ORG_MEMBERSHIP_TOKEN. │ │ └── __tests__/ +│ ├── dedupe-findings/ # Drops review comments duplicating findings from previous review cycles. +│ │ ├── index.ts # CLI entry → bundled to dist/dedupe-findings.js (staged at /tmp/dedupe-findings.js for the agent). +│ │ ├── dedupe-findings.ts # Core dedupeComments() pure function (path + line proximity + heading similarity). +│ │ └── __tests__/ │ ├── filter-diff/ # Strips excluded-path sections from a unified diff. │ │ ├── index.ts # CLI entry → bundled to dist/filter-diff.js │ │ ├── filter-diff.ts # Core filterDiff() pure function + applyFilter() I/O wrapper. │ │ └── __tests__/ +│ ├── incremental-review/ # Narrows pr.diff to commits since the last completed review. +│ │ ├── index.ts # CLI entry → bundled to dist/incremental-review.js +│ │ ├── incremental-review.ts # Core planIncrementalReview()/findLastReviewedSha() pure functions. +│ │ └── __tests__/ │ ├── score-confidence/ # Per-finding confidence scoring for the PR review pipeline. │ │ ├── index.ts # CLI entry → bundled to dist/score-confidence.js │ │ ├── score-confidence.ts # Core scoreFinding()/scoreFindings() pure functions + posting policy. @@ -172,6 +180,7 @@ The action runs untrusted input (PR titles, bodies, comments, diffs) through an - `pull_request` action `review_requested` when `github.event.requested_reviewer.login == 'docker-agent'` - `@docker-agent` mentions on PR/issue comments — these run the `.github/actions/mention-reply` handler (sets `should-reply` and builds the context prompt) and then the `review-pr/mention-reply` sub-action (referenced from a pinned SHA, not present as a local path on every commit). The `pr-review-mention-reply.yaml` agent handles the actual reply. - Diffs over 1500 lines are **chunked at file boundaries** in `review-pr/action.yml` (see "Split diff into chunks"). Per-file **risk scoring** (security paths, line counts, error-handling patterns) prioritizes verifier attention. +- **Incremental reviews** (default on, `incremental` input): a re-review only diffs `last-reviewed-SHA..HEAD` instead of the full PR diff. The last reviewed SHA is read from the `commit_id` GitHub records on the bot's completed reviews (assessment or LGTM bodies only — timeout/failure fallbacks don't count), so state survives across runs with no extra writes. `src/incremental-review/` plans the mode and falls back to a full review on force-push/rebase (SHA missing or not an ancestor of HEAD), base-branch merge-ins, net-zero changes, or any error. In incremental mode the original full diff is preserved at `pr_full.diff` for stale-thread resolution and suggestion-anchor validation (GitHub validates anchors against the full PR diff). On re-reviews, `src/dedupe-findings/` (staged at `/tmp/dedupe-findings.js`, run by the agent per `posting-format.md` against the pre-fetched `/tmp/existing_review_comments.json`) drops findings matching already-posted bot comments by file path + line proximity (±3) + finding-heading similarity, so a full re-review after a rebase doesn't duplicate threads. - Per-finding **confidence scoring** assigns each verified finding a precise 0–100 score (band: strong/moderate/weak/negligible) from the verifier's `verdict`, `evidence_strength`, and `context_completeness`, plus drafter↔verifier severity concordance and scope. `src/score-confidence/score-confidence.ts` is the **single source of truth** for the model (weights, bands, threshold, posting policy); the "Confidence Scoring" section of `review-pr/agents/pr-review.yaml` mirrors it as a strict lookup table so the orchestrator can apply it inline (the gitignored `dist/` is not available at agent runtime). Change one, change both — the unit tests pin every value. Security and high-severity CONFIRMED/LIKELY findings are always posted regardless of score; below-threshold findings are surfaced in a summary rather than silently dropped. The inline-posting cutoff is configurable via the `confidence-threshold` action input (a band name or a number clamped to 30–100, default `moderate` = 55); the action resolves it by invoking the bundled `dist/score-confidence.js resolve-threshold` CLI (so the resolution logic stays in TypeScript, not bash) and injects it into the agent prompt, and `scoreFinding`/`scoreFindings` accept a matching `postThreshold` option. - Stale review threads on lines no longer in the diff are auto-resolved via GraphQL `resolveReviewThread`. Threads with no `` marker are never touched. diff --git a/review-pr/README.md b/review-pr/README.md index bca7370..a7b5836 100644 --- a/review-pr/README.md +++ b/review-pr/README.md @@ -262,6 +262,7 @@ When using `docker/docker-agent-action/.github/workflows/review-pr.yml`: | `model` | Model override (e.g., `anthropic/claude-haiku-4-5`) | - | | `add-prompt-files` | Comma-separated files to append to the prompt | - | | `confidence-threshold` | Min confidence to post a finding inline: band (`strong`/`moderate`/`medium`/`weak`) or a number (clamped to 30–100) | `moderate` | +| `incremental` | Review only the commits pushed since the last completed review (falls back to a full review when unsafe) | `true` | ### `review-pr` (Composite Action) @@ -285,6 +286,7 @@ PR number and comment ID are auto-detected from `github.event` when not provided | `github-token` | GitHub token | No | | `add-prompt-files` | Comma-separated files to append to the prompt | No | | `confidence-threshold` | Min confidence to post a finding inline: band (`strong`/`moderate`/`medium`/`weak`) or a number clamped to 30–100 (default `moderate`) | No | +| `incremental` | Review only commits since the last completed review (default `true`; falls back to full when unsafe) | No | \*API keys are optional when using the reusable workflow (credentials are fetched via OIDC). Only required when using the composite action directly without OIDC. @@ -357,6 +359,44 @@ AGENTS.md + PR Diff → Drafter (hypotheses) → Verifier (confirm + evidence si → Confidence score (0–100) → Post Comments ``` +### Incremental Reviews + +By default, a re-review only covers the **commits pushed since the last completed +review** instead of re-reviewing the full PR diff. This saves tokens, avoids +duplicate comments, and skips code that has not changed since the previous cycle. + +How it works: + +1. The last reviewed commit is read from the metadata GitHub records on every + posted review (`commit_id` on the Reviews API is the PR head SHA at posting + time), so the state survives across workflow runs with no extra bookkeeping. + Only completed reviews count — a timed-out or failed run does not mark + commits as reviewed. +2. The diff handed to the agent becomes `git diff ..HEAD`, + restricted to files that are still part of the full PR diff (so inline + comments never anchor outside what GitHub accepts). +3. The action **falls back to a full review** whenever incremental diffing + would be unsafe or meaningless: + +| Situation | Behavior | +| --------- | -------- | +| No previous completed review | Full review | +| Force-push or rebase rewrote the history | Full review | +| Base branch merged into the PR since last review | Full review | +| Last reviewed SHA missing from the clone | Full review | +| Changes since last review cancel out | Full review | +| `incremental: false` input | Full review | + +4. On any full re-review, findings are **deduplicated** against the comments + already posted on the PR (matched by file path, line proximity, and finding + heading similarity — see [`src/dedupe-findings/`](../src/dedupe-findings/dedupe-findings.ts)), + so a rebase does not produce duplicate threads. The plumbing that decides + between incremental and full mode is implemented and unit-tested in + [`src/incremental-review/`](../src/incremental-review/incremental-review.ts). + +Set `incremental: false` (workflow or action input) to force a full review on +every trigger. + Each verified finding gets a precise **confidence score** (0–100) and a band (strong / moderate / weak / negligible), computed deterministically from the verifier's verdict, evidence strength, and context completeness, plus the diff --git a/review-pr/action.yml b/review-pr/action.yml index 2077e84..8d365af 100644 --- a/review-pr/action.yml +++ b/review-pr/action.yml @@ -74,6 +74,10 @@ inputs: description: "Minimum confidence for posting a finding as an inline comment. Accepts a band name (`strong`=80, `moderate`/`medium`=55, `weak`=30) or a number (clamped to 30-100; the weak-band floor is the lowest meaningful cutoff, so e.g. `10` is raised to `30`). Findings that score below the threshold (and are not security or high-severity) are collapsed into the lower-confidence summary instead of posted inline; they are never silently dropped. Security findings and high-severity CONFIRMED/LIKELY findings are always posted regardless. Default: moderate (55), which preserves the prior behavior." required: false default: "moderate" + incremental: + description: "When 'true' (default), re-reviews only the commits pushed since the last completed docker-agent review instead of the full PR diff. Falls back to a full review when no previous review exists, the history was rewritten (force-push/rebase), or the base branch was merged in. Set to 'false' to force a full review." + required: false + default: "true" outputs: exit-code: @@ -266,6 +270,25 @@ runs: fi fi + # Incremental review: when a previous completed review exists, narrow pr.diff + # to the commits pushed since it. The original full diff is preserved at + # pr_full.diff for stale-thread resolution and suggestion-anchor validation. + # All decision logic (state tracking via review commit_id metadata, rebase and + # force-push detection, base-merge detection) lives in src/incremental-review. + - name: Compute incremental review range + if: hashFiles('pr.diff') != '' && steps.lock-check.outputs.skip != 'true' + id: incremental + shell: bash + env: + ACTION_PATH: ${{ github.action_path }} + GITHUB_TOKEN: ${{ steps.resolve-token.outputs.token }} + PR_NUMBER: ${{ steps.resolve-context.outputs.pr-number }} + INCREMENTAL: ${{ inputs.incremental }} + run: | + BASE_REF=$(jq -r '.baseRefName // ""' pr_metadata.json 2>/dev/null || echo "") + export BASE_REF + node "$ACTION_PATH/../dist/incremental-review.js" pr.diff + - name: Filter excluded paths from diff if: hashFiles('pr.diff') != '' && inputs.exclude-paths != '' shell: bash @@ -398,8 +421,14 @@ runs: # A. Parse diff → file:line set # Tracks current file from "diff --git" headers, line numbers from @@ hunks, # and emits "file:line" for each added (+) line. - if [ ! -f pr.diff ]; then - echo "ℹ️ No pr.diff found — skipping stale thread resolution" + # In incremental mode pr.diff only covers commits since the last review, so + # staleness is judged against the preserved full PR diff (pr_full.diff). + DIFF_FILE=pr.diff + if [ -f pr_full.diff ]; then + DIFF_FILE=pr_full.diff + fi + if [ ! -f "$DIFF_FILE" ]; then + echo "ℹ️ No $DIFF_FILE found — skipping stale thread resolution" exit 0 fi @@ -426,7 +455,7 @@ runs: } /^ / { line++ } /^-/ { next } - ' pr.diff | sort -u > $DIFF_LINES_FILE + ' "$DIFF_FILE" | sort -u > $DIFF_LINES_FILE DIFF_LINE_COUNT=$(wc -l < $DIFF_LINES_FILE | tr -d ' ') echo "📄 Found $DIFF_LINE_COUNT unique file:line pairs in diff" @@ -709,6 +738,8 @@ runs: REPO: ${{ github.repository }} CONFIDENCE_SCORE: ${{ steps.resolve-confidence.outputs.score }} CONFIDENCE_LABEL: ${{ steps.resolve-confidence.outputs.label }} + INCREMENTAL_MODE: ${{ steps.incremental.outputs.mode }} + LAST_REVIEWED_SHA: ${{ steps.incremental.outputs.last-reviewed-sha }} run: | title=$(jq -r '.title' pr_metadata.json) author=$(jq -r '.author.login' pr_metadata.json) @@ -740,6 +771,15 @@ runs: echo "- **Inline confidence threshold**: ${CONFIDENCE_SCORE}/100 (${CONFIDENCE_LABEL} band). When applying the Confidence Scoring posting policy, post a non-forced finding as an inline comment only when its confidence score is **${CONFIDENCE_SCORE} or higher**. A finding that is in scope and not dismissed but scores **below ${CONFIDENCE_SCORE}** must NOT be posted inline — surface it under \"Lower-confidence findings (not posted inline)\" in the review body instead, exactly as the posting policy already prescribes for below-threshold findings (the policy's negligible-band rules still govern what is summarized vs. dropped)." echo "- This threshold does NOT override the security floor or the high-severity always-post rule: \`security\` findings and high-severity CONFIRMED/LIKELY findings are ALWAYS posted inline regardless of the threshold." echo "" + if [ "$INCREMENTAL_MODE" = "incremental" ]; then + echo "## Incremental Review" + echo "" + echo "This is an INCREMENTAL review: \`pr.diff\` contains only the changes pushed since the last completed review (commit \`${LAST_REVIEWED_SHA}\`). Earlier commits were already reviewed — do NOT re-review them or fetch the full PR diff yourself." + echo "- The \"Changed Files\" list above still describes the whole PR; review only the files present in \`pr.diff\`." + echo "- Start the review body with a note that this review covers only the commits since \`${LAST_REVIEWED_SHA:0:12}\`, and scope the assessment label to those changes." + echo "- If \`pr.diff\` is missing, do NOT fall back to fetching the full diff — post a COMMENT review explaining the incremental diff was unavailable." + echo "" + fi echo "---" echo "" echo "## Instructions" @@ -786,6 +826,34 @@ runs: else echo "::warning::validate-suggestions.js not found in dist — suggestion validation will be skipped" fi + # Stage the finding deduplicator the same way (see posting-format.md). + if [ -f "$ACTION_PATH/../dist/dedupe-findings.js" ]; then + cp "$ACTION_PATH/../dist/dedupe-findings.js" /tmp/dedupe-findings.js + else + echo "::warning::dedupe-findings.js not found in dist — finding deduplication will be skipped" + fi + + # Pre-fetch the PR's existing inline review comments so the agent can drop + # findings already posted in a previous review cycle (the dedupe CLI is + # fail-open: a missing or empty file simply skips deduplication). + - name: Fetch existing review comments + if: steps.lock-check.outputs.skip != 'true' + continue-on-error: true + shell: bash + env: + GH_TOKEN: ${{ steps.resolve-token.outputs.token }} + PR_NUMBER: ${{ steps.resolve-context.outputs.pr-number }} + REPO: ${{ github.repository }} + run: | + set -o pipefail + if gh api --paginate "repos/$REPO/pulls/$PR_NUMBER/comments" \ + | jq -s 'map(if type == "array" then . else [.] end) | add // []' \ + > /tmp/existing_review_comments.json; then + echo "✅ Fetched $(jq 'length' /tmp/existing_review_comments.json) existing review comment(s)" + else + rm -f /tmp/existing_review_comments.json + echo "::warning::Failed to fetch existing review comments — deduplication will be skipped" + fi # ======================================== # RUN REVIEW using root docker-agent-action diff --git a/review-pr/agents/pr-review.yaml b/review-pr/agents/pr-review.yaml index 3a3bf31..5159562 100644 --- a/review-pr/agents/pr-review.yaml +++ b/review-pr/agents/pr-review.yaml @@ -242,7 +242,10 @@ agents: security findings (review manually)" citing the verifier's stated mitigation. Before posting, verify each inline comment per Confidence Scoring: its body ends with a valid confidence table, and no non-forced comment scores below the inline threshold T. - Then post the review. + Then run the deduplication step from the posting template + (`node /tmp/dedupe-findings.js /tmp/review_comments.json /tmp/existing_review_comments.json`) + so findings already posted in a previous review cycle are not posted twice, and post + the review. 10. Order inline comments forced-first: list the forced comments (high-severity and security CONFIRMED/LIKELY) first, then the remaining (non-forced) comments by confidence, highest first — so a forced finding is never buried beneath a higher-scoring non-forced @@ -462,8 +465,10 @@ agents: otherwise keep the suggestion as prose. See `/tmp/refs/posting-format.md` for the exact field layout. GitHub is strict: a suggestion anchored outside the diff or on a deleted line makes the WHOLE review fail (422). The mandatory validation step in the posting - template (`node /tmp/validate-suggestions.js /tmp/review_comments.json pr.diff`) strips - any malformed suggestion block before posting — run it; do not skip it. + template strips any malformed suggestion block before posting — run it; do not skip it. + It validates against `pr_full.diff` when that file exists (incremental mode preserves + the full PR diff there — the diff GitHub validates anchors against) and `pr.diff` + otherwise; the posting template selects the right file. ## Domain-Specific Review Output diff --git a/review-pr/agents/refs/posting-format.md b/review-pr/agents/refs/posting-format.md index 28ecc0e..d5fe1e7 100644 --- a/review-pr/agents/refs/posting-format.md +++ b/review-pr/agents/refs/posting-format.md @@ -78,12 +78,24 @@ jq --arg path "$file_path" --argjson start "$start_line_number" --argjson line " /tmp/review_comments.json > /tmp/review_comments.tmp \ && mv /tmp/review_comments.tmp /tmp/review_comments.json +# Deduplicate against previous review cycles BEFORE validating. On a re-review +# the pipeline tends to re-derive findings that are already posted on the PR; +# this drops any new comment matching an existing bot comment (same file, +# nearby line, similar finding heading) so the PR never gets duplicate threads. +# Fail-open: if either file is missing the step changes nothing. +node /tmp/dedupe-findings.js /tmp/review_comments.json /tmp/existing_review_comments.json + # Validate & sanitize suggestion blocks BEFORE posting. GitHub rejects the # ENTIRE review (HTTP 422) if any one suggestion anchors outside the diff or to a # deleted line, so this strips malformed suggestion blocks (keeping the prose # finding) so one bad suggestion can't lose the whole review. Safe to run even -# when there are no suggestions. `pr.diff` is the pre-fetched diff in the workdir. -node /tmp/validate-suggestions.js /tmp/review_comments.json pr.diff +# when there are no suggestions. Validate against the FULL PR diff: in +# incremental mode the workdir has both pr.diff (incremental) and pr_full.diff +# (full PR diff — what GitHub validates anchors against); otherwise pr.diff IS +# the full diff. +VALIDATION_DIFF=pr.diff +if [ -f pr_full.diff ]; then VALIDATION_DIFF=pr_full.diff; fi +node /tmp/validate-suggestions.js /tmp/review_comments.json "$VALIDATION_DIFF" # Defensive: remove any comments with empty bodies before posting jq '[.[] | select(.body | length > 0)]' /tmp/review_comments.json > /tmp/review_comments.tmp \ diff --git a/src/dedupe-findings/__tests__/dedupe-findings.test.ts b/src/dedupe-findings/__tests__/dedupe-findings.test.ts new file mode 100644 index 0000000..63b2af4 --- /dev/null +++ b/src/dedupe-findings/__tests__/dedupe-findings.test.ts @@ -0,0 +1,201 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +/** + * Unit tests for the dedupe-findings matching logic. + * + * These pin the contract that duplicates are dropped only on a triple match + * (path + line proximity + signature similarity) and that human comments and + * bot replies never suppress a finding. + */ +import { describe, expect, it } from 'vitest'; +import { + dedupeComments, + type ExistingComment, + findingSignature, + type NewComment, + signatureSimilarity, +} from '../dedupe-findings.js'; + +const MARKER = ''; +const LEGACY_MARKER = ''; +const REPLY_MARKER = ''; + +function existing(overrides: Partial = {}): ExistingComment { + return { + path: 'src/app.ts', + line: 42, + body: `**[high] Nil pointer dereference on user object**\n\ndetails\n\n${MARKER}`, + ...overrides, + }; +} + +function fresh(overrides: Partial = {}): NewComment { + return { + path: 'src/app.ts', + line: 42, + body: `**[high] Nil pointer dereference on user object**\n\nre-derived details\n\n${MARKER}`, + ...overrides, + }; +} + +describe('findingSignature', () => { + it('extracts normalized tokens from the leading bold heading', () => { + expect(findingSignature('**[high] Race condition in cache-refresh**\n\nbody')).toEqual([ + 'high', + 'race', + 'condition', + 'in', + 'cache', + 'refresh', + ]); + }); + + it('falls back to the first non-empty line when no bold block exists', () => { + expect(findingSignature('\n\nUnchecked error return\nmore')).toEqual([ + 'unchecked', + 'error', + 'return', + ]); + }); + + it('deduplicates repeated tokens', () => { + expect(findingSignature('**error error error**')).toEqual(['error']); + }); + + it('returns null for empty or symbol-only bodies', () => { + expect(findingSignature('')).toBeNull(); + expect(findingSignature('***')).toBeNull(); + }); +}); + +describe('signatureSimilarity', () => { + it('is 1 for identical token sets', () => { + expect(signatureSimilarity(['a', 'b'], ['a', 'b'])).toBe(1); + }); + + it('is 0 for disjoint token sets', () => { + expect(signatureSimilarity(['a'], ['b'])).toBe(0); + }); + + it('computes Jaccard for partial overlap', () => { + // {a,b,c} ∩ {b,c,d} = 2, union = 4 + expect(signatureSimilarity(['a', 'b', 'c'], ['b', 'c', 'd'])).toBe(0.5); + }); + + it('is 0 when either side is empty', () => { + expect(signatureSimilarity([], ['a'])).toBe(0); + expect(signatureSimilarity(['a'], [])).toBe(0); + }); +}); + +describe('dedupeComments', () => { + it('drops a comment matching an existing finding on path, line, and signature', () => { + const result = dedupeComments([fresh()], [existing()]); + expect(result.kept).toEqual([]); + expect(result.dropped).toEqual([ + expect.objectContaining({ path: 'src/app.ts', line: 42, matchedLine: 42 }), + ]); + }); + + it('drops a comment whose line shifted within the tolerance', () => { + const result = dedupeComments([fresh({ line: 44 })], [existing()]); + expect(result.kept).toEqual([]); + expect(result.dropped).toHaveLength(1); + }); + + it('keeps a comment whose line is beyond the tolerance', () => { + const result = dedupeComments([fresh({ line: 50 })], [existing()]); + expect(result.kept).toHaveLength(1); + expect(result.dropped).toEqual([]); + }); + + it('keeps a comment on a different file even with identical text', () => { + const result = dedupeComments([fresh({ path: 'src/other.ts' })], [existing()]); + expect(result.kept).toHaveLength(1); + }); + + it('keeps a comment whose finding text differs (same spot, new issue)', () => { + const result = dedupeComments( + [fresh({ body: `**[medium] Unclosed file handle leaks descriptor**\n\n${MARKER}` })], + [existing()], + ); + expect(result.kept).toHaveLength(1); + }); + + it('never dedupes against human comments (no marker)', () => { + const result = dedupeComments( + [fresh()], + [existing({ body: '**[high] Nil pointer dereference on user object**\n\nI agree' })], + ); + expect(result.kept).toHaveLength(1); + }); + + it('never dedupes against bot conversational replies', () => { + const result = dedupeComments( + [fresh()], + [ + existing({ + body: `**[high] Nil pointer dereference on user object**\n\nreply\n\n${REPLY_MARKER}`, + }), + ], + ); + expect(result.kept).toHaveLength(1); + }); + + it('dedupes against legacy cagent-review comments during migration', () => { + const result = dedupeComments( + [fresh()], + [ + existing({ + body: `**[high] Nil pointer dereference on user object**\n\n${LEGACY_MARKER}`, + }), + ], + ); + expect(result.dropped).toHaveLength(1); + }); + + it('matches outdated existing comments via original_line when line is null', () => { + const result = dedupeComments([fresh()], [existing({ line: null, original_line: 41 })]); + expect(result.dropped).toHaveLength(1); + }); + + it('skips existing comments with no usable anchor at all', () => { + const result = dedupeComments([fresh()], [existing({ line: null, original_line: null })]); + expect(result.kept).toHaveLength(1); + }); + + it('keeps malformed new comments for downstream validation to handle', () => { + const malformed: NewComment[] = [ + { body: 'no path or line' }, + { path: 'src/app.ts', line: 'not-a-number', body: 'x' }, + ]; + const result = dedupeComments(malformed, [existing()]); + expect(result.kept).toEqual(malformed); + }); + + it('returns everything unchanged when there are no existing bot comments', () => { + const comments = [fresh(), fresh({ path: 'b.ts' })]; + const result = dedupeComments(comments, []); + expect(result.kept).toEqual(comments); + expect(result.dropped).toEqual([]); + }); + + it('preserves extra fields (side, start_line) on kept comments', () => { + const comment = fresh({ path: 'src/other.ts', side: 'LEFT', start_line: 40 }); + const result = dedupeComments([comment], [existing()]); + expect(result.kept[0]).toBe(comment); + }); + + it('honors custom tolerance and similarity options', () => { + const strict = dedupeComments([fresh({ line: 44 })], [existing()], { lineTolerance: 1 }); + expect(strict.kept).toHaveLength(1); + + const lax = dedupeComments( + [fresh({ body: `**[high] Nil pointer somewhere else entirely**\n\n${MARKER}` })], + [existing()], + { similarityThreshold: 0.2 }, + ); + expect(lax.dropped).toHaveLength(1); + }); +}); diff --git a/src/dedupe-findings/dedupe-findings.ts b/src/dedupe-findings/dedupe-findings.ts new file mode 100644 index 0000000..36e7196 --- /dev/null +++ b/src/dedupe-findings/dedupe-findings.ts @@ -0,0 +1,171 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +/** + * dedupe-findings — core logic for dropping review comments that duplicate a + * finding already posted on the PR in a previous review cycle. + * + * On a full re-review (e.g. after a rebase forces the incremental path to + * fall back), the pipeline re-analyzes the whole diff and tends to re-derive + * the same findings. Posting them again creates duplicate threads. This module + * compares each about-to-be-posted comment against the bot's existing inline + * review comments and drops the duplicates. + * + * Matching model (all three must hold): + * 1. same file path; + * 2. line proximity — the anchors are within `lineTolerance` lines of each + * other (pushes shift line numbers slightly; GitHub nulls `line` on + * outdated comments, in which case `original_line` is used); + * 3. finding-signature similarity — the normalized token set of the + * comment's heading (the finding type + one-line summary the agent puts + * in the leading `**[severity] …**` block) has a Jaccard similarity of at + * least `similarityThreshold` with the existing comment's heading. + * + * Only existing comments that carry a review marker (`` + * or the legacy ``) participate — human comments and the + * bot's conversational replies (whose marker is `-review-reply`) never + * suppress a finding. + */ + +// The reply marker "" does not contain +// "" as a substring (the space before "-->" +// differs), so this check cannot match reply comments. +const REVIEW_MARKERS = ['', '']; + +export interface NewComment { + path?: unknown; + line?: unknown; + body?: unknown; + [key: string]: unknown; +} + +export interface ExistingComment { + path?: string | null; + line?: number | null; + original_line?: number | null; + body?: string | null; +} + +export interface DedupeOptions { + /** Max distance between line anchors to still count as the same spot. */ + lineTolerance?: number; + /** Minimum Jaccard similarity between finding signatures (0..1]. */ + similarityThreshold?: number; +} + +export interface DroppedComment { + path: string; + line: number; + matchedLine: number; + signature: string; +} + +export interface DedupeResult { + kept: NewComment[]; + dropped: DroppedComment[]; +} + +const DEFAULT_LINE_TOLERANCE = 3; +const DEFAULT_SIMILARITY_THRESHOLD = 0.5; + +/** + * Extract the normalized token set identifying a finding from a comment body. + * + * Prefers the first bold block (`**[severity] summary**` per the posting + * format); falls back to the first non-empty line. A leading `[severity]` / + * `[category]` tag is kept as tokens — it participates in the similarity so a + * "security" and a "logic_error" finding on the same line don't collapse. + * Returns null when no usable text exists. + */ +export function findingSignature(body: string): string[] | null { + const bold = body.match(/\*\*([^*]+)\*\*/); + const heading = bold?.[1] ?? body.split('\n').find((line) => line.trim().length > 0) ?? ''; + const tokens = heading + .toLowerCase() + .split(/[^a-z0-9]+/) + .filter((token) => token.length > 0); + return tokens.length > 0 ? [...new Set(tokens)] : null; +} + +export function signatureSimilarity(a: string[], b: string[]): number { + if (a.length === 0 || b.length === 0) return 0; + const setB = new Set(b); + let intersection = 0; + for (const token of a) { + if (setB.has(token)) intersection++; + } + const union = a.length + b.length - intersection; + return union === 0 ? 0 : intersection / union; +} + +function isBotReviewComment(comment: ExistingComment): boolean { + const body = comment.body ?? ''; + return REVIEW_MARKERS.some((marker) => body.includes(marker)); +} + +function anchorLine(comment: ExistingComment): number | null { + // GitHub nulls `line` when a push outdates the comment position but keeps + // the original anchor in `original_line`. + const line = comment.line ?? comment.original_line; + return typeof line === 'number' && Number.isInteger(line) && line > 0 ? line : null; +} + +/** + * Partition `newComments` into comments to post and duplicates of existing + * bot findings. Malformed new comments (no path/line/body) are always kept — + * downstream validation owns rejecting them. + */ +export function dedupeComments( + newComments: NewComment[], + existingComments: ExistingComment[], + opts: DedupeOptions = {}, +): DedupeResult { + const lineTolerance = opts.lineTolerance ?? DEFAULT_LINE_TOLERANCE; + const similarityThreshold = opts.similarityThreshold ?? DEFAULT_SIMILARITY_THRESHOLD; + + const candidates = existingComments + .filter((comment) => isBotReviewComment(comment)) + .map((comment) => ({ + path: comment.path ?? '', + line: anchorLine(comment), + signature: findingSignature(comment.body ?? ''), + })) + .filter( + (comment): comment is { path: string; line: number; signature: string[] } => + comment.path !== '' && comment.line !== null && comment.signature !== null, + ); + + if (candidates.length === 0) { + return { kept: [...newComments], dropped: [] }; + } + + const kept: NewComment[] = []; + const dropped: DroppedComment[] = []; + + for (const comment of newComments) { + const path = typeof comment.path === 'string' ? comment.path : ''; + const line = + typeof comment.line === 'number' && Number.isInteger(comment.line) ? comment.line : null; + const signature = typeof comment.body === 'string' ? findingSignature(comment.body) : null; + + if (path === '' || line === null || signature === null) { + kept.push(comment); + continue; + } + + const match = candidates.find( + (candidate) => + candidate.path === path && + Math.abs(candidate.line - line) <= lineTolerance && + signatureSimilarity(signature, candidate.signature) >= similarityThreshold, + ); + + if (match) { + dropped.push({ path, line, matchedLine: match.line, signature: signature.join(' ') }); + } else { + kept.push(comment); + } + } + + return { kept, dropped }; +} diff --git a/src/dedupe-findings/index.ts b/src/dedupe-findings/index.ts new file mode 100644 index 0000000..aef0944 --- /dev/null +++ b/src/dedupe-findings/index.ts @@ -0,0 +1,89 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +/** + * dedupe-findings CLI entrypoint. + * + * Usage: + * node dist/dedupe-findings.js + * + * newCommentsJsonPath Path to the inline-comments JSON array the agent + * built (e.g. /tmp/review_comments.json). Read and, + * when duplicates are found, overwritten in-place. + * existingCommentsJsonPath Path to the JSON array of the PR's existing + * review comments (as returned by + * GET /pulls/{n}/comments, pre-fetched by the + * workflow to /tmp/existing_review_comments.json). + * + * Behavior is fail-open so it can never block a legitimate review: + * - missing CLI args → exit 1 (usage error); + * - new-comments file absent/unparseable → warn, exit 0, no change; + * - existing file absent/unparseable → warn, exit 0, no change + * (nothing to dedupe against — post everything); + * - otherwise → drop only duplicates, exit 0. + * + * All progress is written to stderr so it surfaces in the Actions log without + * polluting any captured stdout. + * + * See dedupe-findings.ts for the pure matching logic. + */ +import { readFileSync, writeFileSync } from 'node:fs'; +import { dedupeComments, type ExistingComment, type NewComment } from './dedupe-findings.js'; + +const [, , newCommentsPath, existingCommentsPath] = process.argv; + +if (!newCommentsPath || !existingCommentsPath) { + process.stderr.write('Usage: dedupe-findings \n'); + process.exit(1); +} + +function warn(message: string): void { + process.stderr.write(`${message}\n`); +} + +function readJsonArray(path: string, label: string): unknown[] | null { + // Read directly and handle failure in the catch rather than guarding with + // existsSync first (avoids a check-then-use file-system race). + try { + const parsed = JSON.parse(readFileSync(path, 'utf-8')); + if (!Array.isArray(parsed)) { + warn(`⚠️ ${path} is not a JSON array — skipping ${label}`); + return null; + } + return parsed; + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + warn(`⚠️ No ${label} file at ${path} — skipping deduplication`); + } else { + warn( + `⚠️ Could not read ${path} (${err instanceof Error ? err.message : String(err)}) — skipping deduplication`, + ); + } + return null; + } +} + +const newComments = readJsonArray(newCommentsPath, 'new comments'); +if (newComments === null) process.exit(0); + +const existingComments = readJsonArray(existingCommentsPath, 'existing comments'); +if (existingComments === null) process.exit(0); + +const result = dedupeComments(newComments as NewComment[], existingComments as ExistingComment[]); + +for (const drop of result.dropped) { + warn( + `⏭️ Dropped duplicate finding on ${drop.path}:${drop.line} ` + + `(matches existing comment at line ${drop.matchedLine}: "${drop.signature}")`, + ); +} + +if (result.dropped.length > 0) { + writeFileSync(newCommentsPath, `${JSON.stringify(result.kept, null, 2)}\n`, 'utf-8'); + warn( + `✅ Deduplication: kept ${result.kept.length}, ` + + `dropped ${result.dropped.length} duplicate(s) (rewrote ${newCommentsPath})`, + ); +} else { + warn(`✅ Deduplication: kept ${result.kept.length}, dropped 0 (no changes)`); +} diff --git a/src/incremental-review/__tests__/incremental-review.test.ts b/src/incremental-review/__tests__/incremental-review.test.ts new file mode 100644 index 0000000..ebc00c7 --- /dev/null +++ b/src/incremental-review/__tests__/incremental-review.test.ts @@ -0,0 +1,323 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +/** + * Unit tests for the incremental-review core logic. + * + * These pin the three safety-critical behaviors: + * - findLastReviewedSha only trusts completed docker-agent reviews; + * - planIncrementalReview falls back to a full review on every ambiguous + * git state (force-push, rebase, merged base, missing objects); + * - restrictDiffToFiles never lets the incremental diff reference a file + * outside the full PR diff (which would 422 on inline comments). + */ +import { describe, expect, it } from 'vitest'; +import { + findLastReviewedSha, + type GitResult, + listDiffFiles, + planIncrementalReview, + type ReviewLike, + restrictDiffToFiles, +} from '../incremental-review.js'; + +const SHA_A = 'a'.repeat(40); +const SHA_B = 'b'.repeat(40); +const HEAD_SHA = 'c'.repeat(40); + +function review(overrides: Partial = {}): ReviewLike { + return { + user: { login: 'docker-agent' }, + body: '### Assessment: 🟢 APPROVE', + commit_id: SHA_A, + submitted_at: '2026-01-01T10:00:00Z', + ...overrides, + }; +} + +describe('findLastReviewedSha', () => { + it('returns the SHA of the only completed bot review', () => { + expect(findLastReviewedSha([review()])).toBe(SHA_A); + }); + + it('returns the newest completed review when several exist', () => { + const reviews = [ + review({ commit_id: SHA_A, submitted_at: '2026-01-01T10:00:00Z' }), + review({ commit_id: SHA_B, submitted_at: '2026-01-02T10:00:00Z' }), + ]; + expect(findLastReviewedSha(reviews)).toBe(SHA_B); + }); + + it('prefers the later list entry on equal timestamps (API returns oldest-first)', () => { + const reviews = [review({ commit_id: SHA_A }), review({ commit_id: SHA_B })]; + expect(findLastReviewedSha(reviews)).toBe(SHA_B); + }); + + it('accepts the LGTM fallback body as a completed review', () => { + expect(findLastReviewedSha([review({ body: '🟢 **No issues found** — LGTM!' })])).toBe(SHA_A); + }); + + it('accepts the GitHub App bot login variant', () => { + expect(findLastReviewedSha([review({ user: { login: 'docker-agent[bot]' } })])).toBe(SHA_A); + }); + + it('ignores reviews from other users even with a matching body', () => { + expect(findLastReviewedSha([review({ user: { login: 'alice' } })])).toBeNull(); + }); + + it('ignores timeout and failure fallback reviews (commits stay unreviewed)', () => { + const reviews = [ + review({ body: '⏱️ **PR Review Timed Out** — retry.' }), + review({ body: '❌ **PR Review Failed** — logs.' }), + ]; + expect(findLastReviewedSha(reviews)).toBeNull(); + }); + + it('ignores reviews with a malformed or missing commit_id', () => { + expect(findLastReviewedSha([review({ commit_id: 'deadbeef' })])).toBeNull(); + expect(findLastReviewedSha([review({ commit_id: null })])).toBeNull(); + }); + + it('ignores reviews without a parseable submitted_at', () => { + expect(findLastReviewedSha([review({ submitted_at: null })])).toBeNull(); + expect(findLastReviewedSha([review({ submitted_at: 'not-a-date' })])).toBeNull(); + }); + + it('returns null for an empty review list', () => { + expect(findLastReviewedSha([])).toBeNull(); + }); + + it('respects a custom bot login', () => { + expect(findLastReviewedSha([review({ user: { login: 'my-bot' } })], 'my-bot')).toBe(SHA_A); + expect(findLastReviewedSha([review()], 'my-bot')).toBeNull(); + }); +}); + +describe('planIncrementalReview', () => { + /** + * Build a fake git runner from a command→result table. Commands are keyed by + * their joined argv; unlisted commands fail. + */ + function fakeGit(table: Record) { + return (args: string[]): GitResult => table[args.join(' ')] ?? { ok: false, stdout: '' }; + } + + const MERGE_BASE = 'd'.repeat(40); + + function happyTable(): Record { + return { + 'rev-parse HEAD': { ok: true, stdout: `${HEAD_SHA}\n` }, + [`cat-file -e ${SHA_A}^{commit}`]: { ok: true, stdout: '' }, + [`merge-base --is-ancestor ${SHA_A} HEAD`]: { ok: true, stdout: '' }, + [`merge-base origin/main ${SHA_A}`]: { ok: true, stdout: `${MERGE_BASE}\n` }, + 'merge-base origin/main HEAD': { ok: true, stdout: `${MERGE_BASE}\n` }, + }; + } + + it('plans an incremental review when the last SHA is a clean ancestor', () => { + const plan = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: 'main', + git: fakeGit(happyTable()), + }); + expect(plan).toEqual({ + mode: 'incremental', + reason: 'ok', + lastReviewedSha: SHA_A, + headSha: HEAD_SHA, + }); + }); + + it('falls back when there is no previous review', () => { + const plan = planIncrementalReview({ + lastReviewedSha: null, + baseRef: 'main', + git: fakeGit({}), + }); + expect(plan.mode).toBe('full'); + expect(plan.reason).toBe('no-previous-review'); + }); + + it('falls back when HEAD is the last reviewed commit (explicit re-request)', () => { + const table = happyTable(); + table['rev-parse HEAD'] = { ok: true, stdout: `${SHA_A}\n` }; + const plan = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: 'main', + git: fakeGit(table), + }); + expect(plan.mode).toBe('full'); + expect(plan.reason).toBe('no-new-commits'); + }); + + it('falls back when the last reviewed SHA is not in the local clone (force-push)', () => { + const table = happyTable(); + delete table[`cat-file -e ${SHA_A}^{commit}`]; + const plan = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: 'main', + git: fakeGit(table), + }); + expect(plan.mode).toBe('full'); + expect(plan.reason).toBe('unknown-sha'); + }); + + it('falls back when the last reviewed SHA is not an ancestor of HEAD (rebase)', () => { + const table = happyTable(); + table[`merge-base --is-ancestor ${SHA_A} HEAD`] = { ok: false, stdout: '' }; + const plan = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: 'main', + git: fakeGit(table), + }); + expect(plan.mode).toBe('full'); + expect(plan.reason).toBe('history-rewritten'); + }); + + it('falls back when the base branch was merged into the PR after the last review', () => { + const table = happyTable(); + table['merge-base origin/main HEAD'] = { ok: true, stdout: `${'e'.repeat(40)}\n` }; + const plan = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: 'main', + git: fakeGit(table), + }); + expect(plan.mode).toBe('full'); + expect(plan.reason).toBe('base-merged-in'); + }); + + it('falls back when the base ref is empty or cannot be resolved', () => { + const empty = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: '', + git: fakeGit(happyTable()), + }); + expect(empty.mode).toBe('full'); + expect(empty.reason).toBe('base-unresolved'); + + const table = happyTable(); + delete table['merge-base origin/main HEAD']; + const unresolved = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: 'main', + git: fakeGit(table), + }); + expect(unresolved.mode).toBe('full'); + expect(unresolved.reason).toBe('base-unresolved'); + }); + + it('rejects a base ref that could be parsed as a git option', () => { + const plan = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: '--upload-pack=evil', + git: fakeGit(happyTable()), + }); + expect(plan.mode).toBe('full'); + expect(plan.reason).toBe('base-unresolved'); + }); + + it('falls back when HEAD cannot be resolved', () => { + const table = happyTable(); + table['rev-parse HEAD'] = { ok: false, stdout: '' }; + const plan = planIncrementalReview({ + lastReviewedSha: SHA_A, + baseRef: 'main', + git: fakeGit(table), + }); + expect(plan.mode).toBe('full'); + expect(plan.reason).toBe('error'); + }); +}); + +const DIFF = [ + 'diff --git a/src/kept.ts b/src/kept.ts', + 'index 111..222 100644', + '--- a/src/kept.ts', + '+++ b/src/kept.ts', + '@@ -1,2 +1,3 @@', + ' line1', + '+added', + ' line2', + 'diff --git a/src/dropped.ts b/src/dropped.ts', + 'index 333..444 100644', + '--- a/src/dropped.ts', + '+++ b/src/dropped.ts', + '@@ -1 +1 @@', + '-old', + '+new', + '', +].join('\n'); + +describe('listDiffFiles', () => { + it('collects paths from ---/+++ lines', () => { + expect(listDiffFiles(DIFF)).toEqual(new Set(['src/kept.ts', 'src/dropped.ts'])); + }); + + it('collects deletion paths (only --- a/ carries the real path)', () => { + const deletion = [ + 'diff --git a/gone.ts b/gone.ts', + 'deleted file mode 100644', + '--- a/gone.ts', + '+++ /dev/null', + '@@ -1 +0,0 @@', + '-bye', + ].join('\n'); + expect(listDiffFiles(deletion)).toEqual(new Set(['gone.ts'])); + }); + + it('collects both sides of a pure rename', () => { + const rename = [ + 'diff --git a/old-name.ts b/new-name.ts', + 'similarity index 100%', + 'rename from old-name.ts', + 'rename to new-name.ts', + ].join('\n'); + expect(listDiffFiles(rename)).toEqual(new Set(['old-name.ts', 'new-name.ts'])); + }); + + it('returns an empty set for an empty diff', () => { + expect(listDiffFiles('')).toEqual(new Set()); + }); +}); + +describe('restrictDiffToFiles', () => { + it('keeps only sections whose file is in the allowed set', () => { + const result = restrictDiffToFiles(DIFF, new Set(['src/kept.ts'])); + expect(result.keptFiles).toBe(1); + expect(result.droppedFiles).toEqual(['src/dropped.ts']); + expect(result.restricted).toContain('diff --git a/src/kept.ts b/src/kept.ts'); + expect(result.restricted).not.toContain('src/dropped.ts'); + }); + + it('keeps everything when all files are allowed', () => { + const result = restrictDiffToFiles(DIFF, new Set(['src/kept.ts', 'src/dropped.ts'])); + expect(result.keptFiles).toBe(2); + expect(result.droppedFiles).toEqual([]); + expect(result.restricted).toBe(DIFF); + }); + + it('drops everything when nothing is allowed', () => { + const result = restrictDiffToFiles(DIFF, new Set()); + expect(result.keptFiles).toBe(0); + expect(result.droppedFiles).toEqual(['src/kept.ts', 'src/dropped.ts']); + }); + + it('keeps a renamed section when either side is allowed', () => { + const rename = [ + 'diff --git a/old-name.ts b/new-name.ts', + 'similarity index 100%', + 'rename from old-name.ts', + 'rename to new-name.ts', + ].join('\n'); + const result = restrictDiffToFiles(rename, new Set(['new-name.ts'])); + expect(result.keptFiles).toBe(1); + }); + + it('handles an empty diff', () => { + expect(restrictDiffToFiles('', new Set(['a.ts']))).toEqual({ + restricted: '', + keptFiles: 0, + droppedFiles: [], + }); + }); +}); diff --git a/src/incremental-review/__tests__/index.test.ts b/src/incremental-review/__tests__/index.test.ts new file mode 100644 index 0000000..2890efc --- /dev/null +++ b/src/incremental-review/__tests__/index.test.ts @@ -0,0 +1,228 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +/** + * Unit tests for the incremental-review CLI wiring (main with injected deps). + * + * These pin the file effects and the fail-open contract: pr.diff is rewritten + * only on the happy path (with the full diff preserved), and every error path + * reports mode=full without touching the diff. + */ +import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import * as core from '@actions/core'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('@actions/core'); + +import type { GitResult, ReviewLike } from '../incremental-review.js'; +import { fullDiffPath, main } from '../index.js'; + +const SHA_A = 'a'.repeat(40); +const HEAD_SHA = 'c'.repeat(40); +const MERGE_BASE = 'd'.repeat(40); + +const FULL_DIFF = [ + 'diff --git a/src/kept.ts b/src/kept.ts', + '--- a/src/kept.ts', + '+++ b/src/kept.ts', + '@@ -1,2 +1,3 @@', + ' line1', + '+added-in-old-commit', + ' line2', + 'diff --git a/src/new.ts b/src/new.ts', + '--- a/src/new.ts', + '+++ b/src/new.ts', + '@@ -1 +1,2 @@', + ' base', + '+added-in-new-commit', + '', +].join('\n'); + +const INCREMENTAL_DIFF = [ + 'diff --git a/src/new.ts b/src/new.ts', + '--- a/src/new.ts', + '+++ b/src/new.ts', + '@@ -1 +1,2 @@', + ' base', + '+added-in-new-commit', + 'diff --git a/src/net-zero.ts b/src/net-zero.ts', + '--- a/src/net-zero.ts', + '+++ b/src/net-zero.ts', + '@@ -1 +1 @@', + '-x', + '+x2', + '', +].join('\n'); + +function completedReview(): ReviewLike { + return { + user: { login: 'docker-agent' }, + body: '### Assessment: 🟢 APPROVE', + commit_id: SHA_A, + submitted_at: '2026-01-01T10:00:00Z', + }; +} + +/** Fake git that answers the happy-path plan and writes the incremental diff. */ +function fakeGit(incrementalDiff: string) { + return (args: string[]): GitResult => { + const cmd = args.join(' '); + if (cmd === 'rev-parse HEAD') return { ok: true, stdout: `${HEAD_SHA}\n` }; + if (cmd === `cat-file -e ${SHA_A}^{commit}`) return { ok: true, stdout: '' }; + if (cmd === `merge-base --is-ancestor ${SHA_A} HEAD`) return { ok: true, stdout: '' }; + if (cmd.startsWith('merge-base origin/main')) return { ok: true, stdout: `${MERGE_BASE}\n` }; + if (args[0] === 'diff') { + const outputArg = args.find((a) => a.startsWith('--output=')); + if (outputArg) writeFileSync(outputArg.slice('--output='.length), incrementalDiff, 'utf-8'); + return { ok: true, stdout: '' }; + } + return { ok: false, stdout: '' }; + }; +} + +function outputsByName(): Record { + const map: Record = {}; + for (const [name, value] of vi.mocked(core.setOutput).mock.calls) { + map[String(name)] = String(value); + } + return map; +} + +describe('incremental-review main', () => { + let dir: string; + let diffPath: string; + const savedEnv = { ...process.env }; + + beforeEach(() => { + vi.clearAllMocks(); + dir = mkdtempSync(join(tmpdir(), 'incremental-review-test-')); + diffPath = join(dir, 'pr.diff'); + writeFileSync(diffPath, FULL_DIFF, 'utf-8'); + process.env.GITHUB_TOKEN = 'tok'; + process.env.GITHUB_REPOSITORY = 'docker/repo'; + process.env.PR_NUMBER = '7'; + process.env.BASE_REF = 'main'; + delete process.env.INCREMENTAL; + delete process.env.REVIEW_BOT_LOGIN; + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + process.env = { ...savedEnv }; + }); + + it('rewrites pr.diff with the restricted incremental diff and preserves the full diff', async () => { + await main(diffPath, { + git: fakeGit(INCREMENTAL_DIFF), + fetchReviews: async () => [completedReview()], + }); + + const outputs = outputsByName(); + expect(outputs.mode).toBe('incremental'); + expect(outputs.reason).toBe('ok'); + expect(outputs['last-reviewed-sha']).toBe(SHA_A); + + const rewritten = readFileSync(diffPath, 'utf-8'); + expect(rewritten).toContain('src/new.ts'); + // net-zero.ts is not in the full PR diff — restricted out. + expect(rewritten).not.toContain('src/net-zero.ts'); + expect(readFileSync(fullDiffPath(diffPath), 'utf-8')).toBe(FULL_DIFF); + }); + + it('reports full/disabled without any API or git calls when INCREMENTAL=false', async () => { + process.env.INCREMENTAL = 'false'; + const git = vi.fn(); + const fetchReviews = vi.fn(); + + await main(diffPath, { git, fetchReviews }); + + expect(outputsByName()).toMatchObject({ mode: 'full', reason: 'disabled' }); + expect(git).not.toHaveBeenCalled(); + expect(fetchReviews).not.toHaveBeenCalled(); + expect(readFileSync(diffPath, 'utf-8')).toBe(FULL_DIFF); + }); + + it('reports full/no-previous-review and leaves the diff untouched', async () => { + await main(diffPath, { + git: fakeGit(INCREMENTAL_DIFF), + fetchReviews: async () => [], + }); + + expect(outputsByName()).toMatchObject({ mode: 'full', reason: 'no-previous-review' }); + expect(readFileSync(diffPath, 'utf-8')).toBe(FULL_DIFF); + }); + + it('fails open to a full review when the reviews API errors', async () => { + await main(diffPath, { + git: fakeGit(INCREMENTAL_DIFF), + fetchReviews: async () => { + throw new Error('boom'); + }, + }); + + expect(outputsByName()).toMatchObject({ mode: 'full', reason: 'error' }); + expect(vi.mocked(core.warning)).toHaveBeenCalledWith(expect.stringContaining('boom')); + expect(readFileSync(diffPath, 'utf-8')).toBe(FULL_DIFF); + }); + + it('fails open when token or PR number is missing', async () => { + delete process.env.GITHUB_TOKEN; + delete process.env.GH_TOKEN; + await main(diffPath, { git: fakeGit(INCREMENTAL_DIFF), fetchReviews: async () => [] }); + expect(outputsByName()).toMatchObject({ mode: 'full', reason: 'error' }); + }); + + it('reports full/no-diff when the diff file does not exist', async () => { + rmSync(diffPath); + await main(diffPath, { + git: fakeGit(INCREMENTAL_DIFF), + fetchReviews: async () => [completedReview()], + }); + expect(outputsByName()).toMatchObject({ mode: 'full', reason: 'no-diff' }); + }); + + it('falls back to full when the incremental diff is net-zero against the PR diff', async () => { + const netZeroOnly = [ + 'diff --git a/src/net-zero.ts b/src/net-zero.ts', + '--- a/src/net-zero.ts', + '+++ b/src/net-zero.ts', + '@@ -1 +1 @@', + '-x', + '+x2', + '', + ].join('\n'); + + await main(diffPath, { + git: fakeGit(netZeroOnly), + fetchReviews: async () => [completedReview()], + }); + + expect(outputsByName()).toMatchObject({ mode: 'full', reason: 'net-zero-changes' }); + expect(readFileSync(diffPath, 'utf-8')).toBe(FULL_DIFF); + }); + + it('fails open to full when git diff itself fails', async () => { + const git = (args: string[]): GitResult => { + if (args[0] === 'diff') return { ok: false, stdout: '' }; + return fakeGit('')(args); + }; + + await main(diffPath, { git, fetchReviews: async () => [completedReview()] }); + + expect(outputsByName()).toMatchObject({ mode: 'full', reason: 'error' }); + expect(readFileSync(diffPath, 'utf-8')).toBe(FULL_DIFF); + }); +}); + +describe('fullDiffPath', () => { + it('inserts _full before the .diff extension', () => { + expect(fullDiffPath('pr.diff')).toBe('pr_full.diff'); + expect(fullDiffPath('/work/pr.diff')).toBe('/work/pr_full.diff'); + }); + + it('appends _full when there is no .diff extension', () => { + expect(fullDiffPath('prdiff')).toBe('prdiff_full'); + }); +}); diff --git a/src/incremental-review/incremental-review.ts b/src/incremental-review/incremental-review.ts new file mode 100644 index 0000000..b3f1af7 --- /dev/null +++ b/src/incremental-review/incremental-review.ts @@ -0,0 +1,224 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +/** + * incremental-review — core logic for reviewing only the commits pushed since + * the last docker-agent review, instead of re-reviewing the full PR diff on + * every trigger. + * + * ## State tracking design + * + * The "last reviewed commit SHA" is read from the metadata GitHub records on + * every posted review: `commit_id` on `GET /pulls/{n}/reviews` is the PR head + * SHA at posting time. This survives across workflow runs, requires no extra + * writes, and cannot be edited away like a marker embedded in a comment body. + * Only reviews that represent a *completed* run count — an assessment body + * ("### Assessment:") or the zero-findings LGTM fallback. Timeout and failure + * fallback reviews do NOT mark commits as reviewed, so the next run re-covers + * them. + * + * ## Fallbacks to a full review (see planIncrementalReview) + * + * no-previous-review no completed docker-agent review found on the PR + * no-new-commits HEAD is the last reviewed commit (explicit re-request) + * unknown-sha last reviewed SHA is absent from the local clone + * history-rewritten last reviewed SHA is not an ancestor of HEAD + * (force-push or rebase — the incremental range would + * be meaningless) + * base-unresolved origin/ could not be resolved locally + * base-merged-in the base branch was merged into the PR branch after + * the last review (the range would drag in base changes) + * + * The incremental diff (`git diff ..HEAD`) is additionally + * intersected at file level with the full PR diff: a file whose changes since + * the last review cancel out against the base (net-zero) is not part of the + * PR diff, so GitHub would reject inline comments anchored there (HTTP 422). + */ + +/** Result of one git invocation. Injectable for deterministic tests. */ +export interface GitResult { + ok: boolean; + stdout: string; +} + +export type GitRunner = (args: string[]) => GitResult; + +export interface ReviewLike { + user?: { login?: string | null } | null; + body?: string | null; + commit_id?: string | null; + submitted_at?: string | null; +} + +export type ReviewMode = 'incremental' | 'full'; + +export interface IncrementalPlan { + mode: ReviewMode; + /** 'ok' for incremental mode; otherwise the fallback reason. */ + reason: string; + lastReviewedSha: string | null; + headSha: string | null; +} + +// Full 40-hex only: the value is passed to git as an argument, so anything +// looser (e.g. a body that starts with "-") must never get through. +const SHA40 = /^[0-9a-f]{40}$/i; + +// Bodies that mark a review run as completed. The timeout ("⏱️ **PR Review +// Timed Out**") and failure ("❌ **PR Review Failed**") fallbacks match +// neither, so unreviewed commits stay unreviewed. +const COMPLETED_BODY_MARKERS = ['### Assessment:', '🟢 **No issues found**']; + +// GitHub presents the bot identity as "docker-agent" when posting with a +// machine user token, or "docker-agent[bot]" through a GitHub App installation +// token. Match both (same convention as src/rate-limit). +function matchesBotLogin(login: string | null | undefined, botLogin: string): boolean { + return login === botLogin || login === `${botLogin}[bot]`; +} + +/** + * Find the head SHA recorded on the most recent *completed* docker-agent + * review of the PR. Returns null when no such review exists. + */ +export function findLastReviewedSha( + reviews: ReviewLike[], + botLogin = 'docker-agent', +): string | null { + let best: { sha: string; at: number } | null = null; + for (const review of reviews) { + if (!matchesBotLogin(review.user?.login, botLogin)) continue; + const body = review.body ?? ''; + if (!COMPLETED_BODY_MARKERS.some((marker) => body.includes(marker))) continue; + const sha = review.commit_id ?? ''; + if (!SHA40.test(sha)) continue; + if (!review.submitted_at) continue; + const at = Date.parse(review.submitted_at); + if (!Number.isFinite(at)) continue; + // >= so that among equal timestamps the later list entry (newest — the + // Reviews API returns oldest-first) wins. + if (best === null || at >= best.at) best = { sha, at }; + } + return best?.sha ?? null; +} + +/** + * Decide whether an incremental review is safe, using only local git state. + * Every ambiguous situation falls back to a full review — a full review is + * always correct, an incremental one is only an optimization. + */ +export function planIncrementalReview(opts: { + lastReviewedSha: string | null; + baseRef: string; + git: GitRunner; +}): IncrementalPlan { + const { lastReviewedSha: sha, baseRef, git } = opts; + + if (!sha) { + return { mode: 'full', reason: 'no-previous-review', lastReviewedSha: null, headSha: null }; + } + + const head = git(['rev-parse', 'HEAD']); + const headSha = head.ok ? head.stdout.trim() : ''; + if (!SHA40.test(headSha)) { + return { mode: 'full', reason: 'error', lastReviewedSha: sha, headSha: null }; + } + + if (headSha.toLowerCase() === sha.toLowerCase()) { + return { mode: 'full', reason: 'no-new-commits', lastReviewedSha: sha, headSha }; + } + + if (!git(['cat-file', '-e', `${sha}^{commit}`]).ok) { + return { mode: 'full', reason: 'unknown-sha', lastReviewedSha: sha, headSha }; + } + + if (!git(['merge-base', '--is-ancestor', sha, 'HEAD']).ok) { + return { mode: 'full', reason: 'history-rewritten', lastReviewedSha: sha, headSha }; + } + + // Refnames cannot start with "-", so this also keeps the value safe as a + // git argument. + if (!baseRef || baseRef.startsWith('-')) { + return { mode: 'full', reason: 'base-unresolved', lastReviewedSha: sha, headSha }; + } + + const mergeBaseAtLastReview = git(['merge-base', `origin/${baseRef}`, sha]); + const mergeBaseNow = git(['merge-base', `origin/${baseRef}`, 'HEAD']); + if (!mergeBaseAtLastReview.ok || !mergeBaseNow.ok) { + return { mode: 'full', reason: 'base-unresolved', lastReviewedSha: sha, headSha }; + } + if (mergeBaseAtLastReview.stdout.trim() !== mergeBaseNow.stdout.trim()) { + return { mode: 'full', reason: 'base-merged-in', lastReviewedSha: sha, headSha }; + } + + return { mode: 'incremental', reason: 'ok', lastReviewedSha: sha, headSha }; +} + +/** + * Collect every file path referenced by a unified diff. Paths are read from + * the same path-bearing line types filter-diff handles: `--- a/`, `+++ b/`, + * and rename lines (renames also record the old name so either side matches). + */ +export function listDiffFiles(diffContent: string): Set { + const files = new Set(); + for (const line of diffContent.split('\n')) { + if (line.startsWith('--- a/')) files.add(line.slice(6)); + else if (line.startsWith('+++ b/')) files.add(line.slice(6)); + else if (line.startsWith('rename from ')) files.add(line.slice(12)); + else if (line.startsWith('rename to ')) files.add(line.slice(10)); + } + return files; +} + +export interface RestrictResult { + restricted: string; + keptFiles: number; + droppedFiles: string[]; +} + +/** + * Keep only the diff sections whose file path appears in `allowed` (the full + * PR diff's file set). Sections for net-zero files are dropped so inline + * comments are never anchored outside the PR diff. + */ +export function restrictDiffToFiles(diffContent: string, allowed: Set): RestrictResult { + if (diffContent === '') { + return { restricted: '', keptFiles: 0, droppedFiles: [] }; + } + + const lines = diffContent.split('\n'); + const outputLines: string[] = []; + let sectionLines: string[] = []; + let sectionFiles: string[] = []; + let keptFiles = 0; + const droppedFiles: string[] = []; + + const flushSection = (): void => { + if (sectionLines.length === 0) return; + if (sectionFiles.some((file) => allowed.has(file))) { + for (const line of sectionLines) outputLines.push(line); + keptFiles++; + } else { + droppedFiles.push(sectionFiles[0] ?? '(unknown)'); + } + sectionLines = []; + sectionFiles = []; + }; + + for (const line of lines) { + if (line.startsWith('diff --git ')) { + flushSection(); + sectionLines.push(line); + } else if (sectionLines.length > 0) { + if (line.startsWith('--- a/')) sectionFiles.push(line.slice(6)); + else if (line.startsWith('+++ b/')) sectionFiles.push(line.slice(6)); + else if (line.startsWith('rename to ')) sectionFiles.push(line.slice(10)); + sectionLines.push(line); + } else { + // Content before the first diff section (e.g. preamble) — preserve. + outputLines.push(line); + } + } + flushSection(); + + return { restricted: outputLines.join('\n'), keptFiles, droppedFiles }; +} diff --git a/src/incremental-review/index.ts b/src/incremental-review/index.ts new file mode 100644 index 0000000..4c58d8a --- /dev/null +++ b/src/incremental-review/index.ts @@ -0,0 +1,204 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +/** + * incremental-review CLI entrypoint. + * + * Usage: + * node dist/incremental-review.js + * + * diffPath Path to the pre-fetched full PR diff (e.g. pr.diff). When an + * incremental review is possible the file is overwritten with the + * incremental diff and the original is preserved next to it as + * pr_full.diff (derived: `_full.diff`) for anchor validation + * and stale-thread resolution. + * + * Environment: + * GITHUB_TOKEN / GH_TOKEN Token with pull-request read scope + * GITHUB_REPOSITORY "owner/repo" + * PR_NUMBER Pull request number + * BASE_REF PR base branch name (e.g. "main") + * INCREMENTAL "true" (default) enables incremental review; + * "false" forces a full review + * REVIEW_BOT_LOGIN Bot login whose reviews mark commits as reviewed + * (default "docker-agent") + * + * Outputs (via @actions/core.setOutput): + * mode "incremental" | "full" + * reason "ok" for incremental, otherwise the fallback reason + * last-reviewed-sha SHA of the last completed review ("" when none) + * + * Fail-open: every error path leaves the diff untouched and reports mode=full — + * a full review is always correct, incremental is only an optimization. + */ +import { spawnSync } from 'node:child_process'; +import { copyFileSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import * as core from '@actions/core'; +import { Octokit } from '@octokit/rest'; +import { + findLastReviewedSha, + type GitResult, + type GitRunner, + listDiffFiles, + planIncrementalReview, + type ReviewLike, + restrictDiffToFiles, +} from './incremental-review.js'; + +export function runGit(args: string[]): GitResult { + const res = spawnSync('git', args, { encoding: 'utf-8' }); + return { ok: res.status === 0, stdout: res.stdout ?? '' }; +} + +function setOutputs(mode: string, reason: string, lastReviewedSha: string | null): void { + core.setOutput('mode', mode); + core.setOutput('reason', reason); + core.setOutput('last-reviewed-sha', lastReviewedSha ?? ''); +} + +/** Derive the preserved-full-diff path: "pr.diff" → "pr_full.diff". */ +export function fullDiffPath(diffPath: string): string { + return diffPath.endsWith('.diff') + ? `${diffPath.slice(0, -'.diff'.length)}_full.diff` + : `${diffPath}_full`; +} + +async function fetchReviews( + token: string, + owner: string, + repo: string, + prNumber: number, +): Promise { + const octokit = new Octokit({ auth: token }); + return (await octokit.paginate(octokit.rest.pulls.listReviews, { + owner, + repo, + pull_number: prNumber, + per_page: 100, + })) as ReviewLike[]; +} + +export interface MainDeps { + git?: GitRunner; + fetchReviews?: typeof fetchReviews; +} + +export async function main(diffPath: string, deps: MainDeps = {}): Promise { + const git = deps.git ?? runGit; + const fetch = deps.fetchReviews ?? fetchReviews; + + const enabled = (process.env.INCREMENTAL ?? 'true').trim().toLowerCase() !== 'false'; + if (!enabled) { + core.info('ℹ️ Incremental review disabled via input — running a full review'); + setOutputs('full', 'disabled', null); + return; + } + + const token = process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? ''; + const repository = process.env.GITHUB_REPOSITORY ?? ''; + const prNumber = Number.parseInt(process.env.PR_NUMBER ?? '', 10); + const baseRef = (process.env.BASE_REF ?? '').trim(); + const botLogin = process.env.REVIEW_BOT_LOGIN?.trim() || 'docker-agent'; + + const slashIdx = repository.indexOf('/'); + if (!token || slashIdx < 0 || !Number.isInteger(prNumber) || prNumber <= 0) { + core.warning( + 'incremental-review: missing token, repository, or PR number — falling back to full review', + ); + setOutputs('full', 'error', null); + return; + } + const owner = repository.slice(0, slashIdx); + const repo = repository.slice(slashIdx + 1); + + let fullDiff: string; + try { + fullDiff = readFileSync(diffPath, 'utf-8'); + } catch { + core.warning(`incremental-review: cannot read ${diffPath} — falling back to full review`); + setOutputs('full', 'no-diff', null); + return; + } + + let lastReviewedSha: string | null; + try { + lastReviewedSha = findLastReviewedSha(await fetch(token, owner, repo, prNumber), botLogin); + } catch (err: unknown) { + core.warning( + `incremental-review: failed to list reviews (${err instanceof Error ? err.message : String(err)}) — falling back to full review`, + ); + setOutputs('full', 'error', null); + return; + } + + const plan = planIncrementalReview({ lastReviewedSha, baseRef, git }); + if (plan.mode === 'full') { + core.info(`ℹ️ Full review: ${plan.reason}`); + setOutputs('full', plan.reason, plan.lastReviewedSha); + return; + } + + // planIncrementalReview guarantees lastReviewedSha is a valid ancestor SHA here. + const sha = plan.lastReviewedSha as string; + const outPath = join(tmpdir(), `incremental-${process.pid}.diff`); + try { + if (!git(['diff', sha, 'HEAD', `--output=${outPath}`]).ok) { + core.warning('incremental-review: git diff failed — falling back to full review'); + setOutputs('full', 'error', sha); + return; + } + + const incrementalDiff = readFileSync(outPath, 'utf-8'); + const result = restrictDiffToFiles(incrementalDiff, listDiffFiles(fullDiff)); + + for (const dropped of result.droppedFiles) { + core.info(`⏭️ Dropped from incremental diff (net-zero vs base): ${dropped}`); + } + + if (result.keptFiles === 0) { + // Changes since the last review cancel out against the base — a full + // review still covers the PR correctly, so fall back rather than hand + // the agent an empty diff. + core.info('ℹ️ Incremental diff is empty after restriction — running a full review'); + setOutputs('full', 'net-zero-changes', sha); + return; + } + + const preservedPath = fullDiffPath(diffPath); + copyFileSync(diffPath, preservedPath); + writeFileSync(diffPath, result.restricted, 'utf-8'); + + const fullLines = fullDiff.split('\n').length; + const incLines = result.restricted.split('\n').length; + core.info( + `✅ Incremental review: diffing ${sha.slice(0, 12)}..HEAD ` + + `(${incLines} lines vs ${fullLines} full, ${result.keptFiles} files; ` + + `full diff preserved at ${preservedPath})`, + ); + setOutputs('incremental', 'ok', sha); + } catch (err: unknown) { + core.warning( + `incremental-review failed (${err instanceof Error ? err.message : String(err)}) — falling back to full review`, + ); + setOutputs('full', 'error', sha); + } finally { + rmSync(outPath, { force: true }); + } +} + +if (process.argv[1]?.endsWith('incremental-review.js') && !process.env.VITEST) { + const [, , diffPath] = process.argv; + if (!diffPath) { + process.stderr.write('Usage: incremental-review \n'); + process.exit(1); + } + main(diffPath).catch((err: unknown) => { + // Last-resort fail-open: report a full review rather than failing the step. + core.warning(`incremental-review failed: ${err instanceof Error ? err.message : String(err)}`); + core.setOutput('mode', 'full'); + core.setOutput('reason', 'error'); + core.setOutput('last-reviewed-sha', ''); + }); +} diff --git a/tsup.config.ts b/tsup.config.ts index bab39c4..e1c31d1 100644 --- a/tsup.config.ts +++ b/tsup.config.ts @@ -26,7 +26,9 @@ const entry = { 'auto-filter-diff': src('auto-filter-diff'), 'check-org-membership': src('check-org-membership'), credentials: src('credentials'), + 'dedupe-findings': src('dedupe-findings'), 'filter-diff': src('filter-diff'), + 'incremental-review': src('incremental-review'), main: src('main'), 'mention-reply': src('mention-reply'), 'migrate-consumer-refs': src('migrate-consumer-refs'), From 6629f01bca3f829414b837d5786fa2353ddbc2fb Mon Sep 17 00:00:00 2001 From: Sayt-0 Date: Fri, 3 Jul 2026 21:30:21 +0200 Subject: [PATCH 2/4] fix(review-pr): restore pr.diff when the incremental rewrite fails If writeFileSync throws mid-write after the full diff was already preserved, pr.diff could be left partially written while mode=full is reported, so downstream steps would review a corrupted diff. The catch block now restores pr.diff from the preserved copy and removes pr_full.diff, whose presence would otherwise signal incremental mode to downstream steps. The restore only runs when the backup copy completed, so a partial backup can never overwrite an intact pr.diff. --- .../__tests__/index.test.ts | 42 ++++++++++++++++++- src/incremental-review/index.ts | 18 +++++++- 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/src/incremental-review/__tests__/index.test.ts b/src/incremental-review/__tests__/index.test.ts index 2890efc..08ccfe3 100644 --- a/src/incremental-review/__tests__/index.test.ts +++ b/src/incremental-review/__tests__/index.test.ts @@ -6,9 +6,10 @@ * * These pin the file effects and the fail-open contract: pr.diff is rewritten * only on the happy path (with the full diff preserved), and every error path - * reports mode=full without touching the diff. + * reports mode=full with the full diff left in pr.diff — restored from the + * preserved copy when the rewrite had already started. */ -import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import * as core from '@actions/core'; @@ -16,6 +17,22 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; vi.mock('@actions/core'); +// Passthrough fs mock: writeFileSync leaves a truncated file and throws when +// targeting failWriteAt, simulating a mid-write failure (e.g. disk full). +const fsControl = vi.hoisted(() => ({ failWriteAt: '' })); + +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + const writeFileSync: typeof actual.writeFileSync = (file, data, options) => { + if (fsControl.failWriteAt !== '' && file === fsControl.failWriteAt) { + actual.writeFileSync(file, '', 'utf-8'); + throw new Error('ENOSPC: no space left on device, write'); + } + actual.writeFileSync(file, data, options); + }; + return { ...actual, writeFileSync }; +}); + import type { GitResult, ReviewLike } from '../incremental-review.js'; import { fullDiffPath, main } from '../index.js'; @@ -97,6 +114,7 @@ describe('incremental-review main', () => { beforeEach(() => { vi.clearAllMocks(); + fsControl.failWriteAt = ''; dir = mkdtempSync(join(tmpdir(), 'incremental-review-test-')); diffPath = join(dir, 'pr.diff'); writeFileSync(diffPath, FULL_DIFF, 'utf-8'); @@ -214,6 +232,26 @@ describe('incremental-review main', () => { expect(outputsByName()).toMatchObject({ mode: 'full', reason: 'error' }); expect(readFileSync(diffPath, 'utf-8')).toBe(FULL_DIFF); }); + + it('restores pr.diff from the preserved copy when the incremental rewrite fails', async () => { + fsControl.failWriteAt = diffPath; + + await main(diffPath, { + git: fakeGit(INCREMENTAL_DIFF), + fetchReviews: async () => [completedReview()], + }); + + const outputs = outputsByName(); + expect(outputs.mode).toBe('full'); + expect(outputs.reason).toBe('error'); + expect(outputs['last-reviewed-sha']).toBe(SHA_A); + expect(vi.mocked(core.warning)).toHaveBeenCalledWith(expect.stringContaining('ENOSPC')); + + // pr.diff must be intact and the preserved copy gone, so downstream steps + // see a plain full-review state. + expect(readFileSync(diffPath, 'utf-8')).toBe(FULL_DIFF); + expect(existsSync(fullDiffPath(diffPath))).toBe(false); + }); }); describe('fullDiffPath', () => { diff --git a/src/incremental-review/index.ts b/src/incremental-review/index.ts index 4c58d8a..5941dd9 100644 --- a/src/incremental-review/index.ts +++ b/src/incremental-review/index.ts @@ -28,7 +28,8 @@ * reason "ok" for incremental, otherwise the fallback reason * last-reviewed-sha SHA of the last completed review ("" when none) * - * Fail-open: every error path leaves the diff untouched and reports mode=full — + * Fail-open: every error path reports mode=full and leaves the full PR diff in + * place (restored from the preserved copy if the rewrite had already started) — * a full review is always correct, incremental is only an optimization. */ import { spawnSync } from 'node:child_process'; @@ -143,6 +144,8 @@ export async function main(diffPath: string, deps: MainDeps = {}): Promise // planIncrementalReview guarantees lastReviewedSha is a valid ancestor SHA here. const sha = plan.lastReviewedSha as string; const outPath = join(tmpdir(), `incremental-${process.pid}.diff`); + const preservedPath = fullDiffPath(diffPath); + let preserved = false; try { if (!git(['diff', sha, 'HEAD', `--output=${outPath}`]).ok) { core.warning('incremental-review: git diff failed — falling back to full review'); @@ -166,8 +169,8 @@ export async function main(diffPath: string, deps: MainDeps = {}): Promise return; } - const preservedPath = fullDiffPath(diffPath); copyFileSync(diffPath, preservedPath); + preserved = true; writeFileSync(diffPath, result.restricted, 'utf-8'); const fullLines = fullDiff.split('\n').length; @@ -182,6 +185,17 @@ export async function main(diffPath: string, deps: MainDeps = {}): Promise core.warning( `incremental-review failed (${err instanceof Error ? err.message : String(err)}) — falling back to full review`, ); + if (preserved) { + // The rewrite may have left diffPath partial while mode=full is reported. + // Restore the original and drop the preserved copy, whose presence would + // signal incremental mode downstream. + try { + copyFileSync(preservedPath, diffPath); + rmSync(preservedPath, { force: true }); + } catch { + // Keep the preserved copy: it is the only intact full diff left. + } + } setOutputs('full', 'error', sha); } finally { rmSync(outPath, { force: true }); From 6b6da7797ad4debb2f102b88dae42748712785be Mon Sep 17 00:00:00 2001 From: Sayt-0 Date: Fri, 3 Jul 2026 21:30:30 +0200 Subject: [PATCH 3/4] fix(review-pr): ignore multi-line bold spans in dedupe finding signatures In findingSignature, [^*]+ also matches newlines, so a bold span wrapping across lines pulled every line up to the closing marker into the token set. That inflates the Jaccard union enough to fall below the similarity threshold and miss a duplicate. Restrict the match to single-line bold blocks; multi-line headings now use the existing first-non-empty-line fallback. --- .../__tests__/dedupe-findings.test.ts | 20 +++++++++++++++++++ src/dedupe-findings/dedupe-findings.ts | 13 ++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/dedupe-findings/__tests__/dedupe-findings.test.ts b/src/dedupe-findings/__tests__/dedupe-findings.test.ts index 63b2af4..52b5945 100644 --- a/src/dedupe-findings/__tests__/dedupe-findings.test.ts +++ b/src/dedupe-findings/__tests__/dedupe-findings.test.ts @@ -59,6 +59,14 @@ describe('findingSignature', () => { ]); }); + it('ignores bold spans that wrap across lines and falls back to the first line', () => { + expect(findingSignature('**[high] Race condition\nin cache-refresh**\n\nbody')).toEqual([ + 'high', + 'race', + 'condition', + ]); + }); + it('deduplicates repeated tokens', () => { expect(findingSignature('**error error error**')).toEqual(['error']); }); @@ -104,6 +112,18 @@ describe('dedupeComments', () => { expect(result.dropped).toHaveLength(1); }); + it('drops a duplicate even when the existing bold heading wraps across lines', () => { + const wrapped = + '**[high] Nil pointer dereference on user object\n' + + 'which can crash the request handler when the session store returns an expired entry**'; + const result = dedupeComments( + [fresh()], + [existing({ body: `${wrapped}\n\ndetails\n\n${MARKER}` })], + ); + expect(result.kept).toEqual([]); + expect(result.dropped).toHaveLength(1); + }); + it('keeps a comment whose line is beyond the tolerance', () => { const result = dedupeComments([fresh({ line: 50 })], [existing()]); expect(result.kept).toHaveLength(1); diff --git a/src/dedupe-findings/dedupe-findings.ts b/src/dedupe-findings/dedupe-findings.ts index 36e7196..77282f4 100644 --- a/src/dedupe-findings/dedupe-findings.ts +++ b/src/dedupe-findings/dedupe-findings.ts @@ -71,14 +71,15 @@ const DEFAULT_SIMILARITY_THRESHOLD = 0.5; /** * Extract the normalized token set identifying a finding from a comment body. * - * Prefers the first bold block (`**[severity] summary**` per the posting - * format); falls back to the first non-empty line. A leading `[severity]` / - * `[category]` tag is kept as tokens — it participates in the similarity so a - * "security" and a "logic_error" finding on the same line don't collapse. - * Returns null when no usable text exists. + * Prefers the first single-line bold block (`**[severity] summary**` per the + * posting format); falls back to the first non-empty line. Bold spans that + * wrap across lines are skipped so they cannot inflate the token set. + * A leading `[severity]` / `[category]` tag is kept as tokens — it + * participates in the similarity so a "security" and a "logic_error" finding + * on the same line don't collapse. Returns null when no usable text exists. */ export function findingSignature(body: string): string[] | null { - const bold = body.match(/\*\*([^*]+)\*\*/); + const bold = body.match(/\*\*([^*\n]+)\*\*/); const heading = bold?.[1] ?? body.split('\n').find((line) => line.trim().length > 0) ?? ''; const tokens = heading .toLowerCase() From a20755a407a8bd32c8ff2300b821eaf7d7167f52 Mon Sep 17 00:00:00 2001 From: Sayt0 <138035894+Sayt-0@users.noreply.github.com> Date: Fri, 3 Jul 2026 22:51:33 +0200 Subject: [PATCH 4/4] Update src/incremental-review/incremental-review.ts Co-authored-by: Derek Misler Signed-off-by: Sayt0 <138035894+Sayt-0@users.noreply.github.com> --- src/incremental-review/incremental-review.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/incremental-review/incremental-review.ts b/src/incremental-review/incremental-review.ts index b3f1af7..63e4c34 100644 --- a/src/incremental-review/incremental-review.ts +++ b/src/incremental-review/incremental-review.ts @@ -211,6 +211,9 @@ export function restrictDiffToFiles(diffContent: string, allowed: Set): } else if (sectionLines.length > 0) { if (line.startsWith('--- a/')) sectionFiles.push(line.slice(6)); else if (line.startsWith('+++ b/')) sectionFiles.push(line.slice(6)); + if (line.startsWith('--- a/')) sectionFiles.push(line.slice(6)); + else if (line.startsWith('+++ b/')) sectionFiles.push(line.slice(6)); + else if (line.startsWith('rename from ')) sectionFiles.push(line.slice(12)); else if (line.startsWith('rename to ')) sectionFiles.push(line.slice(10)); sectionLines.push(line); } else {