diff --git a/Model/lib/wdk/model/records/commentTableQueries.xml b/Model/lib/wdk/model/records/commentTableQueries.xml index 2c578927d..9c15eda7e 100644 --- a/Model/lib/wdk/model/records/commentTableQueries.xml +++ b/Model/lib/wdk/model/records/commentTableQueries.xml @@ -64,6 +64,8 @@ + + @@ -94,7 +96,28 @@ END AS pmids_link, gene_counts.geneCount, &&selectReviewed&& - CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization ) as user_name_org + CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization , + CASE + WHEN aiprov.comment_id IS NULL THEN '' + WHEN aiprov.is_edited THEN ' · AI-assisted (edited)' + ELSE ' · AI-assisted (as-is)' + END) as user_name_org, + CASE + WHEN aiprov.comment_id IS NULL + THEN CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization ) + ELSE CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization , + '
AI-assisted · ', + CASE WHEN aiprov.is_edited THEN 'edited' ELSE 'as-is' END, + '') + END as user_name_org_display, + -- download-only AI provenance source: 'pubmed' | 'upload' | 'N/A' + COALESCE(car2.source_kind, 'N/A') as ai_source_kind FROM @REMOTE_COMMENT_SCHEMA@MappedComment c INNER JOIN webready.GeneAttributes_p ga ON c.project_name = ga.project_id AND c.stable_id = ga.source_id @@ -107,8 +130,23 @@ ) files ON c.comment_id = files.comment_id LEFT JOIN ( SELECT comment_id, string_agg(source_id,',') as pmids - FROM @REMOTE_COMMENT_SCHEMA@CommentReference - WHERE database_name='pubmed' + FROM ( + SELECT comment_id, source_id + FROM @REMOTE_COMMENT_SCHEMA@CommentReference + WHERE database_name='pubmed' + UNION + SELECT cap.comment_id, car.pubmed_id AS source_id + FROM @REMOTE_COMMENT_SCHEMA@comment_ai_provenance cap + JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_run car ON cap.run_job_id = car.job_id + WHERE car.source_kind = 'pubmed' AND car.pubmed_id IS NOT NULL + UNION + SELECT cap.comment_id, car.external_ref AS source_id + FROM @REMOTE_COMMENT_SCHEMA@comment_ai_provenance cap + JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_run car ON cap.run_job_id = car.job_id + WHERE car.source_kind = 'upload' + AND car.external_ref_kind = 'pubmed' + AND car.external_ref IS NOT NULL + ) merged GROUP BY comment_id ) refs ON c.comment_id = refs.comment_id LEFT JOIN ( @@ -117,6 +155,10 @@ WHERE stable_id IN (SELECT source_id FROM webready.GeneAttributes_p where org_abbrev IN (%%PARTITION_KEYS%%)) GROUP BY comment_id ) gene_counts ON c.comment_id = gene_counts.comment_id + LEFT JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_provenance aiprov + ON c.comment_id = aiprov.comment_id + LEFT JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_run car2 + ON aiprov.run_job_id = car2.job_id LEFT JOIN ( SELECT dr.primary_identifier AS comment_id , ta.gene_source_id FROM sres.dbref dr diff --git a/Model/lib/wdk/model/records/geneRecord.xml b/Model/lib/wdk/model/records/geneRecord.xml index 655887f05..87e25ed2e 100644 --- 
a/Model/lib/wdk/model/records/geneRecord.xml +++ b/Model/lib/wdk/model/records/geneRecord.xml @@ -1679,22 +1679,26 @@ name" internal="true"/> displayName="User Comments" inReportMaker="true" queryRef="CommentTables.GeneComments"> + - - - - + + + + + + + + - - - - @@ -1710,7 +1714,12 @@ name" internal="true"/> - + + + + + + @@ -1723,22 +1732,25 @@ name" internal="true"/> inReportMaker="false" displayName="Community Annotations" queryRef="CommentTables.CommunityComments"> + - - - - + + + + + + + + - - - - diff --git a/docs/superpowers/plans/2026-06-24-ai-provenance-gene-comments-table.md b/docs/superpowers/plans/2026-06-24-ai-provenance-gene-comments-table.md new file mode 100644 index 000000000..9c98b7b7c --- /dev/null +++ b/docs/superpowers/plans/2026-06-24-ai-provenance-gene-comments-table.md @@ -0,0 +1,240 @@ +# AI Provenance in Gene-Page User Comments Table — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Show, in the gene-page User Comments table, which comments were AI-assisted and whether the reviewer edited the AI text or published it as-is — plus surface the AI source PMID in the existing PubMed column. + +**Architecture:** Pure WDK-model change in one repo (`ApiCommonModel`). Extend the `GeneComments` SQL query to join the two AI sidecar tables, emit a styled-pill "display" copy of the `user_name_org` column alongside a plain-text copy, and `UNION` the AI run-row PMID into the PubMed aggregate. Wire the two copies into `geneRecord.xml` so the on-screen table shows the pill while downloads/sorting use clean text. + +**Tech Stack:** WDK model XML, PostgreSQL SQL (the comment DB), Maven build. + +## Global Constraints + +- **Scope: gene page only.** Modify only `CommentTables.GeneComments` and the `UserComments` table in `geneRecord.xml`. 
Do **not** touch `CommunityComments`, `GenomeComments`, `PopsetComments`, or their record tables. +- **Single repo.** All edits are in `ApiCommonModel`. No web-monorepo / front-end change (pill styling is inline CSS). +- **Sidecar tables, exact names** (in the comment schema, reached via the `@REMOTE_COMMENT_SCHEMA@` macro): + - `comment_ai_provenance` — columns used: `comment_id`, `run_job_id`, `is_edited`. + - `comment_ai_run` — columns used: `job_id`, `source_kind`, `pubmed_id`. +- **Join keys:** `comment_ai_provenance.comment_id = MappedComment.comment_id`; `comment_ai_provenance.run_job_id = comment_ai_run.job_id`. `comment_ai_provenance` is 1 row per `comment_id` (PK), so the join cannot multiply comment rows. +- **Pill style (inline CSS, verbatim):** `display:inline-block;margin-left:6px;padding:1px 6px;border-radius:8px;background-color:#0a7c8a;color:#fff;font-size:0.85em;font-weight:500;white-space:nowrap;` +- **Pill text:** `AI-assisted · edited` or `AI-assisted · as-is`. **Plain-text suffix** (download column): ` · AI-assisted (edited)` or ` · AI-assisted (as-is)`. +- **Build prerequisite** (per project `CLAUDE.md`): `install/` and `WDK/` must already be built into `~/.m2` before building this module. + +--- + +## File Structure + +| File | Responsibility | Action | +|------|----------------|--------| +| `Model/lib/wdk/model/records/commentTableQueries.xml` | `GeneComments` SQL: joins, split `user_name_org`/`user_name_org_display` columns, PMID union | Modify | +| `Model/lib/wdk/model/records/geneRecord.xml` | `UserComments` table: present the two "Made by" columns | Modify | + +There are no automated unit tests for WDK model SQL/XML in this repo. The per-change gates are **(1) XML well-formedness** (`xmllint --noout`) and **(2) the Maven build**. Full validation is a **manual step on a deployed instance** (and an optional direct-SQL smoke test against the dev comment DB) — see the Manual Verification section at the end. 
+ +--- + +## Task 1: Extend the `GeneComments` SQL query + +**Files:** +- Modify: `Model/lib/wdk/model/records/commentTableQueries.xml` (the `GeneComments` query, lines ~40–162) + +**Interfaces:** +- Produces (for Task 2): SQL output columns `user_name_org` (plain text incl. provenance suffix) and `user_name_org_display` (name + inline-styled pill HTML). +- Produces: `pmids` aggregate now also contains AI run-row PMIDs. + +All four edits below are in the same `GeneComments` query block. Apply them, then run the single verification at the end of the task. + +- [ ] **Step 1: Add the `user_name_org_display` column declaration** + +In the column list for `GeneComments` (currently `commentTableQueries.xml:66`), add the new column right after the existing `user_name_org` declaration. + +Find: +```xml + + +``` +Replace with: +```xml + + + +``` + +- [ ] **Step 2: Split the `user_name_org` SELECT expression into plain + display** + +Find the final SELECT item (currently `commentTableQueries.xml:96-97`): +```sql + &&selectReviewed&& + CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization ) as user_name_org + FROM +``` +Replace with: +```sql + &&selectReviewed&& + CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization , + CASE + WHEN aiprov.comment_id IS NULL THEN '' + WHEN aiprov.is_edited THEN ' · AI-assisted (edited)' + ELSE ' · AI-assisted (as-is)' + END) as user_name_org, + CASE + WHEN aiprov.comment_id IS NULL + THEN CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization ) + ELSE CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization , + '<span style="display:inline-block;margin-left:6px;padding:1px 6px;border-radius:8px;background-color:#0a7c8a;color:#fff;font-size:0.85em;font-weight:500;white-space:nowrap;">AI-assisted &middot; ', + CASE WHEN aiprov.is_edited THEN 'edited' ELSE 'as-is' END, + '</span>') + END as user_name_org_display + FROM +``` +(The ` · ` separator in the plain column is a literal middot UTF-8 character; the file is UTF-8. The display column instead uses the `&middot;` HTML entity inside the pill markup, which the record-table renderer displays as `·`.) 
+ +- [ ] **Step 3: Add the `comment_ai_provenance` LEFT JOIN** + +Find the `gene_counts` LEFT JOIN block (currently ends at `commentTableQueries.xml:119`): +```sql + ) gene_counts ON c.comment_id = gene_counts.comment_id + LEFT JOIN ( + SELECT dr.primary_identifier AS comment_id , ta.gene_source_id +``` +Replace with (insert the new join between `gene_counts` and `integrated_comments`): +```sql + ) gene_counts ON c.comment_id = gene_counts.comment_id + LEFT JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_provenance aiprov + ON c.comment_id = aiprov.comment_id + LEFT JOIN ( + SELECT dr.primary_identifier AS comment_id , ta.gene_source_id +``` +(Only `comment_ai_provenance` is joined here — that is all the `user_name_org` columns need. The run table is joined inside the PMID subquery in the next step.) + +- [ ] **Step 4: Union the AI run-row PMID into the `refs` subquery** + +Find the `refs` subquery (currently `commentTableQueries.xml:108-113`): +```sql + LEFT JOIN ( + SELECT comment_id, string_agg(source_id,',') as pmids + FROM @REMOTE_COMMENT_SCHEMA@CommentReference + WHERE database_name='pubmed' + GROUP BY comment_id + ) refs ON c.comment_id = refs.comment_id +``` +Replace with: +```sql + LEFT JOIN ( + SELECT comment_id, string_agg(source_id,',') as pmids + FROM ( + SELECT comment_id, source_id + FROM @REMOTE_COMMENT_SCHEMA@CommentReference + WHERE database_name='pubmed' + UNION + SELECT cap.comment_id, car.pubmed_id AS source_id + FROM @REMOTE_COMMENT_SCHEMA@comment_ai_provenance cap + JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_run car ON cap.run_job_id = car.job_id + WHERE car.source_kind = 'pubmed' AND car.pubmed_id IS NOT NULL + ) merged + GROUP BY comment_id + ) refs ON c.comment_id = refs.comment_id +``` +(`UNION` (not `UNION ALL`) dedupes a PMID that is present both as a manual `CommentReference` and as the AI source. The downstream `pmids_link` CASE is unchanged.) 
+ +- [ ] **Step 5: Verify XML well-formedness** + +Run: +```bash +cd /home/maccallr/work/ai-wdk/project_home/ApiCommonModel +xmllint --noout Model/lib/wdk/model/records/commentTableQueries.xml && echo WELL_FORMED +``` +Expected: prints `WELL_FORMED` with no parser errors. (Catches a missing quote/paren/tag from the SQL edits.) + +- [ ] **Step 6: Commit** + +```bash +cd /home/maccallr/work/ai-wdk/project_home/ApiCommonModel +git add Model/lib/wdk/model/records/commentTableQueries.xml +git commit -m "feat: AI provenance + source PMID in GeneComments query + +Join comment_ai_provenance for the Made-by pill, emit a plain +user_name_org (downloads/sort) plus a styled user_name_org_display +column, and union the AI run-row PMID into the PubMed aggregate. + +Co-Authored-By: Claude Opus 4.8 " +``` + +--- + +## Task 2: Present the two "Made by" columns in `geneRecord.xml` + +**Files:** +- Modify: `Model/lib/wdk/model/records/geneRecord.xml` (the `UserComments` table, line ~1713) + +**Interfaces:** +- Consumes (from Task 1): SQL columns `user_name_org` and `user_name_org_display`. + +- [ ] **Step 1: Replace the single Made-by columnAttribute with two** + +Find (currently `geneRecord.xml:1713`): +```xml + +``` +Replace with: +```xml + + + + +``` +(`FieldScope` treats `internal` and `inReportMaker` independently: on screen the `NON_INTERNAL` scope hides `user_name_org` and shows `user_name_org_display`; in report maker the `REPORT_MAKER` scope excludes `user_name_org_display` and includes the plain `user_name_org`.) + +- [ ] **Step 2: Verify XML well-formedness** + +Run: +```bash +cd /home/maccallr/work/ai-wdk/project_home/ApiCommonModel +xmllint --noout Model/lib/wdk/model/records/geneRecord.xml && echo WELL_FORMED +``` +Expected: prints `WELL_FORMED`. 
+ +- [ ] **Step 3: Build the model module** + +Run (per project `CLAUDE.md` — assumes `install/` and `WDK/` already built into `~/.m2`): +```bash +cd /home/maccallr/work/ai-wdk/project_home/ApiCommonModel +mvn -q clean install +``` +Expected: `BUILD SUCCESS`. This confirms both modified files assemble into the model artifact. (Build failure here most likely means a typo in a column name or malformed XML.) + +- [ ] **Step 4: Commit** + +```bash +cd /home/maccallr/work/ai-wdk/project_home/ApiCommonModel +git add Model/lib/wdk/model/records/geneRecord.xml +git commit -m "feat: show AI-assisted pill in gene-page User Comments Made-by column + +Split the Made-by attribute into a plain internal column (downloads/sort) +and a display column carrying the inline-styled AI-assisted pill. + +Co-Authored-By: Claude Opus 4.8 " +``` + +--- + +## Manual Verification (deployed instance) + +These cannot be automated in this repo (no DB/model-runtime in the build). Run them on a deployed dev instance whose comment DB has the two sidecar tables exposed under `@REMOTE_COMMENT_SCHEMA@`. They mirror the spec's verification checklist. + +- [ ] **Coordination precheck:** confirm `comment_ai_run` and `comment_ai_provenance` are reachable under the schema that `@REMOTE_COMMENT_SCHEMA@` resolves to (the same place `CommentReference` lives). If not yet mapped, the query errors — escalate before deploying. +- [ ] **Optional direct-SQL smoke test:** run the edited `GeneComments` SQL against the dev comment DB (substitute the real schema for `@REMOTE_COMMENT_SCHEMA@`, a real `org_abbrev`/`source_id`) for a gene known to have (a) a human comment, (b) an edited AI comment, (c) an as-is AI comment. Confirm `user_name_org`, `user_name_org_display`, and `pmids` come back as expected and the row count is unchanged vs the pre-change query (the provenance join must not multiply rows). +- [ ] **Human comment (regression):** gene page shows "Made by" = name+org, **no pill**; PubMed column unchanged. 
+- [ ] **AI comment, edited:** shows the teal `AI-assisted · edited` pill; its source PMID appears (linked) in the PubMed column even with no `CommentReference` row. +- [ ] **AI comment, as-is:** shows `AI-assisted · as-is`. +- [ ] **Upload-source AI comment:** shows the pill; PubMed column empty (no PMID) unless the run row carries a PubMed `external_ref` (`external_ref_kind = 'pubmed'`), which the `refs` union also folds in. +- [ ] **Download:** add the User Comments table to a report → "Made by" column is plain text `Name, Org · AI-assisted (edited)` with **no `<span>` markup**; no separate display column appears. +- [ ] **PMID dedupe:** an AI comment whose source PMID is also a manual `CommentReference` shows that PMID once. + +--- + +## Self-Review + +- **Spec coverage:** join (Task 1 Step 3) ✓; two-column split (Task 1 Step 2, Task 2 Step 1) ✓; inline-CSS pill (Task 1 Step 2, constraint) ✓; clean-download split via `internal`/`inReportMaker` (Task 2 Step 1) ✓; PMID union (Task 1 Step 4) ✓; gene-only scope (Global Constraints) ✓; coordination note + verification (Manual Verification) ✓. +- **Placeholder scan:** none — every step shows exact find/replace content and exact commands. +- **Type/name consistency:** SQL columns `user_name_org` / `user_name_org_display` are produced in Task 1 and consumed by the same names in Task 2; join aliases `aiprov` (top level) and `cap`/`car` (subquery) are self-contained per scope. diff --git a/docs/superpowers/specs/2026-06-24-ai-provenance-gene-comments-table-design.md b/docs/superpowers/specs/2026-06-24-ai-provenance-gene-comments-table-design.md new file mode 100644 index 000000000..ed2209241 --- /dev/null +++ b/docs/superpowers/specs/2026-06-24-ai-provenance-gene-comments-table-design.md @@ -0,0 +1,230 @@ +# Design: AI-assisted provenance in the gene-page User Comments table + +**Date:** 2026-06-24 +**Repo:** `ApiCommonModel` +**Status:** Approved for implementation planning + +## Context + +VEuPathDB has added a new kind of user comment — an AI-assisted gene-publication +summary. 
The comment-generation, review, and publish flow is complete: publishing +an AI-assisted comment creates an ordinary `comments` row plus two sidecar rows in +the comment schema: + +- `comment_ai_provenance` (per published comment): `comment_id`, `run_job_id`, + `is_edited` (true iff the published text differs from the AI original), + `created_at`. +- `comment_ai_run` (shared LLM-output cache, keyed by `job_id`): source kind + (`pubmed` | `upload`), `pubmed_id`, `external_url`, `external_title`, + `pdf_content_sha256`, the AI original headline/content, etc. + +See `ApiCommonWebsite/Service/CLAUDE-ai-user-comments.md` for the full back-end +design and `GetCommentAiProvenanceQuery.java` for the canonical join pattern +(`comment_ai_provenance p JOIN comment_ai_run r ON p.run_job_id = r.job_id`). + +This spec covers **one thing only**: showing users which rows in the gene-page +User Comments table were AI-assisted, and whether the human reviewer edited the +AI text or published it as-is. + +## Goal + +In the gene-page **User Comments** table (`CommentTables.GeneComments` → +`UserComments` table in `geneRecord.xml`), each comment row should communicate: + +1. **Whether the comment was AI-assisted** (a `comment_ai_provenance` row exists). +2. **If AI-assisted, whether it was edited or published as-is** (`is_edited`). + +An AI-assisted comment is still published by a real, logged-in user who reviewed +it — so the framing is "made by Dr So-and-so, **AI-assisted**", never "made by AI". 
+ +## Decisions (all approved) + +| # | Decision | Choice | +|---|----------|--------| +| 1 | What to show | AI-assisted yes/no **+** edited-vs-as-is | +| 2 | Placement | Fold into the existing **"Made by"** column (no new column) | +| 3 | Visual treatment | A small **styled teal pill** after the name (matches the FE "Beta" badge) | +| 4 | Pill styling mechanism | **Inline CSS** in the SQL string (self-contained, single repo, matches `snp_context` precedent) | +| 5 | Downloads / sort | Two-column split so downloads are **HTML-free plain text** carrying ` · AI-assisted (edited\|as-is)`, and sort keys off the plain name | +| 6 | Source paper | Fold `comment_ai_run.pubmed_id` into the **existing PubMed column** via a `UNION` in the `refs` subquery | +| 7 | Records/scope | **Gene page only** (`GeneComments`); other comment tables untouched | + +## Technical findings (verified) + +- **Reachability:** `GeneComments` already `LEFT JOIN`s sibling tables in the + comment schema (`CommentFile`, `CommentReference`). `comment_ai_provenance` and + `comment_ai_run` live in that same schema, so they are reachable with the + identical `@REMOTE_COMMENT_SCHEMA@` prefix. +- **HTML rendering:** the record-table renderer renders a cell value as HTML when + it looks like HTML (`RecordTable.jsx`, `MesaUtils.isHtml(val)`) — this is how + the existing `pmids_link` `<a>` link renders. So an inline-styled `<span>` pill in a + column value will render. +- **`internal` vs `inReportMaker` are independent scopes** (`FieldScope.java`): + the report-maker scope only excludes `inReportMaker=false` fields (internal + fields still flow into reports); the on-screen scope excludes `internal=true`. + Both flags are settable per `columnAttribute` (`wdkModel.rng:1468-1473`). This + is what makes the two-column split work cleanly. 
+- **No HTML stripping** in the tabular reporter — a column's value is dumped + verbatim, which is exactly why the *display* (pill) column must be excluded from + reports and a *plain* column provided for download. + +## Changes + +### 1. `Model/lib/wdk/model/records/commentTableQueries.xml` — `GeneComments` query + +**a. Add the provenance joins** (same pattern as the existing `files` / `refs` +joins): + +```sql +LEFT JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_provenance p + ON c.comment_id = p.comment_id +LEFT JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_run r + ON p.run_job_id = r.job_id +``` + +`p.comment_id IS NOT NULL` ⇒ AI-assisted. `p.is_edited` ⇒ edited vs as-is. + +**b. Replace the single `user_name_org` SELECT expression with two columns.** +The current expression is: + +```sql +CONCAT(u.first_name , ' ' , u.last_name , ', ' , u.organization ) as user_name_org +``` + +Becomes a base name expression plus two derived columns: + +```sql +-- plain text: download- and sort-safe; carries a plain provenance suffix +CONCAT( + u.first_name, ' ', u.last_name, ', ', u.organization, + CASE + WHEN p.comment_id IS NULL THEN '' + WHEN p.is_edited THEN ' · AI-assisted (edited)' + ELSE ' · AI-assisted (as-is)' + END +) AS user_name_org, + +-- display: name + inline-styled teal pill (rendered as HTML on screen) +CONCAT( + u.first_name, ' ', u.last_name, ', ', u.organization, + CASE + WHEN p.comment_id IS NULL THEN '' + WHEN p.is_edited THEN + ' ' + || 'AI-assisted · edited' + ELSE + ' ' + || 'AI-assisted · as-is' + END +) AS user_name_org_display +``` + +Notes: +- Postgres string concatenation uses `||`; the existing query already mixes + `CONCAT(...)` and `||` styles — match whichever the surrounding file prefers at + implementation time (the `pmids_link` CASE uses `CONCAT`). +- Both `edited` and `as-is` use the **same** teal (`#0a7c8a`); only the text + differs. (Hex finalisable at implementation; chosen to read as a "Beta"-style + teal pill.) 
+- Add the new `user_name_org_display` column declaration to the + `GeneComments` query's column list (alongside the existing + `user_name_org` declaration). + +**c. Fold the AI source PMID into the existing PubMed column.** Replace the +current `refs` subquery: + +```sql +LEFT JOIN ( + SELECT comment_id, string_agg(source_id,',') as pmids + FROM @REMOTE_COMMENT_SCHEMA@CommentReference + WHERE database_name='pubmed' + GROUP BY comment_id +) refs ON c.comment_id = refs.comment_id +``` + +with a `UNION` that adds the run-row PMID for AI comments: + +```sql +LEFT JOIN ( + SELECT comment_id, string_agg(source_id, ',') AS pmids + FROM ( + SELECT comment_id, source_id + FROM @REMOTE_COMMENT_SCHEMA@CommentReference + WHERE database_name = 'pubmed' + UNION -- UNION (not ALL) dedupes a PMID present in both + SELECT p.comment_id, r.pubmed_id AS source_id + FROM @REMOTE_COMMENT_SCHEMA@comment_ai_provenance p + JOIN @REMOTE_COMMENT_SCHEMA@comment_ai_run r ON p.run_job_id = r.job_id + WHERE r.source_kind = 'pubmed' AND r.pubmed_id IS NOT NULL + ) merged + GROUP BY comment_id +) refs ON c.comment_id = refs.comment_id +``` + +The downstream `pmids_link` CASE (which builds the `<a>` link from `refs.pmids`) +is unchanged and keeps working. Upload-source AI comments have no PMID +(`external_url` instead) and correctly do not appear in the PubMed column. + +### 2. `Model/lib/wdk/model/records/geneRecord.xml` — `UserComments` table + +Currently: + +```xml + +``` + +Becomes two `columnAttribute`s: + +```xml + + + + +``` + +Result: +- **On screen** (`NON_INTERNAL` scope): `user_name_org` is hidden; `user_name_org_display` + renders as "Made by" with the pill. +- **Report maker / download** (`REPORT_MAKER` scope): `user_name_org_display` is + excluded; `user_name_org` is included as "Made by", clean plain text. +- **Sort / search**: operate on the plain `user_name_org`. 
+ +## Coordination / deployment note (not a code change here) + +The `comment_ai_run` and `comment_ai_provenance` tables must be reachable under +`@REMOTE_COMMENT_SCHEMA@` (the website's mapped/FDW view of the comment DB), the +same way `CommentReference` and `CommentFile` already are. Confirm with whoever +provisions the comment-DB replication/mapping that the two new sidecar tables are +exposed there before this query ships. If they are not yet mapped, the joins will +fail at query time. + +## Out of scope + +- Sorting *by AI-assisted status* as a first-class field (the plain + `user_name_org` suffix is incidentally sortable; no dedicated sort field). +- De-duplication warnings. +- `/user-comments/show` page changes (separate follow-up). +- Other comment tables (`CommunityComments`, `GenomeComments`, `PopsetComments`). +- Making the pill themeable from the front-end (rejected in favour of inline CSS; + would require a shared CSS class and web-monorepo coordination). + +## Verification + +1. **Build:** the model builds cleanly (`mvn clean install` per project + `CLAUDE.md` build order: `install/` → `WDK/` → target module). +2. **Human comment (regression):** a gene with an ordinary user comment shows + "Made by" with name+org and **no pill**; PubMed column unchanged. +3. **AI comment, edited:** a published AI comment with `is_edited=true` shows the + teal `AI-assisted · edited` pill after the name; its source PMID appears in the + PubMed column (as a link) even though no `CommentReference` row exists. +4. **AI comment, as-is:** `is_edited=false` shows `AI-assisted · as-is`. +5. **Upload-source AI comment:** shows the pill; PubMed column empty (no PMID). +6. **Download:** add the User Comments table to a report/download → "Made by" + column contains plain text `Name, Org · AI-assisted (edited)` with **no HTML + markup**; no `user_name_org_display` column appears. +7. 
**PMID dedupe:** an AI comment whose source PMID is also present as a manual + `CommentReference` shows that PMID **once** (UNION dedupe).