diff --git a/survey-psychometric-validity-assistant/README.md b/survey-psychometric-validity-assistant/README.md new file mode 100644 index 00000000..f25d32f1 --- /dev/null +++ b/survey-psychometric-validity-assistant/README.md @@ -0,0 +1,26 @@ +# Survey Psychometric Validity Assistant + +Self-contained SCIBASE issue #16 slice for AI-assisted peer review of survey manuscripts. The assistant blocks or escalates AI review output when a submitted survey scale has psychometric validity gaps that would make the downstream review misleading. + +The module uses synthetic packets only and has no network calls, external AI APIs, private manuscripts, credentials, or live data. + +## What it checks + +- reverse-coded item handling drift +- low Cronbach alpha or missing internal-consistency evidence +- factor-loading cross-loads and weak primary loadings +- confirmatory factor analysis sample-size shortfall +- missing or inconsistent Likert anchors +- construct-claim mismatch between manuscript claims and validated scales +- release readiness for AI-generated peer-review output + +## Run + +```bash +npm run check +npm test +npm run demo +npm run demo:video +``` + +Demo outputs are written to `reports/`. 
import { mkdir, writeFile } from "node:fs/promises";
import { fileURLToPath } from "node:url";
import { evaluateSurveyPacket, renderMarkdownReport } from "./src/guard.js";
import { cleanPacket, riskyPacket } from "./src/samplePackets.js";

/**
 * Escapes the characters that are significant inside XML text nodes and
 * double-quoted attribute values, so packet-derived values cannot break the
 * generated SVG.
 *
 * @param {unknown} value - Value to embed; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}

/**
 * Renders the one-page SVG summary card for an evaluation result.
 *
 * The card uses a 960x540 canvas with a framed panel, six text rows and a
 * two-layer bar at the bottom.
 *
 * @param {{packetId: string, decision: string, severityScore: number, findingCount: number}} result
 *   Result object as returned by `evaluateSurveyPacket`.
 * @returns {string} A standalone SVG document (no trailing newline).
 */
function renderSummarySvg(result) {
  const font = `font-family="Arial, Helvetica, sans-serif"`;
  return `<svg xmlns="http://www.w3.org/2000/svg" width="960" height="540" viewBox="0 0 960 540" role="img" aria-label="Survey Psychometric Validity Assistant summary">
  <rect width="960" height="540" fill="#f8fafc"/>
  <rect x="48" y="52" width="864" height="436" fill="none" stroke="#cbd5e1" stroke-width="2"/>
  <text x="78" y="118" ${font} font-size="34" fill="#111827">Survey Psychometric Validity Assistant</text>
  <text x="78" y="166" ${font} font-size="23" fill="#334155">Packet: ${escapeXml(result.packetId)}</text>
  <text x="78" y="222" ${font} font-size="34" fill="#991b1b">Decision: ${escapeXml(result.decision)}</text>
  <text x="78" y="272" ${font} font-size="22" fill="#111827">Severity score: ${escapeXml(result.severityScore)} | Findings: ${escapeXml(result.findingCount)}</text>
  <text x="78" y="320" ${font} font-size="22" fill="#111827">Top gates: reverse coding, reliability, CFA sample size, factor cross-loads</text>
  <text x="78" y="368" ${font} font-size="22" fill="#111827">Action: hold AI peer-review release for psychometric review</text>
  <rect x="78" y="402" width="660" height="18" fill="#fee2e2"/>
  <rect x="78" y="402" width="540" height="18" fill="#ef4444"/>
</svg>`;
}

// All demo artifacts are written next to this script, under ./reports/.
const reportsDir = new URL("./reports/", import.meta.url);
await mkdir(reportsDir, { recursive: true });

const clean = evaluateSurveyPacket(cleanPacket);
const risky = evaluateSurveyPacket(riskyPacket);

// The four outputs do not depend on each other, so write them concurrently.
await Promise.all([
  writeFile(new URL("clean-result.json", reportsDir), JSON.stringify(clean, null, 2)),
  writeFile(new URL("risky-result.json", reportsDir), JSON.stringify(risky, null, 2)),
  writeFile(new URL("risky-report.md", reportsDir), renderMarkdownReport(risky)),
  writeFile(new URL("summary.svg", reportsDir), renderSummarySvg(risky)),
]);

// fileURLToPath yields a decoded, platform-native path; URL#pathname would stay
// percent-encoded and keep a leading slash before a Windows drive letter.
console.log(
  JSON.stringify(
    { clean: clean.decision, risky: risky.decision, reportsDir: fileURLToPath(reportsDir) },
    null,
    2
  )
);
import { spawnSync } from "node:child_process";
import { existsSync } from "node:fs";
import { mkdir } from "node:fs/promises";
import { fileURLToPath } from "node:url";

// Font files probed per platform when DEMO_FONT_FILE is not set. The first
// existing path wins.
const FONT_CANDIDATES = {
  win32: ["C:/Windows/Fonts/arial.ttf"],
  darwin: ["/System/Library/Fonts/Supplemental/Arial.ttf", "/Library/Fonts/Arial.ttf"],
  linux: [
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
  ],
};

/**
 * Picks the font file handed to ffmpeg's drawtext filter.
 *
 * @returns {string|null} The DEMO_FONT_FILE environment variable when set,
 *   otherwise the first existing candidate for the current platform, otherwise
 *   null.
 */
function resolveFontFile() {
  const override = process.env.DEMO_FONT_FILE;
  if (override) {
    return override;
  }
  const candidates = FONT_CANDIDATES[process.platform] ?? [];
  return candidates.find((candidate) => existsSync(candidate)) ?? null;
}

/**
 * Rewrites a font path into the form this script embeds in the filter string:
 * forward slashes only, with every ":" written as "\:" (so a Windows drive
 * prefix becomes "C\:/...").
 *
 * @param {string} fontPath - Native or forward-slash font path.
 * @returns {string} The path as embedded in the drawtext option.
 */
function escapeFilterPath(fontPath) {
  return fontPath.replace(/\\/g, "/").replace(/:/g, "\\:");
}

const reportsDir = new URL("./reports/", import.meta.url);
await mkdir(reportsDir, { recursive: true });

const output = new URL("demo.mp4", reportsDir);

const fontFile = resolveFontFile();
// NOTE(review): with no font file found, the fontfile option is omitted and
// drawtext must locate a default font itself - presumably only possible on
// ffmpeg builds with fontconfig support; confirm on the target machine.
const fontOption = fontFile ? `fontfile='${escapeFilterPath(fontFile)}':` : "";

/** Builds one drawtext filter entry; every text row shares the x=78 left margin. */
const drawText = (text, y, fontSize, fontColor) =>
  `drawtext=${fontOption}text='${text}':x=78:y=${y}:fontsize=${fontSize}:fontcolor=${fontColor}`;

const filter = [
  "color=c=0xf8fafc:s=960x540:d=5:r=12",
  "drawbox=x=48:y=52:w=864:h=436:color=0xcbd5e1:t=2",
  drawText("Survey Psychometric Validity Assistant", 92, 34, "0x111827"),
  drawText("AI review release gate for survey manuscripts", 146, 23, "0x334155"),
  drawText("Checks reverse coding, reliability, factor loadings, CFA sample size", 204, 22, "0x111827"),
  drawText("Risky packet decision HOLD", 276, 34, "0x991b1b"),
  drawText("Action - psychometric reviewer required before AI output is trusted", 344, 22, "0x111827"),
  "drawbox=x=78:y=402:w=660:h=18:color=0xfee2e2:t=18",
  "drawbox=x=78:y=402:w=540:h=18:color=0xef4444:t=18",
].join(",");

const result = spawnSync(
  "ffmpeg",
  ["-y", "-f", "lavfi", "-i", filter, "-pix_fmt", "yuv420p", "-movflags", "+faststart", fileURLToPath(output)],
  { stdio: "inherit" }
);

// spawnSync reports a launch failure (for example a missing binary) through
// `error` and leaves `status` null, so check it first to keep the real cause.
if (result.error) {
  throw new Error("could not launch ffmpeg; is it installed and on PATH?", { cause: result.error });
}

if (result.status !== 0) {
  const reason = result.signal ? `signal ${result.signal}` : `status ${result.status}`;
  throw new Error(`ffmpeg failed with ${reason}`);
}

console.log(`wrote ${fileURLToPath(output)}`);
"demo:video": "node make-demo-video.js" + }, + "license": "MIT" +} diff --git a/survey-psychometric-validity-assistant/reports/clean-result.json b/survey-psychometric-validity-assistant/reports/clean-result.json new file mode 100644 index 00000000..8baa29d1 --- /dev/null +++ b/survey-psychometric-validity-assistant/reports/clean-result.json @@ -0,0 +1,13 @@ +{ + "packetId": "survey-clean-001", + "title": "Validated Remote Collaboration Burnout Scale", + "decision": "RELEASE", + "severityScore": 0, + "findingCount": 0, + "findings": [], + "releaseGate": { + "canReleaseAiReview": true, + "requiresPsychometricReviewer": false, + "rationale": "Psychometric evidence is sufficient for AI peer-review release." + } +} \ No newline at end of file diff --git a/survey-psychometric-validity-assistant/reports/demo.mp4 b/survey-psychometric-validity-assistant/reports/demo.mp4 new file mode 100644 index 00000000..27378144 Binary files /dev/null and b/survey-psychometric-validity-assistant/reports/demo.mp4 differ diff --git a/survey-psychometric-validity-assistant/reports/risky-report.md b/survey-psychometric-validity-assistant/reports/risky-report.md new file mode 100644 index 00000000..07cdf89f --- /dev/null +++ b/survey-psychometric-validity-assistant/reports/risky-report.md @@ -0,0 +1,30 @@ +# Survey Psychometric Validity Report + +Packet: survey-risky-017 +Decision: HOLD +Severity score: 39 +Findings: 17 + +## Findings +- [medium] thin_likert_anchor_set: Survey reports 3 Likert anchors; expected at least 5. +- [high] cfa_sample_size_shortfall: CFA sample size 96 is below 280 for 28 parameters. +- [high] low_internal_consistency: Workload Anxiety Draft Scale reports alpha 0.58, below 0.7. +- [medium] weak_primary_loading: wa_1 has primary loading 0.42, below 0.5. +- [critical] reverse_coding_drift: wa_2 is reverse coded but not reverse scored before scale scoring. +- [medium] weak_primary_loading: wa_2 has primary loading 0.39, below 0.5. 
+- [medium] weak_primary_loading: wa_3 has primary loading 0.44, below 0.5. +- [medium] weak_primary_loading: wa_4 has primary loading 0.31, below 0.5. +- [high] factor_cross_loading: wa_1 has secondary loading 0.41, above 0.32. +- [high] factor_cross_loading: wa_3 has secondary loading 0.37, above 0.32. +- [high] missing_internal_consistency: Productivity Perception Index does not report Cronbach alpha or equivalent reliability evidence. +- [medium] weak_primary_loading: ppi_1 has primary loading 0.47, below 0.5. +- [medium] weak_primary_loading: ppi_2 has primary loading 0.36, below 0.5. +- [high] factor_cross_loading: ppi_2 has secondary loading 0.34, above 0.32. +- [high] construct_claim_mismatch: Manuscript claims anxiety, but no submitted scale validates that construct directly. +- [high] construct_claim_mismatch: Manuscript claims productivity, but no submitted scale validates that construct directly. +- [high] construct_claim_mismatch: Manuscript claims clinical burnout, but no submitted scale validates that construct directly. + +## Release gate +- Can release AI review: false +- Requires psychometric reviewer: true +- Rationale: Psychometric validity issues must be reviewed before AI output is trusted. 
diff --git a/survey-psychometric-validity-assistant/reports/risky-result.json b/survey-psychometric-validity-assistant/reports/risky-result.json new file mode 100644 index 00000000..0caa7603 --- /dev/null +++ b/survey-psychometric-validity-assistant/reports/risky-result.json @@ -0,0 +1,188 @@ +{ + "packetId": "survey-risky-017", + "title": "AI Workload Anxiety and Productivity Claims", + "decision": "HOLD", + "severityScore": 39, + "findingCount": 17, + "findings": [ + { + "severity": "medium", + "code": "thin_likert_anchor_set", + "message": "Survey reports 3 Likert anchors; expected at least 5.", + "evidence": { + "anchors": [ + "Never", + "Sometimes", + "Often" + ] + } + }, + { + "severity": "high", + "code": "cfa_sample_size_shortfall", + "message": "CFA sample size 96 is below 280 for 28 parameters.", + "evidence": { + "sampleSize": 96, + "plannedCfaParameters": 28, + "minimumCfaSample": 280 + } + }, + { + "severity": "high", + "code": "low_internal_consistency", + "message": "Workload Anxiety Draft Scale reports alpha 0.58, below 0.7.", + "evidence": { + "scale": "Workload Anxiety Draft Scale", + "alpha": 0.58 + } + }, + { + "severity": "medium", + "code": "weak_primary_loading", + "message": "wa_1 has primary loading 0.42, below 0.5.", + "evidence": { + "scale": "Workload Anxiety Draft Scale", + "itemId": "wa_1", + "loading": 0.42 + } + }, + { + "severity": "critical", + "code": "reverse_coding_drift", + "message": "wa_2 is reverse coded but not reverse scored before scale scoring.", + "evidence": { + "scale": "Workload Anxiety Draft Scale", + "itemId": "wa_2" + } + }, + { + "severity": "medium", + "code": "weak_primary_loading", + "message": "wa_2 has primary loading 0.39, below 0.5.", + "evidence": { + "scale": "Workload Anxiety Draft Scale", + "itemId": "wa_2", + "loading": 0.39 + } + }, + { + "severity": "medium", + "code": "weak_primary_loading", + "message": "wa_3 has primary loading 0.44, below 0.5.", + "evidence": { + "scale": "Workload Anxiety Draft 
Scale", + "itemId": "wa_3", + "loading": 0.44 + } + }, + { + "severity": "medium", + "code": "weak_primary_loading", + "message": "wa_4 has primary loading 0.31, below 0.5.", + "evidence": { + "scale": "Workload Anxiety Draft Scale", + "itemId": "wa_4", + "loading": 0.31 + } + }, + { + "severity": "high", + "code": "factor_cross_loading", + "message": "wa_1 has secondary loading 0.41, above 0.32.", + "evidence": { + "scale": "Workload Anxiety Draft Scale", + "itemId": "wa_1", + "secondaryLoading": 0.41 + } + }, + { + "severity": "high", + "code": "factor_cross_loading", + "message": "wa_3 has secondary loading 0.37, above 0.32.", + "evidence": { + "scale": "Workload Anxiety Draft Scale", + "itemId": "wa_3", + "secondaryLoading": 0.37 + } + }, + { + "severity": "high", + "code": "missing_internal_consistency", + "message": "Productivity Perception Index does not report Cronbach alpha or equivalent reliability evidence.", + "evidence": { + "scale": "Productivity Perception Index" + } + }, + { + "severity": "medium", + "code": "weak_primary_loading", + "message": "ppi_1 has primary loading 0.47, below 0.5.", + "evidence": { + "scale": "Productivity Perception Index", + "itemId": "ppi_1", + "loading": 0.47 + } + }, + { + "severity": "medium", + "code": "weak_primary_loading", + "message": "ppi_2 has primary loading 0.36, below 0.5.", + "evidence": { + "scale": "Productivity Perception Index", + "itemId": "ppi_2", + "loading": 0.36 + } + }, + { + "severity": "high", + "code": "factor_cross_loading", + "message": "ppi_2 has secondary loading 0.34, above 0.32.", + "evidence": { + "scale": "Productivity Perception Index", + "itemId": "ppi_2", + "secondaryLoading": 0.34 + } + }, + { + "severity": "high", + "code": "construct_claim_mismatch", + "message": "Manuscript claims anxiety, but no submitted scale validates that construct directly.", + "evidence": { + "claim": "anxiety", + "availableConstructs": [ + "workload anxiety", + "productivity perception" + ] + } + }, + { + 
/**
 * Default thresholds used by the checks. Any key can be overridden through the
 * `customOptions` argument of `evaluateSurveyPacket`.
 */
const DEFAULTS = Object.freeze({
  minimumAlpha: 0.7,
  minimumLoading: 0.5,
  maximumCrossLoading: 0.32,
  minimumLikertAnchors: 5,
  cfaCasesPerParameter: 10,
});

/** Points each finding severity contributes to the packet's severity score. */
const SEVERITY_WEIGHTS = Object.freeze({ critical: 5, high: 3, medium: 1 });

/**
 * Appends one finding record to `findings`.
 *
 * @param {object[]} findings - Accumulator, mutated in place.
 * @param {string} severity - "critical", "high" or "medium".
 * @param {string} code - Machine-readable finding identifier.
 * @param {string} message - Human-readable description.
 * @param {object} evidence - Values that triggered the finding.
 */
function addFinding(findings, severity, code, message, evidence) {
  findings.push({ severity, code, message, evidence });
}

/**
 * Canonical form used to compare construct names: string-coerced, trimmed and
 * lower-cased; null and undefined become "".
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalize(value) {
  return String(value ?? "").trim().toLowerCase();
}

/**
 * Runs the per-scale checks (reliability, reverse coding, primary loadings,
 * cross-loadings) and appends any findings.
 *
 * @param {object} scale - Scale record with `name`, `cronbachAlpha`, and
 *   optional `items` and `crossLoadings` arrays.
 * @param {object[]} findings - Accumulator, mutated in place.
 * @param {object} options - Effective thresholds (see DEFAULTS).
 */
function evaluateScale(scale, findings, options) {
  // Number.isFinite rather than a typeof check: NaN has type "number" but
  // compares false against every threshold, which would let it pass unflagged.
  if (!Number.isFinite(scale.cronbachAlpha)) {
    addFinding(
      findings,
      "high",
      "missing_internal_consistency",
      `${scale.name} does not report Cronbach alpha or equivalent reliability evidence.`,
      { scale: scale.name }
    );
  } else if (scale.cronbachAlpha < options.minimumAlpha) {
    addFinding(
      findings,
      "high",
      "low_internal_consistency",
      `${scale.name} reports alpha ${scale.cronbachAlpha}, below ${options.minimumAlpha}.`,
      { scale: scale.name, alpha: scale.cronbachAlpha }
    );
  }

  for (const item of scale.items ?? []) {
    if (item.reverseCoded && !item.reverseScored) {
      addFinding(
        findings,
        "critical",
        "reverse_coding_drift",
        `${item.id} is reverse coded but not reverse scored before scale scoring.`,
        { scale: scale.name, itemId: item.id }
      );
    }

    if (typeof item.loading === "number" && item.loading < options.minimumLoading) {
      addFinding(
        findings,
        "medium",
        "weak_primary_loading",
        `${item.id} has primary loading ${item.loading}, below ${options.minimumLoading}.`,
        { scale: scale.name, itemId: item.id, loading: item.loading }
      );
    }
  }

  for (const cross of scale.crossLoadings ?? []) {
    // Strictly greater: a loading equal to the configured maximum is still
    // within bounds, which is also what the "above" wording in the message says.
    if (cross.secondaryLoading > options.maximumCrossLoading) {
      addFinding(
        findings,
        "high",
        "factor_cross_loading",
        `${cross.itemId} has secondary loading ${cross.secondaryLoading}, above ${options.maximumCrossLoading}.`,
        { scale: scale.name, itemId: cross.itemId, secondaryLoading: cross.secondaryLoading }
      );
    }
  }
}

/**
 * Evaluates a survey packet and decides whether AI peer-review output may be
 * released.
 *
 * Packet-level checks (Likert anchor count, CFA sample size) run first, then
 * the per-scale checks, then construct-claim matching. Findings are scored with
 * SEVERITY_WEIGHTS: any critical finding or a score of 9 or more yields "HOLD",
 * a score of 3 or more yields "REVIEW", anything lower yields "RELEASE".
 *
 * @param {object} packet - Packet with `id`, `title`, and optional
 *   `likertAnchors`, `plannedCfaParameters`, `sampleSize`, `scales` and
 *   `constructClaims`.
 * @param {object} [customOptions={}] - Threshold overrides merged over DEFAULTS.
 * @returns {{packetId: *, title: *, decision: string, severityScore: number,
 *   findingCount: number, findings: object[], releaseGate: object}}
 */
export function evaluateSurveyPacket(packet, customOptions = {}) {
  const options = { ...DEFAULTS, ...customOptions };
  const findings = [];

  const anchors = packet.likertAnchors ?? [];
  if (anchors.length < options.minimumLikertAnchors) {
    addFinding(
      findings,
      "medium",
      "thin_likert_anchor_set",
      `Survey reports ${anchors.length} Likert anchors; expected at least ${options.minimumLikertAnchors}.`,
      { anchors }
    );
  }

  const plannedCfaParameters = packet.plannedCfaParameters ?? 0;
  const minimumCfaSample = plannedCfaParameters * options.cfaCasesPerParameter;
  if (plannedCfaParameters) {
    if (!Number.isFinite(packet.sampleSize)) {
      // Fail closed: `undefined < n` is false, so without this branch a packet
      // that plans a CFA but omits its sample size would pass the gate.
      addFinding(
        findings,
        "high",
        "missing_cfa_sample_size",
        `CFA plans ${plannedCfaParameters} parameters but the packet does not report a numeric sample size; at least ${minimumCfaSample} cases are expected.`,
        { plannedCfaParameters, minimumCfaSample }
      );
    } else if (packet.sampleSize < minimumCfaSample) {
      addFinding(
        findings,
        "high",
        "cfa_sample_size_shortfall",
        `CFA sample size ${packet.sampleSize} is below ${minimumCfaSample} for ${plannedCfaParameters} parameters.`,
        {
          sampleSize: packet.sampleSize,
          plannedCfaParameters,
          minimumCfaSample,
        }
      );
    }
  }

  const scales = packet.scales ?? [];
  for (const scale of scales) {
    evaluateScale(scale, findings, options);
  }

  // A claim is covered only by an exact (normalized) match with a scale construct.
  const scaleConstructs = new Set(scales.map((scale) => normalize(scale.construct)));
  for (const claim of packet.constructClaims ?? []) {
    if (!scaleConstructs.has(normalize(claim))) {
      addFinding(
        findings,
        "high",
        "construct_claim_mismatch",
        `Manuscript claims ${claim}, but no submitted scale validates that construct directly.`,
        { claim, availableConstructs: [...scaleConstructs] }
      );
    }
  }

  const severityScore = findings.reduce(
    (sum, finding) => sum + (SEVERITY_WEIGHTS[finding.severity] ?? 0),
    0
  );

  const hasCritical = findings.some((finding) => finding.severity === "critical");
  let decision = "RELEASE";
  if (hasCritical || severityScore >= 9) {
    decision = "HOLD";
  } else if (severityScore >= 3) {
    decision = "REVIEW";
  }

  const canRelease = decision === "RELEASE";
  return {
    packetId: packet.id,
    title: packet.title,
    decision,
    severityScore,
    findingCount: findings.length,
    findings,
    releaseGate: {
      canReleaseAiReview: canRelease,
      requiresPsychometricReviewer: !canRelease,
      rationale: canRelease
        ? "Psychometric evidence is sufficient for AI peer-review release."
        : "Psychometric validity issues must be reviewed before AI output is trusted.",
    },
  };
}

/**
 * Renders an evaluation result as a Markdown report.
 *
 * @param {object} result - Value returned by `evaluateSurveyPacket`.
 * @returns {string} Markdown text terminated by a single newline.
 */
export function renderMarkdownReport(result) {
  const lines = [
    `# Survey Psychometric Validity Report`,
    ``,
    `Packet: ${result.packetId}`,
    `Decision: ${result.decision}`,
    `Severity score: ${result.severityScore}`,
    `Findings: ${result.findingCount}`,
    ``,
    `## Findings`,
  ];

  if (result.findings.length === 0) {
    lines.push(`- No blocking psychometric validity findings.`);
  } else {
    for (const finding of result.findings) {
      lines.push(`- [${finding.severity}] ${finding.code}: ${finding.message}`);
    }
  }

  lines.push(
    ``,
    `## Release gate`,
    `- Can release AI review: ${result.releaseGate.canReleaseAiReview}`,
    `- Requires psychometric reviewer: ${result.releaseGate.requiresPsychometricReviewer}`,
    `- Rationale: ${result.releaseGate.rationale}`
  );

  return `${lines.join("\n")}\n`;
}
// Both sample packets are submitted under the same decision context.
const PRE_RELEASE_CONTEXT = "pre-release AI peer review";

// --- Scales used by the clean packet ---------------------------------------

const remoteCollaborationFatigueScale = {
  name: "Remote Collaboration Fatigue",
  construct: "remote collaboration fatigue",
  cronbachAlpha: 0.86,
  items: [
    { id: "rcf_1", loading: 0.74 },
    { id: "rcf_2", loading: 0.69 },
    { id: "rcf_3", loading: 0.71 },
    // Reverse-coded item that was correctly reverse scored.
    { id: "rcf_4", loading: 0.66, reverseCoded: true, reverseScored: true },
  ],
  crossLoadings: [{ itemId: "rcf_2", secondaryLoading: 0.18 }],
};

const burnoutShortFormScale = {
  name: "Burnout Short Form",
  construct: "burnout",
  cronbachAlpha: 0.82,
  items: [
    { id: "bsf_1", loading: 0.72 },
    { id: "bsf_2", loading: 0.77 },
    { id: "bsf_3", loading: 0.68 },
  ],
  crossLoadings: [],
};

// --- Scales used by the risky packet ---------------------------------------

const workloadAnxietyDraftScale = {
  name: "Workload Anxiety Draft Scale",
  construct: "workload anxiety",
  cronbachAlpha: 0.58,
  items: [
    { id: "wa_1", loading: 0.42 },
    // Reverse-coded item left unscored.
    { id: "wa_2", loading: 0.39, reverseCoded: true, reverseScored: false },
    { id: "wa_3", loading: 0.44 },
    { id: "wa_4", loading: 0.31 },
  ],
  crossLoadings: [
    { itemId: "wa_1", secondaryLoading: 0.41 },
    { itemId: "wa_3", secondaryLoading: 0.37 },
  ],
};

const productivityPerceptionIndexScale = {
  name: "Productivity Perception Index",
  construct: "productivity perception",
  // No reliability coefficient is reported for this scale.
  cronbachAlpha: null,
  items: [
    { id: "ppi_1", loading: 0.47 },
    { id: "ppi_2", loading: 0.36 },
  ],
  crossLoadings: [{ itemId: "ppi_2", secondaryLoading: 0.34 }],
};

/**
 * Sample packet whose two scales each match one of its construct claims and
 * report alpha above 0.8, with 420 respondents for 32 planned CFA parameters
 * and five Likert anchors.
 */
export const cleanPacket = {
  id: "survey-clean-001",
  title: "Validated Remote Collaboration Burnout Scale",
  decisionContext: PRE_RELEASE_CONTEXT,
  constructClaims: ["burnout", "remote collaboration fatigue"],
  sampleSize: 420,
  plannedCfaParameters: 32,
  likertAnchors: ["Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree"],
  scales: [remoteCollaborationFatigueScale, burnoutShortFormScale],
};

/**
 * Sample packet with three Likert anchors, 96 respondents for 28 planned CFA
 * parameters, and construct claims that none of its scale constructs match
 * exactly.
 */
export const riskyPacket = {
  id: "survey-risky-017",
  title: "AI Workload Anxiety and Productivity Claims",
  decisionContext: PRE_RELEASE_CONTEXT,
  constructClaims: ["anxiety", "productivity", "clinical burnout"],
  sampleSize: 96,
  plannedCfaParameters: 28,
  likertAnchors: ["Never", "Sometimes", "Often"],
  scales: [workloadAnxietyDraftScale, productivityPerceptionIndexScale],
};
import assert from "node:assert/strict";
import { evaluateSurveyPacket, renderMarkdownReport } from "./src/guard.js";
import { cleanPacket, riskyPacket } from "./src/samplePackets.js";

// Clean packet: must be released with no findings.
const cleanResult = evaluateSurveyPacket(cleanPacket);
assert.equal(cleanResult.decision, "RELEASE");
assert.equal(cleanResult.findingCount, 0);
assert.equal(cleanResult.releaseGate.canReleaseAiReview, true);

// Risky packet: must be held and routed to a psychometric reviewer.
const riskyResult = evaluateSurveyPacket(riskyPacket);
assert.equal(riskyResult.decision, "HOLD");
assert.equal(riskyResult.releaseGate.requiresPsychometricReviewer, true);

// Every gate below has to fire at least once for the risky packet.
const reportedCodes = new Set(riskyResult.findings.map((finding) => finding.code));
const expectedCodes = [
  "reverse_coding_drift",
  "low_internal_consistency",
  "missing_internal_consistency",
  "factor_cross_loading",
  "cfa_sample_size_shortfall",
  "construct_claim_mismatch",
];
for (const code of expectedCodes) {
  assert.ok(reportedCodes.has(code));
}

// The Markdown report carries the title, the decision and the critical finding.
const markdown = renderMarkdownReport(riskyResult);
for (const pattern of [/Survey Psychometric Validity Report/, /Decision: HOLD/, /reverse_coding_drift/]) {
  assert.match(markdown, pattern);
}

console.log("survey psychometric validity assistant tests passed");