From 9def5f5e9675776503ddd5d62674bac92464de07 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 07:05:27 +0000 Subject: [PATCH 1/8] Add markdown-toc.js: GitHub-accurate table-of-contents generator Zero-dependency Node script. Extracts headings (code-fence aware), builds GitHub-compatible anchor slugs with duplicate disambiguation, prints to stdout or injects an idempotent TOC block via --write. Documented in README. --- README.md | 32 ++++++++ markdown-toc.js | 195 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 markdown-toc.js diff --git a/README.md b/README.md index 262cdc7..452a77a 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ A curated collection of highly robust, custom-built Node/Python automation scrip 1. [auto-classify-projects.js](#1-auto-classify-projectsjs) 2. [sync-profile-readme.js](#2-sync-profile-readmejs) 3. [oss-contributor-log.py](#3-oss-contributor-logpy) +4. [markdown-toc.js](#4-markdown-tocjs) --- @@ -75,5 +76,36 @@ python3 oss-contributor-log.py show --- +## 4. `markdown-toc.js` +> **Generate a GitHub-accurate table of contents from any Markdown file's headings.** + +A zero-dependency Node script that reads a Markdown file, extracts its headings, and builds a table of contents with anchor links that actually resolve on GitHub. Print it to stdout, or inject it straight into the file between `` markers. + +### ⚑ Key Features: +* **GitHub-accurate slugs**: Mirrors GitHub's real anchor algorithm β€” punctuation stripped, spaces hyphenated without collapsing, duplicate headings disambiguated (`#getting-started`, `#getting-started-1`). The links work, not just look like they should. +* **Code-fence aware**: Skips `#` lines inside fenced code blocks (```` ``` ```` / `~~~`), so a commented shell command never sneaks into your TOC. +* **Idempotent `--write`**: Updates the block between `` and `` in place. Run it on every commit β€” it replaces, never duplicates. No markers? 
It drops them in right after your H1. +* **Level control**: `--min-level` / `--max-level` to skip the H1 title and ignore deep sub-headings. +* **Zero dependencies**: Pure Node `fs`. Nothing to install. + +### πŸš€ Usage: +```bash +# Print a TOC to stdout (default: levels 2–4, skipping the H1 title) +node markdown-toc.js README.md + +# Inject/update the TOC block inside the file +node markdown-toc.js README.md --write + +# Only H2 and H3 +node markdown-toc.js docs/guide.md --min-level 2 --max-level 3 --write +``` + +Add `` and `` where you want it, then wire `--write` into a pre-commit hook to keep it fresh automatically. + +### πŸ“¦ Reusable functions: +The script also exports its internals (`slugify`, `extractHeadings`, `buildToc`, `injectToc`) so you can `require()` it in your own tooling. + +--- + ## πŸ“œ License MIT β€” Do whatever you want with these scripts! diff --git a/markdown-toc.js b/markdown-toc.js new file mode 100644 index 0000000..68bd9c0 --- /dev/null +++ b/markdown-toc.js @@ -0,0 +1,195 @@ +#!/usr/bin/env node +/** + * markdown-toc.js + * + * Generate a GitHub-flavored table of contents from a Markdown file's headings. + * + * - Parses ATX headings (`#`, `##`, ...), ignoring anything inside fenced code blocks. + * - Builds GitHub-compatible anchor slugs (lowercase, spaces -> hyphens, punctuation + * stripped, duplicate slugs disambiguated with -1, -2, ...). + * - Prints the TOC to stdout, or with --write injects/updates it between + * and markers in the file. + * + * Zero dependencies. Works on any Node >= 14. 
+ * + * Usage: + * node markdown-toc.js README.md # print TOC to stdout + * node markdown-toc.js README.md --write # update TOC block in the file + * node markdown-toc.js README.md --min-level 2 --max-level 3 + * node markdown-toc.js --help + */ + +'use strict'; + +const fs = require('fs'); + +const START_MARKER = ''; +const END_MARKER = ''; + +function printHelp() { + console.log(`markdown-toc.js β€” generate a GitHub-style table of contents + +Usage: + node markdown-toc.js [options] + +Options: + --write Inject/update the TOC between ${START_MARKER} and ${END_MARKER} + markers in the file (added near the top if no markers exist). + --min-level Shallowest heading level to include (default: 2 β€” skips the H1 title). + --max-level Deepest heading level to include (default: 4). + --help Show this help. + +Examples: + node markdown-toc.js README.md + node markdown-toc.js README.md --write + node markdown-toc.js docs/guide.md --min-level 2 --max-level 3 --write +`); +} + +/** + * GitHub's anchor-slug algorithm (close enough for headings without HTML/emoji): + * lowercase, strip anything that isn't a word char, space, or hyphen, then + * convert spaces to hyphens. + */ +function slugify(text, seen) { + // Mirror GitHub's slugger: lowercase, strip punctuation/symbols, then turn each + // remaining space into a hyphen WITHOUT collapsing runs. "Install & Setup" -> the + // "&" is removed leaving two spaces -> "install--setup", matching GitHub's anchor. + let slug = text + .trim() + .toLowerCase() + .replace(/[^\w\s-]/g, '') // drop punctuation/symbols (keeps word chars, spaces, hyphens) + .replace(/ /g, '-'); // each space -> a hyphen (no collapsing) + + // Disambiguate duplicate slugs the way GitHub does: foo, foo-1, foo-2, ... + const base = slug; + let n = seen.get(base) || 0; + if (n > 0) slug = `${base}-${n}`; + seen.set(base, n + 1); + return slug; +} + +/** + * Extract headings from markdown, skipping fenced code blocks (``` or ~~~). + * Returns [{ level, text }]. 
+ */ +function extractHeadings(markdown, minLevel, maxLevel) { + const lines = markdown.split('\n'); + const headings = []; + let fence = null; // current code-fence marker, or null + + for (const line of lines) { + const fenceMatch = line.match(/^\s*(```+|~~~+)/); + if (fenceMatch) { + const marker = fenceMatch[1][0]; // ` or ~ + if (fence === null) fence = marker; + else if (fence === marker) fence = null; + continue; + } + if (fence !== null) continue; // inside a code block + + const h = line.match(/^(#{1,6})\s+(.*?)\s*#*\s*$/); + if (!h) continue; + const level = h[1].length; + if (level < minLevel || level > maxLevel) continue; + const text = h[2].trim(); + if (text) headings.push({ level, text }); + } + return headings; +} + +/** Strip inline markdown (links, code, bold/italic) from heading display text. */ +function cleanText(text) { + return text + .replace(/\[([^\]]+)\]\([^)]*\)/g, '$1') // [label](url) -> label + .replace(/[`*_~]/g, '') // code/bold/italic markers + .trim(); +} + +function buildToc(headings) { + if (headings.length === 0) return ''; + const seen = new Map(); + const minLevel = Math.min(...headings.map((h) => h.level)); + const out = []; + for (const { level, text } of headings) { + const display = cleanText(text); + const slug = slugify(display, seen); + const indent = ' '.repeat(level - minLevel); + out.push(`${indent}- [${display}](#${slug})`); + } + return out.join('\n'); +} + +/** Replace the block between markers, or insert one after the first H1 (or at top). */ +function injectToc(markdown, toc) { + const block = `${START_MARKER}\n${toc}\n${END_MARKER}`; + const start = markdown.indexOf(START_MARKER); + const end = markdown.indexOf(END_MARKER); + + if (start !== -1 && end !== -1 && end > start) { + return markdown.slice(0, start) + block + markdown.slice(end + END_MARKER.length); + } + + // No markers: insert after the first H1 if there is one, else at the very top. 
+ const lines = markdown.split('\n'); + const h1 = lines.findIndex((l) => /^#\s+/.test(l)); + if (h1 !== -1) { + lines.splice(h1 + 1, 0, '', block); + return lines.join('\n'); + } + return `${block}\n\n${markdown}`; +} + +function parseArgs(argv) { + const args = { file: null, write: false, minLevel: 2, maxLevel: 4, help: false }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a === '--help' || a === '-h') args.help = true; + else if (a === '--write') args.write = true; + else if (a === '--min-level') args.minLevel = parseInt(argv[++i], 10); + else if (a === '--max-level') args.maxLevel = parseInt(argv[++i], 10); + else if (!a.startsWith('-') && args.file === null) args.file = a; + } + return args; +} + +function main() { + const args = parseArgs(process.argv.slice(2)); + + if (args.help || !args.file) { + printHelp(); + process.exit(args.help ? 0 : 1); + } + + if (Number.isNaN(args.minLevel) || Number.isNaN(args.maxLevel) || args.minLevel > args.maxLevel) { + console.error('Error: invalid --min-level / --max-level range.'); + process.exit(1); + } + + if (!fs.existsSync(args.file)) { + console.error(`Error: file not found: ${args.file}`); + process.exit(1); + } + + const markdown = fs.readFileSync(args.file, 'utf8'); + const headings = extractHeadings(markdown, args.minLevel, args.maxLevel); + + if (headings.length === 0) { + console.error(`No headings found between level ${args.minLevel} and ${args.maxLevel}.`); + process.exit(1); + } + + const toc = buildToc(headings); + + if (args.write) { + const updated = injectToc(markdown, toc); + fs.writeFileSync(args.file, updated); + console.error(`βœ“ TOC written to ${args.file} (${headings.length} headings).`); + } else { + console.log(toc); + } +} + +if (require.main === module) main(); + +module.exports = { slugify, extractHeadings, buildToc, cleanText, injectToc }; From 68f61d051baf97d02935995b80d3c855d8fecb96 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 07:17:24 +0000 Subject: 
[PATCH 2/8] Add --check mode to markdown-toc.js for CI / pre-commit Verifies the TOC is current without writing: exit 0 (up to date), 1 (stale), 2 (no markers). Documented in README. Tested across all three exit states. --- README.md | 6 +++++- markdown-toc.js | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 452a77a..b063226 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ A zero-dependency Node script that reads a Markdown file, extracts its headings, * **GitHub-accurate slugs**: Mirrors GitHub's real anchor algorithm β€” punctuation stripped, spaces hyphenated without collapsing, duplicate headings disambiguated (`#getting-started`, `#getting-started-1`). The links work, not just look like they should. * **Code-fence aware**: Skips `#` lines inside fenced code blocks (```` ``` ```` / `~~~`), so a commented shell command never sneaks into your TOC. * **Idempotent `--write`**: Updates the block between `` and `` in place. Run it on every commit β€” it replaces, never duplicates. No markers? It drops them in right after your H1. +* **CI-friendly `--check`**: Verifies the TOC is current without touching the file. Exit `0` (up to date), `1` (stale β€” run `--write`), or `2` (no markers). Drop it in a CI step or pre-commit hook so a forgotten TOC update fails the build. * **Level control**: `--min-level` / `--max-level` to skip the H1 title and ignore deep sub-headings. * **Zero dependencies**: Pure Node `fs`. Nothing to install. @@ -98,9 +99,12 @@ node markdown-toc.js README.md --write # Only H2 and H3 node markdown-toc.js docs/guide.md --min-level 2 --max-level 3 --write + +# CI / pre-commit: fail if the TOC is stale (exit 1), missing markers (exit 2), else pass (exit 0) +node markdown-toc.js README.md --check ``` -Add `` and `` where you want it, then wire `--write` into a pre-commit hook to keep it fresh automatically. 
+Add `` and `` where you want it, then wire `--write` into a pre-commit hook to keep it fresh automatically β€” or `--check` into CI so a stale TOC fails the build. ### πŸ“¦ Reusable functions: The script also exports its internals (`slugify`, `extractHeadings`, `buildToc`, `injectToc`) so you can `require()` it in your own tooling. diff --git a/markdown-toc.js b/markdown-toc.js index 68bd9c0..7857bb1 100644 --- a/markdown-toc.js +++ b/markdown-toc.js @@ -35,6 +35,8 @@ Usage: Options: --write Inject/update the TOC between ${START_MARKER} and ${END_MARKER} markers in the file (added near the top if no markers exist). + --check Don't modify anything. Exit 0 if the TOC block is up to date, + 1 if it's stale, 2 if no TOC markers exist. CI / pre-commit friendly. --min-level Shallowest heading level to include (default: 2 β€” skips the H1 title). --max-level Deepest heading level to include (default: 4). --help Show this help. @@ -42,6 +44,7 @@ Options: Examples: node markdown-toc.js README.md node markdown-toc.js README.md --write + node markdown-toc.js README.md --check # fails CI if the TOC is out of date node markdown-toc.js docs/guide.md --min-level 2 --max-level 3 --write `); } @@ -141,11 +144,12 @@ function injectToc(markdown, toc) { } function parseArgs(argv) { - const args = { file: null, write: false, minLevel: 2, maxLevel: 4, help: false }; + const args = { file: null, write: false, check: false, minLevel: 2, maxLevel: 4, help: false }; for (let i = 0; i < argv.length; i++) { const a = argv[i]; if (a === '--help' || a === '-h') args.help = true; else if (a === '--write') args.write = true; + else if (a === '--check') args.check = true; else if (a === '--min-level') args.minLevel = parseInt(argv[++i], 10); else if (a === '--max-level') args.maxLevel = parseInt(argv[++i], 10); else if (!a.startsWith('-') && args.file === null) args.file = a; @@ -181,6 +185,20 @@ function main() { const toc = buildToc(headings); + if (args.check) { + const hasMarkers = 
markdown.includes(START_MARKER) && markdown.includes(END_MARKER); + if (!hasMarkers) { + console.error(`βœ— ${args.file}: no TOC markers found. Add ${START_MARKER} / ${END_MARKER} or run with --write.`); + process.exit(2); + } + if (injectToc(markdown, toc) === markdown) { + console.error(`βœ“ ${args.file}: TOC is up to date.`); + process.exit(0); + } + console.error(`βœ— ${args.file}: TOC is out of date. Run: node markdown-toc.js ${args.file} --write`); + process.exit(1); + } + if (args.write) { const updated = injectToc(markdown, toc); fs.writeFileSync(args.file, updated); From 01cfc41efdc044be4bb5ac1fcaffaf0840efdf72 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 25 Jun 2026 12:38:18 +0000 Subject: [PATCH 3/8] Add link-check.js: find broken local links + anchors in Markdown - Detects relative file links pointing at missing paths - Validates #anchors (same-file and cross-file) using GitHub's slug algorithm - Ignores links in fenced code blocks and inline code spans - Network-free by default; --external lists URLs without fetching - CI-friendly exit codes (0 ok / 1 broken / 2 usage); --json output - Documented in README with usage and feature list --- README.md | 39 +++++++ link-check.js | 302 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 341 insertions(+) create mode 100644 link-check.js diff --git a/README.md b/README.md index b063226..74d3535 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ A curated collection of highly robust, custom-built Node/Python automation scrip 2. [sync-profile-readme.js](#2-sync-profile-readmejs) 3. [oss-contributor-log.py](#3-oss-contributor-logpy) 4. [markdown-toc.js](#4-markdown-tocjs) +5. [link-check.js](#5-link-checkjs) --- @@ -111,5 +112,43 @@ The script also exports its internals (`slugify`, `extractHeadings`, `buildToc`, --- +## 5. 
`link-check.js` +> **Find broken local links in Markdown before they embarrass you in a README.** + +A zero-dependency Node script that catches the two link failures README maintainers hit most: relative file links pointing at a path that no longer exists, and in-page `#anchor` links to a heading you renamed or deleted. It validates anchors with GitHub's real slug algorithm β€” the same one `markdown-toc.js` uses β€” so `#section` links resolve exactly the way they will on github.com. + +### ⚑ Key Features: +* **Dead-file detection**: Resolves every relative `[text](path)` and `![alt](path)` against the filesystem (relative to the Markdown file itself) and flags anything missing. Handles URL-encoded paths and `file.md#fragment` forms. +* **Anchor validation**: For `#anchor` links β€” both same-file and `other.md#anchor` β€” it builds the target file's heading slugs and confirms the anchor actually exists. Also honors explicit `` / `id="..."` anchors. +* **No false positives from code**: Links inside fenced code blocks (```` ``` ````/`~~~`) and inline `` `code spans` `` are ignored, so a sample command never reads as a broken link. +* **Network-free by default**: External URLs (`http`, `https`, `mailto`, `tel`) are never fetched β€” the check is deterministic and safe for CI. Pass `--external` to list them for a manual eyeball. +* **CI / pre-commit ready**: Exit `0` (all local links resolve), `1` (broken links found), or `2` (usage error). Multiple files per run. +* **Zero dependencies**: Pure Node `fs` + `path`. 
+ +### πŸš€ Usage: +```bash +# Check one file +node link-check.js README.md + +# Check several at once +node link-check.js README.md docs/*.md + +# Also list external URLs (never fetched, just printed) +node link-check.js README.md --external + +# Machine-readable report +node link-check.js README.md --json + +# CI: fail the build on any broken link +node link-check.js README.md docs/guide.md +``` + +Pairs naturally with `markdown-toc.js`: generate the TOC, then verify every link in it (and everywhere else) still resolves. Wire both into a pre-commit hook and your docs stop rotting. + +### πŸ“¦ Reusable functions: +Exports `slugify`, `cleanText`, `parseMarkdown`, `classify`, and `checkFile` for use in your own tooling via `require()`. + +--- + ## πŸ“œ License MIT β€” Do whatever you want with these scripts! diff --git a/link-check.js b/link-check.js new file mode 100644 index 0000000..8768491 --- /dev/null +++ b/link-check.js @@ -0,0 +1,302 @@ +#!/usr/bin/env node +/** + * link-check.js + * + * Find broken links in Markdown files β€” the local kind that rot silently when you + * rename a file or edit a heading. Catches the two failures README maintainers hit + * most: + * + * 1. Relative file links that point at a path that no longer exists on disk + * ([guide](docs/guide.md) after you moved docs/guide.md). + * 2. In-page anchor links to a heading that isn't there anymore + * ([jump](#install) after you renamed the "Install" section). + * + * It uses GitHub's real anchor-slug algorithm, so #section links are validated the + * same way GitHub resolves them β€” the same logic markdown-toc.js uses to build a TOC. + * + * External links (http/https/mailto/tel) are reported as skipped by default β€” no + * network calls, so the check is deterministic and CI-safe. Pass --external to print + * them so you can eyeball the list. + * + * Zero dependencies. Works on any Node >= 14. 
+ * + * Usage: + * node link-check.js README.md # check one file + * node link-check.js README.md docs/*.md # check several + * node link-check.js README.md --json # machine-readable report + * node link-check.js README.md --external # also list (don't verify) external URLs + * node link-check.js --help + * + * Exit codes (CI / pre-commit friendly): + * 0 all local links resolve + * 1 one or more broken links found + * 2 usage error (no files, missing file, bad flag) + */ + +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +function printHelp() { + console.log(`link-check.js β€” find broken local links in Markdown + +Usage: + node link-check.js [more.md ...] [options] + +Options: + --json Emit a JSON report instead of human-readable text. + --external List external URLs (http/https/mailto/tel) instead of silently + skipping them. They are never fetched β€” listed only. + --quiet Only print broken links (and nothing on success). + --help Show this help. + +Exit codes: + 0 all local links resolve + 1 broken link(s) found + 2 usage error + +Examples: + node link-check.js README.md + node link-check.js README.md docs/guide.md + node link-check.js README.md --check >/dev/null && echo ok # in CI +`); +} + +/** + * GitHub's anchor-slug algorithm β€” identical to markdown-toc.js so that #anchor + * links validate against the same slugs a generated TOC would produce. + */ +function slugify(text, seen) { + let slug = text + .trim() + .toLowerCase() + .replace(/[^\w\s-]/g, '') // drop punctuation/symbols (keep word chars, spaces, hyphens) + .replace(/ /g, '-'); // each space -> a hyphen, no collapsing + + const base = slug; + const n = seen.get(base) || 0; + if (n > 0) slug = `${base}-${n}`; + seen.set(base, n + 1); + return slug; +} + +/** Strip inline markdown from heading text before slugging (mirrors markdown-toc.js). 
*/ +function cleanText(text) { + return text + .replace(/\[([^\]]+)\]\([^)]*\)/g, '$1') // [label](url) -> label + .replace(/[`*_~]/g, '') + .trim(); +} + +/** + * Walk the file once, tracking fenced code blocks so we ignore both headings and + * links that live inside them. Returns { anchors: Set, links: [...] }. + * + * Each link: { text, target, line, kind: 'image' | 'link' }. + */ +function parseMarkdown(markdown) { + const lines = markdown.split('\n'); + const anchors = new Set(); + const seenSlugs = new Map(); + const links = []; + let fence = null; + + // Inline link / image: ![alt](target) or [text](target). The leading "!" (optional) + // marks an image. We capture the target up to the first ")" or whitespace, which + // keeps `(url "title")` titles from leaking into the path. + const linkRe = /(!?)\[([^\]]*)\]\(\s*([^)\s]+)(?:\s+[^)]*)?\)/g; + + lines.forEach((rawLine, i) => { + const fenceMatch = rawLine.match(/^\s*(```+|~~~+)/); + if (fenceMatch) { + const marker = fenceMatch[1][0]; + if (fence === null) fence = marker; + else if (fence === marker) fence = null; + return; + } + if (fence !== null) return; // inside a code block β€” skip headings and links + + // Headings become anchors. + const h = rawLine.match(/^(#{1,6})\s+(.*?)\s*#*\s*$/); + if (h && h[2].trim()) { + anchors.add(slugify(cleanText(h[2].trim()), seenSlugs)); + } + + // Explicit anchors authors drop in HTML: or id="x". + const idMatches = rawLine.matchAll(/(?:name|id)\s*=\s*["']([^"']+)["']/g); + for (const m of idMatches) anchors.add(m[1].toLowerCase()); + + // Links on this line. Strip inline code spans first so `[x](y)` inside backticks + // doesn't count as a real link. + const line = rawLine.replace(/`[^`]*`/g, (s) => ' '.repeat(s.length)); + let m; + while ((m = linkRe.exec(line)) !== null) { + links.push({ + kind: m[1] === '!' ? 
'image' : 'link', + text: m[2], + target: m[3].trim(), + line: i + 1, + }); + } + }); + + return { anchors, links }; +} + +const EXTERNAL_RE = /^(https?:|mailto:|tel:|ftp:|\/\/)/i; + +/** Classify and resolve a single link against the parsed file. */ +function classify(link, file, anchorsByFile) { + const { target } = link; + + if (target.startsWith('#')) { + return { type: 'anchor', anchor: target.slice(1).toLowerCase() }; + } + if (EXTERNAL_RE.test(target)) { + return { type: 'external' }; + } + + // Local path, optionally with a #fragment. + const hashIdx = target.indexOf('#'); + const relPath = hashIdx === -1 ? target : target.slice(0, hashIdx); + const fragment = hashIdx === -1 ? null : target.slice(hashIdx + 1).toLowerCase(); + const decoded = decodeURIComponent(relPath); + const resolved = path.resolve(path.dirname(file), decoded); + + return { type: 'file', resolved, fragment, relPath: decoded }; +} + +function checkFile(file, opts, anchorCache) { + const broken = []; + const external = []; + let markdown; + try { + markdown = fs.readFileSync(file, 'utf8'); + } catch (err) { + return { file, error: `cannot read file: ${err.message}`, broken, external, total: 0 }; + } + + const { anchors, links } = parseMarkdown(markdown); + anchorCache.set(path.resolve(file), anchors); + + for (const link of links) { + if (!link.target || link.target.startsWith('<')) continue; // empty or templated + const info = classify(link, file); + + if (info.type === 'external') { + external.push(link); + continue; + } + + if (info.type === 'anchor') { + if (!anchors.has(info.anchor)) { + broken.push({ ...link, reason: `no heading anchors to "#${info.anchor}" in this file` }); + } + continue; + } + + // info.type === 'file' + if (!fs.existsSync(info.resolved)) { + broken.push({ ...link, reason: `path not found: ${info.relPath}` }); + continue; + } + + // If it points at a markdown file AND carries a #fragment, validate the anchor too. 
+ if (info.fragment && /\.(md|markdown)$/i.test(info.resolved)) { + let targetAnchors = anchorCache.get(path.resolve(info.resolved)); + if (!targetAnchors) { + try { + targetAnchors = parseMarkdown(fs.readFileSync(info.resolved, 'utf8')).anchors; + anchorCache.set(path.resolve(info.resolved), targetAnchors); + } catch { + targetAnchors = null; + } + } + if (targetAnchors && !targetAnchors.has(info.fragment)) { + broken.push({ + ...link, + reason: `"${info.relPath}" exists but has no anchor "#${info.fragment}"`, + }); + } + } + } + + return { file, broken, external, total: links.length }; +} + +function main(argv) { + const args = argv.slice(2); + if (args.includes('--help') || args.length === 0) { + printHelp(); + process.exit(args.length === 0 ? 2 : 0); + } + + const opts = { + json: args.includes('--json'), + external: args.includes('--external'), + quiet: args.includes('--quiet'), + }; + const files = args.filter((a) => !a.startsWith('--')); + + const known = new Set(['--json', '--external', '--quiet', '--help']); + const badFlag = args.find((a) => a.startsWith('--') && !known.has(a)); + if (badFlag) { + console.error(`Unknown option: ${badFlag}\nRun with --help for usage.`); + process.exit(2); + } + if (files.length === 0) { + console.error('No Markdown files given.\nRun with --help for usage.'); + process.exit(2); + } + + const anchorCache = new Map(); + const results = files.map((f) => checkFile(f, opts, anchorCache)); + + if (opts.json) { + console.log(JSON.stringify(results, null, 2)); + const anyBroken = results.some((r) => r.error || r.broken.length); + process.exit(anyBroken ? 1 : 0); + } + + let totalBroken = 0; + let usageError = false; + for (const r of results) { + if (r.error) { + console.error(`βœ— ${r.file}: ${r.error}`); + usageError = true; + continue; + } + if (r.broken.length === 0) { + if (!opts.quiet) { + const ext = opts.external && r.external.length ? 
` (${r.external.length} external, not checked)` : ''; + console.log(`βœ“ ${r.file} β€” ${r.total} link(s), all local links resolve${ext}`); + } + } else { + totalBroken += r.broken.length; + console.log(`βœ— ${r.file} β€” ${r.broken.length} broken:`); + for (const b of r.broken) { + console.log(` ${r.file}:${b.line} [${b.text}](${b.target})`); + console.log(` ↳ ${b.reason}`); + } + } + if (opts.external && r.external && r.external.length) { + console.log(` external (listed, not fetched):`); + for (const e of r.external) console.log(` ${r.file}:${e.line} ${e.target}`); + } + } + + if (usageError && totalBroken === 0) process.exit(2); + if (totalBroken > 0) { + console.log(`\n${totalBroken} broken link(s) across ${files.length} file(s).`); + process.exit(1); + } + process.exit(0); +} + +// Export internals for reuse in your own tooling. +module.exports = { slugify, cleanText, parseMarkdown, classify, checkFile }; + +if (require.main === module) { + main(process.argv); +} From 200ab7324d9f2a77355783a52f0d4581ddb4cfe0 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 26 Jun 2026 12:39:58 +0000 Subject: [PATCH 4/8] Add examples/: check-docs.sh, CI workflow, pre-commit hook for docs checks Concrete copy-paste artifacts wiring link-check.js (and opt-in markdown-toc.js --check) into CI and git hooks. Tested on this repo. README section + note on the marker false-positive caveat. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01YC1umQHkdcQLWnoF5GKE45 --- README.md | 31 ++++++++++++++++++++++ examples/check-docs.sh | 57 +++++++++++++++++++++++++++++++++++++++++ examples/docs-check.yml | 32 +++++++++++++++++++++++ examples/git-pre-commit | 29 +++++++++++++++++++++ 4 files changed, 149 insertions(+) create mode 100755 examples/check-docs.sh create mode 100644 examples/docs-check.yml create mode 100755 examples/git-pre-commit diff --git a/README.md b/README.md index 74d3535..e268f66 100644 --- a/README.md +++ b/README.md @@ -150,5 +150,36 @@ Exports `slugify`, `cleanText`, `parseMarkdown`, `classify`, and `checkFile` for --- +## πŸ” Wire the docs checks into CI / pre-commit + +`link-check.js` and `markdown-toc.js` are most useful when they run automatically β€” so docs rot +fails a build instead of sitting unnoticed. The [`examples/`](examples/) folder has copy-paste +artifacts for both: + +| File | What it is | How to use | +|------|-----------|-----------| +| [`examples/check-docs.sh`](examples/check-docs.sh) | One command that link-checks every tracked Markdown file (and, opt-in, verifies marker-based TOCs). Exit `0`/`1`. | `bash examples/check-docs.sh` | +| [`examples/docs-check.yml`](examples/docs-check.yml) | GitHub Actions workflow β€” runs the checks on every push/PR that touches Markdown. | Copy to `.github/workflows/docs-check.yml` | +| [`examples/git-pre-commit`](examples/git-pre-commit) | Pre-commit hook β€” blocks a commit that introduces a broken link (checks only staged Markdown). | `cp examples/git-pre-commit .git/hooks/pre-commit && chmod +x .git/hooks/pre-commit` | + +```bash +# Try it right now, on this repo: +bash examples/check-docs.sh +# β†’ Checking links in N Markdown file(s)… +# βœ“ All docs checks passed. 
+``` + +To also verify a marker-based table of contents, list the files that use `` markers: + +```bash +TOC_FILES="README.md docs/guide.md" bash examples/check-docs.sh +``` + +> **Note:** the TOC check is opt-in per file because `markdown-toc.js --check` keys off the literal +> `` markers β€” a doc that only *mentions* those strings in prose (like this README) would +> read as having a TOC it doesn't. `link-check.js` has no such caveat and runs on everything. + +--- + ## πŸ“œ License MIT β€” Do whatever you want with these scripts! diff --git a/examples/check-docs.sh b/examples/check-docs.sh new file mode 100755 index 0000000..21396c1 --- /dev/null +++ b/examples/check-docs.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# +# check-docs.sh β€” run the docs checks across a repo in one command. +# +# What it does: +# 1. Finds every Markdown file tracked by git (skips .git, node_modules, vendor). +# 2. Runs link-check.js over all of them β€” fails on any broken local link or dead anchor. +# 3. Optionally runs markdown-toc.js --check on files that opt in (see TOC_FILES below). +# +# Exit codes: 0 = everything passes, 1 = a check failed. Safe for CI and pre-commit. +# +# Usage: +# examples/check-docs.sh # check the whole repo +# SCRIPTS_DIR=. examples/check-docs.sh # if the scripts live somewhere custom +# +# Requires: node, git. Zero npm dependencies. + +set -euo pipefail + +# Where link-check.js / markdown-toc.js live. Default: repo root (one level up from examples/). +SCRIPTS_DIR="${SCRIPTS_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}" + +# Files that use markers and should have their TOC verified. Space-separated. +# Leave empty to skip the TOC check entirely (link-check still runs on everything). +TOC_FILES="${TOC_FILES:-}" + +fail=0 + +# 1 + 2: link-check every tracked Markdown file. 
+mapfile -t md_files < <(git ls-files '*.md' '*.markdown' | grep -vE '(^|/)(node_modules|vendor)/' || true) + +if [ "${#md_files[@]}" -eq 0 ]; then + echo "No Markdown files tracked by git β€” nothing to check." + exit 0 +fi + +echo "β†’ Checking links in ${#md_files[@]} Markdown file(s)…" +if ! node "$SCRIPTS_DIR/link-check.js" "${md_files[@]}"; then + fail=1 +fi + +# 3: opt-in TOC freshness check. +if [ -n "$TOC_FILES" ]; then + echo "β†’ Checking table of contents is current…" + for f in $TOC_FILES; do + if ! node "$SCRIPTS_DIR/markdown-toc.js" "$f" --check; then + fail=1 + fi + done +fi + +if [ "$fail" -ne 0 ]; then + echo "βœ— Docs checks failed." + exit 1 +fi + +echo "βœ“ All docs checks passed." diff --git a/examples/docs-check.yml b/examples/docs-check.yml new file mode 100644 index 0000000..aaa5215 --- /dev/null +++ b/examples/docs-check.yml @@ -0,0 +1,32 @@ +# Example GitHub Actions workflow: fail the build when docs links rot. +# +# Drop this in your repo at .github/workflows/docs-check.yml. It runs on every push and PR +# that touches Markdown, checks all local links + anchors with link-check.js, and (optionally) +# verifies any marker-based table of contents is current. +# +# Adjust the `cp` step to point at wherever you keep these scripts β€” vendored in your repo, +# fetched from a release, or installed as a submodule. The example below assumes link-check.js +# and markdown-toc.js sit at the repo root alongside examples/check-docs.sh. + +name: docs-check + +on: + push: + paths: ['**.md', '**.markdown'] + pull_request: + paths: ['**.md', '**.markdown'] + +jobs: + links: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: '20' + + # Runs link-check.js across every tracked Markdown file. + # Set TOC_FILES to also verify blocks, e.g. TOC_FILES="README.md docs/guide.md". 
+ - name: Check Markdown links + run: bash examples/check-docs.sh diff --git a/examples/git-pre-commit b/examples/git-pre-commit new file mode 100755 index 0000000..157113d --- /dev/null +++ b/examples/git-pre-commit @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# +# git pre-commit hook: block a commit if it would introduce a broken Markdown link. +# +# Install: +# cp examples/git-pre-commit .git/hooks/pre-commit +# chmod +x .git/hooks/pre-commit +# +# It only checks the Markdown files you're actually committing (fast), using link-check.js. +# Bypass once with `git commit --no-verify` if you really must. +# +# Assumes link-check.js is at the repo root. Change SCRIPTS_DIR if it lives elsewhere. + +set -euo pipefail + +SCRIPTS_DIR="${SCRIPTS_DIR:-$(git rev-parse --show-toplevel)}" + +# Staged, added/copied/modified Markdown files only. +mapfile -t staged < <(git diff --cached --name-only --diff-filter=ACM -- '*.md' '*.markdown') + +if [ "${#staged[@]}" -eq 0 ]; then + exit 0 +fi + +echo "pre-commit: checking links in ${#staged[@]} staged Markdown file(s)…" +if ! node "$SCRIPTS_DIR/link-check.js" "${staged[@]}"; then + echo "βœ— Broken Markdown link(s) found β€” commit blocked. Fix them or use 'git commit --no-verify'." + exit 1 +fi From 5215f781186f5149046eacbd4e612d809506ea60 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 27 Jun 2026 12:41:58 +0000 Subject: [PATCH 5/8] Add frontmatter-lint.js: lint YAML frontmatter in Markdown Zero-dependency, network-free Node linter for the --- frontmatter block: required-key checks, YYYY-MM-DD date validation, boolean/list type checks, duplicate-key and unterminated-block detection. Supports --dir recursion, --json, --quiet, and CI exit codes (0/1/2). Practical YAML subset parser (scalars, quoted strings, flow + block lists, booleans, numbers). Tested against the blog-drafts repo plus crafted bad-input fixtures. README section 6 + TOC added; all links verified. 
--- README.md | 34 +++++ frontmatter-lint.js | 313 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 347 insertions(+) create mode 100755 frontmatter-lint.js diff --git a/README.md b/README.md index e268f66..f868d35 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ A curated collection of highly robust, custom-built Node/Python automation scrip 3. [oss-contributor-log.py](#3-oss-contributor-logpy) 4. [markdown-toc.js](#4-markdown-tocjs) 5. [link-check.js](#5-link-checkjs) +6. [frontmatter-lint.js](#6-frontmatter-lintjs) --- @@ -150,6 +151,39 @@ Exports `slugify`, `cleanText`, `parseMarkdown`, `classify`, and `checkFile` for --- +## 6. `frontmatter-lint.js` +> **Catch broken YAML frontmatter before it breaks your site build or your content tracker.** + +A zero-dependency Node script that lints the `---` frontmatter block at the top of Markdown files. It's the third leg of the docs-quality set (`link-check.js` for links, `markdown-toc.js` for the TOC, this for metadata) β€” built for content repos where a typo'd `date` or a `tags` field that's secretly a string silently breaks a static-site build or a posting pipeline. + +### ⚑ Key Features: +* **Required-key checks**: Flags missing or empty keys (default `title,date`, configurable via `--require`). An empty `title: ""` fails too, not just an absent one. +* **Type validation**: `--date-keys` must be a real `YYYY-MM-DD` calendar date (rejects `2026-13-45`); `--bool-keys` must be a true boolean (catches the classic `draft: "true"` string); `--list` keys must be an actual array (catches `tags: ai, llm` that should be `[ai, llm]`). +* **Practical YAML subset**: Parses scalars, quoted strings, flow lists (`[a, b]`), block lists (`- item`), booleans, numbers, and dates. Duplicate keys, unterminated blocks, and unsupported nested maps are reported instead of silently mis-parsed. +* **Whole-tree mode**: `--dir ` recursively lints every `.md`/`.markdown` file (skips `.git`/`node_modules`). 
`--allow-missing` lets files without frontmatter pass. +* **CI / pre-commit ready**: Exit `0` (all clean), `1` (lint problems), or `2` (usage/IO error). `--json` for machine-readable output, `--quiet` to print only failures. +* **Zero dependencies**: Pure Node `fs` + `path`. Network-free and deterministic. + +### πŸš€ Usage: +```bash +# Lint one post +node frontmatter-lint.js post.md + +# Lint a whole content directory with a custom required set +node frontmatter-lint.js --dir blog --require title,date,tags,draft + +# Treat tags AND categories as lists; draft AND featured as booleans +node frontmatter-lint.js post.md --list tags,categories --bool-keys draft,featured + +# CI: fail the build on any frontmatter problem +node frontmatter-lint.js --dir content --require title,date --allow-missing +``` + +### πŸ“¦ Reusable functions: +Exports `extractFrontmatter`, `parseFrontmatter`, `coerceScalar`, `isValidIsoDate`, and `lintFile` for use in your own tooling via `require()`. + +--- + ## πŸ” Wire the docs checks into CI / pre-commit `link-check.js` and `markdown-toc.js` are most useful when they run automatically β€” so docs rot diff --git a/frontmatter-lint.js b/frontmatter-lint.js new file mode 100755 index 0000000..52926e4 --- /dev/null +++ b/frontmatter-lint.js @@ -0,0 +1,313 @@ +#!/usr/bin/env node +/** + * frontmatter-lint.js + * + * Lint the YAML frontmatter block at the top of Markdown files. + * + * Catches the boring mistakes that break static-site builds and content trackers: + * - missing required keys (e.g. title, date) + * - a date that isn't a real YYYY-MM-DD calendar date + * - a field that should be a list but is a bare string (e.g. 
/**
 * (Continuation of the file header comment that opens on the previous physical line.)
 *       tags: ai, llm)
 *   - a field that should be a boolean but is the string "true"
 *   - an empty title, an unterminated frontmatter block, duplicate keys
 *
 * Parses a practical subset of YAML — enough for real frontmatter: scalars,
 * quoted strings, flow lists ([a, b]), block lists (- item), booleans, numbers,
 * dates. Nested maps are reported as unsupported rather than silently mis-parsed.
 *
 * Zero dependencies. Network-free. Works on any Node >= 14.
 *
 * Usage:
 *   node frontmatter-lint.js post.md
 *   node frontmatter-lint.js posts/*.md --require title,date,tags
 *   node frontmatter-lint.js --dir blog --require title,date --list tags
 *   node frontmatter-lint.js post.md --json
 *
 * Exit codes: 0 = all clean   1 = lint problems found   2 = usage/IO error
 */

'use strict';

const fs = require('fs');
const path = require('path');

/** Print the CLI usage text to stdout. */
function printHelp() {
  console.log(`frontmatter-lint.js — lint YAML frontmatter in Markdown files

Usage:
  node frontmatter-lint.js <file.md> [more files...] [options]

Options:
  --require <keys>    Comma-separated keys that MUST be present and non-empty.
                      Default: title,date
  --date-keys <keys>  Comma-separated keys that must be a valid YYYY-MM-DD date.
                      Default: date
  --bool-keys <keys>  Comma-separated keys that must be a real boolean (not "true").
                      Default: draft
  --list <keys>       Comma-separated keys that must be a list (array).
                      Default: tags
  --dir <path>        Recursively lint every .md file under <path>.
  --allow-missing     Files with no frontmatter block pass instead of failing.
  --json              Emit machine-readable JSON instead of text.
  --quiet             Print only files with problems (and the summary).
  --help              Show this help.

Examples:
  node frontmatter-lint.js README.md
  node frontmatter-lint.js --dir blog-drafts --require title,date,tags,draft
  node frontmatter-lint.js post.md --list tags,categories --bool-keys draft,featured
`);
}

// ---- argument parsing -------------------------------------------------------

/**
 * Parse the CLI argument vector (without `node` and the script path).
 *
 * @param {string[]} argv - raw arguments.
 * @returns {object} options: `files`, `require`, `dateKeys`, `boolKeys`,
 *   `listKeys`, `dir`, `allowMissing`, `json`, `quiet`, plus `help` when
 *   --help/-h was seen and `unknown` when an argument could not be accepted.
 *
 * A flag that takes a value but is followed by nothing, or by another `--flag`,
 * is reported through `unknown` (as "<flag> (missing value)") instead of
 * silently emptying that check list or swallowing the next flag as its value.
 */
function parseArgs(argv) {
  const opts = {
    files: [],
    require: ['title', 'date'],
    dateKeys: ['date'],
    boolKeys: ['draft'],
    listKeys: ['tags'],
    dir: null,
    allowMissing: false,
    json: false,
    quiet: false,
  };
  const csv = (v) => v.split(',').map((s) => s.trim()).filter(Boolean);

  // Flags that consume the following token, and how that token is stored.
  const valueFlags = {
    '--require': (v) => { opts.require = csv(v); },
    '--date-keys': (v) => { opts.dateKeys = csv(v); },
    '--bool-keys': (v) => { opts.boolKeys = csv(v); },
    '--list': (v) => { opts.listKeys = csv(v); },
    '--dir': (v) => { opts.dir = v; },
  };

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];

    if (Object.prototype.hasOwnProperty.call(valueFlags, a)) {
      const value = argv[i + 1];
      if (value === undefined || value.startsWith('--')) {
        // Leave the next token in place so it is still parsed as its own flag.
        opts.unknown = `${a} (missing value)`;
        continue;
      }
      valueFlags[a](value);
      i++;
      continue;
    }

    switch (a) {
      case '--help': case '-h': opts.help = true; break;
      case '--allow-missing': opts.allowMissing = true; break;
      case '--json': opts.json = true; break;
      case '--quiet': opts.quiet = true; break;
      default:
        if (a.startsWith('-')) { opts.unknown = a; }
        else { opts.files.push(a); }
    }
  }
  return opts;
}

/**
 * Recursively collect Markdown files (`.md` / `.markdown`, case-insensitive)
 * under `dir`, skipping any entry named `.git` or `node_modules`.
 *
 * @param {string} dir - directory to walk.
 * @param {string[]} [acc] - array the found paths are appended to.
 * @returns {string[]} `acc`.
 * @throws whatever `fs.readdirSync` throws for an unreadable directory.
 */
function walkMarkdown(dir, acc = []) {
  // readdir order is whatever the OS returns; sort by name so runs are repeatable.
  const entries = fs
    .readdirSync(dir, { withFileTypes: true })
    .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  for (const entry of entries) {
    if (entry.name === '.git' || entry.name === 'node_modules') continue;
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) walkMarkdown(full, acc);
    else if (entry.isFile() && /\.(md|markdown)$/i.test(entry.name)) acc.push(full);
  }
  return acc;
}

// ---- frontmatter extraction -------------------------------------------------

// Doc comment for extractFrontmatter (its code starts on the next physical line):
// Pull the frontmatter block delimited by `---` lines at the very top of the file.
// Returns { found, lines, startLine, error }.
/**
 * Pull the frontmatter block delimited by `---` lines at the very top of the file.
 *
 * A leading UTF-8 byte-order mark is ignored, and the delimiter lines may carry
 * trailing spaces/tabs, so such files are not misreported as having no
 * frontmatter.
 *
 * @param {string} text - full file contents.
 * @returns {{found: boolean, lines?: string[], startLine?: number, error?: string}}
 *   `{ found: false }` when line 1 is not `---`; `{ found: true, lines, startLine }`
 *   (lines between the delimiters, startLine always 2) when the block is closed
 *   by `---` or `...`; `{ found: true, error }` when it is never closed.
 */
function extractFrontmatter(text) {
  const lines = text.replace(/^\uFEFF/, '').split(/\r?\n/);
  const isOpener = (l) => /^---[ \t]*$/.test(l);
  const isCloser = (l) => /^(?:---|\.\.\.)[ \t]*$/.test(l);

  if (!isOpener(lines[0])) return { found: false };
  for (let i = 1; i < lines.length; i++) {
    if (isCloser(lines[i])) {
      return { found: true, lines: lines.slice(1, i), startLine: 2 };
    }
  }
  return { found: true, error: 'frontmatter block opened with --- but never closed' };
}

// ---- minimal YAML subset parser ---------------------------------------------

/**
 * Trim `s` and, if it is wrapped in a matching pair of single or double
 * quotes, drop that pair. Escape sequences inside the quotes are left as-is.
 */
function stripQuotes(s) {
  const t = s.trim();
  if (t.length >= 2 && ((t[0] === '"' && t.endsWith('"')) || (t[0] === "'" && t.endsWith("'")))) {
    return t.slice(1, -1);
  }
  return t;
}

/**
 * Split the inside of a flow list (`a, "b, c", d`) on top-level commas.
 * A quote only opens at the start of an item, so an apostrophe inside a plain
 * word (`it's`) does not start a quoted span.
 */
function splitFlowItems(inner) {
  const items = [];
  let current = '';
  let quote = null;

  for (let i = 0; i < inner.length; i++) {
    const ch = inner[i];
    if (quote !== null) {
      current += ch;
      if (quote === '"' && ch === '\\' && i + 1 < inner.length) {
        current += inner[++i]; // keep an escaped char (e.g. \") inside the item
      } else if (ch === quote) {
        quote = null;
      }
    } else if ((ch === '"' || ch === "'") && current.trim() === '') {
      quote = ch;
      current += ch;
    } else if (ch === ',') {
      items.push(stripQuotes(current));
      current = '';
    } else {
      current += ch;
    }
  }
  items.push(stripQuotes(current));
  return items;
}

/**
 * Classify a raw scalar (the text after `key:`).
 *
 * @param {string} raw
 * @returns {{type: string, value: *}} type is one of 'empty', 'bool', 'null',
 *   'number', 'list' (flow list `[a, b]`, items unquoted) or 'string'
 *   (surrounding quotes removed). A quoted "true" is therefore a string.
 */
function coerceScalar(raw) {
  const t = raw.trim();
  if (t === '') return { type: 'empty', value: '' };
  if (t === 'true' || t === 'false') return { type: 'bool', value: t === 'true' };
  if (t === 'null' || t === '~') return { type: 'null', value: null };
  if (/^-?\d+(\.\d+)?$/.test(t)) return { type: 'number', value: Number(t) };
  // flow list: [a, b, c]
  if (t.startsWith('[') && t.endsWith(']')) {
    const inner = t.slice(1, -1).trim();
    return { type: 'list', value: inner === '' ? [] : splitFlowItems(inner) };
  }
  return { type: 'string', value: stripQuotes(t) };
}

// Doc comment for parseFrontmatter (its code starts on the next physical line):
// Parse the frontmatter lines into { data, problems } where problems are
// structural parse issues (duplicate keys, bad indentation, nested maps).
/**
 * Parse the frontmatter lines into { data, problems } where problems are
 * structural parse issues (duplicate keys, unparseable lines, nested maps).
 *
 * @param {string[]} fmLines - lines between the `---` delimiters.
 * @param {number} startLine - 1-based file line number of `fmLines[0]`.
 * @returns {{data: object, problems: Array<{line: number, msg: string}>}}
 *   `data` maps each top-level key to `{ type, value, line }`. A key with no
 *   inline value starts as type 'empty' with `value: []` and becomes a 'list'
 *   once a `- item` line follows it.
 */
function parseFrontmatter(fmLines, startLine) {
  const data = {};
  const problems = [];
  let openList = null; // node of the last value-less key; may still receive `- item` lines

  const hasKey = (key) => Object.prototype.hasOwnProperty.call(data, key);
  // defineProperty keeps keys such as "__proto__" as ordinary own entries.
  const store = (key, node) => {
    Object.defineProperty(data, key, { value: node, writable: true, enumerable: true, configurable: true });
    return node;
  };

  fmLines.forEach((line, i) => {
    const ln = startLine + i;
    const trimmed = line.trim();
    if (trimmed === '' || trimmed.startsWith('#')) return;

    // block-list item belonging to the previous key
    const listItem = line.match(/^(\s*)-\s+(.*)$/);
    if (listItem && openList) {
      openList.value.push(stripQuotes(listItem[2]));
      openList.type = 'list';
      return;
    }

    const kv = line.match(/^(\s*)([^:\s][^:]*):\s?(.*)$/);
    if (!kv) {
      problems.push({ line: ln, msg: `could not parse line: ${JSON.stringify(line)}` });
      return;
    }
    const indent = kv[1].length;
    const key = kv[2].trim();
    const rest = kv[3];

    if (indent > 0) {
      problems.push({ line: ln, msg: `nested/indented key "${key}" — nested maps aren't supported in frontmatter linting` });
      return;
    }
    if (hasKey(key)) {
      // reported, but the later value still wins
      problems.push({ line: ln, msg: `duplicate key "${key}"` });
    }

    if (rest.trim() === '') {
      // could be the start of a block list, or an empty scalar
      openList = store(key, { type: 'empty', value: [], line: ln });
    } else {
      const sc = coerceScalar(rest);
      store(key, { type: sc.type, value: sc.value, line: ln });
      openList = null;
    }
  });

  return { data, problems };
}

// ---- validation -------------------------------------------------------------

/**
 * True when `s` is a string of the exact form YYYY-MM-DD naming a real
 * (proleptic Gregorian) calendar date. Computed arithmetically rather than via
 * `Date.UTC`, which maps years 0–99 to 1900–1999 and would reject them.
 */
function isValidIsoDate(s) {
  if (typeof s !== 'string') return false;
  const m = /^(\d{4})-(\d{2})-(\d{2})$/.exec(s);
  if (!m) return false;
  const year = Number(m[1]);
  const month = Number(m[2]);
  const day = Number(m[3]);
  if (month < 1 || month > 12 || day < 1) return false;
  const leap = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysInMonth = [31, leap ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  return day <= daysInMonth[month - 1];
}

/**
 * Lint one file's frontmatter.
 *
 * @param {string} file - path to read (UTF-8).
 * @param {object} opts - uses `allowMissing`, `require`, `dateKeys`, `boolKeys`, `listKeys`.
 * @returns {{file: string, ok: boolean, issues: Array<{line?: number, msg: string}>,
 *   skipped?: boolean, data?: object}} `ok` is false when the file cannot be
 *   read, has no or unterminated frontmatter (unless `allowMissing` covers the
 *   former), or any check produced an issue. Never throws for a read error.
 */
function lintFile(file, opts) {
  let text;
  try {
    text = fs.readFileSync(file, 'utf8');
  } catch (e) {
    return { file, ok: false, issues: [{ msg: `cannot read file: ${e.message}` }] };
  }

  const fm = extractFrontmatter(text);
  if (!fm.found) {
    if (opts.allowMissing) return { file, ok: true, issues: [], skipped: true };
    return { file, ok: false, issues: [{ msg: 'no frontmatter block (expected --- at line 1)' }] };
  }
  if (fm.error) return { file, ok: false, issues: [{ line: 1, msg: fm.error }] };

  const { data, problems } = parseFrontmatter(fm.lines, fm.startLine);
  const issues = [...problems];

  // Own-property lookup: a key named e.g. "constructor" must not resolve to
  // something inherited from Object.prototype.
  const nodeFor = (key) => (Object.prototype.hasOwnProperty.call(data, key) ? data[key] : undefined);

  // required keys present + non-empty
  for (const key of opts.require) {
    const node = nodeFor(key);
    if (!node) { issues.push({ msg: `missing required key "${key}"` }); continue; }
    const empty = node.type === 'empty' ||
      (node.type === 'string' && node.value === '') ||
      (node.type === 'list' && node.value.length === 0);
    if (empty) issues.push({ line: node.line, msg: `required key "${key}" is empty` });
  }

  // date keys
  for (const key of opts.dateKeys) {
    const node = nodeFor(key);
    if (!node) continue;
    if (!isValidIsoDate(String(node.value))) {
      issues.push({ line: node.line, msg: `"${key}" is not a valid YYYY-MM-DD date: ${JSON.stringify(node.value)}` });
    }
  }

  // boolean keys
  for (const key of opts.boolKeys) {
    const node = nodeFor(key);
    if (!node) continue;
    if (node.type !== 'bool') {
      issues.push({ line: node.line, msg: `"${key}" should be a boolean (true/false), got ${node.type}: ${JSON.stringify(node.value)}` });
    }
  }

  // list keys
  for (const key of opts.listKeys) {
    const node = nodeFor(key);
    if (!node) continue;
    if (node.type !== 'list') {
      issues.push({ line: node.line, msg: `"${key}" should be a list, got ${node.type}: ${JSON.stringify(node.value)}` });
    }
  }

  return { file, ok: issues.length === 0, issues, data };
}

// ---- main -------------------------------------------------------------------

/**
 * CLI entry point. Sets `process.exitCode` (0 clean, 1 lint problems,
 * 2 usage/IO error) and returns, rather than calling `process.exit()`:
 * exiting immediately after `console.log` can cut off output that has not
 * been flushed yet when stdout is a pipe.
 */
function main() {
  const opts = parseArgs(process.argv.slice(2));
  if (opts.help) {
    printHelp();
    process.exitCode = 0;
    return;
  }
  if (opts.unknown) {
    console.error(`Unknown option: ${opts.unknown}\n`);
    printHelp();
    process.exitCode = 2;
    return;
  }

  let files = opts.files.slice();
  if (opts.dir) {
    try {
      walkMarkdown(opts.dir, files);
    } catch (e) {
      console.error(`Cannot read --dir ${opts.dir}: ${e.message}`);
      process.exitCode = 2;
      return;
    }
  }
  if (files.length === 0) {
    console.error('No files given. Pass file paths or --dir <path>.\n');
    printHelp();
    process.exitCode = 2;
    return;
  }
  // de-dup while preserving order
  files = [...new Set(files)];

  const results = files.map((f) => lintFile(f, opts));
  const failed = results.filter((r) => !r.ok);
  process.exitCode = failed.length ? 1 : 0;

  if (opts.json) {
    console.log(JSON.stringify({ total: results.length, failed: failed.length, results }, null, 2));
    return;
  }

  for (const r of results) {
    if (r.ok) {
      if (!opts.quiet) console.log(`✓ ${r.file}${r.skipped ? ' (no frontmatter, allowed)' : ''}`);
      continue;
    }
    console.log(`✗ ${r.file}`);
    for (const it of r.issues) {
      console.log(`  ${it.line ? `line ${it.line}: ` : ''}${it.msg}`);
    }
  }

  console.log(`\n${results.length - failed.length}/${results.length} file(s) clean.`);
}

// Guarded so these definitions can also be evaluated where the CommonJS
// `require`/`module` bindings are absent; under Node's CommonJS loader this is
// the same as the unguarded form.
if (typeof require !== 'undefined' && typeof module !== 'undefined' && require.main === module) main();

if (typeof module !== 'undefined') {
  module.exports = { extractFrontmatter, parseFrontmatter, coerceScalar, isValidIsoDate, lintFile };
}

/*
 * Verbatim remainder of this physical line. It is the header of the next patch
 * in the series, not part of frontmatter-lint.js; kept so no content is lost:
 *
From e47e2a2c57fc63f6516f30f9a5a5aedecc831e7e Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 28 Jun 2026 12:46:31 +0000 Subject: [PATCH 6/8] Add heading-lint.js: lint Markdown heading structure (duplicate slugs, skipped levels) --- README.md | 37 ++++++ heading-lint.js | 314 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 351 insertions(+) create mode 100644 heading-lint.js diff --git a/README.md b/README.md index f868d35..acb4c44 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ A curated collection of highly robust, custom-built Node/Python automation scrip 4.
 */
[markdown-toc.js](#4-markdown-tocjs) 5. [link-check.js](#5-link-checkjs) 6. [frontmatter-lint.js](#6-frontmatter-lintjs) +7. [heading-lint.js](#7-heading-lintjs) --- @@ -184,6 +185,42 @@ Exports `extractFrontmatter`, `parseFrontmatter`, `coerceScalar`, `isValidIsoDat --- +## 7. `heading-lint.js` +> **Catch the heading-structure bugs that silently break your README's anchors and table of contents.** + +A zero-dependency Node script that lints the heading *structure* of Markdown files. It's the upstream companion to `link-check.js`: link-check tells you a `#anchor` is broken; heading-lint tells you **why** β€” almost always a duplicate heading or a skipped level. It uses the **same** GitHub slug algorithm and the **same** code-fence skipping as `markdown-toc.js` and `link-check.js`, so its idea of a "duplicate anchor" matches exactly what GitHub (and those tools) compute. + +### ⚑ Key Features: +* **Duplicate-slug detection**: Two headings that resolve to the same GitHub anchor (e.g. two `## Setup`s). GitHub silently renames the second to `setup-1`, so a `[link](#setup)` lands on the wrong heading β€” the #1 cause of "the link looks right but jumps to the wrong place." +* **Skipped-level detection**: Flags outline jumps like H2 β†’ H4 (skipping H3) that break the document structure and most TOC generators. +* **H1 hygiene**: Flags multiple H1s (`--allow-multiple-h1` to disable) and missing H1 (`--no-require-h1` to disable). `--require-h1` additionally requires the *first* heading to be the H1. +* **Empty-heading detection**: Catches a bare marker (`## `) with no text. +* **CI / pre-commit ready**: Exit `0` (clean), `1` (problems), `2` (usage/IO error). `--json` for machine-readable output, `--quiet` to print only failures. +* **Zero dependencies**: Pure Node `fs`. Network-free and deterministic. 
+ +### πŸš€ Usage: +```bash +# Lint one file +node heading-lint.js README.md + +# Lint several, machine-readable +node heading-lint.js README.md docs/*.md --json + +# A partial/included doc that legitimately has no top-level H1 +node heading-lint.js CHANGELOG.md --no-require-h1 + +# CI: fail the build on any heading problem +node heading-lint.js README.md +``` + +### πŸ• Dogfood note (honest output): +Run it on *this* README and it reports the repeated `⚑ Key Features:` / `πŸš€ Usage:` / `πŸ“¦ Reusable functions:` subsection labels as `duplicate-slug`. That's the tool working correctly β€” those headings really do collide into the same anchors. They're harmless *here* only because the Table of Contents links to the numbered section titles (`#1-auto-classify-projectsjs`), never the bare labels. The moment anyone writes `[see usage](#-usage)`, it would resolve to the first one. The lesson the tool is teaching: decorative repeated headings are a latent anchor bug β€” make subsection headings unique if anything links to them. + +### πŸ“¦ Reusable functions: +Exports `slugify`, `cleanText`, `extractHeadings`, and `lintHeadings` for use in your own tooling via `require()`. + +--- + ## πŸ” Wire the docs checks into CI / pre-commit `link-check.js` and `markdown-toc.js` are most useful when they run automatically β€” so docs rot diff --git a/heading-lint.js b/heading-lint.js new file mode 100644 index 0000000..cc1a20f --- /dev/null +++ b/heading-lint.js @@ -0,0 +1,314 @@ +#!/usr/bin/env node +/** + * heading-lint.js + * + * Lint the heading STRUCTURE of Markdown files β€” the problems that quietly break + * a README's table of contents and in-page anchors. It's the upstream companion + * to link-check.js: link-check tells you a `#anchor` is broken; this tells you + * WHY β€” usually a duplicate heading or a missing level the slugger mangled. + * + * Checks (each can be toggled off): + * + * 1. duplicate-slug Two headings that produce the SAME GitHub anchor slug. 
/**
 * (Continuation of the file header comment that opens on the previous physical line.)
 *                      GitHub disambiguates the second as `name-1`, so a
 *                      [link](#name) silently lands on the wrong one — or your
 *                      hand-written TOC points at an anchor that moved. This is
 *                      the single most common cause of a "link looks right but
 *                      jumps to the wrong place" bug.
 *   2. skipped-level   A jump like H2 -> H4 (skipping H3). Breaks document
 *                      outline / accessibility and most TOC generators.
 *   3. multiple-h1     More than one H1 in a file. A README should have exactly
 *                      one title. (Toggle with --allow-multiple-h1.)
 *   4. no-h1           File has no H1 at all. (Toggle with --no-require-h1.)
 *   5. empty-heading   A heading marker with no text (`## `).
 *   6. first-not-h1    The first heading isn't an H1 (starts at H2+). Off by
 *                      default for non-README docs; enable with --require-h1.
 *
 * The slug algorithm and code-fence skipping are meant to match
 * markdown-toc.js / link-check.js, so that "duplicate anchor" means the same
 * thing in all three tools.
 * NOTE(review): those scripts are not visible from here — confirm they treat
 * non-ASCII headings and nested/longer fences the same way this file does.
 *
 * Zero dependencies. Works on any Node >= 14.
 *
 * Usage:
 *   node heading-lint.js README.md                      # lint one file
 *   node heading-lint.js README.md docs/*.md            # lint several
 *   node heading-lint.js README.md --json               # machine-readable report
 *   node heading-lint.js docs/page.md --no-require-h1   # don't flag missing H1
 *   node heading-lint.js README.md --allow-multiple-h1
 *   node heading-lint.js --help
 *
 * Exit codes (CI / pre-commit friendly):
 *   0  no heading problems
 *   1  one or more problems found
 *   2  usage error (no files, missing file, bad flag)
 */

'use strict';

const fs = require('fs');

/** Print the CLI usage text to stdout. */
function printHelp() {
  console.log(`heading-lint.js — lint Markdown heading structure

Usage:
  node heading-lint.js <file.md> [more.md ...] [options]

Options:
  --json               Emit a JSON report instead of human-readable text.
  --quiet              Only print problems (nothing on a clean file).
  --require-h1         Require the FIRST heading to be an H1 (default: only
                       require that an H1 exists somewhere).
  --no-require-h1      Don't require an H1 at all (good for partial/included docs).
  --allow-multiple-h1  Don't flag a file that has more than one H1.
  --help               Show this help.

Checks: duplicate-slug, skipped-level, multiple-h1, no-h1, empty-heading,
        first-not-h1 (only with --require-h1).

Exit codes:
  0  clean
  1  problem(s) found
  2  usage error

Examples:
  node heading-lint.js README.md
  node heading-lint.js README.md docs/*.md --json
  node heading-lint.js CHANGELOG.md --no-require-h1`);
}

/**
 * Undisambiguated anchor for a heading: trim, lowercase, drop everything that
 * is not a letter, mark, number, connector punctuation (`_`), whitespace or
 * hyphen, then turn each space into a hyphen (runs are not collapsed).
 * Unicode-aware: an ASCII-only `\w` would strip every non-ASCII letter and
 * reduce e.g. all-CJK headings to the same empty slug.
 */
function baseSlug(text) {
  return text
    .trim()
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\p{Pc}\s-]/gu, '')
    .replace(/ /g, '-');
}

/**
 * Anchor slug for `text`, approximating GitHub's algorithm, with duplicates
 * disambiguated as base, base-1, base-2, ...
 *
 * @param {string} text - heading text (inline markup already removed).
 * @param {Map<string, number>} seen - per-document state, mutated. For each
 *   base slug it holds the next suffix to try; slugs generated by suffixing
 *   are recorded too, so a later literal heading "foo-1" cannot reuse an
 *   anchor already handed out to a second "foo" (and vice versa).
 * @returns {string} the unique anchor (without the leading `#`).
 */
function slugify(text, seen) {
  const base = baseSlug(text);
  let n = seen.get(base) || 0;
  let slug = n === 0 ? base : `${base}-${n}`;
  // Skip suffixes that are already taken by another heading.
  while (n > 0 && seen.has(slug)) {
    n += 1;
    slug = `${base}-${n}`;
  }
  seen.set(base, n + 1);
  if (slug !== base) seen.set(slug, 1);
  return slug;
}

/** Strip inline markdown so the displayed heading text is clean for slugging. */
function cleanText(text) {
  return text
    .replace(/\[([^\]]+)\]\([^)]*\)/g, '$1') // [label](url) -> label
    .replace(/[`*_~]/g, '') // code/bold/italic markers
    .trim();
}

/**
 * Extract ATX headings, skipping fenced code blocks (``` or ~~~).
 *
 * Fences follow the CommonMark rules that matter here: a fence is closed only
 * by a run of the same character at least as long as the opener with nothing
 * after it, and a backtick "opener" whose trailing text contains a backtick is
 * inline code, not a fence. A closing `#` sequence is stripped from a heading
 * only when whitespace precedes it, so `## C#` keeps its text.
 *
 * @param {string} markdown
 * @returns {Array<{level: number, text: string, line: number}>} line is 1-based;
 *   text is '' for a bare marker.
 */
function extractHeadings(markdown) {
  const lines = markdown.split(/\r?\n/);
  const headings = [];
  let fence = null; // { char, length } of the open fence, or null

  lines.forEach((line, i) => {
    const fm = /^\s*(`{3,}|~{3,})(.*)$/.exec(line);
    if (fm) {
      const run = fm[1];
      const rest = fm[2];
      if (fence !== null) {
        const closes = run[0] === fence.char && run.length >= fence.length && rest.trim() === '';
        if (closes) fence = null;
        return; // either way this line is part of the code block
      }
      if (!(run[0] === '`' && rest.includes('`'))) {
        fence = { char: run[0], length: run.length };
        return;
      }
      // otherwise: inline code at the start of a paragraph; fall through
    }
    if (fence !== null) return;

    const h = /^(#{1,6})(?:[ \t]+(.*?))??(?:[ \t]+#+)?[ \t]*$/.exec(line);
    if (!h) return;
    headings.push({ level: h[1].length, text: (h[2] || '').trim(), line: i + 1 });
  });
  return headings;
}

/**
 * Lint one file's heading list. Returns an array of problem objects:
 *   { rule, line, level, text, message }.
 *
 * @param {string} markdown
 * @param {{requireH1?: boolean, firstMustBeH1?: boolean, allowMultipleH1?: boolean}} [opts]
 *   defaults: requireH1 true, firstMustBeH1 false, allowMultipleH1 false.
 *
 * Level-based checks (first-not-h1, skipped-level) run for every heading,
 * including empty ones, so an empty H3 between an H2 and an H4 does not make
 * the H4 look like a skipped level. Slug checks and H1 counting only consider
 * headings that have text.
 */
function lintHeadings(markdown, opts) {
  const o = Object.assign(
    { requireH1: true, firstMustBeH1: false, allowMultipleH1: false },
    opts
  );
  const headings = extractHeadings(markdown);
  const problems = [];
  const seenSlug = new Map(); // slugify() state for this document
  const anchorOwner = new Map(); // final anchor -> { line, text } of the heading that got it
  const titledH1s = []; // non-empty H1 headings, in document order
  let prevLevel = null;

  headings.forEach((h, idx) => {
    let display = '';

    if (!h.text) {
      problems.push({
        rule: 'empty-heading',
        line: h.line,
        level: h.level,
        text: '',
        message: `Empty H${h.level} heading (marker with no text)`,
      });
    } else {
      display = cleanText(h.text);

      // duplicate-slug: the anchor this heading wanted is already taken
      const base = baseSlug(display);
      const anchor = slugify(display, seenSlug);
      if (anchor !== base) {
        const first = anchorOwner.get(base) || { line: h.line, text: display };
        problems.push({
          rule: 'duplicate-slug',
          line: h.line,
          level: h.level,
          text: display,
          message: `Duplicate heading slug "#${base}" (first used at line ${first.line}: "${first.text}"). GitHub will rename this anchor to "#${anchor}" — links to "#${base}" may resolve to the wrong heading.`,
        });
      }
      anchorOwner.set(anchor, { line: h.line, text: display });

      // h1 bookkeeping
      if (h.level === 1) titledH1s.push(h);
    }

    // first-not-h1
    if (idx === 0 && o.firstMustBeH1 && h.level !== 1) {
      problems.push({
        rule: 'first-not-h1',
        line: h.line,
        level: h.level,
        text: display,
        message: `First heading is H${h.level}, expected H1`,
      });
    }

    // skipped-level (only on the way DOWN; jumping back up any number of levels is fine)
    if (prevLevel !== null && h.level > prevLevel + 1) {
      problems.push({
        rule: 'skipped-level',
        line: h.line,
        level: h.level,
        text: display,
        message: `Heading level jumps from H${prevLevel} to H${h.level} (skips H${prevLevel + 1})`,
      });
    }
    prevLevel = h.level;
  });

  // multiple-h1
  if (!o.allowMultipleH1 && titledH1s.length > 1) {
    const firstExtra = titledH1s[1];
    problems.push({
      rule: 'multiple-h1',
      line: firstExtra.line,
      level: 1,
      text: cleanText(firstExtra.text),
      message: `File has ${titledH1s.length} H1 headings; expected exactly one title`,
    });
  }

  // no-h1
  if (o.requireH1 && titledH1s.length === 0) {
    problems.push({
      rule: 'no-h1',
      line: 1,
      level: 0,
      text: '',
      message: 'File has no H1 heading (no top-level title)',
    });
  }

  return problems;
}

/**
 * Parse CLI arguments into `{ opts, files }`. Parsing stops at the first
 * unrecognised `-flag`, which is returned as `opts.badFlag`.
 */
function parseArgs(argv) {
  const opts = {
    json: false,
    quiet: false,
    requireH1: true,
    firstMustBeH1: false,
    allowMultipleH1: false,
    help: false,
  };
  const files = [];
  for (const arg of argv) {
    switch (arg) {
      case '--json': opts.json = true; break;
      case '--quiet': opts.quiet = true; break;
      case '--require-h1': opts.firstMustBeH1 = true; opts.requireH1 = true; break;
      case '--no-require-h1': opts.requireH1 = false; opts.firstMustBeH1 = false; break;
      case '--allow-multiple-h1': opts.allowMultipleH1 = true; break;
      case '--help': case '-h': opts.help = true; break;
      default:
        if (arg.startsWith('-')) { opts.badFlag = arg; return { opts, files }; }
        files.push(arg);
    }
  }
  return { opts, files };
}

/**
 * CLI entry point. Sets `process.exitCode` (0 clean, 1 problems, 2 usage
 * error / unreadable file) and returns instead of calling `process.exit()`,
 * so report output written to a pipe is not cut off before it is flushed.
 */
function main() {
  const { opts, files } = parseArgs(process.argv.slice(2));

  if (opts.help) {
    printHelp();
    process.exitCode = 0;
    return;
  }
  if (opts.badFlag) {
    console.error(`Unknown flag: ${opts.badFlag}\n`);
    printHelp();
    process.exitCode = 2;
    return;
  }
  if (files.length === 0) {
    console.error('Error: no Markdown files given.\n');
    printHelp();
    process.exitCode = 2;
    return;
  }

  const report = [];
  let totalProblems = 0;
  let usageError = false;

  for (const file of files) {
    let markdown;
    try {
      markdown = fs.readFileSync(file, 'utf8');
    } catch (e) {
      console.error(`Error: cannot read ${file} (${e.code || e.message})`);
      usageError = true;
      continue;
    }
    const problems = lintHeadings(markdown, opts);
    totalProblems += problems.length;
    report.push({ file, problems });
  }

  if (opts.json) {
    console.log(JSON.stringify({ ok: totalProblems === 0 && !usageError, usageError, files: report }, null, 2));
  } else {
    for (const { file, problems } of report) {
      if (problems.length === 0) {
        if (!opts.quiet) console.log(`✓ ${file} — heading structure clean`);
        continue;
      }
      console.log(`✗ ${file} — ${problems.length} problem(s):`);
      for (const p of problems) {
        const where = p.line ? `line ${p.line}` : '—';
        console.log(`    [${p.rule}] ${where}: ${p.message}`);
      }
    }
    if (!opts.quiet && totalProblems === 0) console.log('\nAll heading checks passed.');
  }

  if (usageError) process.exitCode = 2;
  else process.exitCode = totalProblems === 0 ? 0 : 1;
}

// Guarded so these definitions can also be evaluated where the CommonJS
// `module` binding is absent; under Node's CommonJS loader this is the same as
// the unguarded form.
if (typeof require !== 'undefined' && typeof module !== 'undefined' && require.main === module) main();

if (typeof module !== 'undefined') {
  module.exports = { slugify, cleanText, extractHeadings, lintHeadings };
}

/*
 * Verbatim remainder of this physical line. It is the header and first README
 * hunk of the next patch in the series, not part of heading-lint.js; kept so
 * no content is lost:
 *
From 23f14f28964522aa8c7f1aa5f79d9066a2e0b04e Mon Sep 17 00:00:00 2001 From: "Pradyoth (via Claude Code)" Date: Mon, 29 Jun 2026 12:45:10 +0000 Subject: [PATCH 7/8] Add table-fmt.js: align GFM tables (gofmt for Markdown tables), zero-dep, --write/--check --- README.md | 37 ++++++ table-fmt.js | 336 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 373 insertions(+) create mode 100755 table-fmt.js diff --git a/README.md b/README.md index acb4c44..eff86d0 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ A curated collection of highly robust, custom-built Node/Python automation scrip 5. [link-check.js](#5-link-checkjs) 6. [frontmatter-lint.js](#6-frontmatter-lintjs) 7. [heading-lint.js](#7-heading-lintjs) +8. [table-fmt.js](#8-table-fmtjs) --- @@ -221,6 +222,42 @@ Exports `slugify`, `cleanText`, `extractHeadings`, and `lintHeadings` for use in --- +## 8. `table-fmt.js` +> **`gofmt` for your Markdown tables — align every column so the raw source is readable and diffs stay clean.** + +A zero-dependency Node script that reformats GitHub-Flavored Markdown tables: it pads every column to an even width and rewrites the delimiter row to honor each column's alignment.
 */
The rendered HTML is unchanged — this only fixes the source whitespace, the single most tedious thing to maintain by hand after you edit a cell. It's the formatting companion to the lint family above: those tell you something is wrong; this one quietly fixes it. + +### ⚑ Key Features: +* **Column alignment**: Pads each column to its widest cell (min width 3) and justifies body cells **left** (`:---`), **right** (`---:`), **center** (`:--:`), or default per the delimiter row. +* **Ragged-row repair**: Pads short rows with empty cells and drops extras to the header's column count — exactly how GitHub renders a ragged table. +* **Code-fence aware**: Tables inside fenced blocks (```` ``` ```` / `~~~`) are left untouched, using the same fence-skipping as the other scripts here. +* **Idempotent**: Formatting twice produces byte-identical output — safe to run in a loop or a hook. +* **CI / pre-commit ready**: `--check` exits `1` if any file isn't already formatted; `--write` fixes in place; `--json` for machine-readable reports. Reads stdin with `-`. +* **Zero dependencies**: Pure Node `fs`. Network-free and deterministic. + +### 🚀 Usage: +```bash +# Print a formatted copy to stdout +node table-fmt.js README.md + +# Rewrite files in place +node table-fmt.js README.md docs/*.md --write + +# CI: fail if any table isn't aligned +node table-fmt.js *.md --check + +# Pipe through it +cat doc.md | node table-fmt.js - +``` + +### ⚠️ Honest limitation: +Column width is measured in Unicode **code points**, not terminal display columns. Wide CJK characters and emoji take two cells in a monospace editor, so a table full of them can look slightly off even when the tool considers it aligned. ASCII tables — the common case — align exactly. + +### 📦 Reusable functions: +Exports `splitCells`, `isDelimiterRow`, `alignmentOf`, `formatTables`, and `formatOneTable` for use in your own tooling via `require()`.
+ +--- + ## πŸ” Wire the docs checks into CI / pre-commit `link-check.js` and `markdown-toc.js` are most useful when they run automatically β€” so docs rot diff --git a/table-fmt.js b/table-fmt.js new file mode 100755 index 0000000..8736b17 --- /dev/null +++ b/table-fmt.js @@ -0,0 +1,336 @@ +#!/usr/bin/env node +/** + * table-fmt.js + * + * Format (align) GitHub-Flavored Markdown tables β€” `gofmt` for the pipe tables + * in your README. Pads every column to an even width and renders the delimiter + * row to match each column's declared alignment, so the raw Markdown is + * readable in an editor and diffs stay clean. The rendered HTML is unchanged; + * this only touches the source whitespace. + * + * It's the formatting companion to the lint family in this repo + * (markdown-toc.js / link-check.js / heading-lint.js): those tell you something + * is wrong; this one quietly fixes the most tedious thing by hand β€” keeping a + * table's columns aligned after you edit a cell. + * + * What it does to each table: + * - Trims and re-pads every cell so column borders line up. + * - Pads each column to the widest cell in that column (min width 3). + * - Rewrites the delimiter row to honor alignment: left (`:---`), + * right (`---:`), center (`:--:`), or none (`---`). + * - Justifies body cells to their column's alignment (left/right/center). + * - Pads short rows with empty cells and drops extra cells to the header's + * column count β€” exactly how GitHub renders a ragged table. + * - Leaves tables inside fenced code blocks (``` / ~~~) untouched. + * - Is idempotent: formatting twice produces identical output. + * + * Honest limitation: column width is measured in Unicode code points + * (`Array.from(s).length`), not terminal display columns. Wide CJK characters + * and emoji occupy two cells in a monospace editor, so a table full of them may + * look slightly off even when this tool considers it aligned.
ASCII tables β€” + * the overwhelming common case β€” align exactly. + * + * Zero dependencies. Works on any Node >= 14. + * + * Usage: + * node table-fmt.js README.md # print formatted README to stdout + * node table-fmt.js README.md --write # rewrite README.md in place + * node table-fmt.js *.md --write # format many files in place + * node table-fmt.js README.md --check # exit 1 if it isn't already formatted + * node table-fmt.js *.md --check --json # machine-readable check report + * cat doc.md | node table-fmt.js - # read stdin, write formatted to stdout + * node table-fmt.js --help + * + * Exit codes (CI / pre-commit friendly): + * 0 success β€” formatted (stdout/--write), or --check found nothing to change + * 1 --check found files that need formatting + * 2 usage error (no files, missing file, bad flag) + */ + +'use strict'; + +const fs = require('fs'); + +// --- core: split a table row into trimmed cells, respecting escaped pipes --- +function splitCells(line) { + let s = line.trim(); + // Optional leading/trailing pipe. A trailing pipe is only a border if it + // isn't escaped (`\|`). + if (s.startsWith('|')) s = s.slice(1); + if (s.endsWith('|') && !s.endsWith('\\|')) s = s.slice(0, -1); + + const cells = []; + let buf = ''; + for (let i = 0; i < s.length; i++) { + const c = s[i]; + if (c === '\\' && i + 1 < s.length) { + // keep escape sequences (e.g. \| ) intact inside the cell + buf += c + s[i + 1]; + i++; + continue; + } + if (c === '|') { + cells.push(buf.trim()); + buf = ''; + continue; + } + buf += c; + } + cells.push(buf.trim()); + return cells; +} + +// A delimiter cell looks like ---, :---, ---:, or :--: +function isDelimiterCell(cell) { + return /^:?-+:?$/.test(cell.trim()); +} + +// Is this line the delimiter (separator) row of a table? 
+function isDelimiterRow(line) { + if (!line.includes('|') && !/^[\s:|-]+$/.test(line)) return false; + const cells = splitCells(line); + if (cells.length === 0) return false; + return cells.every((c) => isDelimiterCell(c)); +} + +// A plausible table row contains at least one unescaped pipe. +function looksLikeRow(line) { + const s = line.trim(); + if (s === '') return false; + // strip escaped pipes, then require a real pipe somewhere + return s.replace(/\\\|/g, '').includes('|'); +} + +function alignmentOf(delimCell) { + const c = delimCell.trim(); + const left = c.startsWith(':'); + const right = c.endsWith(':'); + if (left && right) return 'center'; + if (right) return 'right'; + if (left) return 'left'; + return 'none'; +} + +const cpLen = (s) => Array.from(s).length; + +function padCell(cell, width, align) { + const pad = width - cpLen(cell); + if (pad <= 0) return cell; + if (align === 'right') return ' '.repeat(pad) + cell; + if (align === 'center') { + const l = Math.floor(pad / 2); + return ' '.repeat(l) + cell + ' '.repeat(pad - l); + } + return cell + ' '.repeat(pad); // left / none +} + +function renderDelimiter(width, align) { + const w = Math.max(width, 3); + switch (align) { + case 'left': + return ':' + '-'.repeat(w - 1); + case 'right': + return '-'.repeat(w - 1) + ':'; + case 'center': + return ':' + '-'.repeat(w - 2) + ':'; + default: + return '-'.repeat(w); + } +} + +// Format one table given its raw lines (header, delimiter, ...body). +// `indent` is the leading whitespace to preserve on every emitted line. +function formatOneTable(rawLines, indent) { + const header = splitCells(rawLines[0]); + const delims = splitCells(rawLines[1]); + const cols = header.length; + const aligns = []; + for (let i = 0; i < cols; i++) aligns.push(alignmentOf(delims[i] || '---')); + + // Normalize every body row to exactly `cols` cells (pad short, drop extra). 
+ const bodyRows = rawLines.slice(2).map((l) => { + const cells = splitCells(l); + while (cells.length < cols) cells.push(''); + cells.length = cols; + return cells; + }); + + // Column widths from header + body (delimiter handled by min-3 in render). + const widths = []; + for (let i = 0; i < cols; i++) { + let w = cpLen(header[i] || ''); + for (const row of bodyRows) w = Math.max(w, cpLen(row[i])); + widths[i] = Math.max(w, 3); + } + + const out = []; + out.push( + indent + '| ' + header.map((c, i) => padCell(c, widths[i], aligns[i])).join(' | ') + ' |' + ); + out.push(indent + '| ' + widths.map((w, i) => renderDelimiter(w, aligns[i])).join(' | ') + ' |'); + for (const row of bodyRows) { + out.push(indent + '| ' + row.map((c, i) => padCell(c, widths[i], aligns[i])).join(' | ') + ' |'); + } + return out; +} + +// Main pass: find tables outside fenced code blocks and reformat them. +function formatTables(content) { + const newline = content.includes('\r\n') ? '\r\n' : '\n'; + const lines = content.split(/\r?\n/); + const out = []; + let inFence = false; + let fenceMarker = ''; + let tableCount = 0; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const trimmed = line.trim(); + + // Track fenced code blocks (``` or ~~~). Tables inside are left alone. + const fenceMatch = trimmed.match(/^(```+|~~~+)/); + if (fenceMatch) { + if (!inFence) { + inFence = true; + fenceMarker = fenceMatch[1][0]; + } else if (trimmed.startsWith(fenceMarker.repeat(3)) || trimmed[0] === fenceMarker) { + inFence = false; + fenceMarker = ''; + } + out.push(line); + continue; + } + if (inFence) { + out.push(line); + continue; + } + + // A table starts when this line is a row and the next is a delimiter row. 
+ if ( + looksLikeRow(line) && + i + 1 < lines.length && + isDelimiterRow(lines[i + 1]) && + splitCells(lines[i + 1]).length === splitCells(line).length + ) { + const indent = (line.match(/^\s*/) || [''])[0]; + const block = [line, lines[i + 1]]; + let j = i + 2; + while (j < lines.length && looksLikeRow(lines[j]) && !isDelimiterRow(lines[j])) { + block.push(lines[j]); + j++; + } + out.push(...formatOneTable(block, indent)); + tableCount++; + i = j - 1; + continue; + } + + out.push(line); + } + + const output = out.join(newline); + return { output, changed: output !== content, tableCount }; +} + +// ---------------------------- CLI ---------------------------- +function printHelp() { + console.log(`table-fmt.js β€” align GitHub-Flavored Markdown tables (zero deps) + +Usage: + node table-fmt.js [more.md ...] [options] + cat file.md | node table-fmt.js - + +Options: + --write Rewrite files in place (default prints formatted output to stdout) + --check Don't write; exit 1 if any file isn't already formatted + --json Machine-readable report (with --check) + --help Show this help + +Exit codes: 0 ok | 1 --check found unformatted files | 2 usage error`); +} + +function fail(msg) { + console.error('table-fmt: ' + msg); + process.exit(2); +} + +function main(argv) { + const args = argv.slice(2); + if (args.includes('--help') || args.includes('-h')) { + printHelp(); + return 0; + } + + const write = args.includes('--write'); + const check = args.includes('--check'); + const json = args.includes('--json'); + const files = args.filter((a) => !a.startsWith('--')); + + for (const a of args) { + if (a.startsWith('--') && !['--write', '--check', '--json'].includes(a)) { + fail(`unknown flag: ${a}`); + } + } + if (write && check) fail('--write and --check are mutually exclusive'); + if (files.length === 0) fail('no input files (pass a .md file or - for stdin)'); + + // stdin mode + if (files.length === 1 && files[0] === '-') { + const content = fs.readFileSync(0, 'utf8'); + 
process.stdout.write(formatTables(content).output); + return 0; + } + + const report = []; + let needsFormatting = false; + + for (const file of files) { + let content; + try { + content = fs.readFileSync(file, 'utf8'); + } catch (e) { + fail(`cannot read ${file}: ${e.message}`); + } + const { output, changed, tableCount } = formatTables(content); + report.push({ file, tables: tableCount, changed }); + + if (check) { + if (changed) needsFormatting = true; + } else if (write) { + if (changed) { + fs.writeFileSync(file, output); + if (!json) console.log(`formatted ${file} (${tableCount} table${tableCount === 1 ? '' : 's'})`); + } else if (!json) { + console.log(`unchanged ${file}`); + } + } else { + // default: print formatted output to stdout + process.stdout.write(output); + } + } + + if (json) { + console.log(JSON.stringify({ files: report, needsFormatting }, null, 2)); + } else if (check) { + for (const r of report) { + console.log(`${r.changed ? 'WOULD FORMAT' : 'ok '} ${r.file} (${r.tables} table${r.tables === 1 ? '' : 's'})`); + } + if (needsFormatting) { + console.log('\nSome files are not formatted. Run with --write to fix.'); + } + } + + return check && needsFormatting ? 1 : 0; +} + +if (require.main === module) { + process.exit(main(process.argv)); +} + +module.exports = { + splitCells, + isDelimiterRow, + alignmentOf, + formatTables, + formatOneTable, +}; From 8c41beb2487ecd30fa87419b566dbfa38dd78d24 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 30 Jun 2026 12:39:38 +0000 Subject: [PATCH 8/8] Add code-fence-lint.js: lint fenced code blocks (unclosed, missing language, mismatched) --- README.md | 41 +++++++ code-fence-lint.js | 277 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 318 insertions(+) create mode 100644 code-fence-lint.js diff --git a/README.md b/README.md index eff86d0..91bf5f8 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ A curated collection of highly robust, custom-built Node/Python automation scrip 6. 
[frontmatter-lint.js](#6-frontmatter-lintjs) 7. [heading-lint.js](#7-heading-lintjs) 8. [table-fmt.js](#8-table-fmtjs) +9. [code-fence-lint.js](#9-code-fence-lintjs) --- @@ -258,6 +259,46 @@ Exports `splitCells`, `isDelimiterRow`, `alignmentOf`, `formatTables`, and `form --- +## 9. `code-fence-lint.js` +> **Catch the unclosed code fence that silently turns the rest of your README into one grey blob.** + +A zero-dependency Node script that lints fenced code blocks in Markdown. Its headline catch is the **unclosed fence** β€” a ```` ``` ```` that opens but never closes, swallowing everything after it into a single code block on GitHub. It tracks fences the same way the rest of the lint family does, but goes deeper: it understands fence *length* and *character*, so a ```` ```` ```` (4-backtick) block legitimately containing ```` ``` ```` lines, and a ```` ``` ```` block containing `~~~` lines, are parsed correctly instead of false-flagged. + +### ⚑ Key Features: +* **Unclosed-fence detection**: Flags any fence that opens but never closes before end of file β€” the #1 cause of "why is half my README a code block?" Always an error (exit `1`). +* **Length & character aware**: A closing fence must use the **same** character (`` ` `` or `~`) and be **at least as long** as the opener. This makes longer outer fences (```` ```` ````) that wrap shorter inner fences (```` ``` ````) work, and catches the mismatched case where the only candidate closer was too short to actually close the block. +* **Missing-language warnings**: An opening fence with no language tag (```` ``` ```` vs ```` ```js ````) is reported as a **warning** (no syntax highlighting). Promote it to an error with `--strict-language`, or silence it entirely with `--no-require-language`. +* **Tildes too**: `~~~` fences are handled exactly like backtick fences, including the cross-character rule (a `~~~` line inside a ```` ``` ```` block is content, not a close).
+* **No false positives from inline code**: Only a line that *starts* (after ≀3 spaces, CommonMark-style) with a fence run counts β€” inline `` `code` `` is never mistaken for a fence. Fences inside list items work. +* **CI / pre-commit ready**: Exit `0` (clean β€” warnings alone don't fail), `1` (errors), or `2` (usage/IO error). `--json` for machine-readable output, `--quiet` to print only failures. +* **Zero dependencies**: Pure Node `fs`. Network-free and deterministic. + +### πŸš€ Usage: +```bash +# Lint one file +node code-fence-lint.js README.md + +# Lint several, machine-readable +node code-fence-lint.js README.md docs/*.md --json + +# Make a missing language tag fail the build, not just warn +node code-fence-lint.js README.md --strict-language + +# Don't care about language tags β€” only catch unclosed fences +node code-fence-lint.js README.md --no-require-language + +# CI: fail the build on any unclosed/mismatched fence +node code-fence-lint.js README.md +``` + +### ⚠️ Honest limitation: +There's no full CommonMark block parser. A ```` ``` ```` that lives inside an *indented* (4-space) code block or an unusually-nested blockquote may be read as a real fence. The common cases β€” top-level fences and fences inside list items β€” are handled correctly. + +### πŸ“¦ Reusable functions: +Exports `matchFence` and `lintFences` for use in your own tooling via `require()`. + +--- + ## πŸ” Wire the docs checks into CI / pre-commit `link-check.js` and `markdown-toc.js` are most useful when they run automatically β€” so docs rot diff --git a/code-fence-lint.js b/code-fence-lint.js new file mode 100644 index 0000000..07f8cb5 --- /dev/null +++ b/code-fence-lint.js @@ -0,0 +1,277 @@ +#!/usr/bin/env node +/** + * code-fence-lint.js + * + * Lint fenced code blocks in Markdown files β€” the structural mistakes that wreck + * how a README renders on GitHub. The classic one: an unclosed fence that turns the + * entire rest of your document into a giant grey code block.
This is the formatting + * companion to the rest of the lint family (link-check.js, heading-lint.js, + * frontmatter-lint.js): it tracks fenced blocks the same way they do, then reports + * the ones that are broken. + * + * Checks (only missing-language can be toggled off): + * + * 1. unclosed-fence A code fence that opens but never closes (an odd number of + * matching fences). On GitHub this swallows everything after it + * into one code block. ALWAYS an error. + * 2. missing-language An opening ``` / ~~~ with no language identifier (```\n vs + * ```js\n). No syntax highlighting, and many linters/renderers + * prefer an explicit info string. WARNING by default β€” promote + * to an error with --strict-language, silence with + * --no-require-language. + * 3. mismatched-fence A closing fence SHORTER than the one that opened the block, + * where detectable. A block opened with ```` (4 backticks) is + * only closed by 4+ backticks; a bare ``` inside it is content, + * not a close. We flag the case where the block never closes + * because the only candidate closers were too short. (Reported + * as part of unclosed-fence with an explanatory message.) + * + * How fences are matched (CommonMark-aligned, pragmatically): + * - A fence is a line whose first non-space run is 3+ backticks OR 3+ tildes. + * - The opening fence's character (` or ~) and length set the block. A closing + * fence must use the SAME character and be AT LEAST as long, and carry no info + * string. This is what lets a ```` block legitimately contain ``` lines, and a + * ``` block legitimately contain ~~~ lines β€” they don't close each other. + * - Up to 3 leading spaces of indentation are allowed (CommonMark). This is what + * makes fences inside list items work; deeply-indented ``` inside an indented + * code context is a known limitation (see below). + * - Inline code (single/double backticks within a line, like `x`) is never a + * fence β€” only a line that STARTS (after ≀3 spaces) with the fence run counts.
+ * + * Known limitations (honest, not over-claimed): + * - No full CommonMark block parser. A ``` that is itself inside an *indented* + * (4-space) code block, or inside a blockquote with unusual nesting, may be + * read as a real fence. The common cases β€” top-level fences and fences inside + * list items β€” are handled correctly. + * - "Missing language" only inspects the OPENING fence's info string; it can't + * know whether a blank info string was intentional (e.g. plain text output). + * That's exactly why it's a warning, not an error, by default. + * + * Zero dependencies. Network-free. Works on any Node >= 14. + * + * Usage: + * node code-fence-lint.js README.md # lint one file + * node code-fence-lint.js README.md docs/*.md # lint several + * node code-fence-lint.js README.md --json # machine-readable report + * node code-fence-lint.js README.md --strict-language # missing-language is an error + * node code-fence-lint.js README.md --no-require-language # don't report missing-language + * node code-fence-lint.js --help + * + * Exit codes (CI / pre-commit friendly): + * 0 no problems (warnings alone do not fail the build) + * 1 one or more errors found (unclosed/mismatched fences, or missing-language + * under --strict-language) + * 2 usage error (no files, missing file, bad flag) + */ + +'use strict'; + +const fs = require('fs'); + +function printHelp() { + console.log(`code-fence-lint.js β€” lint fenced code blocks in Markdown + +Usage: + node code-fence-lint.js [more.md ...] [options] + +Options: + --json Emit a JSON report instead of human-readable text. + --quiet Only print files with problems (nothing on a clean file). + --strict-language Treat a missing language tag as an ERROR (fails the build), + not just a warning. + --no-require-language Don't report fences missing a language tag at all. + --help Show this help. + +Checks: unclosed-fence (error), missing-language (warning by default), + mismatched-fence (error, reported via unclosed-fence). 
+ +Exit codes: + 0 clean (warnings alone do not fail) + 1 error(s) found + 2 usage error + +Examples: + node code-fence-lint.js README.md + node code-fence-lint.js README.md docs/*.md --json + node code-fence-lint.js README.md --strict-language`); +} + +/** + * Match a fence line. Returns null if the line is not a fence, otherwise + * { char, len, info } where: + * char '`' or '~' + * len number of fence characters in the opening run + * info the info string after the run, trimmed (language tag etc.) + * + * CommonMark: up to 3 leading spaces; the fence is a run of >= 3 identical + * backticks or tildes. A backtick fence's info string may not itself contain a + * backtick (that would be inline code, not a fence). + */ +function matchFence(line) { + const m = line.match(/^( {0,3})(`{3,}|~{3,})(.*)$/); + if (!m) return null; + const run = m[2]; + const char = run[0]; + const info = m[3].trim(); + // A backtick info string containing a backtick isn't a valid opening fence. + if (char === '`' && info.indexOf('`') !== -1) return null; + return { char, len: run.length, info }; +} + +/** + * Walk a Markdown document and report fenced-code-block problems. + * + * Returns an array of problem objects: { rule, severity, line, message }. + * rule 'unclosed-fence' | 'missing-language' + * severity 'error' | 'warning' + * + * `opts.requireLanguage` (default true) toggles missing-language reporting. + * `opts.strictLanguage` (default false) makes missing-language an error. + */ +function lintFences(markdown, opts) { + const o = Object.assign({ requireLanguage: true, strictLanguage: false }, opts); + const lines = markdown.split('\n'); + const problems = []; + + // Active fence state: null when outside a code block, otherwise the open fence's + // { char, len, info, line }. + let open = null; + + for (let i = 0; i < lines.length; i++) { + const fence = matchFence(lines[i]); + if (!fence) continue; + + if (open === null) { + // This fence OPENS a block. 
+ open = { char: fence.char, len: fence.len, info: fence.info, line: i + 1 }; + + if (o.requireLanguage && fence.info === '') { + const severity = o.strictLanguage ? 'error' : 'warning'; + problems.push({ + rule: 'missing-language', + severity, + line: i + 1, + message: `Code fence opened with no language tag (${fence.char.repeat(fence.len)} with no info string). Add a language (e.g. \`${fence.char.repeat(fence.len)}js\`) for syntax highlighting.`, + }); + } + continue; + } + + // We're inside a block. A line is a CLOSING fence only if it uses the SAME + // character, is AT LEAST as long as the opener, and carries no info string. + if (fence.char === open.char && fence.len >= open.len && fence.info === '') { + open = null; // block closed cleanly + continue; + } + // Otherwise this fence line is CONTENT of the open block (a shorter same-char + // fence, a different-char fence, or a closer with a stray info string). Ignore. + } + + // End of file with a block still open: unclosed. + if (open !== null) { + const opener = `${open.char.repeat(open.len)}${open.info ? ' ' + open.info : ''}`; + let message = `Code fence opened at line ${open.line} (${opener.trim()}) is never closed before end of file. On GitHub this turns the rest of the document into one code block.`; + if (open.len > 3) { + message += ` Note: this block opened with ${open.len} ${open.char === '`' ? 'backticks' : 'tildes'}, so only a fence of ${open.len}+ ${open.char === '`' ? 
'backticks' : 'tildes'} closes it β€” a shorter ${open.char.repeat(3)} inside it counts as content, not a close.`; + } + problems.push({ + rule: 'unclosed-fence', + severity: 'error', + line: open.line, + message, + }); + } + + return problems; +} + +function parseArgs(argv) { + const opts = { + json: false, + quiet: false, + requireLanguage: true, + strictLanguage: false, + help: false, + }; + const files = []; + for (const arg of argv) { + switch (arg) { + case '--json': opts.json = true; break; + case '--quiet': opts.quiet = true; break; + case '--strict-language': opts.strictLanguage = true; opts.requireLanguage = true; break; + case '--no-require-language': opts.requireLanguage = false; break; + case '--help': case '-h': opts.help = true; break; + default: + if (arg.startsWith('-')) { opts.badFlag = arg; return { opts, files }; } + files.push(arg); + } + } + return { opts, files }; +} + +function main() { + const { opts, files } = parseArgs(process.argv.slice(2)); + + if (opts.help) { printHelp(); process.exit(0); } + if (opts.badFlag) { console.error(`Unknown flag: ${opts.badFlag}\n`); printHelp(); process.exit(2); } + if (files.length === 0) { console.error('Error: no Markdown files given.\n'); printHelp(); process.exit(2); } + + const report = []; + let totalErrors = 0; + let totalWarnings = 0; + let usageError = false; + + for (const file of files) { + let markdown; + try { + markdown = fs.readFileSync(file, 'utf8'); + } catch (e) { + console.error(`Error: cannot read ${file} (${e.code || e.message})`); + usageError = true; + continue; + } + const problems = lintFences(markdown, opts); + const errors = problems.filter((p) => p.severity === 'error').length; + const warnings = problems.filter((p) => p.severity === 'warning').length; + totalErrors += errors; + totalWarnings += warnings; + report.push({ file, problems, errors, warnings }); + } + + if (opts.json) { + console.log(JSON.stringify({ + ok: totalErrors === 0 && !usageError, + usageError, + errors: 
totalErrors, + warnings: totalWarnings, + files: report, + }, null, 2)); + } else { + for (const { file, problems, errors, warnings } of report) { + if (problems.length === 0) { + if (!opts.quiet) console.log(`βœ“ ${file} β€” fenced code blocks OK`); + continue; + } + const counts = []; + if (errors) counts.push(`${errors} error(s)`); + if (warnings) counts.push(`${warnings} warning(s)`); + console.log(`βœ— ${file} β€” ${counts.join(', ')}:`); + for (const p of problems) { + const tag = p.severity === 'error' ? 'error' : 'warn'; + console.log(` [${tag}: ${p.rule}] line ${p.line}: ${p.message}`); + } + } + if (!opts.quiet && totalErrors === 0 && totalWarnings === 0) { + console.log('\nAll fenced code blocks check out.'); + } else if (totalErrors === 0 && totalWarnings > 0) { + console.log(`\n${totalWarnings} warning(s), no errors.`); + } + } + + if (usageError) process.exit(2); + process.exit(totalErrors === 0 ? 0 : 1); +} + +if (require.main === module) main(); + +module.exports = { matchFence, lintFences };