Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions .planning/agent-context/cpython-source-sha-pin.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,22 @@
## 5. Decision log

- Resolved SHAs (tag → 40-hex commit), one line each:
- 3.10 / v3.10.20 →
- 3.11 / v3.11.15 →
- 3.12 / v3.12.13 →
- 3.13 / v3.13.13 →
- 3.14 / v3.14.4 →
- Where/how the verification aborts on mismatch:
- 3.10 / v3.10.20 → 842e987df856a5d4db37933c62a3456930a19092
- 3.11 / v3.11.15 → 2340a037f7450e70fccfe411e6531afb4d57a312
- 3.12 / v3.12.13 → 3bb231a6a5dc02b95658877318bf61501a7209e9
- 3.13 / v3.13.13 → 01104ce1beb3135c2e0c01ec835b994c1f55a1c0
- 3.14 / v3.14.4 → 23116f998f6789d8c2fbe5ed5b8146854c8c2a4f
- Where/how the verification aborts on mismatch: after the shallow
tag-based clone in `build-index`, `git -C <clone_dir> rev-parse HEAD` is
compared to the authoritative config SHA. A mismatch logs the version, tag,
actual SHA, and expected SHA, then raises `SystemExit(1)` before Sphinx setup
or content ingestion can proceed.
- **Draft SECURITY.md threat-model paragraph (for Vision to apply):**
>
> The largest build-time supply-chain input is the `build-index` clone of the
> upstream CPython repository, which provides the source tree used to generate
> canonical documentation content. Each supported CPython docs release is pinned
> to the exact commit SHA that its human-readable release tag currently resolves
> to; the tag is retained for operator readability, but the SHA is the
> authoritative integrity anchor. If a tag is reissued, moved, or otherwise
> resolves to different source content, the build fails before Sphinx setup or
> content ingestion rather than silently publishing changed documentation.
36 changes: 35 additions & 1 deletion src/mcp_server_python_docs/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def _consume_saved_stdout_fd() -> int:
logger = logging.getLogger("mcp_server_python_docs")

# === Now safe to import everything else ===
import subprocess # noqa: E402

import click # noqa: E402

from mcp_server_python_docs.ingestion.cpython_versions import ( # noqa: E402
Expand Down Expand Up @@ -110,6 +112,33 @@ def serve() -> None:
pass # Client disconnected (HYGN-03)


def _verify_cpython_source_sha(
    clone_dir: str,
    *,
    version: str,
    tag: str,
    expected_sha: str,
) -> None:
    """Abort the docs build unless the clone's HEAD is the pinned commit.

    Runs ``git rev-parse HEAD`` inside ``clone_dir`` and compares the
    reported commit with ``expected_sha``.

    Args:
        clone_dir: Path of the CPython clone to inspect (passed to ``git -C``).
        version: Docs version label; used only in the error log message.
        tag: Tag name; used only in the error log message.
        expected_sha: Commit SHA that HEAD must equal exactly.

    Raises:
        SystemExit: With exit code 1 when the two SHAs differ.
        subprocess.CalledProcessError: When ``git rev-parse`` exits non-zero
            (propagated because the call uses ``check=True``).
    """
    head_lookup = subprocess.run(
        ["git", "-C", clone_dir, "rev-parse", "HEAD"],
        check=True,
        capture_output=True,
        text=True,
    )
    # Strip surrounding whitespace so a trailing newline cannot cause a
    # spurious mismatch.
    actual_sha = head_lookup.stdout.strip()
    if actual_sha == expected_sha:
        return

    logger.error(
        "CPython %s source integrity check failed: tag %s "
        "resolved to %s, expected %s. Aborting build.",
        version,
        tag,
        actual_sha,
        expected_sha,
    )
    raise SystemExit(1)


@main.command("build-index")
@click.option(
"--versions",
Expand All @@ -124,7 +153,6 @@ def serve() -> None:
def build_index(versions: str, skip_content: bool) -> None:
"""Build the documentation index from objects.inv and Sphinx JSON."""
import shutil
import subprocess
import tempfile
import venv
from pathlib import Path
Expand Down Expand Up @@ -224,6 +252,12 @@ def build_index(versions: str, skip_content: bool) -> None:
capture_output=True,
text=True,
)
_verify_cpython_source_sha(
clone_dir,
version=version,
tag=config["tag"],
expected_sha=config["sha"],
)

# Create dedicated Sphinx venv (INGR-C-02)
venv_dir = os.path.join(clone_dir, "_sphinx_venv")
Expand Down
35 changes: 28 additions & 7 deletions src/mcp_server_python_docs/ingestion/cpython_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class CPythonDocsBuildConfig(TypedDict):
"""Build settings for one CPython documentation release."""

tag: str
sha: str
sphinx_pin: str


Expand All @@ -21,12 +22,32 @@ class CPythonDocsBuildConfig(TypedDict):

SUPPORTED_DOC_VERSIONS_CSV: Final[str] = ",".join(SUPPORTED_DOC_VERSIONS)

# CPython git tags are pinned so content builds are reproducible and do not
# drift when a maintenance branch receives new commits.
# CPython git SHAs are authoritative for content build integrity. Tags are kept
# for human-readable version mapping, but a moved tag must fail verification.
CPYTHON_DOCS_BUILD_CONFIG: Final[dict[str, CPythonDocsBuildConfig]] = {
"3.10": {"tag": "v3.10.20", "sphinx_pin": "sphinx==3.4.3"},
"3.11": {"tag": "v3.11.15", "sphinx_pin": "sphinx~=7.2.0"},
"3.12": {"tag": "v3.12.13", "sphinx_pin": "sphinx~=8.2.0"},
"3.13": {"tag": "v3.13.13", "sphinx_pin": "sphinx<9.0.0"},
"3.14": {"tag": "v3.14.4", "sphinx_pin": "sphinx<9.0.0"},
"3.10": {
"tag": "v3.10.20",
"sha": "842e987df856a5d4db37933c62a3456930a19092",
"sphinx_pin": "sphinx==3.4.3",
},
"3.11": {
"tag": "v3.11.15",
"sha": "2340a037f7450e70fccfe411e6531afb4d57a312",
"sphinx_pin": "sphinx~=7.2.0",
},
"3.12": {
"tag": "v3.12.13",
"sha": "3bb231a6a5dc02b95658877318bf61501a7209e9",
"sphinx_pin": "sphinx~=8.2.0",
},
"3.13": {
"tag": "v3.13.13",
"sha": "01104ce1beb3135c2e0c01ec835b994c1f55a1c0",
"sphinx_pin": "sphinx<9.0.0",
},
"3.14": {
"tag": "v3.14.4",
"sha": "23116f998f6789d8c2fbe5ed5b8146854c8c2a4f",
"sphinx_pin": "sphinx<9.0.0",
},
}
39 changes: 39 additions & 0 deletions tests/test_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@

import io
import os
import re
import runpy
import shutil
import subprocess
import sys
import types

Expand Down Expand Up @@ -51,8 +53,45 @@ def test_supported_versions_have_pinned_docs_build_config(self):
for version in SUPPORTED_DOC_VERSIONS:
config = CPYTHON_DOCS_BUILD_CONFIG[version]
assert config["tag"].startswith(f"v{version}.")
assert re.fullmatch(r"[0-9a-f]{40}", config["sha"])
assert config["sphinx_pin"].startswith("sphinx")

def test_cpython_source_sha_verification_aborts_on_mismatch(
    self,
    monkeypatch,
    caplog,
):
    """A HEAD SHA that differs from the expected SHA must exit with code 1."""
    from mcp_server_python_docs import __main__ as cli_main

    clone_path = "/tmp/cpython-3.14"
    reported_sha = "b" * 40
    pinned_sha = "a" * 40
    seen_commands: list[list[str]] = []

    def stub_run(
        cmd: list[str],
        *,
        check: bool,
        capture_output: bool,
        text: bool,
    ) -> subprocess.CompletedProcess[str]:
        # Record the command and insist on the exact keyword flags used.
        seen_commands.append(cmd)
        assert check is True
        assert capture_output is True
        assert text is True
        return subprocess.CompletedProcess(cmd, 0, stdout=reported_sha + "\n")

    # Replace the real git invocation so no repository is needed.
    monkeypatch.setattr(cli_main.subprocess, "run", stub_run)

    with pytest.raises(SystemExit) as exc_info:
        cli_main._verify_cpython_source_sha(
            clone_path,
            version="3.14",
            tag="v3.14.4",
            expected_sha=pinned_sha,
        )

    assert exc_info.value.code == 1
    assert seen_commands == [["git", "-C", clone_path, "rev-parse", "HEAD"]]
    assert "source integrity check failed" in caplog.text


class TestJsonBuildRequirements:
def test_omits_html_only_sphinx_extensions(self, tmp_path):
Expand Down