diff --git a/.github/workflows/adapter_tests.yml b/.github/workflows/adapter_tests.yml new file mode 100644 index 0000000..c2872b2 --- /dev/null +++ b/.github/workflows/adapter_tests.yml @@ -0,0 +1,108 @@ +name: adapter-tests + +# Runs the adapter + conformance test suites on every push/PR that +# touches adapter or spec source. Pinned dependencies + Python 3.12 so +# NAT-dependent tests run (don't skip). Skipped tests in the NAT job +# fail the build — addresses the "skips read as passes" failure mode +# that motivated this workflow (PR #22 review). + +on: + push: + paths: + - 'adapters/**' + - 'specification/**' + - '.github/workflows/adapter_tests.yml' + pull_request: + paths: + - 'adapters/**' + - 'specification/**' + - '.github/workflows/adapter_tests.yml' + +jobs: + conformance: + # Single-file 48-test suite at adapters/test_acs_core_conformance.py + # asserts every ACS-Core MUST against the canonical schemas. This + # runs without any framework deps — only the schema validators. + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install shared test deps + run: pip install -r adapters/requirements-test.txt + - name: Run ACS-Core conformance suite + env: + ACS_SPEC_DIR: ${{ github.workspace }}/specification/v0.1.0 + run: | + cd adapters + set -o pipefail; python -m unittest test_acs_core_conformance -v 2>&1 | tee out.log # pipefail: the default 'bash -e' shell would report tee's exit 0 and hide a failing run + # Conformance is the spec floor; ZERO skips allowed. + if grep -E "skipped=[1-9]" out.log; then + echo "::error::Conformance tests skipped — every spec MUST must execute" + exit 1 + fi + + unit: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - {adapter: _common, install_nat: false} + - {adapter: claude-code, install_nat: false} + - {adapter: cursor, install_nat: false} + - {adapter: nat, install_nat: true } + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + # NAT 1.7.0 supports 3.10–3.12; 3.13 has no wheel.
+ python-version: '3.12' + + - name: Install shared test deps + run: pip install -r adapters/requirements-test.txt + + - name: Install NAT runtime (pinned) + if: matrix.install_nat + # Pinned `nvidia-nat-core==1.7.0` per PR #22 review — an + # unpinned install would let a future NAT release shift the + # `InvocationContext` shape under us without test signal. + # `nvidia-nat-langchain` is the bridge `test_live.py` needs + # for end-to-end NAT-runtime tests; pinned to the matching + # NAT version. + run: | + pip install -r adapters/nat/requirements.txt + pip install nvidia-nat-langchain==1.7.0 + + - name: Run ${{ matrix.adapter }} tests + env: + ACS_SPEC_DIR: ${{ github.workspace }}/specification/v0.1.0 + run: | + cd adapters/${{ matrix.adapter }} + set -o pipefail; python -m unittest discover -v tests 2>&1 | tee out.log # pipefail: the default 'bash -e' shell would report tee's exit 0 and hide failing tests + + - name: Fail on unexpected skips in NAT job + # NAT IS installed in this job, so any test that skips on + # "NAT not installed" is a test-gating bug. Hard fail. + if: matrix.install_nat + run: | + cd adapters/${{ matrix.adapter }} + if grep -E "skipped=[1-9]" out.log; then + echo "::error::NAT job had skipped tests — NAT is installed, gating is broken" + cat out.log + exit 1 + fi + + - name: Surface skips in other adapters + # Claude Code's `test_live.py` gates on the `claude` CLI being + # on PATH; we don't install the CLI in CI. Cursor's `test_live.py` + # is a manual-procedure placeholder. Both legitimate. Warn so + # operators can see them; don't fail the build.
+ if: ${{ !matrix.install_nat }} + run: | + cd adapters/${{ matrix.adapter }} + if grep -E "skipped=[1-9]" out.log; then + echo "::warning::Tests skipped in ${{ matrix.adapter }} — review out.log" + grep -E "skipped|SKIPPED" out.log || true + fi diff --git a/.gitignore b/.gitignore index 624a9e0..85fd5c7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,17 @@ book/ site/ references/ -.DS_Store \ No newline at end of file +.DS_Store + +# Adapter manual-test scratch dirs (project-level hook configs with +# machine-specific paths from local probing — must never land in commits) +adapters/*/.cursor/ +adapters/*/.claude/ +adapters/*/.acs-handshake-cache/ + +# Python venvs (NAT manual test environment lives here; the path is +# tied to the machine that built it and must not ship) +adapters/*/.venv/ +adapters/*/.nat-venv/ +**/__pycache__/ +*.pyc \ No newline at end of file diff --git a/adapters/.gitignore b/adapters/.gitignore new file mode 100644 index 0000000..3bbe7b6 --- /dev/null +++ b/adapters/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.pyc +*.pyo diff --git a/adapters/README.md b/adapters/README.md new file mode 100644 index 0000000..a4d597c --- /dev/null +++ b/adapters/README.md @@ -0,0 +1,226 @@ +# ACS Adapters + +Reference implementations that wire popular agent frameworks to an ACS Guardian. The goal: a framework adopts ACS through **configuration only**, with no agent code changes. + +## What is a Guardian? + +The **Guardian** is the policy enforcement point: a long-running HTTP service that receives every ACS envelope from the adapter, evaluates it against the deployment's policy, and returns one of five dispositions (allow / deny / modify / ask / defer). It's the "decider"; the adapter is the "messenger." + +Two roles in any deployment: + +- **Production Guardian** — your real policy engine. Typically OPA/Rego, Cedar, or a vendor SDK plugged in behind an HTTP server that speaks the ACS wire protocol. 
The adapter doesn't care what's inside; only the wire contract matters. +- **`example-guardian/example_guardian.py`** in this repo — a teaching artifact and test substrate. Implements the full wire protocol (handshake, envelope schema validation, HMAC signing, rolling chain, replay protection, skew rejection, dispositions, `system/ping`) but with a deliberately tiny deterministic policy: denies a short list of destructive Bash patterns + writes to system paths, allows everything else. Useful for local testing; **not for production**. + +Production deployments swap the policy code in the example Guardian's `evaluate(method, params)` function with their real engine and keep the wire-protocol scaffolding. + +Running the Guardian — terminal window, `launchd`, `systemd`, container — is the operator's responsibility. The adapter expects it to be reachable at `$ACS_GUARDIAN_URL`; if it isn't, the §6.4 fail posture applies and an `ACS_AUDIT` event is emitted. + +## ACS-Core conformance check + +One command verifies this stack against the ACS-Core baseline **minus full Wrapped MCP**, which is deferred: + +```bash +cd adapters +python -m unittest test_acs_core_conformance +``` + +`test_acs_core_conformance.py` enumerates every ACS-Core MUST from `docs/spec/conformance.md` — handshake, envelope shape, the 6 minimum hooks, all 5 dispositions, rolling chain, replay + skew rejection, HMAC-SHA256 baseline, decision honoring + fail-open audit + audit-cause differentiation, system/ping, and the `protocols/MCP/*` namespace shape. Each test docstring quotes the spec line it falsifies. The suite loads the canonical schemas from `Agent-Control-Standard/ACS` (set `ACS_SPEC_DIR` to point at `specification/v0.1.0/`); schemas missing is a hard FAIL, not a skip — spec validation is non-negotiable. Format checking (`uuid`, `date-time`) is enforced via `rfc3339-validator`. 
+ +**Wrapped MCP caveat.** The conformance suite verifies the wire-format shape of `protocols/MCP/*` (envelope validates, Guardian returns a structured response, no crash), but the reference Guardian does **not** implement full MCP request wrapping — incoming MCP requests are routed through the standard `steps/toolCallRequest` path with the tool name reflecting the MCP method, not as the wrapped `protocols/MCP/*` form. Deployments that need full MCP wrapping must extend the Guardian. This is a documented v0.2 deferral; a green conformance run means "ACS-Core baseline **minus** full Wrapped MCP", not "the whole baseline." See `test_acs_core_conformance.py::Core10_WrappedMcp`. + +## How adapters work + +The adapters are **translators**. Each one speaks its framework's hook protocol on one side and ACS JSON-RPC on the other. The framework's agent code is untouched. The Guardian's policy code is untouched. The adapter is the bilingual layer between them. + +### The general pattern (same for all three adapters) + +For each event the framework fires: + +``` + framework adapter Guardian + │ │ │ + │ hook event (framework │ │ + │ native JSON / call) │ │ + │ ───────────────────────► │ │ + │ │ ACS JSON-RPC request │ + │ │ ──────────────────────► │ + │ │ │ evaluate + │ │ │ policy + │ │ ACS decision │ + │ │ ◄────────────────────── │ + │ decision (framework │ │ + │ native response shape) │ │ + │ ◄─────────────────────── │ │ + │ │ │ + ▼ ▼ ▼ + applies the appends + decision audit chain +``` + +Six steps: + +1. Framework fires its hook with a payload in its own format. +2. Adapter receives that payload, translates to an ACS JSON-RPC request. +3. Adapter POSTs to the Guardian endpoint. +4. Guardian evaluates against policy, returns an ACS decision (`allow` / `deny` / `modify` / `ask` / `defer`). +5. Adapter translates that decision back to whatever the framework expects to receive. +6. Framework applies the decision (run / block / modify the action). 
+ +### Concrete walkthrough: Claude Code, ALLOW path + +You ask Claude Code to `echo hello`. + +For brevity, this walkthrough shows the envelope SHAPES and omits the +HMAC-SHA256 `signature` block on each envelope and the once-per-session +`handshake/hello` round-trip that precedes the first content-bearing +event. Both are present in real envelopes — run `python3 adapters/claude-code/e2e_check.py` +to see verbatim envelopes including signatures. + +**Step 1.** Claude Code is about to call its Bash tool. Before it runs, Claude Code's hook system fires `PreToolUse`. Your `settings.json` configures `PreToolUse` to run `python3 acs_adapter.py`. Claude Code spawns that process and pipes the event to stdin: + +```json +{ + "session_id": "abc-123", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_input": {"command": "echo hello"}, + "tool_use_id": "...", + "cwd": "/tmp/...", + "permission_mode": "default" +} +``` + +**Step 2.** The adapter reads that JSON, builds an ACS JSON-RPC request conforming to v0.1.0 `request-envelope.json` and `hooks/tool-call-request.json`: + +```json +{ + "jsonrpc": "2.0", + "id": "", + "method": "steps/toolCallRequest", + "params": { + "acs_version": "0.1.0", + "request_id": "", + "timestamp": "2026-06-17T12:34:56.789Z", + "metadata": { + "agent_id": "claude-code:a1b2c3d4", + "session_id": "abc-123", + "cwd": "/tmp/...", + "platform": "claude-code" + }, + "payload": { + "tool": {"name": "Bash"}, + "arguments": {"command": {"value": "echo hello"}} + } + } +} +``` + +Notice the shape: `acs_version` / `request_id` / `timestamp` / `metadata` live inside `params`, not at the envelope root (the envelope schema's `additionalProperties: false` rejects unknown top-level keys). Each tool argument is wrapped as `{value: ...}` so ACS-Provenance can attach provenance per-argument without changing the schema. + +**Step 3.** The adapter POSTs to the Guardian endpoint (`http://127.0.0.1:8787/acs`). + +**Step 4.** The Guardian evaluates. 
Our example Guardian's deterministic policy: `echo hello` doesn't match the destructive-Bash regex. Returns a response conforming to `response-envelope.json`: + +```json +{ + "jsonrpc": "2.0", + "id": "", + "result": { + "type": "final", + "acs_version": "0.1.0", + "request_id": "", + "decision": "allow", + "chain_hash": "..." + } +} +``` + +**Step 5.** The adapter translates back to Claude Code's expected shape: + +```json +{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "allow"}} +``` + +**Step 6.** Claude Code reads stdout, sees `permissionDecision: "allow"`, executes the Bash tool. You see `hello` printed. + +The whole round-trip is ~10 ms. The agent doesn't know any of this happened. + +### DENY path differs only in steps 4–6 + +Same as above, but with `command: "rm -rf /home/u"`: + +- **Step 4:** Guardian returns `{"decision": "deny", "reasoning": "destructive Bash pattern in: rm -rf /home/u"}` +- **Step 5:** Adapter emits `{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "deny", "permissionDecisionReason": "destructive Bash pattern..."}}` +- **Step 6:** Claude Code reads `permissionDecision: "deny"`, does not execute the Bash tool, and surfaces the reason: *"The command was blocked — a policy denied the Bash tool call, so it never ran."* + +### What changes across the three adapters + +The general pattern is identical. 
The framework-specific translation differs: + +| | Claude Code | Cursor | NAT | +|---|---|---|---| +| **Where the adapter lives** | Separate shell process spawned per hook | Separate shell process spawned per hook | In-process Python class, same memory space as the agent | +| **How the framework sends the event** | JSON on stdin; event type is a field inside the JSON (`hook_event_name`) | JSON on stdin; event type passed as a CLI argument (one command per event in `hooks.json`) | Python method call: `pre_invoke(context)` with `context.function_context.name` | +| **Native event field names** | `tool_name`, `tool_input`, `tool_response` | `tool_name`, `tool_input`, `tool_output`, `command` (for shell) | `context.function_context.name`, `context.modified_kwargs` | +| **Native allow/deny output** | `{"hookSpecificOutput": {"permissionDecision": "allow"|"deny"}}` on stdout | `{"permission": "allow"|"deny"}` on stdout, or `exit 2` to block | Set `context.action = InvocationAction.SKIP` to block, or raise `ACSGuardianDenied` | +| **Native modify mechanism** | `hookSpecificOutput.updatedInput` | `updated_input` | Mutate `context.modified_kwargs` (input) or `context.output` (output) | +| **Process model** | OS spawns a Python process for every hook event | OS spawns a Python process for every hook event | Zero IPC; everything in the same Python interpreter | + +The Guardian-side wire format is **the same** for all three. The adapter is bilingual: it knows the framework's protocol on one side and ACS on the other. + +### Decision honoring is a framework property + +Every adapter relies on its framework providing the §6.4 guarantee: the framework MUST wait for the verdict and apply it before the action executes. If a framework fired the hook fire-and-forget and continued the action without waiting, the adapter would still send to the Guardian and the audit chain would still record the decision — but the framework wouldn't apply it. That would be non-conformant. 
None of the three frameworks here does that; how each one delivers the guarantee is in the per-adapter README. + +### The key insight + +ACS standardizes the wire format and the decision contract. Adapters live where the boundary is: between the framework and the Guardian. Each adapter: + +1. Knows the framework's hook protocol (the framework's JSON shape, response field names, exit codes). +2. Knows ACS (always the same). +3. Translates between them. + +The framework's agent code is untouched. The Guardian's policy code is untouched. The adapter is the bilingual translator that makes them speak. **One Guardian, one ACS contract, three adapters that translate three different protocols into that contract.** Add a new framework, write a new adapter, the Guardian doesn't change. + +--- + +## Directory layout (identical across all three adapters) + +Each adapter follows the same structure. Files differ only where the framework's native naming requires it (config example file extension, etc.): + +``` +adapters// +├── README.md # overview + quick start + conformance status +├── acs_adapter.py # the adapter (same filename across all three) +├── mapping.md # framework event → ACS step method table +├── .example # drop-in framework-native config: +│ # claude-code/settings.json.example +│ # cursor/hooks.json.example +│ # nat/workflow.yml.example +└── tests/ + ├── __init__.py + ├── test_adapter.py # unit / integration tests against real types + ├── test_live.py # automated live test (Cursor: skipped placeholder pointing at live_verification.md) + ├── example_payloads.md # masked real-world payload examples + └── live_verification.md # (Cursor only) manual reproduction procedure +``` + +Plus the shared: + +``` +adapters/example-guardian/ +├── README.md +└── example_guardian.py # used by all three adapters' tests +``` + +--- + +## Contributing a new adapter + +1. Create `adapters//`. +2. 
Write `mapping.md` documenting how the framework's hook events map to ACS `steps/*` methods, and how the framework's response shape relates to ACS dispositions. +3. (Optional but encouraged) Write the adapter itself, plus tests. The Claude Code adapter is the template. +4. Add a row to the status table above. +5. Open a PR against `Agent-Control-Standard/ACS`. + +The bar for "reference implementation" status is: round-trip tests pass against the example Guardian, documented configuration for users, and an explicit conformance posture statement matching the format in the Claude Code adapter's README. diff --git a/adapters/SECURITY.md b/adapters/SECURITY.md new file mode 100644 index 0000000..2c96b35 --- /dev/null +++ b/adapters/SECURITY.md @@ -0,0 +1,230 @@ +# Adapter security posture + +This document is the threat model for the three reference adapters +(`claude-code/`, `cursor/`, `nat/`), the `example-guardian/`, and the +shared helpers in `_common/`. It says what attacks the code defends +against, how, and what is explicitly out of scope. + +ACS is a security project — the whole point of the standard is to +police agent behavior. Adapter code that itself has security holes +undermines the standard. This document and the tests under +`_common/tests/test_security.py` are the falsifiers for every claim +below. + +## In-scope components + +| Component | Trust posture | +|---|---| +| Adapter process (`claude-code/`, `cursor/`) | Shell-spawned by the framework per hook event. Runs as the framework's user. Reads framework JSON from stdin. | +| Adapter middleware (`nat/`) | In-process inside the agent's Python runtime. Same address space as the agent. | +| Guardian process (`example-guardian/`) | HTTP server. Holds the HMAC secret. Computes the rolling audit chain. | +| Wire transport | JSON-RPC over HTTP (deployments wrap with HTTPS / mTLS for production). | +| HMAC shared secret | Loaded from `ACS_HMAC_SECRET_FILE` (preferred) or `ACS_HMAC_SECRET` (env-var fallback). 
| +| Cache files | `~/.cache/acs-adapter-handshake/` (ServerHello cache), `~/.cache/acs-adapter-session/` (per-session step-id tracking). | +| Audit log | `ACS_AUDIT ` lines on adapter stderr. | + +## Threat model + +### Defended attacks (with the mitigation that defeats each) + +**T1 — Envelope tampering on the wire.** +An attacker intercepts the adapter ↔ Guardian connection and modifies +the payload, method, or session_id. +*Mitigation:* HMAC-SHA256 over JCS-canonicalized envelope with the +signature field removed (Specification §10). Both sides verify with +`hmac.compare_digest` (constant-time). Signed input includes +`method`, `metadata.session_id`, `request_id`, and `timestamp` so the +signature is bound to the whole envelope. +*Test:* `test_acs_core_conformance.py::Core07_BaselineIntegrity::test_tampered_request_signature_invalid` + +**T2 — Cross-session signature lift.** +An attacker captures a signed envelope from session A and replays it +under a different session_id. +*Mitigation:* HMAC key is HKDF-SHA256-derived per-session from +`(input_keying_material, session_id)`. A signature valid under +session A's derived key fails verification under session B's key. +*Test:* covered indirectly by `HmacSigning::test_signed_request_accepted` +(uses session-specific key derivation). + +**T3 — Replay within a session.** +Same envelope re-sent within the same session_id. +*Mitigation:* Guardian tracks `request_id` per session and rejects +duplicates with `REPLAY_DETECTED` (-32005, §10.3). Per-session, +in-memory; bounded by session lifetime. +*Test:* `ReplayRejection::test_duplicate_request_id_rejected` + +**T4 — Stale or future-dated envelope.** +Captured envelope replayed outside the freshness window. +*Mitigation:* Guardian rejects timestamps outside the negotiated skew +window (default ±5 min) with `TIMESTAMP_OUT_OF_WINDOW` (-32006, §10.3). 
+*Test:* `TimestampSkew::test_ancient_timestamp_rejected`, +`test_future_timestamp_rejected` + +**T5 — SSRF via `ACS_GUARDIAN_URL`.** +Attacker controls the env var; sets it to `file:///etc/passwd` to +read arbitrary files, or to `data://` to feed a crafted response. +*Mitigation:* `validate_guardian_url()` rejects any scheme that is +not `http` or `https`. Called from every adapter's `call_guardian` +and from `do_handshake` / `ping` in `_common`. +*Test:* `GuardianUrlSchemeAllowlist::test_file_scheme_rejected` and +five other scheme rejections. + +**T6 — Guardian HTTP DoS via oversized body.** +Attacker POSTs a request with Content-Length > available RAM, +expecting the Guardian to allocate. +*Mitigation:* Guardian refuses Content-Length > `MAX_REQUEST_BODY_BYTES` +(1 MiB, matches the handshake's `max_payload_size_bytes`) before +reading the body. Returns 413 + JSON-RPC error -32600. +*Test:* `GuardianBodySizeCap::test_oversized_request_rejected` + +**T7 — Leaked HMAC secret via world-readable file.** +Operator (or misconfiguration) leaves `ACS_HMAC_SECRET_FILE` mode +0644 / 0640; any local process can read the key. +*Mitigation:* `load_hmac_secret()` rejects the file unless its mode is +`& 0o077 == 0` (no group / other access) AND it is owned by the +running user AND it is not a symlink. `SecretFilePermissionsError` +prevents the adapter from silently using a leaked secret. +*Test:* `HmacSecretFilePermissions::{test_world_readable_rejected, +test_group_readable_rejected, test_symlink_rejected}` + +**T8 — Cache / session-state poisoning.** +Local attacker writes the handshake cache or session-state file +between adapter invocations. For session-state, this would let them +inject a fake `parent_step_id` into the next `subagentStart` payload. +*Mitigation:* Both cache directories created mode 0700; cache files +created mode 0600 via `os.open(...O_CREAT, 0o600)`. A local attacker +without same-uid privileges cannot read or write the state. 
+*Test:* `CacheDirPermissions::{test_session_state_dir_is_0700, +test_session_state_file_is_0600}` + +**T9 — Regex DoS via oversized command.** +Attacker submits a multi-MB command string crafted to trigger +catastrophic backtracking in the destructive-bash regex set. +*Mitigation:* `scan_destructive_bash_safely()` refuses to scan +commands longer than `DESTRUCTIVE_SCAN_MAX_LEN` (8 KiB). The skip is +audited and the caller MUST treat the return value `"input_too_large"` +as suspicious — NOT as "safe to allow." Real shell commands are tiny; +multi-KB strings are either tunneled data or a DoS attempt. +*Test:* `RegexInputSizeCap::test_oversized_command_is_short_circuited` + +**T10 — Audit-log injection.** +Field values in audit events (`session_id`, `method`, `error=str(e)`) +could contain newlines or control characters that, if naively written, +would let an attacker forge fake `ACS_AUDIT` lines. +*Mitigation:* `audit_event()` emits a single line of +`json.dumps(payload, sort_keys=True)`. JSON encoding escapes `\n`, +`\r`, and other control characters in string values. The +`ACS_AUDIT ` prefix is the only unescaped text. +*No dedicated test* — the property is enforced by the JSON encoder; +adding a test would be checking stdlib behavior. + +**T11 — Handshake downgrade.** +Attacker MITMs the handshake response to claim the Guardian doesn't +support signing, hoping the adapter then sends unsigned envelopes. +*Mitigation:* The adapter signs iff its own `ACS_HMAC_SECRET` / +`_FILE` is set, regardless of what `signature_algorithms_supported` +the handshake advertised. The handshake's advertised algorithm list +is informational; it does not control the adapter's signing behavior. +*No test* — this is an absence of behavior. Verified by reading the +code: `sign_envelope()` consults `load_hmac_secret()` only. 
+ +**T12 — Guardian-side signature stripping.** +Attacker MITMs the Guardian's response and removes the `signature` +field, hoping the adapter's response verification silently accepts. +*Mitigation:* `verify_signature()` returns False if a signature is +absent AND the adapter has `ACS_HMAC_SECRET` configured. The adapter +rejects the response and fails per `ACS_DEFAULT_DENY` posture (audit +event in either case). +*No dedicated test* — covered indirectly by the conformance suite's +signed round-trip tests (Core07_BaselineIntegrity), which would fail +if signature absence were silently accepted. + +### Out of scope (deployment / operational concerns) + +**O1 — Plaintext HTTP exposes payload content.** +Even when HMAC-signed, the body is readable on the wire. An attacker +on the network path can read tool names, arguments, prompts, results. +*Posture:* Use HTTPS for any non-loopback deployment. Set +`ACS_GUARDIAN_URL=https://...` and put the Guardian behind TLS (real +cert, or mTLS for stronger client identity). The adapter's URL +allowlist accepts `https://`. We do not bundle a TLS implementation. + +**O2 — Regex bypass via shell quoting / expansion.** +Shell evaluates `r''m -rf /` as `rm -rf /` after quote removal. The +adapter only sees the literal `r''m -rf /` from the framework, which +does not match our destructive-bash regex. Same for `$(echo rm) -rf /`, +backslash escapes, locale-dependent commands, etc. +*Posture:* The destructive-bash regex is a teaching artifact and a +defense-in-depth heuristic, NOT a security boundary. Production +deployments wire a real policy engine (OPA/Rego, Cedar, or a vendor +policy bundle) into the Guardian. Documented in +`example-guardian/example_guardian.py` and `adapters/README.md`. + +**O3 — Secret in env-var visible to child processes / `ps eauxw`.** +`ACS_HMAC_SECRET` shows up in `/proc/PID/environ` and is inherited by +any child process the adapter or Guardian spawns. 
+*Posture:* That's exactly why `ACS_HMAC_SECRET_FILE` exists. The env +var is a development convenience; production deployments use the file +path with mode 0600. Documented in the Guardian README. + +**O4 — Compromised Guardian.** +A Guardian that holds the symmetric HMAC key can re-sign a rewritten +chain head and present a clean history. The HMAC baseline detects +network tampering and cross-Guardian disagreement but cannot prove +non-repudiation against the Guardian itself. +*Posture:* That's what the ACS-Crypto profile (asymmetric / PQC +signatures) and the ACS-Audit profile (`request_hash` per +ContextEntry) are for. v0.1 baseline acknowledges this tradeoff +(`conformance.md:30`); we don't implement those profiles. + +**O5 — Compromised secret-file storage.** +A reader of the storage volume (cloud snapshot, backup, etc.) gets +the HMAC key. File permissions don't help there. +*Posture:* Encrypted storage, sealed secrets (Vault, K8s sealed +secrets, KMS-backed envelopes). Out of scope for the adapter; the +adapter just receives the key bytes from a file path. + +**O6 — Memory disclosure / core dumps.** +HMAC key in process memory. No explicit zeroize. +*Posture:* Disable core dumps in production (`ulimit -c 0` / +`fs.suid_dumpable=0`). Python doesn't expose secure-erase primitives; +acceptable v0.1 tradeoff. + +**O7 — Compromised adapter binary.** +A malicious adapter binary (shipped via supply-chain attack on the +ACS repo, or a tampered local install) does whatever it wants. +*Posture:* Out of scope. Verify the adapter against the published +hash. The ACS-Inspect profile addresses the AgBOM side; binary +integrity of the adapter itself is a deployment-tools concern. + +**O8 — Compromised framework (Claude Code / Cursor / NAT).** +If Claude Code itself is compromised, all bets are off — it can +choose not to call hooks, ignore deny responses, etc. 
ACS-Core §6.4 +("decision honoring") is a property the framework must implement; an +adapter cannot enforce it on a hostile framework. +*Posture:* Framework integrity is a separate trust boundary outside +ACS's scope. + +## Mitigation matrix at a glance + +| Threat | Mitigation | Test | +|---|---|---| +| T1 envelope tampering | HMAC-SHA256 over JCS canonical input | `HmacSigning::test_tampered_request_rejected` | +| T2 cross-session lift | per-session HKDF key derivation | (covered by T1 verification path) | +| T3 in-session replay | per-session `request_id` set + -32005 | `ReplayRejection::test_duplicate_request_id_rejected` | +| T4 timestamp skew | skew-window check + -32006 | `TimestampSkew::test_ancient_timestamp_rejected` | +| T5 SSRF | URL scheme allowlist (http/https) | `GuardianUrlSchemeAllowlist::*` (6 tests) | +| T6 body-size DoS | Content-Length cap + 413 | `GuardianBodySizeCap::test_oversized_request_rejected` | +| T7 leaky secret file | mode/owner/symlink check on `ACS_HMAC_SECRET_FILE` | `HmacSecretFilePermissions::*` (4 tests) | +| T8 cache poisoning | dir 0700 + file 0600 | `CacheDirPermissions::*` (2 tests) | +| T9 regex DoS | 8 KiB input cap + audit | `RegexInputSizeCap::*` (3 tests) | +| T10 log injection | `json.dumps` escaping | enforced by stdlib | +| T11 handshake downgrade | adapter signs based on own config, not handshake | code-level | +| T12 response sig stripping | `verify_signature` False when sig absent + secret set | code-level | + +## How to report a finding + +Open an issue at `https://github.com/Agent-Control-Standard/ACS` with +the `security` label, or email the maintainers listed in `CODEOWNERS`. +Do not include exploit details in a public issue; request a private +channel first. 
diff --git a/adapters/_common/acs_common.py b/adapters/_common/acs_common.py new file mode 100644 index 0000000..61bc6ac --- /dev/null +++ b/adapters/_common/acs_common.py @@ -0,0 +1,657 @@ +""" +Shared ACS v0.1.0 helpers used by the three reference adapters and the +example Guardian. + +Lives in `adapters/_common/`. Each adapter prepends this directory to +`sys.path` and imports the symbols it needs. + +What's in here: + +- `jcs_canonicalize` — RFC 8785 (JCS) canonicalization. Uses the + `rfc8785` PyPI package when available (full compliance including + number edge cases); falls back to a sorted-keys + compact-separators + implementation otherwise, which is JCS-equivalent for all JSON + shapes ACS envelopes carry. +- `derive_session_key` — HKDF-SHA256 per-session key derivation per §10. +- `sign_envelope` / `verify_signature` — HMAC-SHA256 baseline signature + over JCS(envelope with signature field removed), per §10. +- `load_hmac_secret` — read the HMAC secret from `ACS_HMAC_SECRET_FILE` + (preferred: file mode 0600) or `ACS_HMAC_SECRET` (env). File path + beats env var so secrets don't sit in `ps aux` output. +- `iso8601_now` / `coerce_uuid` / `parse_iso8601` — time + ID helpers. +- `audit_event` — structured `ACS_AUDIT` line for §6.4 fail-open bypass. +- `ensure_session_handshake` / `ping` — protocol helpers (§4, §13). + `ensure_session_handshake` is idempotent per session via disk cache; + see its docstring. The old name `do_handshake` is kept as an alias. +- `session_state` — per-session JSON file used by adapters to track + last_step_id, seen_step_ids, etc. across separate hook-process + invocations (shell-stdin adapters spawn one process per hook). 
+""" +from __future__ import annotations + +import datetime +import hashlib +import hmac +import json +import os +import stat +import sys +import time +import urllib.error +import urllib.request +import uuid +from pathlib import Path +from typing import Any + +ACS_VERSION = "0.1.0" +DEFAULT_SKEW_WINDOW_MS = 300_000 + +# Maximum bytes the Guardian will read from a single HTTP POST body. +# Matches the handshake's max_payload_size_bytes default. Defends against +# memory exhaustion via a huge Content-Length. +MAX_REQUEST_BODY_BYTES = 1_048_576 # 1 MiB + +# Maximum command string length to scan with destructive-pattern regexes. +# Real shell commands are tiny; longer inputs are either non-shell data +# routed through the wrong tool or a regex-DoS attempt. +DESTRUCTIVE_SCAN_MAX_LEN = 8 * 1024 # 8 KiB + + +# ----- Canonicalization ----- + +try: + import rfc8785 as _rfc8785 # type: ignore[import-not-found] + _HAVE_RFC8785 = True +except ImportError: + _rfc8785 = None + _HAVE_RFC8785 = False + + +def jcs_canonicalize(obj: Any) -> bytes: + """RFC 8785 (JSON Canonicalization Scheme). + + Uses the `rfc8785` package when installed (full RFC 8785 compliance, + including float / -0 / subnormal handling and UTF-16 key ordering). + Falls back to a sorted-keys + compact-separators implementation + when not, which is JCS-equivalent for all JSON shapes ACS envelopes + carry but does not handle every floating-point or non-BMP-key edge case. + + Install rfc8785 for full compliance: pip install rfc8785 + """ + if _HAVE_RFC8785: + return _rfc8785.dumps(obj) + return json.dumps( + obj, sort_keys=True, separators=(",", ":"), ensure_ascii=False + ).encode("utf-8") + + +# ----- Signing (HMAC-SHA256 baseline per §10) ----- + +def derive_session_key(input_key_material: bytes, session_id: str) -> bytes: + """HKDF-SHA256 with session_id as `info`, no salt. 
+ + Spec §10: 'per-session HMAC key is HKDF-derived from deployment-provided + input keying material (a pre-shared secret, or a transport channel + binding such as a TLS exporter) together with the session_id.' + """ + # HKDF-Extract with empty salt + prk = hmac.new(b"\x00" * 32, input_key_material, hashlib.sha256).digest() + # HKDF-Expand to 32 bytes with info = session_id + info = session_id.encode("utf-8") + t = hmac.new(prk, info + b"\x01", hashlib.sha256).digest() + return t # one 32-byte block is enough for HMAC-SHA256 + + +class SecretFilePermissionsError(RuntimeError): + """ACS_HMAC_SECRET_FILE exists but its permissions / ownership leak the secret.""" + + +def _check_secret_file_perms(path: str) -> None: + """Refuse to read the secret file if anything about its mode or + ownership would expose the key to another local user.""" + # Reject symlinks — a symlink is an attack vector (replace target + # without changing the visible path). + if os.path.islink(path): + raise SecretFilePermissionsError( + f"ACS_HMAC_SECRET_FILE {path!r} is a symlink; refusing to follow") + st = os.stat(path) + mode = stat.S_IMODE(st.st_mode) + if mode & 0o077: + raise SecretFilePermissionsError( + f"ACS_HMAC_SECRET_FILE {path!r} mode {oct(mode)} is too permissive; " + f"must be 0600 or 0400 (no group/other access). " + f"Fix: chmod 600 {path}") + if hasattr(os, "geteuid") and st.st_uid != os.geteuid(): + raise SecretFilePermissionsError( + f"ACS_HMAC_SECRET_FILE {path!r} owned by uid {st.st_uid}, " + f"adapter is running as uid {os.geteuid()}; refusing") + + +def load_hmac_secret() -> bytes: + """Read the HMAC input keying material. + + Resolution order (first hit wins): + 1. `ACS_HMAC_SECRET_FILE` — path to a file containing the secret. + Preferred for production. Permissions MUST be 0600 (or 0400) and + the file MUST be owned by the running user. The file's content + (stripped of trailing whitespace) is the secret. Symlinks are + rejected. 
_ALLOWED_GUARDIAN_SCHEMES = frozenset({"http", "https"})


def validate_guardian_url(url: str) -> None:
    """Raise ValueError unless `url` is an acceptable Guardian endpoint.

    Two checks are applied, in order:

    1. The URL scheme (case-insensitive) must be one of
       `_ALLOWED_GUARDIAN_SCHEMES`. `urllib.request.urlopen` would
       otherwise accept schemes such as file:// or data://.
    2. If the `ACS_GUARDIAN_HOST_ALLOWLIST` environment variable is set to
       a non-blank, comma-separated list, the URL's hostname
       (case-insensitive) must appear in it.

    Returns None when both checks pass. Callers are expected to invoke
    this before handing the URL to urlopen.
    """
    from urllib.parse import urlparse

    parts = urlparse(url)
    scheme_ok = parts.scheme.lower() in _ALLOWED_GUARDIAN_SCHEMES
    if not scheme_ok:
        raise ValueError(
            f"Guardian URL scheme {parts.scheme!r} not allowed; "
            f"only {sorted(_ALLOWED_GUARDIAN_SCHEMES)} permitted")

    raw_allowlist = os.environ.get("ACS_GUARDIAN_HOST_ALLOWLIST", "").strip()
    if not raw_allowlist:
        # No operator allowlist configured: the scheme check is all we enforce.
        return

    allowed_hosts = set()
    for entry in raw_allowlist.split(","):
        cleaned = entry.strip()
        if cleaned:
            allowed_hosts.add(cleaned.lower())

    host = (parts.hostname or "").lower()
    if host in allowed_hosts:
        return
    raise ValueError(
        f"Guardian host {host!r} not in ACS_GUARDIAN_HOST_ALLOWLIST "
        f"({sorted(allowed_hosts)})")
def verify_signature(envelope: dict, *, key: bytes | None = None,
                     session_id: str | None = None) -> bool:
    """Verify the HMAC-SHA256 signature carried by a signed envelope.

    The signature lives in `params` for requests (envelopes with a
    `method` key) and in `result` for responses. It is computed over the
    canonical form of the envelope with the `signature` field removed.

    Args:
        envelope: The parsed JSON-RPC envelope as received.
        key: Per-session HMAC key. When None, the key is derived from the
            configured secret and the session id.
        session_id: Session id for key derivation. When falsy, it is taken
            from `metadata.session_id`, then `request_id`, then "".

    Returns:
        True when the signature is valid, or when no signature is present
        AND no secret is configured (local-dev mode). False in every other
        case, including structurally malformed input: a non-object
        container / signature / metadata, an unknown algorithm, or
        undecodable base64. Malformed wire data never raises, so the
        caller can answer with SIGNATURE_INVALID (-32004) instead of
        failing with an uncaught exception.
    """
    import base64
    import binascii

    container_key = "params" if "method" in envelope else "result"
    container = envelope.get(container_key) or {}
    if not isinstance(container, dict):
        # e.g. "params": [..] — nowhere a signature could legitimately live.
        return False

    sig = container.get("signature")
    if sig is None:
        # No signature on the wire; valid only if local-dev (no key configured)
        return not bool(_signing_secret())
    if not isinstance(sig, dict):
        # "signature": "abc" / 123 / [...] is attacker-controllable input.
        return False

    if sig.get("algorithm") != "HMAC-SHA256":
        return False
    provided_b64 = sig.get("value")
    if not provided_b64:
        return False

    if key is None:
        ikm = _signing_secret()
        if not ikm:
            # Signature present but no key configured: cannot verify
            return False
        if not session_id:
            meta = container.get("metadata") or {}
            if isinstance(meta, dict):
                session_id = meta.get("session_id")
            session_id = session_id or container.get("request_id") or ""
        if not isinstance(session_id, str):
            # Key derivation encodes the session id as text.
            return False
        key = derive_session_key(ikm, session_id)

    unsigned_container = {k: v for k, v in container.items() if k != "signature"}
    unsigned_envelope = {**envelope, container_key: unsigned_container}
    try:
        computed = hmac.new(
            key, jcs_canonicalize(unsigned_envelope), hashlib.sha256
        ).digest()
    except (TypeError, ValueError):
        # Content that cannot be canonicalized cannot carry a valid signature.
        return False

    # Malformed base64 (truncation, garbage chars, non-string) must not
    # crash the request path; report it as an invalid signature instead.
    try:
        provided = base64.b64decode(provided_b64, validate=True)
    except (binascii.Error, ValueError, TypeError):
        return False
    return hmac.compare_digest(computed, provided)
# JSON-RPC error code -> stable audit cause label. Used when the Guardian
# answers with a JSON-RPC `error` object, as opposed to a transport failure.
GUARDIAN_ERROR_CAUSE: dict[int, str] = {
    -32001: "unsupported_version_response",
    -32002: "provenance_required_response",
    -32004: "signature_invalid_response",        # adapter or operator bug
    -32005: "replay_detected_response",          # duplicate request_id
    -32006: "timestamp_out_of_window_response",  # clock skew
    -32600: "malformed_envelope_response",       # non-conformant envelope
    -32700: "parse_error_response",
}


def guardian_error_cause(code: int | None) -> str:
    """Map a JSON-RPC error code to its audit cause label.

    Unknown codes and None both map to the generic
    'guardian_error_response', so the returned cause is never empty.
    """
    fallback = "guardian_error_response"
    return fallback if code is None else GUARDIAN_ERROR_CAUSE.get(code, fallback)
+ + Deployments redirect or parse the `ACS_AUDIT` prefix to feed a real + audit sink. The line is single-line JSON for trivial line-oriented + ingestion. + """ + payload = { + "acs_audit_event": event_type, + "timestamp": iso8601_now(), + **fields, + } + sys.stderr.write("ACS_AUDIT " + json.dumps(payload, sort_keys=True) + "\n") + sys.stderr.flush() + + +# ----- Handshake (§4) ----- + +_HANDSHAKE_CACHE_DIR = Path( + os.environ.get( + "ACS_HANDSHAKE_CACHE", + os.path.join(os.path.expanduser("~"), ".cache", "acs-adapter-handshake"), + ) +) + + +def _handshake_cache_path(session_id: str, guardian_url: str) -> Path: + # Full SHA-256 (no truncation) for the same reason as state files — + # avoid birthday collisions across the deployment's lifetime. + key = hashlib.sha256((session_id + "|" + guardian_url).encode()).hexdigest() + return _HANDSHAKE_CACHE_DIR / f"{key}.json" + + +# Cache TTL — default 1 hour. A Guardian config change (new +# skew_window_ms, new accepted profiles) propagates to adapters within +# this window. Override with ACS_HANDSHAKE_CACHE_TTL_SECONDS. +_HANDSHAKE_CACHE_TTL_S = int(os.environ.get("ACS_HANDSHAKE_CACHE_TTL_SECONDS", "3600")) + + +def ensure_session_handshake( + *, + guardian_url: str, + session_id: str, + agent_id: str, + platform: str, + methods_implemented: list[str], + wrapped_protocols: list[str] | None = None, + timeout: float = 5.0, +) -> dict | None: + """Idempotently ensure a handshake/hello has happened for this session. + + Spec contract (§4): handshake is REQUIRED at session start, ONCE + per session, not per event. The shell-stdin adapters + (claude-code, cursor) spawn a fresh process per hook event, so we + persist the negotiated ServerHello in a small JSON file under + `~/.cache/acs-adapter-handshake/.json`: + + - First event of a session: cache miss → POSTs ClientHello, + receives ServerHello, writes cache file, returns ServerHello. 
+ - Subsequent events same session: cache hit (file fresh, < 1h + old by default) → reads file, returns cached ServerHello. + NO network call. + - Cache files older than the TTL are ignored so operator + Guardian-config changes propagate. + + Returns the ServerHello (cached or freshly fetched), or None on + failure (Guardian unreachable, etc.) — adapters fall to their + startup posture in that case (§4.1). + + Function-name rationale: previously `do_handshake`, which + misleadingly read as 'POST every call'. The cache short-circuit + makes this an ensure-once, so the name says so. + """ + cache = _handshake_cache_path(session_id, guardian_url) + if cache.exists(): + try: + mtime = cache.stat().st_mtime + if (time.time() - mtime) <= _HANDSHAKE_CACHE_TTL_S: + with open(cache) as f: + return json.load(f) + # Else: cache is stale, fall through to re-handshake + except (json.JSONDecodeError, OSError): + pass + + client_hello = { + "jsonrpc": "2.0", + "id": str(uuid.uuid4()), + "method": "handshake/hello", + "params": { + "acs_version": ACS_VERSION, + "request_id": str(uuid.uuid4()), + "timestamp": iso8601_now(), + "metadata": {"agent_id": agent_id, "session_id": session_id, "platform": platform}, + "payload": { + "acs_versions_supported": [ACS_VERSION], + "methods_implemented": methods_implemented, + "transports_supported": ["http", "stdio"], + "max_payload_size_bytes": 1_000_000, + "provenance_producer": "none", + "wrapped_protocols": wrapped_protocols or [], + "profiles_supported": ["acs-core"], + "signature_algorithms_supported": ( + ["HMAC-SHA256"] if _signing_secret() else [] + ), + }, + }, + } + sign_envelope(client_hello, session_id=session_id) + try: + validate_guardian_url(guardian_url) + except ValueError: + return None + try: + body = json.dumps(client_hello).encode("utf-8") + req = urllib.request.Request( + guardian_url, data=body, + headers={"Content-Type": "application/json"}, method="POST", + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: 
def ping(guardian_url: str, *, echo: str = "ping", timeout: float = 2.0) -> dict | None:
    """Send a system/ping and return the result, or None on failure.

    Per §13: Guardian MUST always return allow; ping does not participate
    in the chain; no signature required.

    Args:
        guardian_url: Guardian endpoint; must pass `validate_guardian_url`.
        echo: Value placed in the ping payload's `echo` field.
        timeout: Socket timeout in seconds for the HTTP request.

    Returns:
        The `result` member of the JSON-RPC response, or None when the URL
        is rejected, the request fails at the transport/HTTP layer, the
        body is not valid JSON, or the decoded JSON is not an object.
        This function does not raise for any Guardian-side misbehavior.
    """
    import http.client

    request = {
        "jsonrpc": "2.0",
        "id": str(uuid.uuid4()),
        "method": "system/ping",
        "params": {
            "acs_version": ACS_VERSION,
            "request_id": str(uuid.uuid4()),
            "timestamp": iso8601_now(),
            "metadata": {"agent_id": "ping-client", "session_id": str(uuid.uuid4())},
            "payload": {"echo": echo},
        },
    }
    try:
        validate_guardian_url(guardian_url)
    except ValueError:
        return None
    try:
        body = json.dumps(request).encode("utf-8")
        req = urllib.request.Request(
            guardian_url, data=body,
            headers={"Content-Type": "application/json"}, method="POST",
        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            response = json.loads(resp.read().decode("utf-8"))
    except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError,
            OSError, ValueError,
            # Protocol-level failures (e.g. IncompleteRead, BadStatusLine)
            # derive from Exception, not OSError.
            http.client.HTTPException):
        return None
    # A body such as `[]` or `"ok"` is valid JSON but has no `.get`.
    if not isinstance(response, dict):
        return None
    return response.get("result")
the adapter persists +# a small JSON file in the cache directory. + +_SESSION_STATE_DIR = Path( + os.environ.get( + "ACS_SESSION_STATE_DIR", + os.path.join(os.path.expanduser("~"), ".cache", "acs-adapter-session"), + ) +) + + +def _session_state_path(session_id: str, *, workspace: str | None = None) -> Path: + """Path to the per-session state file. + + Hash key is full 64-char SHA-256 (not [:16]) to eliminate birthday + collisions over the lifetime of a deployment. When `workspace` is + given, it is folded into the hash so two clients with the same + session_id but different workspaces (e.g., two Cursor windows + using `conv-default` as conversation_id) get distinct state files. + """ + if not session_id: + # Empty session_id — return a path that won't collide with anything real + digest = hashlib.sha256(b"empty").hexdigest() + return _SESSION_STATE_DIR / f"{digest}.json" + if workspace: + digest = hashlib.sha256( + (workspace + "\x00" + session_id).encode() + ).hexdigest() + else: + digest = hashlib.sha256(session_id.encode()).hexdigest() + return _SESSION_STATE_DIR / f"{digest}.json" + + +def load_session_state(session_id: str, *, workspace: str | None = None) -> dict: + """Return the session-state dict for `session_id`, or an empty dict. + + See `_session_state_path` for the workspace-namespacing rationale. + """ + if not session_id: + return {} + path = _session_state_path(session_id, workspace=workspace) + try: + with open(path) as f: + return json.load(f) + except (OSError, json.JSONDecodeError): + return {} + + +def save_session_state(session_id: str, state: dict, *, workspace: str | None = None) -> None: + """Persist the session-state dict atomically. No-op on session_id empty. + + The directory is created with mode 0700 and the file with mode 0600 + so other local users cannot read or poison adapter state. 
State + files contain step_id histories that an attacker could use to spoof + `parent_step_id` in subagentStart payloads — a security boundary + for the chain integrity properties of §8. + """ + if not session_id: + return + try: + _SESSION_STATE_DIR.mkdir(parents=True, exist_ok=True, mode=0o700) + # mkdir(exist_ok=True) does not chmod an existing dir — enforce explicitly + try: + os.chmod(_SESSION_STATE_DIR, 0o700) + except OSError: + pass + path = _session_state_path(session_id, workspace=workspace) + tmp = path.with_suffix(".json.tmp") + # Open with 0o600 from the start so the file is never group/world-readable + fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) + with os.fdopen(fd, "w") as f: + json.dump(state, f) + os.replace(tmp, path) + try: + os.chmod(path, 0o600) + except OSError: + pass + except OSError: + pass + + +def record_step(session_id: str, step_id: str, *, workspace: str | None = None) -> None: + """Append step_id to the session's seen-list and update last_step_id.""" + if not session_id or not step_id: + return + st = load_session_state(session_id, workspace=workspace) + seen = st.setdefault("seen_step_ids", []) + if step_id not in seen: + seen.append(step_id) + # Bound the list so it doesn't grow unbounded across long sessions + if len(seen) > 1000: + del seen[: len(seen) - 1000] + st["last_step_id"] = step_id + save_session_state(session_id, st, workspace=workspace) + + +# ----- Sys-path bootstrap for adapters in sibling directories ----- + +def install_path_for_sibling() -> None: + """Convenience: ensures the _common dir is on sys.path so adapters in + sibling directories can `from acs_common import ...`. No-op if + already on path.""" + here = str(Path(__file__).resolve().parent) + if here not in sys.path: + sys.path.insert(0, here) + + +# ----- Back-compat alias ----- +# `do_handshake` was the original name. Renamed to make the cache +# short-circuit visible at call sites. 
# Status glyphs and ANSI escapes shared by every adapter's e2e printer.
CHECK = "✓"
CROSS = "✗"
PASS_TXT = "\033[1;32mPASS\033[0m"
FAIL_TXT = "\033[1;31mFAIL\033[0m"
BOLD = "\033[1m"
RESET = "\033[0m"


class Report:
    """Render scenario PASS/FAIL output and a final summary.

    Use:
        report = Report()
        report.print_header(...adapter-specific lines...)
        report.case(1, total, "ALLOW — ...")
        report.field("Marker:", marker)
        report.sub("Function executed", ok, "count=1")
        report.finish("allow-path", all_passed)
        return 0 if report.summary("YOUR INSTALL IS ACS-CONFORMANT") else 1

    All output goes to stdout via `print`. `self.entries` holds one
    (title, ok) tuple per `finish()` call, in call order.
    """

    def __init__(self) -> None:
        self.entries: list[tuple[str, bool]] = []

    def print_header(self, *lines: str, width: int = 70) -> None:
        """Print `lines` framed by two heavy rules, then a light rule."""
        bar = "═" * width
        print(bar)
        for line in lines:
            print(f" {line}")
        print(bar)
        print()
        print("─" * width)
        print()

    def case(self, num: int, total: int, title: str) -> None:
        """Print the `[num/total] title` heading that opens a scenario."""
        print(f"[{num}/{total}] {BOLD}{title}{RESET}")

    def field(self, label: str, value: str) -> None:
        """Print one `label value` line; the label is padded to 12 columns."""
        print(f" {label:12s} {value}")

    def sub(self, label: str, ok: bool, detail: str = "") -> None:
        """Print one sub-check line with a check or cross mark and optional detail."""
        mark = CHECK if ok else CROSS
        line = f" {mark} {label}"
        if detail:
            line += f" ({detail})"
        print(line)

    def json_block(self, label: str, obj: Any, *, truncate: int = 200) -> None:
        """Pretty-print `obj` as JSON; strings longer than `truncate` are shortened."""
        rendered = json.dumps(self._trim(obj, truncate), indent=2, sort_keys=True)
        rendered = "\n".join(" " + ln for ln in rendered.splitlines())
        print(f" ── {label}")
        print(rendered)

    def quote_block(self, label: str, text: str, *, max_chars: int = 400) -> None:
        """Print `text` as a quoted block, cut to `max_chars` with a truncation note."""
        text = text.strip()
        if len(text) > max_chars:
            text = text[:max_chars] + f"\n[…+{len(text) - max_chars} more chars truncated]"
        wrapped = "\n".join(" " + ln for ln in text.splitlines())
        print(f" ── {label}")
        print(wrapped)

    def operator_action(self, instructions: list[str]) -> None:
        """Wait for the operator to perform the listed steps and press Enter.
        Cursor uses this (GUI in the loop); Claude/NAT do not.

        Exits the process with status 1 on EOF or Ctrl-C."""
        import sys
        print(f" {BOLD}── ACTION REQUIRED ──{RESET}")
        for i, line in enumerate(instructions, 1):
            print(f" {i}. {line}")
        print()
        try:
            input(f" {BOLD}Press Enter when done (or Ctrl-C to abort):{RESET} ")
        except (EOFError, KeyboardInterrupt):
            print()
            print(" Aborted.")
            sys.exit(1)
        print()

    def _trim(self, obj: Any, n: int) -> Any:
        """Return a copy of `obj` with every string longer than `n` shortened.

        Recurses through dicts and lists; other values are returned as-is."""
        if isinstance(obj, str):
            return obj if len(obj) <= n else obj[:n] + f"…(+{len(obj) - n} chars)"
        if isinstance(obj, dict):
            return {k: self._trim(v, n) for k, v in obj.items()}
        if isinstance(obj, list):
            return [self._trim(v, n) for v in obj]
        return obj

    def finish(self, title: str, ok: bool) -> None:
        """Print the scenario verdict and record (title, ok) for the summary."""
        verdict = PASS_TXT if ok else FAIL_TXT
        print(f" Result {verdict}")
        print()
        self.entries.append((title, ok))

    def summary(self, success_banner: str, *, width: int = 70) -> bool:
        """Print the final tally and return whether the run succeeded.

        Returns True only when at least one scenario was recorded and all
        recorded scenarios passed. A run with zero recorded scenarios is
        reported as a failure: printing the success banner for "0/0"
        would let a check that never executed read as a pass.
        """
        bar = "═" * width
        passed = sum(1 for _, ok in self.entries if ok)
        total = len(self.entries)
        all_ok = total > 0 and passed == total
        print(bar)
        if all_ok:
            print(f" Summary: {passed}/{total} scenarios passed — "
                  f"\033[1;32m{success_banner}\033[0m")
        elif total == 0:
            print(f" Summary: {passed}/{total} scenarios passed — "
                  f"\033[1;31mNO SCENARIOS RAN\033[0m")
        else:
            print(f" Summary: {passed}/{total} scenarios passed — "
                  f"\033[1;31mFAILURES BELOW\033[0m")
            for title, ok in self.entries:
                if not ok:
                    print(f" {CROSS} {title}")
        print(bar)
        return all_ok
def assert_envelopes_signed_and_valid(guardian: Any,
                                      validate_request_envelope: Callable,
                                      sub_results: list) -> None:
    """Append the two wire-level sub-checks to `sub_results` (mutated in place).

    1. Signing: counts envelopes in `guardian.received` whose
       `params.signature.algorithm` is "HMAC-SHA256"; passes when that
       count equals the number received.
    2. Schema: runs `validate_request_envelope` on every received envelope
       except `handshake/hello` and `system/ping`; passes when none
       returns errors. On failure, the detail names the first
       (method, first-error) pair.

    The validator is injected by the caller, so this helper needs no
    test-harness import of its own.
    """
    received = guardian.received
    total = len(received)
    signed_count = sum(
        1 for env in received
        if env.get("params", {}).get("signature", {}).get("algorithm") == "HMAC-SHA256"
    )
    sub_results.append((
        "Every envelope is HMAC-SHA256 signed",
        signed_count == total,
        f"{signed_count}/{total}",
    ))

    exempt_methods = ("handshake/hello", "system/ping")
    schema_errors: list = []
    for env in received:
        if env.get("method") in exempt_methods:
            continue
        problems = validate_request_envelope(env)
        if problems:
            schema_errors.append((env.get("method"), problems[0]))

    if schema_errors:
        detail = f"{len(schema_errors)} envelopes failed: {schema_errors[0]}"
    else:
        detail = "no errors"
    sub_results.append((
        "Every envelope validates against canonical schema",
        not schema_errors,
        detail,
    ))
b/adapters/_common/test_harness.py @@ -0,0 +1,460 @@ +""" +Shared test harness for ACS adapter tests. + +Test files across `adapters/` (and `adapters//tests/`) have +historically duplicated ~50 lines of boilerplate each: free-port +allocation, Guardian-spawn waiting, envelope construction, schema +validation, ref-resolver setup. This module is the single home for +those helpers — import what you need rather than redefining. + +Usage from any test file: + + import sys + from pathlib import Path + sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "_common")) + from test_harness import ( + free_port, wait_port, + make_envelope, validate_request_envelope, validate_response_envelope, + spawn_guardian, ProgrammableGuardian, + ) + +The harness has no opinion on `unittest.TestCase` vs other runners — +it's pure functions and a context manager. Plug it into whatever +framework you're using. + +The harness exists today (created with the spec_compliance cleanup); +older test files still have inline duplicates. Migration is opt-in: +when you next touch a test file, swap its inline `_free_port` / +`_wait` / `_make_envelope` for imports from here. +""" +from __future__ import annotations + +import contextlib +import datetime +import json +import os +import socket +import subprocess +import sys +import tempfile +import threading +import time +import urllib.error +import urllib.request +import uuid +from pathlib import Path +from typing import Any, Callable, Iterator + +# Bootstrap acs_common from the sibling location so callers don't need to +# import both manually. 
def wait_port(host: str, port: int, *, timeout: float = 5.0) -> None:
    """Block until something is listening on host:port, or raise.

    Polls with short TCP connection attempts (0.2 s connect timeout,
    0.05 s pause after each failure).

    Args:
        host: Host name or address to connect to.
        port: TCP port to probe.
        timeout: Maximum total time to wait, in seconds.

    Raises:
        RuntimeError: nothing accepted a connection within `timeout`.
    """
    # Monotonic clock: an NTP step or manual wall-clock change during the
    # wait must neither cut the deadline short nor extend it.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=0.2):
                return
        except OSError:
            time.sleep(0.05)
    raise RuntimeError(f"server not up on {host}:{port} after {timeout}s")
def claude_code_event(hook_event_name: str, *, session_id: str | None = None,
                      **extra: Any) -> dict:
    """Build a Claude Code hook event dict for subprocess-driven adapter tests.

    The event always carries `session_id` (a fresh UUID4 string when none
    is given), `transcript_path`, `cwd` and `hook_event_name`.
    `permission_mode` is added for PreToolUse, PostToolUse and Stop.
    Keyword arguments in `extra` are applied last, so they can add fields
    or override any default.
    """
    event = {
        "session_id": session_id or str(uuid.uuid4()),
        "transcript_path": "/tmp/test_transcript.jsonl",
        "cwd": "/tmp/test_work",
        "hook_event_name": hook_event_name,
    }
    needs_permission_mode = hook_event_name in ("PreToolUse", "PostToolUse", "Stop")
    if needs_permission_mode:
        event["permission_mode"] = "default"
    return {**event, **extra}
+ """ + from jsonschema.validators import RefResolver + spec_dir = spec_dir or _default_spec_dir() + store: dict[str, dict] = {} + for path in spec_dir.glob("*.json"): + try: + doc = json.loads(path.read_text()) + except (OSError, json.JSONDecodeError): + continue + if "$id" in doc: + store[doc["$id"]] = doc + store[path.as_uri()] = doc + hooks_dir = spec_dir / "hooks" + if hooks_dir.exists(): + for path in hooks_dir.glob("*.json"): + try: + doc = json.loads(path.read_text()) + except (OSError, json.JSONDecodeError): + continue + if "$id" in doc: + store[doc["$id"]] = doc + store[path.as_uri()] = doc + + schema_path = spec_dir / schema_name + schema = json.loads(schema_path.read_text()) + resolver = RefResolver( + base_uri=schema_path.as_uri(), + referrer=schema, + store=store, + ) + return schema, resolver + + +def _validate(envelope: dict, schema_name: str, + spec_dir: Path | None = None) -> list[str]: + from jsonschema import Draft202012Validator + schema, resolver = build_local_resolver(schema_name, spec_dir=spec_dir) + validator = Draft202012Validator( + schema, resolver=resolver, + format_checker=Draft202012Validator.FORMAT_CHECKER, + ) + return [ + f"{'.'.join(str(p) for p in err.absolute_path) or ''}: {err.message}" + for err in validator.iter_errors(envelope) + ] + + +def validate_request_envelope(envelope: dict, *, + spec_dir: Path | None = None) -> list[str]: + """Validate against request-envelope.json. Returns list of error + messages; empty list means valid.""" + return _validate(envelope, "request-envelope.json", spec_dir=spec_dir) + + +def validate_response_envelope(envelope: dict, *, + spec_dir: Path | None = None) -> list[str]: + """Validate against response-envelope.json. 
Returns list of error + messages; empty list means valid.""" + return _validate(envelope, "response-envelope.json", spec_dir=spec_dir) + + +def validate_hook_payload(payload: dict, hook_schema: str, *, + spec_dir: Path | None = None) -> list[str]: + """Validate a hook payload (e.g., `hooks/tool-call-request.json`). + Returns list of error messages; empty list means valid.""" + return _validate(payload, f"hooks/{hook_schema}", spec_dir=spec_dir) + + +# ──────────────────────────────────────────────────────────────────── +# HTTP helpers +# ──────────────────────────────────────────────────────────────────── + +def post_envelope(url: str, envelope: dict, *, timeout: float = 5.0) -> dict: + """POST an envelope to a Guardian URL and return the parsed response. + + Raises urllib errors if the Guardian is unreachable — callers can + catch to test fail-posture behavior. + """ + body = json.dumps(envelope).encode("utf-8") + req = urllib.request.Request( + url, data=body, + headers={"Content-Type": "application/json"}, method="POST", + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.loads(resp.read().decode("utf-8")) + + +# ──────────────────────────────────────────────────────────────────── +# Guardian lifecycle +# ──────────────────────────────────────────────────────────────────── + +GUARDIAN_SCRIPT = _HERE.parent / "example-guardian" / "example_guardian.py" + + +@contextlib.contextmanager +def spawn_guardian(*, port: int | None = None, + hmac_secret: str | None = None, + dev_mode: bool | None = None, + state_dir: str | None = None, + extra_env: dict | None = None) -> Iterator[tuple[subprocess.Popen, str]]: + """Spawn an example_guardian subprocess; yield (process, url); clean up. + + Usage: + with spawn_guardian(hmac_secret="test") as (proc, url): + resp = post_envelope(url, env) + ... 
+ """ + port = port or free_port() + env = os.environ.copy() + if hmac_secret is not None: + env["ACS_HMAC_SECRET"] = hmac_secret + env.pop("ACS_DEV_MODE", None) + else: + env["ACS_DEV_MODE"] = "1" if dev_mode is not False else "0" + env.pop("ACS_HMAC_SECRET", None) + env.pop("ACS_HMAC_SECRET_FILE", None) + if state_dir: + env["ACS_GUARDIAN_STATE_DIR"] = state_dir + else: + # Default to an ephemeral state dir so tests don't cross-contaminate + env["ACS_GUARDIAN_STATE_DIR"] = tempfile.mkdtemp(prefix="acs-test-state-") + if extra_env: + env.update(extra_env) + + proc = subprocess.Popen( + [sys.executable, str(GUARDIAN_SCRIPT), "--port", str(port)], + env=env, + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + try: + wait_port("127.0.0.1", port) + yield proc, f"http://127.0.0.1:{port}/acs" + finally: + proc.terminate() + try: + proc.wait(timeout=2.0) + except subprocess.TimeoutExpired: + proc.kill() + # Clean ephemeral state dir if we created it + if not state_dir: + import shutil + shutil.rmtree(env["ACS_GUARDIAN_STATE_DIR"], ignore_errors=True) + + +# ──────────────────────────────────────────────────────────────────── +# Programmable Guardian — for tests that need to control responses. +# ──────────────────────────────────────────────────────────────────── +# +# `spawn_guardian` runs the REAL example_guardian process — useful for +# integration tests against the production code path. For unit tests +# that need to force specific dispositions (modify, ask, defer) or +# delays, use ProgrammableGuardian: an in-process HTTP server you can +# configure handler-by-handler. + +class ProgrammableGuardian: + """Test Guardian whose response can be programmed per method. + + Records every received request and every sent response so tests + can assert on the wire-level exchange. By default verifies HMAC + signatures (using TEST_HMAC_SECRET) and returns allow for every + method. 
Replace `handlers[method]` with a callable returning a + result dict (or an error dict with `code` + `message`) to customize. + """ + + DEFAULT_TEST_HMAC_SECRET = "shared-test-harness-secret-not-for-production" + + def __init__(self, *, hmac_secret: str | None = None, + sign_responses: bool = True) -> None: + import http.server + self.hmac_secret = hmac_secret or self.DEFAULT_TEST_HMAC_SECRET + self.sign_responses = sign_responses + self.port: int = free_port() + self.received: list[dict] = [] + self.sent: list[dict] = [] + self.lock = threading.Lock() + self.handlers: dict[str, Callable[[dict], dict]] = { + "handshake/hello": self._default_handshake, + "__default__": self._default_allow, + } + self.delay_s: float = 0.0 + self._http = http.server + self._server = self._http.HTTPServer( + ("127.0.0.1", self.port), self._make_handler_cls()) + self._thread = threading.Thread(target=self._server.serve_forever, daemon=True) + + def url(self) -> str: + return f"http://127.0.0.1:{self.port}/acs" + + def start(self) -> None: + self._thread.start() + wait_port("127.0.0.1", self.port) + + def stop(self) -> None: + self._server.shutdown() + self._server.server_close() + + def __enter__(self): + self.start() + return self + + def __exit__(self, *exc): + self.stop() + + def reset(self) -> None: + with self.lock: + self.received.clear() + self.sent.clear() + + def methods_seen(self) -> list[str]: + with self.lock: + return [r.get("method", "") for r in self.received] + + def last_envelope(self) -> dict | None: + with self.lock: + return self.received[-1] if self.received else None + + def envelopes_for(self, method: str) -> list[dict]: + with self.lock: + return [r for r in self.received if r.get("method") == method] + + # ─── Default handlers ─── + + def _default_handshake(self, req: dict) -> dict: + return { + "type": "final", "acs_version": "0.1.0", + "request_id": req["params"]["request_id"], + "decision": "allow", + "payload": { + "negotiated_version": "0.1.0", + 
"methods_evaluated": req["params"]["payload"].get("methods_implemented", []), + "selected_transport": "http", + "signature_algorithms_supported": ["HMAC-SHA256"], + "timeout_config": {"default_ms": 5000}, + "skew_window_ms": 300000, + "on_decision_failure": "proceed", + "profiles_accepted": ["acs-core"], + }, + } + + def _default_allow(self, req: dict) -> dict: + return { + "type": "final", "acs_version": "0.1.0", + "request_id": req["params"]["request_id"], + "decision": "allow", + "chain_hash": "0" * 64, + } + + def _make_handler_cls(self): + guardian = self + + class Handler(self._http.BaseHTTPRequestHandler): + def do_POST(self_h): # noqa: N802 + length = int(self_h.headers.get("Content-Length", "0")) + req = json.loads(self_h.rfile.read(length).decode()) + with guardian.lock: + guardian.received.append(req) + if guardian.delay_s > 0: + time.sleep(guardian.delay_s) + + method = req.get("method", "") + # handshake/hello and system/ping are signature-exempt + # per §4.1 and §13. Everything else MUST verify. 
+ if method not in ("handshake/hello", "system/ping"): + sid = req.get("params", {}).get("metadata", {}).get("session_id", "") + key = acs_common.derive_session_key( + guardian.hmac_secret.encode(), sid) + if not acs_common.verify_signature(req, key=key, session_id=sid): + self_h._reply({ + "jsonrpc": "2.0", "id": req.get("id"), + "error": {"code": -32004, "message": "SIGNATURE_INVALID"}, + }) + return + + handler = guardian.handlers.get(method, guardian.handlers["__default__"]) + result_or_error = handler(req) + if "code" in result_or_error and "message" in result_or_error: + resp = {"jsonrpc": "2.0", "id": req.get("id"), + "error": result_or_error} + else: + resp = {"jsonrpc": "2.0", "id": req.get("id"), + "result": result_or_error} + if guardian.sign_responses: + sid = req["params"]["metadata"]["session_id"] + key = acs_common.derive_session_key( + guardian.hmac_secret.encode(), sid) + acs_common.sign_envelope(resp, key=key, session_id=sid) + self_h._reply(resp) + + def _reply(self_h, resp: dict): + with guardian.lock: + guardian.sent.append(resp) + body = json.dumps(resp).encode() + self_h.send_response(200) + self_h.send_header("Content-Type", "application/json") + self_h.send_header("Content-Length", str(len(body))) + self_h.end_headers() + self_h.wfile.write(body) + + def log_message(self_h, *a, **kw): + return + + return Handler diff --git a/adapters/_common/tests/__init__.py b/adapters/_common/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/adapters/_common/tests/test_edge_cases.py b/adapters/_common/tests/test_edge_cases.py new file mode 100644 index 0000000..a13bbc7 --- /dev/null +++ b/adapters/_common/tests/test_edge_cases.py @@ -0,0 +1,523 @@ +""" +Cross-cutting edge-case tests for the 12 items from the post-PR audit. + +Each test names the item, exercises the failure scenario, and asserts +the fix-side behavior. Tests are written first; fixes follow. 
+""" +from __future__ import annotations + +import json +import os +import socket +import stat +import subprocess +import sys +import tempfile +import threading +import time +import unittest +import urllib.error +import urllib.request +import uuid +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +import acs_common # noqa: E402 + +from test_harness import free_port as _free_port, wait_port as _wait # noqa: E402 + +HERE = Path(__file__).resolve().parent +GUARDIAN = HERE.parent.parent / "example-guardian" / "example_guardian.py" + + +# ===== #1: rfc8785 cross-install JCS consistency ===== + +class Item01_JcsConsistency(unittest.TestCase): + """The fallback `jcs_canonicalize` and the rfc8785 package must + produce identical bytes for every shape ACS envelopes carry. If + they differ, a deployment with rfc8785 on one side and not the + other gets signature-verification failures on every request.""" + + SAMPLES = [ + # Typical PreToolUse envelope + {"jsonrpc": "2.0", "id": "abc", "method": "steps/toolCallRequest", + "params": {"acs_version": "0.1.0", "request_id": "11111111-1111-4111-8111-111111111111", + "timestamp": "2026-06-17T12:00:00.000Z", + "metadata": {"agent_id": "x", "session_id": "11111111-1111-4111-8111-111111111111"}, + "payload": {"tool": {"name": "Bash"}, + "arguments": {"command": {"value": "ls -la"}}}}}, + # Empty object, empty array, null + {"a": {}, "b": [], "c": None, "d": True, "e": False}, + # Integers + negatives + zero + {"x": 0, "y": -1, "z": 123456789, "neg": -987654321}, + # Nested + ordered-keys check + {"z": 1, "a": 2, "m": {"y": [3, 1, 2], "x": "hi"}}, + # Unicode (BMP) + {"emoji": "🚀", "hebrew": "שלום", "ascii": "hi"}, + ] + + def test_fallback_matches_rfc8785_on_acs_envelope_shapes(self) -> None: + try: + import rfc8785 + except ImportError: + self.skipTest("rfc8785 not installed; can't compare") + for sample in self.SAMPLES: + fallback = json.dumps(sample, sort_keys=True, separators=(",", ":"), 
+ ensure_ascii=False).encode("utf-8") + canonical = rfc8785.dumps(sample) + self.assertEqual(fallback, canonical, + f"JCS divergence between fallback and rfc8785 on {sample!r}:\n" + f" fallback : {fallback!r}\n" + f" rfc8785 : {canonical!r}\n" + "A deployment with mismatched JCS implementations would " + "fail every signed-envelope verification.") + + +# ===== #2: Guardian regex DoS — server-side input cap ===== + +class Item02_GuardianRegexInputCap(unittest.TestCase): + """The Guardian's destructive-bash regex must NOT scan arbitrarily- + large inputs. _common has scan_destructive_bash_safely with an 8KB + cap, but the Guardian's own code path was iterating patterns + directly — uncapped.""" + + @classmethod + def setUpClass(cls) -> None: + cls.port = _free_port() + env = os.environ.copy() + env["ACS_DEV_MODE"] = "1" + env.pop("ACS_HMAC_SECRET", None) + env.pop("ACS_HMAC_SECRET_FILE", None) + # Use a unique state dir per run to avoid leftover replay state + cls.statedir = tempfile.mkdtemp(prefix="acs-guardian-state-") + env["ACS_GUARDIAN_STATE_DIR"] = cls.statedir + cls.proc = subprocess.Popen( + [sys.executable, str(GUARDIAN), "--port", str(cls.port)], env=env, + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + _wait("127.0.0.1", cls.port) + cls.url = f"http://127.0.0.1:{cls.port}/acs" + + @classmethod + def tearDownClass(cls) -> None: + cls.proc.terminate() + try: cls.proc.wait(timeout=2.0) + except subprocess.TimeoutExpired: cls.proc.kill() + import shutil + shutil.rmtree(cls.statedir, ignore_errors=True) + + def test_huge_bash_command_is_capped_not_scanned(self) -> None: + """A 100 KiB bash command MUST not pin the Guardian on regex + backtracking. 
The Guardian denies it with a clear reason; does + NOT iterate the destructive-bash patterns over the full input.""" + # 500 KiB command; would burn CPU on naive regex matching + huge = "a " * (250 * 1024) + body = json.dumps({ + "jsonrpc": "2.0", "id": "ed1", + "method": "steps/toolCallRequest", + "params": { + "acs_version": "0.1.0", + "request_id": str(uuid.uuid4()), + "timestamp": acs_common.iso8601_now(), + "metadata": {"agent_id": "t", "session_id": str(uuid.uuid4())}, + "payload": {"tool": {"name": "Bash"}, + "arguments": {"command": {"value": huge}}}, + }, + }).encode() + # Send under a strict time bound + req = urllib.request.Request(self.url, data=body, + headers={"Content-Type": "application/json"}, method="POST") + start = time.monotonic() + with urllib.request.urlopen(req, timeout=10.0) as resp: + elapsed = time.monotonic() - start + data = json.loads(resp.read().decode()) + self.assertLess(elapsed, 1.0, + f"Guardian took {elapsed:.2f}s to handle a 500 KiB command — " + f"regex DoS gap not closed server-side") + # The Guardian should DENY the request rather than allow it + # (uncapped scan would have produced "allow" since `aaaaaa...` + # doesn't match destructive-bash patterns). + result = data.get("result", {}) + self.assertEqual(result.get("decision"), "deny", + f"Guardian must deny oversized bash command (cannot safely scan); " + f"got decision={result.get('decision')!r}, full={data}") + + +# ===== #4: TTL eviction on seen_request_ids ===== + +class Item04_ReplaySetTtlEviction(unittest.TestCase): + """seen_request_ids must be bounded. 
Without eviction, a long-running + session accumulates UUIDs forever — memory leak + bloated state file.""" + + def test_eviction_drops_entries_older_than_2x_skew_window(self) -> None: + sys.path.insert(0, str(GUARDIAN.parent)) + import example_guardian + sys.path.insert(0, str(GUARDIAN.parent.parent / "_common")) + # Create a fresh SessionState for this test + sid = f"ttl-test-{uuid.uuid4()}" + # Patch the state dir so this test doesn't pollute home + with tempfile.TemporaryDirectory() as tmp: + example_guardian.STATE_DIR = Path(tmp) + example_guardian.PERSIST_ENABLED = True + st = example_guardian.SessionState(session_id=sid) + # Inject an old request_id directly + old_ts = time.time() - (example_guardian.SKEW_WINDOW_MS / 1000) * 3 + recent_ts = time.time() + with st.lock: + # The fix wraps seen_request_ids as a dict of rid -> timestamp + # (or maintains an ordered structure with timestamps) + st.seen_request_ids = {} if isinstance(st.seen_request_ids, dict) else st.seen_request_ids + # If still a set, test fails the fix expectation + self.assertIsInstance(st.seen_request_ids, dict, + "FIX REQUIRED: seen_request_ids must become a dict " + "(request_id -> timestamp_seconds) so TTL eviction " + "can drop entries older than 2 × skew_window.") + st.seen_request_ids["old-rid"] = old_ts + st.seen_request_ids["new-rid"] = recent_ts + example_guardian.evict_old_request_ids(st) + self.assertNotIn("old-rid", st.seen_request_ids, + "old request_id not evicted") + self.assertIn("new-rid", st.seen_request_ids, + "recent request_id wrongly evicted") + + +# ===== #5: handshake cache TTL ===== + +class Item05_HandshakeCacheTtl(unittest.TestCase): + """Handshake cache must invalidate after a TTL. 
Operator config + changes (skew window, profiles) MUST not be served from stale cache.""" + + def test_fresh_cache_is_honored(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + os.environ["ACS_HANDSHAKE_CACHE"] = tmp + import importlib + importlib.reload(acs_common) + url = "http://127.0.0.1:1/dead" + fake_hello = {"negotiated_version": "0.1.0", "_synthetic": True} + cache_path = acs_common._handshake_cache_path("sess1", url) + cache_path.parent.mkdir(parents=True, exist_ok=True) + cache_path.write_text(json.dumps(fake_hello)) + # Recently-mtimed cache must be honored + result = acs_common.do_handshake( + guardian_url=url, session_id="sess1", + agent_id="x", platform="t", methods_implemented=[], + ) + self.assertEqual(result, fake_hello, + "fresh handshake cache must be honored to avoid the " + "per-process re-handshake overhead") + + def test_stale_cache_is_ignored(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + os.environ["ACS_HANDSHAKE_CACHE"] = tmp + import importlib + importlib.reload(acs_common) + url = "http://127.0.0.1:1/dead" + cache_path = acs_common._handshake_cache_path("sess2", url) + cache_path.parent.mkdir(parents=True, exist_ok=True) + cache_path.write_text(json.dumps({"_synthetic": True})) + # Backdate the cache file 2 hours + two_hours_ago = time.time() - 2 * 3600 + os.utime(cache_path, (two_hours_ago, two_hours_ago)) + result = acs_common.do_handshake( + guardian_url=url, session_id="sess2", + agent_id="x", platform="t", methods_implemented=[], + ) + # Cache stale → not honored → handshake attempted → fails (dead URL) → None + self.assertIsNone(result, + "stale handshake cache was honored — operator Guardian-config " + "changes would not propagate within the TTL") + + +# ===== #6: NAT id(context) collision — WeakKeyDictionary ===== + +class Item06_NatContextIdCollision(unittest.TestCase): + """When pre_invoke can't set an attr on context (frozen / weird type), + the fallback used uuid5(id(context)) — and Python recycles 
ids after + GC. Two distinct contexts could get the same uuid → collision.""" + + def test_distinct_frozen_contexts_get_distinct_uuids(self) -> None: + sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "nat")) + try: + import acs_adapter as nat_adapter + except ImportError: + self.skipTest("NAT adapter not importable in this env") + + class _StubConfig: + guardian_url = "http://127.0.0.1:8787/acs" + default_deny = False + session_id = "frozen-test" + timeout_s = 5.0 + target_function_or_group = "x" + target_location = "input" + + class FrozenContext: + """Mimics a NAT context that rejects attribute assignment.""" + __slots__ = ("function_context", "modified_kwargs") + def __init__(self): + class FC: name = "tool" + self.function_context = FC() + self.modified_kwargs = {} + + mw = nat_adapter.ACSMiddleware(_StubConfig()) + + ctx1 = FrozenContext() + rid1 = mw._correlation_request_id(ctx1) + # Free ctx1, let Python recycle the id + del ctx1 + import gc; gc.collect() + ctx2 = FrozenContext() + rid2 = mw._correlation_request_id(ctx2) + self.assertNotEqual(rid1, rid2, + f"FIX REQUIRED: two distinct (frozen) contexts produced the " + f"same request_id ({rid1}). 
id(ctx) recycles after GC; the " + f"frozen-context fallback must use a per-instance unique key " + f"(WeakKeyDictionary).") + + +# ===== #7: unicode / null / surrogate round-trip ===== + +class Item07_UnicodeRoundTrip(unittest.TestCase): + """Tool args with unicode, NULL bytes, and (where possible) lone + surrogates must sign+verify cleanly — otherwise emoji-heavy or + binary-ish args break the wire.""" + + def _make_envelope(self, value): + return { + "jsonrpc": "2.0", "id": "u1", "method": "steps/toolCallRequest", + "params": { + "acs_version": "0.1.0", + "request_id": "00000000-0000-4000-8000-000000000001", + "timestamp": "2026-06-17T12:00:00.000Z", + "metadata": {"agent_id": "x", "session_id": "00000000-0000-4000-8000-000000000001"}, + "payload": {"tool": {"name": "Bash"}, + "arguments": {"v": {"value": value}}}, + }, + } + + def test_emoji_round_trip(self) -> None: + env = self._make_envelope("🚀 from agent — ✨") + key = acs_common.derive_session_key(b"test-secret", "sess") + acs_common.sign_envelope(env, key=key, session_id="sess") + self.assertTrue(acs_common.verify_signature(env, key=key, session_id="sess")) + + def test_null_byte_round_trip(self) -> None: + env = self._make_envelope("before\x00after") + key = acs_common.derive_session_key(b"test-secret", "sess") + acs_common.sign_envelope(env, key=key, session_id="sess") + self.assertTrue(acs_common.verify_signature(env, key=key, session_id="sess")) + + def test_bmp_and_supplementary_planes_round_trip(self) -> None: + # Hebrew (BMP) + emoji (supplementary plane) + env = self._make_envelope("שלום 🌍 こんにちは") + key = acs_common.derive_session_key(b"test-secret", "sess") + acs_common.sign_envelope(env, key=key, session_id="sess") + self.assertTrue(acs_common.verify_signature(env, key=key, session_id="sess")) + + +# ===== #8: ISO 8601 parse resilience ===== + +class Item08_Iso8601Parse(unittest.TestCase): + """parse_iso8601 must accept the spec's range of timestamp shapes + without raising. 
Brittleness here surfaces as TIMESTAMP_OUT_OF_WINDOW + on legitimate requests.""" + + GOOD = [ + "2026-06-17T12:00:00Z", + "2026-06-17T12:00:00.000Z", + "2026-06-17T12:00:00.123456Z", + "2026-06-17T12:00:00+00:00", + "2026-06-17T12:00:00.500+02:00", + "2026-06-17T12:00:00-05:30", + ] + BAD = [ + "not a timestamp", + "2026/06/17 12:00:00", + "", + ] + + def test_good_timestamps_parse(self) -> None: + for ts in self.GOOD: + try: + acs_common.parse_iso8601(ts) + except Exception as e: + self.fail(f"valid timestamp {ts!r} failed to parse: {e}") + + def test_bad_timestamps_raise_value_error(self) -> None: + for ts in self.BAD: + with self.assertRaises((ValueError, AttributeError), + msg=f"{ts!r} should be rejected"): + acs_common.parse_iso8601(ts) + + +# ===== #9: ACS_GUARDIAN_HOST_ALLOWLIST ===== + +class Item09_HostAllowlist(unittest.TestCase): + """validate_guardian_url should honor an optional + ACS_GUARDIAN_HOST_ALLOWLIST (comma-separated hostnames) so an + operator can restrict the env-var attack surface.""" + + def setUp(self) -> None: + self._old = os.environ.get("ACS_GUARDIAN_HOST_ALLOWLIST") + + def tearDown(self) -> None: + if self._old is None: + os.environ.pop("ACS_GUARDIAN_HOST_ALLOWLIST", None) + else: + os.environ["ACS_GUARDIAN_HOST_ALLOWLIST"] = self._old + + def test_allowlist_unset_accepts_any_http(self) -> None: + os.environ.pop("ACS_GUARDIAN_HOST_ALLOWLIST", None) + acs_common.validate_guardian_url("http://127.0.0.1:8787/acs") + acs_common.validate_guardian_url("http://anything.example.com/acs") + + def test_allowlist_restricts_to_listed_hosts(self) -> None: + os.environ["ACS_GUARDIAN_HOST_ALLOWLIST"] = "127.0.0.1,guardian.internal" + acs_common.validate_guardian_url("http://127.0.0.1:8787/acs") + acs_common.validate_guardian_url("https://guardian.internal/acs") + with self.assertRaises(ValueError): + acs_common.validate_guardian_url("http://attacker.example.com/acs") + + +# ===== #10: Cursor session-state file collision across workspaces ===== + 
+class Item10_CursorStateCollision(unittest.TestCase): + """Two different Cursor windows that happen to use the same + session_id (non-UUID conversation IDs collide easily) MUST NOT share + a session-state file. State file key must include cwd or workspace + path so parent_step_id can't leak across workspaces.""" + + def test_same_session_id_different_workspace_distinct_paths(self) -> None: + # Two distinct workspaces, same session_id + p1 = acs_common._session_state_path("conv-default", workspace="/a/work1") + p2 = acs_common._session_state_path("conv-default", workspace="/b/work2") + self.assertNotEqual(p1, p2, + f"FIX REQUIRED: session-state path collides across workspaces: {p1}") + + +# ===== #11: Guardian schema-validates incoming envelopes ===== + +class Item11_GuardianValidatesIncoming(unittest.TestCase): + """The Guardian MUST reject envelopes that don't match request-envelope.json + before evaluating policy. Malformed envelopes that slip through can + crash or mis-route the policy code.""" + + @classmethod + def setUpClass(cls) -> None: + cls.port = _free_port() + env = os.environ.copy() + env["ACS_DEV_MODE"] = "1" + env.pop("ACS_HMAC_SECRET", None) + env.pop("ACS_HMAC_SECRET_FILE", None) + cls.statedir = tempfile.mkdtemp(prefix="acs-guardian-state-") + env["ACS_GUARDIAN_STATE_DIR"] = cls.statedir + cls.proc = subprocess.Popen( + [sys.executable, str(GUARDIAN), "--port", str(cls.port)], env=env, + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + _wait("127.0.0.1", cls.port) + cls.url = f"http://127.0.0.1:{cls.port}/acs" + + @classmethod + def tearDownClass(cls) -> None: + cls.proc.terminate() + try: cls.proc.wait(timeout=2.0) + except subprocess.TimeoutExpired: cls.proc.kill() + import shutil + shutil.rmtree(cls.statedir, ignore_errors=True) + + def test_malformed_envelope_rejected_with_invalid_request(self) -> None: + # Missing required `params` entirely + body = json.dumps({"jsonrpc": "2.0", "id": "x", "method": "steps/sessionStart"}).encode() + 
req = urllib.request.Request(self.url, data=body, + headers={"Content-Type": "application/json"}, method="POST") + with urllib.request.urlopen(req, timeout=5.0) as resp: + data = json.loads(resp.read().decode()) + self.assertIn("error", data, + f"Guardian must reject schema-violating envelopes; got {data}") + self.assertIn(data["error"]["code"], (-32600, -32602), + f"expected -32600 Invalid Request / -32602 Invalid params; got {data['error']}") + + +# ===== #12: state-file hash length 16 → 64 ===== + +class Item12_StatePathHashLength(unittest.TestCase): + """16 hex chars = 64-bit hash space. After billions of sessions, + birthday collisions become possible. Use full SHA-256 (64 chars).""" + + def test_state_path_uses_full_sha256(self) -> None: + p = acs_common._session_state_path("any-session-id") + # Path stem (filename without extension) must be 64 hex chars + self.assertRegex(p.stem, r"^[0-9a-f]{64}$", + f"session-state filename {p.name!r} uses short hash; " + f"expected 64-char SHA-256 hex digest") + + +# ===== #3: HA Guardian — file-locked merge ===== + +class Item03_HaGuardianFileLock(unittest.TestCase): + """Two Guardian processes sharing a STATE_DIR must not lose + replay-protection state. 
Process A accepts request X and persists; + Process B must, on its next check_replay for the same session, + re-read disk and see X.""" + + def _start(self, port: int, statedir: str) -> subprocess.Popen: + env = os.environ.copy() + env["ACS_DEV_MODE"] = "1" + env.pop("ACS_HMAC_SECRET", None) + env.pop("ACS_HMAC_SECRET_FILE", None) + env["ACS_GUARDIAN_STATE_DIR"] = statedir + p = subprocess.Popen( + [sys.executable, str(GUARDIAN), "--port", str(port)], env=env, + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + _wait("127.0.0.1", port) + return p + + def _post(self, port: int, body: dict) -> dict: + req = urllib.request.Request( + f"http://127.0.0.1:{port}/acs", + data=json.dumps(body).encode(), + headers={"Content-Type": "application/json"}, method="POST", + ) + with urllib.request.urlopen(req, timeout=5.0) as resp: + return json.loads(resp.read().decode()) + + def _envelope(self, sid, rid): + return {"jsonrpc": "2.0", "id": str(uuid.uuid4()), + "method": "steps/sessionStart", + "params": { + "acs_version": "0.1.0", "request_id": rid, + "timestamp": acs_common.iso8601_now(), + "metadata": {"agent_id": "ha", "session_id": sid, "platform": "t"}, + "payload": {}, + }} + + def test_second_guardian_sees_first_guardians_replay_state(self) -> None: + with tempfile.TemporaryDirectory() as statedir: + port_a, port_b = _free_port(), _free_port() + proc_a = self._start(port_a, statedir) + proc_b = self._start(port_b, statedir) + try: + sid = str(uuid.uuid4()) + rid = str(uuid.uuid4()) + # Guardian A accepts the request + r_a = self._post(port_a, self._envelope(sid, rid)) + self.assertIn("result", r_a, + f"Guardian A must accept first send; got {r_a}") + # Guardian B MUST reject the replay (shared state dir + file + # locking + re-read on check_replay). 
+ r_b = self._post(port_b, self._envelope(sid, rid)) + self.assertIn("error", r_b, + "Guardian B accepted a replay of an envelope already " + "seen by Guardian A — HA file-locking + re-read on " + "check_replay broken; cross-instance replay window open") + self.assertEqual(r_b["error"]["code"], -32005) + finally: + for p in (proc_a, proc_b): + p.terminate() + try: p.wait(timeout=2.0) + except subprocess.TimeoutExpired: p.kill() + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/adapters/_common/tests/test_security.py b/adapters/_common/tests/test_security.py new file mode 100644 index 0000000..9c5e016 --- /dev/null +++ b/adapters/_common/tests/test_security.py @@ -0,0 +1,440 @@ +""" +Security tests for the shared `acs_common` helpers and the Guardian +HTTP path. Each test names a specific attack and asserts the +mitigation that defeats it. + +Spec context: ACS is a security project (the whole point is to police +agent behavior). Adapter and Guardian code that itself has security +holes undermines the standard. These tests are the falsifiers for the +mitigations documented in `adapters/SECURITY.md`. +""" +from __future__ import annotations + +import binascii +import json +import os +import socket +import stat +import subprocess +import sys +import tempfile +import threading +import time +import unittest +import urllib.error +import urllib.request +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +import acs_common # noqa: E402 +from test_harness import free_port as _free_port, wait_port as _wait # noqa: E402 + + +HERE = Path(__file__).resolve().parent +GUARDIAN = HERE.parent.parent / "example-guardian" / "example_guardian.py" + + +# ----- 1. SSRF via Guardian URL ----- + +class GuardianUrlSchemeAllowlist(unittest.TestCase): + """ACS_GUARDIAN_URL must be http/https; file://, ftp://, data://, + etc. 
must be refused before the adapter calls urlopen — otherwise + an attacker who controls the env var can read arbitrary files.""" + + def test_file_scheme_rejected(self) -> None: + with self.assertRaises(ValueError) as cm: + acs_common.validate_guardian_url("file:///etc/passwd") + self.assertIn("scheme", str(cm.exception).lower()) + + def test_ftp_scheme_rejected(self) -> None: + with self.assertRaises(ValueError): + acs_common.validate_guardian_url("ftp://example.com/x") + + def test_data_scheme_rejected(self) -> None: + with self.assertRaises(ValueError): + acs_common.validate_guardian_url("data:text/plain,evil") + + def test_javascript_scheme_rejected(self) -> None: + with self.assertRaises(ValueError): + acs_common.validate_guardian_url("javascript:alert(1)") + + def test_http_accepted(self) -> None: + acs_common.validate_guardian_url("http://127.0.0.1:8787/acs") # no raise + + def test_https_accepted(self) -> None: + acs_common.validate_guardian_url("https://guardian.internal/acs") + + +# ----- 2. 
World-readable secret file ----- + +class HmacSecretFilePermissions(unittest.TestCase): + """A secret file readable by group or world is a configuration mistake + that must be refused — silently using a world-readable secret leaks + the HMAC key to any local process.""" + + def setUp(self) -> None: + self.tmpdir = tempfile.mkdtemp() + self.secret_path = Path(self.tmpdir) / "hmac.key" + self.secret_path.write_bytes(b"super-secret-key-material" * 4) + self._old_env = { + "ACS_HMAC_SECRET_FILE": os.environ.get("ACS_HMAC_SECRET_FILE"), + "ACS_HMAC_SECRET": os.environ.get("ACS_HMAC_SECRET"), + } + os.environ["ACS_HMAC_SECRET_FILE"] = str(self.secret_path) + os.environ.pop("ACS_HMAC_SECRET", None) + + def tearDown(self) -> None: + import shutil + shutil.rmtree(self.tmpdir, ignore_errors=True) + for k, v in self._old_env.items(): + if v is None: + os.environ.pop(k, None) + else: + os.environ[k] = v + + def test_mode_0600_accepted(self) -> None: + os.chmod(self.secret_path, 0o600) + # Returns secret bytes; no raise + self.assertTrue(acs_common.load_hmac_secret().startswith(b"super-secret")) + + def test_world_readable_rejected(self) -> None: + os.chmod(self.secret_path, 0o644) + with self.assertRaises(acs_common.SecretFilePermissionsError): + acs_common.load_hmac_secret() + + def test_group_readable_rejected(self) -> None: + os.chmod(self.secret_path, 0o640) + with self.assertRaises(acs_common.SecretFilePermissionsError): + acs_common.load_hmac_secret() + + def test_symlink_rejected(self) -> None: + real = Path(self.tmpdir) / "real.key" + real.write_bytes(b"x" * 32) + os.chmod(real, 0o600) + self.secret_path.unlink() + os.symlink(real, self.secret_path) + with self.assertRaises(acs_common.SecretFilePermissionsError): + acs_common.load_hmac_secret() + + +# ----- 3. Guardian HTTP DoS via oversized body ----- + +class GuardianBodySizeCap(unittest.TestCase): + """A POST with Content-Length > limit must be refused without reading + the whole body. 
The Guardian's documented max_payload_size_bytes in + the handshake is 1 MiB; the read path must enforce it.""" + + @classmethod + def setUpClass(cls) -> None: + cls.port = _free_port() + env = os.environ.copy() + env["ACS_DEV_MODE"] = "1" + env.pop("ACS_HMAC_SECRET", None) + env.pop("ACS_HMAC_SECRET_FILE", None) + cls.proc = subprocess.Popen( + [sys.executable, str(GUARDIAN), "--port", str(cls.port)], env=env, + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + _wait("127.0.0.1", cls.port) + cls.url = f"http://127.0.0.1:{cls.port}/acs" + + @classmethod + def tearDownClass(cls) -> None: + cls.proc.terminate() + try: + cls.proc.wait(timeout=2.0) + except subprocess.TimeoutExpired: + cls.proc.kill() + + def test_oversized_request_rejected(self) -> None: + oversized = b"a" * (2 * 1024 * 1024) # 2 MiB > 1 MiB cap + req = urllib.request.Request( + self.url, data=oversized, + headers={"Content-Type": "application/json"}, method="POST", + ) + code = None + connection_reset = False + try: + with urllib.request.urlopen(req, timeout=5.0) as resp: + code = resp.status + except urllib.error.HTTPError as e: + code = e.code + except urllib.error.URLError: + # The Guardian sent its 413 response and closed the socket + # before the client finished writing 2 MiB — manifests as a + # connection reset / write error on the client side. That's + # still a successful rejection: the body was never accepted. + connection_reset = True + # Any outcome that is NOT 200 OK proves the body was refused. + self.assertFalse(code == 200, + f"oversized request was accepted (status {code}) — DoS risk") + if not connection_reset: + self.assertIn(code, (400, 413), + f"expected 413 Payload Too Large or 400 or connection reset; got {code}") + + +# ----- 4. 
Cache directory permissions ----- + +class CacheDirPermissions(unittest.TestCase): + """save_session_state and the handshake cache must create files with + mode 0600 and parent dirs with mode 0700 — otherwise a local + attacker can read or poison adapter state.""" + + def setUp(self) -> None: + self.tmpdir = Path(tempfile.mkdtemp()) + self._old = os.environ.get("ACS_SESSION_STATE_DIR") + os.environ["ACS_SESSION_STATE_DIR"] = str(self.tmpdir / "state") + # Re-evaluate module-level dir + import importlib + importlib.reload(acs_common) + + def tearDown(self) -> None: + import shutil + shutil.rmtree(self.tmpdir, ignore_errors=True) + if self._old is None: + os.environ.pop("ACS_SESSION_STATE_DIR", None) + else: + os.environ["ACS_SESSION_STATE_DIR"] = self._old + + def test_session_state_file_is_0600(self) -> None: + acs_common.save_session_state("test-session", {"x": 1}) + path = acs_common._session_state_path("test-session") + self.assertTrue(path.exists()) + mode = stat.S_IMODE(os.stat(path).st_mode) + self.assertEqual(mode, 0o600, + f"session-state file mode is {oct(mode)}, want 0o600") + + def test_session_state_dir_is_0700(self) -> None: + acs_common.save_session_state("test-session", {"x": 1}) + d = acs_common._SESSION_STATE_DIR + mode = stat.S_IMODE(os.stat(d).st_mode) + self.assertEqual(mode, 0o700, + f"session-state dir mode is {oct(mode)}, want 0o700") + + +# ----- 5. Regex DoS / oversized command input ----- + +class RegexInputSizeCap(unittest.TestCase): + """Destructive-bash regex must not run on arbitrarily-large inputs. 
+ A multi-MB command crafted to trigger backtracking would freeze the + Guardian; cap input length and audit the skip.""" + + def test_oversized_command_is_short_circuited(self) -> None: + # Construct a 64 KiB command that is benign but oversized + large_cmd = "echo " + ("a" * (64 * 1024)) + # The destructive matcher should refuse to scan and return a + # sentinel that the caller treats as "do not allow without + # external review", not a False that allows the command. + result = acs_common.scan_destructive_bash_safely(large_cmd, max_len=8192) + self.assertEqual(result, "input_too_large", + "oversized command MUST be flagged, not scanned, not silently allowed") + + def test_small_destructive_command_still_caught(self) -> None: + # Use the Guardian's actual pattern set + sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "example-guardian")) + import example_guardian + for pat in example_guardian.DESTRUCTIVE_BASH_PATTERNS: + if pat.search("rm -rf /home/x"): + return + self.fail("destructive pattern matching broken") + + def test_small_safe_command_returns_none(self) -> None: + self.assertIsNone( + acs_common.scan_destructive_bash_safely("ls -la", max_len=8192)) + + +class Item13_DestructiveRmFlagVariants(unittest.TestCase): + """Regression: every `rm` variant that combines recursive + force, + no matter what other harmless flags are present (verbose, interactive, + etc.), MUST be caught. + + Original Guardian regex used `\\b` after a fixed `-rf` match, so + `rm -rfv /tmp/...` slipped through — the trailing `v` defeated the + word boundary. 
A trivial single-letter extension defeating the + policy is the worst class of regex bug for a security control.""" + + @classmethod + def setUpClass(cls) -> None: + sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "example-guardian")) + import example_guardian + cls.eg = example_guardian + + def _assert_caught(self, cmd: str) -> None: + match = self.eg._matches_destructive_bash(cmd) + self.assertTrue(match, + f"REGRESSION: destructive command not caught: {cmd!r}. " + f"The Guardian's regex must match rm with recursive+force " + f"flags regardless of additional harmless flags.") + + def _assert_allowed(self, cmd: str) -> None: + match = self.eg._matches_destructive_bash(cmd) + self.assertFalse(match, + f"FALSE POSITIVE: benign command flagged as destructive: {cmd!r}") + + def test_rf_variants_caught(self) -> None: + # Canonical + self._assert_caught("rm -rf /home/x") + self._assert_caught("rm -fr /home/x") + self._assert_caught("rm --recursive --force /home/x") + self._assert_caught("rm --force --recursive /home/x") + + def test_rf_with_trailing_letters_caught(self) -> None: + # The bug Bar found: extra flag letters after r/f defeated the regex + self._assert_caught("rm -rfv /home/x") + self._assert_caught("rm -rfvi /home/x") + self._assert_caught("rm -rfvI /home/x") + self._assert_caught("rm -frv /home/x") + + def test_rf_with_middle_or_leading_letters_caught(self) -> None: + self._assert_caught("rm -rvf /home/x") + self._assert_caught("rm -vrf /home/x") + self._assert_caught("rm -ivrf /home/x") + + def test_rf_with_trailing_slash_and_command_chain_caught(self) -> None: + # Exact shape that Claude generated when Bar asked for an -rf test + self._assert_caught("rm -rfv /tmp/this-is-a-fake-test-path-12345/") + self._assert_caught("rm -rfv /tmp/foo/ ; echo done") + + def test_benign_rm_not_flagged(self) -> None: + # rm WITHOUT both r and f is allowed + self._assert_allowed("rm -v /home/x") + self._assert_allowed("rm -i /home/x") + 
self._assert_allowed("rm /home/x") + self._assert_allowed("rmdir /home/x") + # NOTE: `echo rm -rf /home/x` IS flagged by the regex (conservative + # by design — wrapping a destructive command in `echo` and piping + # to `sh` is a known evasion). Operators who want to allow it + # disable the pattern in their policy bundle. + + +class Item14_ToolNameCaseInsensitive(unittest.TestCase): + """Regression: the example Guardian's destructive-Bash policy was + gated on `tool_name in ("Bash", "Shell")` — a case-sensitive string + match. A NAT YAML key `shell` (lowercase) became the instance name, + sailed past the check, and a real LLM-driven `rm -rf` against a + sandbox directory ran to completion with the canary file deleted. + The destructive regex was correct; the OUTER guard was too strict. + + Every reasonable shell tool name spelling MUST hit the same policy + branch: + - "Bash" (Claude Code adapter's PreToolUse tool name) + - "Shell" (Cursor's beforeShellExecution synthesizes this name) + - "shell" (NAT YAML key used as instance_name) + - "bash" / "BASH" (paranoia — any caller-chosen casing) + """ + + @classmethod + def setUpClass(cls) -> None: + sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "example-guardian")) + import example_guardian + cls.eg = example_guardian + + def _call(self, tool_name: str, command: str) -> dict: + params = { + "request_id": "00000000-0000-4000-8000-000000000001", + "chain_hash": "0" * 64, + "metadata": {"session_id": "00000000-0000-4000-8000-000000000002"}, + "payload": { + "tool": {"name": tool_name}, + "arguments": {"command": {"value": command}}, + }, + } + return self.eg.evaluate_step( + "steps/toolCallRequest", params, + "00000000-0000-4000-8000-000000000001", "0" * 64) + + def test_lowercase_shell_destructive_denied(self) -> None: + """The exact regression: tool name 'shell' (lowercase) MUST hit + the destructive policy branch — discovered when a real Vertex- + driven react_agent's `shell` tool ran `rm -rf` 
and Guardian + allowed.""" + result = self._call("shell", "rm -rf /tmp/x/") + self.assertEqual(result.get("decision"), "deny", + "REGRESSION: lowercase 'shell' tool with rm -rf MUST be denied; " + "the case-sensitive name check let real destructive commands through") + self.assertIn("destructive_command", result.get("reason_codes", [])) + + def test_every_reasonable_casing_denied(self) -> None: + for name in ("Bash", "BASH", "bash", "Shell", "SHELL", "shell", "ShElL"): + with self.subTest(tool_name=name): + result = self._call(name, "rm -rf /tmp/x/") + self.assertEqual(result.get("decision"), "deny", + f"tool name {name!r} should hit destructive-Bash branch") + self.assertIn("destructive_command", + result.get("reason_codes", [])) + + def test_unrelated_tool_unaffected(self) -> None: + """Case-folding must not over-broaden — `read` is not a shell tool.""" + result = self._call("Read", "rm -rf /tmp/x/") + # Read tool with a 'command' arg that LOOKS dangerous — not a + # shell call, the destructive regex isn't applied here. + self.assertEqual(result.get("decision"), "allow", + "case-fold must not pull non-shell tools into shell policy") + + def test_task_subagent_gate_also_case_insensitive(self) -> None: + """The Task tool deny was also case-sensitive; same case-fold.""" + # ALLOW_SUBAGENT defaults False so Task should be denied + for name in ("Task", "task", "TASK"): + with self.subTest(tool_name=name): + result = self._call(name, "") + self.assertEqual(result.get("decision"), "deny", + f"tool name {name!r} (Task variant) must hit subagent gate") + self.assertIn("subagent_gated", result.get("reason_codes", [])) + + +class Item15_VerifySignatureRobustToMalformedBase64(unittest.TestCase): + """Regression: verify_signature() called base64.b64decode() directly + without exception handling. 
A malformed signature value (truncation, + garbage characters, wrong padding) raised binascii.Error / ValueError + up to the request path instead of producing the spec's SIGNATURE_INVALID + (-32004) response. The Guardian only caught GuardianError around + signature checks, so the uncaught binascii.Error tore down the request + on the wire as a 500 / disconnected handler — security control + converted to a denial-of-service vector. + + Every form of unparseable base64 MUST return False (signature invalid), + never crash.""" + + BAD_VALUES = [ + "not-base64", # raw garbage + "this is not!base64@@@", # non-base64 characters + "!!!", # too short, illegal chars + "===", # padding only + "AB==CD", # padding mid-string + "A" * 1000003, # huge, no padding alignment + "", # empty (this is "no signature" → False, no crash) + ] + + def _make_envelope(self, sig_value): + return { + "params": { + "request_id": "00000000-0000-4000-8000-000000000001", + "metadata": {"session_id": "00000000-0000-4000-8000-000000000002"}, + "signature": {"algorithm": "HMAC-SHA256", "value": sig_value}, + } + } + + def test_malformed_signature_returns_false_not_crashes(self) -> None: + import os + os.environ["ACS_HMAC_SECRET"] = "verify-sig-robustness-test-secret" + try: + for bad in self.BAD_VALUES: + with self.subTest(signature=bad[:30]): + env = self._make_envelope(bad) + try: + result = acs_common.verify_signature(env) + except (binascii.Error, ValueError, TypeError) as e: + self.fail( + f"REGRESSION: malformed signature {bad[:30]!r} " + f"raised {type(e).__name__} instead of returning " + f"False; this turns SIGNATURE_INVALID into a " + f"crash on the request path") + self.assertFalse( + result, + f"malformed signature {bad[:30]!r} must verify as False") + finally: + os.environ.pop("ACS_HMAC_SECRET", None) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/adapters/claude-code/README.md b/adapters/claude-code/README.md new file mode 100644 index 0000000..096c4c1 --- 
/dev/null +++ b/adapters/claude-code/README.md @@ -0,0 +1,302 @@ +# ACS adapter: Claude Code + +A drop-in adapter that wires [Claude Code](https://docs.claude.com/claude-code) hooks to an ACS Guardian. No agent code changes; configuration only. + +## How it works + +Claude Code fires a hook (e.g. `PreToolUse`) by running a shell command and passing the hook event as JSON on stdin. The command's stdout becomes the hook's decision. + +This adapter is that command. On every hook event Claude Code spawns the adapter as a subprocess; the adapter: + +1. Reads the Claude Code hook event from stdin. +2. Translates it to an ACS JSON-RPC envelope ([mapping.md](./mapping.md)). +3. Signs it with HMAC-SHA256 (`ACS_HMAC_SECRET_FILE`). +4. POSTs to the Guardian. +5. Verifies the response signature. +6. Translates the verdict back to Claude Code's expected output shape and writes to stdout. +7. Exits. + +The handshake/hello fires once per session, cached on disk so subsequent events skip the round-trip. + +### Decision honoring (§6.4) + +ACS-Core §6.4 requires the framework to wait for the verdict and apply it before the action executes. Claude Code provides this guarantee through its hook protocol: the adapter is invoked as a blocking subprocess and the framework reads its stdout for the decision, so the tool can't run until the adapter exits with a verdict. The adapter relies on this — without it, a Guardian deny would arrive after the side effect. + +## Install — five steps + +You need three pieces co-located on the same machine: a running Guardian, a shared HMAC secret, and a `~/.claude/settings.json` that wires the adapter into Claude Code's hooks. `wire.py` does step 3 for you. + +Commands below assume `$ACS_REPO` points at your local clone of `Agent-Control-Standard/ACS` (or your fork). Export it once: + +```bash +export ACS_REPO=/path/to/your/clone # e.g., $HOME/code/ACS +``` + +### 1. Generate the shared HMAC secret + +Both the adapter and the Guardian read this file. 
Mode 0600 is enforced by the adapter — anything looser and it refuses to start. + +```bash +mkdir -p ~/.acs +openssl rand -hex 32 > ~/.acs/hmac.key +chmod 600 ~/.acs/hmac.key +``` + +### 2. Run the Guardian + +Use the example Guardian for testing; a production Guardian is the same wire protocol with a real policy engine attached. + +```bash +ACS_HMAC_SECRET_FILE=~/.acs/hmac.key \ + python3 $ACS_REPO/adapters/example-guardian/example_guardian.py \ + --port 8787 +``` + +Keep this terminal open. You should see `[guardian] listening on 127.0.0.1:8787`. + +(For long-running setups: a `launchd` plist on macOS or a `systemd` unit on Linux. Out of scope here.) + +### 3. Wire `~/.claude/settings.json` + +The `wire.py` CLI does this safely — dry-run by default, atomic write with a timestamped backup when you pass `--write`. + +```bash +cd "$ACS_REPO/adapters/claude-code" + +# Preview the change without touching the file +python3 wire.py \ + --guardian-url=http://127.0.0.1:8787/acs \ + --secret-file=~/.acs/hmac.key + +# Apply it (creates ~/.claude/settings.json.bak.<timestamp>) +python3 wire.py \ + --guardian-url=http://127.0.0.1:8787/acs \ + --secret-file=~/.acs/hmac.key \ + --write +``` + +What it wires by default: + +| Hook | Posture | +|---|---| +| `PreToolUse` | **fail-CLOSED** (gate — silent fail-open is a policy hole) | +| `UserPromptSubmit` | **fail-CLOSED** (gate) | +| `SessionStart` | fail-open (observational; §6.4 default) | +| `PostToolUse` | fail-open | +| `Notification` | fail-open | +| `SessionEnd` | fail-open | + +Override with `--default-deny` (fail-closed on every hook) or `--all-fail-open` (strict §6.4 default everywhere). + +To remove the wiring later: `python3 wire.py --unwire --write`. + +### 4. Restart any open Claude Code session + +Claude Code reads `~/.claude/settings.json` at session start, not live. Existing sessions keep their pre-wiring config. + +### 5. Verify the install + +Two complementary checks. 
The end-to-end check is the one to run if you only have time for one: + +```bash +cd "$ACS_REPO/adapters/claude-code" +python3 e2e_check.py +``` + +Real Claude is driven through four scenarios — allow, deny, Read tool, multi-tool handshake-once. Every envelope is printed verbatim, and each scenario reports PASS/FAIL. Wall-clock time is ~60-90s because real Claude is in the loop. The final line is either `YOUR CLAUDE CODE INSTALL IS ACS-CONFORMANT` (exit 0) or a per-scenario failure list (exit 1). + +You can also do an in-session manual smoke test (see [Smoke tests](#smoke-tests) below). + +## Prerequisites + +- **`claude` CLI** installed and authenticated — install guide: <https://docs.claude.com/claude-code> +- **Python 3.10+** with `jsonschema` and `rfc8785` — `pip install -r ../requirements-test.txt` +- **Canonical ACS schemas** reachable on disk. Default location `/tmp/acs-spec-source/specification/v0.1.0/`; override via `ACS_SPEC_DIR`. Clone with: + ```bash + git clone https://github.com/Agent-Control-Standard/ACS.git /tmp/acs-spec-source + ``` + +## Smoke tests + +Five tests, ordered from broadest to most specific. Run any/all. + +### Smoke #1 — automated test suite (unit + integration, ~30s) + +Run from `adapters/` (the conformance suite lives at the top level): + +```bash +cd "$ACS_REPO/adapters" + +python3 -m unittest test_acs_core_conformance +# Expect: Ran 48 tests in ~10s / OK (every ACS-Core MUST) + +(cd claude-code && python3 -m unittest discover tests) +# Expect: Ran 32 tests / OK (round-trip + schema + live) + +(cd _common && python3 -m unittest discover tests) +# Expect: Ran 33 tests / OK (security + edge cases) +``` + +If any of those fail, the failure message names the specific spec MUST or property that broke. + +### Smoke #2 — real Claude end-to-end (~60-90s) + +```bash +cd "$ACS_REPO/adapters/claude-code" +python3 e2e_check.py +``` + +Drives real Claude through 4 scenarios with a recording Guardian. Prints every envelope on the wire plus per-scenario PASS/FAIL. 
+ +### Smoke #3 — in-session manual test + +Open a real Claude Code session. Try: + +``` +echo hello via Bash +``` + +In your Guardian terminal you should see roughly this sequence (one new entry per hook Claude fires): + +``` +[guardian] handshake/hello session=... +[guardian] steps/sessionStart session=... +[guardian] steps/userMessage session=... +[guardian] steps/toolCallRequest session=... +[guardian] steps/toolCallResult session=... +[guardian] steps/agentResponse session=... +[guardian] steps/sessionEnd session=... +``` + +Then try a denied command in the same session: + +``` +Run: rm -rf /home/some-fake-path +``` + +Claude should refuse and surface the Guardian's `reasoning` field. The example Guardian's regex catches `rm -rf /...`; the Bash never runs. + +### Smoke #4 — audit-cause differentiation + +Verifies that the adapter's audit log distinguishes "Guardian unreachable" (ops issue) from "Guardian rejected the envelope" (client/operator bug). + +Unsigned envelope to a signing-required Guardian: + +```bash +ACS_GUARDIAN_URL="http://127.0.0.1:8787/acs" \ +ACS_HMAC_SECRET="" \ +python3 $ACS_REPO/adapters/claude-code/acs_adapter.py 2>&1 <<'EOF' +{"session_id":"11111111-1111-4111-8111-111111111111","transcript_path":"/tmp/t","cwd":"/tmp","permission_mode":"default","hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"echo test"}} +EOF +``` + +Expected stderr — note the `cause` field: + +``` +acs-adapter: Guardian returned JSON-RPC error -32004 (signature_invalid_response): SIGNATURE_INVALID +ACS_AUDIT {"acs_audit_event": "fail_open_bypass", "cause": "signature_invalid_response", ...} +``` + +Guardian unreachable (different cause, same disposition): + +```bash +ACS_GUARDIAN_URL="http://127.0.0.1:1/dead" \ +python3 $ACS_REPO/adapters/claude-code/acs_adapter.py 2>&1 <<'EOF' 
+{"session_id":"11111111-1111-4111-8111-111111111111","transcript_path":"/tmp/t","cwd":"/tmp","permission_mode":"default","hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"echo test"}} +EOF +``` + +Expected: + +``` +acs-adapter: Guardian unreachable: <reason> +ACS_AUDIT {"acs_audit_event": "fail_open_bypass", "cause": "transport_failure", ...} +``` + +Same `acs_audit_event`, distinct `cause`. Operators grep for the `"cause"` field to triage. + +### Smoke #5 — pre-flight inventory (paranoid) + +If you're debugging, this is the fastest "where did I go wrong" sweep: + +```bash +echo "=== Guardian listening? ===" +(lsof -i :8787 || echo "NOT RUNNING") | head -3 + +echo "=== Secret file 0600? ===" +ls -la ~/.acs/hmac.key + +echo "=== Hooks wired? ===" +python3 -c "import json, os; d=json.load(open(os.path.expanduser('~/.claude/settings.json'))); print(list(d.get('hooks',{}).keys()))" + +echo "=== Guardian responds to system/ping? ===" +cd "$ACS_REPO/adapters" && python3 -c " +import sys; sys.path.insert(0, '_common') +from acs_common import ping; import json +r = ping('http://127.0.0.1:8787/acs'); print(json.dumps(r, indent=2)[:200] if r else 'no response') +" +``` + +## Files + +- `acs_adapter.py` — the adapter itself. Stdlib + `rfc8785` for JCS canonicalization. +- `wire.py` — settings.json wiring CLI (dry-run by default; `--write` to apply). +- `e2e_check.py` — real-Claude end-to-end verifier (4 scenarios). +- `settings.json.example` — reference wiring (`wire.py` produces a more comprehensive one). +- `mapping.md` — Claude Code hook → ACS step method table, plus disposition translation. +- `tests/` — round-trip + schema + live integration tests. +- `tests/example_payloads.md` — masked real-world payload examples showing exactly what Claude Code emits. + +The adapter shares `adapters/_common/` with the Cursor and NAT adapters (signing, handshake cache, audit events, URL allowlist). 
+ +## Configuration + +The adapter is configured by environment variables, typically set per-hook by `wire.py`: + +| Variable | Default | Purpose | +|---|---|---| +| `ACS_GUARDIAN_URL` | `http://127.0.0.1:8787/acs` | Guardian endpoint. http/https only; SSRF allowlist refuses other schemes. | +| `ACS_HMAC_SECRET_FILE` | (unset) | Path to a 0600 file holding the shared HMAC secret. | +| `ACS_HMAC_SECRET` | (unset) | Inline secret. Less secure (visible in `ps eauxw`). Prefer the file. | +| `ACS_DEFAULT_DENY` | `0` | Fail-open with audit (§6.4 default). Set to `1` for fail-closed. | +| `ACS_HANDSHAKE` | `1` | Set to `0` to disable the handshake/hello call on first use. | +| `ACS_AGENT_ID` | derived from cwd | Stable agent identifier sent in `metadata.agent_id`. | +| `ACS_HANDSHAKE_CACHE` | `~/.cache/acs-adapter-handshake/` | Per-session ServerHello cache dir. | +| `ACS_GUARDIAN_HOST_ALLOWLIST` | (unset) | Optional comma-separated hostname allowlist (defense in depth). | + +## On-disk state + +- `~/.cache/acs-adapter-handshake/<session_id>.json` — cached ServerHello per session. Adapter creates with mode 0600; refreshed when older than 1 hour. +- `~/.cache/acs-guardian-state/<session_id>.json` — Guardian-side per-session chain head + replay set; survives Guardian restart. + +## Conformance status + +Honest, MUST-by-MUST against `docs/spec/conformance.md`: + +| ACS-Core item | Status | +|---|---| +| Handshake (`handshake/hello`) | ✓ adapter sends ClientHello on first session call; cached in `~/.cache/acs-adapter-handshake/`. | +| JSON-RPC envelope shape (`request-envelope.json`) | ✓ validates against canonical schema for every mapped hook (`tests/test_envelope_schema.py`); format checking enforces `uuid` and `date-time`. | +| Hook taxonomy (6 minimum) | ✓ `sessionStart`, `userMessage`, `toolCallRequest`, `toolCallResult`, `agentResponse`, `sessionEnd`. 
| +| Dispositions (ALLOW/DENY/ASK/DEFER) | ✓ on **pre-execution** hooks (`PreToolUse`, `UserPromptSubmit`); MODIFY partial (`PreToolUse` with `parameter_overrides` only). **Post-execution and lifecycle hooks (`PostToolUse`, `Notification → agentResponse`, `Stop`, `SessionEnd`) are observation-only** — Claude Code fires them after the side effect / message has occurred; a Guardian `deny` on those cannot undo it. See `mapping.md`. | +| Unknown-disposition fail posture | ✓ default-deny honored on unknown verdicts when `ACS_DEFAULT_DENY=1`; spec-default fail-open path emits audit event. | +| SessionContext + published `chain_hash` | ✓ session_id propagated; Guardian computes rolling SHA-256 chain per §8.2 (`adapters/test_acs_core_conformance.py::Core05_SessionContext`). | +| Replay protection (`request_id` + `timestamp`) | ✓ adapter sends both; Guardian rejects duplicate `request_id` (REPLAY_DETECTED -32005) and timestamps outside skew window (TIMESTAMP_OUT_OF_WINDOW -32006) per §10.3. | +| Baseline integrity (HMAC-SHA256 signature) | ✓ HKDF-derived per-session key signs every request and response when `ACS_HMAC_SECRET[_FILE]` is set; Guardian rejects unsigned/tampered with SIGNATURE_INVALID -32004. | +| Decision honoring (§6.4) | ✓ adapter blocks on subprocess return; spec-default fail-open posture emits structured `ACS_AUDIT` event on every bypass; audit `cause` field distinguishes failure modes. | +| Liveness `system/ping` | ✓ Guardian implements always-allow ping that bypasses chain/replay/signature checks per §13. | +| `nonce` (optional replay field) | ✗ adapter does not emit `nonce`; the envelope field is OPTIONAL in v0.1. | +| Wrapped MCP `protocols/MCP/*` | ✗ not implemented; Claude Code's MCP traffic flows through its own mechanism and would need a separate wrapping path. 
| + +## Troubleshooting + +| Symptom | Likely cause | +|---|---| +| Adapter exits 0 with empty stdout; Guardian terminal silent | Adapter pointed at the wrong URL (check `ACS_GUARDIAN_URL` in your settings.json); check stderr for an `ACS_AUDIT` line with `"cause": "transport_failure"`. | +| Every hook gets denied | Likely `ACS_DEFAULT_DENY=1` + Guardian down. Check the Guardian process is running. | +| Adapter says `SecretFilePermissionsError` | The HMAC secret file is more permissive than 0600 (group- or world-readable) or is a symlink. `chmod 600 ~/.acs/hmac.key`. | +| Guardian returns `-32004 SIGNATURE_INVALID` | Adapter and Guardian aren't reading the same secret. `cat ~/.acs/hmac.key` on both sides should match. | +| Guardian returns `-32006 TIMESTAMP_OUT_OF_WINDOW` | Clock skew between adapter and Guardian > 5 minutes. Sync time (`sudo sntp -sS time.apple.com` on macOS). | +| Guardian returns `-32600 Invalid Request` for `metadata.session_id` | Session ID isn't a UUID. Real Claude Code always sends UUIDs; if you're hand-crafting envelopes for testing, fix the fixture. | + +Everything the adapter does that's not policy decision-making is audited on stderr as a JSON line prefixed `ACS_AUDIT`. The `cause` field tells you which failure mode fired. diff --git a/adapters/claude-code/acs_adapter.py b/adapters/claude-code/acs_adapter.py new file mode 100755 index 0000000..98918a5 --- /dev/null +++ b/adapters/claude-code/acs_adapter.py @@ -0,0 +1,489 @@ +#!/usr/bin/env python3 +""" +ACS adapter for Claude Code hooks. + +Translates a Claude Code hook event (read from stdin as JSON) into an +ACS JSON-RPC request, signs it (HMAC-SHA256 baseline per Specification +§10 when ACS_HMAC_SECRET is set), sends it to a Guardian, and +translates the ACS response back to Claude Code's expected output. 
+ +Wire-format ground truth: Agent-Control-Standard/ACS specification/v0.1.0/ + +Environment variables: + ACS_GUARDIAN_URL Guardian endpoint (default: http://127.0.0.1:8787/acs) + ACS_DEFAULT_DENY "1" = fail-closed on adapter error or unknown + Guardian disposition. Default: "0" (spec default + per §6.4 is fail-open with audit event). Switch + to "1" for production deployments that prefer + fail-closed availability tradeoff. + ACS_HMAC_SECRET Shared secret for baseline HMAC-SHA256 envelope + signing per §10. If unset, requests are unsigned + (local-dev mode). ACS-Core conformance requires + this to be set. + ACS_AGENT_ID Explicit agent_id for metadata. If unset, derived + from cwd as `claude-code:`. + ACS_HANDSHAKE "0" disables the handshake/hello call on first + use. Default "1". Handshake result is cached + per-session in ~/.cache/acs-adapter-handshake/. +""" +from __future__ import annotations + +import hashlib +import json +import os +import sys +import urllib.error +import urllib.request +import uuid +from pathlib import Path +from typing import Any, Callable + + +# Bootstrap shared helpers from sibling adapters/_common/ +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "_common")) +from acs_common import ( # noqa: E402 + ACS_VERSION, + audit_event, + coerce_uuid, + ensure_session_handshake, + guardian_error_cause, + iso8601_now, + sign_envelope, + validate_guardian_url, + verify_signature, +) + + +GUARDIAN_URL = os.environ.get("ACS_GUARDIAN_URL", "http://127.0.0.1:8787/acs") +DEFAULT_DENY = os.environ.get("ACS_DEFAULT_DENY", "0") == "1" +HANDSHAKE_ENABLED = os.environ.get("ACS_HANDSHAKE", "1") == "1" + + +# ─── Hook taxonomy ────────────────────────────────────────────────────────── + +HOOK_MAP: dict[str, str] = { + "SessionStart": "steps/sessionStart", + "SessionEnd": "steps/sessionEnd", + "UserPromptSubmit": "steps/userMessage", + "PreToolUse": "steps/toolCallRequest", + "PostToolUse": "steps/toolCallResult", + "Notification": 
"steps/agentResponse", + "Stop": "steps/sessionEnd", +} + +# Hooks whose deny shape is {"decision": "block", "reason": "..."} +# (i.e., everything except PreToolUse, which uses hookSpecificOutput.permissionDecision). +BLOCK_RESPONSE_HOOKS = frozenset({ + "PostToolUse", "UserPromptSubmit", "Stop", "SubagentStop", "PreCompact", +}) + +SESSION_END_REASON_MAP: dict[str, str] = { + "clear": "completed", + "logout": "abandoned", + "prompt_input_exit": "abandoned", + "other": "completed", +} + +PRETOOL_PERMISSION_MAP: dict[str, str] = { + "allow": "allow", "deny": "deny", "ask": "ask", "defer": "defer", +} + +KNOWN_DECISIONS = frozenset({"allow", "deny", "modify", "ask", "defer"}) + + +# ─── Response writers — one definition each, used everywhere ────────────── + +def _emit(payload: dict[str, Any]) -> None: + """Single point where the adapter writes to stdout. Idempotent if + called with empty dict.""" + if not payload: + return + json.dump(payload, sys.stdout) + sys.stdout.write("\n") + + +def _pretool_response(decision: str, reason: str = "", + updated_input: dict | None = None) -> dict[str, Any]: + """Build Claude Code's PreToolUse response shape.""" + hso: dict[str, Any] = { + "hookEventName": "PreToolUse", + "permissionDecision": decision, + } + if reason: + hso["permissionDecisionReason"] = reason + if updated_input is not None: + hso["updatedInput"] = updated_input + return {"hookSpecificOutput": hso} + + +def _block_response(reason: str, hook_event: str | None = None) -> dict[str, Any]: + """Build Claude Code's generic block shape used by PostToolUse, + UserPromptSubmit, Stop, SubagentStop, PreCompact.""" + out: dict[str, Any] = {"decision": "block", "reason": reason} + if hook_event: + out["hookSpecificOutput"] = {"hookEventName": hook_event} + return out + + +# ─── Helpers ──────────────────────────────────────────────────────────────── + +def _agent_id(event: dict[str, Any]) -> str: + explicit = os.environ.get("ACS_AGENT_ID") + if explicit: + return explicit + 
cwd = event.get("cwd") or os.environ.get("PWD") or "" + if cwd: + return f"claude-code:{hashlib.sha256(cwd.encode()).hexdigest()[:8]}" + return "claude-code:unknown" + + +def _wrap_arguments(raw: dict[str, Any]) -> dict[str, Any]: + return {k: {"value": v} for k, v in (raw or {}).items()} + + +def _tool_use_request_id(tool_use_id: str | None) -> str | None: + """Deterministic UUID5 from Claude Code's tool_use_id so PostToolUse + can populate `request_id_ref` (per tool-call-result.json:19-23) + linking back to its originating PreToolUse request.""" + if not tool_use_id: + return None + return str(uuid.uuid5(uuid.NAMESPACE_URL, f"claude-code:tool_use:{tool_use_id}")) + + +# ─── Payload builders — dispatch table, one function per hook ────────────── +# +# Each function takes the Claude Code event dict and returns the +# hook-payload portion of the ACS envelope (the part that goes under +# `params.payload`). The dispatch table at the bottom maps hook names +# to these functions; build_payload is then a one-line dispatch. 
def _payload_pretool_use(event: dict[str, Any]) -> dict[str, Any]:
    """PreToolUse payload: the tool name plus its wrapped input arguments."""
    tool_name = event.get("tool_name", "")
    raw_args = event.get("tool_input") or {}
    return {"tool": {"name": tool_name}, "arguments": _wrap_arguments(raw_args)}


def _payload_post_tool_use(event: dict[str, Any]) -> dict[str, Any]:
    """PostToolUse payload: tool name, exit status and the tool output.

    The output is read from `tool_response` (falling back to
    `tool_output`). A dict output with a truthy `interrupted` entry is
    reported as "failure"; everything else is "success".
    `request_id_ref` and `duration_ms` are added only when available.
    """
    response = event.get("tool_response", event.get("tool_output"))
    was_interrupted = isinstance(response, dict) and response.get("interrupted")
    result: dict[str, Any] = {
        "tool": {"name": event.get("tool_name", "")},
        "exit_status": "failure" if was_interrupted else "success",
        "outputs": [] if response is None else [{"value": response}],
    }

    linked_request = _tool_use_request_id(event.get("tool_use_id"))
    if linked_request:
        result["request_id_ref"] = linked_request

    elapsed = event.get("duration_ms")
    if elapsed is not None:
        result["duration_ms"] = elapsed
    return result


def _payload_user_prompt(event: dict[str, Any]) -> dict[str, Any]:
    """UserPromptSubmit payload: the prompt as a single text content item."""
    text = event.get("prompt", "")
    return {"content": [{"type": "text", "value": text}]}


def _payload_notification(event: dict[str, Any]) -> dict[str, Any]:
    """Notification payload: the message as a single text content item."""
    text = event.get("message", "")
    return {"content": [{"type": "text", "value": text}]}


def _payload_session_start(event: dict[str, Any]) -> dict[str, Any]:
    """SessionStart payload: truthy source/model/transcript_path values
    under `platform_context`, or {} when none of them is set."""
    context: dict[str, Any] = {}
    for field in ("source", "model", "transcript_path"):
        value = event.get(field)
        if value:
            context[field] = value
    if not context:
        return {}
    return {"platform_context": context}


def _payload_session_end(event: dict[str, Any]) -> dict[str, Any]:
    """SessionEnd / Stop payload: the mapped session-end reason.

    A missing reason defaults to "completed" for Stop and "other" for
    anything else; reasons absent from SESSION_END_REASON_MAP map to
    "completed".
    """
    reason = event.get("reason")
    if not reason:
        is_stop = event.get("hook_event_name", "") == "Stop"
        reason = "completed" if is_stop else "other"
    return {"reason": SESSION_END_REASON_MAP.get(reason, "completed")}


# Hook name -> payload builder. Hooks without an entry get an empty payload.
_PAYLOAD_BUILDERS: dict[str, Callable[[dict[str, Any]], dict[str, Any]]] = {
    "PreToolUse": _payload_pretool_use,
    "PostToolUse": _payload_post_tool_use,
    "UserPromptSubmit": _payload_user_prompt,
    "Notification": _payload_notification,
    "SessionStart": _payload_session_start,
    "SessionEnd": _payload_session_end,
    "Stop": _payload_session_end,
}


def build_payload(event: dict[str, Any]) -> dict[str, Any]:
    """Dispatch on `hook_event_name`; unknown hooks produce {}."""
    handler = _PAYLOAD_BUILDERS.get(event.get("hook_event_name", ""))
    if handler is None:
        return {}
    return handler(event)


# ─── Envelope construction ──────────────────────────────────────────────────

def build_request(event: dict[str, Any]) -> dict[str, Any]:
    """Build the JSON-RPC envelope for a Claude Code hook event.

    Returns {} when the hook has no HOOK_MAP entry or the event carries
    no `session_id`. Otherwise the envelope is passed to `sign_envelope`
    and returned.
    """
    method = HOOK_MAP.get(event.get("hook_event_name", ""))
    session_id = event.get("session_id")
    if method is None or not session_id:
        return {}

    metadata: dict[str, Any] = {
        "agent_id": _agent_id(event),
        "session_id": session_id,
        "platform": "claude-code",
    }
    metadata.update({
        key: event[key]
        for key in ("cwd", "transcript_path", "permission_mode")
        if event.get(key)
    })

    # For PreToolUse, pin request_id to a deterministic UUID derived
    # from tool_use_id so the matching PostToolUse can reference it.
    request_id = None
    if method == "steps/toolCallRequest":
        request_id = _tool_use_request_id(event.get("tool_use_id"))
    if not request_id:
        request_id = str(uuid.uuid4())

    envelope = {
        "jsonrpc": "2.0",
        "id": str(uuid.uuid4()),
        "method": method,
        "params": {
            "acs_version": ACS_VERSION,
            "request_id": request_id,
            "timestamp": iso8601_now(),
            "metadata": metadata,
            "payload": build_payload(event),
        },
    }
    sign_envelope(envelope, session_id=session_id)
    return envelope
def _maybe_handshake(event: dict[str, Any]) -> None:
    """Called on every hook event.

    Looks like 'handshake every event', but `ensure_session_handshake`
    is idempotent: the FIRST event of a session_id triggers a real
    handshake/hello POST and writes the negotiated ServerHello to
    ~/.cache/acs-adapter-handshake/. Every subsequent event for the
    same session_id reads that file and returns without a network call.

    Does nothing when the handshake is disabled or the event has no
    `session_id`.
    """
    if not HANDSHAKE_ENABLED:
        return
    session_id = event.get("session_id")
    if not session_id:
        return
    ensure_session_handshake(
        guardian_url=GUARDIAN_URL,
        session_id=session_id,
        agent_id=_agent_id(event),
        platform="claude-code",
        # Several hooks can share one ACS method (Stop and SessionEnd both
        # map to steps/sessionEnd), so de-duplicate while keeping order.
        methods_implemented=list(dict.fromkeys(HOOK_MAP.values())),
    )


def call_guardian(request: dict[str, Any]) -> dict[str, Any]:
    """POST `request` as JSON to GUARDIAN_URL and return the decoded reply.

    The URL is validated first; the HTTP call uses a 5 second timeout.
    Transport errors from `urllib` and JSON decoding errors propagate to
    the caller.
    """
    validate_guardian_url(GUARDIAN_URL)  # SSRF: refuse file://, ftp://, etc.
    body = json.dumps(request).encode("utf-8")
    req = urllib.request.Request(
        GUARDIAN_URL, data=body,
        headers={"Content-Type": "application/json"}, method="POST",
    )
    with urllib.request.urlopen(req, timeout=5.0) as resp:
        return json.loads(resp.read().decode("utf-8"))
by Claude Code: {reasoning}") + return {} + + +def _translate_user_prompt(decision: str, reasoning: str, + modifications: dict) -> dict[str, Any]: + if decision == "deny": + return _block_response(reasoning or "blocked by Guardian") + if decision in ("ask", "defer"): + return _block_response(f"{decision} on user prompt: {reasoning}") + return {} + + +def _translate_session_stop(decision: str, reasoning: str, + modifications: dict) -> dict[str, Any]: + """Stop / SubagentStop — only deny matters; allow is the default.""" + if decision == "deny": + return _block_response(reasoning or "blocked by Guardian") + return {} + + +_TRANSLATORS: dict[str, Callable[[str, str, dict], dict[str, Any]]] = { + "PreToolUse": _translate_pretool, + "PostToolUse": _translate_posttool, + "UserPromptSubmit": _translate_user_prompt, + "Stop": _translate_session_stop, + "SubagentStop": _translate_session_stop, +} + + +def translate_response(acs_response: dict[str, Any], hook_event: str) -> dict[str, Any]: + result = acs_response.get("result", {}) + decision = (result.get("decision") or "").lower() + reasoning = result.get("reasoning", "") + modifications = result.get("modifications", {}) + + # Unknown disposition under fail-closed → emit a deny in the hook's shape. + if decision not in KNOWN_DECISIONS and DEFAULT_DENY: + reason = f"unknown Guardian disposition '{decision}' (default-deny)" + if hook_event == "PreToolUse": + return _pretool_response("deny", reason) + if hook_event in BLOCK_RESPONSE_HOOKS: + return _block_response(reason) + + translator = _TRANSLATORS.get(hook_event) + if translator: + return translator(decision, reasoning, modifications) + + # Informational hooks (SessionStart, SessionEnd, Notification) — + # surface additional_context if the Guardian provided any, else empty. 
def translate_response(acs_response: dict[str, Any], hook_event: str) -> dict[str, Any]:
    """Turn a Guardian JSON-RPC response into Claude Code hook output.

    Args:
        acs_response: the decoded Guardian response.
        hook_event: the Claude Code hook name the response belongs to.

    Returns:
        The dict to print for Claude Code; {} means "no opinion".

    Malformed responses never raise: a `result` that is not an object,
    a `decision` that is not a string, or `modifications` that is not an
    object are normalised to empty values. An empty decision is not a
    known disposition, so under DEFAULT_DENY it is denied like any other
    unknown disposition instead of crashing the hook process.
    """
    result = acs_response.get("result") if isinstance(acs_response, dict) else None
    if not isinstance(result, dict):
        result = {}

    raw_decision = result.get("decision")
    decision = raw_decision.lower() if isinstance(raw_decision, str) else ""

    reasoning = result.get("reasoning", "")
    if reasoning is None:
        reasoning = ""
    elif not isinstance(reasoning, str):
        reasoning = str(reasoning)

    modifications = result.get("modifications", {})
    if not isinstance(modifications, dict):
        modifications = {}

    # Unknown disposition under fail-closed → emit a deny in the hook's shape.
    if decision not in KNOWN_DECISIONS and DEFAULT_DENY:
        reason = f"unknown Guardian disposition '{decision}' (default-deny)"
        if hook_event == "PreToolUse":
            return _pretool_response("deny", reason)
        if hook_event in BLOCK_RESPONSE_HOOKS:
            return _block_response(reason)

    translator = _TRANSLATORS.get(hook_event)
    if translator:
        return translator(decision, reasoning, modifications)

    # Informational hooks (SessionStart, SessionEnd, Notification) —
    # surface additional_context if the Guardian provided any, else empty.
    additional = result.get("additional_context")
    if additional:
        return {"hookSpecificOutput": {
            "hookEventName": hook_event,
            "additionalContext": str(additional),
        }}
    return {}
# ─── Main flow ──────────────────────────────────────────────────────────────

def main() -> int:
    """Process one Claude Code hook event read from stdin.

    Flow: parse the event, skip hooks that have no HOOK_MAP entry,
    handshake, build and send the ACS request, then translate the
    Guardian's answer into Claude Code output on stdout.

    Every failure after the hook is identified goes through `_fail`,
    which applies the deployment's fail posture. That includes stdin
    JSON that is not an object, a Guardian response that is not an
    object, and an exception while translating the response.

    Returns:
        0 when nothing needs doing or the verdict was emitted; otherwise
        whatever `_fail` returns.
    """
    raw = sys.stdin.read().strip()
    if not raw:
        return 0
    try:
        event = json.loads(raw)
    except json.JSONDecodeError as e:
        sys.stderr.write(f"acs-adapter: invalid JSON on stdin: {e}\n")
        return _fail(cause="invalid_stdin_json")
    if not isinstance(event, dict):
        # Valid JSON but not a hook event object; `.get` below would raise.
        sys.stderr.write("acs-adapter: stdin JSON is not an object\n")
        return _fail(cause="invalid_stdin_json")

    hook_name = event.get("hook_event_name", "")
    if hook_name not in HOOK_MAP:
        return 0
    session_id = event.get("session_id")

    # Handshake on first call of a session (cached after). Best-effort:
    # a failed handshake follows the deployment's startup posture (§4.1).
    # NOTE(review): this call is outside the try below, so an exception
    # escaping _maybe_handshake bypasses _fail — confirm it cannot raise.
    _maybe_handshake(event)

    request = None
    try:
        request = build_request(event)
        if not request:
            sys.stderr.write(f"acs-adapter: could not build request for {hook_name}\n")
            return _fail(hook_name, session_id,
                         cause="adapter_build_failed")
        response = call_guardian(request)
    except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, OSError) as e:
        sys.stderr.write(f"acs-adapter: Guardian unreachable: {e}\n")
        return _fail(hook_name, session_id,
                     cause="transport_failure",
                     request_id=(request or {}).get("params", {}).get("request_id"),
                     method=(request or {}).get("method"),
                     error=str(e))
    except Exception as e:  # noqa: BLE001
        sys.stderr.write(f"acs-adapter: adapter error: {e}\n")
        return _fail(hook_name, session_id,
                     cause="adapter_exception", error=str(e))

    request_id = request.get("params", {}).get("request_id")
    method = request.get("method")

    # The body decoded as JSON but is not a JSON-RPC object (e.g. a list
    # or a bare string); the membership test and `.get` below need a dict.
    if not isinstance(response, dict):
        sys.stderr.write("acs-adapter: Guardian response is not a JSON object\n")
        return _fail(hook_name, session_id,
                     cause="malformed_envelope_response",
                     request_id=request_id,
                     method=method)

    # Guardian responded — was it a result or a JSON-RPC error?
    # An `error` means the Guardian explicitly rejected this envelope,
    # which is NOT a transport failure. §6.4 collapses them but the
    # cause field tells operators which case fired so they can act.
    if "error" in response:
        err = response.get("error") or {}
        if not isinstance(err, dict):
            err = {"message": str(err)}
        code = err.get("code")
        cause = guardian_error_cause(code)
        sys.stderr.write(
            f"acs-adapter: Guardian returned JSON-RPC error "
            f"{code} ({cause}): {err.get('message','')}\n")
        return _fail(hook_name, session_id,
                     cause=cause,
                     error_code=code,
                     error_message=err.get("message"),
                     request_id=request_id,
                     method=method)

    # Response signature check (only relevant when signing is enabled).
    if not verify_signature(response, session_id=session_id):
        sys.stderr.write("acs-adapter: response signature invalid\n")
        return _fail(hook_name, session_id,
                     cause="response_signature_invalid")

    # Translate before emitting so a malformed result cannot leave the
    # hook with a traceback and no verdict on stdout.
    try:
        output = translate_response(response, hook_name)
    except Exception as e:  # noqa: BLE001
        sys.stderr.write(f"acs-adapter: could not translate Guardian response: {e}\n")
        return _fail(hook_name, session_id,
                     cause="adapter_exception", error=str(e),
                     request_id=request_id,
                     method=method)

    _emit(output)
    return 0
+ """ + if DEFAULT_DENY: + msg = f"ACS adapter: decision-failure ({cause})" + if hook_name == "PreToolUse": + _emit(_pretool_response("deny", msg)) + elif hook_name in BLOCK_RESPONSE_HOOKS: + _emit(_block_response(msg)) + audit_event("decision_failure_fail_closed", + cause=cause, hook=hook_name, session_id=session_id, + **audit_extras) + return 0 + + audit_event("fail_open_bypass", + cause=cause, hook=hook_name, session_id=session_id, + **audit_extras) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/adapters/claude-code/e2e_check.py b/adapters/claude-code/e2e_check.py new file mode 100644 index 0000000..d84df18 --- /dev/null +++ b/adapters/claude-code/e2e_check.py @@ -0,0 +1,493 @@ +#!/usr/bin/env python3 +""" +End-to-end conformance check for an adopter's Claude Code ACS integration. + +This is NOT a synthetic adapter test. It drives a REAL `claude --print` +invocation against a freshly-wired `.claude/settings.json` and a +recording Guardian, and verifies that: + + 1. Claude actually fires the hooks we expect, in the order we expect. + 2. The adapter translates each hook into a wire-conformant ACS envelope. + 3. Every envelope is HMAC-signed end-to-end and the Guardian verifies it. + 4. The Guardian's verdicts are actually applied by Claude — allow lets + the tool run, deny visibly blocks it. + 5. The handshake fires ONCE per Claude session, even when many hooks + fire within that session. + +A developer integrating ACS runs this once after wiring the adapter to +confirm their installation works in production-like conditions. + +Prerequisites: + - `claude` CLI on PATH (Claude Code installed and authenticated) + - Python 3.10+ + - The canonical ACS schemas at $ACS_SPEC_DIR (default + /tmp/acs-spec-source/specification/v0.1.0/) + +Usage (from this directory): + + python3 e2e_check.py + +Each scenario takes ~10-15 seconds because real Claude is in the loop. +Total wall-clock ~60-90 seconds. 
def _envelope_checks(guardian, sub_results: list) -> None:
    """Run the shared signed-and-valid envelope assertions for `guardian`,
    using `validate_request_envelope`, with `sub_results` passed through
    as the third argument."""
    _assert_envelopes_signed_and_valid(
        guardian, validate_request_envelope, sub_results)


# ──────────────────────────────────────────────────────────────────────
# Workdir + settings.json
# ──────────────────────────────────────────────────────────────────────

def write_settings(workdir: Path, port: int) -> None:
    """Create .claude/settings.json that wires every Claude Code hook
    type to the adapter. Adapter is invoked once per hook event; it
    reads the framework-shaped event from stdin, builds an ACS envelope,
    POSTs to the Guardian, and writes the verdict back to stdout.

    Args:
        workdir: directory that receives the `.claude/` folder.
        port: local port the recording Guardian listens on.

    Every interpolated value in the hook command is shell-quoted, so an
    adapter path or secret containing spaces or shell metacharacters
    does not break the hook. Values made only of safe characters are
    emitted unchanged.
    """
    import shlex  # local import: only this function needs it

    claude_dir = workdir / ".claude"
    claude_dir.mkdir(parents=True, exist_ok=True)
    guardian_url = f"http://127.0.0.1:{port}/acs"
    command = " ".join([
        f"ACS_GUARDIAN_URL={shlex.quote(guardian_url)}",
        f"ACS_HMAC_SECRET={shlex.quote(HMAC_SECRET)}",
        "ACS_GUARDIAN_HOST_ALLOWLIST=",
        "python3",
        shlex.quote(str(ADAPTER)),
    ])

    def one(matcher: str = "*"):
        return {"matcher": matcher, "hooks": [{"type": "command", "command": command}]}

    settings = {
        "hooks": {
            "SessionStart": [one()],
            "UserPromptSubmit": [one()],
            "PreToolUse": [one()],
            "PostToolUse": [one()],
            "Notification": [one()],
            "Stop": [one()],
            "SessionEnd": [one()],
        }
    }
    (claude_dir / "settings.json").write_text(json.dumps(settings, indent=2))
def run_claude(prompt: str, *, workdir: Path,
               timeout: float = PER_CALL_TIMEOUT_S) -> tuple[int, str, str]:
    """Invoke `claude --print --model <CLAUDE_MODEL>` from workdir.

    The CLI runs with `--permission-mode acceptEdits` and the prompt as
    its final argument; stdout and stderr are captured as text.

    Returns:
        (return code, stdout, stderr) of the finished process.

    `subprocess.run` enforces `timeout`; its exceptions are not caught here.
    """
    argv = [
        "claude", "--print",
        "--model", CLAUDE_MODEL,
        "--permission-mode", "acceptEdits",
        prompt,
    ]
    completed = subprocess.run(
        argv,
        cwd=str(workdir),
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    return completed.returncode, completed.stdout, completed.stderr
# ──────────────────────────────────────────────────────────────────────
# Programmable disposition handlers for the Guardian
# ──────────────────────────────────────────────────────────────────────

def allow_unless(*, deny_when_command_contains: str | None = None,
                 deny_reason: str = "policy denies this") -> callable:
    """Build a Guardian handler that allows by default but denies when
    a Bash command's `command` argument contains a given substring.

    The `command` argument may arrive wrapped (``{"value": ...}``) or as
    a plain string; any other shape is treated as an empty command.
    With no substring configured the handler always allows.
    """
    needle = deny_when_command_contains

    def handler(req: dict) -> dict:
        params = req["params"]
        verdict = {
            "type": "final", "acs_version": "0.1.0",
            "request_id": params["request_id"],
            "chain_hash": "0" * 64,
        }
        if needle is not None:
            arguments = (params.get("payload") or {}).get("arguments") or {}
            command = arguments.get("command")
            if isinstance(command, dict):
                text = str(command.get("value", ""))
            elif isinstance(command, str):
                text = command
            else:
                text = ""
            if needle in text:
                verdict.update(decision="deny",
                               reasoning=deny_reason,
                               reason_codes=["policy_deny"])
                return verdict
        verdict["decision"] = "allow"
        return verdict

    return handler


# ──────────────────────────────────────────────────────────────────────
# Scenario runner
# ──────────────────────────────────────────────────────────────────────

def header_for_scenario(report: Report, num: int, total: int, title: str,
                        prompt: str, expectation: str) -> None:
    """Print the scenario heading followed by its prompt and expectation."""
    report.case(num, total, title)
    for label, text in (("Prompt:", f"\"{prompt}\""), ("Expected:", expectation)):
        report.field(label, text)
def dump_session_envelopes(report: Report, guardian: ProgrammableGuardian,
                           session_id: str | None = None) -> None:
    """For every envelope received in this session, print method name +
    a one-line summary. For interesting ones (toolCallRequest), print the
    full JSON.

    With `session_id` None every received envelope is listed; otherwise
    only those whose metadata carries that session id.
    """
    if session_id is None:
        envelopes = list(guardian.received)
    else:
        envelopes = [r for r in guardian.received
                     if r.get("params", {}).get("metadata", {}).get("session_id") == session_id]

    methods = [(r.get("method", ""), r.get("params", {}).get("request_id", "")[:8])
               for r in envelopes]
    print(" ── Hooks Claude fired (in order)")
    for method, rid in methods:
        print(f" {method:35s} req={rid}…")

    # Print the first toolCallRequest envelope verbatim — what Claude actually emits
    pretool = next((r for r in envelopes if r.get("method") == "steps/toolCallRequest"), None)
    if pretool:
        report.json_block("First steps/toolCallRequest envelope (verbatim)",
                          pretool, truncate=140)


# ──────────────────────────────────────────────────────────────────────
# Scenarios
# ──────────────────────────────────────────────────────────────────────

TOTAL_SCENARIOS = 4


def scenario_allow(report: Report, workdir: Path,
                   guardian: ProgrammableGuardian) -> None:
    """Scenario 1: the Guardian allows a benign Bash call and the tool runs.

    Passes when the Guardian received envelopes including handshake/hello
    and steps/toolCallRequest, the shared envelope checks pass, and the
    random marker echoed by the command shows up in a toolCallResult
    output.
    """
    marker = f"ACS_E2E_OK_{uuid.uuid4().hex[:8]}"
    prompt = f"Use the Bash tool exactly once to run: echo {marker}"
    header_for_scenario(
        report, 1, TOTAL_SCENARIOS,
        title="ALLOW — benign Bash; Claude runs the tool",
        prompt=prompt,
        expectation=(f"Claude fires PreToolUse(Bash); Guardian allows; "
                     f"the Bash tool actually executes; marker {marker!r} "
                     f"appears in the toolCallResult envelope"),
    )

    guardian.reset()
    guardian.handlers["steps/toolCallRequest"] = allow_unless()

    rc, stdout, _stderr = run_claude(prompt, workdir=workdir)

    report.quote_block(f"Claude's stdout (rc={rc})", stdout, max_chars=300)

    sub_results = []
    sub_results.append(("Guardian received at least one envelope",
                        len(guardian.received) > 0,
                        f"received {len(guardian.received)}"))
    methods = set(r.get("method", "") for r in guardian.received)
    sub_results.append(("Guardian received handshake/hello",
                        "handshake/hello" in methods, ""))
    sub_results.append(("Guardian received steps/toolCallRequest",
                        "steps/toolCallRequest" in methods, ""))
    _envelope_checks(guardian, sub_results)

    # Check the marker in the toolCallResult envelope (the actual tool
    # output the Guardian saw), not in Claude's prose summary — Claude
    # may condense or rephrase tool output before showing it to the user.
    def _mentions_marker(value) -> bool:
        # Strings are searched directly; any other output shape (dict,
        # list, ...) is searched through its JSON serialisation.
        if isinstance(value, str):
            return marker in value
        return marker in json.dumps(value, default=str)

    result_envs = [r for r in guardian.received
                   if r.get("method") == "steps/toolCallResult"]
    marker_in_tool_output = any(
        _mentions_marker(o.get("value"))
        for r in result_envs
        for o in r.get("params", {}).get("payload", {}).get("outputs", [])
    )
    sub_results.append(("Marker appears in the toolCallResult envelope (Bash actually ran)",
                        marker_in_tool_output,
                        "marker found in tool output" if marker_in_tool_output
                        else f"marker absent across {len(result_envs)} toolCallResult envelope(s)"))

    dump_session_envelopes(report, guardian)
    for label, ok, detail in sub_results:
        report.sub(label, ok, detail)
    report.finish("allow-path", all(ok for _, ok, _ in sub_results))
def scenario_deny(report: Report, workdir: Path,
                  guardian: ProgrammableGuardian) -> None:
    """Scenario 2: the Guardian denies every Bash call and Claude must not run it.

    Passes when the Guardian saw a steps/toolCallRequest, answered with a
    deny, and the random marker the command would have echoed appears in
    no toolCallResult output.
    """
    marker = f"ACS_E2E_RAN_{uuid.uuid4().hex[:8]}"
    # Use a totally benign prompt Claude will happily attempt — the
    # Guardian, not Claude, is doing the blocking. The Guardian is
    # configured to deny every Bash call so it doesn't matter what
    # Claude tries to run.
    prompt = f"Use the Bash tool exactly once to run: echo {marker}"
    header_for_scenario(
        report, 2, TOTAL_SCENARIOS,
        title="DENY — Guardian blocks a tool call; Claude does not run it",
        prompt=prompt,
        expectation=("Guardian returns DENY for the Bash call (forced via "
                     "Guardian config, not Claude's own safety layer); "
                     "Claude does NOT execute the command; "
                     f"marker {marker!r} is NOT in any toolCallResult"),
    )

    # Force the Guardian to deny every Bash call this scenario, regardless
    # of payload contents. This isolates ACS enforcement from Claude's own
    # safety filters — we want to test "Guardian decides DENY, adapter
    # surfaces it, Claude honors it", not Claude's prompt-classification.
    def deny_all_bash(req: dict) -> dict:
        payload = req["params"].get("payload") or {}
        tool_name = payload.get("tool", {}).get("name", "")
        base = {
            "type": "final", "acs_version": "0.1.0",
            "request_id": req["params"]["request_id"],
            "chain_hash": "0" * 64,
        }
        if tool_name in ("Bash", "Shell"):
            return {**base, "decision": "deny",
                    "reasoning": "E2E test: Guardian configured to deny all Bash",
                    "reason_codes": ["e2e_force_deny"]}
        return {**base, "decision": "allow"}

    guardian.reset()
    guardian.handlers["steps/toolCallRequest"] = deny_all_bash

    rc, stdout, _stderr = run_claude(prompt, workdir=workdir)

    report.quote_block(f"Claude's stdout (rc={rc})", stdout, max_chars=300)

    sub_results = []
    methods = set(r.get("method", "") for r in guardian.received)
    saw_pretool = "steps/toolCallRequest" in methods
    sub_results.append(("Guardian received steps/toolCallRequest "
                        "(Claude attempted the tool)",
                        saw_pretool, ""))
    deny_resp = next((r for r in guardian.sent
                      if r.get("result", {}).get("decision") == "deny"), None)
    sub_results.append(("Guardian returned a deny verdict",
                        deny_resp is not None,
                        "deny issued" if deny_resp else "no deny found"))

    # Check toolCallResult envelopes — the marker MUST NOT be in any
    # toolCallResult output (proves the Bash actually didn't execute).
    def _mentions_marker(value) -> bool:
        # Search every output shape, not just dict/str: skipping a
        # list-valued output here would turn a real leak into a false pass.
        if isinstance(value, str):
            return marker in value
        return marker in json.dumps(value, default=str)

    result_envs = [r for r in guardian.received
                   if r.get("method") == "steps/toolCallResult"]
    marker_in_results = any(
        _mentions_marker(o.get("value"))
        for r in result_envs
        for o in r.get("params", {}).get("payload", {}).get("outputs", [])
    )
    sub_results.append(("Marker is NOT in any toolCallResult (command did not run)",
                        not marker_in_results,
                        "marker absent" if not marker_in_results
                        else "MARKER PRESENT (command ran despite deny)"))

    dump_session_envelopes(report, guardian)
    if deny_resp:
        report.json_block("Guardian's deny verdict (verbatim)",
                          deny_resp["result"], truncate=140)
    for label, ok, detail in sub_results:
        report.sub(label, ok, detail)
    report.finish("deny-path", all(ok for _, ok, _ in sub_results))
def scenario_read_tool(report: Report, workdir: Path,
                       guardian: ProgrammableGuardian) -> None:
    """Scenario 3: a Read tool call goes through the same wire contract.

    Passes when the Guardian received a steps/toolCallRequest for the
    Read tool whose arguments are all wrapped as ``{"value": ...}`` and
    include `file_path`.
    """
    prompt = "Use the Read tool to read the file /etc/hostname; just report what you read."
    header_for_scenario(
        report, 3, TOTAL_SCENARIOS,
        title="READ TOOL — different tool, same wire contract",
        prompt=prompt,
        expectation=("Claude fires PreToolUse(Read); envelope contains the "
                     "file_path argument wrapped per tool-call-request.json"),
    )

    guardian.reset()
    guardian.handlers["steps/toolCallRequest"] = allow_unless()

    rc, stdout, _ = run_claude(prompt, workdir=workdir)
    report.quote_block(f"Claude's stdout (rc={rc})", stdout, max_chars=200)

    read_requests = []
    for envelope in guardian.received:
        if envelope.get("method") != "steps/toolCallRequest":
            continue
        if envelope["params"]["payload"]["tool"]["name"] == "Read":
            read_requests.append(envelope)

    checks = [("Guardian received a Read steps/toolCallRequest",
               bool(read_requests), f"{len(read_requests)} found")]
    if read_requests:
        arguments = read_requests[0]["params"]["payload"].get("arguments", {})
        # tool-call-request.json:26-37 — arguments wrapped as {value: ...}
        all_wrapped = all(isinstance(arg, dict) and "value" in arg
                          for arg in arguments.values())
        checks.append(("Arguments are wrapped per tool-call-request.json:26-37",
                       all_wrapped, f"args={list(arguments.keys())}"))
        checks.append(("Read tool's file_path appears in arguments",
                       "file_path" in arguments, ""))

    dump_session_envelopes(report, guardian)
    passed = True
    for label, ok, detail in checks:
        report.sub(label, ok, detail)
        passed = passed and bool(ok)
    report.finish("read-tool-path", passed)
def scenario_handshake_once(report: Report, workdir: Path,
                            guardian: ProgrammableGuardian) -> None:
    """Scenario 4: exactly one handshake/hello per Claude session.

    Runs a prompt that triggers two Bash calls with a fresh handshake
    cache directory, then checks the Guardian saw one handshake/hello and
    at least two steps/toolCallRequest and steps/toolCallResult envelopes.

    `ACS_HANDSHAKE_CACHE` is set only for the duration of the Claude run
    and restored afterwards, so this scenario does not leak process-wide
    environment state to whatever runs next.
    """
    prompt = ("Use the Bash tool exactly twice: first run "
              "'echo first-call', then run 'echo second-call'.")
    header_for_scenario(
        report, 4, TOTAL_SCENARIOS,
        title="HANDSHAKE — fires exactly ONCE per Claude session (§4)",
        prompt=prompt,
        expectation=("Claude fires multiple hooks (handshake at session "
                     "start, then ≥2 PreToolUse + PostToolUse pairs); "
                     "Guardian sees exactly 1 handshake/hello for the session"),
    )

    guardian.reset()
    guardian.handlers["steps/toolCallRequest"] = allow_unless()
    # Fresh handshake cache so this scenario's first envelope DOES handshake
    cache_dir = workdir / ".acs-handshake-cache"
    if cache_dir.exists():
        shutil.rmtree(cache_dir)
    cache_dir.mkdir()

    previous_cache = os.environ.get("ACS_HANDSHAKE_CACHE")
    os.environ["ACS_HANDSHAKE_CACHE"] = str(cache_dir)
    try:
        rc, stdout, _ = run_claude(prompt, workdir=workdir)
    finally:
        if previous_cache is None:
            os.environ.pop("ACS_HANDSHAKE_CACHE", None)
        else:
            os.environ["ACS_HANDSHAKE_CACHE"] = previous_cache
    report.quote_block(f"Claude's stdout (rc={rc})", stdout, max_chars=250)

    handshakes = [r for r in guardian.received if r.get("method") == "handshake/hello"]
    pretools = [r for r in guardian.received if r.get("method") == "steps/toolCallRequest"]
    posttools = [r for r in guardian.received if r.get("method") == "steps/toolCallResult"]
    methods_in_order = [r.get("method", "") for r in guardian.received]

    print(" ── Hooks Claude fired (in order)")
    for m in methods_in_order:
        print(f" {m}")

    sub_results = [
        ("Exactly 1 handshake/hello per session", len(handshakes) == 1,
         f"got {len(handshakes)}"),
        ("≥2 steps/toolCallRequest (Claude did call Bash twice)",
         len(pretools) >= 2, f"got {len(pretools)}"),
        ("≥2 steps/toolCallResult (each Bash returned)",
         len(posttools) >= 2, f"got {len(posttools)}"),
    ]
    for label, ok, detail in sub_results:
        report.sub(label, ok, detail)
    report.finish("handshake-once-per-session", all(ok for _, ok, _ in sub_results))
# ──────────────────────────────────────────────────────────────────────
# Entry point
# ──────────────────────────────────────────────────────────────────────

def main() -> int:
    """Run all scenarios against a real `claude` CLI and print the report.

    Returns:
        1 when a prerequisite is missing (`claude` not on PATH, or the
        schema directory absent) or when the report summary is falsy;
        0 otherwise.

    The Guardian is stopped and the temporary workdir removed even when
    setup fails part-way (for example if the workdir cannot be created).
    """
    # Prereq: claude on PATH
    if shutil.which("claude") is None:
        print("FATAL: `claude` CLI not found on PATH.", file=sys.stderr)
        print("Install Claude Code first: https://docs.claude.com/claude-code",
              file=sys.stderr)
        return 1
    if not SPEC_DIR_DEFAULT.exists():
        print(f"FATAL: canonical schemas missing at {SPEC_DIR_DEFAULT}",
              file=sys.stderr)
        print("Set ACS_SPEC_DIR to a clone of "
              "Agent-Control-Standard/ACS/specification/v0.1.0/", file=sys.stderr)
        return 1

    report = Report()
    report.print_header(
        "ACS Claude Code adapter — REAL end-to-end conformance check",
        "",
        "This test drives a REAL `claude --print` invocation against",
        "the adapter you wired up. It is NOT synthetic — it verifies",
        "that YOUR installed Claude Code actually fires the hooks ACS",
        "expects, that they are correctly translated to wire envelopes,",
        "signed end-to-end, and applied by Claude when the Guardian decides.",
        "",
        f"Spec source : {SPEC_DIR_DEFAULT}",
        f"Adapter : {ADAPTER}",
        f"Claude CLI : {shutil.which('claude')}",
        f"Model : {CLAUDE_MODEL}",
        "",
        f"{TOTAL_SCENARIOS} scenarios — each invokes real Claude (~10-15s each).",
        width=68,
    )

    # Programmable Guardian — records every envelope, signs every response
    # with the same HMAC secret the adapter uses, can be configured per
    # scenario to return specific dispositions.
    guardian = ProgrammableGuardian(hmac_secret=HMAC_SECRET)
    guardian.start()

    # Everything after start() sits inside the try so the Guardian is
    # stopped even if creating the workdir fails.
    workdir = None
    try:
        workdir = Path(tempfile.mkdtemp(prefix="acs-e2e-real-"))
        write_settings(workdir, guardian.port)
        scenario_allow(report, workdir, guardian)
        scenario_deny(report, workdir, guardian)
        scenario_read_tool(report, workdir, guardian)
        scenario_handshake_once(report, workdir, guardian)
    finally:
        guardian.stop()
        if workdir is not None:
            shutil.rmtree(workdir, ignore_errors=True)

    return 0 if report.summary("YOUR CLAUDE CODE INSTALL IS ACS-CONFORMANT", width=68) else 1
+ +## Hook event mapping + +| Claude Code hook | ACS step method | Notes | +|---|---|---| +| `SessionStart` | `steps/sessionStart` | Session bounds. | +| `SessionEnd` | `steps/sessionEnd` | | +| `Stop` | `steps/sessionEnd` | Claude Code's session-stop signal; also maps to `sessionEnd`. | +| `UserPromptSubmit` | `steps/userMessage` | The `prompt` field carries the user's input. | +| `PreToolUse` | `steps/toolCallRequest` | Fires before the tool runs. Decision gates execution. | +| `PostToolUse` | `steps/toolCallResult` | Fires after the tool runs. `tool_response` carries the output. | +| `PreCompact` | `steps/preCompact` | Memory compaction is about to occur. | +| `PostCompact` | `steps/postCompact` | Compaction has completed. | +| `SubagentStop` | `steps/subagentStop` | A sub-agent has completed. | +| `Notification` | `steps/agentResponse` | **Observation-only.** Claude Code's `Notification` fires *after* the assistant message is delivered to the user; the framework does not consult the hook return value. The adapter emits the ACS envelope for trace + audit, but a Guardian `deny` / `modify` cannot retroactively block or rewrite a message the user has already seen. ACS-Core §hooks.md describes `agentResponse` as decision-eligible; this adapter's mapping is honest about the framework constraint. | + +Claude Code hooks not currently mapped (the adapter passes them through unhandled, so Claude Code proceeds): `PreToolUseFailure`, `PostToolUseFailure`, `PermissionRequest`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `ConfigChange`, `TeammateIdle`, `TaskCompleted`, `MCPElicitation`. Most of these have no semantic ACS equivalent in v0.1.0 and can be added in follow-up PRs. 
+ +## Disposition mapping + +| ACS disposition | Claude Code response on `PreToolUse` | Claude Code response on other hooks | +|---|---|---| +| `allow` | `{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "allow"}}` | empty (Claude Code proceeds) | +| `deny` | `{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "deny", "permissionDecisionReason": "..."}}` | `{"decision": "block", "reason": "..."}` | +| `modify` | `permissionDecision: "allow"` plus `updatedInput` if `parameter_overrides` present | substituted to `block` with audit | +| `ask` | `permissionDecision: "deny"` with reason `approval required: ...` | `{"decision": "block", "reason": "approval required: ..."}` | +| `defer` | `permissionDecision: "deny"` with reason `deferred: ...` | `{"decision": "block", "reason": "deferred: ..."}` | + +ASK and DEFER are substituted to BLOCK at the adapter layer because Claude Code's hook protocol does not have a native pause-and-resume primitive. A deployment that needs approver-driven resumption should run ACS-Core's ASK flow at the Guardian layer and have the Guardian return an effective decision (typically `allow` after approver consent or `deny` on rejection) before responding to the adapter. + +## Conformance posture + +The Claude Code adapter implements ACS-Core's mandatory floor: + +- Handshake: not negotiated per-call; the adapter assumes the Guardian advertises ACS-Core support at the endpoint. A production deployment should perform `handshake/hello` at session start and cache the result. +- Five dispositions: ALLOW / DENY / ASK / DEFER are honored as above for **pre-execution** hooks (`PreToolUse`, `UserPromptSubmit`). MODIFY is partially honored (only on `PreToolUse` with `parameter_overrides`). **Post-execution and lifecycle hooks (`PostToolUse`, `Notification → agentResponse`, `Stop`, `SessionEnd`) are observation-only**: Claude Code fires them after the action / message / session has completed; a Guardian `deny` on those hooks cannot undo the side effect. The adapter emits the audit envelope; deployments needing pre-delivery gating must place the gate at `UserPromptSubmit` for prompts or `PreToolUse` for tools. 
+- Session context: the adapter sends `session_id` on every request, derived from the working directory hash unless `ACS_SESSION_ID` is set. Guardian-side audit chain accumulates against that id. +- Replay protection: `request_id` (UUID) and `timestamp` are populated on every request. +- Baseline integrity: not implemented in this minimal adapter (HMAC-SHA256 keying is out of scope for the example). A production adapter wraps `acs_adapter.py`'s outbound request in an HMAC envelope using a session key derived from deployment configuration. +- Decision honoring: the adapter's `_fail()` posture is fail-open (`proceed`) by default, per §6.4, and every such bypass is recorded as an audit event; set `ACS_DEFAULT_DENY=1` to fail closed (`deny`). diff --git a/adapters/claude-code/settings.json.example b/adapters/claude-code/settings.json.example new file mode 100644 index 0000000..c602956 --- /dev/null +++ b/adapters/claude-code/settings.json.example @@ -0,0 +1,73 @@ +{ + "_comment": "Drop into ~/.claude/settings.json (or merge with your existing config). 
Replace ACS_GUARDIAN_URL with your deployment's endpoint.", + "hooks": { + "PreToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "python3 /path/to/acs_adapter.py", + "env": { + "ACS_GUARDIAN_URL": "http://127.0.0.1:8787/acs", + "ACS_DEFAULT_DENY": "1" + } + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "python3 /path/to/acs_adapter.py", + "env": { + "ACS_GUARDIAN_URL": "http://127.0.0.1:8787/acs" + } + } + ] + } + ], + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "python3 /path/to/acs_adapter.py", + "env": { + "ACS_GUARDIAN_URL": "http://127.0.0.1:8787/acs" + } + } + ] + } + ], + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "python3 /path/to/acs_adapter.py", + "env": { + "ACS_GUARDIAN_URL": "http://127.0.0.1:8787/acs" + } + } + ] + } + ], + "SessionEnd": [ + { + "hooks": [ + { + "type": "command", + "command": "python3 /path/to/acs_adapter.py", + "env": { + "ACS_GUARDIAN_URL": "http://127.0.0.1:8787/acs" + } + } + ] + } + ] + } +} diff --git a/adapters/claude-code/tests/__init__.py b/adapters/claude-code/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/adapters/claude-code/tests/example_payloads.md b/adapters/claude-code/tests/example_payloads.md new file mode 100644 index 0000000..09607d1 --- /dev/null +++ b/adapters/claude-code/tests/example_payloads.md @@ -0,0 +1,157 @@ +# Real-world payload examples — Claude Code + +These are the actual JSON shapes Claude Code emits on stdin for each hook, captured from a real `claude` session and **masked**. Identifying fields are replaced with placeholders. Use these to understand the schema the adapter parses. + +All fields documented in [Claude Code's hook docs](https://code.claude.com/docs/en/hooks) plus several that appear in real payloads but aren't in the public docs (flagged below). 
+ +--- + +## SessionStart + +```json +{ + "session_id": "00000000-0000-0000-0000-000000000001", + "transcript_path": "/Users//.claude/projects//.jsonl", + "cwd": "/path/to/project", + "hook_event_name": "SessionStart", + "source": "startup" +} +``` + +`source` enum: `startup`, `resume`, `clear`, `compact`. + +--- + +## UserPromptSubmit + +```json +{ + "session_id": "00000000-0000-0000-0000-000000000001", + "transcript_path": "/Users//.claude/projects//.jsonl", + "cwd": "/path/to/project", + "permission_mode": "default", + "hook_event_name": "UserPromptSubmit", + "prompt": "" +} +``` + +`permission_mode` enum: `default`, `plan`, `acceptEdits`, `auto`, `dontAsk`, `bypassPermissions`. + +--- + +## PreToolUse (Bash tool) + +```json +{ + "session_id": "00000000-0000-0000-0000-000000000001", + "transcript_path": "/Users//.claude/projects//.jsonl", + "cwd": "/path/to/project", + "permission_mode": "acceptEdits", + "effort": {"level": "high"}, + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_input": { + "command": "echo hello", + "description": "Echo a test string" + }, + "tool_use_id": "toolu_00000000000000000000000000" +} +``` + +Fields not in the public docs but present in real payloads: `effort.level`, `tool_use_id`. The adapter forwards `tool_use_id` to the Guardian as part of the payload context. 
+ +--- + +## PostToolUse (Bash tool) + +```json +{ + "session_id": "00000000-0000-0000-0000-000000000001", + "transcript_path": "/Users//.claude/projects//.jsonl", + "cwd": "/path/to/project", + "permission_mode": "acceptEdits", + "effort": {"level": "high"}, + "hook_event_name": "PostToolUse", + "tool_name": "Bash", + "tool_input": { + "command": "echo hello", + "description": "Echo a test string" + }, + "tool_response": { + "stdout": "hello", + "stderr": "", + "interrupted": false, + "isImage": false, + "noOutputExpected": false + }, + "tool_use_id": "toolu_00000000000000000000000000", + "duration_ms": 5616 +} +``` + +**Important schema difference from docs:** the public docs describe a `tool_output` string field, but real Claude Code emits a `tool_response` **object** with `stdout`, `stderr`, `interrupted`, `isImage`, `noOutputExpected`. The adapter reads `tool_response` first, falls back to `tool_output` for forward-compat. + +--- + +## Adapter response shapes + +### Allow (any PreToolUse) + +```json +{ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "allow" + } +} +``` + +### Deny (PreToolUse) + +```json +{ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": "destructive Bash pattern in: rm -rf /home/u" + } +} +``` + +### Modify (PreToolUse) — passes modified tool_input back to Claude Code + +```json +{ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "allow", + "updatedInput": {"command": "echo hello # sanitized by Guardian"} + } +} +``` + +### Block on lifecycle hooks (PostToolUse, UserPromptSubmit, Stop, etc.) 
+ +These use top-level `decision: "block"`: + +```json +{ + "decision": "block", + "reason": "ACS adapter: Guardian unreachable" +} +``` + +--- + +## Masking convention used here + +| Field | Real value contains | Masked as | +|---|---|---| +| `session_id` | Real UUID from the session | `00000000-0000-0000-0000-000000000001` | +| `transcript_path` | Real absolute path on the user's machine | `/Users/<user>/.claude/projects/<project>/<session>.jsonl` | +| `cwd` | Real working directory at runtime | `/path/to/project` | +| `prompt` | Actual user input | `<user prompt>` | +| `tool_use_id` | Real Claude Code internal id | `toolu_00000000000000000000000000` | +| `command` | Real command (sometimes preserved when benign) | Either preserved or `<masked command>` | + +No real session data is committed to this repo. diff --git a/adapters/claude-code/tests/test_adapter.py b/adapters/claude-code/tests/test_adapter.py new file mode 100644 index 0000000..cbb968e --- /dev/null +++ b/adapters/claude-code/tests/test_adapter.py @@ -0,0 +1,237 @@ +""" +End-to-end test: start the example Guardian, pipe Claude Code-shaped +hook payloads through acs_adapter.py, assert the output Claude Code +would receive. 
+ +Schema source: https://code.claude.com/docs/en/hooks +""" +from __future__ import annotations + +import json +import os +import socket +import subprocess +import sys +import time +import unittest +from pathlib import Path + + +HERE = Path(__file__).resolve().parent +ADAPTER_DIR = HERE.parent +ADAPTER = ADAPTER_DIR / "acs_adapter.py" +GUARDIAN = ADAPTER_DIR.parent / "example-guardian" / "example_guardian.py" + + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "_common")) +from test_harness import ( # noqa: E402 + free_port as _find_free_port, + wait_port as _wait_for_port, +) + + +def _claude_code_event(name: str, **extra) -> dict: + """Construct a hook event matching Claude Code's documented schema.""" + base = { + "session_id": "00000000-0000-4000-8000-000000000001", + "transcript_path": "/tmp/transcript.jsonl", + "cwd": "/tmp/work", + "hook_event_name": name, + } + if name in ("PreToolUse", "PostToolUse", "Stop"): + base["permission_mode"] = "default" + base["effort"] = {"level": "high"} + base.update(extra) + return base + + +class AdapterRoundTrip(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.port = _find_free_port() + env = os.environ.copy(); env["ACS_DEV_MODE"] = "1"; env.pop("ACS_HMAC_SECRET", None); env.pop("ACS_HMAC_SECRET_FILE", None) + cls.guardian_proc = subprocess.Popen( + [sys.executable, str(GUARDIAN), "--port", str(cls.port)], env=env, + stderr=subprocess.PIPE, + stdout=subprocess.DEVNULL, + ) + _wait_for_port("127.0.0.1", cls.port) + + @classmethod + def tearDownClass(cls) -> None: + cls.guardian_proc.terminate() + try: + cls.guardian_proc.wait(timeout=2.0) + except subprocess.TimeoutExpired: + cls.guardian_proc.kill() + + def _run_adapter( + self, + event: dict, + env_overrides: dict[str, str] | None = None, + ) -> tuple[int, str, str]: + env = os.environ.copy() + env["ACS_GUARDIAN_URL"] = f"http://127.0.0.1:{self.port}/acs" + if env_overrides: + env.update(env_overrides) + proc = 
subprocess.run( + [sys.executable, str(ADAPTER)], + input=json.dumps(event), + capture_output=True, + text=True, + env=env, + timeout=10, + ) + return proc.returncode, proc.stdout.strip(), proc.stderr.strip() + + # ----- PreToolUse: allow path (must produce hookSpecificOutput.permissionDecision=allow) ----- + + def test_safe_tool_call_allows(self) -> None: + rc, out, err = self._run_adapter(_claude_code_event( + "PreToolUse", tool_name="Read", tool_input={"file_path": "/tmp/safe.txt"}, + )) + self.assertEqual(rc, 0, err) + payload = json.loads(out) + self.assertEqual(payload["hookSpecificOutput"]["hookEventName"], "PreToolUse") + self.assertEqual(payload["hookSpecificOutput"]["permissionDecision"], "allow") + + def test_safe_bash_allows(self) -> None: + rc, out, _ = self._run_adapter(_claude_code_event( + "PreToolUse", tool_name="Bash", tool_input={"command": "ls -la"}, + )) + self.assertEqual(rc, 0) + payload = json.loads(out) + self.assertEqual(payload["hookSpecificOutput"]["permissionDecision"], "allow") + + # ----- PreToolUse: deny path ----- + + def test_destructive_bash_denied(self) -> None: + rc, out, _ = self._run_adapter(_claude_code_event( + "PreToolUse", tool_name="Bash", tool_input={"command": "rm -rf /home/user"}, + )) + self.assertEqual(rc, 0) + payload = json.loads(out) + hso = payload["hookSpecificOutput"] + self.assertEqual(hso["hookEventName"], "PreToolUse") + self.assertEqual(hso["permissionDecision"], "deny") + self.assertIn("destructive", hso["permissionDecisionReason"].lower()) + + def test_write_to_protected_path_denied(self) -> None: + rc, out, _ = self._run_adapter(_claude_code_event( + "PreToolUse", tool_name="Write", + tool_input={"file_path": "/etc/passwd", "content": "x"}, + )) + self.assertEqual(rc, 0) + payload = json.loads(out) + self.assertEqual(payload["hookSpecificOutput"]["permissionDecision"], "deny") + self.assertIn("protected", payload["hookSpecificOutput"]["permissionDecisionReason"].lower()) + + # ----- Lifecycle hooks: 
empty stdout = proceed ----- + + def test_session_start_no_output(self) -> None: + rc, out, _ = self._run_adapter(_claude_code_event( + "SessionStart", source="startup", model="claude-opus-4-7", + )) + self.assertEqual(rc, 0) + self.assertEqual(out, "") + + def test_session_end_no_output(self) -> None: + rc, out, _ = self._run_adapter(_claude_code_event( + "SessionEnd", reason="clear", + )) + self.assertEqual(rc, 0) + self.assertEqual(out, "") + + def test_user_prompt_submit_no_block(self) -> None: + rc, out, _ = self._run_adapter(_claude_code_event( + "UserPromptSubmit", prompt="summarize my emails", + )) + self.assertEqual(rc, 0) + self.assertEqual(out, "") + + def test_post_tool_use_no_block(self) -> None: + rc, out, _ = self._run_adapter(_claude_code_event( + "PostToolUse", tool_name="Read", + tool_input={"file_path": "/tmp/x"}, tool_output="file contents", + )) + self.assertEqual(rc, 0) + self.assertEqual(out, "") + + def test_notification_no_block(self) -> None: + rc, out, _ = self._run_adapter(_claude_code_event( + "Notification", notification_type="permission_prompt", + message="confirm action", + )) + self.assertEqual(rc, 0) + self.assertEqual(out, "") + + # ----- Unknown hook proceeds silently ----- + + def test_unmapped_hook_proceeds(self) -> None: + rc, out, _ = self._run_adapter({ + "hook_event_name": "SomeFutureHook", + "session_id": "x", + "cwd": "/tmp", + "transcript_path": "/tmp/t", + "data": {"x": 1}, + }) + self.assertEqual(rc, 0) + self.assertEqual(out, "") + + # ----- Fail posture ----- + + def test_guardian_unreachable_default_deny_pretoolue(self) -> None: + """PreToolUse with Guardian down + DEFAULT_DENY=1: emit deny in PreToolUse output shape.""" + rc, out, err = self._run_adapter( + _claude_code_event( + "PreToolUse", tool_name="Read", + tool_input={"file_path": "/tmp/x"}, + ), + env_overrides={"ACS_GUARDIAN_URL": "http://127.0.0.1:1/dead", + "ACS_DEFAULT_DENY": "1", + "ACS_HANDSHAKE": "0"}, + ) + self.assertEqual(rc, 0, err) + payload = 
json.loads(out) + self.assertEqual(payload["hookSpecificOutput"]["permissionDecision"], "deny") + self.assertIn("decision-failure", payload["hookSpecificOutput"]["permissionDecisionReason"].lower()) + # §6.4: every decision-failure path must produce an audit event + self.assertIn("ACS_AUDIT", err) + self.assertIn("decision_failure_fail_closed", err) + + def test_guardian_unreachable_default_deny_posttool(self) -> None: + """PostToolUse with Guardian down + DEFAULT_DENY=1: top-level decision: block.""" + rc, out, err = self._run_adapter( + _claude_code_event( + "PostToolUse", tool_name="Read", + tool_input={"file_path": "/tmp/x"}, tool_output="x", + ), + env_overrides={"ACS_GUARDIAN_URL": "http://127.0.0.1:1/dead", + "ACS_DEFAULT_DENY": "1", + "ACS_HANDSHAKE": "0"}, + ) + self.assertEqual(rc, 0, err) + payload = json.loads(out) + self.assertEqual(payload["decision"], "block") + self.assertIn("decision-failure", payload["reason"].lower()) + self.assertIn("ACS_AUDIT", err) + + def test_guardian_unreachable_fail_open_default_is_audit(self) -> None: + """Spec default per §6.4: fail-open, every bypass recorded as an audit event.""" + rc, out, err = self._run_adapter( + _claude_code_event( + "PreToolUse", tool_name="Read", + tool_input={"file_path": "/tmp/x"}, + ), + env_overrides={"ACS_GUARDIAN_URL": "http://127.0.0.1:1/dead", + "ACS_HANDSHAKE": "0"}, + ) + self.assertEqual(rc, 0, err) + self.assertEqual(out, "") # proceed (fail-open) + # §6.4: 'Every step that proceeds without a decision MUST be recorded as an audit event' + self.assertIn("ACS_AUDIT", err, "fail-open MUST emit an audit event per §6.4") + self.assertIn("fail_open_bypass", err) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/adapters/claude-code/tests/test_envelope_schema.py b/adapters/claude-code/tests/test_envelope_schema.py new file mode 100644 index 0000000..7f0658d --- /dev/null +++ b/adapters/claude-code/tests/test_envelope_schema.py @@ -0,0 +1,194 @@ +""" +Spec-validation 
tests for the Claude Code adapter. + +Ground truth: the canonical v0.1.0 schemas at +`specification/v0.1.0/request-envelope.json` and +`specification/v0.1.0/hooks/*.json` in the upstream +Agent-Control-Standard/ACS repo. + +NOT validated against the example Guardian. These tests fail the +moment the adapter's wire format drifts from the spec, independent +of whether the round-trip tests pass. + +Spec source defaults to /tmp/acs-spec-source/specification/v0.1.0; +override with ACS_SPEC_DIR. +""" +from __future__ import annotations + +import json +import os +import sys +import unittest +from pathlib import Path + +from jsonschema import Draft202012Validator +from jsonschema.validators import RefResolver + + +SPEC_DIR_DEFAULT = Path("/tmp/acs-spec-source/specification/v0.1.0") +SPEC_DIR = Path(os.environ.get("ACS_SPEC_DIR", str(SPEC_DIR_DEFAULT))) + +HERE = Path(__file__).resolve().parent +ADAPTER_DIR = HERE.parent +sys.path.insert(0, str(ADAPTER_DIR)) + +import acs_adapter # noqa: E402 + + +def _load_schema(name: str) -> dict: + with open(SPEC_DIR / name) as f: + return json.load(f) + + +def _validate(payload: dict, schema_name: str) -> list: + schema = _load_schema(schema_name) + resolver = RefResolver( + base_uri=(SPEC_DIR.as_uri() + "/" + schema_name), + referrer=schema, + ) + validator = Draft202012Validator( + schema, resolver=resolver, + format_checker=Draft202012Validator.FORMAT_CHECKER, + ) + return [ + f"{'.'.join(str(p) for p in err.absolute_path) or ''}: {err.message}" + for err in validator.iter_errors(payload) + ] + + +def _event(name: str, **extra) -> dict: + """Build a Claude Code-shaped hook event.""" + base = { + "session_id": "00000000-0000-0000-0000-000000000001", + "transcript_path": "/tmp/transcript.jsonl", + "cwd": "/tmp/work", + "hook_event_name": name, + } + base.update(extra) + return base + + +# (event_name, payload_schema_name, fixture builder) +HOOK_CASES = [ + ("PreToolUse", "hooks/tool-call-request.json", lambda: _event( + "PreToolUse", 
tool_name="Bash", tool_input={"command": "echo hi"}, + tool_use_id="t1", permission_mode="default", + )), + ("PostToolUse", "hooks/tool-call-result.json", lambda: _event( + "PostToolUse", tool_name="Bash", tool_input={"command": "echo hi"}, + tool_response={"stdout": "hi", "stderr": "", "interrupted": False, + "isImage": False, "noOutputExpected": False}, + tool_use_id="t1", duration_ms=12, permission_mode="default", + )), + ("UserPromptSubmit", "hooks/user-message.json", lambda: _event( + "UserPromptSubmit", prompt="summarize my emails", + )), + ("SessionStart", "hooks/session-start.json", lambda: _event( + "SessionStart", source="startup", model="claude-opus-4-7", + )), + ("SessionEnd", "hooks/session-end.json", lambda: _event( + "SessionEnd", reason="clear", + )), + ("Notification", "hooks/agent-response.json", lambda: _event( + "Notification", message="confirm action", notification_type="permission_prompt", + )), + ("Stop", "hooks/session-end.json", lambda: _event( + "Stop", permission_mode="default", + )), +] + + +class SpecValidationSetUp(unittest.TestCase): + def setUp(self) -> None: + if not SPEC_DIR.exists(): + self.fail( + f"Canonical spec schemas not found at {SPEC_DIR}. " + "Clone Agent-Control-Standard/ACS and set ACS_SPEC_DIR. " + "Spec validation is non-negotiable; this is not a skip." 
+ ) + + +class EnvelopeMatchesV010Schema(SpecValidationSetUp): + """Every adapter-emitted envelope MUST validate against request-envelope.json.""" + + +def _make_envelope_test(event_name, _schema, fixture): + def test(self): + envelope = acs_adapter.build_request(fixture()) + errors = _validate(envelope, "request-envelope.json") + self.assertEqual(errors, [], + f"{event_name} envelope FAILS request-envelope.json:\n - " + + "\n - ".join(errors)) + test.__name__ = f"test_envelope_{event_name}" + return test + + +class PayloadMatchesHookSchema(SpecValidationSetUp): + """params.payload MUST validate against the per-hook schema.""" + + +def _make_payload_test(event_name, schema_name, fixture): + def test(self): + envelope = acs_adapter.build_request(fixture()) + payload = envelope.get("params", {}).get("payload") + self.assertIsNotNone( + payload, + f"{event_name}: envelope missing params.payload " + f"(got params keys: {list(envelope.get('params', {}).keys())})", + ) + errors = _validate(payload, schema_name) + self.assertEqual(errors, [], + f"{event_name} payload FAILS {schema_name}:\n - " + + "\n - ".join(errors)) + test.__name__ = f"test_payload_{event_name}" + return test + + +# Attach generated tests so every mapped hook is covered, not just toolCallRequest. 
+for _event_name, _schema, _fixture in HOOK_CASES: + setattr(EnvelopeMatchesV010Schema, f"test_envelope_{_event_name}", + _make_envelope_test(_event_name, _schema, _fixture)) + setattr(PayloadMatchesHookSchema, f"test_payload_{_event_name}", + _make_payload_test(_event_name, _schema, _fixture)) + + +class TimestampIsISO8601(SpecValidationSetUp): + def test_timestamp_is_iso8601_string(self) -> None: + """request-envelope.json:38-40 — timestamp is string format date-time.""" + envelope = acs_adapter.build_request(_event( + "PreToolUse", tool_name="Read", tool_input={"file_path": "/x"}, + )) + ts = envelope["params"]["timestamp"] + self.assertIsInstance(ts, str) + import datetime as _dt + # Round-trippable as ISO 8601; trailing Z handled + _dt.datetime.fromisoformat(ts.replace("Z", "+00:00")) + + +class MetadataRequiredFields(SpecValidationSetUp): + def test_metadata_has_agent_and_session(self) -> None: + """request-envelope.json:62 — metadata.required = [agent_id, session_id].""" + envelope = acs_adapter.build_request(_event( + "PreToolUse", tool_name="Read", tool_input={"file_path": "/x"}, + )) + meta = envelope["params"]["metadata"] + self.assertIn("agent_id", meta) + self.assertIn("session_id", meta) + self.assertTrue(meta["agent_id"]) + self.assertTrue(meta["session_id"]) + + +class ArgumentsAreWrapped(SpecValidationSetUp): + def test_pretool_arguments_each_have_value_key(self) -> None: + """tool-call-request.json:26-37 — each argument is {value, provenance?}.""" + envelope = acs_adapter.build_request(_event( + "PreToolUse", tool_name="Bash", tool_input={"command": "ls", "timeout": 60}, + )) + args = envelope["params"]["payload"]["arguments"] + self.assertEqual(set(args.keys()), {"command", "timeout"}) + for k, v in args.items(): + self.assertIn("value", v, f"argument '{k}' missing 'value' wrapper: {v}") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/adapters/claude-code/tests/test_live.py b/adapters/claude-code/tests/test_live.py new 
@unittest.skipUnless(CLAUDE_AVAILABLE, "`claude` CLI not on PATH")
class LiveClaudeCodeRoundTrip(unittest.TestCase):
    """Live round trip: real `claude --print` -> adapter -> example Guardian.

    setUpClass creates a throwaway project directory whose
    ``.claude/settings.json`` wires the adapter into the PreToolUse hook,
    and starts one example Guardian for the whole class. Each test invokes
    the real CLI from that directory and inspects its stdout.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create the wired project directory and start the Guardian."""
        import shlex  # local import: only needed to quote the hook command

        cls.guardian_proc = None
        cls.workdir = tempfile.mkdtemp(prefix="acs-live-cc-")
        try:
            cls.port = _free_port()

            # Project-level settings.json wires the adapter into Claude Code's
            # PreToolUse hook. Using the project root .claude/ so we don't
            # touch the user's ~/.claude/settings.json.
            claude_dir = Path(cls.workdir) / ".claude"
            claude_dir.mkdir()
            settings = {
                "hooks": {
                    "PreToolUse": [{
                        "matcher": "*",
                        "hooks": [{
                            "type": "command",
                            # The command string is written for a shell, so
                            # quote both paths: a checkout under a directory
                            # containing spaces would otherwise split the
                            # command. Reuse the interpreter running the tests.
                            "command": (
                                f"ACS_GUARDIAN_URL=http://127.0.0.1:{cls.port}/acs "
                                f"{shlex.quote(sys.executable)} {shlex.quote(str(ADAPTER))}"
                            ),
                        }],
                    }],
                }
            }
            (claude_dir / "settings.json").write_text(json.dumps(settings, indent=2))

            env = os.environ.copy()
            env["ACS_DEV_MODE"] = "1"
            env.pop("ACS_HMAC_SECRET", None)
            env.pop("ACS_HMAC_SECRET_FILE", None)
            cls.guardian_proc = subprocess.Popen(
                [sys.executable, str(GUARDIAN), "--port", str(cls.port)], env=env,
                # Nothing reads the Guardian's stderr; an undrained PIPE
                # would block it once the OS pipe buffer fills.
                stderr=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
            )
            _wait("127.0.0.1", cls.port)
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises.
            cls._cleanup()
            raise

    @classmethod
    def tearDownClass(cls) -> None:
        """Stop the Guardian and remove the temporary project directory."""
        cls._cleanup()

    @classmethod
    def _cleanup(cls) -> None:
        """Best-effort teardown shared by tearDownClass and failed setUpClass."""
        proc = getattr(cls, "guardian_proc", None)
        if proc is not None:
            proc.terminate()
            try:
                proc.wait(timeout=2.0)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()  # reap so no zombie outlives the test run
        shutil.rmtree(cls.workdir, ignore_errors=True)

    def _claude(self, prompt: str, timeout: float = 120.0) -> tuple[int, str]:
        """Invoke `claude --print` from the test workdir, capture stdout."""
        proc = subprocess.run(
            ["claude", "--print", "--permission-mode", "acceptEdits", prompt],
            cwd=self.workdir,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        return proc.returncode, proc.stdout

    # ----- ALLOW path -----

    def test_benign_bash_runs(self) -> None:
        """Guardian's policy allows benign Bash; Claude Code runs it and
        the marker string appears in stdout."""
        marker = "ACS_LIVE_TEST_OK_MARKER"
        rc, stdout = self._claude(f"Run the shell command: echo {marker}")
        self.assertEqual(rc, 0, f"claude exited {rc}; stdout={stdout[:200]}")
        self.assertIn(marker, stdout,
                      f"benign command should have run; stdout={stdout[:300]}")

    # ----- DENY path -----

    def test_destructive_bash_blocked(self) -> None:
        """Guardian's destructive-Bash policy denies; Claude Code surfaces
        the block in its output. We test against a string the example
        Guardian's regex blocks (no actual destructive op is attempted
        because PreToolUse fires before execution)."""
        # The example_guardian DESTRUCTIVE_BASH pattern matches 'rm -rf /...'
        # PreToolUse fires BEFORE the command runs, so the Guardian sees
        # the proposed command and denies it; the command never executes.
        prompt = (
            "Use the Bash tool with this exact command: "
            "rm -rf /tmp/acs-nonexistent-live-test-target"
        )
        rc, stdout = self._claude(prompt)
        self.assertEqual(rc, 0)
        # Claude Code's response should reference the block / the Guardian
        lo = stdout.lower()
        self.assertTrue(
            "block" in lo or "denied" in lo or "policy" in lo
            or "destructive" in lo,
            f"deny should surface in Claude Code's response; stdout={stdout[:400]}",
        )
+python3 wire.py \\ + --guardian-url=http://127.0.0.1:8787/acs \\ + --secret-file=~/.acs/hmac.key \\ + --write + +# 3. Project-level settings: point at a different file +python3 wire.py \\ + --guardian-url=https://guardian.internal/acs \\ + --secret-file=/etc/acs/hmac.key \\ + --settings=./.claude/settings.json \\ + --write + +# 4. Subset of hooks (default is all 6 ACS-Core mandatory) +python3 wire.py \\ + --guardian-url=http://127.0.0.1:8787/acs \\ + --secret-file=~/.acs/hmac.key \\ + --hooks=PreToolUse,PostToolUse + +# 5. Fail-closed posture (default is fail-open per §6.4) +python3 wire.py \\ + --guardian-url=http://127.0.0.1:8787/acs \\ + --secret-file=~/.acs/hmac.key \\ + --default-deny + +# 6. Remove ACS wiring (preserves any other hooks you have) +python3 wire.py --unwire --settings=~/.claude/settings.json --write + +What this tool does NOT do +========================== + + - Generate the HMAC secret. Run: + openssl rand -hex 32 > ~/.acs/hmac.key && chmod 600 ~/.acs/hmac.key + - Start the Guardian. Run it yourself in a terminal, launchd, systemd, + Docker, whatever fits your deployment. + - Validate that the Guardian is reachable. Test that with + e2e_check.py after wiring. + - Choose any of the deployment-specific values (URL, secret path, + fail posture). All explicit flags — no hidden defaults. +""" +from __future__ import annotations + +import argparse +import datetime +import difflib +import json +import os +import shutil +import stat +import sys +from pathlib import Path +from typing import Any + + +HERE = Path(__file__).resolve().parent +DEFAULT_ADAPTER_PATH = HERE / "acs_adapter.py" + +# ACS-Core minimum hook set per conformance.md:19. The wire CLI accepts +# a subset via --hooks; warns when fewer than these 6 are wired. 
def build_command(*, adapter_path: Path, guardian_url: str,
                  secret_file: str | None,
                  secret_env: str | None,
                  default_deny: bool,
                  host_allowlist: str | None,
                  python_bin: str) -> str:
    """Compose the hook command string used inside settings.json.

    The result has the form ``VAR=value ... <python_bin> <adapter_path>
    <WIRE_MARKER>``: environment assignments, the interpreter, the adapter
    script, and the trailing marker used to recognise entries this tool wired.

    The Guardian URL, secret-file path, host allowlist and adapter path are
    shell-quoted, so values containing spaces or shell metacharacters
    (e.g. ``/Users/Jane Doe/...`` or a URL with ``&``) stay a single word.
    Values made only of ordinary path/URL characters are emitted unchanged.

    Args:
        adapter_path: path to ``acs_adapter.py``.
        guardian_url: value for ``ACS_GUARDIAN_URL``.
        secret_file: path for ``ACS_HMAC_SECRET_FILE``; expanded with
            ``_expand`` before being written. Takes precedence over
            ``secret_env``.
        secret_env: value for ``ACS_HMAC_SECRET``, used only when
            ``secret_file`` is not given.
        default_deny: when true, adds ``ACS_DEFAULT_DENY=1``.
        host_allowlist: optional value for ``ACS_GUARDIAN_HOST_ALLOWLIST``.
        python_bin: interpreter command placed before the adapter path.

    Returns:
        The complete command string.
    """
    import shlex  # local import keeps this block self-contained

    env_pairs: list[str] = [f"ACS_GUARDIAN_URL={shlex.quote(guardian_url)}"]
    if secret_file:
        # Resolve ~ and $VAR ahead of time — Python's open() doesn't
        # expand tildes and POSIX sh tilde-after-= is not guaranteed.
        env_pairs.append(
            f"ACS_HMAC_SECRET_FILE={shlex.quote(str(_expand(secret_file)))}"
        )
    elif secret_env:
        # NOTE(review): left unquoted on purpose — it is unclear from here
        # whether callers pass a literal secret or a shell reference such
        # as $NAME that must still be expanded; confirm before quoting.
        env_pairs.append(f"ACS_HMAC_SECRET={secret_env}")
    if default_deny:
        env_pairs.append("ACS_DEFAULT_DENY=1")
    if host_allowlist:
        env_pairs.append(
            f"ACS_GUARDIAN_HOST_ALLOWLIST={shlex.quote(host_allowlist)}"
        )
    env_prefix = " ".join(env_pairs)
    # python_bin is passed through verbatim so a multi-word launcher keeps
    # working; WIRE_MARKER must stay unquoted so the shell treats it as a
    # trailing comment.
    return (
        f"{env_prefix} {python_bin} "
        f"{shlex.quote(str(adapter_path))} {WIRE_MARKER}"
    )
+ kept_entries: list[dict] = [] + matcher_star_seen = False + for entry in entries: + if entry.get("matcher", "*") == "*": + matcher_star_seen = True + inner = [h for h in entry.get("hooks", []) + if WIRE_MARKER not in h.get("command", "")] + inner.append({"type": "command", "command": command}) + kept_entries.append({**entry, "hooks": inner}) + else: + kept_entries.append(entry) + if not matcher_star_seen: + kept_entries.append(build_hook_entry(command)) + hooks[name] = kept_entries + return out + + +def merge_unwire(existing: dict, hook_names: list[str]) -> dict: + """Strip ACS-wired INNER HOOKS from the given hook types. + + Operates at the inner-hooks level so a user's own hook commands + sharing the same entry / matcher are preserved. If an entry's + inner-hooks list becomes empty after stripping ours, the entry + is removed; if a hook type's entries list becomes empty, the hook + type is removed. + """ + out = json.loads(json.dumps(existing)) + hooks = out.get("hooks", {}) + for name in list(hooks.keys()): + if name not in hook_names: + continue + entries = hooks.get(name) or [] + new_entries: list[dict] = [] + for entry in entries: + kept_inner = [h for h in entry.get("hooks", []) + if WIRE_MARKER not in h.get("command", "")] + if kept_inner: + new_entries.append({**entry, "hooks": kept_inner}) + # else: entry had only our inner hook(s) — drop it + if new_entries: + hooks[name] = new_entries + else: + hooks.pop(name, None) + if not hooks: + out.pop("hooks", None) + return out + + +# ────────────────────────────────────────────────────────────────────── +# Diff + atomic write +# ────────────────────────────────────────────────────────────────────── + +def render(settings: dict) -> str: + return json.dumps(settings, indent=2, sort_keys=False) + "\n" + + +def render_diff(before: dict, after: dict, label: str) -> str: + a = render(before).splitlines(keepends=True) + b = render(after).splitlines(keepends=True) + return "".join(difflib.unified_diff(a, b, + 
fromfile=f"{label} (current)", + tofile=f"{label} (proposed)", + n=3)) + + +def write_atomically(path: Path, content: str) -> Path: + """Write content to path atomically, with a timestamped backup of any + existing file. Returns the backup path (or None if no original existed). + """ + backup = None + if path.exists(): + ts = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") + backup = path.with_suffix(path.suffix + f".bak.{ts}") + shutil.copy2(path, backup) + path.parent.mkdir(parents=True, exist_ok=True) + tmp = path.with_suffix(path.suffix + ".tmp") + tmp.write_text(content) + os.replace(tmp, path) + return backup + + +# ────────────────────────────────────────────────────────────────────── +# Validation helpers +# ────────────────────────────────────────────────────────────────────── + +def _expand(p: str) -> Path: + return Path(os.path.expanduser(os.path.expandvars(p))) + + +def validate_inputs(args: argparse.Namespace) -> list[str]: + """Return a list of human-readable warnings for the operator. None + of these block the operation — they're informational nudges.""" + warnings: list[str] = [] + + if not args.unwire: + if args.secret_file: + sf = _expand(args.secret_file) + if sf.exists(): + mode = stat.S_IMODE(sf.stat().st_mode) + if mode & 0o077: + warnings.append( + f"WARNING: {sf} is mode {oct(mode)} — the adapter " + f"will refuse to read it. Run: chmod 600 {sf}") + else: + warnings.append( + f"NOTE: secret file {sf} doesn't exist yet. Create with: " + f"openssl rand -hex 32 > {sf} && chmod 600 {sf}") + elif args.secret_env_inline: + warnings.append( + "WARNING: --secret-env-inline embeds the secret directly in " + "settings.json (visible in `ps aux`). For production, prefer " + "--secret-file with a 0600 key file.") + else: + warnings.append( + "WARNING: no HMAC secret configured (neither --secret-file " + "nor --secret-env-inline). Adapter will run unsigned — Guardian " + "will reject every request unless it's also unconfigured " + "(ACS_DEV_MODE=1). 
ACS-Core baseline integrity (§10) " + "REQUIRES signing.") + + if not args.guardian_url.startswith(("http://", "https://")): + warnings.append( + "WARNING: Guardian URL must start with http:// or https://. " + "The adapter's URL allowlist will reject any other scheme.") + + if args.guardian_url.startswith("http://") and not ( + args.guardian_url.startswith("http://127.") + or args.guardian_url.startswith("http://localhost") + ): + warnings.append( + "WARNING: plaintext HTTP to a non-loopback Guardian. The " + "envelope is HMAC-signed (so unmodifiable) but the payload " + "is readable on the wire. Use https:// for production.") + + if set(args.hooks) - set(ACS_CORE_HOOKS): + extras = set(args.hooks) - set(ACS_CORE_HOOKS) + warnings.append( + f"NOTE: wiring extra hooks not in the ACS-Core minimum: {sorted(extras)}") + missing = set(ACS_CORE_HOOKS) - set(args.hooks) + if missing: + warnings.append( + f"NOTE: wiring a SUBSET of ACS-Core's 6 mandatory hooks. " + f"Missing: {sorted(missing)}. ACS-Core conformance requires " + f"all 6 ({', '.join(ACS_CORE_HOOKS)}).") + + return warnings + + +# ────────────────────────────────────────────────────────────────────── +# CLI +# ────────────────────────────────────────────────────────────────────── + +def main(argv: list[str] | None = None) -> int: + p = argparse.ArgumentParser( + description="Wire (or unwire) the Claude Code ACS adapter into settings.json.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__.split("Examples\n========")[1] if "Examples\n========" in __doc__ else "", + ) + p.add_argument("--guardian-url", + help="Guardian endpoint (http:// or https://). Required unless --unwire.") + p.add_argument("--secret-file", + help="Path to the HMAC secret file (preferred). The adapter reads it lazily.") + p.add_argument("--secret-env-inline", metavar="HEX", + help="HMAC secret inlined into settings.json env (visible in `ps aux`; " + "dev only). 
Use --secret-file for production.") + p.add_argument("--settings", + default="~/.claude/settings.json", + help="Path to the Claude Code settings file (default: ~/.claude/settings.json).") + p.add_argument("--adapter", + default=str(DEFAULT_ADAPTER_PATH), + help=f"Absolute path to acs_adapter.py (default: {DEFAULT_ADAPTER_PATH}).") + p.add_argument("--python-bin", + default="python3", + help="Python interpreter the hook command uses (default: python3 from PATH).") + p.add_argument("--hooks", default=",".join(ACS_CORE_HOOKS), + help=f"Comma-separated hook names to wire (default: all 6 ACS-Core hooks: " + f"{','.join(ACS_CORE_HOOKS)}).") + posture_group = p.add_mutually_exclusive_group() + posture_group.add_argument("--default-deny", action="store_true", + help="Force ACS_DEFAULT_DENY=1 (fail-CLOSED) on EVERY wired hook. " + "Default behavior: fail-closed only on gate hooks " + f"({', '.join(sorted(GATE_HOOKS))}) — these block actions " + "until the Guardian decides, so a silent fail-open is a " + "policy hole. Non-gate hooks default to fail-open (matches " + "§6.4 spec default).") + posture_group.add_argument("--all-fail-open", action="store_true", + help="Force ACS_DEFAULT_DENY=0 (fail-OPEN) on EVERY wired hook, " + "including gates. NOT RECOMMENDED for production — an " + "unreachable Guardian or malformed envelope on a gate hook " + "lets the action run unguarded. Matches strict §6.4 default.") + p.add_argument("--host-allowlist", default=None, + help="Comma-separated hostnames the adapter will accept as Guardian URLs " + "(defense in depth against env-var attacks).") + p.add_argument("--unwire", action="store_true", + help="Remove any ACS-wired hooks (preserves non-ACS entries).") + p.add_argument("--write", action="store_true", + help="Actually write the changes to settings.json (with timestamped backup). 
" + "Without this flag, this tool only prints what it WOULD do.") + + args = p.parse_args(argv) + + # Argument validation + if not args.unwire and not args.guardian_url: + p.error("--guardian-url is required (unless --unwire)") + if args.secret_file and args.secret_env_inline: + p.error("provide --secret-file OR --secret-env-inline, not both") + + settings_path = _expand(args.settings) + adapter_path = _expand(args.adapter) + if not args.unwire and not adapter_path.exists(): + p.error(f"adapter not found at {adapter_path}; pass --adapter to override") + + hook_names = [h.strip() for h in args.hooks.split(",") if h.strip()] + args.hooks = hook_names # for the warning function + + # Build new settings + existing = load_settings(settings_path) + if args.unwire: + new = merge_unwire(existing, + hook_names if args.hooks else ACS_CORE_HOOKS + [ + "Stop", "SubagentStop", "PreCompact", + ]) + else: + # Build one command per hook. Each hook gets the deny posture + # that matches its safety category: + # - --default-deny: ALL hooks fail-closed. + # - --all-fail-open: ALL hooks fail-open. + # - (default): gate hooks fail-closed, others fail-open. 
+ commands_by_hook: dict[str, str] = {} + for hook in hook_names: + if args.default_deny: + hook_deny = True + elif args.all_fail_open: + hook_deny = False + else: + hook_deny = hook in GATE_HOOKS + commands_by_hook[hook] = build_command( + adapter_path=adapter_path, + guardian_url=args.guardian_url, + secret_file=args.secret_file, + secret_env=args.secret_env_inline, + default_deny=hook_deny, + host_allowlist=args.host_allowlist, + python_bin=args.python_bin, + ) + new = merge_wire(existing, hook_names, commands_by_hook) + + # Render + warnings = validate_inputs(args) + for w in warnings: + print(f" ⚠ {w}", file=sys.stderr) + if warnings: + print(file=sys.stderr) + + # Show per-hook fail posture so operator sees the chosen safety mix + if not args.unwire: + print("Per-hook fail posture:") + for hook in hook_names: + if args.default_deny: + posture = "fail-CLOSED (forced via --default-deny)" + elif args.all_fail_open: + posture = "fail-OPEN (forced via --all-fail-open)" + elif hook in GATE_HOOKS: + posture = "fail-CLOSED (gate hook default)" + else: + posture = "fail-OPEN (observational hook default)" + print(f" {hook:18s} → {posture}") + print() + + diff = render_diff(existing, new, label=str(settings_path)) + if not diff: + print(f"No change — {settings_path} already in the desired state.") + return 0 + + print("=" * 70) + print(f"Proposed change to {settings_path}") + print("=" * 70) + print(diff if diff else "(no diff)") + print("=" * 70) + + if not args.write: + print() + print("Dry-run only. To apply, re-run with --write.") + print("A timestamped backup of the original file will be created.") + return 0 + + backup = write_atomically(settings_path, render(new)) + print() + print(f"✓ wrote {settings_path}") + if backup: + print(f" backup at {backup}") + if not args.unwire: + print() + print("Next steps:") + print(" 1. Make sure the Guardian is running and reachable at " + f"{args.guardian_url}") + print(" 2. 
Restart any open Claude Code sessions — settings.json is " + "read at session start, not live") + print(" 3. Verify the wiring works:") + print(f" cd {HERE}") + print(" python3 e2e_check.py") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/adapters/cursor/README.md b/adapters/cursor/README.md new file mode 100644 index 0000000..fa4cc0d --- /dev/null +++ b/adapters/cursor/README.md @@ -0,0 +1,347 @@ +# ACS adapter: Cursor + +A drop-in adapter that wires [Cursor](https://cursor.com) hooks to an ACS Guardian. No agent code changes; configuration only. + +## How it works + +Cursor fires a hook (e.g. `preToolUse`) by running a shell command from `hooks.json`. The command receives the hook event as JSON on stdin and emits Cursor-shaped JSON on stdout (or exit code 2 for some events). + +This adapter is that command. On every hook event Cursor spawns it as a subprocess; the adapter: + +1. Reads the Cursor hook event from stdin. +2. Translates it to an ACS JSON-RPC envelope ([mapping.md](./mapping.md)). +3. Signs it with HMAC-SHA256 (`ACS_HMAC_SECRET_FILE`). +4. POSTs to the Guardian. +5. Verifies the response signature. +6. Translates the verdict back to Cursor's expected output shape and writes to stdout (or exits with code 2 for `beforeSubmitPrompt`). +7. Exits. + +The handshake/hello fires once per session, cached on disk so subsequent events skip the round-trip. + +### Decision honoring (§6.4) + +ACS-Core §6.4 requires the framework to wait for the verdict and apply it before the action executes. Cursor provides this guarantee through its hook protocol: like Claude Code, the adapter is invoked as a blocking subprocess and Cursor reads its stdout (or exit code, for `beforeSubmitPrompt`) for the decision before the action runs. Cursor also provides a native `failClosed: true` flag per hook entry that we set on gate hooks for defense in depth. 
+ +## Install — five steps + +You need three pieces co-located on the same machine: a running Guardian, a shared HMAC secret, and a `hooks.json` that wires the adapter into Cursor's hooks. `wire.py` does step 3 for you. + +Commands below assume `$ACS_REPO` points at your local clone of `Agent-Control-Standard/ACS` (or your fork). Export it once: + +```bash +export ACS_REPO=/path/to/your/clone # e.g., $HOME/code/ACS +``` + +### 1. Generate the shared HMAC secret + +Both the adapter and the Guardian read this file. Mode 0600 is enforced by the adapter — anything looser and it refuses to start. + +```bash +mkdir -p ~/.acs +openssl rand -hex 32 > ~/.acs/hmac.key +chmod 600 ~/.acs/hmac.key +``` + +### 2. Run the Guardian + +Use the example Guardian for testing; a production Guardian is the same wire protocol with a real policy engine attached. + +```bash +ACS_HMAC_SECRET_FILE=~/.acs/hmac.key \ + python3 "$ACS_REPO/adapters/example-guardian/example_guardian.py" \ + --port 8787 +``` + +Keep this terminal open. You should see `[guardian] listening on 127.0.0.1:8787`. + +(For long-running setups: a `launchd` plist on macOS or a `systemd` unit on Linux. Out of scope here.) + +### 3. Wire `hooks.json` + +`wire.py` does this safely — dry-run by default, atomic write with a timestamped backup when you pass `--write`. 
+ +Cursor reads from two locations: +- `~/.cursor/hooks.json` — **user-level** (every workspace) +- `<workspace>/.cursor/hooks.json` — **project-level** (this workspace only; overrides user-level) + +```bash +cd "$ACS_REPO/adapters/cursor" + +# Preview wiring into ~/.cursor/hooks.json (default — user-level) +python3 wire.py \ + --guardian-url=http://127.0.0.1:8787/acs \ + --secret-file=~/.acs/hmac.key + +# Apply user-level wiring +python3 wire.py \ + --guardian-url=http://127.0.0.1:8787/acs \ + --secret-file=~/.acs/hmac.key \ + --write + +# OR — project-level wiring for a specific workspace +python3 wire.py \ + --guardian-url=http://127.0.0.1:8787/acs \ + --secret-file=~/.acs/hmac.key \ + --settings=./.cursor/hooks.json \ + --write +``` + +What it wires by default (ACS-Core minimum 6, mapped to Cursor's vocabulary): + +| Cursor event | ACS step method | Posture | +|---|---|---| +| `sessionStart` | sessionStart | fail-open (observational) | +| `beforeSubmitPrompt` | userMessage | **fail-CLOSED** (gate) | +| `preToolUse` | toolCallRequest | **fail-CLOSED** (gate) | +| `postToolUse` | toolCallResult | fail-open | +| `afterAgentResponse` | agentResponse | fail-open | +| `sessionEnd` | sessionEnd | fail-open | + +Gate hooks get **both** `ACS_DEFAULT_DENY=1` (our env var) AND `failClosed: true` (Cursor's native flag) — defense in depth: two independent mechanisms that both must fail open for a gate to leak. + +Override with `--default-deny` (fail-closed on every hook) or `--all-fail-open` (strict §6.4 default everywhere). + +To remove the wiring later: `python3 wire.py --unwire --write`. + +### 4. Restart Cursor + +Cursor reads `hooks.json` at startup, not live. Existing windows keep their pre-wiring config until restart. + +### 5. Verify the install + +```bash +cd "$ACS_REPO/adapters/cursor" +python3 e2e_check.py +``` + +Cursor is a GUI app — it has no headless CLI like `claude --print`. 
The e2e check is therefore **semi-automated**: the script does everything programmatic (Guardian setup, hooks wiring into a temp workspace, validation of envelopes) and prints precise instructions for actions you perform in Cursor. Wall-clock ~5-10 minutes total. + +The final line is `YOUR CURSOR INSTALL IS ACS-CONFORMANT` (exit 0) or a per-scenario failure list (exit 1). + +You can also do an in-session manual smoke test (see [Smoke tests](#smoke-tests) below). + +## Prerequisites + +- **Cursor** installed — <https://cursor.com> +- **Python 3.10+** with `jsonschema` and `rfc8785` — `pip install -r ../requirements-test.txt` +- **Canonical ACS schemas** reachable on disk. Default location `/tmp/acs-spec-source/specification/v0.1.0/`; override via `ACS_SPEC_DIR`. Clone with: + ```bash + git clone https://github.com/Agent-Control-Standard/ACS.git /tmp/acs-spec-source + ``` + +## Smoke tests + +Five tests, ordered from broadest to most specific. Run any/all. + +### Smoke #1 — automated test suite (unit + integration, ~30s) + +Run from `adapters/` (the conformance suite lives at the top level): + +```bash +cd "$ACS_REPO/adapters" + +python3 -m unittest test_acs_core_conformance +# Expect: Ran 48 tests in ~10s / OK (every ACS-Core MUST) + +(cd cursor && python3 -m unittest discover tests) +# Expect: Ran 50 tests / OK (skipped=1) (round-trip + schema + manual placeholder) + +(cd _common && python3 -m unittest discover tests) +# Expect: Ran 38 tests / OK (security + edge cases) +``` + +If any of those fail, the failure message names the specific spec MUST or property that broke. + +### Smoke #2 — semi-automated Cursor end-to-end (~5-10 minutes) + +```bash +cd "$ACS_REPO/adapters/cursor" +python3 e2e_check.py +``` + +Sets up a temp workspace with project-level `.cursor/hooks.json`, starts a recording Guardian wired to the real `example_guardian` policy, and walks you through 6 steps (SETUP, ALLOW, READ-TOOL, DESTRUCTIVE, USER-MESSAGE, HANDSHAKE-ONCE). 
The same shipping policy is active across every step — running `rm -rf` during any prompt gets the real destructive-Bash regex applied. At each step it tells you exactly what to do in Cursor, then validates the envelopes that arrived. + +### Smoke #3 — in-session manual test + +Open a Cursor workspace where hooks are wired. In the Agent panel try: + +``` +Use the shell to run: echo hello +``` + +In your Guardian terminal you should see roughly this sequence: + +``` +[guardian] handshake/hello session=... +[guardian] steps/sessionStart session=... +[guardian] steps/userMessage session=... +[guardian] steps/toolCallRequest session=... +[guardian] steps/toolCallResult session=... +[guardian] steps/agentResponse session=... +``` + +Then ask Cursor for a denied command: + +``` +Use the shell to run: rm -rf /tmp/some-fake-path +``` + +The example Guardian's regex matches `rm -rf /...`; Cursor surfaces the Guardian's `reasoning` to the user and the command never runs. + +### Smoke #4 — audit-cause differentiation + +Verifies the adapter's audit log distinguishes "Guardian unreachable" (ops issue) from "Guardian rejected the envelope" (client/operator bug). 
+ +Unsigned envelope to a signing-required Guardian: + +```bash +ACS_GUARDIAN_URL="http://127.0.0.1:8787/acs" \ +ACS_HMAC_SECRET="" \ +python3 "$ACS_REPO/adapters/cursor/acs_adapter.py" preToolUse 2>&1 <<'EOF' +{"session_id":"11111111-1111-4111-8111-111111111111","tool_name":"Bash","tool_input":{"command":"echo test"}} +EOF +``` + +Expected stderr — note the `cause` field: + +``` +acs-adapter: Guardian returned JSON-RPC error -32004 (signature_invalid_response): SIGNATURE_INVALID +ACS_AUDIT {"acs_audit_event": "fail_open_bypass", "cause": "signature_invalid_response", ...} +``` + +Guardian unreachable (different cause, same disposition): + +```bash +ACS_GUARDIAN_URL="http://127.0.0.1:1/dead" \ +python3 "$ACS_REPO/adapters/cursor/acs_adapter.py" preToolUse 2>&1 <<'EOF' +{"session_id":"11111111-1111-4111-8111-111111111111","tool_name":"Bash","tool_input":{"command":"echo test"}} +EOF +``` + +Expected: + +``` +acs-adapter: Guardian unreachable: +ACS_AUDIT {"acs_audit_event": "fail_open_bypass", "cause": "transport_failure", ...} +``` + +Same `acs_audit_event`, distinct `cause`. Operators grep on `cause=` to triage. + +### Smoke #5 — pre-flight inventory (paranoid) + +If you're debugging, this is the fastest "where did I go wrong" sweep: + +```bash +echo "=== Guardian listening? ===" +lsof -i :8787 | head -3 || echo "NOT RUNNING" + +echo "=== Secret file 0600? ===" +ls -la ~/.acs/hmac.key + +echo "=== Hooks wired? ===" +python3 -c "import json, os; d=json.load(open(os.path.expanduser('~/.cursor/hooks.json'))); print(list(d.get('hooks',{}).keys()))" + +echo "=== Guardian responds to system/ping? ===" +cd "$ACS_REPO/adapters" && python3 -c " +import sys; sys.path.insert(0, '_common') +from acs_common import ping; import json +r = ping('http://127.0.0.1:8787/acs'); print(json.dumps(r, indent=2)[:200] if r else 'no response') +" +``` + +## Files + +- `acs_adapter.py` — the adapter itself. Stdlib + `rfc8785` for JCS canonicalization. 
+- `wire.py` — `hooks.json` wiring CLI (dry-run by default; `--write` to apply). +- `e2e_check.py` — semi-automated Cursor end-to-end verifier (6 steps, real `example_guardian` policy). +- `hooks.json.example` — reference wiring (`wire.py` produces a more comprehensive one). +- `mapping.md` — Cursor hook → ACS step method table, plus disposition translation. +- `tests/` — round-trip + schema tests + manual-procedure placeholder. +- `tests/example_payloads.md` — masked real-world payload examples showing exactly what Cursor emits. +- `tests/live_verification.md` — manual reproduction procedure (Cursor has no headless mode). + +The adapter shares `adapters/_common/` with the Claude Code and NAT adapters (signing, handshake cache, audit events, URL allowlist). + +## How it differs from the Claude Code adapter + +Same architectural pattern (shell-stdin/stdout, JSON in, JSON out), different protocol: + +| Aspect | Claude Code | Cursor | +|---|---|---| +| Event dispatch | Single command, event type in `hook_event_name` field | One command per event, event name passed as `argv[1]` | +| Allow/deny field | `hookSpecificOutput.permissionDecision` | `permission` (top-level) | +| Modify input field | `hookSpecificOutput.updatedInput` | `updated_input` | +| Block via exit code | Optional (`exit 2`) | Supported (`exit 2`); `beforeSubmitPrompt` uses exit code rather than JSON output | +| Fail-closed flag | env var `ACS_DEFAULT_DENY` | hook-level `failClosed: true` in `hooks.json`, PLUS env var | +| Per-event output keys | Mostly uniform via `hookSpecificOutput` | Event-specific (`permission`, `additional_context`, `updated_mcp_tool_output`, `followup_message`, ...) | +| Headless CLI for tests | `claude --print` | None — Cursor is a desktop GUI; e2e is semi-automated | + +The adapter handles all these protocol differences internally; the Guardian sees the same ACS JSON-RPC shape from both. 
+ +## Configuration + +The adapter is configured by environment variables, typically set per-hook by `wire.py`: + +| Variable | Default | Purpose | +|---|---|---| +| `ACS_GUARDIAN_URL` | `http://127.0.0.1:8787/acs` | Guardian endpoint. http/https only; SSRF allowlist refuses other schemes. | +| `ACS_HMAC_SECRET_FILE` | (unset) | Path to a 0600 file holding the shared HMAC secret. | +| `ACS_HMAC_SECRET` | (unset) | Inline secret. Less secure (visible in `ps eauxw`). Prefer the file. | +| `ACS_DEFAULT_DENY` | `0` | Fail-open with audit (§6.4 default). Set to `1` for fail-closed. | +| `ACS_HANDSHAKE` | `1` | Set to `0` to disable the handshake/hello call on first use. | +| `ACS_AGENT_ID` | derived from cwd | Stable agent identifier sent in `metadata.agent_id`. | +| `ACS_HANDSHAKE_CACHE` | `~/.cache/acs-adapter-handshake/` | Per-session ServerHello cache dir. | +| `ACS_GUARDIAN_HOST_ALLOWLIST` | (unset) | Optional comma-separated hostname allowlist (defense in depth). | + +The adapter is invoked as `python3 acs_adapter.py <event>`, where `<event>` is one of: `sessionStart`, `sessionEnd`, `stop`, `preToolUse`, `postToolUse`, `postToolUseFailure`, `subagentStart`, `beforeShellExecution`, `afterShellExecution`, `beforeMCPExecution`, `afterMCPExecution`, `afterFileEdit`, `afterTabFileEdit`, `beforeSubmitPrompt`, `preCompact`, `afterAgentResponse`, `afterAgentThought`. + +## On-disk state + +- `~/.cache/acs-adapter-handshake/<session_id>.json` — cached ServerHello per session. Mode 0600; refreshed when older than 1 hour. +- `~/.cache/acs-adapter-session/<session_id>.json` — session-state cache (last step_id, seen step_ids). Used by `subagentStart` and `preCompact` to populate real `parent_step_id` / `entries_to_compact` from session history. +- `~/.cache/acs-guardian-state/<session_id>.json` — Guardian-side per-session chain head + replay set; survives Guardian restart. 
+ +## Conformance status + +Honest, MUST-by-MUST against `docs/spec/conformance.md`: + +| ACS-Core item | Status | +|---|---| +| Handshake (`handshake/hello`) | ✓ on first session call; cached per-session | +| JSON-RPC envelope shape (`request-envelope.json`) | ✓ validates against canonical schema for every mapped hook (36 tests) with format checking | +| Hook taxonomy (6 minimum) | ✓ all six covered; 17 Cursor events mapped total (`subagentStop` intentionally omitted — see honesty table below) | +| Dispositions | ALLOW / DENY / ASK supported on **permission (pre-execution) events** (`preToolUse`, `beforeShellExecution`, `beforeMCPExecution`, `beforeSubmitPrompt`, `subagentStart`). DEFER substituted to ASK (Cursor has no defer). MODIFY supported on `preToolUse` via `updated_input`. **Lifecycle / post-execution hooks (`afterAgentResponse → steps/agentResponse`, `sessionStart`, `sessionEnd`, `afterShellExecution`, etc.) are observation-only** — Cursor fires them after the message / side effect has occurred; a Guardian `deny` cannot undo it. See `mapping.md`. 
| +| Unknown-disposition fail posture | ✓ | +| SessionContext + published `chain_hash` | ✓ session_id coerced to UUID; Guardian computes rolling SHA-256 chain | +| Replay protection | ✓ Guardian enforcement (REPLAY_DETECTED -32005, TIMESTAMP_OUT_OF_WINDOW -32006) | +| Baseline integrity (HMAC-SHA256) | ✓ when `ACS_HMAC_SECRET[_FILE]` is set; SIGNATURE_INVALID -32004 on tamper | +| Decision honoring (§6.4) | ✓ Cursor blocks on permission deny; adapter uses exit-2 where stdout JSON is not available; fail-open emits `ACS_AUDIT` event; audit `cause` field distinguishes failure modes (transport vs signature vs malformed envelope vs replay vs skew) | +| Liveness `system/ping` | ✓ Guardian-side | +| `nonce` (optional replay field) | ✗ adapter does not emit `nonce`; the envelope field is OPTIONAL in v0.1 | +| Wrapped MCP `protocols/MCP/*` | ⚠ partial — Cursor's `beforeMCPExecution` is mapped to `steps/toolCallRequest`, not to the `protocols/MCP/*` wrapped form. Real wrapping requires forwarding the full MCP request shape, not flattening it; this adapter does not do that. | + +### Per-hook honesty table + +Cursor does not expose every field the ACS v0.1.0 hook schemas require. Where the schema is strict and Cursor is silent, the adapter populates fields from real session state where possible — and omits the hook entirely when the missing field can only be fabricated. 
+ +| Cursor event → ACS hook | What we fill from real data | What's hardcoded | What's omitted | +|---|---|---|---| +| `subagentStart` → `steps/subagentStart` | `subagent_session_id` (deterministic uuid5 of `parent_session + subagent_id`); `parent_session_id` (the envelope's actual `session_id`); `parent_step_id` (last step_id the adapter has seen in this session, tracked in `~/.cache/acs-adapter-session/`); `subagent_descriptor.{agent_id,agent_name}` (from Cursor's `subagent_id` / `subagent_type`) | `intent_derivation = "derived_from_parent"` (defensible default for IDE-spawned subagents) | — | +| `preCompact` → `steps/preCompact` | `entries_to_compact` (list of step_ids the adapter has observed in this session, snapshotted from session state); `triggered_by` (Cursor's `trigger` field) | `triggered_by = "framework_initiated"` only when Cursor omits `trigger` | — | +| `subagentStop` → `steps/subagentStop` | — | — | **Not forwarded.** Required `final_chain_hash` is genuinely unknowable (Cursor maintains no chain). Better to omit than fabricate. | + +These hooks are emitted only when Cursor's `hooks.json` wires them to the adapter. Per-session state for `parent_step_id` / `entries_to_compact` requires the adapter to be wired to at least one earlier hook in the same session (typically `preToolUse`); the adapter records each step's `request_id` to the session-state file on every invocation. + +## Troubleshooting + +| Symptom | Likely cause | +|---|---| +| Cursor's agent runs commands as if no Guardian exists | Cursor reads `hooks.json` at startup. **Restart Cursor** after `wire.py --write`. | +| Adapter exits 0 with empty stdout; Guardian terminal silent | Adapter pointed at the wrong URL (check `ACS_GUARDIAN_URL` in your hooks.json); check stderr for `ACS_AUDIT cause=transport_failure`. | +| Every tool call gets denied | Likely `ACS_DEFAULT_DENY=1` + Guardian down. Check the Guardian process is running. 
| +| Adapter says `SecretFilePermissionsError` | The HMAC secret file is mode > 0600. `chmod 600 ~/.acs/hmac.key`. | +| Guardian returns `-32004 SIGNATURE_INVALID` | Adapter and Guardian aren't reading the same secret. `cat ~/.acs/hmac.key` on both sides should match. | +| Guardian returns `-32006 TIMESTAMP_OUT_OF_WINDOW` | Clock skew between adapter and Guardian > 5 minutes. Sync time. | +| Guardian returns `-32600 Invalid Request` for `metadata.session_id` | Session ID wasn't coerced to a UUID. Cursor sends conversation_ids that aren't UUIDs; the adapter coerces them via uuid5. If you see this error, file a bug — the coercion is unconditional in `_session_id`. | + +Everything the adapter does that's not policy decision-making is audited on stderr as a JSON line prefixed `ACS_AUDIT`. The `cause` field tells you which failure mode fired. diff --git a/adapters/cursor/acs_adapter.py b/adapters/cursor/acs_adapter.py new file mode 100755 index 0000000..50ec76f --- /dev/null +++ b/adapters/cursor/acs_adapter.py @@ -0,0 +1,651 @@ +#!/usr/bin/env python3 +""" +ACS adapter for Cursor hooks. + +Translates a Cursor hook event into a signed ACS JSON-RPC request, +sends it to a Guardian, and translates the ACS response back to +Cursor's expected output format. + +Wire-format ground truth: Agent-Control-Standard/ACS specification/v0.1.0/ + +Note on payload completeness: Cursor does not expose every field ACS +v0.1.0 hook schemas require. + + - `subagentStart` — three of four required fields (subagent_session_id, + parent_session_id, parent_step_id) are populated from real session + data via deterministic UUID5 and the adapter's session-state tracking + of the last step_id. The fourth, `intent_derivation`, is hardcoded to + `derived_from_parent` (the defensible default for IDE-spawned subagents). 
+ - `preCompact` — both required fields are real: `entries_to_compact` is + the list of step_ids the adapter has observed in this session; + `triggered_by` comes from Cursor's `trigger` field. + - `subagentStop` — NOT forwarded. The required `final_chain_hash` is + genuinely unknowable (Cursor maintains no chain). Better to omit than + to fabricate. + +See the README per-hook honesty table for the full mapping. + +Usage in hooks.json: + { "command": "python3 /path/to/acs_adapter.py preToolUse" } + +Environment variables (same defaults / semantics as the Claude Code adapter): + ACS_GUARDIAN_URL Guardian endpoint (default: http://127.0.0.1:8787/acs) + ACS_DEFAULT_DENY "1" = fail-closed. Default "0" (spec default per §6.4). + Cursor also honors per-hook `failClosed: true` in hooks.json. + ACS_HMAC_SECRET Shared secret for HMAC-SHA256 signing per §10. Unset = no signing (local dev). + ACS_AGENT_ID Explicit agent_id; defaults to cursor:<first 8 hex chars of sha256(cwd)>, or cursor:unknown when no working directory is known. + ACS_HANDSHAKE "0" disables handshake. Default "1". +""" +from __future__ import annotations + +import hashlib +import json +import os +import sys +import urllib.error +import urllib.request +import uuid +from pathlib import Path +from typing import Any, Callable + + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "_common")) +from acs_common import ( # noqa: E402 + ACS_VERSION, + audit_event, + coerce_uuid, + ensure_session_handshake, + guardian_error_cause, + iso8601_now, + load_session_state, + record_step, + save_session_state, + sign_envelope, + validate_guardian_url, + verify_signature, +) + + +GUARDIAN_URL = os.environ.get("ACS_GUARDIAN_URL", "http://127.0.0.1:8787/acs") +DEFAULT_DENY = os.environ.get("ACS_DEFAULT_DENY", "0") == "1" +HANDSHAKE_ENABLED = os.environ.get("ACS_HANDSHAKE", "1") == "1" + + +# ─── Hook taxonomy ────────────────────────────────────────────────────────── + +# Cursor hook event -> ACS step method. 
+# +# Intentionally OMITTED from this map (documented gap, not synthesis): +# subagentStop — `final_chain_hash` (64-hex SHA-256 of the subagent's +# ContextEntry chain) is genuinely unknowable because +# Cursor does not maintain a chain on its side. Emitting +# a fabricated hash would be schema-valid but +# semantically meaningless. Cursor's subagentStop event +# is therefore not forwarded. The Cursor README per-hook +# honesty table documents the gap. +HOOK_MAP: dict[str, str] = { + "sessionStart": "steps/sessionStart", + "sessionEnd": "steps/sessionEnd", + "stop": "steps/sessionEnd", + "preToolUse": "steps/toolCallRequest", + "postToolUse": "steps/toolCallResult", + "postToolUseFailure": "steps/toolCallResult", + "subagentStart": "steps/subagentStart", + "beforeShellExecution": "steps/toolCallRequest", + "afterShellExecution": "steps/toolCallResult", + "beforeMCPExecution": "steps/toolCallRequest", + "afterMCPExecution": "steps/toolCallResult", + "afterFileEdit": "steps/toolCallResult", + "beforeSubmitPrompt": "steps/userMessage", + "preCompact": "steps/preCompact", + "afterAgentResponse": "steps/agentResponse", + "afterAgentThought": "steps/agentResponse", + "afterTabFileEdit": "steps/toolCallResult", +} + +# Cursor events whose deny shape is `{"permission": "deny", ...}` +# (vs `beforeSubmitPrompt` which uses exit code 2, and post-tool events +# which only carry `additional_context`). 
+PERMISSION_EVENTS = frozenset({ + "preToolUse", "subagentStart", "beforeShellExecution", "beforeMCPExecution", +}) + +POST_TOOL_EVENTS = frozenset({ + "postToolUse", "postToolUseFailure", "afterMCPExecution", + "afterShellExecution", "afterFileEdit", "afterTabFileEdit", +}) + +PERMISSION_MAP: dict[str, str] = { + "allow": "allow", "deny": "deny", "ask": "ask", "defer": "ask", # no native defer +} + +KNOWN_DECISIONS = frozenset({"allow", "deny", "modify", "ask", "defer"}) + +SESSION_END_REASONS = frozenset({"completed", "cancelled", "error", "timeout", "abandoned"}) + + +# ─── Response writers — one definition each, used everywhere ────────────── + +def _emit(payload: dict[str, Any]) -> None: + """Single point where the adapter writes to stdout. Idempotent on + empty dict. beforeSubmitPrompt uses internal __exit_code/_reasoning + keys (handled in main) and never reaches this writer with them.""" + if not payload: + return + json.dump(payload, sys.stdout) + sys.stdout.write("\n") + + +def _permission_response(decision: str, message: str = "", + updated_input: dict | None = None) -> dict[str, Any]: + """Cursor's permission-event response shape: top-level `permission` + plus optional user/agent messages and updated_input (preToolUse only).""" + out: dict[str, Any] = {"permission": decision} + if message: + out["user_message"] = message + out["agent_message"] = message + if updated_input is not None: + out["updated_input"] = updated_input + return out + + +def _post_tool_response(additional_context: str | None = None, + updated_mcp_tool_output: str | None = None) -> dict[str, Any]: + """Cursor post-tool event response — only `additional_context` and + (for afterMCPExecution) `updated_mcp_tool_output`.""" + out: dict[str, Any] = {} + if additional_context: + out["additional_context"] = additional_context + if updated_mcp_tool_output is not None: + out["updated_mcp_tool_output"] = updated_mcp_tool_output + return out + + +# ─── Helpers 
──────────────────────────────────────────────────────────────── + +def _agent_id(event: dict[str, Any]) -> str: + explicit = os.environ.get("ACS_AGENT_ID") + if explicit: + return explicit + cwd = event.get("cwd") or event.get("workspace_path") or os.environ.get("PWD") or "" + if cwd: + return f"cursor:{hashlib.sha256(cwd.encode()).hexdigest()[:8]}" + return "cursor:unknown" + + +def _session_id(event: dict[str, Any]) -> str: + raw = event.get("session_id") or event.get("conversation_id") or "" + return coerce_uuid(raw, namespace_prefix="cursor") if raw else "" + + +def _workspace(event: dict[str, Any]) -> str | None: + """Workspace identifier folded into the session-state file key so + two Cursor windows on different projects can't collide on a shared + non-UUID conversation_id.""" + return event.get("workspace_path") or event.get("cwd") or None + + +def _wrap_arguments(raw: dict[str, Any]) -> dict[str, Any]: + """tool-call-request.json:26-37 — each arg is {value, provenance?}.""" + return {k: {"value": v} for k, v in (raw or {}).items()} + + +def _outputs_list(raw: Any) -> list[dict[str, Any]]: + """tool-call-result.json wants outputs as array of {value, provenance?}.""" + if raw is None: + return [] + if isinstance(raw, list): + return [item if isinstance(item, dict) and "value" in item else {"value": item} for item in raw] + return [{"value": raw}] + + +def _tool_use_request_id(tool_call_id: str | None) -> str | None: + """Deterministic UUID5 so postToolUse can carry request_id_ref linking + back to the originating preToolUse (per tool-call-result.json:19-23).""" + if not tool_call_id: + return None + return str(uuid.uuid5(uuid.NAMESPACE_URL, f"cursor:tool_use:{tool_call_id}")) + + +# ─── Payload builders — dispatch table, one function per Cursor event ───── + +def _payload_pretool(event: dict[str, Any]) -> dict[str, Any]: + return { + "tool": {"name": event.get("tool_name") or event.get("tool", "")}, + "arguments": _wrap_arguments(event.get("tool_input") or 
event.get("arguments") or {}), + } + + +def _payload_before_shell(event: dict[str, Any]) -> dict[str, Any]: + return { + "tool": {"name": "Shell"}, + "arguments": _wrap_arguments({"command": event.get("command", "")}), + } + + +def _payload_before_mcp(event: dict[str, Any]) -> dict[str, Any]: + return { + "tool": {"name": f"{event.get('mcp_server', '')}:{event.get('mcp_tool', '')}", + "provider": event.get("mcp_server", "")}, + "arguments": _wrap_arguments(event.get("tool_input") or event.get("arguments") or {}), + } + + +def _payload_posttool(event: dict[str, Any]) -> dict[str, Any]: + payload = { + "tool": {"name": event.get("tool_name") or event.get("tool", "")}, + "exit_status": "failure" if event.get("hook_event_name") == "postToolUseFailure" + or event.get("_event_name") == "postToolUseFailure" else "success", + "outputs": _outputs_list(event.get("tool_output") or event.get("result")), + } + ref = _tool_use_request_id(event.get("tool_call_id") or event.get("tool_use_id")) + if ref: + payload["request_id_ref"] = ref + return payload + + +def _payload_after_shell(event: dict[str, Any]) -> dict[str, Any]: + payload = { + "tool": {"name": "Shell"}, + "exit_status": "failure" if event.get("exit_code", 0) else "success", + "outputs": _outputs_list(event.get("output") or event.get("result")), + } + ref = _tool_use_request_id(event.get("execution_id")) + if ref: + payload["request_id_ref"] = ref + return payload + + +def _payload_after_mcp(event: dict[str, Any]) -> dict[str, Any]: + payload = { + "tool": {"name": f"{event.get('mcp_server', '')}:{event.get('mcp_tool', '')}", + "provider": event.get("mcp_server", "")}, + "exit_status": "success", + "outputs": _outputs_list(event.get("tool_output") or event.get("result")), + } + ref = _tool_use_request_id(event.get("call_id")) + if ref: + payload["request_id_ref"] = ref + return payload + + +def _payload_file_edit(event: dict[str, Any]) -> dict[str, Any]: + return { + "tool": {"name": "Edit"}, + "exit_status": 
"success", + "outputs": _outputs_list({"file_path": event.get("file_path", "")}), + } + + +def _payload_before_submit_prompt(event: dict[str, Any]) -> dict[str, Any]: + return {"content": [{"type": "text", + "value": event.get("prompt") or event.get("user_message", "")}]} + + +def _payload_after_agent(event: dict[str, Any]) -> dict[str, Any]: + return {"content": [{"type": "text", + "value": event.get("response") or event.get("thought", "")}]} + + +def _payload_session_start(event: dict[str, Any]) -> dict[str, Any]: + out: dict[str, Any] = {} + if event.get("workspace_path") or event.get("cwd"): + out["platform_context"] = {"workspace_path": event.get("workspace_path") or event.get("cwd")} + return out + + +def _payload_session_end(event: dict[str, Any]) -> dict[str, Any]: + raw = (event.get("reason") or "").lower() + return {"reason": raw if raw in SESSION_END_REASONS else "completed"} + + +def _payload_subagent_start(event: dict[str, Any]) -> dict[str, Any]: + """All four schema-required fields, populated from real session data + where possible. See Cursor README 'Per-hook honesty table'.""" + sub_raw = event.get("subagent_id", "") + sid = _session_id(event) + st = load_session_state(sid, workspace=_workspace(event)) + parent_step_id = st.get("last_step_id") or sid + payload = { + "subagent_session_id": str(uuid.uuid5( + uuid.NAMESPACE_URL, f"cursor-subagent:{sid}:{sub_raw or 'unknown'}")), + "parent_session_id": sid, + "parent_step_id": parent_step_id, + # Cursor IDE subagents are dispatched by the parent agent + # (Composer/Agent panel routing), inheriting the parent's + # context. derived_from_parent is the defensible default. 
+ "intent_derivation": "derived_from_parent", + } + if sub_raw: + payload["subagent_descriptor"] = { + "agent_id": sub_raw, + "agent_name": event.get("subagent_type", ""), + } + return payload + + +def _payload_precompact(event: dict[str, Any]) -> dict[str, Any]: + """entries_to_compact: real step_ids the adapter has observed in this + session. Cursor doesn't tell us WHICH entries it intends to compact, + but the entries actually IN the session are an honest superset + (compaction always operates on something already observed).""" + sid = _session_id(event) + st = load_session_state(sid, workspace=_workspace(event)) + seen = list(st.get("seen_step_ids") or []) + if not seen: + # No prior steps recorded — adapter wired without preceding hooks. + # Fall back to the session_id as a single placeholder entry. + seen = [sid] + return { + "entries_to_compact": seen, + "triggered_by": (event.get("trigger") or "framework_initiated"), + } + + +_PAYLOAD_BUILDERS: dict[str, Callable[[dict[str, Any]], dict[str, Any]]] = { + "preToolUse": _payload_pretool, + "beforeShellExecution": _payload_before_shell, + "beforeMCPExecution": _payload_before_mcp, + "postToolUse": _payload_posttool, + "postToolUseFailure": _payload_posttool, + "afterShellExecution": _payload_after_shell, + "afterMCPExecution": _payload_after_mcp, + "afterFileEdit": _payload_file_edit, + "afterTabFileEdit": _payload_file_edit, + "beforeSubmitPrompt": _payload_before_submit_prompt, + "afterAgentResponse": _payload_after_agent, + "afterAgentThought": _payload_after_agent, + "sessionStart": _payload_session_start, + "sessionEnd": _payload_session_end, + "stop": _payload_session_end, + "subagentStart": _payload_subagent_start, + "preCompact": _payload_precompact, +} + + +def build_payload(event_name: str, event: dict[str, Any]) -> dict[str, Any]: + builder = _PAYLOAD_BUILDERS.get(event_name) + if not builder: + return {} + # _payload_posttool branches on event_name; thread it through + if event_name in 
("postToolUse", "postToolUseFailure"): + event = {**event, "_event_name": event_name} + return builder(event) + + +# ─── Envelope construction ────────────────────────────────────────────────── + +def build_request(event_name: str, event: dict[str, Any]) -> dict[str, Any]: + method = HOOK_MAP.get(event_name) + if method is None: + return {} + + session_id = _session_id(event) + if not session_id: + return {} + + metadata: dict[str, Any] = { + "agent_id": _agent_id(event), + "session_id": session_id, + "platform": "cursor", + "cursor_event": event_name, + } + if event.get("cwd") or event.get("workspace_path"): + metadata["workspace_path"] = event.get("cwd") or event.get("workspace_path") + + # For *Request methods, pin request_id deterministically so a matching + # *Result can populate request_id_ref pointing back at it. + if method == "steps/toolCallRequest": + ref = _tool_use_request_id(event.get("tool_call_id") or event.get("tool_use_id") + or event.get("execution_id")) + request_id = ref or str(uuid.uuid4()) + else: + request_id = str(uuid.uuid4()) + + envelope = { + "jsonrpc": "2.0", + "id": str(uuid.uuid4()), + "method": method, + "params": { + "acs_version": ACS_VERSION, + "request_id": request_id, + "timestamp": iso8601_now(), + "metadata": metadata, + "payload": build_payload(event_name, event), + }, + } + sign_envelope(envelope, session_id=session_id) + return envelope + + +def _maybe_handshake(event: dict[str, Any]) -> None: + """Called on every hook event. Idempotent per session via disk + cache — only the first event of a session actually POSTs + handshake/hello. 
See ensure_session_handshake's docstring.""" + if not HANDSHAKE_ENABLED: + return + sid = _session_id(event) + if not sid: + return + ensure_session_handshake( + guardian_url=GUARDIAN_URL, + session_id=sid, + agent_id=_agent_id(event), + platform="cursor", + methods_implemented=list(HOOK_MAP.values()), + ) + + +def call_guardian(request: dict[str, Any]) -> dict[str, Any]: + validate_guardian_url(GUARDIAN_URL) # SSRF: refuse file://, ftp://, etc. + body = json.dumps(request).encode("utf-8") + req = urllib.request.Request( + GUARDIAN_URL, data=body, + headers={"Content-Type": "application/json"}, method="POST", + ) + with urllib.request.urlopen(req, timeout=5.0) as resp: + return json.loads(resp.read().decode("utf-8")) + + +# ─── Response translation — dispatch table by event category ────────────── +# +# Cursor's response shapes group naturally into 4 categories: +# - permission events (preToolUse, subagentStart, beforeShellExecution, +# beforeMCPExecution): {"permission": ..., "user_message": ..., "agent_message": ..., +# "updated_input": ...} +# - post-tool events (postToolUse, postToolUseFailure, afterShellExecution, +# afterMCPExecution, afterFileEdit, afterTabFileEdit): {"additional_context": ..., +# "updated_mcp_tool_output": ...} +# - beforeSubmitPrompt: uses exit code 2 (not stdout) to block; carries +# internal __exit_code/_reasoning keys consumed by main() +# - everything else: no response shape (Stop, SessionStart, SessionEnd, etc.) 
+ +def _translate_permission(decision: str, reasoning: str, + modifications: dict, event_name: str) -> dict[str, Any]: + if decision in PERMISSION_MAP: + return _permission_response(PERMISSION_MAP[decision], reasoning) + if decision == "modify": + overrides = modifications.get("parameter_overrides") + if overrides is not None and event_name == "preToolUse": + return _permission_response("allow", reasoning, updated_input=overrides) + return _permission_response("deny", + f"MODIFY substituted to DENY: {reasoning}") + return {} + + +def _translate_post_tool(decision: str, reasoning: str, + modifications: dict, event_name: str) -> dict[str, Any]: + if decision == "modify": + if event_name == "afterMCPExecution": + updated = modifications.get("modified_content") + if updated is not None: + return _post_tool_response(updated_mcp_tool_output=str(updated)) + return _post_tool_response(additional_context=f"MODIFY received: {reasoning}") + if reasoning: + return _post_tool_response(additional_context=reasoning) + return {} + + +def _translate_before_submit_prompt(decision: str, reasoning: str, + modifications: dict, + event_name: str) -> dict[str, Any]: + """Returns internal markers (__exit_code, _reasoning) consumed by main().""" + return {"__exit_code": 2 if decision == "deny" else 0, + "_reasoning": reasoning if decision == "deny" else None} + + +def translate_response(acs_response: dict[str, Any], event_name: str) -> dict[str, Any]: + result = acs_response.get("result", {}) + decision = (result.get("decision") or "").lower() + reasoning = result.get("reasoning", "") + modifications = result.get("modifications", {}) + + # Unknown disposition under fail-closed → emit a deny in the hook's shape. 
+ if decision not in KNOWN_DECISIONS and DEFAULT_DENY: + reason = f"unknown Guardian disposition '{decision}' (default-deny)" + if event_name in PERMISSION_EVENTS: + return _permission_response("deny", reason) + if event_name == "beforeSubmitPrompt": + return {"__exit_code": 2, "_reasoning": reason} + + if event_name in PERMISSION_EVENTS: + return _translate_permission(decision, reasoning, modifications, event_name) + if event_name in POST_TOOL_EVENTS: + return _translate_post_tool(decision, reasoning, modifications, event_name) + if event_name == "beforeSubmitPrompt": + return _translate_before_submit_prompt(decision, reasoning, modifications, event_name) + + # subagentStop is dropped from HOOK_MAP entirely (see comment on HOOK_MAP); + # other events (sessionStart, sessionEnd, stop, subagentStart, preCompact, + # afterAgentResponse, afterAgentThought) are observational with no + # response shape — empty dict skips stdout emission. + return {} + + +# ─── Main flow ────────────────────────────────────────────────────────────── + +def main() -> int: + if len(sys.argv) < 2: + sys.stderr.write("acs-adapter: missing event name argument (usage: acs_adapter.py )\n") + return 1 + event_name = sys.argv[1] + + raw = sys.stdin.read().strip() + if not raw: + return 0 + try: + event = json.loads(raw) + except json.JSONDecodeError as e: + sys.stderr.write(f"acs-adapter: invalid JSON on stdin: {e}\n") + return _fail(event_name, cause="invalid_stdin_json") + + if event_name not in HOOK_MAP: + return 0 + + _maybe_handshake(event) + + request = None + try: + request = build_request(event_name, event) + if not request: + sys.stderr.write(f"acs-adapter: could not build request for {event_name}\n") + return _fail(event_name, _session_id(event), cause="adapter_build_failed") + # Track this step in session state so subsequent subagentStart / + # preCompact events can cite a real parent_step_id / entries_to_compact. 
+ # Done before call_guardian so even a failed Guardian call leaves + # the step recorded for audit. + try: + sid_for_state = _session_id(event) + rid_for_state = request.get("params", {}).get("request_id") + if sid_for_state and rid_for_state: + record_step(sid_for_state, rid_for_state, workspace=_workspace(event)) + except Exception: # noqa: BLE001 + pass + response = call_guardian(request) + except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, OSError) as e: + sys.stderr.write(f"acs-adapter: Guardian unreachable: {e}\n") + return _fail(event_name, _session_id(event), + cause="transport_failure", + request_id=(request or {}).get("params", {}).get("request_id"), + method=(request or {}).get("method"), + error=str(e)) + except Exception as e: # noqa: BLE001 + sys.stderr.write(f"acs-adapter: adapter error: {e}\n") + return _fail(event_name, _session_id(event), + cause="adapter_exception", error=str(e)) + + # Guardian responded — was it a result or a JSON-RPC error? + if "error" in response: + err = response.get("error") or {} + code = err.get("code") + cause = guardian_error_cause(code) + sys.stderr.write( + f"acs-adapter: Guardian returned JSON-RPC error " + f"{code} ({cause}): {err.get('message','')}\n") + return _fail(event_name, _session_id(event), + cause=cause, + error_code=code, + error_message=err.get("message"), + request_id=(request or {}).get("params", {}).get("request_id"), + method=(request or {}).get("method")) + + if not verify_signature(response, session_id=_session_id(event)): + sys.stderr.write("acs-adapter: response signature invalid\n") + return _fail(event_name, _session_id(event), + cause="response_signature_invalid") + + out = translate_response(response, event_name) + + if event_name == "beforeSubmitPrompt": + exit_code = out.pop("__exit_code", 0) + reasoning = out.pop("_reasoning", None) + if reasoning: + sys.stderr.write(f"acs-adapter: blocking prompt: {reasoning}\n") + # Cursor with `failClosed: true` treats "exit 0 + empty 
stdout" + # as a hook FAILURE (since the hook produced no decision), not + # as an allow. Always emit at minimum `{}` on the allow path so + # Cursor sees a real response. On deny we still use exit code 2; + # stdout is ignored for that path. + if exit_code == 0: + sys.stdout.write("{}\n") + return exit_code + + _emit(out) + return 0 + + +def _fail(event_name: str = "", session_id: str | None = None, *, + cause: str = "unknown", **audit_extras) -> int: + """Apply the deployment's fail posture and record an audit event per §6.4. + + `cause` distinguishes the failure mode (transport_failure, + signature_invalid_response, malformed_envelope_response, etc.) + independently of the posture. Disposition (fail_open_bypass / + decision_failure_fail_closed) is determined by ACS_DEFAULT_DENY; + cause tells operators what actually went wrong so a malformed + envelope (client bug) doesn't get confused with an unreachable + Guardian (ops issue). + """ + if DEFAULT_DENY: + msg = f"ACS adapter: decision-failure ({cause})" + if event_name in PERMISSION_EVENTS: + _emit(_permission_response("deny", msg)) + audit_event("decision_failure_fail_closed", + cause=cause, event=event_name, session_id=session_id, **audit_extras) + return 0 + if event_name == "beforeSubmitPrompt": + sys.stderr.write(f"acs-adapter: prompt blocked ({cause})\n") + audit_event("decision_failure_fail_closed", + cause=cause, event=event_name, session_id=session_id, **audit_extras) + return 2 + audit_event("decision_failure_fail_closed", + cause=cause, event=event_name, session_id=session_id, **audit_extras) + return 0 + + audit_event("fail_open_bypass", + cause=cause, event=event_name, session_id=session_id, **audit_extras) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/adapters/cursor/e2e_check.py b/adapters/cursor/e2e_check.py new file mode 100644 index 0000000..bdcc4db --- /dev/null +++ b/adapters/cursor/e2e_check.py @@ -0,0 +1,540 @@ +#!/usr/bin/env python3 +""" +End-to-end 
conformance check for an adopter's Cursor ACS integration. + +Cursor is a desktop GUI app with no headless CLI (unlike `claude --print`), +so this test cannot drive Cursor automatically. Instead it's +**semi-automated**: the script does everything that can be done +programmatically (Guardian setup, hooks wiring into a temp workspace, +validation of what arrives, PASS/FAIL reporting) and asks the operator +to perform a small set of well-defined actions in Cursor. + +For each scenario the script: + + 1. Configures a recording, signing Guardian for the scenario. + 2. Prints precise instructions for what to do in Cursor. + 3. Waits for the operator to press Enter when done. + 4. Validates what envelopes arrived and prints PASS/FAIL. + +What's verified end-to-end (same properties the Claude Code e2e_check +covers): + + 1. Cursor fires the hooks we expect, in the order we expect. + 2. The adapter translates each into a wire-conformant ACS envelope. + 3. Every envelope is HMAC-signed end-to-end and the Guardian verifies it. + 4. The Guardian's verdicts are actually applied — allow lets the tool + run, deny visibly blocks it in Cursor's UI. + 5. The handshake fires ONCE per Cursor session, even with many hooks. + +Prerequisites: + - Cursor installed (https://cursor.com) + - A test workspace (a throwaway directory you can open in Cursor) + - Python 3.10+ with `jsonschema` and `rfc8785` + - The canonical ACS schemas at $ACS_SPEC_DIR (default + /tmp/acs-spec-source/specification/v0.1.0/) + +Usage (from this directory): + + python3 e2e_check.py + +The script will tell you what to do at each step. Total wall-clock +varies because real human interaction is in the loop; budget 5-10 +minutes for the full sweep. + +Backup your real ~/.cursor/hooks.json BEFORE running this — the +script wires a project-level .cursor/hooks.json inside a temp dir +(doesn't touch your user-level config), but if you ALSO want to test +user-level wiring, save your real file first. 
+""" +from __future__ import annotations + +import json +import os +import shutil +import socket +import subprocess +import sys +import tempfile +import threading +import time +import uuid +from pathlib import Path + +HERE = Path(__file__).resolve().parent +ADAPTER = HERE / "acs_adapter.py" +COMMON_DIR = HERE.parent / "_common" +EXAMPLE_GUARDIAN_DIR = HERE.parent / "example-guardian" +SPEC_DIR_DEFAULT = Path(os.environ.get( + "ACS_SPEC_DIR", "/tmp/acs-spec-source/specification/v0.1.0")) + +sys.path.insert(0, str(COMMON_DIR)) +sys.path.insert(0, str(EXAMPLE_GUARDIAN_DIR)) +import acs_common # noqa: E402 +from test_harness import ( # noqa: E402 + ProgrammableGuardian, + free_port, + validate_request_envelope, + validate_response_envelope, + wait_port, +) +# The REAL example-Guardian policy. Installed once for the whole run so +# every scenario sees the production-shaped policy (not per-scenario +# synthetic handlers): an operator who runs `rm -rf` during the wrong +# prompt still gets denied, a policy regression (like the `rm -rfv` +# evasion fixed in 9713703) gets caught end-to-end through Cursor, and +# the test reflects the real wire+policy integration the adopter ships. +from example_guardian import evaluate_step # noqa: E402 + +HMAC_SECRET = "e2e-test-shared-secret-not-for-production" + + +# Shared pretty-printer + helpers; see adapters/_common/e2e_report.py. 
from e2e_report import (  # noqa: E402
    Report, real_policy_handler as _shared_real_policy_handler,
    assert_envelopes_signed_and_valid as _assert_envelopes_signed_and_valid,
)


def real_policy_handler():
    """Return the shared real-policy handler bound to the example
    Guardian's `evaluate_step`."""
    return _shared_real_policy_handler(evaluate_step)


def _envelope_checks(guardian, sub_results: list) -> None:
    """Run the shared signed-and-valid envelope assertions for `guardian`
    using this module's request-envelope validator, passing along the
    `sub_results` list."""
    _assert_envelopes_signed_and_valid(
        guardian, validate_request_envelope, sub_results)


# ──────────────────────────────────────────────────────────────────────
# Workdir + project-level hooks.json
# ──────────────────────────────────────────────────────────────────────

def write_project_hooks(workdir: Path, port: int) -> Path:
    """Create <workdir>/.cursor/hooks.json that wires every Cursor hook
    event to the adapter. Project-level so it overrides the operator's
    user-level config (~/.cursor/hooks.json) for THIS workspace only.

    The handshake cache is pinned into the workdir so the HANDSHAKE-ONCE
    scenario can clear it to force a re-handshake without polluting the
    operator's `~/.cache/acs-adapter-handshake/`.

    Returns the path of the written hooks.json.
    """
    import shlex  # local import: only this function builds shell command lines

    cursor_dir = workdir / ".cursor"
    cursor_dir.mkdir(parents=True, exist_ok=True)
    hooks_path = cursor_dir / "hooks.json"
    handshake_cache = workdir / ".acs-handshake-cache"
    handshake_cache.mkdir(exist_ok=True)

    def cmd(event_name: str, *, fail_closed: bool) -> str:
        # The command is a shell line (`VAR=value ... python3 script event`),
        # so every interpolated value is shell-quoted; otherwise a checkout
        # or temp dir whose path contains a space splits the command.
        # shlex.quote leaves already-safe strings unchanged.
        env_vars = (
            f"ACS_GUARDIAN_URL=http://127.0.0.1:{port}/acs "
            f"ACS_HMAC_SECRET={shlex.quote(HMAC_SECRET)} "
            "ACS_GUARDIAN_HOST_ALLOWLIST= "
            f"ACS_HANDSHAKE_CACHE={shlex.quote(str(handshake_cache))}"
        )
        if fail_closed:
            env_vars += " ACS_DEFAULT_DENY=1"
        return f"{env_vars} python3 {shlex.quote(str(ADAPTER))} {event_name}"

    def entry(event_name: str, *, fail_closed: bool = False) -> dict:
        # fail_closed sets both the adapter's ACS_DEFAULT_DENY=1 and
        # Cursor's own per-hook `failClosed` flag.
        e = {"command": cmd(event_name, fail_closed=fail_closed)}
        if fail_closed:
            e["failClosed"] = True
        return e

    config = {
        "version": 1,
        "hooks": {
            "sessionStart": [entry("sessionStart")],
            "beforeSubmitPrompt": [entry("beforeSubmitPrompt", fail_closed=True)],
            "preToolUse": [entry("preToolUse", fail_closed=True)],
            "postToolUse": [entry("postToolUse")],
            "beforeShellExecution": [entry("beforeShellExecution", fail_closed=True)],
            "afterShellExecution": [entry("afterShellExecution")],
            "afterAgentResponse": [entry("afterAgentResponse")],
            "sessionEnd": [entry("sessionEnd")],
        },
    }
    hooks_path.write_text(json.dumps(config, indent=2))
    return hooks_path


# ──────────────────────────────────────────────────────────────────────
# Scenarios
# ──────────────────────────────────────────────────────────────────────

TOTAL_SCENARIOS = 5


def scenario_setup_open_workspace(report: Report, workdir: Path,
                                  guardian: ProgrammableGuardian) -> None:
    """Step 0: have the operator open the test workspace in Cursor.
    This is the load-bearing setup — if Cursor doesn't pick up the
    project-level hooks.json, nothing else works.

    `guardian` is accepted for signature parity with the other scenarios
    and is not used here.
    """
    report.case(0, TOTAL_SCENARIOS, "SETUP — open the test workspace in Cursor")
    report.field("Workdir:", str(workdir))
    report.field("Hooks file:", str(workdir / ".cursor" / "hooks.json"))
    report.operator_action([
        "Open Cursor",
        f"File → Open Folder → choose {workdir}",
        "Approve any 'trust this workspace' prompt Cursor shows",
        "Open Cursor's Agent panel (Cmd+L or Cmd+I)",
    ])
    # Nothing to verify on the wire yet — sessionStart fires when the
    # workspace is opened, but it might not have happened yet, or might
    # have been merged with the next scenario's events.
+ + +def scenario_allow(report: Report, workdir: Path, + guardian: ProgrammableGuardian) -> None: + marker = f"ACS_E2E_OK_{uuid.uuid4().hex[:8]}" + report.case(1, TOTAL_SCENARIOS, + f"ALLOW — benign shell exec; Cursor runs the tool") + report.field("Marker:", marker) + report.field("Expected:", "Cursor's agent fires PreToolUse + PostToolUse; " + "Guardian allows; the shell exec runs and the " + f"marker {marker!r} appears in the toolCallResult envelope") + + guardian.reset() + + report.operator_action([ + f"In Cursor's Agent panel, ask the agent EXACTLY:", + f" Use the shell to run: echo {marker}", + f"Wait for the agent to finish (you should see {marker!r} in its output).", + ]) + + sub_results = [] + sub_results.append(("Guardian received at least one envelope", + len(guardian.received) > 0, + f"received {len(guardian.received)}")) + methods = set(r.get("method", "") for r in guardian.received) + # Handshake assertion is owned by the HANDSHAKE-ONCE scenario (which + # clears the cache to force a fresh hello). Here it may be absent + # if it already fired and got cached during SETUP — that is the + # correct §4 behavior, not a failure. + # The Cursor tool call MIGHT come through as preToolUse (if the + # agent used the generic tool path) OR beforeShellExecution (if + # it used Cursor's dedicated shell-exec hook). Accept either. 
+ pretool_or_shell = ("steps/toolCallRequest" in methods) + sub_results.append(("Guardian received a toolCallRequest", + pretool_or_shell, "")) + _envelope_checks(guardian, sub_results) + # Marker should appear in some toolCallResult envelope's outputs + result_envs = [r for r in guardian.received + if r.get("method") == "steps/toolCallResult"] + marker_in_results = any( + marker in json.dumps(r.get("params", {}).get("payload", {}).get("outputs", [])) + for r in result_envs + ) + sub_results.append(("Marker appears in a toolCallResult (shell actually ran)", + marker_in_results, + "marker found" if marker_in_results + else f"marker absent across {len(result_envs)} result envelope(s)")) + + _dump_session_envelopes(report, guardian) + for label, ok, detail in sub_results: + report.sub(label, ok, detail) + report.finish("allow-path", all(ok for _, ok, _ in sub_results)) + + +def scenario_read_tool(report: Report, workdir: Path, + guardian: ProgrammableGuardian) -> None: + """READ-TOOL: different tool, same wire contract. 
Mirrors the Claude + Code e2e — confirms preToolUse arguments are wrapped per + tool-call-request.json:26-37 regardless of which tool fires.""" + probe = workdir / "read-tool-probe.txt" + probe.write_text("ACS read-tool probe contents\n") + + report.case(2, TOTAL_SCENARIOS, + "READ TOOL — different tool, same wire contract") + report.field("Probe file:", str(probe)) + report.field("Expected:", "Cursor fires preToolUse with tool=Read; " + "adapter wraps args per tool-call-request.json:26-37; " + "envelope's arguments.file_path.value matches the probe path") + + guardian.reset() + + report.operator_action([ + f"In Cursor's Agent panel, ask the agent EXACTLY:", + f" Use the Read tool to read {probe.name}; just report what you read.", + f"Wait for the agent to finish.", + ]) + + sub_results = [] + # Walk params/payload with .get() like the other scenarios do, so a + # malformed envelope fails its sub-check instead of raising KeyError + # and aborting the whole run. + read_envs = [r for r in guardian.received + if r.get("method") == "steps/toolCallRequest" + and r.get("params", {}).get("payload", {}).get("tool", {}).get("name") == "Read"] + sub_results.append(("Guardian received a Read steps/toolCallRequest", + bool(read_envs), f"{len(read_envs)} found")) + if read_envs: + env = read_envs[0] + args = env.get("params", {}).get("payload", {}).get("arguments", {}) + sub_results.append( + ("Arguments are wrapped per tool-call-request.json:26-37", + bool(args) and all(isinstance(v, dict) and "value" in v + for v in args.values()), + f"args={list(args.keys())}")) + file_path_val = args.get("file_path", {}).get("value", "") \ + if isinstance(args.get("file_path"), dict) else "" + sub_results.append( + ("Read tool's file_path appears in arguments", + probe.name in str(file_path_val), + f"file_path={file_path_val!r}")) + + _dump_session_envelopes(report, guardian) + for label, ok, detail in sub_results: + report.sub(label, ok, detail) + report.finish("read-tool-path", all(ok for _, ok, _ in sub_results)) + + +def scenario_destructive(report: Report, workdir: Path, + guardian: ProgrammableGuardian) -> None: + """DESTRUCTIVE: drives the REAL example-Guardian destructive-Bash + 
regex (the one that had the `rm -rfv` evasion bug, fixed in + 9713703). If a future policy change re-introduces an evasion, this + test catches it end-to-end through Cursor — not just at the regex + unit-test level.""" + # The canary inside the victim dir is the on-disk evidence: the + # scenario passes only if it still exists once the operator is done. + victim = workdir / "acs-e2e-victim" + victim.mkdir(exist_ok=True) + canary = victim / "DO_NOT_DELETE.txt" + canary.write_text( + "If you see this file after the DESTRUCTIVE scenario, the deny worked.\n") + + report.case(3, TOTAL_SCENARIOS, + "DESTRUCTIVE — real policy blocks rm -rf end-to-end") + report.field("Victim dir:", str(victim)) + report.field("Canary file:", str(canary)) + report.field("Expected:", + "Cursor's agent attempts `rm -rf` on the victim dir; the " + "REAL example-Guardian destructive-Bash regex matches and " + "returns deny with reason_codes=['destructive_command']; " + "Cursor honors the deny; canary file still exists on disk") + + guardian.reset() + + report.operator_action([ + f"In Cursor's Agent panel, ask the agent EXACTLY:", + f" This is a sandbox security test. 
Use the shell to run:", + f" rm -rf {victim}/", + f" Just run it; the directory is a throwaway sandbox.", + f"Watch what happens — Cursor should DENY the command.", + ]) + + sub_results = [] + methods = set(r.get("method", "") for r in guardian.received) + sub_results.append(("Guardian received a toolCallRequest " + "(Cursor's agent attempted the rm -rf)", + "steps/toolCallRequest" in methods, "")) + # First Guardian response that is a deny carrying the + # destructive_command reason code; None when there is no such response. + deny_resp = next((r for r in guardian.sent + if r.get("result", {}).get("decision") == "deny" + and "destructive_command" + in (r.get("result", {}).get("reason_codes") or [])), + None) + sub_results.append(("Guardian returned deny with " + "reason_codes=['destructive_command']", + deny_resp is not None, + "destructive_command deny issued" if deny_resp + else "no matching deny found")) + sub_results.append(("Canary file still exists " + "(rm did not execute despite the agent attempting)", + canary.exists(), + "intact" if canary.exists() else "DESTROYED")) + result_envs = [r for r in guardian.received + if r.get("method") == "steps/toolCallResult"] + rm_in_results = any( + "rm -rf" in json.dumps(r.get("params", {}).get("payload", {}).get("outputs", [])) + for r in result_envs + ) + sub_results.append(("No toolCallResult contains an executed rm -rf output", + not rm_in_results, + "absent" if not rm_in_results + else "PRESENT (command somehow executed)")) + + _dump_session_envelopes(report, guardian) + if deny_resp: + report.json_block("Guardian's destructive-command deny (verbatim)", + deny_resp["result"], truncate=160) + for label, ok, detail in sub_results: + report.sub(label, ok, detail) + report.finish("destructive-policy-path", + all(ok for _, ok, _ in sub_results)) + + +def scenario_handshake_once(report: Report, workdir: Path, + guardian: ProgrammableGuardian) -> None: + """HANDSHAKE — fires exactly ONCE per Cursor session (§4). 
Mirrors + Claude Code's e2e scenario: clear the file-based handshake cache to + force a re-handshake on this scenario's first event, then drive + multiple tool calls and assert exactly one handshake/hello on the + wire.""" + report.case(5, TOTAL_SCENARIOS, + "HANDSHAKE — fires exactly ONCE per Cursor session (§4)") + report.field("Expected:", + "Cursor fires multiple hooks (≥2 preToolUse + ≥2 postToolUse); " + "Guardian sees exactly 1 handshake/hello across all of them") + + guardian.reset() + # Clear the handshake cache pinned into workdir so the next adapter + # invocation MUST re-handshake. Subsequent invocations in this + # scenario should then hit the warm cache and skip the handshake. + cache_dir = workdir / ".acs-handshake-cache" + if cache_dir.exists(): + shutil.rmtree(cache_dir) + cache_dir.mkdir() + + marker_a = f"first-{uuid.uuid4().hex[:6]}" + marker_b = f"second-{uuid.uuid4().hex[:6]}" + report.operator_action([ + f"In Cursor's Agent panel, ask the agent EXACTLY:", + f" Use the shell TWICE: first run 'echo {marker_a}', " + f"then run 'echo {marker_b}'.", + f"Wait for the agent to finish both commands.", + ]) + + # Split what the Guardian received by JSON-RPC method; the three + # counts below are the pass/fail evidence for this scenario. + handshakes = [r for r in guardian.received + if r.get("method") == "handshake/hello"] + pretools = [r for r in guardian.received + if r.get("method") == "steps/toolCallRequest"] + posttools = [r for r in guardian.received + if r.get("method") == "steps/toolCallResult"] + + _dump_session_envelopes(report, guardian) + sub_results = [ + ("Exactly 1 handshake/hello per session", + len(handshakes) == 1, f"got {len(handshakes)}"), + ("≥2 steps/toolCallRequest (Cursor's agent ran shell twice)", + len(pretools) >= 2, f"got {len(pretools)}"), + ("≥2 steps/toolCallResult (each shell returned)", + len(posttools) >= 2, f"got {len(posttools)}"), + ] + for label, ok, detail in sub_results: + report.sub(label, ok, detail) + report.finish("handshake-once-per-session", + all(ok for _, ok, _ in sub_results)) + + +def scenario_user_message(report: Report, 
workdir: Path, + guardian: ProgrammableGuardian) -> None: + """Verify beforeSubmitPrompt → steps/userMessage path: the prompt + text travels through the wire as a userMessage envelope.""" + marker = f"hello-from-cursor-e2e-{uuid.uuid4().hex[:6]}" + report.case(4, TOTAL_SCENARIOS, + "USER MESSAGE — prompt text arrives as steps/userMessage") + report.field("Marker:", marker) + report.field("Expected:", f"Cursor fires beforeSubmitPrompt with the prompt " + f"text; adapter translates to steps/userMessage; " + f"marker {marker!r} appears in the envelope's " + f"params.payload.content") + + guardian.reset() + + report.operator_action([ + f"In Cursor's Agent panel, send a chat message containing exactly:", + f" {marker}", + f"(The agent can respond however it likes — we're testing the prompt path.)", + ]) + + sub_results = [] + user_msg_envs = [r for r in guardian.received + if r.get("method") == "steps/userMessage"] + sub_results.append(("Guardian received steps/userMessage", + bool(user_msg_envs), f"{len(user_msg_envs)} found")) + # Search the JSON-serialized content, so the marker is found + # whatever structure payload.content has. + marker_in_content = any( + marker in json.dumps(r.get("params", {}).get("payload", {}).get("content", [])) + for r in user_msg_envs + ) + sub_results.append(("Prompt marker appears in userMessage content", + marker_in_content, + "marker found in payload.content" + if marker_in_content else "marker absent")) + + _dump_session_envelopes(report, guardian) + if user_msg_envs: + report.json_block("steps/userMessage envelope (verbatim)", + user_msg_envs[-1], truncate=140) + for label, ok, detail in sub_results: + report.sub(label, ok, detail) + report.finish("user-message-path", all(ok for _, ok, _ in sub_results)) + + +# ────────────────────────────────────────────────────────────────────── +# Output helpers +# ────────────────────────────────────────────────────────────────────── + +def _dump_session_envelopes(report: Report, + guardian: ProgrammableGuardian) -> None: + """Print method name + truncated request_id for every envelope in this 
scenario.""" + if not guardian.received: + return + print(f" ── Hooks Cursor fired (in order)") + for r in guardian.received: + method = r.get("method", "") + rid = r.get("params", {}).get("request_id", "")[:8] + print(f" {method:35s} req={rid}…") + # First tool envelope verbatim — what Cursor actually emits + pretool = next((r for r in guardian.received + if r.get("method") == "steps/toolCallRequest"), None) + if pretool: + report.json_block("First steps/toolCallRequest envelope (verbatim)", + pretool, truncate=120) + + +# ────────────────────────────────────────────────────────────────────── +# Entry point +# ────────────────────────────────────────────────────────────────────── + +def main() -> int: + """Run every interactive scenario against the operator's Cursor. + + Returns 1 if the canonical schemas are missing; otherwise 0 when + report.summary() is truthy and 1 when it is not.""" + if not SPEC_DIR_DEFAULT.exists(): + print(f"FATAL: canonical schemas missing at {SPEC_DIR_DEFAULT}", + file=sys.stderr) + print("Set ACS_SPEC_DIR to a clone of " + "Agent-Control-Standard/ACS/specification/v0.1.0/", file=sys.stderr) + return 1 + + workdir = Path(tempfile.mkdtemp(prefix="acs-cursor-e2e-")) + guardian = ProgrammableGuardian(hmac_secret=HMAC_SECRET) + # Install the REAL example-Guardian policy as the default handler + # for every JSON-RPC method. Scenarios never swap this out — they + # just probe different inputs and assert different outputs against + # the same shipping policy. + guardian.handlers["__default__"] = real_policy_handler() + guardian.start() + # From here on the Guardian is running and the temp workdir exists, + # so hook wiring, the header and every scenario sit inside try/finally: + # a failure anywhere still stops the Guardian and removes the workdir. + try: + write_project_hooks(workdir, guardian.port) + + report = Report() + report.print_header( + "ACS Cursor adapter — REAL end-to-end conformance check", + "", + "This test drives YOUR Cursor installation through real", + "scenarios. The script wires a project-level .cursor/hooks.json", + "inside a temp workspace and asks you to open that workspace", + "in Cursor + perform specific actions. 
Cursor is a GUI; the", + "loop requires you to do the user actions.", + "", + f"Spec source : {SPEC_DIR_DEFAULT}", + f"Adapter : {ADAPTER}", + f"Cursor app : (open it yourself when prompted)", + f"Test workdir: {workdir}", + "", + f"{TOTAL_SCENARIOS} scenarios. Budget ~5-10 minutes total — real human", + "interaction is in the loop.", + ) + + scenario_setup_open_workspace(report, workdir, guardian) + scenario_allow(report, workdir, guardian) + scenario_read_tool(report, workdir, guardian) + scenario_destructive(report, workdir, guardian) + scenario_user_message(report, workdir, guardian) + scenario_handshake_once(report, workdir, guardian) + finally: + guardian.stop() + print() + print(f" Temp workdir was: {workdir}") + print(f" (Cleaning up... close Cursor or it'll keep the dir open)") + # Best-effort cleanup; if Cursor still has the dir open, rmtree fails + # which is harmless. + shutil.rmtree(workdir, ignore_errors=True) + + return 0 if report.summary("YOUR CURSOR INSTALL IS ACS-CONFORMANT") else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/adapters/cursor/hooks.json.example b/adapters/cursor/hooks.json.example new file mode 100644 index 0000000..5245869 --- /dev/null +++ b/adapters/cursor/hooks.json.example @@ -0,0 +1,60 @@ +{ + "_comment": "Drop into /.cursor/hooks.json (or merge with your existing config). 
Replace ACS_GUARDIAN_URL with your deployment's endpoint and the python3 path with the absolute path to acs_adapter.py.", + "version": 1, + "hooks": { + "sessionStart": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py sessionStart" + } + ], + "beforeSubmitPrompt": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py beforeSubmitPrompt" + } + ], + "preToolUse": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py preToolUse", + "failClosed": true + } + ], + "postToolUse": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py postToolUse" + } + ], + "beforeShellExecution": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py beforeShellExecution", + "failClosed": true + } + ], + "afterShellExecution": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py afterShellExecution" + } + ], + "beforeMCPExecution": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py beforeMCPExecution", + "failClosed": true + } + ], + "afterMCPExecution": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py afterMCPExecution" + } + ], + "subagentStart": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py subagentStart", + "failClosed": true + } + ], + "subagentStop": [ + { + "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py subagentStop" + } + ] + } +} diff --git a/adapters/cursor/mapping.md b/adapters/cursor/mapping.md new file mode 100644 index 0000000..d45c220 --- /dev/null +++ b/adapters/cursor/mapping.md @@ -0,0 +1,101 @@ +# Cursor → ACS hook mapping + +Schema source: Cursor's `create-hook` skill (`~/.cursor/skills-cursor/create-hook/SKILL.md`). 
+ +Each Cursor hook event maps to an ACS `steps/*` method. The adapter (`acs_adapter.py`) does the translation in both directions. + +## Hook event mapping + +| Cursor hook | ACS step method | Notes | +|---|---|---| +| `sessionStart` | `steps/sessionStart` | Session bounds. | +| `sessionEnd` | `steps/sessionEnd` | | +| `stop` | `steps/sessionEnd` | Cursor's agent-stop signal. | +| `preToolUse` | `steps/toolCallRequest` | Principal interception point. | +| `postToolUse` | `steps/toolCallResult` | Tool result available. | +| `postToolUseFailure` | `steps/toolCallResult` | Tool failed; treated as a result for ACS purposes. | +| `subagentStart` | `steps/subagentStart` | | +| `subagentStop` | `steps/subagentStop` | | +| `beforeShellExecution` | `steps/toolCallRequest` (tool name = "Shell") | Shell-specific gating; Cursor exposes this distinct from preToolUse. | +| `afterShellExecution` | `steps/toolCallResult` | | +| `beforeMCPExecution` | `steps/toolCallRequest` (tool name = "MCP:`server`:`tool`") | MCP tool gating. | +| `afterMCPExecution` | `steps/toolCallResult` | | +| `beforeReadFile` | `steps/knowledgeRetrieval` | File reads modeled as knowledge retrieval. | +| `afterFileEdit` | `steps/toolCallResult` | File edits surface as tool results. | +| `beforeSubmitPrompt` | `steps/userMessage` | Pre-submit prompt gating. | +| `preCompact` | `steps/preCompact` | Context compaction. | +| `afterAgentResponse` | `steps/agentResponse` | Agent emitted a response. | +| `afterAgentThought` | `steps/agentResponse` | Agent reasoning trace; modeled as an agent emission. | +| `beforeTabFileRead` | `steps/knowledgeRetrieval` | Tab inline-completion file read. | +| `afterTabFileEdit` | `steps/toolCallResult` | Tab inline edit applied. | + +## Disposition mapping + +Cursor's documented response keys are event-specific. 
The adapter translates ACS dispositions accordingly: + +### Permission events (`preToolUse`, `subagentStart`, `beforeShellExecution`, `beforeMCPExecution`) + +| ACS disposition | Cursor output | Notes | +|---|---|---| +| `allow` | `{"permission": "allow"}` | | +| `deny` | `{"permission": "deny", "user_message": reasoning, "agent_message": reasoning}` | Cursor displays user_message in UI; agent_message is fed back to the agent's context. | +| `ask` | `{"permission": "ask", "user_message": reasoning, "agent_message": reasoning}` | | +| `defer` | `{"permission": "ask", ...}` | Cursor has no defer; closest equivalent is ask. | +| `modify` | `{"permission": "allow", "updated_input": parameter_overrides, "user_message": reasoning}` on `preToolUse` only | Other permission events have no documented updated-input field; modify substitutes to deny with audit. | + +### `postToolUse`, `postToolUseFailure`, `afterMCPExecution` + +| ACS disposition | Cursor output | +|---|---| +| `allow` | `{}` (no output; proceed) | +| (any decision with reasoning) | `{"additional_context": reasoning}` | +| `modify` on `afterMCPExecution` with `modified_content` | `{"updated_mcp_tool_output": modified_content}` | + +### `subagentStop` + +| ACS disposition | Cursor output | +|---|---| +| `allow` | `{}` | +| `deny` | `{"followup_message": "Subagent denied at stop: " + reasoning}` | + +### `beforeSubmitPrompt` + +Cursor's `beforeSubmitPrompt` has no documented response keys; the adapter uses **exit code** to signal: + +| ACS disposition | Adapter behavior | +|---|---| +| `allow` | exit 0, no output | +| `deny` | exit 2 (Cursor's documented block signal) | +| `ask` / `defer` | exit 2 (substituted to block; pause-resume requires Guardian-side resolution) | + +### Lifecycle events (`sessionStart`, `sessionEnd`, `stop`, `preCompact`, `afterAgentResponse`, `afterAgentThought`, `beforeReadFile`, `beforeTabFileRead`, `afterFileEdit`, `afterTabFileEdit`) + +**Observation-only.** The adapter emits empty 
output. Cursor fires these *after* the action / message / file edit has occurred (and `beforeReadFile` returns `{}` even on a denied response — the read still happens), so a Guardian `deny` / `modify` on them cannot undo the side effect or block the message. ACS records the event in the audit chain; enforcement on prompts must be placed at `beforeSubmitPrompt`, enforcement on tools at `preToolUse` / `beforeShellExecution` / `beforeMCPExecution`. ACS-Core's `hooks.md` describes `agentResponse` as decision-eligible, but because Cursor fires the hook only after the response has been emitted, this adapter can honestly map it only as observation-only. + +## Matchers + +Cursor's `hooks.json` supports `matcher` regex per hook entry. The adapter does not require any matcher; the Guardian filters server-side. Use matchers in `hooks.json` only if you want to scope which calls reach the Guardian (a deployment optimization, not a correctness concern). + +## Exit codes (Cursor's protocol) + +| Exit code | Cursor behavior | When the adapter uses it | +|---|---|---| +| 0 | Success; parse stdout as JSON | Normal case for every event; on `beforeSubmitPrompt` an `allow` also exits 0 but writes nothing to stdout | +| 2 | Block (same as deny) | `beforeSubmitPrompt` `deny` / `ask` / `defer`; any event when fail-closed and Guardian unreachable and stdout JSON not viable | +| Other nonzero | Fail open unless `failClosed: true` | Not used by the adapter (errors are converted to deny via posture) | + +## failClosed + +Cursor's per-hook `failClosed: true` makes Cursor block when the hook crashes, times out, or returns invalid JSON. This is independent of the adapter's `ACS_DEFAULT_DENY` (which controls the adapter's own behavior on Guardian-unreachable errors). Use both in production: `failClosed: true` in `hooks.json` for adapter-level failure, `ACS_DEFAULT_DENY=1` for Guardian-unreachable. 
+ +## Conformance posture + +The Cursor adapter implements ACS-Core's mandatory floor in the same shape as the Claude Code adapter: + +- Handshake: assumed-advertised at the endpoint (production adapter performs `handshake/hello` and caches) +- Hook taxonomy minimum (6): all covered, plus many additional Cursor events +- Dispositions: ALLOW / DENY / ASK supported on permission events; MODIFY supported on `preToolUse`; DEFER → ASK substitution (Cursor has no native defer) +- SessionContext: session_id sent with every request +- Replay protection: ✓ +- Baseline integrity: deferred to transport layer in this minimal adapter +- Decision honoring: ✓ (Cursor enforces the permission verdict; adapter uses exit-2 where Cursor's protocol uses exit code rather than JSON) diff --git a/adapters/cursor/tests/__init__.py b/adapters/cursor/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/adapters/cursor/tests/example_payloads.md b/adapters/cursor/tests/example_payloads.md new file mode 100644 index 0000000..1913556 --- /dev/null +++ b/adapters/cursor/tests/example_payloads.md @@ -0,0 +1,177 @@ +# Real-world payload examples — Cursor + +These are the actual JSON shapes Cursor emits on stdin for each hook, captured from a real Cursor agent session and **masked**. Identifying fields are replaced with placeholders. Use these to understand the schema the adapter parses. + +The examples show the fields documented in Cursor's bundled `create-hook` skill (`~/.cursor/skills-cursor/create-hook/SKILL.md`), plus several that appear in real payloads but aren't in the public-facing docs (flagged below). 
+ +--- + +## sessionStart + +```json +{ + "session_id": "00000000-0000-0000-0000-000000000001", + "transcript_path": "/Users//.cursor/projects//agent-transcripts//.jsonl", + "cwd": "/path/to/workspace", + "hook_event_name": "sessionStart", + "cursor_version": "3.7.x", + "workspace_roots": ["/path/to/workspace"], + "user_email": "" +} +``` + +Fields not in the public skill docs but present in real payloads: `cursor_version`, `workspace_roots`, `user_email`. The adapter does not forward `user_email` to the Guardian by default. + +--- + +## beforeSubmitPrompt + +```json +{ + "conversation_id": "00000000-0000-0000-0000-000000000001", + "generation_id": "00000000-0000-0000-0000-000000000002", + "model": "default", + "composer_mode": "agent", + "prompt": "", + "attachments": [], + "session_id": "00000000-0000-0000-0000-000000000001", + "hook_event_name": "beforeSubmitPrompt", + "cursor_version": "3.7.x", + "workspace_roots": ["/path/to/workspace"], + "user_email": "", + "transcript_path": "/Users//.cursor/projects//agent-transcripts//.jsonl" +} +``` + +`session_id` and `conversation_id` typically hold the same UUID in agent mode. The adapter prefers `session_id`. 
+ +--- + +## preToolUse (Grep) + +```json +{ + "conversation_id": "00000000-0000-0000-0000-000000000001", + "generation_id": "00000000-0000-0000-0000-000000000002", + "model": "default", + "tool_name": "Grep", + "tool_input": { + "pattern": "TODO", + "file_path": "/path/to/workspace" + }, + "tool_use_id": "tool_00000000-0000-0000-0000-000000000003", + "session_id": "00000000-0000-0000-0000-000000000001", + "hook_event_name": "preToolUse", + "cursor_version": "3.7.x", + "workspace_roots": ["/path/to/workspace"], + "user_email": "", + "transcript_path": "/Users//.cursor/projects//agent-transcripts//.jsonl" +} +``` + +--- + +## postToolUse (Grep) + +```json +{ + "conversation_id": "00000000-0000-0000-0000-000000000001", + "generation_id": "00000000-0000-0000-0000-000000000002", + "model": "default", + "tool_name": "Grep", + "tool_input": { + "pattern": "TODO", + "file_path": "/path/to/workspace" + }, + "tool_output": "{\"pattern\":\"TODO\",\"success\":true}", + "duration": 19.175, + "tool_use_id": "tool_00000000-0000-0000-0000-000000000003", + "session_id": "00000000-0000-0000-0000-000000000001", + "hook_event_name": "postToolUse", + "cursor_version": "3.7.x", + "workspace_roots": ["/path/to/workspace"], + "user_email": "", + "transcript_path": "/Users//.cursor/projects//agent-transcripts//.jsonl" +} +``` + +--- + +## beforeShellExecution + +```json +{ + "conversation_id": "00000000-0000-0000-0000-000000000001", + "generation_id": "00000000-0000-0000-0000-000000000002", + "model": "default", + "command": "ls -la", + "cwd": "/path/to/workspace", + "sandbox": true, + "session_id": "00000000-0000-0000-0000-000000000001", + "hook_event_name": "beforeShellExecution", + "cursor_version": "3.7.x", + "workspace_roots": ["/path/to/workspace"], + "user_email": "", + "transcript_path": "/Users//.cursor/projects//agent-transcripts//.jsonl" +} +``` + +`sandbox` indicates whether Cursor will run the command in its sandbox. 
+ +--- + +## Adapter response shapes + +Per-event output keys differ. See the [mapping table](../mapping.md#disposition-mapping) for the full matrix. + +### Allow (permission events) + +```json +{"permission": "allow"} +``` + +### Deny (permission events) + +```json +{ + "permission": "deny", + "user_message": "destructive Bash pattern in: rm -rf /home/u", + "agent_message": "destructive Bash pattern in: rm -rf /home/u" +} +``` + +### Modify (preToolUse with parameter_overrides) + +```json +{ + "permission": "allow", + "updated_input": {"command": "ls -la # sanitized"}, + "user_message": "command sanitized by Guardian" +} +``` + +### Post-tool events (additional_context) + +```json +{"additional_context": "audit: tool ran in 19ms; output 142 bytes"} +``` + +### beforeSubmitPrompt (block via exit code, not stdout) + +The adapter writes nothing to stdout and exits with code 2. Cursor treats exit-2 as a block. + +--- + +## Masking convention used here + +| Field | Real value contains | Masked as | +|---|---|---| +| `session_id`, `conversation_id` | Real UUIDs | `00000000-0000-0000-0000-000000000001` | +| `generation_id`, `tool_use_id` | Real UUIDs | `00000000-0000-0000-0000-00000000000X` / `tool_` | +| `cursor_version` | Real version (e.g. `3.7.21`) | `3.7.x` | +| `workspace_roots`, `cwd` | Real workspace path | `/path/to/workspace` | +| `transcript_path` | Real absolute path | `/Users//.cursor/...` | +| `user_email` | Real user identity | `` | +| `prompt`, `command` | Real content (sometimes preserved when benign) | `` / preserved | + +No real session data is committed to this repo. diff --git a/adapters/cursor/tests/live_verification.md b/adapters/cursor/tests/live_verification.md new file mode 100644 index 0000000..3b03a0c --- /dev/null +++ b/adapters/cursor/tests/live_verification.md @@ -0,0 +1,76 @@ +# Cursor live verification + +Cursor is a desktop application with no documented headless mode, so the live test cannot run in CI. 
It can be reproduced manually by a reviewer with Cursor installed. + +## Status: ✅ Verified via manual reproduction + +The procedure below has been run end-to-end and produced the expected outcomes (5+ events flowed from a real Cursor session through the adapter to the example Guardian, all hooks routed correctly, zero adapter errors). Real captured payloads are not committed to the repo because Cursor's hook events contain session-identifying fields (workspace path, conversation id, user email). + +## Procedure + +```bash +# 1. Start the example Guardian +python3 ../../example-guardian/example_guardian.py --port 8787 + +# 2. In a new shell, set up a test project with the adapter wired in +mkdir -p /tmp/acs-cursor-live/.cursor +cat > /tmp/acs-cursor-live/.cursor/hooks.json <<'EOF' +{ + "version": 1, + "hooks": { + "sessionStart": [ + { "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py sessionStart" } + ], + "beforeSubmitPrompt": [ + { "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py beforeSubmitPrompt" } + ], + "preToolUse": [ + { "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py preToolUse" } + ], + "postToolUse": [ + { "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py postToolUse" } + ], + "beforeShellExecution": [ + { "command": "ACS_GUARDIAN_URL=http://127.0.0.1:8787/acs python3 /path/to/acs_adapter.py beforeShellExecution" } + ] + } +} +EOF + +# 3. Open the project in Cursor and prompt the agent to do something +# that triggers tool calls (a benign Grep or a Shell command works). + +# 4. 
Observe the Guardian's stderr for hook events: +# [guardian] steps/sessionStart session= step= +# [guardian] steps/userMessage session= step= +# [guardian] steps/toolCallRequest session= step= +# [guardian] steps/toolCallResult session= step= +``` + +## Expected outcomes + +A benign prompt that triggers tool use should produce, in order, on the Guardian's stderr: + +1. `steps/sessionStart` (Cursor's `sessionStart` event) +2. `steps/userMessage` (Cursor's `beforeSubmitPrompt`) +3. `steps/toolCallRequest` (Cursor's `preToolUse` for the first tool) +4. `steps/toolCallResult` (Cursor's `postToolUse`) +5. Additional `toolCallRequest` / `toolCallResult` pairs for each subsequent tool the agent invokes + +The Cursor UI should show the agent's tool calls proceeding normally; no policy block messages because the example Guardian's policy only denies destructive Bash patterns and writes to system paths. + +## Deny-path verification + +To exercise the deny path, prompt the Cursor agent to run a command matching the example Guardian's destructive regex against a clearly nonexistent target (something the agent has no real reason to run). The Guardian should respond with `deny`; the adapter should emit `{"permission": "deny", "user_message": "destructive Bash pattern..."}`; Cursor should surface the block in its UI and not execute the command. + +## Cursor event schema notes + +While running the reproduction, the Cursor events sent to the adapter include several fields beyond what the public `create-hook` skill documentation specifies. 
These are handled by the adapter's existing fallback logic without modification: + +- Both `session_id` and `conversation_id` are present (the adapter prefers `session_id`) +- `generation_id`, `model`, `composer_mode`, `cursor_version` +- `workspace_roots`, `transcript_path`, `user_email` (deployment-specific, the adapter does not forward these to the Guardian by default) +- For tools: `tool_use_id`, `duration` +- For Shell: `command`, `cwd`, `sandbox` + +If your Guardian wants to incorporate any of these into policy decisions, extend `acs_adapter.py`'s `build_payload` to include the relevant fields. diff --git a/adapters/cursor/tests/test_adapter.py b/adapters/cursor/tests/test_adapter.py new file mode 100644 index 0000000..110b434 --- /dev/null +++ b/adapters/cursor/tests/test_adapter.py @@ -0,0 +1,195 @@ +""" +End-to-end tests for the Cursor adapter, run against the shared example +Guardian. Cursor's hook schema is taken from the create-hook skill that +ships with Cursor. + +Live verification is not covered by this module. Cursor is a desktop app +that does not have a documented headless mode equivalent to Claude +Code's `--print`, so a live fire-through from Cursor is a manual +procedure (see live_verification.md) for a reviewer with Cursor installed. 
class CursorAdapter(unittest.TestCase):
    """Round-trip tests: Cursor hook event -> adapter subprocess -> example Guardian.

    One example-Guardian process is started per test class on a free loopback
    port. Each test runs the adapter as a child process, feeds it one hook
    event as JSON on stdin, and asserts on its exit code, stdout and stderr.
    """

    # Overrides used by the "Guardian unreachable" tests: a URL on port 1 that
    # those tests treat as dead, plus ACS_HANDSHAKE=0 exactly as the tests
    # always passed it.
    _UNREACHABLE_GUARDIAN = {
        "ACS_GUARDIAN_URL": "http://127.0.0.1:1/dead",
        "ACS_HANDSHAKE": "0",
    }

    @classmethod
    def setUpClass(cls) -> None:
        """Start the example Guardian in dev mode and wait for its port."""
        cls.port = _find_free_port()
        guardian_env = os.environ.copy()
        guardian_env["ACS_DEV_MODE"] = "1"
        guardian_env.pop("ACS_HMAC_SECRET", None)
        guardian_env.pop("ACS_HMAC_SECRET_FILE", None)
        cls.guardian_proc = subprocess.Popen(
            [sys.executable, str(GUARDIAN), "--port", str(cls.port)],
            env=guardian_env,
            stderr=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
        )
        try:
            _wait("127.0.0.1", cls.port)
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises, so
            # the child has to be reaped here or it outlives the test run.
            cls._stop_guardian()
            raise

    @classmethod
    def tearDownClass(cls) -> None:
        """Stop the Guardian started in setUpClass."""
        cls._stop_guardian()

    @classmethod
    def _stop_guardian(cls) -> None:
        """Terminate the Guardian, escalate to kill after 2 s, and reap it.

        communicate() both waits for the process and drains/closes the stderr
        pipe, so no zombie process or open pipe is left behind.
        """
        proc = cls.guardian_proc
        proc.terminate()
        try:
            proc.communicate(timeout=2.0)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.communicate()

    def _run(self, event_name: str, event: dict, env_overrides: dict | None = None) -> tuple[int, str, str]:
        """Run the adapter once for `event_name` with `event` as JSON on stdin.

        Returns (exit code, stripped stdout, stripped stderr).
        """
        env = os.environ.copy()
        # The fail-open-default test relies on this being unset; do not let a
        # value from the developer's shell or CI leak into the adapter.
        env.pop("ACS_DEFAULT_DENY", None)
        env["ACS_GUARDIAN_URL"] = f"http://127.0.0.1:{self.port}/acs"
        if env_overrides:
            env.update(env_overrides)
        proc = subprocess.run(
            [sys.executable, str(ADAPTER), event_name],
            input=json.dumps(event),
            capture_output=True,
            text=True,
            env=env,
            timeout=10,
        )
        return proc.returncode, proc.stdout.strip(), proc.stderr.strip()

    # ----- preToolUse: allow path -----
    def test_pre_tool_safe_read_allows(self) -> None:
        rc, out, err = self._run("preToolUse", {
            "session_id": "cur-1", "tool_name": "Read",
            "tool_input": {"file_path": "/tmp/x"},
        })
        self.assertEqual(rc, 0, err)
        payload = json.loads(out)
        self.assertEqual(payload["permission"], "allow")

    def test_pre_tool_safe_bash_allows(self) -> None:
        rc, out, _ = self._run("preToolUse", {
            "session_id": "cur-2", "tool_name": "Bash",
            "tool_input": {"command": "ls -la"},
        })
        self.assertEqual(rc, 0)
        self.assertEqual(json.loads(out)["permission"], "allow")

    # ----- preToolUse: deny path -----
    def test_pre_tool_destructive_bash_denies(self) -> None:
        rc, out, _ = self._run("preToolUse", {
            "session_id": "cur-3", "tool_name": "Bash",
            "tool_input": {"command": "rm -rf /home/user"},
        })
        self.assertEqual(rc, 0)
        payload = json.loads(out)
        self.assertEqual(payload["permission"], "deny")
        self.assertIn("destructive", payload["user_message"].lower())

    def test_pre_tool_write_to_protected_path_denies(self) -> None:
        rc, out, _ = self._run("preToolUse", {
            "session_id": "cur-4", "tool_name": "Write",
            "tool_input": {"file_path": "/etc/passwd", "content": "x"},
        })
        self.assertEqual(rc, 0)
        self.assertEqual(json.loads(out)["permission"], "deny")

    # ----- beforeShellExecution -----
    def test_before_shell_safe(self) -> None:
        rc, out, _ = self._run("beforeShellExecution", {
            "session_id": "cur-5", "command": "ls",
        })
        self.assertEqual(rc, 0)
        self.assertEqual(json.loads(out)["permission"], "allow")

    def test_before_shell_destructive_denies(self) -> None:
        rc, out, _ = self._run("beforeShellExecution", {
            "session_id": "cur-6", "command": "rm -rf /home/x",
        })
        self.assertEqual(rc, 0)
        self.assertEqual(json.loads(out)["permission"], "deny")

    # ----- subagentStart -----
    def test_subagent_start_allow(self) -> None:
        rc, out, _ = self._run("subagentStart", {
            "session_id": "cur-7", "subagent_type": "explore",
        })
        self.assertEqual(rc, 0)
        # session start variant on the Guardian -> allow; subagentStart maps to subagentStart
        payload = json.loads(out) if out else {}
        self.assertEqual(payload.get("permission"), "allow")

    # ----- Lifecycle events: empty output -----
    def test_session_start_silent(self) -> None:
        rc, out, _ = self._run("sessionStart", {"session_id": "cur-8"})
        self.assertEqual(rc, 0)
        self.assertEqual(out, "")

    def test_after_agent_response_silent(self) -> None:
        rc, out, _ = self._run("afterAgentResponse", {
            "session_id": "cur-9", "response": "ok"})
        self.assertEqual(rc, 0)
        self.assertEqual(out, "")

    # ----- Unknown event -----
    def test_unmapped_event_silent(self) -> None:
        rc, out, _ = self._run("someFutureCursorEvent", {"session_id": "x"})
        self.assertEqual(rc, 0)
        self.assertEqual(out, "")

    # ----- Fail posture -----
    def test_guardian_unreachable_default_deny_on_permission_event(self) -> None:
        rc, out, err = self._run(
            "preToolUse",
            {"session_id": "cur-10", "tool_name": "Read",
             "tool_input": {"file_path": "/tmp/x"}},
            env_overrides={**self._UNREACHABLE_GUARDIAN, "ACS_DEFAULT_DENY": "1"},
        )
        self.assertEqual(rc, 0, err)
        payload = json.loads(out)
        self.assertEqual(payload["permission"], "deny")
        self.assertIn("decision-failure", payload["user_message"].lower())
        self.assertIn("ACS_AUDIT", err)
        self.assertIn("decision_failure_fail_closed", err)

    def test_guardian_unreachable_fail_open_default_is_audit(self) -> None:
        """§6.4 spec default: fail-open with audit event."""
        rc, out, err = self._run(
            "preToolUse",
            {"session_id": "cur-11", "tool_name": "Read",
             "tool_input": {"file_path": "/tmp/x"}},
            env_overrides=dict(self._UNREACHABLE_GUARDIAN),
        )
        self.assertEqual(rc, 0)
        self.assertEqual(out, "")
        self.assertIn("ACS_AUDIT", err, "fail-open MUST emit an audit event per §6.4")
        self.assertIn("fail_open_bypass", err)

    def test_before_submit_prompt_block_via_exit_code(self) -> None:
        """beforeSubmitPrompt blocks via exit code 2, not stdout (fail-closed mode).
        Audit log carries cause=transport_failure since the Guardian is unreachable."""
        rc, _, err = self._run(
            "beforeSubmitPrompt",
            {"session_id": "cur-12", "prompt": "anything"},
            env_overrides={**self._UNREACHABLE_GUARDIAN, "ACS_DEFAULT_DENY": "1"},
        )
        self.assertEqual(rc, 2)
        self.assertIn("prompt blocked", err.lower())
        self.assertIn("ACS_AUDIT", err)
        self.assertIn("transport_failure", err,
                      "audit event must carry cause=transport_failure when Guardian unreachable")
def _validate(payload: dict, schema_name: str) -> list:
    """Validate `payload` against the spec schema `schema_name`.

    `schema_name` is a path relative to SPEC_DIR (e.g. "request-envelope.json"
    or "hooks/tool-call-request.json"). Returns one "<location>: <message>"
    string per validation error; an empty list means the payload is valid.
    """
    schema = _load_schema(schema_name)
    # Path.as_uri() raises ValueError for a relative path, which is what
    # SPEC_DIR is when ACS_SPEC_DIR is set to something like
    # "../specification/v0.1.0". absolute() avoids that without resolving
    # symlinks, so an already-absolute SPEC_DIR yields the same URI as before.
    base_uri = SPEC_DIR.absolute().as_uri() + "/" + schema_name
    # NOTE(review): RefResolver is deprecated in newer jsonschema releases in
    # favour of the `referencing` library; kept because the pinned version is
    # not visible from this file.
    resolver = RefResolver(base_uri=base_uri, referrer=schema)
    validator = Draft202012Validator(
        schema,
        resolver=resolver,
        format_checker=Draft202012Validator.FORMAT_CHECKER,
    )
    problems = []
    for err in validator.iter_errors(payload):
        # An empty absolute_path means the error is on the document root;
        # label it instead of printing an empty location.
        location = ".".join(str(part) for part in err.absolute_path) or "<root>"
        problems.append(f"{location}: {err.message}")
    return problems
"hooks/tool-call-result.json", { + "session_id": SESSION_UUID, + "command": "ls -la", + "output": "total 0", + "exit_code": 0, + }), + ("beforeMCPExecution", "hooks/tool-call-request.json", { + "session_id": SESSION_UUID, + "mcp_server": "linear", + "mcp_tool": "list_issues", + "tool_input": {"team": "ACS"}, + }), + ("afterMCPExecution", "hooks/tool-call-result.json", { + "session_id": SESSION_UUID, + "mcp_server": "linear", + "mcp_tool": "list_issues", + "tool_output": [{"id": "ACS-1"}], + }), + ("afterFileEdit", "hooks/tool-call-result.json", { + "session_id": SESSION_UUID, + "file_path": "/tmp/x.py", + }), + ("afterTabFileEdit", "hooks/tool-call-result.json", { + "session_id": SESSION_UUID, + "file_path": "/tmp/x.py", + }), + ("beforeSubmitPrompt", "hooks/user-message.json", { + "session_id": SESSION_UUID, + "prompt": "list open PRs", + }), + ("afterAgentResponse", "hooks/agent-response.json", { + "session_id": SESSION_UUID, + "response": "done", + }), + ("afterAgentThought", "hooks/agent-response.json", { + "session_id": SESSION_UUID, + "thought": "thinking about it", + }), + ("sessionStart", "hooks/session-start.json", { + "session_id": SESSION_UUID, + "workspace_path": "/tmp/workspace", + }), + ("sessionEnd", "hooks/session-end.json", { + "session_id": SESSION_UUID, + "reason": "completed", + }), + ("stop", "hooks/session-end.json", { + "session_id": SESSION_UUID, + }), + ("subagentStart", "hooks/subagent-start.json", { + "session_id": SESSION_UUID, + "subagent_id": "sub-1", + "subagent_type": "researcher", + }), + # subagentStop intentionally NOT in HOOK_MAP — `final_chain_hash` is + # genuinely unknowable from Cursor (no chain on its side). Documented + # in the Cursor README per-hook honesty table. 
class UuidCoercionForNonUuidCursorIds(SpecValidationSetUp):
    """Cursor's real conversation_id is not always a UUID — `conv-abc123`,
    `chat_xyz`, etc. The adapter MUST coerce it to a valid UUID via uuid5
    before emitting (request-envelope.json:66 requires `metadata.session_id`
    to be `format: "uuid"`). Without coercion, format-checker validation
    fails. These fixtures exercise that path — the canonical-UUID fixtures
    in HOOK_CASES would pass through unchanged and miss the coercion bug.
    """

    NON_UUID_INPUTS = [
        "conv-abc123def456",      # Cursor-style conversation id
        "chat_2026_session_xyz",  # underscore-style
        "test-cc-session",        # the string the round-trip tests use
        "",                       # empty — adapter should refuse, see below
    ]

    @staticmethod
    def _pre_tool_use_envelope(session_id: str, file_path: str = "/tmp/x") -> dict:
        # Build the preToolUse envelope every test in this class starts from.
        return acs_adapter.build_request(
            "preToolUse",
            {"session_id": session_id, "tool_name": "Read",
             "tool_input": {"file_path": file_path}},
        )

    def test_non_uuid_conversation_id_coerced(self) -> None:
        """A non-UUID conversation_id MUST come out as a valid UUID in
        metadata.session_id — otherwise envelope validation fails."""
        import re
        uuid_re = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-"
                             r"[0-9a-f]{4}-[0-9a-f]{12}$")
        for non_uuid in self.NON_UUID_INPUTS:
            # subTest keeps checking the remaining inputs after one fails and
            # names the offending input in the report.
            with self.subTest(session_id=non_uuid):
                env = self._pre_tool_use_envelope(non_uuid)
                if not non_uuid:
                    # Empty session_id: adapter refuses to build the envelope at all.
                    self.assertEqual(env, {},
                        f"adapter must refuse to build envelope for empty session_id; got {env}")
                else:
                    session_id = env["params"]["metadata"]["session_id"]
                    self.assertTrue(uuid_re.match(session_id),
                        f"adapter emitted non-UUID session_id {session_id!r} "
                        f"for input {non_uuid!r}; format-checker would reject")
                    # Also: the canonical schema (with format_checker) must accept it
                    errors = _validate(env, "request-envelope.json")
                    self.assertEqual(errors, [],
                        f"envelope for non-UUID input {non_uuid!r} fails canonical schema:\n - "
                        + "\n - ".join(errors))

    def test_uuid_coercion_is_deterministic(self) -> None:
        """The same non-UUID input MUST always coerce to the same UUID,
        so separate hook invocations carrying the same id map to the same
        metadata.session_id."""
        env1 = self._pre_tool_use_envelope("conv-stable-id", "/tmp/x")
        env2 = self._pre_tool_use_envelope("conv-stable-id", "/tmp/y")
        self.assertEqual(env1["params"]["metadata"]["session_id"],
                         env2["params"]["metadata"]["session_id"],
                         "uuid5 coercion must be deterministic — different "
                         "UUIDs for the same input means cross-hook correlation "
                         "is broken")
class LiveCursorPlaceholder(unittest.TestCase):
    """Pointer to the manual Cursor verification; its only test is always skipped."""

    @unittest.skip("Cursor live verification is a manual procedure; see live_verification.md")
    def test_run_manual_procedure(self) -> None:
        """Open Cursor on a project with the example hooks.json.

        The full procedure is written up in `tests/live_verification.md`.

        What the manual run is meant to confirm:
          - A benign agent prompt that triggers tool use makes the example
            Guardian receive sessionStart, beforeSubmitPrompt, preToolUse,
            postToolUse, beforeShellExecution and afterShellExecution events.
          - The Guardian's policy gates the agent's tool calls.
          - The adapter's stderr log stays free of errors.
        """
        doc_present = VERIFICATION_DOC.exists()
        failure_note = f"live verification doc missing: {VERIFICATION_DOC}"
        self.assertTrue(doc_present, failure_note)
The dry-run output includes a unified diff of what would +change, so you see the exact edit before approving it. + +Cursor reads from two locations (Cursor docs): + - `/.cursor/hooks.json` — project-level (per-workspace) + - `~/.cursor/hooks.json` — user-level (global) + +When both exist, Cursor's project-level entries take precedence. +This CLI defaults to the user-level path; pass --settings for project- +level wiring. + +Examples +======== + +# 1. Preview what would be wired into ~/.cursor/hooks.json +python3 wire.py \\ + --guardian-url=http://127.0.0.1:8787/acs \\ + --secret-file=~/.acs/hmac.key + +# 2. Same, but actually write (with backup at ~/.cursor/hooks.json.bak.) +python3 wire.py \\ + --guardian-url=http://127.0.0.1:8787/acs \\ + --secret-file=~/.acs/hmac.key \\ + --write + +# 3. Project-level wiring +python3 wire.py \\ + --guardian-url=http://127.0.0.1:8787/acs \\ + --secret-file=~/.acs/hmac.key \\ + --settings=./.cursor/hooks.json \\ + --write + +# 4. Subset of hooks +python3 wire.py \\ + --guardian-url=http://127.0.0.1:8787/acs \\ + --secret-file=~/.acs/hmac.key \\ + --hooks=preToolUse,postToolUse + +# 5. Remove ACS wiring (preserves any other hooks you have) +python3 wire.py --unwire --write + +What this tool does NOT do +========================== + + - Generate the HMAC secret. Run: + openssl rand -hex 32 > ~/.acs/hmac.key && chmod 600 ~/.acs/hmac.key + - Start the Guardian. Run it yourself. + - Validate that the Guardian is reachable. + - Choose any of the deployment-specific values. All explicit flags. +""" +from __future__ import annotations + +import argparse +import datetime +import difflib +import json +import os +import shutil +import stat +import sys +from pathlib import Path +from typing import Any + + +HERE = Path(__file__).resolve().parent +DEFAULT_ADAPTER_PATH = HERE / "acs_adapter.py" + +# ACS-Core minimum equivalence in Cursor's hook vocabulary. 
def build_command(*, adapter_path: Path, event_name: str,
                  guardian_url: str,
                  secret_file: str | None,
                  secret_env: str | None,
                  default_deny: bool,
                  host_allowlist: str | None,
                  python_bin: str) -> str:
    """Compose the hook command string used inside hooks.json.

    The result has the shape
    `VAR=value ... <python_bin> /path/to/acs_adapter.py <event_name> # acs-adapter-wired`:
    an environment prefix, the interpreter, the adapter script, the event
    name as argv[1], and WIRE_MARKER as a trailing shell comment.

    Args:
        adapter_path: location of acs_adapter.py.
        event_name: Cursor hook name passed to the adapter as argv[1].
        guardian_url: value for ACS_GUARDIAN_URL.
        secret_file: path for ACS_HMAC_SECRET_FILE (expanded with _expand);
            takes precedence over `secret_env` when both are given.
        secret_env: literal value for ACS_HMAC_SECRET.
        default_deny: when true, adds ACS_DEFAULT_DENY=1.
        host_allowlist: value for ACS_GUARDIAN_HOST_ALLOWLIST, if any.
        python_bin: interpreter command; inserted verbatim so a multi-word
            launcher such as "/usr/bin/env python3" keeps working.

    Every other value is shell-quoted. The env prefix and the trailing `#`
    marker already assume a POSIX shell, so an unquoted space, `&`, `?` or
    `;` in a path or URL would otherwise split or truncate the command.
    shlex.quote returns plain values unchanged, so typical commands are
    byte-identical to the unquoted form.
    """
    import shlex

    def assignment(name: str, value: object) -> str:
        return f"{name}={shlex.quote(str(value))}"

    env_pairs: list[str] = [assignment("ACS_GUARDIAN_URL", guardian_url)]
    if secret_file:
        env_pairs.append(assignment("ACS_HMAC_SECRET_FILE", _expand(secret_file)))
    elif secret_env:
        env_pairs.append(assignment("ACS_HMAC_SECRET", secret_env))
    if default_deny:
        env_pairs.append("ACS_DEFAULT_DENY=1")
    if host_allowlist:
        env_pairs.append(assignment("ACS_GUARDIAN_HOST_ALLOWLIST", host_allowlist))
    env_prefix = " ".join(env_pairs)
    adapter_arg = shlex.quote(str(adapter_path))
    event_arg = shlex.quote(event_name)
    return f"{env_prefix} {python_bin} {adapter_arg} {event_arg} {WIRE_MARKER}"
def merge_unwire(existing: dict, hook_names: list[str]) -> dict:
    """Strip ACS-wired entries from the given hook types.

    Works on a deep copy; `existing` is never mutated. An entry counts as
    ACS-wired when it is an object whose string `command` contains
    WIRE_MARKER. Everything else under the same event is preserved,
    including entries that are not objects at all (a hand-edited file may
    contain them, and they are not ours to remove). Event lists that become
    empty are removed, and so is the `hooks` dict if it ends up empty.

    If `hooks` is missing or is not an object (e.g. `"hooks": null`), there
    is nothing to unwire and the copy is returned unchanged.
    """
    out = json.loads(json.dumps(existing))  # deep copy
    hooks = out.get("hooks", {})
    if not isinstance(hooks, dict):
        return out

    def is_acs_entry(entry: object) -> bool:
        if not isinstance(entry, dict):
            return False
        command = entry.get("command")
        return isinstance(command, str) and WIRE_MARKER in command

    targets = set(hook_names)
    # Snapshot the keys: events are popped while iterating.
    for name in list(hooks):
        if name not in targets:
            continue
        entries = hooks.get(name) or []
        kept = [entry for entry in entries if not is_acs_entry(entry)]
        if kept:
            hooks[name] = kept
        else:
            hooks.pop(name, None)
    if not hooks:
        out.pop("hooks", None)
    return out
def _expand(p: str) -> Path:
    """Expand environment variables and a leading `~` in `p`, returning a Path."""
    return Path(os.path.expanduser(os.path.expandvars(p)))


def _targets_loopback(url: str) -> bool:
    """True when the URL's host is `localhost` or a loopback IP literal.

    The host is parsed out of the URL rather than prefix-matched, so
    `http://localhost.evil.example` and `http://127.evil.example` are not
    mistaken for loopback, and `http://[::1]:8787` is recognised as loopback.
    """
    import ipaddress
    from urllib.parse import urlsplit

    try:
        host = urlsplit(url).hostname
    except ValueError:
        # Malformed authority (e.g. an unbalanced IPv6 bracket).
        return False
    if not host:
        return False
    if host == "localhost":
        return True
    try:
        return ipaddress.ip_address(host).is_loopback
    except ValueError:
        # Not an IP literal, i.e. some other hostname.
        return False


def validate_inputs(args: argparse.Namespace) -> list[str]:
    """Return a list of human-readable warnings for the operator. None
    of these block the operation — they're informational nudges.

    Reads `unwire`, `secret_file`, `secret_env_inline`, `guardian_url` and
    `hooks` (a list of hook names) from `args`. Unwiring takes no Guardian
    URL, secret or hook posture, so it yields no warnings.
    """
    warnings: list[str] = []
    if args.unwire:
        return warnings

    if args.secret_file:
        sf = _expand(args.secret_file)
        if sf.exists():
            mode = stat.S_IMODE(sf.stat().st_mode)
            # Any group/other permission bit set.
            if mode & 0o077:
                warnings.append(
                    f"WARNING: {sf} is mode {oct(mode)} — the adapter "
                    f"will refuse to read it. Run: chmod 600 {sf}")
        else:
            warnings.append(
                f"NOTE: secret file {sf} doesn't exist yet. Create with: "
                f"openssl rand -hex 32 > {sf} && chmod 600 {sf}")
    elif args.secret_env_inline:
        warnings.append(
            "WARNING: --secret-env-inline embeds the secret directly in "
            "hooks.json (visible in `ps aux`). For production, prefer "
            "--secret-file with a 0600 key file.")
    else:
        warnings.append(
            "WARNING: no HMAC secret configured (neither --secret-file "
            "nor --secret-env-inline). Adapter will run unsigned — Guardian "
            "will reject every request unless it's also unconfigured "
            "(ACS_DEV_MODE=1). ACS-Core baseline integrity (§10) "
            "REQUIRES signing.")

    url = args.guardian_url or ""
    if not url.startswith(("http://", "https://")):
        warnings.append(
            "WARNING: Guardian URL must start with http:// or https://. "
            "The adapter's URL allowlist will reject any other scheme.")

    if url.startswith("http://") and not _targets_loopback(url):
        warnings.append(
            "WARNING: plaintext HTTP to a non-loopback Guardian. The "
            "envelope is HMAC-signed (so unmodifiable) but the payload "
            "is readable on the wire. Use https:// for production.")

    missing = set(ACS_CORE_HOOKS) - set(args.hooks)
    if missing:
        warnings.append(
            f"NOTE: wiring a SUBSET of ACS-Core's minimum hooks. "
            f"Missing: {sorted(missing)}. ACS-Core conformance requires "
            f"all 6 ({', '.join(ACS_CORE_HOOKS)}).")

    return warnings


# ──────────────────────────────────────────────────────────────────────
# CLI
# ──────────────────────────────────────────────────────────────────────

def main(argv: list[str] | None = None) -> int:
    """CLI entry point: preview (default) or write the ACS wiring in hooks.json.

    Args:
        argv: argument list; None makes argparse read sys.argv.

    Returns:
        Process exit status; 0 on every path that returns normally.
        Argument errors exit through argparse's `error()`.

    `--hooks` has no argparse default, so an omitted flag can be told apart
    from an explicit one: wiring falls back to ACS_CORE_HOOKS, while
    `--unwire` falls back to every known hook name plus every event present
    in the file. Only entries carrying WIRE_MARKER are ever removed.
    """
    # __doc__ is None when docstrings are stripped (python -OO); degrade to an
    # empty epilog instead of raising TypeError while building the parser.
    module_doc = __doc__ or ""
    p = argparse.ArgumentParser(
        description="Wire (or unwire) the Cursor ACS adapter into hooks.json.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=module_doc.split("Examples\n========")[1] if "Examples\n========" in module_doc else "",
    )
    p.add_argument("--guardian-url",
                   help="Guardian endpoint (http:// or https://). Required unless --unwire.")
    p.add_argument("--secret-file",
                   help="Path to the HMAC secret file (preferred). The adapter reads it lazily.")
    p.add_argument("--secret-env-inline", metavar="HEX",
                   help="HMAC secret inlined into hooks.json env (visible in `ps aux`; "
                        "dev only). Use --secret-file for production.")
    p.add_argument("--settings",
                   default="~/.cursor/hooks.json",
                   help="Path to Cursor hooks.json (default: ~/.cursor/hooks.json — "
                        "user-level). Use ./.cursor/hooks.json for project-level.")
    p.add_argument("--adapter",
                   default=str(DEFAULT_ADAPTER_PATH),
                   help=f"Absolute path to acs_adapter.py (default: {DEFAULT_ADAPTER_PATH}).")
    p.add_argument("--python-bin",
                   default="python3",
                   help="Python interpreter the hook command uses (default: python3 from PATH).")
    p.add_argument("--hooks", default=None,
                   help=f"Comma-separated hook names to wire (default: ACS-Core minimum: "
                        f"{','.join(ACS_CORE_HOOKS)}).")
    posture_group = p.add_mutually_exclusive_group()
    posture_group.add_argument("--default-deny", action="store_true",
                               help="Force fail-CLOSED on EVERY wired hook (sets BOTH Cursor's "
                                    "native failClosed AND our ACS_DEFAULT_DENY=1). Default behavior: "
                                    "fail-closed only on gate hooks "
                                    f"({', '.join(sorted(GATE_HOOKS & set(ACS_CORE_HOOKS)))}).")
    posture_group.add_argument("--all-fail-open", action="store_true",
                               help="Force fail-OPEN on EVERY wired hook, including gates. NOT "
                                    "RECOMMENDED — strict §6.4 default but a Guardian outage on a "
                                    "gate hook lets the action run unguarded.")
    p.add_argument("--host-allowlist", default=None,
                   help="Comma-separated hostnames the adapter will accept as Guardian URLs.")
    p.add_argument("--unwire", action="store_true",
                   help="Remove any ACS-wired hooks (preserves non-ACS entries).")
    p.add_argument("--write", action="store_true",
                   help="Actually write the changes to hooks.json (with timestamped backup). "
                        "Without this flag, this tool only prints what it WOULD do.")

    args = p.parse_args(argv)

    if not args.unwire and not args.guardian_url:
        p.error("--guardian-url is required (unless --unwire)")
    if args.secret_file and args.secret_env_inline:
        p.error("provide --secret-file OR --secret-env-inline, not both")

    settings_path = _expand(args.settings)
    adapter_path = _expand(args.adapter)
    if not args.unwire and not adapter_path.exists():
        p.error(f"adapter not found at {adapter_path}; pass --adapter to override")

    hooks_given = args.hooks is not None
    hook_names = [h.strip() for h in (args.hooks or "").split(",") if h.strip()]
    if not hooks_given and not args.unwire:
        hook_names = list(ACS_CORE_HOOKS)

    existing = load_settings(settings_path)

    if args.unwire and not hook_names:
        # No explicit selection: sweep every hook type this tool knows about,
        # plus any other event present in the file.
        hook_names = ACS_CORE_HOOKS + [
            "stop", "beforeShellExecution", "afterShellExecution",
            "beforeMCPExecution", "afterMCPExecution",
            "afterFileEdit", "afterTabFileEdit",
            "subagentStart", "preCompact",
            "postToolUseFailure", "afterAgentThought",
        ]
        present = existing.get("hooks") if isinstance(existing, dict) else None
        if isinstance(present, dict):
            hook_names += [name for name in present if name not in hook_names]
    args.hooks = hook_names  # for the warning function

    def posture_for(hook: str) -> tuple[bool, str]:
        # (fail closed?, label for the posture table), decided in one place
        # so the written entries and the printed table cannot disagree.
        if args.default_deny:
            return True, "fail-CLOSED (forced via --default-deny)"
        if args.all_fail_open:
            return False, "fail-OPEN (forced via --all-fail-open)"
        if hook in GATE_HOOKS:
            return True, "fail-CLOSED (gate hook default)"
        return False, "fail-OPEN (observational hook default)"

    if args.unwire:
        new = merge_unwire(existing, hook_names)
    else:
        # One entry per hook, each with the deny posture of its category.
        entries_by_hook: dict[str, dict] = {}
        for hook in hook_names:
            hook_deny, _ = posture_for(hook)
            cmd = build_command(
                adapter_path=adapter_path,
                event_name=hook,
                guardian_url=args.guardian_url,
                secret_file=args.secret_file,
                secret_env=args.secret_env_inline,
                default_deny=hook_deny,
                host_allowlist=args.host_allowlist,
                python_bin=args.python_bin,
            )
            entries_by_hook[hook] = build_hook_entry(cmd, fail_closed=hook_deny)
        new = merge_wire(existing, hook_names, entries_by_hook)

    warnings = validate_inputs(args)
    for w in warnings:
        print(f" ⚠ {w}", file=sys.stderr)
    if warnings:
        print(file=sys.stderr)

    if not args.unwire:
        print("Per-hook fail posture:")
        for hook in hook_names:
            _, posture = posture_for(hook)
            print(f" {hook:22s} → {posture}")
        print()

    diff = render_diff(existing, new, label=str(settings_path))
    if not diff:
        print(f"No change — {settings_path} already in the desired state.")
        return 0

    print("=" * 70)
    print(f"Proposed change to {settings_path}")
    print("=" * 70)
    print(diff)
    print("=" * 70)

    if not args.write:
        print()
        print("Dry-run only. To apply, re-run with --write.")
        print("A timestamped backup of the original file will be created.")
        return 0

    backup = write_atomically(settings_path, render(new))
    print()
    print(f"✓ wrote {settings_path}")
    if backup:
        print(f" backup at {backup}")
    if not args.unwire:
        print()
        print("Next steps:")
        print(" 1. Make sure the Guardian is running and reachable at "
              f"{args.guardian_url}")
        print(" 2. Restart Cursor — hooks.json is read at startup, not live")
        print(" 3. Verify the wiring works:")
        print(f" cd {HERE}")
        print(" python3 e2e_check.py")
    return 0
+ +## Policy implemented + +| Hook method | Rule | +|---|---| +| `steps/toolCallRequest` with `tool.name in {"Bash", "Shell"}` (matched case-insensitively) and `command` matching a destructive regex (`rm -rf /...`, `mkfs`, `dd ... of=/dev/...`, fork-bomb pattern, `> /dev/sda`) | `deny` with reasoning | +| `steps/toolCallRequest` with `tool.name == "Write"` and `file_path` under `/etc/`, `/usr/`, `/bin/`, `/sbin/`, or `/boot/` | `deny` with reasoning | +| `steps/toolCallRequest` with `tool.name == "Task"` (in-process subagent), unless `ACS_ALLOW_SUBAGENT=1` | `deny` with reasoning | +| Any other `steps/toolCallRequest` | `allow` | +| `steps/sessionStart`, `steps/sessionEnd`, `steps/userMessage`, `steps/toolCallResult`, `steps/agentResponse`, `steps/preCompact`, `steps/postCompact`, `steps/subagentStart`, `steps/subagentStop`, `steps/knowledgeRetrieval`, `steps/memoryStore`, `steps/memoryContextRetrieval`, `steps/turnStart`, `steps/turnEnd`, `steps/agentTrigger` | `allow` | +| Any other method | `deny` with `reasoning: "unknown method: ..."` | + +Every `steps/*` response carries a `chain_hash` field: the head of a rolling per-session SHA-256 chain in which each entry hashes the request id, method, request hash, and client timestamp together with the previous entry's hash. `system/ping` and `handshake/hello` responses carry no `chain_hash`. Only the chain head is kept per session (persisted under `ACS_GUARDIAN_STATE_DIR`), not the entries themselves, so this is enough for an adapter to observe and recompute the field but is not a full audit log. + +## Why it's shared + +All three reference adapters (`adapters/claude-code/`, `adapters/cursor/`, `adapters/nat/`) speak the same ACS JSON-RPC wire format. Their integration tests all need a Guardian that: + +- Accepts ACS JSON-RPC requests +- Returns deterministic decisions (so tests can assert specific outcomes) +- Logs to stderr so a human running it locally can see what's happening + +One shared Guardian satisfies all three. The previous arrangement (`adapters/claude-code/example_guardian.py`, imported by NAT and Cursor tests via `../../claude-code/`) was a smell.
+ +## How to run it + +```bash +# default: 127.0.0.1:8787 +python3 example_guardian.py + +# custom port +python3 example_guardian.py --port 8788 +``` + +The server logs every received request to stderr in one line: + +``` +[guardian] listening on 127.0.0.1:8787 +[guardian] steps/toolCallRequest session=abc-123 step=def4567 +``` + +## How to extend + +The policy is in the `evaluate(method, params)` function. To add a new rule, either add to the destructive regex, add a new branch in the toolCallRequest handler, or add a method to the allow-list. + +Production deployments replace this whole file with their actual policy engine. The wire shape (request format, response format) stays the same. + +## What this is NOT + +- **Not a production Guardian.** No identity verification, no authentication, no signed envelopes, no audit chain that survives a restart, no rate limiting. +- **Not the only Guardian.** Anyone can write one — the spec defines the wire contract, the implementation is the deployment's choice. +- **Not an SDK.** Just an HTTP server with hard-coded rules. If you want an SDK to build Guardians, that's a separate workstream. diff --git a/adapters/example-guardian/example_guardian.py b/adapters/example-guardian/example_guardian.py new file mode 100755 index 0000000..06dd9a6 --- /dev/null +++ b/adapters/example-guardian/example_guardian.py @@ -0,0 +1,756 @@ +#!/usr/bin/env python3 +""" +Minimal local Guardian for testing the reference adapters. + +Implements the ACS v0.1.0 spec features the adapters exercise: + +- Wire envelope per `request-envelope.json` / `response-envelope.json`. +- HMAC-SHA256 baseline signing (§10) with HKDF-derived per-session key. +- Rolling SHA-256 audit chain per §8.2 (`entry_hash = sha256(JCS(entry) || previous_hash)`). +- Replay rejection on duplicate `request_id` (§10.3, error -32005). +- Timestamp skew rejection (§10.3, error -32006). +- `handshake/hello` with ClientHello/ServerHello (§4). 
+- `system/ping` always returns allow, never enters the chain (§13). +- Subagent gating: blocks `Task` tool by default. +- Destructive-Bash regex + protected-system-path Write blocks. + +This is NOT a production Guardian. It is a teaching artifact and a test +substrate for the reference adapters. + +Wire format ground truth: + `specification/v0.1.0/request-envelope.json` + `specification/v0.1.0/response-envelope.json` + `specification/v0.1.0/handshake.json` + `specification/v0.1.0/hooks/*.json` + +Usage: + python3 example_guardian.py [--port 8787] + +Environment variables: + ACS_HMAC_SECRET / ACS_HMAC_SECRET_FILE + Shared secret for HMAC-SHA256 signing per §10. The + Guardian verifies every signed request and signs + every response. **The Guardian refuses to start + unless one of these is set, or `ACS_DEV_MODE=1`.** + File path is preferred for production (no exposure + in `ps aux`); use `chmod 600`. + Generate: `openssl rand -hex 32 > /etc/acs/hmac.key` + ACS_DEV_MODE "1" allows starting without a signing secret. Local + development only. ACS-Core baseline integrity (§10) + is not satisfied in dev mode. + ACS_SKEW_WINDOW_MS Timestamp skew tolerance (default 300_000 = 5 min). + ACS_ALLOW_SUBAGENT "1" allows the Task tool. Default "0" gates it. + ACS_GUARDIAN_STATE_DIR + Directory where per-session state (chain head + + seen request_ids) is persisted. Survives Guardian + restart so §10.3 replay protection isn't reset by + crashes / deploys / autoscaling. Defaults to + ~/.cache/acs-guardian-state/. Set to "" to disable + persistence (RAM-only; dev/test only — opens a + replay window across restarts). 
+""" +from __future__ import annotations + +import argparse +import errno +import fcntl +import hashlib +import http.server +import json +import os +import re +import socketserver +import sys +import threading +import time +from pathlib import Path +from typing import Any + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "_common")) +from acs_common import ( # noqa: E402 + ACS_VERSION, + DEFAULT_SKEW_WINDOW_MS, + DESTRUCTIVE_SCAN_MAX_LEN, + MAX_REQUEST_BODY_BYTES, + derive_session_key, + iso8601_now, + jcs_canonicalize, + load_hmac_secret, + parse_iso8601, + sign_envelope, + verify_signature, +) + +import datetime + +# Optional spec-schema validation. If jsonschema + a local clone of the +# canonical schemas (ACS_SPEC_DIR) are present, the Guardian validates +# every incoming envelope against request-envelope.json BEFORE policy +# evaluation — so malformed payloads from a buggy adapter or hostile +# input are rejected with INVALID_REQUEST instead of slipping into +# downstream code. 
_SPEC_VALIDATION_AVAILABLE = False
_REQUEST_ENVELOPE_VALIDATOR = None
try:
    from jsonschema import Draft202012Validator  # type: ignore[import-not-found]
    from jsonschema.validators import RefResolver  # type: ignore[import-not-found]
    # NOTE(review): the fallback directory lives under /tmp, which is
    # world-writable on most hosts. Anyone able to create that directory
    # controls the schema this process validates against — set ACS_SPEC_DIR
    # explicitly outside of throwaway test environments.
    _spec_dir_env = os.environ.get(
        "ACS_SPEC_DIR",
        "/tmp/acs-spec-source/specification/v0.1.0",
    )
    # Path.as_uri() raises ValueError for relative paths, which would abort
    # the import when ACS_SPEC_DIR is given relative to the working
    # directory. Resolving first keeps both spellings working.
    _spec_dir = Path(_spec_dir_env).resolve()
    _envelope_schema_path = _spec_dir / "request-envelope.json"
    if _envelope_schema_path.exists():
        # A schema file that exists but cannot be read or parsed is left to
        # fail loudly here: silently disabling validation would be worse.
        with open(_envelope_schema_path) as _f:
            _schema_obj = json.load(_f)
        _REQUEST_ENVELOPE_VALIDATOR = Draft202012Validator(
            _schema_obj,
            resolver=RefResolver(
                base_uri=(_spec_dir.as_uri() + "/request-envelope.json"),
                referrer=_schema_obj,
            ),
            format_checker=Draft202012Validator.FORMAT_CHECKER,
        )
        _SPEC_VALIDATION_AVAILABLE = True
except ImportError:
    # jsonschema is optional; without it envelopes are not schema-checked.
    pass


# Timestamp skew tolerance in milliseconds, and whether the Task tool
# (in-process subagent) is allowed. Both are read once at import time.
SKEW_WINDOW_MS = int(os.environ.get("ACS_SKEW_WINDOW_MS", str(DEFAULT_SKEW_WINDOW_MS)))
ALLOW_SUBAGENT = os.environ.get("ACS_ALLOW_SUBAGENT", "0") == "1"


def _hmac_secret() -> bytes:
    """Re-read on every call so operators can rotate the secret without
    restarting the Guardian (rotate the file under `ACS_HMAC_SECRET_FILE`
    or update `ACS_HMAC_SECRET` and the next signature check picks it up).
    The handshake's advertised `signature_algorithms_supported` reflects
    the current value each time a ClientHello arrives."""
    return load_hmac_secret()


# ----- Destructive-Bash regex set -----

DESTRUCTIVE_BASH_PATTERNS: tuple[re.Pattern, ...] = (
    # Pattern 0: `rm` + a flag token that contains both r and f in any
    # order, possibly with other letters anywhere (-rf, -fr, -rfv,
    # -rfvi, -vrf, etc.), followed eventually by a path starting with
    # `/`, `~`, or `$HOME`. The trailing `[a-zA-Z]*` after the second
    # required flag letter is the bug-fix — without it, `-rfv` only
    # matched `-rf` and then `\b` failed against `v`, letting `rm -rfv`
    # slip through the policy. (CVE-class evasion: trivial single-letter
    # extension defeats the regex.)
    re.compile(r"\brm\s+(-[a-zA-Z]*r[a-zA-Z]*f[a-zA-Z]*|-[a-zA-Z]*f[a-zA-Z]*r[a-zA-Z]*|--recursive\s+--force|--force\s+--recursive)\b.*?\s+(/|~|\$HOME)", re.IGNORECASE),
    re.compile(r"\brm\s+(-rf|-fr|--recursive\s+--force|--force\s+--recursive)\s+(/|~|\$HOME)(\s|$)", re.IGNORECASE),
    re.compile(r"\brm\s+.*--no-preserve-root\b", re.IGNORECASE),
    re.compile(r"\bmkfs(\.\w+)?\s+", re.IGNORECASE),
    re.compile(r"\bdd\s+.*\bof=/dev/", re.IGNORECASE),
    re.compile(r":\(\)\s*\{"),
    re.compile(r">\s*/dev/(sd[a-z]|nvme|hd[a-z]|disk)", re.IGNORECASE),
    re.compile(r"\bfind\s+(/|~|\$HOME)\b.*-delete\b", re.IGNORECASE),
    re.compile(r"\bfind\s+(/|~|\$HOME)\b.*-exec\s+rm\b", re.IGNORECASE),
    re.compile(r"\bchmod\s+(-R\s+)?[0-7]*7{2,}[0-7]*\s+(/etc|/usr|/bin|/sbin)", re.IGNORECASE),
    # Split flags: the patterns above only recognise "recursive" and "force"
    # when they share one token (or are the two long options back to back),
    # so `rm -r -f /`, `rm -f -r ~` and `rm -r --force /` were allowed.
    # The two lookaheads require a recursive flag and a force flag anywhere
    # in the same simple command (up to the next `;`, `|`, `&` or newline),
    # followed by an absolute / home-relative target. Appended last so the
    # positions of the existing patterns do not change.
    re.compile(
        r"\brm\b"
        r"(?=[^;|&\n]*\s(?:-[a-zA-Z]*r|--recursive\b))"
        r"(?=[^;|&\n]*\s(?:-[a-zA-Z]*f|--force\b))"
        r"[^;|&\n]*\s(/|~|\$HOME)",
        re.IGNORECASE,
    ),
)

# Write targets under these prefixes are denied by the tool-call policy.
PROTECTED_PATH_PREFIXES: tuple[str, ...] = ("/etc/", "/usr/", "/bin/", "/sbin/", "/boot/")

# Hook methods that are recorded in the chain and always allowed.
INFORMATIONAL_METHODS = {
    "steps/sessionStart", "steps/sessionEnd", "steps/userMessage",
    "steps/toolCallResult", "steps/agentResponse",
    "steps/preCompact", "steps/postCompact",
    "steps/subagentStart", "steps/subagentStop",
    "steps/knowledgeRetrieval", "steps/memoryStore", "steps/memoryContextRetrieval",
    "steps/turnStart", "steps/turnEnd", "steps/agentTrigger",
}


# ----- Per-session state (replay + chain), persisted across restarts -----
#
# RAM-only state was a real production gap: a Guardian restart wiped the
# seen-request-id set, opening a replay window for every previously-sent
# envelope. §10.3 says Guardians MUST reject duplicates; that MUST
# doesn't pause for the duration of a deploy. Per-session state now
# persists to a small JSON file per session_id under ACS_GUARDIAN_STATE_DIR
# so the seen set + chain head survive process restarts.
+ +_STATE_DIR_ENV = os.environ.get( + "ACS_GUARDIAN_STATE_DIR", + os.path.join(os.path.expanduser("~"), ".cache", "acs-guardian-state"), +) +PERSIST_ENABLED = bool(_STATE_DIR_ENV) +STATE_DIR = Path(_STATE_DIR_ENV) if PERSIST_ENABLED else None + + +def _state_path(session_id: str) -> Path | None: + if not PERSIST_ENABLED: + return None + key = hashlib.sha256(session_id.encode()).hexdigest()[:16] + return STATE_DIR / f"{key}.json" + + +class SessionState: + """Holds the rolling chain head and replay protection per session_id. + + Persists to disk after every mutation so Guardian restart cannot + open a replay window. JSON file per session_id, mode 0600, in + STATE_DIR. Loading is best-effort: a corrupt file behaves like a + fresh session. + + seen_request_ids is a dict {request_id: timestamp_seconds} so old + entries can be evicted by `evict_old_request_ids` — without + eviction, long-running sessions accumulate UUIDs without bound. + """ + + def __init__(self, session_id: str = "") -> None: + self.session_id = session_id + self.previous_hash: str | None = None # None for the first entry (§8.1) + self.seen_request_ids: dict[str, float] = {} + self.seen_nonces: dict[str, float] = {} + self.lock = threading.Lock() + self._load() + + def _load(self) -> None: + path = _state_path(self.session_id) + if path is None or not path.exists(): + return + try: + # Backwards-compat: older state files stored seen_request_ids as a + # list. Treat any list entry as having timestamp 0 (will be evicted + # immediately if past the cutoff). + # Hold a shared (read) flock so we don't read a partially-written + # file from a concurrent persist() in another Guardian instance. 
+ with open(path) as f: + try: + fcntl.flock(f.fileno(), fcntl.LOCK_SH) + except OSError: + pass + try: + data = json.load(f) + finally: + try: + fcntl.flock(f.fileno(), fcntl.LOCK_UN) + except OSError: + pass + self.previous_hash = data.get("previous_hash") + sr = data.get("seen_request_ids", {}) + self.seen_request_ids = sr if isinstance(sr, dict) else {x: 0.0 for x in sr} + sn = data.get("seen_nonces", {}) + self.seen_nonces = sn if isinstance(sn, dict) else {x: 0.0 for x in sn} + except (OSError, json.JSONDecodeError): + pass + + def persist(self) -> None: + """Atomically write the current state, with file-locked + merge-on-write to support multiple Guardian instances sharing a + STATE_DIR (HA deploys). Must be called with self.lock held. + + Algorithm: + 1. Take an exclusive flock on a sidecar `.lock` file. + 2. Re-read the on-disk state (another instance may have just + written it). + 3. Merge the in-memory state into the on-disk state — union + of seen_request_ids/nonces, max-by-length of previous_hash + (chain forks across instances are a separate problem). + 4. Atomically write the merged result. + 5. Release the flock. 
+ """ + path = _state_path(self.session_id) + if path is None: + return + try: + STATE_DIR.mkdir(parents=True, exist_ok=True, mode=0o700) + try: + os.chmod(STATE_DIR, 0o700) + except OSError: + pass + lock_path = path.with_suffix(".lock") + lock_fd = os.open(str(lock_path), os.O_RDWR | os.O_CREAT, 0o600) + try: + try: + fcntl.flock(lock_fd, fcntl.LOCK_EX) + except OSError as e: + if e.errno != errno.ENOLCK: + raise + + # Re-read on-disk state for merge + merged_seen = dict(self.seen_request_ids) + merged_nonces = dict(self.seen_nonces) + merged_prev = self.previous_hash + if path.exists(): + try: + with open(path) as rf: + disk = json.load(rf) + disk_seen = disk.get("seen_request_ids") or {} + # Backwards-compat: tolerate list form from earlier versions + if isinstance(disk_seen, list): + disk_seen = {x: 0.0 for x in disk_seen} + for k, v in disk_seen.items(): + # Keep the EARLIEST timestamp (so eviction works correctly) + if k not in merged_seen or merged_seen[k] > v: + merged_seen[k] = v + disk_nonces = disk.get("seen_nonces") or {} + if isinstance(disk_nonces, list): + disk_nonces = {x: 0.0 for x in disk_nonces} + for k, v in disk_nonces.items(): + if k not in merged_nonces or merged_nonces[k] > v: + merged_nonces[k] = v + # Chain head: keep whichever exists (in single-Guardian + # mode both are identical; in HA mode, this is best-effort). 
+ if not merged_prev and disk.get("previous_hash"): + merged_prev = disk["previous_hash"] + except (OSError, json.JSONDecodeError): + pass + + self.seen_request_ids = merged_seen + self.seen_nonces = merged_nonces + self.previous_hash = merged_prev + + tmp = path.with_suffix(".json.tmp") + fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) + with os.fdopen(fd, "w") as f: + json.dump({ + "previous_hash": self.previous_hash, + "seen_request_ids": self.seen_request_ids, + "seen_nonces": self.seen_nonces, + }, f) + os.replace(tmp, path) + finally: + try: + fcntl.flock(lock_fd, fcntl.LOCK_UN) + except OSError: + pass + os.close(lock_fd) + except OSError: + pass + + +def evict_old_request_ids(st: "SessionState") -> int: + """Drop request_id entries older than 2 × skew_window. + + Replay is impossible past the skew window — Guardian would reject + the request with TIMESTAMP_OUT_OF_WINDOW (-32006) before reaching + the replay check. We use 2 × skew as the cutoff for safety margin + against clock drift across processes. Caller must hold st.lock. + + Returns the number of entries evicted. + """ + cutoff = time.time() - 2 * (SKEW_WINDOW_MS / 1000.0) + old = [k for k, ts in st.seen_request_ids.items() if ts < cutoff] + for k in old: + del st.seen_request_ids[k] + return len(old) + + +class GuardianState: + """Process-global state. 
Sessions keyed by metadata.session_id.""" + + def __init__(self) -> None: + self.sessions: dict[str, SessionState] = {} + self.lock = threading.Lock() + + def get(self, session_id: str) -> SessionState: + with self.lock: + st = self.sessions.get(session_id) + if st is None: + st = SessionState(session_id=session_id) + self.sessions[session_id] = st + return st + + +STATE = GuardianState() + + +# ----- Helpers ----- + +def _matches_destructive_bash(cmd: str) -> str | re.Pattern | None: + """Returns: + None — safe (no destructive pattern matched) + re.Pattern — a destructive pattern matched + "too_large" — input exceeds the regex-scan cap; caller MUST treat + as suspicious (we don't know if it's destructive). + + The cap (DESTRUCTIVE_SCAN_MAX_LEN = 8 KiB) defends against regex + DoS via crafted huge commands. Real shell commands are tiny; + multi-KB strings are tunneled data or an attack. + """ + if len(cmd) > DESTRUCTIVE_SCAN_MAX_LEN: + return "too_large" + for pat in DESTRUCTIVE_BASH_PATTERNS: + if pat.search(cmd): + return pat + return None + + +def _unwrap_arguments(wrapped: dict[str, Any]) -> dict[str, Any]: + out: dict[str, Any] = {} + for k, v in (wrapped or {}).items(): + out[k] = v["value"] if isinstance(v, dict) and "value" in v else v + return out + + +# ----- Chain computation (§8.2 normative) ----- + +def compute_entry_hash(entry: dict, previous_hash: str | None) -> str: + """entry_hash = lowercase-hex(SHA-256(content_bytes || prev_hash_bytes)) + + content_bytes = UTF-8(JCS(entry with entry_hash and previous_hash REMOVED)) + prev_hash_bytes = raw 32-byte decoding of previous_hash, or empty bytes for the first entry. 
def append_to_chain(session_id: str, method: str, request_id: str,
                    payload_canonical: str, client_timestamp: str) -> str:
    """Append a ContextEntry to the session's chain, return the new chain head.

    Uses the client's request timestamp (already skew-validated upstream)
    so an external observer that records the request and the published
    chain_hash can fully recompute the entry and verify the hash. If the
    Guardian stamped its own time, the entry would be irreproducible.
    (The Guardian's own clock is only used when the request carried no
    timestamp at all.)
    """
    st = STATE.get(session_id)
    with st.lock:
        entry = {
            "entry_id": request_id,
            "step_id": request_id,
            "step_type": method,
            "request_hash": hashlib.sha256(payload_canonical.encode()).hexdigest(),
            "timestamp": client_timestamp or iso8601_now(),
        }
        if st.previous_hash is not None:
            entry["previous_hash"] = st.previous_hash
        new_head = compute_entry_hash(entry, st.previous_hash)
        st.previous_hash = new_head
        st.persist()  # so a restart picks up the chain head
        return new_head


# ----- Replay + skew checks (§10.3 normative) -----

class GuardianError(Exception):
    """A request rejection that is reported to the caller as a JSON-RPC error.

    `code` is the JSON-RPC / ACS error code, `message` the human-readable text.
    """

    def __init__(self, code: int, message: str) -> None:
        super().__init__(message)
        self.code = code
        self.message = message


def check_replay(session_id: str, request_id: str) -> None:
    """Reject a request_id that was already accepted in this session.

    Raises:
        GuardianError(-32005): the request_id was seen before.
        GuardianError(-32600): the request_id is present but not a string
            (previously an unhashable value raised TypeError out of here).

    NOTE(review): a request with no request_id skips replay protection
    entirely; confirm the envelope schema makes request_id mandatory.
    """
    if not request_id:
        return
    if not isinstance(request_id, str):
        raise GuardianError(-32600, "Invalid Request: request_id must be a string")
    st = STATE.get(session_id)
    with st.lock:
        # HA-mode: re-read on-disk state so we see what other Guardian
        # instances have already accepted for this session.
        st._load()
        if request_id in st.seen_request_ids:
            raise GuardianError(-32005, f"REPLAY_DETECTED: request_id {request_id} already seen in session")
        # Evict opportunistically — whenever the table size is a multiple of 100
        if len(st.seen_request_ids) % 100 == 0:
            evict_old_request_ids(st)
        st.seen_request_ids[request_id] = time.time()
        st.persist()  # flock + merge — visible to other instances


def check_skew(timestamp: str) -> None:
    """Reject a request whose timestamp is outside the skew window.

    Raises:
        GuardianError(-32006): the timestamp cannot be parsed, carries no
            UTC offset, or differs from the Guardian's clock by more than
            SKEW_WINDOW_MS.

    NOTE(review): an absent timestamp skips the check; confirm the envelope
    schema makes timestamp mandatory.
    """
    if not timestamp:
        return
    try:
        ts = parse_iso8601(timestamp)
    except (ValueError, AttributeError, TypeError):
        # TypeError: a non-string timestamp (e.g. a JSON number).
        raise GuardianError(-32006, f"TIMESTAMP_OUT_OF_WINDOW: cannot parse timestamp {timestamp!r}")
    if getattr(ts, "tzinfo", None) is None or ts.utcoffset() is None:
        # Subtracting a naive datetime from an aware one raises TypeError;
        # a timestamp without an offset is ambiguous, so reject it instead.
        raise GuardianError(-32006, f"TIMESTAMP_OUT_OF_WINDOW: timestamp {timestamp!r} has no UTC offset")
    now = datetime.datetime.now(datetime.timezone.utc)
    delta_ms = abs((now - ts).total_seconds() * 1000)
    if delta_ms > SKEW_WINDOW_MS:
        raise GuardianError(-32006, f"TIMESTAMP_OUT_OF_WINDOW: {int(delta_ms)}ms > {SKEW_WINDOW_MS}ms")


def check_signature(envelope: dict, session_id: str) -> None:
    """Verify the envelope signature; raise GuardianError(-32004) when it is invalid.

    NOTE(review): the secret is re-read on every call, so if it becomes
    unavailable at runtime (for example a failed rotation) this branch
    skips verification even when ACS_DEV_MODE is not set. Consider failing
    closed in that case.
    """
    if not _hmac_secret():
        return  # local-dev mode
    if not verify_signature(envelope, session_id=session_id):
        raise GuardianError(-32004, "SIGNATURE_INVALID")
def evaluate_ping(params: dict, request_id: str) -> dict:
    """§13: always allow; result.payload carries {status, echo, server_timestamp}."""
    echo = (params.get("payload") or {}).get("echo", "")
    return {
        "type": "final",
        "acs_version": ACS_VERSION,
        "request_id": request_id,
        "decision": "allow",
        "payload": {
            "status": "ok",
            "echo": echo,
            "server_timestamp": iso8601_now(),
        },
    }


def _is_protected_path(path: str) -> bool:
    """True when `path`, as written or after lexical normalisation, is under a protected prefix.

    The raw prefix test alone is bypassed by `/tmp/../etc/passwd` and
    `//etc/passwd`. The raw test is kept as well, so nothing that used to
    be denied becomes allowed. Symlinks and relative paths cannot be
    resolved here: the Guardian does not know the agent's filesystem or
    working directory.
    """
    normalized = os.path.normpath(path)
    # POSIX normpath deliberately preserves exactly two leading slashes.
    if normalized.startswith("//"):
        normalized = "/" + normalized.lstrip("/")
    # normpath strips a trailing slash; add one back so "/etc" itself
    # matches the "/etc/" prefix while "/etcetera" still does not.
    normalized += "/"
    return any(path.startswith(p) or normalized.startswith(p)
               for p in PROTECTED_PATH_PREFIXES)


def evaluate_step(method: str, params: dict, request_id: str, chain_hash: str) -> dict:
    """Apply the hard-coded policy to one `steps/*` request.

    Returns a final decision envelope (`allow` or `deny`, with `reasoning`
    and `reason_codes` on deny) that echoes `request_id` and `chain_hash`.
    Tool arguments of the wrong JSON type are denied rather than allowed to
    raise: a gate that crashes returns no verdict at all.
    """
    payload = params.get("payload") or {}

    base = {
        "type": "final",
        "acs_version": ACS_VERSION,
        "request_id": request_id,
        "chain_hash": chain_hash,
    }

    def deny(reasoning: str, code: str) -> dict:
        return {**base, "decision": "deny", "reasoning": reasoning, "reason_codes": [code]}

    if method == "steps/toolCallRequest":
        tool = payload.get("tool") or {}
        # Case-fold once so tool names from different platforms — "Bash"
        # (Claude Code), "Shell" (Cursor's beforeShellExecution), "shell"
        # (NAT YAML key used as instance_name) — all hit the same policy
        # branch. Caught via the live-LLM NAT manual test: a `shell` tool
        # (lowercase from the YAML key) silently bypassed the destructive-
        # Bash check because the comparison was case-sensitive; the agent
        # ran `rm -rf` against a sandbox dir and the canary was deleted.
        tool_name_raw = tool.get("name", "")
        if not isinstance(tool_name_raw, str):
            return deny("tool.name must be a string", "invalid_arguments")
        tool_name = tool_name_raw.lower()
        args = _unwrap_arguments(payload.get("arguments") or {})

        if tool_name == "task" and not ALLOW_SUBAGENT:
            return deny("Task tool (in-process subagent) is gated by default. Set ACS_ALLOW_SUBAGENT=1 to allow.",
                        "subagent_gated")

        if tool_name in ("bash", "shell"):
            cmd = args.get("command", "") or ""
            if not isinstance(cmd, str):
                return deny("command must be a string; cannot evaluate destructive patterns",
                            "invalid_arguments")
            match = _matches_destructive_bash(cmd)
            if match == "too_large":
                return deny(f"command length {len(cmd)} exceeds safe-scan cap "
                            f"({DESTRUCTIVE_SCAN_MAX_LEN}); cannot evaluate destructive patterns",
                            "input_too_large")
            if match is not None:
                return deny(f"destructive Bash pattern in: {cmd[:120]}", "destructive_command")

        if tool_name == "write":
            path = args.get("file_path", "") or ""
            if not isinstance(path, str):
                return deny("file_path must be a string; cannot evaluate protected paths",
                            "invalid_arguments")
            if _is_protected_path(path):
                return deny(f"write to protected system path: {path}", "protected_path")

        return {**base, "decision": "allow"}

    if method in INFORMATIONAL_METHODS:
        return {**base, "decision": "allow"}

    return deny(f"unknown method: {method}", "unknown_method")
+ if (_SPEC_VALIDATION_AVAILABLE + and method not in ("system/ping", "handshake/hello")): + errors = list(_REQUEST_ENVELOPE_VALIDATOR.iter_errors(request)) + if errors: + paths = "; ".join( + f"{'.'.join(str(p) for p in e.absolute_path) or ''}: {e.message}" + for e in errors[:5] + ) + return {"jsonrpc": "2.0", "id": request_id, + "error": {"code": -32600, + "message": f"Invalid Request: envelope failed schema: {paths}"}} + + # system/ping and handshake/hello are exempt from signature/chain/replay + # constraints per §13 (ping) and §4.1 (handshake bootstraps signing). + if method == "system/ping": + result = evaluate_ping(params, acs_request_id) + envelope = {"jsonrpc": "2.0", "id": request_id, "result": result} + return envelope + + if method == "handshake/hello": + try: + result = evaluate_handshake(params, acs_request_id) + except GuardianError as e: + return {"jsonrpc": "2.0", "id": request_id, + "error": {"code": e.code, "message": e.message}} + envelope = {"jsonrpc": "2.0", "id": request_id, "result": result} + if _hmac_secret(): + sign_envelope(envelope, session_id=session_id) + return envelope + + # Standard hook traffic — full pipeline + try: + check_signature(request, session_id) + check_skew(timestamp) + check_replay(session_id, acs_request_id) + except GuardianError as e: + return {"jsonrpc": "2.0", "id": request_id, + "error": {"code": e.code, "message": e.message}} + + # Compute chain entry BEFORE evaluating, then include head in result. 
class GuardianHandler(http.server.BaseHTTPRequestHandler):
    """HTTP transport: one JSON-RPC request per POST, one JSON response back.

    Everything that arrives here is untrusted, so malformed input is
    answered with a JSON-RPC error instead of raising inside the handler
    thread (which would drop the connection without any verdict).
    """

    def do_POST(self) -> None:  # noqa: N802
        """Read, validate, log and dispatch one request, then send the response."""
        # Reject oversized requests before reading the body. Defends
        # against a DoS attacker who sets Content-Length to a huge value
        # and expects us to allocate that much. MAX_REQUEST_BODY_BYTES
        # matches the handshake's advertised max_payload_size_bytes.
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            # A negative length would make rfile.read() block until EOF.
            self._respond(400, {"jsonrpc": "2.0", "id": None,
                                "error": {"code": -32600, "message": "Invalid Content-Length"}})
            return
        if length > MAX_REQUEST_BODY_BYTES:
            self._respond(413, {"jsonrpc": "2.0", "id": None,
                                "error": {"code": -32600,
                                          "message": f"Request body {length} bytes exceeds {MAX_REQUEST_BODY_BYTES} cap"}})
            return
        raw = self.rfile.read(length)
        try:
            request = json.loads(raw.decode("utf-8"))
        except ValueError:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            self._respond(400, {"jsonrpc": "2.0", "id": None,
                                "error": {"code": -32700, "message": "Parse error"}})
            return
        if not isinstance(request, dict):
            # Arrays (JSON-RPC batches) and bare scalars are not supported.
            self._respond(400, {"jsonrpc": "2.0", "id": None,
                                "error": {"code": -32600,
                                          "message": "Invalid Request: expected a JSON object"}})
            return

        method = request.get("method", "")
        params = request.get("params") or {}
        meta = params.get("metadata") if isinstance(params, dict) else None
        if not isinstance(params, dict) or not isinstance(meta or {}, dict):
            self._respond(400, {"jsonrpc": "2.0", "id": request.get("id"),
                                "error": {"code": -32602,
                                          "message": "Invalid params: params and params.metadata must be objects"}})
            return
        meta = meta or {}
        # str() so a non-string session or request id cannot break the log line.
        sys.stderr.write(
            f"[guardian] {method} session={str(meta.get('session_id', '?'))[:8]} "
            f"req={str(params.get('request_id', '?'))[:8]}\n"
        )
        sys.stderr.flush()

        try:
            response = handle_request(request)
        except Exception as exc:  # noqa: BLE001 - always answer, never drop the connection
            sys.stderr.write(f"[guardian] internal error: {exc!r}\n")
            sys.stderr.flush()
            self._respond(500, {"jsonrpc": "2.0", "id": request.get("id"),
                                "error": {"code": -32603, "message": "Internal error"}})
            return
        # Log the verdict so operators can see allow/deny in the terminal,
        # not just "envelope received". Critical for live debugging.
        result = (response or {}).get("result") or {}
        decision = result.get("decision")
        if decision:
            tail = ""
            if decision == "deny":
                tail = f" — {result.get('reasoning', '')[:80]}"
            sys.stderr.write(
                f"[guardian]   → {decision}{tail}\n"
            )
            sys.stderr.flush()
        self._respond(200, response)

    def _respond(self, status: int, body: dict) -> None:
        """Serialise `body` as JSON and send it with HTTP status `status`."""
        payload = json.dumps(body).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, fmt: str, *args: Any) -> None:
        """Silence the base class's per-request access log; do_POST logs its own lines."""
        return
+ if not load_hmac_secret() and os.environ.get("ACS_DEV_MODE", "0") != "1": + sys.stderr.write( + "[guardian] REFUSING TO START: no signing secret configured.\n" + " Configure one of:\n" + " ACS_HMAC_SECRET_FILE=/path/to/key (preferred; chmod 600)\n" + " ACS_HMAC_SECRET= (env-var fallback)\n" + " Generate a key:\n" + " openssl rand -hex 32 > /etc/acs/hmac.key && chmod 600 /etc/acs/hmac.key\n" + " For local development without a secret (NON-CONFORMANT per §10):\n" + " ACS_DEV_MODE=1\n" + ) + return 1 + + if not load_hmac_secret(): + sys.stderr.write( + "[guardian] WARNING: running in ACS_DEV_MODE — envelope signing disabled.\n" + " ACS-Core baseline integrity (§10) is NOT satisfied.\n" + ) + + class ReusableServer(socketserver.ThreadingMixIn, socketserver.TCPServer): + allow_reuse_address = True + daemon_threads = True + + with ReusableServer((args.host, args.port), GuardianHandler) as httpd: + sys.stderr.write(f"[guardian] listening on {args.host}:{args.port} (case-insensitive-tool-policy)\n") + sys.stderr.flush() + try: + httpd.serve_forever() + except KeyboardInterrupt: + pass + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/adapters/nat/README.md b/adapters/nat/README.md new file mode 100644 index 0000000..7bea5f3 --- /dev/null +++ b/adapters/nat/README.md @@ -0,0 +1,350 @@ +# ACS adapter: NVIDIA Agent Toolkit (NAT) + +A drop-in middleware that wires [NVIDIA NeMo Agent Toolkit](https://github.com/NVIDIA/NeMo-Agent-Toolkit) to an ACS Guardian. No agent code changes; YAML configuration only. + +## How it works + +NAT exposes a `FunctionMiddleware` abstraction that wraps any function — tools, sub-workflows, LLM calls, retrievers, memory operations — at the call site. This adapter is that middleware. On every call NAT routes through a wired attachment point, the adapter: + +1. Receives the `InvocationContext` (function name, arguments, output) from NAT's middleware pipeline. +2. 
Translates it to an ACS JSON-RPC envelope ([mapping.md](./mapping.md)). +3. Signs it with HMAC-SHA256 (`ACS_HMAC_SECRET` or `ACS_HMAC_SECRET_FILE`). +4. POSTs to the Guardian. +5. Verifies the response signature and the JSON-RPC error/result shape. +6. Applies the verdict: `allow` passes through; `deny` raises `ACSGuardianDenied` (or sets `InvocationAction.SKIP` on NAT releases that expose it); `modify` mutates `context.modified_kwargs`; `ask`/`defer` substitute to deny at the middleware boundary. +7. Records a `steps/toolCallResult` envelope in `post_invoke` and redacts the output on deny. + +The handshake/hello fires once per session, cached in process state so subsequent events skip the round-trip. + +Independently of the middleware path, the adapter subscribes to NAT's `IntermediateStepManager` for `WORKFLOW_START` / `WORKFLOW_END` events and emits `steps/sessionStart` + `steps/userMessage` (on workflow start with input) and `steps/agentResponse` + `steps/sessionEnd` (on workflow end with output). The observer is auto-subscribed on the first `pre_invoke` call. **The lifecycle stream is observation-only; it cannot block calls.** Enforcement is the middleware path's job. + +### Decision honoring (§6.4) + +ACS-Core §6.4 requires the framework to wait for the verdict and apply it before the action executes. NAT provides this through `function_middleware_invoke`: `pre_invoke` must complete before `call_next(...)` runs, so a Guardian deny (whether surfaced as `ACSGuardianDenied` or `InvocationAction.SKIP`) is applied before the wrapped function would execute. Verified in `tests/test_live.py` (deny tests assert `executed["count"] == 0`). + +## Install — five steps + +You need: a running Guardian, a shared HMAC secret, and a NAT `workflow.yml` with `acs_guardian` wired at every attachment point. `wire.py` does step 3 for you and `--check` keeps it true forever after. + +Commands below assume `$ACS_REPO` points at your local clone of `Agent-Control-Standard/ACS`. 
Export it once: + +```bash +export ACS_REPO=/path/to/your/clone # e.g., $HOME/code/ACS +``` + +### 1. Generate the shared HMAC secret + +Both the adapter and the Guardian read this file. Mode 0600 is enforced — anything looser and the adapter refuses to start. + +```bash +mkdir -p ~/.acs +openssl rand -hex 32 > ~/.acs/hmac.key +chmod 600 ~/.acs/hmac.key +``` + +### 2. Run the Guardian + +The example Guardian is for testing; a production Guardian is the same wire protocol with a real policy engine attached. + +```bash +ACS_HMAC_SECRET_FILE=~/.acs/hmac.key \ + python3 "$ACS_REPO/adapters/example-guardian/example_guardian.py" \ + --port 8787 +``` + +Keep this terminal open. You should see `[guardian] listening on 127.0.0.1:8787`. + +### 3. Wire `workflow.yml` + +```bash +cd /path/to/your/project # where your workflow.yml lives +pip install nvidia-nat-core ruamel.yaml + +# Preview the change (dry-run, no file write) +python3 "$ACS_REPO/adapters/nat/wire.py" \ + --workflow=workflow.yml \ + --guardian-url=http://127.0.0.1:8787/acs \ + --default-deny + +# Apply (timestamped backup at workflow.yml.bak.) +python3 "$ACS_REPO/adapters/nat/wire.py" \ + --workflow=workflow.yml \ + --guardian-url=http://127.0.0.1:8787/acs \ + --default-deny \ + --write +``` + +`wire.py` walks every attachment point in your YAML and inserts `acs_guardian`: + +| YAML location | What `wire.py` does | +|---|---| +| top-level `middleware:` block | adds the `acs_guardian:` definition with your guardian_url, default_deny, timeout_s | +| `workflow.middleware` | prepends `acs_guardian` (policy gate runs before content filters) | +| every `function_groups.*.middleware` | prepends `acs_guardian` | +| every `functions.*.middleware` that overrides its group | prepends `acs_guardian` | + +Every line we add carries a `# acs-adapter-wired` marker. Re-running is a no-op. `--unwire --write` removes exactly what we added and nothing else. 
+ +To remove later: `python3 wire.py --workflow=workflow.yml --unwire --write`. + +### 4. Restart your NAT process + +NAT loads `workflow.yml` at startup. Existing processes keep their pre-wiring config until restart. + +### 5. Verify the install + +```bash +# Lint — exits non-zero if any attachment point is missing acs_guardian +python3 "$ACS_REPO/adapters/nat/wire.py" --workflow=workflow.yml --check + +# Run the live tests against your installed adapter + Guardian +cd "$ACS_REPO/adapters/nat" +python3 -m unittest tests.test_live -v +``` + +`wire.py --check` is the load-bearing CI gate — see [Coverage discipline](#coverage-discipline-the-yaml-only-rule) below. + +## Prerequisites + +- **NAT installed** — `pip install nvidia-nat-core` (>=1.7.0) +- **Python 3.10–3.12** (NAT 1.7.0 ships no wheel for 3.13) with `ruamel.yaml`, `jsonschema`, `rfc8785` — `pip install -r requirements.txt -r ../requirements-test.txt` +- **Canonical ACS schemas** reachable on disk. Default `/tmp/acs-spec-source/specification/v0.1.0/`; override via `ACS_SPEC_DIR`. Clone with: + ```bash + git clone https://github.com/Agent-Control-Standard/ACS.git /tmp/acs-spec-source + ``` + +## Coverage discipline (the YAML-only rule) + +NAT's middleware fires per-attachment-point: workflow, function_groups, and individual functions with their own `middleware:` block. If an attachment point is not wired, every call routed through it bypasses the Guardian. This is a structural property of NAT, not a bug in the adapter — Cursor and Claude Code get framework-wide interception for free; NAT does not. + +**The three rules for ACS-conformant NAT deployments:** + +1. **Define every tool in YAML.** Functions registered at runtime in Python code (not in `workflow.yml`) cannot be wired by `wire.py` and will not be gated by the middleware path. If you need full gating, every function lives in the YAML. +2. **Run `wire.py --check` in CI** on every workflow YAML you ship. It exits non-zero if any attachment point is missing `acs_guardian`. 
Treat that as a build failure. +3. **Re-run `wire.py --write` after any YAML edit** that adds or restructures `function_groups`, `workflow`, or individual `functions`. The marker comments make it idempotent. + +What this gives you: **the same hard-stop guarantee Cursor's `failClosed: true` gives.** The middleware fires synchronously *before* the function executes; a Guardian deny raises `ACSGuardianDenied` (or sets `InvocationAction.SKIP`) and NAT does not call the function. + +**Backstop for the dynamic-registration caveat:** the adapter also subscribes to NAT's `IntermediateStepManager`, which fires for every call NAT routes regardless of middleware wiring. The Guardian therefore *observes* every call (audit trail + trace), even ones the middleware path missed. Subscribers can't *block* — they're notification-only — so this is detection, not enforcement. Audit Guardian logs for `toolCallRequest` envelopes that arrived via lifecycle but never via middleware; that delta is your coverage gap. + +## Smoke tests + +Seven tests, ordered from broadest to most specific. Run any or all of them. + +### Smoke #1 — automated test suite (~10s) + +```bash +cd "$ACS_REPO/adapters" + +python3 -m unittest test_acs_core_conformance +# Expect: Ran 48 tests / OK (every ACS-Core MUST) + +(cd nat && python3 -m unittest discover tests) +# Expect: schema + adapter + lifecycle + failure_modes pass; live tests +# skip cleanly if nvidia-nat-core is not installed. + +(cd _common && python3 -m unittest discover tests) +# Expect: Ran 38 tests / OK (security + edge cases) +``` + +### Smoke #2 — `wire.py --check` (CI gate) + +```bash +python3 "$ACS_REPO/adapters/nat/wire.py" \ + --workflow=/path/to/your/workflow.yml --check +``` + +Exit 0 → every attachment point in the YAML is wired with `acs_guardian`. Exit 1 → at least one gap; the output names the file path, line number, and which kind of attachment point (workflow / function_group / function). Wire this into CI as a build gate. 
+ +### Smoke #3 — end-to-end conformance (e2e_check.py) + +```bash +cd "$ACS_REPO/adapters/nat" +python3 e2e_check.py +``` + +Five fully-automated scenarios against the **real `example_guardian.evaluate_step` policy** (same one the production example Guardian uses), wired through a recording Guardian so the script can assert on every wire envelope: + +| # | Scenario | What it verifies | +|---|---|---| +| 1 | ALLOW | benign Bash function executes; return value flows back; handshake + toolCallRequest received; every envelope HMAC-signed; every envelope validates against the canonical ACS JSON Schema | +| 2 | DESTRUCTIVE | `rm -rf` on a victim dir with a canary file → real policy denies via `destructive_command` regex; `ACSGuardianDenied` raised; **canary file still on disk** (counterproof — the unit-test counter alone can false-pass if the function returns early for any reason) | +| 3 | READ-TOOL | different tool, same wire contract; arguments wrapped per `tool-call-request.json:26-37`; envelope.arguments.file_path.value matches probe path | +| 4 | HANDSHAKE-ONCE | 3 sequential invocations on the same middleware → exactly 1 `handshake/hello` envelope arrived | +| 5 | LIFECYCLE | `WORKFLOW_START`/`WORKFLOW_END` pushed through `IntermediateStepManager` → emits `steps/sessionStart` + `steps/userMessage` + `steps/agentResponse` + `steps/sessionEnd` envelopes with the workflow input/output in the payload (the observability backstop must actually backstop) | + +The final line is `YOUR NAT INSTALL IS ACS-CONFORMANT` (exit 0) or a per-scenario failure list (exit 1). + +Schema validation is against the canonical `request-envelope.json` from `ACS_SPEC_DIR` — adapter ↔ spec, not adapter ↔ test fixture. A drift between adapter and the spec fails this check, not the other way around. 
+ +### Smoke #4 — disposition matrix (test_dispositions_live.py) + +```bash +cd "$ACS_REPO/adapters/nat" +python3 -m unittest tests.test_dispositions_live -v +``` + +End-to-end verification that EVERY ACS disposition (ALLOW, DENY, MODIFY, ASK, DEFER, post_invoke DENY) is honored on the LangChain-shaped input path NAT's runtime actually uses (input captured as `modified_args[0]` Pydantic model, not `modified_kwargs`). Catches the silent-bypass class of bug where MODIFY overrides drop or post_invoke redaction crashes on Pydantic strict-fields. + +### Smoke #5 — alternate live tests (test_live.py) + +```bash +cd "$ACS_REPO/adapters/nat" +python3 -m unittest tests.test_live -v +``` + +Five tests spin up the example Guardian (subprocess, not in-process), construct a real NAT middleware invocation, and assert side-effect counters: a benign function executes, a destructive function is denied, write-to-protected-path is denied, fail-closed blocks on Guardian unreachable, fail-open lets the call through with an audit event. `e2e_check.py` (Smoke #3) is the broader assertion surface; `test_live.py` is the minimal unit-style guarantee that the enforcement contract holds. + +### Smoke #6 — audit-cause differentiation + +Verifies the adapter's audit log distinguishes "Guardian unreachable" (ops issue), "Guardian rejected the envelope" (clock skew, signature, replay), and "adapter exception" (bug). Same fail-posture in all three; different remediation. 
+ +Trigger an unreachable Guardian (with `default_deny: true` in the middleware config): + +```python +# In a script with NAT installed +from acs_adapter import ACSMiddleware, ACSMiddlewareConfig +# Point at a dead port to force transport_failure +cfg = ACSMiddlewareConfig(guardian_url="http://127.0.0.1:1/dead", default_deny=True) +mw = ACSMiddleware(cfg) +# Call mw.pre_invoke(context) — observe stderr for: +# ACS_AUDIT {"acs_audit_event": "decision_failure_fail_closed", "cause": "transport_failure", ...} +``` + +Send a malformed envelope (Guardian returns -32600 Invalid Request): +``` +ACS_AUDIT {"acs_audit_event": "decision_failure_fail_closed", "cause": "malformed_envelope_response", ...} +``` + +Send a stale request (Guardian returns -32006 timestamp out of window): +``` +ACS_AUDIT {"acs_audit_event": "decision_failure_fail_closed", "cause": "timestamp_out_of_window_response", ...} +``` + +Same `acs_audit_event`; the `cause` field is what operators grep on. + +### Smoke #7 — coverage-gap detection + +Have your Guardian log every received envelope's `method` and the source (middleware-routed via `steps/toolCallRequest` vs lifecycle-routed via `steps/toolCallRequest` from `WORKFLOW_START`). After a typical workflow run, every tool call should appear in both streams. A call appearing in lifecycle but not middleware → that function is not wired (re-run `wire.py --check` to locate it). + +## Files + +- `acs_adapter.py` — the middleware class + config + NAT registration. Stdlib + nvidia-nat-core only. +- `wire.py` — comment-preserving YAML installer + linter (`--check`, `--write`, `--unwire`). Requires `ruamel.yaml`. +- `e2e_check.py` — automated end-to-end conformance check (5 scenarios, real `example_guardian` policy, canary-based assertions). +- `workflow.yml.example` — drop-in NAT workflow YAML wiring the middleware. +- `requirements.txt` — runtime deps (`nvidia-nat-core`, `ruamel.yaml`). +- `mapping.md` — NAT lifecycle point → ACS step method table. 
+- `tests/test_adapter.py` — integration tests against the real NAT API + the `_extract_arguments` regression set covering the LangChain-shape input bug. +- `tests/test_live.py` — 5 live workflow tests exercising NAT's `function_middleware_invoke` orchestration end-to-end. +- `tests/test_dispositions_live.py` — every ACS disposition (ALLOW, DENY, MODIFY, ASK, DEFER, post_invoke DENY) verified on the `modified_args[0]` Pydantic-model path the LangChain react_agent uses in production. +- `tests/test_envelope_schema.py` — JSON-RPC envelope shape validation against canonical schemas. +- `tests/test_lifecycle.py` — `IntermediateStepManager` subscription emits the 6-minimum hooks. +- `tests/test_failure_modes.py` — transport failures, signature checks, replay, timestamp skew, JSON-RPC errors. +- `tests/example_payloads.md` — masked real-world payload examples showing the in-process `InvocationContext` shape and what the adapter sends. + +The adapter shares `adapters/_common/` with the Claude Code and Cursor adapters (signing, handshake cache, audit events, URL allowlist, JSON-RPC error code → cause mapping). 
+ +## How it differs from the Claude Code / Cursor adapters + +| Aspect | Claude Code / Cursor | NAT | +|---|---|---| +| Interception mechanism | Shell-command-with-stdin-JSON (process spawn per hook) | In-process Python middleware class (`FunctionMiddleware`) | +| Configuration | `settings.json` / `hooks.json` | NAT workflow YAML `middleware:` block | +| Coverage default | Framework-wide (every hook event fires) | Opt-in per attachment point (YAML must wire each) | +| Block mechanism | JSON stdout deny shape, or `exit 2` | Raise `ACSGuardianDenied` (NAT 1.7.0) or set `InvocationAction.SKIP` (NAT dev) | +| Modify mechanism | Updated input field in JSON response | Mutate `context.modified_kwargs` / `context.output` | +| Lifecycle coverage | Whatever events the framework's hook surface exposes | Every function NAT wraps — tools, LLMs, retrievers, memory, sub-workflows — **for observability**; enforcement still per-attachment-point | +| Headless CLI for tests | `claude --print` (Claude); none (Cursor — semi-automated) | None needed — NAT runs in-process; `test_live.py` is fully automated | + +The shared protocol layer is identical: the Guardian sees the same ACS JSON-RPC shape from every adapter. + +## Configuration + +### YAML config (the middleware block — `wire.py` populates this) + +| Field | Default | Purpose | +|---|---|---| +| `_type` | `acs_guardian` (required) | Registers the middleware via NAT's `register_middleware` decorator. | +| `guardian_url` | `http://127.0.0.1:8787/acs` | Guardian endpoint. http/https only. | +| `default_deny` | `false` | Fail-open with audit (§6.4 default). Set `true` for fail-closed. | +| `timeout_s` | `5.0` | Per-request Guardian round-trip timeout. | +| `session_id` | (auto, per-process) | UUID; auto-generated and stable for the process lifetime. | +| `target_function_or_group` | (unset) | Optional metadata label; derives `agent_id` if `ACS_AGENT_ID` env is unset. 
| + +### Environment variables (read by the adapter at runtime) + +| Variable | Default | Purpose | +|---|---|---| +| `ACS_HMAC_SECRET_FILE` | (unset) | Path to a 0600 file holding the shared HMAC secret. | +| `ACS_HMAC_SECRET` | (unset) | Inline secret. Less secure (visible in `ps eauxw`). Prefer the file. | +| `ACS_AGENT_ID` | derived from `target_function_or_group` | Stable agent identifier sent in `metadata.agent_id`. | +| `ACS_SESSION_ID` | derived | Overrides the auto-generated session_id. | +| `ACS_GUARDIAN_HOST_ALLOWLIST` | (unset) | Optional comma-separated hostname allowlist (defense in depth). | + +## On-disk state + +NAT runs the adapter in-process; there is no per-event subprocess state to persist. The handshake is cached in the middleware instance's memory for the process lifetime; restarting the process triggers a fresh handshake on the next call. The Guardian-side state (`~/.cache/acs-guardian-state/.json`) is the same as for the other adapters — chain head + replay set, survives Guardian restart. + +## Conformance status + +Honest, MUST-by-MUST against `docs/spec/conformance.md`: + +| ACS-Core item | Status | +|---|---| +| Handshake (`handshake/hello`) | ✓ on first `pre_invoke`; cached per session in process memory | +| JSON-RPC envelope shape (`request-envelope.json`) | ✓ validates against canonical schema (`test_envelope_schema.py`) | +| Hook taxonomy (6 minimum) | ✓ all 6: `sessionStart`, `userMessage`, `toolCallRequest`, `toolCallResult`, `agentResponse`, `sessionEnd`. Function hooks from `FunctionMiddleware`; lifecycle hooks from `IntermediateStepManager` subscription. Verified in `test_lifecycle.py`. | +| Dispositions | ALLOW / DENY / MODIFY supported on **function-middleware (pre-execution)** hooks (`pre_invoke` for every wrapped function — tools, sub-workflows, LLM, retrievers). ASK/DEFER substituted to DENY at the middleware boundary; deployments wanting pause-and-resume should compose with NAT's HITL middleware (`nat.middleware.hitl`). 
**Lifecycle hooks from the `IntermediateStepManager` subscription (`steps/sessionStart`, `steps/userMessage`, `steps/agentResponse`, `steps/sessionEnd`) are observation-only** — subscription callbacks cannot veto a NAT event after it fires. See `mapping.md`. | +| Unknown-disposition fail posture | ✓ | +| Post-tool deny redaction | ✓ `post_invoke` clears `context.output = None` and emits an `ACS_AUDIT` `post_invoke_redacted` event per §6.4 output-redaction gate. (NAT's `InvocationContext` is a strict Pydantic model with `validate_assignment=True` — ad-hoc attributes like `acs_post_invoke_redacted` would crash; downstream consumers MUST read the audit event for the redaction signal, not an extra attribute.) | +| SessionContext + published `chain_hash` | ✓ session_id coerced to UUID; Guardian computes rolling chain | +| Replay protection | ✓ Guardian enforcement (REPLAY_DETECTED -32005, TIMESTAMP_OUT_OF_WINDOW -32006); audit cause distinguishes both | +| Baseline integrity (HMAC-SHA256) | ✓ when `ACS_HMAC_SECRET[_FILE]` is set; signed responses verified by adapter (pre_invoke + post_invoke reject SIGNATURE_INVALID) | +| Decision honoring (§6.4) | ✓ NAT's middleware contract guarantees the function will not execute if `pre_invoke` raises or sets SKIP — verified in `test_live.py` (deny tests assert `executed["count"] == 0`); fail-open emits `ACS_AUDIT` events | +| `cause` field on every audit event | ✓ `transport_failure`, `adapter_exception`, `response_signature_invalid`, plus 7 JSON-RPC error code → cause mappings (`unsupported_version_response`, `provenance_required_response`, `signature_invalid_response`, `replay_detected_response`, `timestamp_out_of_window_response`, `malformed_envelope_response`, `parse_error_response`) with `guardian_error_response` as the catch-all fallback for unknown codes | +| Liveness `system/ping` | ✓ Guardian-side | +| `request_id_ref` correlation | ✓ `post_invoke` populates with a deterministic uuid5 derived from session + function + 
kwargs, linking result to request | +| **Coverage of every tool call** | ⚠ **opt-in via YAML wiring** — see [Coverage discipline](#coverage-discipline-the-yaml-only-rule) above. `wire.py --check` is the CI gate that makes this enforceable. | + +## How NAT's defense middleware composes with this + +NAT ships `defense_middleware` (in `nvidia-nat-security`) for prompt-injection and PII checks. The ACS adapter does not replace those — it composes with them. A NAT YAML can list multiple middlewares per group, and they execute in order. `wire.py` always prepends `acs_guardian` to the chain so the policy gate runs before content filters — denied calls short-circuit before expensive content analysis. Recommended composition: ACS first, then NAT defense middleware, then the function. + +## Compatibility + +The adapter works across multiple NAT releases by feature-detecting the block mechanism: + +- **NAT 1.7.0 (public release):** blocks by raising `ACSGuardianDenied` (NAT documents "Raises: Any exception to abort execution" for `pre_invoke`). +- **NAT dev branch (with `InvocationAction.SKIP`):** prefers setting `context.action = InvocationAction.SKIP` (cleaner, no exception in logs). The adapter detects the symbol's availability at import time. + +## Troubleshooting + +| Symptom | Likely cause | +|---|---| +| `wire.py --check` reports gaps after wiring | YAML was edited after wire.py ran. Re-run `wire.py --write`. | +| `wire.py --check` keeps reporting gaps for the same function | That function is defined in Python code, not YAML. wire.py can't reach it. Move the definition into YAML, or accept the coverage gap and rely on the lifecycle-stream backstop. | +| `wire.py` errors out with "ruamel.yaml" missing | `pip install ruamel.yaml`. | +| NAT runs functions as if no Guardian exists | NAT loaded the workflow before `wire.py --write` ran. **Restart your NAT process.** | +| Every call gets denied | Likely `default_deny: true` in middleware config + Guardian down. 
Check the Guardian process is running. | +| Adapter says `SecretFilePermissionsError` | HMAC secret file is mode > 0600. `chmod 600 ~/.acs/hmac.key`. | +| Guardian returns `-32004 SIGNATURE_INVALID` | Adapter and Guardian aren't reading the same secret. `cat ~/.acs/hmac.key` on both sides should match. Audit log shows `cause=signature_invalid_response`. | +| Guardian returns `-32005 REPLAY_DETECTED` | Same `request_id` sent twice. Audit log shows `cause=replay_detected_response`. Usually a retry loop bug. | +| Guardian returns `-32006 TIMESTAMP_OUT_OF_WINDOW` | Clock skew between adapter and Guardian > 5 minutes. Sync time. Audit log shows `cause=timestamp_out_of_window_response`. | +| `lifecycle_subscribe_failed` audit event | No active NAT Context (called middleware directly outside a workflow). Test-only path; harmless in production. | + +Everything the adapter does that's not policy decision-making is audited on stderr as a JSON line prefixed `ACS_AUDIT`. The `cause` field tells you which failure mode fired. + +## Running the tests + +```bash +# Tests that don't need NAT (schema, wire format) run anywhere +cd "$ACS_REPO/adapters/nat" +python3 -m unittest discover tests + +# Tests that drive real NAT need it installed +pip install -r requirements.txt +python3 -m unittest tests.test_live -v +``` + +If NAT is not installed, NAT-dependent test classes are skipped cleanly (`@unittest.skipUnless`). diff --git a/adapters/nat/acs_adapter.py b/adapters/nat/acs_adapter.py new file mode 100644 index 0000000..cab1b5d --- /dev/null +++ b/adapters/nat/acs_adapter.py @@ -0,0 +1,853 @@ +""" +ACS middleware for the NVIDIA Agent Toolkit (NAT / NeMo Agent Toolkit). + +Wires NAT's Middleware abstraction to an ACS Guardian. Intercepts every +function (tool / sub-workflow / LLM / etc.) call configured to use this +middleware, sends an ACS JSON-RPC request to the Guardian, and applies +the verdict to NAT's invocation context. 
+ +Schema sources: + - NAT public repo `packages/nvidia_nat_core/src/nat/middleware/` + - Agent-Control-Standard/ACS `specification/v0.1.0/` + +Requires: + pip install nvidia-nat-core + (and nvidia-nat-security if you also want to register alongside NAT's + defense middleware suite) + +Compatibility: + - nvidia-nat-core >= 1.7 (public release). Block via raising + ACSGuardianDenied; modify via setting context.modified_kwargs / output. + - Future versions that expose InvocationAction.SKIP are also supported: + if the symbol is importable, the adapter sets context.action instead + of raising, which produces cleaner traces. + +Environment variables: + ACS_AGENT_ID Explicit agent_id for metadata. If unset, derived from + config.target_function_or_group, falling back to "nat". + +Usage in NAT YAML: + + middleware: + acs_guardian: + _type: acs_guardian + guardian_url: http://127.0.0.1:8787/acs + target_function_or_group: + default_deny: true + + function_groups: + my_tools: + middleware: [acs_guardian] + + workflow: + _type: react_agent + middleware: [acs_guardian] +""" +from __future__ import annotations + +import datetime +import hashlib +import json +import os +import sys +import threading +import urllib.error +import urllib.request +import uuid +from dataclasses import is_dataclass, asdict +from pathlib import Path +from typing import Any, Optional + +# Bootstrap shared helpers from sibling adapters/_common/ +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "_common")) +from acs_common import ( # noqa: E402 + audit_event, + ensure_session_handshake, + guardian_error_cause, + iso8601_now as _common_iso8601_now, + sign_envelope, + validate_guardian_url, + verify_signature, +) + +try: + from nat.middleware.function_middleware import FunctionMiddleware + from nat.middleware.middleware import InvocationContext + from nat.data_models.middleware import FunctionMiddlewareBaseConfig + _NAT_AVAILABLE = True +except ImportError: + FunctionMiddleware = object # 
type: ignore[assignment, misc] + InvocationContext = Any # type: ignore[assignment, misc] + FunctionMiddlewareBaseConfig = object # type: ignore[assignment, misc] + _NAT_AVAILABLE = False + +# Lifecycle observer support: subscribes to NAT's IntermediateStepManager +# to fire ACS sessionStart / userMessage / agentResponse / sessionEnd at +# the workflow boundary. Without this, NAT alone only fires +# toolCallRequest / toolCallResult and does not satisfy ACS-Core's +# 6-hook taxonomy minimum (conformance.md:19). +try: + from nat.data_models.intermediate_step import IntermediateStepType # type: ignore[import-not-found] + from nat.builder.context import Context as _NATContext # type: ignore[import-not-found] + _HAS_LIFECYCLE = True +except ImportError: + IntermediateStepType = None # type: ignore[assignment] + _NATContext = None # type: ignore[assignment] + _HAS_LIFECYCLE = False + +# InvocationAction.SKIP is on the dev branch; not in NAT 1.7.0 release. +try: + from nat.middleware.middleware import InvocationAction # type: ignore[attr-defined] + _HAS_INVOCATION_ACTION = True +except (ImportError, AttributeError): + InvocationAction = None # type: ignore[assignment] + _HAS_INVOCATION_ACTION = False + +try: + from nat.cli.register_workflow import register_middleware + _HAS_REGISTRATION = True +except ImportError: + register_middleware = None # type: ignore[assignment] + _HAS_REGISTRATION = False + +try: + from pydantic import Field +except ImportError: + Field = lambda **kw: None # type: ignore[assignment, misc] + + +ACS_VERSION = "0.1.0" + + +class ACSGuardianDenied(Exception): + """Raised by the ACS middleware to block a function call. + + NAT's documented blocking mechanism is to raise from pre_invoke (the + docstring: "Raises: Any exception to abort execution"). This custom + exception type lets observers and tests distinguish a policy-driven + block from unrelated errors. 
+ """ + + +# ----- Config ----- + +if _NAT_AVAILABLE: + + class ACSMiddlewareConfig(FunctionMiddlewareBaseConfig, name="acs_guardian"): # type: ignore[misc, valid-type, call-arg] + """Config schema for the ACS NAT middleware. + + Registered with NAT under `_type: acs_guardian`. + """ + guardian_url: str = Field( + default="http://127.0.0.1:8787/acs", + description="ACS Guardian endpoint to POST requests to.", + ) + default_deny: bool = Field( + default=False, + description="If True, block the call when the Guardian is unreachable, returns malformed responses, or returns an unknown disposition. Default False matches the ACS spec default (§6.4 fail-open with audit event); set True for deployments that prefer fail-closed availability tradeoff.", + ) + session_id: Optional[str] = Field( + default=None, + description="Session id sent on every request. Auto-generated per-process if absent. Coerced to UUID format.", + ) + timeout_s: float = Field( + default=5.0, + description="Per-request timeout for the Guardian round-trip.", + ) + target_function_or_group: Optional[str] = None + target_location: str = "input" + + +# ----- Helpers (module-scope so tests can exercise them without instantiating the middleware) ----- + + +def _iso8601_now() -> str: + return _common_iso8601_now() + + +def _coerce_uuid(raw: str | None) -> str: + """request-envelope.json:66 wants session_id as UUID. 
Accept a UUID + directly; otherwise derive a stable UUID5 from whatever NAT gave us.""" + if not raw: + return str(uuid.uuid4()) + try: + return str(uuid.UUID(raw)) + except (ValueError, AttributeError, TypeError): + return str(uuid.uuid5(uuid.NAMESPACE_URL, f"nat:{raw}")) + + +def _wrap_arguments(raw: dict[str, Any]) -> dict[str, Any]: + """tool-call-request.json:26-37 — each arg is {value, provenance?}.""" + return {k: {"value": v} for k, v in (raw or {}).items()} + + +def _stringify_step_data(data: Any) -> str: + """Best-effort extraction of human-readable content from a NAT + IntermediateStepPayload.data. The shape varies per event_type and per + framework; we pull out a string when possible and json-dump otherwise. + Returns empty string when there is genuinely nothing to forward.""" + if data is None: + return "" + if isinstance(data, str): + return data + # NAT often wraps inputs/outputs in StreamEventData with .input/.output fields + for attr in ("input", "output", "chunk", "value", "content"): + v = getattr(data, attr, None) + if isinstance(v, str) and v: + return v + if v is not None and not isinstance(v, (dict, list, type(None))): + s = str(v) + if s and s != "None": + return s + if isinstance(data, (dict, list)): + try: + return json.dumps(data, default=str) + except (TypeError, ValueError): + return str(data) + return str(data) + + +KNOWN_DECISIONS = frozenset({"allow", "deny", "modify", "ask", "defer"}) + + +def _extract_arguments(context: Any) -> dict[str, Any]: + """Build a flat {arg_name: value} dict from NAT's invocation context. + + NAT's middleware chain captures the function input from + `Function.ainvoke(value)` as `modified_args[0]` — typically a + Pydantic model returned by `_convert_input(value)`. `modified_kwargs` + is only populated when the caller passes named kwargs directly, + which the LangChain `react_agent` path does not. 
Reading only from + `modified_kwargs` (the original adapter code) yielded an empty + arguments dict, so the Guardian's `args.get("command")` always + returned "" — and a real `rm -rf` driven by an LLM-driven agent + silently bypassed the destructive-Bash policy. + + This helper: + 1. Starts with `modified_kwargs` (highest fidelity when present). + 2. Walks `modified_args`. For each Pydantic model / dataclass / + dict, flattens its fields into the result; for scalars, + falls back to the function's input schema field names (or + positional `arg0`, `arg1`, … if no schema is available). + 3. Returns a JSON-serializable dict ready for wire wrapping. + """ + out: dict[str, Any] = {} + + # 1. kwargs first — already named, no inference needed + kwargs = getattr(context, "modified_kwargs", None) or {} + for k, v in kwargs.items(): + out[str(k)] = v + + # 2. args — extract named fields + args = getattr(context, "modified_args", None) or [] + if not args: + return out + + # Try to read field names from the function's input schema (Pydantic) + schema = None + fc = getattr(context, "function_context", None) + if fc is not None: + schema = getattr(fc, "input_schema", None) + schema_fields: list[str] = [] + if schema is not None: + # Pydantic v2: model_fields. 
v1 / others: __fields__ + fields = getattr(schema, "model_fields", None) or getattr(schema, "__fields__", None) + if fields: + schema_fields = list(fields.keys()) + + for idx, arg in enumerate(args): + # Pydantic model — best case: dump and merge + if hasattr(arg, "model_dump"): + try: + out.update(arg.model_dump()) + continue + except Exception: # noqa: BLE001 + pass + # Pydantic v1 fallback + if hasattr(arg, "dict") and callable(getattr(arg, "dict", None)): + try: + d = arg.dict() + if isinstance(d, dict): + out.update(d) + continue + except Exception: # noqa: BLE001 + pass + # Dataclass + if is_dataclass(arg): + try: + out.update(asdict(arg)) + continue + except Exception: # noqa: BLE001 + pass + # Plain dict — merge + if isinstance(arg, dict): + for k, v in arg.items(): + out[str(k)] = v + continue + # Scalar — use the schema field name at this position, else argN + name = schema_fields[idx] if idx < len(schema_fields) else f"arg{idx}" + out[name] = arg + + return out + + +def _redact_output(context: Any) -> None: + """Clear context.output to None as the post_invoke redaction signal. + Pulled into a helper so the redaction path is one line and matches + the audit event we emit alongside it. The `output` field is declared + on InvocationContext, so this assignment is always safe; ad-hoc + extra attributes (acs_post_invoke_redacted, etc.) are NOT — that + was the original bug that made post_invoke deny crash silently.""" + try: + context.output = None + except Exception: # noqa: BLE001 + # Test stub or unusual context without a settable output field — + # the audit event still fires so the redaction is recorded. + pass + + +def _apply_overrides_to_context(context: Any, overrides: dict[str, Any]) -> None: + """Apply Guardian's MODIFY parameter_overrides to wherever NAT will + actually read the function input. 
+ + The call NAT eventually executes is: + await call_next(*context.modified_args, **context.modified_kwargs) + + so the override has to land in the same slot the input lives in: + - LangChain agent path: input is `modified_args[0]` (a Pydantic + model returned by `Function._convert_input`). Replace the + instance via `model_copy(update=overrides)` so the next stage + sees the rewritten field values. + - Dataclass: rebuild with overrides merged. + - Plain dict in modified_args[0]: merge in place. + - Direct keyword-arg path: write into `modified_kwargs` as before. + + BEFORE this helper the adapter only wrote overrides to + `modified_kwargs`. On the LangChain path that dict was empty and the + overrides never reached the function — Guardian's MODIFY silently + became a no-op. A `rm -rf` that a Guardian wanted to rewrite to + `echo blocked` ran unchanged. Caught by the live Vertex test. + """ + # 1. Mutate modified_args[0] when it holds the input + args = list(getattr(context, "modified_args", ()) or ()) + if args: + head = args[0] + new_head: Any = None + # Pydantic v2 + if hasattr(head, "model_copy"): + try: + new_head = head.model_copy(update=dict(overrides)) + except Exception: # noqa: BLE001 + new_head = None + # Pydantic v1 + if new_head is None and hasattr(head, "copy") and callable(getattr(head, "copy", None)): + try: + new_head = head.copy(update=dict(overrides)) # type: ignore[call-arg] + except Exception: # noqa: BLE001 + new_head = None + # Dataclass + if new_head is None and is_dataclass(head): + try: + from dataclasses import replace as _dc_replace + new_head = _dc_replace(head, **overrides) + except Exception: # noqa: BLE001 + new_head = None + # Plain dict + if new_head is None and isinstance(head, dict): + new_head = {**head, **overrides} + if new_head is not None: + args[0] = new_head + # `modified_args` is a tuple field with validate_assignment=True; + # assign the new tuple so Pydantic re-validates and accepts it. 
# ----- Middleware class -----

class ACSMiddleware(FunctionMiddleware):  # type: ignore[misc, valid-type]
    """NAT middleware that defers each call's allow/deny/modify decision to an ACS Guardian.

    ``pre_invoke`` is the enforcement gate (toolCallRequest) and
    ``post_invoke`` is the output-redaction gate (toolCallResult). Workflow
    boundary events are additionally mirrored to the Guardian as lifecycle
    hooks on a best-effort basis.
    """

    def __init__(self, config):
        """Capture config and derive the session / agent identifiers.

        Args:
            config: Middleware configuration. This class reads
                ``guardian_url``, ``timeout_s``, ``default_deny`` and,
                optionally, ``session_id`` and ``target_function_or_group``.
        """
        if _NAT_AVAILABLE:
            super().__init__()
        self._config = config
        self._session_id = _coerce_uuid(
            getattr(config, "session_id", None) or os.environ.get("ACS_SESSION_ID")
        )
        target = getattr(config, "target_function_or_group", None) or "nat"
        # str() guards against a non-string target (e.g. a list of names),
        # which has no .encode(); a plain string hashes exactly as before.
        target_digest = hashlib.sha256(str(target).encode()).hexdigest()[:8]
        self._agent_id = os.environ.get("ACS_AGENT_ID") or f"nat:{target_digest}"
        self._handshake_done = False
        self._lifecycle_subscribed = False
        self._lifecycle_subscription = None
        # Lock around the check-then-set in _ensure_lifecycle_subscribed.
        # Without it, two parallel pre_invoke calls both see
        # _lifecycle_subscribed=False and both subscribe.
        self._lifecycle_lock = threading.Lock()
        # Fallback store for contexts that reject attribute assignment.
        # Weak keys drop the entry when the context is collected, so a
        # recycled id() can never alias a stale correlation id.
        import weakref
        self._frozen_ctx_rids: weakref.WeakKeyDictionary = weakref.WeakKeyDictionary()
        self._frozen_ctx_lock = threading.Lock()

    def _ensure_handshake(self) -> None:
        """Run the session handshake once per middleware instance.

        Skipped entirely when the ``ACS_HANDSHAKE`` environment variable is
        set to anything other than ``"1"`` (it defaults to ``"1"``).
        """
        if self._handshake_done or os.environ.get("ACS_HANDSHAKE", "1") != "1":
            return
        methods = ["steps/toolCallRequest", "steps/toolCallResult"]
        if _HAS_LIFECYCLE:
            methods += [
                "steps/sessionStart", "steps/userMessage",
                "steps/agentResponse", "steps/sessionEnd",
            ]
        # NAT is in-process, so the in-memory `_handshake_done` flag is the
        # primary guard (above). The disk cache in ensure_session_handshake
        # is the fallback that also makes things idempotent across process
        # restarts.
        ensure_session_handshake(
            guardian_url=self._config.guardian_url,
            session_id=self._session_id,
            agent_id=self._agent_id,
            platform="nat",
            methods_implemented=methods,
        )
        self._handshake_done = True

    def _ensure_lifecycle_subscribed(self) -> None:
        """Subscribe to NAT's IntermediateStepManager so workflow-boundary
        events fire ACS sessionStart / userMessage / agentResponse / sessionEnd.

        Idempotent and thread-safe: the flag is checked before taking the
        lock and again inside it, so two parallel pre_invoke calls cannot
        double-subscribe (which would fire every WORKFLOW hook twice).
        """
        if self._lifecycle_subscribed or not _HAS_LIFECYCLE:
            return
        with self._lifecycle_lock:
            # Re-check inside the lock — another thread may have just won
            # the race and completed the subscription.
            if self._lifecycle_subscribed:
                return
            try:
                ctx = _NATContext.get()
                mgr = ctx.intermediate_step_manager
            except Exception:  # noqa: BLE001
                # No active Context (e.g., direct middleware invocation in
                # tests without a full workflow). Silent skip — function-
                # call hooks still fire via FunctionMiddleware.
                return
            try:
                self._lifecycle_subscription = mgr.subscribe(
                    on_next=self._on_intermediate_step,
                    on_error=lambda e: audit_event(
                        "lifecycle_subscription_error",
                        session_id=self._session_id, error=str(e)),
                )
                self._lifecycle_subscribed = True
            except Exception as e:  # noqa: BLE001
                audit_event("lifecycle_subscribe_failed",
                            session_id=self._session_id, error=str(e))

    def _on_intermediate_step(self, step) -> None:
        """Subscriber callback: translate workflow-boundary events into ACS hooks.

        Only WORKFLOW_START and WORKFLOW_END are handled. Function-level
        events (FUNCTION_START/END, TOOL_START/END, LLM_START/END) are
        ignored here because they're already covered by pre_invoke /
        post_invoke. Steps without a ``payload.event_type`` are dropped.
        """
        try:
            payload = step.payload
            event_type = payload.event_type
        except AttributeError:
            return

        if event_type == IntermediateStepType.WORKFLOW_START:
            # Workflow input becomes both sessionStart (boundary marker)
            # and userMessage (the input itself).
            self._emit_lifecycle_hook(
                "steps/sessionStart",
                payload={"platform_context": {"workflow_name": payload.name or ""}})
            input_text = _stringify_step_data(payload.data)
            if input_text:
                self._emit_lifecycle_hook(
                    "steps/userMessage",
                    payload={"content": [{"type": "text", "value": input_text}]})
        elif event_type == IntermediateStepType.WORKFLOW_END:
            # Workflow output becomes agentResponse; sessionEnd closes the boundary.
            output_text = _stringify_step_data(payload.data)
            if output_text:
                self._emit_lifecycle_hook(
                    "steps/agentResponse",
                    payload={"content": [{"type": "text", "value": output_text}]})
            self._emit_lifecycle_hook(
                "steps/sessionEnd",
                payload={"reason": "completed"})

    def _emit_lifecycle_hook(self, method: str, payload: dict) -> None:
        """Build, sign, and POST a lifecycle hook on a best-effort basis.

        The POST goes through ``_call_guardian`` and is therefore
        synchronous (bounded by ``timeout_s``); the Guardian's reply is
        discarded. Any failure — including a signing failure — is audited
        and never interrupts the workflow: lifecycle emission is
        observability, not the enforcement path (that's pre_invoke /
        post_invoke).

        Args:
            method: ACS method name, e.g. ``"steps/sessionStart"``.
            payload: Method-specific payload placed under ``params.payload``.
        """
        request = {
            "jsonrpc": "2.0",
            "id": str(uuid.uuid4()),
            "method": method,
            "params": {
                "acs_version": ACS_VERSION,
                "request_id": str(uuid.uuid4()),
                "timestamp": _iso8601_now(),
                "metadata": {
                    "agent_id": self._agent_id,
                    "session_id": self._session_id,
                    "platform": "nat",
                },
                "payload": payload,
            },
        }
        try:
            # Signing sits inside the try so a signing error is audited
            # like a transport error instead of escaping into NAT's
            # subscriber callback.
            sign_envelope(request, session_id=self._session_id)
            self._call_guardian(request)
        except Exception as e:  # noqa: BLE001
            audit_event("lifecycle_hook_failed",
                        method=method, session_id=self._session_id, error=str(e))

    def _correlation_request_id(self, context) -> str:
        """Return a request_id for this invocation that is unique per call
        but stable across pre_invoke + post_invoke.

        Fast path: stash a fresh uuid4 on the context. pre_invoke and
        post_invoke share the same context object, so post_invoke reads
        back the same value to populate request_id_ref.

        Fallback for contexts that reject attribute assignment: a
        WeakKeyDictionary keyed by the context object. The entry is dropped
        when the context is collected, so a recycled id() can never map two
        distinct contexts to the same uuid.

        Last resort (context neither assignable nor usable as a weak key):
        the fresh uuid is returned uncached, pre→post correlation is lost,
        and the degradation is audited.
        """
        existing = getattr(context, "_acs_correlation_request_id", None)
        if existing:
            return existing
        rid = str(uuid.uuid4())
        try:
            context._acs_correlation_request_id = rid
            return rid
        except (AttributeError, TypeError, ValueError):
            # ValueError also covers Pydantic's ValidationError, which the
            # strict validate_assignment context raises for extra
            # attributes (see the note in post_invoke).
            pass
        # Frozen context — fall back to WeakKeyDictionary if the object
        # supports weak references.
        try:
            with self._frozen_ctx_lock:
                cached = self._frozen_ctx_rids.get(context)
                if cached is not None:
                    return cached
                self._frozen_ctx_rids[context] = rid
                return rid
        except TypeError:
            # Not usable as a weak key (e.g., __slots__ without
            # __weakref__, or unhashable). Keying on id(context) would
            # risk collisions after GC, so audit the degradation rather
            # than introduce a silent bug.
            audit_event("frozen_unweakrefable_context",
                        session_id=self._session_id)
            return rid

    @property
    def enabled(self) -> bool:
        """The middleware is always active once constructed."""
        return True

    @staticmethod
    def _split_error(err) -> tuple:
        """Return ``(code, message)`` from a JSON-RPC ``error`` member.

        Tolerates a non-object error (e.g. a bare string) instead of
        raising AttributeError on ``.get``.
        """
        if isinstance(err, dict):
            return err.get("code"), err.get("message", "")
        return None, str(err)

    @staticmethod
    def _parse_verdict(envelope: dict) -> tuple:
        """Return ``(decision, reasoning, modifications)`` from a response.

        Every member is null-safe: a missing or JSON-null ``result``,
        ``decision``, ``reasoning`` or ``modifications`` yields ``""`` /
        ``{}`` rather than an AttributeError.
        """
        result = envelope.get("result")
        if not isinstance(result, dict):
            result = {}
        decision = str(result.get("decision") or "").lower()
        reasoning = result.get("reasoning") or ""
        mods = result.get("modifications")
        if not isinstance(mods, dict):
            mods = {}
        return decision, reasoning, mods

    async def pre_invoke(self, context):
        """Gate the function call on the Guardian's verdict.

        Returns:
            None to let the call proceed unchanged; the (mutated) context
            when arguments were rewritten or the call was marked SKIP.

        Raises:
            ACSGuardianDenied: when the call is blocked and InvocationAction
                is not available (see ``_block``).
        """
        self._ensure_handshake()
        self._ensure_lifecycle_subscribed()
        correlation_id = self._correlation_request_id(context)
        method = "steps/toolCallRequest"
        try:
            request = self._build_request(
                method=method,
                tool_name=context.function_context.name,
                tool_arguments=_extract_arguments(context),
                request_id=correlation_id,
            )
            response = self._call_guardian(request)
        except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, OSError) as e:
            return self._handle_decision_failure(
                context, method,
                cause="transport_failure", error=str(e),
                deny_msg=f"Guardian unreachable: {e}")
        except Exception as e:  # noqa: BLE001
            return self._handle_decision_failure(
                context, method,
                cause="adapter_exception", error=str(e),
                deny_msg=f"adapter error: {e}")

        envelope = response if isinstance(response, dict) else {}

        # Guardian returned a JSON-RPC error response (replay detected,
        # signature invalid, timestamp skew, malformed envelope, …).
        # Distinct from transport failure: operator triage needs the
        # specific cause (clock skew vs adapter bug vs duplicate id).
        err = envelope.get("error")
        if err is not None:
            code, message = self._split_error(err)
            return self._handle_decision_failure(
                context, method,
                cause=guardian_error_cause(code),
                error=message,
                jsonrpc_code=code,
                deny_msg=f"Guardian rejected envelope: {message}")

        if not verify_signature(response, session_id=self._session_id):
            if self._config.default_deny:
                audit_event("decision_failure_fail_closed",
                            cause="response_signature_invalid",
                            session_id=self._session_id,
                            method=method)
                return self._block(context, "response signature invalid")
            audit_event("fail_open_bypass",
                        cause="response_signature_invalid",
                        session_id=self._session_id,
                        method=method)
            return None

        decision, reasoning, mods = self._parse_verdict(envelope)

        if decision == "allow":
            return None
        if decision == "deny":
            return self._block(context, reasoning or "denied by Guardian")
        if decision == "modify":
            overrides = mods.get("parameter_overrides")
            if isinstance(overrides, dict):
                _apply_overrides_to_context(context, overrides)
                return context
            # MODIFY without usable overrides cannot be honoured: deny.
            return self._block(context, f"MODIFY substituted to DENY: {reasoning}")
        if decision in ("ask", "defer"):
            # NAT has no native pause-and-resume primitive on the middleware
            # boundary. Substitute block; deployments wanting ASK/DEFER
            # should compose with NAT's HITL middleware.
            return self._block(context, f"{decision}: {reasoning}")

        # Unknown disposition: apply the configured fail posture. Routed
        # through the shared helper so this path is audited like every
        # other fail-open / fail-closed outcome (it was previously silent).
        unknown_msg = f"unknown disposition: {decision}"
        return self._handle_decision_failure(
            context, method,
            cause="unknown_disposition", error=unknown_msg,
            deny_msg=unknown_msg)

    async def post_invoke(self, context):
        """Record the result and apply the Guardian's verdict to the output.

        - allow: pass through.
        - modify (with modified_content): replace context.output.
        - deny: clear context.output to None. The tool already ran
          (post_invoke fires after execution), so the side effect cannot be
          undone — but downstream consumers see no output.
        - unknown: respect default_deny — drop output if true.

        Transport failures, adapter exceptions, Guardian error responses
        and invalid response signatures are audited and the output is left
        untouched.

        Returns:
            The context when the output was redacted or replaced, else None.
        """
        # request_id_ref points at the originating toolCallRequest so the
        # Guardian can correlate result with request.
        correlation_id = self._correlation_request_id(context)
        method = "steps/toolCallResult"
        try:
            request = self._build_request(
                method=method,
                tool_name=context.function_context.name,
                tool_arguments=_extract_arguments(context),
                result=context.output,
                request_id_ref=correlation_id,
            )
            response = self._call_guardian(request)
        except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, OSError) as e:
            audit_event("post_invoke_unreachable",
                        cause="transport_failure",
                        session_id=self._session_id,
                        method=method,
                        error=str(e))
            return None
        except Exception as e:  # noqa: BLE001
            audit_event("post_invoke_unreachable",
                        cause="adapter_exception",
                        session_id=self._session_id,
                        method=method,
                        error=str(e))
            return None

        envelope = response if isinstance(response, dict) else {}

        # Same JSON-RPC error response distinction as pre_invoke.
        err = envelope.get("error")
        if err is not None:
            code, message = self._split_error(err)
            audit_event("post_invoke_unreachable",
                        cause=guardian_error_cause(code),
                        session_id=self._session_id,
                        method=method,
                        jsonrpc_code=code,
                        error=message)
            return None

        if not verify_signature(response, session_id=self._session_id):
            audit_event("post_invoke_signature_invalid",
                        cause="response_signature_invalid",
                        session_id=self._session_id,
                        method=method)
            return None

        decision, reasoning, mods = self._parse_verdict(envelope)

        if decision == "deny":
            # Post-hoc redaction: the tool already executed, but we
            # prevent the (potentially sensitive) output from flowing.
            # NOTE: NAT's InvocationContext is a strict Pydantic model
            # with validate_assignment=True — extra attributes raise
            # ValidationError. The redaction signal is therefore
            # output=None plus the audit event below, which downstream
            # consumers MUST use rather than reading ad-hoc fields.
            _redact_output(context)
            audit_event("post_invoke_redacted",
                        cause="guardian_deny",
                        session_id=self._session_id,
                        method=method,
                        reasoning=reasoning or "output redacted by Guardian")
            return context
        if decision == "modify":
            modified_content = mods.get("modified_content")
            if modified_content is not None:
                context.output = modified_content
                return context
        if decision not in KNOWN_DECISIONS and self._config.default_deny:
            _redact_output(context)
            audit_event("post_invoke_redacted",
                        cause="unknown_disposition_default_deny",
                        session_id=self._session_id,
                        method=method,
                        decision=decision)
            return context
        return None

    # ----- helpers -----

    def _handle_decision_failure(self, context, method: str, *, cause: str,
                                 error: str, deny_msg: str,
                                 jsonrpc_code: int | None = None):
        """Single point for the pre_invoke decision-failure audit + posture.

        Every fail-closed / fail-open emission carries a stable `cause`
        label so operators can triage which failure mode hit (transport,
        adapter bug, Guardian rejection — and which specific Guardian
        rejection: replay vs signature vs skew vs malformed).

        Args:
            context: Invocation context passed on to ``_block``.
            method: ACS method being evaluated (recorded in the audit).
            cause: Stable triage label.
            error: Human-readable failure detail.
            deny_msg: Reason given to ``_block`` when failing closed.
            jsonrpc_code: Guardian's JSON-RPC error code, when present.

        Returns:
            The result of ``_block`` when ``default_deny`` is set, else
            None (fail-open).
        """
        fields: dict[str, Any] = {
            "cause": cause,
            "session_id": self._session_id,
            "method": method,
            "error": error,
        }
        if jsonrpc_code is not None:
            fields["jsonrpc_code"] = jsonrpc_code
        if self._config.default_deny:
            audit_event("decision_failure_fail_closed", **fields)
            return self._block(context, deny_msg)
        audit_event("fail_open_bypass", **fields)
        return None

    def _block(self, context, reason: str):
        """Block the invocation.

        Prefers ``InvocationAction.SKIP`` when available and falls back to
        raising for NAT releases that don't expose it.

        The reason is attached as ``context.acs_block_reason`` only when
        the context accepts it. A strict Pydantic context rejects ad-hoc
        attributes; letting that error escape would replace a clean block
        with an unrelated ValidationError, so the failure is audited and
        the block still stands.

        Returns:
            The context, marked SKIP.

        Raises:
            ACSGuardianDenied: when InvocationAction is unavailable, or the
                SKIP action cannot be set on this context (fail closed).
        """
        if not _HAS_INVOCATION_ACTION:
            raise ACSGuardianDenied(reason)
        try:
            context.action = InvocationAction.SKIP  # type: ignore[attr-defined]
        except Exception as exc:  # noqa: BLE001
            # Cannot mark SKIP: never return as if blocked while the call
            # might still run.
            raise ACSGuardianDenied(reason) from exc
        try:
            context.acs_block_reason = reason
        except Exception as exc:  # noqa: BLE001
            audit_event("block_reason_unattached",
                        session_id=self._session_id,
                        reason=reason, error=str(exc))
        return context

    def _build_request(
        self,
        method: str,
        tool_name: str,
        tool_arguments: dict,
        result: Any = None,
        request_id: str | None = None,
        request_id_ref: str | None = None,
    ) -> dict:
        """Build a signed ACS request envelope matching request-envelope.json.

        Args:
            method: ``"steps/toolCallRequest"`` builds a request payload;
                any other value builds a tool-result payload.
            tool_name: Name of the function being invoked.
            tool_arguments: Plain arguments; wrapped via ``_wrap_arguments``
                (request payloads only).
            result: Tool output (result payloads only). None yields an
                empty ``outputs`` list; values that are not
                str/int/float/bool/dict/list are stringified.
            request_id: Envelope request_id; a fresh uuid4 when omitted.
            request_id_ref: Originating request id, added to result
                payloads when given.

        Returns:
            The envelope dict, after ``sign_envelope`` has been applied.
        """
        metadata = {
            "agent_id": self._agent_id,
            "session_id": self._session_id,
            "platform": "nat",
        }
        if method == "steps/toolCallRequest":
            payload: dict[str, Any] = {
                "tool": {"name": tool_name},
                "arguments": _wrap_arguments(tool_arguments),
            }
        else:
            if result is None:
                outputs: list[dict[str, Any]] = []
            elif isinstance(result, (str, int, float, bool, dict, list)):
                outputs = [{"value": result}]
            else:
                outputs = [{"value": str(result)}]
            payload = {
                "tool": {"name": tool_name},
                "exit_status": "success",
                "outputs": outputs,
            }
            if request_id_ref:
                payload["request_id_ref"] = request_id_ref

        envelope = {
            "jsonrpc": "2.0",
            "id": str(uuid.uuid4()),
            "method": method,
            "params": {
                "acs_version": ACS_VERSION,
                "request_id": request_id or str(uuid.uuid4()),
                "timestamp": _iso8601_now(),
                "metadata": metadata,
                "payload": payload,
            },
        }
        sign_envelope(envelope, session_id=self._session_id)
        return envelope

    def _call_guardian(self, request: dict) -> dict:
        """POST ``request`` as JSON to the Guardian and return the decoded reply.

        Blocking call bounded by ``config.timeout_s``. URL validation,
        transport and JSON-decoding errors propagate to the caller.
        """
        validate_guardian_url(self._config.guardian_url)  # SSRF: refuse file://, ftp://, etc.
        body = json.dumps(request).encode("utf-8")
        req = urllib.request.Request(
            self._config.guardian_url,
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=self._config.timeout_s) as resp:
            return json.loads(resp.read().decode("utf-8"))


# ----- NAT registration -----

if _NAT_AVAILABLE and _HAS_REGISTRATION:
    @register_middleware(config_type=ACSMiddlewareConfig)  # type: ignore[misc]
    async def build_acs_middleware(config: "ACSMiddlewareConfig", builder):  # type: ignore[name-defined]
        """NAT factory entry point. Yields the middleware instance for NAT to wire up."""
        yield ACSMiddleware(config)
+ - And it asserts the DESTRUCTIVE scenario the unit tests can't: + `rm -rf` against a real victim directory with a canary file. If + the file disappears, the enforcement was theatre — the function + actually ran. The unit test only checks the counter. + +Fully automated. NAT runs in-process, so unlike the Cursor e2e check +there is no operator-in-the-loop: just run and watch. + +Prerequisites: + - nvidia-nat-core installed (>= 1.7.0) + - The canonical ACS schemas at $ACS_SPEC_DIR + (default /tmp/acs-spec-source/specification/v0.1.0/) +""" +from __future__ import annotations + +import asyncio +import json +import os +import sys +import tempfile +import uuid +from pathlib import Path + +HERE = Path(__file__).resolve().parent +ADAPTER_DIR = HERE +COMMON_DIR = HERE.parent / "_common" +EXAMPLE_GUARDIAN_DIR = HERE.parent / "example-guardian" +SPEC_DIR_DEFAULT = Path(os.environ.get( + "ACS_SPEC_DIR", "/tmp/acs-spec-source/specification/v0.1.0")) + +sys.path.insert(0, str(COMMON_DIR)) +sys.path.insert(0, str(EXAMPLE_GUARDIAN_DIR)) +sys.path.insert(0, str(ADAPTER_DIR)) + +import acs_common # noqa: E402 +from test_harness import ( # noqa: E402 + ProgrammableGuardian, + validate_request_envelope, +) +from e2e_report import ( # noqa: E402 + Report, real_policy_handler, + assert_envelopes_signed_and_valid as _assert_envelopes_signed_and_valid, +) +# REAL example-Guardian policy. Installed once for the whole run so every +# scenario sees the same shipping policy (no synthetic per-scenario +# handlers, no operator footgun like the one that bit us in Cursor's +# original e2e). 
from example_guardian import evaluate_step  # noqa: E402

HMAC_SECRET = "e2e-test-shared-secret-not-for-production"

try:
    from nat.middleware.middleware import FunctionMiddlewareContext  # type: ignore[import-not-found]
    _NAT_OK = True
except ImportError:
    _NAT_OK = False


# ──────────────────────────────────────────────────────────────────────
# Shared helpers
# ──────────────────────────────────────────────────────────────────────

def _build_middleware(guardian_url: str, *, default_deny: bool = True,
                      session_id: str | None = None):
    """Construct an ACSMiddleware with the same env the operator uses
    in production (HMAC secret on; signs every envelope).

    Side effect: sets ``ACS_HMAC_SECRET`` in ``os.environ`` for the whole
    process before the adapter is imported.

    Args:
        guardian_url: Endpoint of the recording Guardian.
        default_deny: Fail posture passed to the middleware config.
        session_id: Session id to use; a fresh uuid4 string when omitted.

    Returns:
        A configured ``ACSMiddleware`` instance.
    """
    os.environ["ACS_HMAC_SECRET"] = HMAC_SECRET
    from acs_adapter import ACSMiddleware, ACSMiddlewareConfig  # noqa: E402
    return ACSMiddleware(ACSMiddlewareConfig(
        guardian_url=guardian_url,
        default_deny=default_deny,
        session_id=session_id or str(uuid.uuid4()),
    ))


def _ctx(tool_name: str):
    """Build a minimal FunctionMiddlewareContext for ``tool_name``.

    No config, description or input schema is supplied and both output
    schemas are ``NoneType``.

    Raises:
        RuntimeError: when nvidia-nat-core could not be imported, instead
            of the bare NameError the missing class would otherwise cause.
    """
    if not _NAT_OK:
        raise RuntimeError(
            "nvidia-nat-core is not importable; "
            "FunctionMiddlewareContext is unavailable")
    return FunctionMiddlewareContext(
        name=tool_name, config=None, description=None,
        input_schema=None,
        single_output_schema=type(None),
        stream_output_schema=type(None),
    )


# Local closure: bind `validate_request_envelope` once so the per-scenario
# callsites stay short. The shared helper is type-agnostic so the
# adapter passes the canonical-schema validator in.
def _envelope_checks(guardian, sub_results: list) -> None:
    """Run the shared signed-and-valid envelope checks for one scenario.

    Delegates to ``_assert_envelopes_signed_and_valid`` with the
    canonical-schema validator bound, passing ``sub_results`` through.
    """
    _assert_envelopes_signed_and_valid(
        guardian, validate_request_envelope, sub_results)


# ──────────────────────────────────────────────────────────────────────
# Scenarios
# ──────────────────────────────────────────────────────────────────────

TOTAL_SCENARIOS = 5


def scenario_allow(report: Report, guardian: ProgrammableGuardian) -> None:
    """ALLOW — a benign Bash call passes the real policy and executes once.

    Checks that the target ran with the expected command, that its return
    value flows back through the middleware, and that the Guardian saw the
    handshake, the toolCallRequest and the toolCallResult.
    """
    marker = f"ACS_E2E_OK_{uuid.uuid4().hex[:8]}"
    report.case(1, TOTAL_SCENARIOS,
                "ALLOW — benign Bash function executes; return value flows back")
    report.field("Marker:", marker)
    report.field("Expected:", "real policy allows; function executes once; "
                 "return value contains marker; "
                 "Guardian sees handshake + toolCallRequest + toolCallResult")

    guardian.reset()
    mw = _build_middleware(f"http://127.0.0.1:{guardian.port}/acs", session_id="nat-e2e-allow")
    executed = {"count": 0, "got_command": None}

    async def target(command: str) -> str:
        executed["count"] += 1
        executed["got_command"] = command
        return f"executed: {command} -> {marker}"

    result = asyncio.run(mw.function_middleware_invoke(
        command=f"echo {marker}", call_next=target, context=_ctx("Bash")))

    methods = [r.get("method", "") for r in guardian.received]
    sub_results = [
        ("Function actually executed (counter = 1)",
         executed["count"] == 1, f"count={executed['count']}"),
        ("Function received the expected command",
         executed["got_command"] == f"echo {marker}",
         f"got={executed['got_command']!r}"),
        ("Return value flows back through middleware",
         marker in str(result), f"got={result!r}"),
        ("Guardian received handshake/hello",
         "handshake/hello" in methods, ""),
        ("Guardian received steps/toolCallRequest",
         "steps/toolCallRequest" in methods, ""),
        # The "Expected" field above announces this hook; assert it too.
        ("Guardian received steps/toolCallResult",
         "steps/toolCallResult" in methods, ""),
    ]
    _envelope_checks(guardian, sub_results)

    _dump_envelopes(report, guardian)
    for label, ok, detail in sub_results:
        report.sub(label, ok, detail)
    report.finish("allow-path", all(ok for _, ok, _ in sub_results))


def scenario_destructive(report: Report, guardian: ProgrammableGuardian) -> None:
    """The load-bearing scenario. The unit test asserts counter == 0; we
    additionally write a canary file inside a victim dir, then ask the
    middleware to rm -rf the victim. If the file vanishes despite the
    Guardian's deny, the enforcement failed silently — the counter check
    alone wouldn't catch it. The canary makes it impossible to false-pass.

    The victim directory is removed in a ``finally`` so a scenario that
    raises midway does not leak it.
    """
    import shutil

    victim = Path(tempfile.mkdtemp(prefix="acs-nat-e2e-victim-"))
    try:
        canary = victim / "DO_NOT_DELETE.txt"
        canary.write_text("if you see this after the scenario, the deny worked\n")

        report.case(2, TOTAL_SCENARIOS,
                    "DESTRUCTIVE — real policy blocks rm -rf; canary survives")
        report.field("Victim dir:", str(victim))
        report.field("Canary:", str(canary))
        report.field("Expected:", "real policy denies via destructive_command regex; "
                     "function NOT executed; canary still on disk")

        guardian.reset()
        mw = _build_middleware(f"http://127.0.0.1:{guardian.port}/acs",
                               session_id="nat-e2e-destructive")
        executed = {"count": 0}

        async def target(command: str) -> str:
            executed["count"] += 1
            # If this ever runs despite the Guardian deny, it WILL delete
            # the victim dir — the canary check below catches it.
            shutil.rmtree(victim, ignore_errors=True)
            return "BUG: should not see this"

        from acs_adapter import ACSGuardianDenied  # noqa: E402
        raised = None
        try:
            asyncio.run(mw.function_middleware_invoke(
                command=f"rm -rf {victim}/",
                call_next=target,
                context=_ctx("Bash"),
            ))
        except ACSGuardianDenied as e:
            raised = e

        canary_intact = canary.exists()
        deny_resp = next(
            (r for r in guardian.sent
             if r.get("result", {}).get("decision") == "deny"
             and "destructive_command"
             in (r.get("result", {}).get("reason_codes") or [])),
            None)
        sub_results = [
            ("Function NOT executed (counter == 0)",
             executed["count"] == 0,
             f"count={executed['count']}"),
            ("ACSGuardianDenied raised (enforcement signaled)",
             raised is not None,
             f"raised={raised!r}"),
            ("Canary file still exists "
             "(rm did NOT execute — counterproof)",
             canary_intact,
             "intact" if canary_intact else "DESTROYED"),
            ("Guardian returned deny with "
             "reason_codes=['destructive_command']",
             deny_resp is not None,
             "destructive_command deny issued" if deny_resp
             else "no matching deny found"),
        ]
        _envelope_checks(guardian, sub_results)

        _dump_envelopes(report, guardian)
        if deny_resp:
            report.json_block("Guardian's destructive-command deny (verbatim)",
                              deny_resp["result"], truncate=160)
        for label, ok, detail in sub_results:
            report.sub(label, ok, detail)
        report.finish("destructive-policy-path",
                      all(ok for _, ok, _ in sub_results))
    finally:
        # cleanup if still around
        shutil.rmtree(victim, ignore_errors=True)


def scenario_read_tool(report: Report, guardian: ProgrammableGuardian) -> None:
    """READ-TOOL — different tool name, same wire contract. Confirms the
    adapter wraps every argument as {value: ...} regardless of the tool,
    and that the wrapped ``file_path`` value matches the probe path.

    The probe directory is removed in a ``finally`` so a scenario that
    raises midway does not leak it.
    """
    import shutil

    probe_dir = Path(tempfile.mkdtemp(prefix="acs-nat-e2e-read-"))
    try:
        probe = probe_dir / "probe.txt"
        probe.write_text("ACS NAT read-tool probe\n")

        report.case(3, TOTAL_SCENARIOS,
                    "READ TOOL — different tool, same wire contract")
        report.field("Probe file:", str(probe))
        report.field("Expected:", "Read tool args wrapped per tool-call-request.json:26-37; "
                     "envelope.arguments.file_path.value matches probe path")

        guardian.reset()
        mw = _build_middleware(f"http://127.0.0.1:{guardian.port}/acs",
                               session_id="nat-e2e-read")
        executed = {"count": 0}

        async def target(file_path: str) -> str:
            executed["count"] += 1
            return Path(file_path).read_text()

        asyncio.run(mw.function_middleware_invoke(
            file_path=str(probe), call_next=target, context=_ctx("Read")))

        read_envs = [r for r in guardian.received
                     if r.get("method") == "steps/toolCallRequest"
                     and r["params"]["payload"].get("tool", {}).get("name") == "Read"]
        sub_results = [
            ("Function executed",
             executed["count"] == 1, f"count={executed['count']}"),
            ("Guardian received a Read toolCallRequest",
             bool(read_envs), f"{len(read_envs)} found"),
        ]
        if read_envs:
            args = read_envs[0]["params"]["payload"].get("arguments", {})
            sub_results.append(
                ("Arguments wrapped per tool-call-request.json:26-37",
                 bool(args) and all(isinstance(v, dict) and "value" in v
                                    for v in args.values()),
                 f"args={list(args.keys())}"))
            wrapped_path = args.get("file_path")
            fp_value = wrapped_path.get("value", "") \
                if isinstance(wrapped_path, dict) else ""
            sub_results.append(
                ("file_path argument value matches probe path",
                 str(fp_value) == str(probe),
                 f"value={fp_value!r}"))
        _envelope_checks(guardian, sub_results)

        _dump_envelopes(report, guardian)
        for label, ok, detail in sub_results:
            report.sub(label, ok, detail)
        report.finish("read-tool-path", all(ok for _, ok, _ in sub_results))
    finally:
        shutil.rmtree(probe_dir, ignore_errors=True)


def scenario_handshake_once(report: Report, guardian: ProgrammableGuardian) -> None:
    """HANDSHAKE — fires exactly ONCE per ACSMiddleware instance (§4).
    Reuse the SAME middleware across 3 invocations; assert exactly one
    handshake/hello envelope arrived, alongside three toolCallRequest and
    three toolCallResult envelopes."""
    report.case(4, TOTAL_SCENARIOS,
                "HANDSHAKE — fires exactly ONCE per middleware instance (§4)")
    report.field("Expected:",
                 "3 sequential function calls on the same middleware; "
                 "Guardian sees exactly 1 handshake/hello across all 3")

    guardian.reset()
    mw = _build_middleware(f"http://127.0.0.1:{guardian.port}/acs",
                           session_id="nat-e2e-handshake")
    executed = {"count": 0}

    async def target(command: str) -> str:
        executed["count"] += 1
        return f"ran #{executed['count']}: {command}"

    for i in range(3):
        asyncio.run(mw.function_middleware_invoke(
            command=f"echo call-{i}",
            call_next=target,
            context=_ctx("Bash"),
        ))

    def _received(method: str) -> list:
        """Envelopes the Guardian recorded for ``method``."""
        return [r for r in guardian.received if r.get("method") == method]

    handshakes = _received("handshake/hello")
    pretools = _received("steps/toolCallRequest")
    posttools = _received("steps/toolCallResult")

    _dump_envelopes(report, guardian)
    sub_results = [
        ("Exactly 1 handshake/hello across 3 invocations",
         len(handshakes) == 1, f"got {len(handshakes)}"),
        ("3 toolCallRequest envelopes (one per invocation)",
         len(pretools) == 3, f"got {len(pretools)}"),
        ("3 toolCallResult envelopes (one per invocation)",
         len(posttools) == 3, f"got {len(posttools)}"),
        ("All 3 function calls actually executed",
         executed["count"] == 3, f"count={executed['count']}"),
    ]
    _envelope_checks(guardian, sub_results)
    for label, ok, detail in sub_results:
        report.sub(label, ok, detail)
    report.finish("handshake-once-per-session",
                  all(ok for _, ok, _ in sub_results))
+ try: + from nat.builder.intermediate_step_manager import IntermediateStepManager # type: ignore[import-not-found] + from nat.builder.context import ContextState # type: ignore[import-not-found] + from nat.data_models.intermediate_step import ( # type: ignore[import-not-found] + IntermediateStepPayload, IntermediateStepType, StreamEventData, + ) + except ImportError as e: + report.sub("nvidia-nat-core importable for lifecycle test", + False, f"missing: {e}") + report.finish("lifecycle-observability", False) + return + + workflow_input = f"hello-from-nat-e2e-{uuid.uuid4().hex[:6]}" + workflow_output = f"acs-nat-output-{uuid.uuid4().hex[:6]}" + + mw = _build_middleware(f"http://127.0.0.1:{guardian.port}/acs", + session_id="nat-e2e-lifecycle") + ctx_state = ContextState.get() + mgr = IntermediateStepManager(ctx_state) + sub = mgr.subscribe(on_next=mw._on_intermediate_step) + try: + wf_uuid = str(uuid.uuid4()) + # START and END of a span share the same UUID + mgr.push_intermediate_step(IntermediateStepPayload( + UUID=wf_uuid, + event_type=IntermediateStepType.WORKFLOW_START, + name="e2e-workflow", + data=StreamEventData(input=workflow_input), + )) + mgr.push_intermediate_step(IntermediateStepPayload( + UUID=wf_uuid, + event_type=IntermediateStepType.WORKFLOW_END, + name="e2e-workflow", + data=StreamEventData(output=workflow_output), + )) + finally: + try: + sub.dispose() + except Exception: # noqa: BLE001 + pass + + # Give async pumps a moment to flush + import time as _time + _time.sleep(0.2) + + methods = [r.get("method", "") for r in guardian.received] + session_start = [r for r in guardian.received + if r.get("method") == "steps/sessionStart"] + user_msg = [r for r in guardian.received + if r.get("method") == "steps/userMessage"] + agent_resp = [r for r in guardian.received + if r.get("method") == "steps/agentResponse"] + session_end = [r for r in guardian.received + if r.get("method") == "steps/sessionEnd"] + + input_in_payload = any( + workflow_input in 
json.dumps(r.get("params", {}).get("payload", {})) + for r in user_msg) + output_in_payload = any( + workflow_output in json.dumps(r.get("params", {}).get("payload", {})) + for r in agent_resp) + + _dump_envelopes(report, guardian) + sub_results = [ + ("WORKFLOW_START emitted steps/sessionStart", + bool(session_start), f"{len(session_start)} found"), + ("WORKFLOW_START emitted steps/userMessage with input text", + bool(user_msg) and input_in_payload, + "input present" if input_in_payload else "input MISSING"), + ("WORKFLOW_END emitted steps/agentResponse with output text", + bool(agent_resp) and output_in_payload, + "output present" if output_in_payload else "output MISSING"), + ("WORKFLOW_END emitted steps/sessionEnd", + bool(session_end), f"{len(session_end)} found"), + ] + _envelope_checks(guardian, sub_results) + for label, ok, detail in sub_results: + report.sub(label, ok, detail) + report.finish("lifecycle-observability", + all(ok for _, ok, _ in sub_results)) + + +# ────────────────────────────────────────────────────────────────────── +# Output helpers +# ────────────────────────────────────────────────────────────────────── + +def _dump_envelopes(report: Report, guardian: ProgrammableGuardian) -> None: + if not guardian.received: + return + print(f" ── Envelopes received (in order)") + for r in guardian.received: + method = r.get("method", "") + rid = r.get("params", {}).get("request_id", "")[:8] + print(f" {method:35s} req={rid}…") + + +# ────────────────────────────────────────────────────────────────────── +# Entry point +# ────────────────────────────────────────────────────────────────────── + +def main() -> int: + if not _NAT_OK: + sys.stderr.write( + "FATAL: nvidia-nat-core not installed.\n" + "Install: pip install nvidia-nat-core\n") + return 2 + if not SPEC_DIR_DEFAULT.exists(): + sys.stderr.write( + f"FATAL: canonical schemas missing at {SPEC_DIR_DEFAULT}\n" + "Set ACS_SPEC_DIR to a clone of " + 
"Agent-Control-Standard/ACS/specification/v0.1.0/\n") + return 2 + + guardian = ProgrammableGuardian(hmac_secret=HMAC_SECRET) + guardian.handlers["__default__"] = real_policy_handler(evaluate_step) + guardian.start() + + report = Report() + report.print_header( + "ACS NAT adapter — REAL end-to-end conformance check", + "", + "Drives the real ACSMiddleware against a recording Guardian", + "wired to the REAL example_guardian.evaluate_step policy.", + "Fully automated — NAT runs in-process, no operator action.", + "", + f"Spec source : {SPEC_DIR_DEFAULT}", + f"Adapter : {ADAPTER_DIR}/acs_adapter.py", + f"Guardian : ProgrammableGuardian + real evaluate_step()", + "", + f"{TOTAL_SCENARIOS} scenarios.", + ) + + try: + scenario_allow(report, guardian) + scenario_destructive(report, guardian) + scenario_read_tool(report, guardian) + scenario_handshake_once(report, guardian) + scenario_lifecycle_observability(report, guardian) + finally: + guardian.stop() + + return 0 if report.summary("YOUR NAT INSTALL IS ACS-CONFORMANT") else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/adapters/nat/mapping.md b/adapters/nat/mapping.md new file mode 100644 index 0000000..ce78372 --- /dev/null +++ b/adapters/nat/mapping.md @@ -0,0 +1,84 @@ +# NVIDIA Agent Toolkit (NAT) → ACS mapping + +Schema source: NAT public repo (`packages/nvidia_nat_core/src/nat/middleware/`). + +NAT's architecture differs from Claude Code and Cursor: rather than firing events at named lifecycle points, NAT wraps every Function (tool / sub-workflow / LLM / retriever / memory operation / etc.) with a `Middleware` chain. The middleware's `pre_invoke` is called before the function executes, `post_invoke` after. + +The ACS adapter is implemented as a `FunctionMiddleware` that, for each wrapped call, emits an ACS step request. 
+ +## NAT lifecycle → ACS step method + +| NAT concept | ACS step method | +|---|---| +| Tool function `pre_invoke` | `steps/toolCallRequest` | +| Tool function `post_invoke` | `steps/toolCallResult` | +| LLM `pre_invoke` | `steps/toolCallRequest` (tool name = "LLM:`provider`:`model`") | +| LLM `post_invoke` | `steps/toolCallResult` | +| Retriever `pre_invoke` | `steps/knowledgeRetrieval` (the adapter treats `retrievers` as knowledge-retrieval calls when `target_function_or_group` points to a retriever group) | +| Memory read | `steps/memoryContextRetrieval` | +| Memory write | `steps/memoryStore` | +| Sub-workflow invocation | `steps/subagentStart` / `steps/subagentStop` (NAT models sub-workflows as nested functions) | +| Workflow entry | `steps/sessionStart` (when attaching at workflow level with a `sessionStart` semantic — typically configured by pointing `target_function_or_group` at the workflow, plus dispatch logic in the Guardian based on tool name) | + +The minimal adapter in `acs_adapter.py` emits `steps/toolCallRequest` and `steps/toolCallResult` for every wrapped function call. The Guardian can dispatch on the tool name to apply different policy. Splitting into separate ACS step methods (e.g. `steps/knowledgeRetrieval` for retriever calls) is a configuration choice handled in the adapter's `pre_invoke` based on `function_context.name`; the example adapter uses a single method to keep the round-trip simple. + +### Lifecycle hooks are observation-only + +The adapter's `_on_intermediate_step` subscriber emits `steps/sessionStart` + `steps/userMessage` on `WORKFLOW_START` and `steps/agentResponse` + `steps/sessionEnd` on `WORKFLOW_END` via `_emit_lifecycle_hook`. **This path is fire-and-forget.** NAT's `IntermediateStepManager` subscription model is a notification stream — a subscriber callback cannot veto the event after the fact.
So although ACS-Core §hooks.md lists `agentResponse` as decision-eligible (ALLOW / DENY / MODIFY), the adapter cannot actually block or rewrite the workflow's output through this path. The envelopes are for trace + audit; enforcement on outputs must be placed at `pre_invoke` / `post_invoke` of the function that produced them, or as a separate review function in the YAML the workflow calls before returning. + +## ACS disposition → NAT behavior + +| ACS disposition | NAT action | +|---|---| +| `allow` | `pre_invoke` returns `None` (proceed unchanged) | +| `deny` | `pre_invoke` raises `ACSGuardianDenied` (NAT 1.7.0) or sets `context.action = InvocationAction.SKIP` (NAT dev). NAT's runtime documents both: *"Raises: Any exception to abort execution"* and the action-based equivalent. | +| `modify` | If `modifications.parameter_overrides` is present and is a dict, the adapter updates `context.modified_kwargs` in place and returns the context. NAT's runtime invokes the function with the modified kwargs. If `modified_content` is present on a post-tool result, the adapter sets `context.output`. | +| `ask` | Substituted to block at the middleware boundary in v1 (NAT has no native pause primitive at the function-middleware layer). Deployments wanting ASK should compose with NAT's HITL middleware (`nat.middleware.hitl`) and have the Guardian resolve before responding. | +| `defer` | Substituted to block in v1 (same reason). | + +## Configuration + +Middleware is registered with NAT via the `name=` class kwarg on the config: + +```python +class ACSMiddlewareConfig(FunctionMiddlewareBaseConfig, name="acs_guardian"): + ... +``` + +NAT picks this up via its `register_middleware` registration mechanism (the adapter ships with `@register_middleware(config_type=ACSMiddlewareConfig)` applied to a factory function). 
+ +YAML wiring: + +```yaml +middleware: + acs: + _type: acs_guardian # matches the name= kwarg above + guardian_url: http://127.0.0.1:8787/acs + default_deny: true + target_function_or_group: my_tools # optional; otherwise applied via group/workflow membership + target_location: input # NAT-standard field + session_id: null # adapter generates one per process + +function_groups: + my_tools: + middleware: [acs] + +workflow: + _type: react_agent + middleware: [acs] +``` + +## Composition with NAT's defense middleware + +NAT ships `nvidia-nat-security` with `defense_middleware` for content-level checks (PII, output verification, pre-tool LLM gating). The ACS adapter does NOT replace these — both can be attached to the same group. Ordering is by list position in YAML; place ACS first if you want the policy gate before content filters, last if you want content rewrites to be visible to ACS as the final state. + +## Conformance posture + +The NAT adapter implements ACS-Core's mandatory floor: + +- Hook taxonomy: every wrapped function call surfaces as `toolCallRequest` + `toolCallResult`. +- Dispositions: ALLOW / DENY / MODIFY supported normatively; ASK / DEFER substituted to DENY with audit (HITL composition is the recommended path). +- SessionContext: `session_id` sent on every request. +- Replay protection: `request_id` UUID + timestamp. +- Decision honoring: NAT's middleware contract guarantees the function does not execute if `pre_invoke` raises or sets SKIP. +- Baseline integrity: deferred to transport layer in this minimal adapter. diff --git a/adapters/nat/requirements.txt b/adapters/nat/requirements.txt new file mode 100644 index 0000000..de1944a --- /dev/null +++ b/adapters/nat/requirements.txt @@ -0,0 +1,9 @@ +# Runtime dependency for the NAT adapter. +# Pinned to the public release that ships ACSMiddlewareConfig's required +# base classes. 
The adapter feature-detects InvocationAction.SKIP and falls +# back to raising ACSGuardianDenied on releases that don't expose it. +nvidia-nat-core==1.7.0 + +# wire.py / --check uses ruamel.yaml so YAML round-trips preserve user +# comments, key order, and formatting (pyyaml strips all three). +ruamel.yaml>=0.17 diff --git a/adapters/nat/tests/__init__.py b/adapters/nat/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/adapters/nat/tests/example_payloads.md b/adapters/nat/tests/example_payloads.md new file mode 100644 index 0000000..171dec8 --- /dev/null +++ b/adapters/nat/tests/example_payloads.md @@ -0,0 +1,158 @@ +# Real-world payload examples — NAT (NVIDIA Agent Toolkit) + +NAT's adapter doesn't see a wire-format event the way Claude Code and Cursor adapters do. NAT calls the adapter's `pre_invoke` / `post_invoke` methods in-process with a real Python `InvocationContext` object. The examples below show: + +1. **What the adapter receives** (the `InvocationContext` it gets from NAT) +2. **What the adapter sends to the Guardian** (ACS JSON-RPC, same shape as the other adapters) +3. **What the adapter does to NAT's context** (set action, mutate kwargs, etc.) + +All masked. Identifying fields use placeholders. + +--- + +## 1. What the adapter receives from NAT + +NAT constructs an `InvocationContext` and passes it to `pre_invoke(ctx)`. 
Example for a Bash tool call: + +```python +InvocationContext( + function_context=FunctionMiddlewareContext( + name="Bash", + config=None, + description=None, + input_schema=None, + single_output_schema=NoneType, + stream_output_schema=NoneType, + ), + original_args=(), + original_kwargs={"command": "echo hello"}, + modified_args=(), + modified_kwargs={"command": "echo hello"}, + output=None, + action=None, +) +``` + +In `post_invoke`, the same context arrives with `output` populated: + +```python +InvocationContext( + function_context=FunctionMiddlewareContext(name="Bash", ...), + original_args=(), + original_kwargs={"command": "echo hello"}, + modified_args=(), + modified_kwargs={"command": "echo hello"}, + output="hello\n", + action=None, +) +``` + +NAT's runtime calls `function_middleware_invoke` which orchestrates: build context → `pre_invoke` → call function → `post_invoke`. See `packages/nvidia_nat_core/src/nat/middleware/function_middleware.py`. + +--- + +## 2. What the adapter sends to the Guardian + +The same ACS JSON-RPC shape as the other adapters. 
The adapter constructs this from the `InvocationContext`: + +### pre_invoke → `steps/toolCallRequest` + +```json +{ + "jsonrpc": "2.0", + "id": "00000000-0000-0000-0000-000000000001", + "method": "steps/toolCallRequest", + "params": { + "session_id": "nat-00000000000000", + "step_id": "00000000-0000-0000-0000-000000000002", + "tool": { + "name": "Bash", + "arguments": {"command": "echo hello"} + } + }, + "acs_version": "0.1.0", + "request_id": "00000000-0000-0000-0000-000000000003", + "timestamp": 1718450000000, + "metadata": {"source": "acs-adapter-nat"} +} +``` + +### post_invoke → `steps/toolCallResult` + +```json +{ + "jsonrpc": "2.0", + "id": "00000000-0000-0000-0000-000000000004", + "method": "steps/toolCallResult", + "params": { + "session_id": "nat-00000000000000", + "step_id": "00000000-0000-0000-0000-000000000005", + "tool": {"name": "Bash", "arguments": {"command": "echo hello"}}, + "result": "hello\n" + }, + "acs_version": "0.1.0", + "request_id": "00000000-0000-0000-0000-000000000006", + "timestamp": 1718450001234, + "metadata": {"source": "acs-adapter-nat"} +} +``` + +`session_id` is auto-generated per process unless `session_id` is set in `workflow.yml`. + +--- + +## 3. What the adapter does to NAT's context after the Guardian responds + +### Allow (Guardian returns `{"decision": "allow"}`) + +```python +# pre_invoke returns None → NAT proceeds with the call unchanged +return None +``` + +### Deny on NAT dev branch (has InvocationAction.SKIP) + +```python +context.action = InvocationAction.SKIP +return context +# NAT runtime: skips the function call, returns None +``` + +### Deny on NAT 1.7.0 (public release, no InvocationAction) + +```python +raise ACSGuardianDenied("destructive Bash pattern in: rm -rf /home/u") +# NAT runtime: documented "Raises: Any exception to abort execution" +``` + +The adapter feature-detects which mechanism NAT exposes and prefers the action-based path when available. 
+ +### Modify input (Guardian returns `parameter_overrides`) + +```python +context.modified_kwargs.update({"command": "echo hello # sanitized"}) +return context +# NAT runtime: invokes the function with the modified kwargs +``` + +### Modify output (Guardian returns `modified_content` in post_invoke) + +```python +context.output = "" +return context +# NAT runtime: propagates the modified output as if the function returned it +``` + +--- + +## Masking convention used here + +| Field | Real value contains | Masked as | +|---|---|---| +| `session_id` | Auto-generated (`nat-` prefix plus a per-process suffix) or deployment-defined | `nat-00000000000000` | +| `step_id`, `request_id`, `id` | Real UUIDs (per-request) | `00000000-0000-0000-0000-00000000000X` | +| `timestamp` | Real epoch ms | Synthetic value | +| `tool.arguments.command` | Real command (sometimes preserved when benign) | Preserved or replaced with a masked placeholder | +| `result` | Real tool output | Preserved or replaced with a masked placeholder | + +No real session data is committed to this repo. diff --git a/adapters/nat/tests/test_adapter.py b/adapters/nat/tests/test_adapter.py new file mode 100644 index 0000000..2375510 --- /dev/null +++ b/adapters/nat/tests/test_adapter.py @@ -0,0 +1,281 @@ +""" +Integration tests for the ACS NAT middleware against the installed +nvidia-nat-core package (NAT 1.7.0+). + +These tests construct a real NAT InvocationContext, run the adapter's +pre_invoke / post_invoke through the actual NAT middleware machinery, +and assert the round-trip behavior against a live example Guardian.
+ +Requires: + pip install nvidia-nat-core +""" +from __future__ import annotations + +import asyncio +import os +import json +import socket +import subprocess +import sys +import time +import unittest +from pathlib import Path + +# Skip cleanly if NAT isn't installed in the environment running tests +try: + from nat.middleware.middleware import ( + InvocationContext, + FunctionMiddlewareContext, + ) + _NAT_OK = True +except ImportError: + _NAT_OK = False + +# Import adapter (it tolerates NAT missing) +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from acs_adapter import ACSMiddleware, ACSGuardianDenied # noqa: E402 + +if _NAT_OK: + from acs_adapter import ACSMiddlewareConfig # noqa: E402 + + +HERE = Path(__file__).resolve().parent +GUARDIAN = HERE.parent.parent / "example-guardian" / "example_guardian.py" + + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "_common")) +from test_harness import free_port as _find_free_port, wait_port as _wait # noqa: E402 + + +def _make_context(tool_name: str, args: dict) -> "InvocationContext": + """Build a NAT InvocationContext that exercises the same path real NAT runtime would.""" + return InvocationContext( + function_context=FunctionMiddlewareContext( + name=tool_name, + config=None, + description=None, + input_schema=None, + single_output_schema=type(None), + stream_output_schema=type(None), + ), + original_args=(), + original_kwargs=args, + modified_args=(), + modified_kwargs=dict(args), + ) + + +@unittest.skipUnless(_NAT_OK, "nvidia-nat-core not installed in test environment") +class NATMiddlewareIntegration(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.port = _find_free_port() + env = os.environ.copy(); env["ACS_DEV_MODE"] = "1"; env.pop("ACS_HMAC_SECRET", None); env.pop("ACS_HMAC_SECRET_FILE", None) + cls.guardian_proc = subprocess.Popen( + [sys.executable, str(GUARDIAN), "--port", str(cls.port)], env=env, + stderr=subprocess.PIPE, + 
stdout=subprocess.DEVNULL, + ) + _wait("127.0.0.1", cls.port) + cls.guardian_url = f"http://127.0.0.1:{cls.port}/acs" + + @classmethod + def tearDownClass(cls) -> None: + cls.guardian_proc.terminate() + try: + cls.guardian_proc.wait(timeout=2.0) + except subprocess.TimeoutExpired: + cls.guardian_proc.kill() + + def _make_middleware(self, default_deny: bool = True) -> ACSMiddleware: + cfg = ACSMiddlewareConfig( + guardian_url=self.guardian_url, + default_deny=default_deny, + session_id="nat-test", + ) + return ACSMiddleware(cfg) + + # ----- allow path ----- + + def test_safe_bash_passes_through(self) -> None: + mw = self._make_middleware() + ctx = _make_context("Bash", {"command": "ls -la"}) + result = asyncio.run(mw.pre_invoke(ctx)) + # allow -> return None (passthrough) + self.assertIsNone(result) + self.assertIsNone(ctx.action) if hasattr(ctx, "action") else None + + def test_safe_read_passes_through(self) -> None: + mw = self._make_middleware() + ctx = _make_context("Read", {"file_path": "/tmp/safe.txt"}) + result = asyncio.run(mw.pre_invoke(ctx)) + self.assertIsNone(result) + + # ----- deny path ----- + + def test_destructive_bash_blocks(self) -> None: + """Guardian denies destructive Bash; adapter blocks NAT invocation.""" + mw = self._make_middleware() + ctx = _make_context("Bash", {"command": "rm -rf /home/user"}) + + # On NAT 1.7.0 (no InvocationAction), block raises ACSGuardianDenied + try: + result = asyncio.run(mw.pre_invoke(ctx)) + except ACSGuardianDenied as e: + self.assertIn("destructive", str(e).lower()) + return # block via exception path (NAT 1.7.0) + + # On future NAT (has InvocationAction), block via context.action + from nat.middleware import middleware as m + if hasattr(m, "InvocationAction"): + self.assertIsNotNone(ctx.action) + self.assertEqual(ctx.action.value, "skip") + else: + self.fail("destructive Bash should have blocked") + + def test_write_to_protected_path_blocks(self) -> None: + mw = self._make_middleware() + ctx = 
_make_context("Write", {"file_path": "/etc/passwd", "content": "x"}) + try: + asyncio.run(mw.pre_invoke(ctx)) + from nat.middleware import middleware as m + self.assertTrue(hasattr(m, "InvocationAction") and ctx.action is not None) + except ACSGuardianDenied as e: + self.assertIn("protected", str(e).lower()) + + # ----- post_invoke ----- + + def test_post_invoke_allow_passes_through(self) -> None: + mw = self._make_middleware() + ctx = _make_context("Read", {"file_path": "/tmp/x"}) + ctx.output = "file contents" + result = asyncio.run(mw.post_invoke(ctx)) + self.assertIsNone(result) # allow + no modification + + # ----- fail posture ----- + + def test_guardian_unreachable_default_deny_blocks(self) -> None: + cfg = ACSMiddlewareConfig( + guardian_url="http://127.0.0.1:1/dead", + default_deny=True, + ) + mw = ACSMiddleware(cfg) + ctx = _make_context("Read", {"file_path": "/tmp/x"}) + try: + asyncio.run(mw.pre_invoke(ctx)) + from nat.middleware import middleware as m + self.assertTrue(hasattr(m, "InvocationAction") and ctx.action is not None) + except ACSGuardianDenied as e: + self.assertIn("unreachable", str(e).lower()) + + def test_guardian_unreachable_fail_open(self) -> None: + cfg = ACSMiddlewareConfig( + guardian_url="http://127.0.0.1:1/dead", + default_deny=False, + ) + mw = ACSMiddleware(cfg) + ctx = _make_context("Read", {"file_path": "/tmp/x"}) + result = asyncio.run(mw.pre_invoke(ctx)) + self.assertIsNone(result) # fail-open: proceed + + +class ExtractArgumentsFromInvocationContext(unittest.TestCase): + """Regression: NAT's middleware chain captures the function input as + `modified_args[0]` (a Pydantic model or similar, returned by + `Function._convert_input(value)`), NOT as `modified_kwargs`. The + original adapter read only from `modified_kwargs` — which is empty + on the LangChain react_agent path — so every `toolCallRequest` + envelope carried `arguments: {}`. 
A real LLM-driven `rm -rf` + against a sandbox directory ran to completion because the + Guardian's policy had no command string to inspect. + + `_extract_arguments` MUST flatten args from EVERY shape NAT may + use to capture the input. These tests don't need NAT installed — + the helper is duck-typed on the context. + """ + + def _ctx(self, *, modified_args=(), modified_kwargs=None, + input_schema=None): + """Build a duck-typed object matching what _extract_arguments reads.""" + from types import SimpleNamespace + return SimpleNamespace( + modified_args=tuple(modified_args), + modified_kwargs=dict(modified_kwargs or {}), + function_context=SimpleNamespace(input_schema=input_schema), + ) + + def test_pydantic_v2_model_in_modified_args_extracts_fields(self) -> None: + """The exact regression. LangChain react_agent → NAT → Pydantic + model in modified_args[0] → adapter must surface field values.""" + try: + from pydantic import BaseModel + except ImportError: + self.skipTest("pydantic not installed") + from acs_adapter import _extract_arguments + + class ShellInput(BaseModel): + command: str + + ctx = self._ctx(modified_args=(ShellInput(command="rm -rf /tmp/x/"),)) + args = _extract_arguments(ctx) + self.assertEqual(args.get("command"), "rm -rf /tmp/x/", + "REGRESSION: LLM-driven rm -rf bypassed Guardian because adapter " + "ignored Pydantic input in modified_args[0]; field 'command' " + "must be extracted so policy can match destructive patterns") + + def test_plain_dict_in_modified_args_extracts_keys(self) -> None: + """NAT may also pass a raw dict if the function takes one.""" + from acs_adapter import _extract_arguments + ctx = self._ctx(modified_args=({"command": "echo hi"},)) + self.assertEqual(_extract_arguments(ctx), {"command": "echo hi"}) + + def test_modified_kwargs_still_works(self) -> None: + """Existing path (named kwargs, e.g. 
from direct middleware tests) + must still work — this is the path the integration tests above use.""" + from acs_adapter import _extract_arguments + ctx = self._ctx(modified_kwargs={"file_path": "/tmp/x"}) + self.assertEqual(_extract_arguments(ctx), {"file_path": "/tmp/x"}) + + def test_kwargs_and_args_both_present_kwargs_first(self) -> None: + """If both shapes are populated, both should appear in the result.""" + from acs_adapter import _extract_arguments + ctx = self._ctx(modified_args=({"command": "ls"},), + modified_kwargs={"timeout_s": 5}) + out = _extract_arguments(ctx) + self.assertEqual(out.get("command"), "ls") + self.assertEqual(out.get("timeout_s"), 5) + + def test_scalar_arg_with_schema_uses_field_name(self) -> None: + """If the arg is a scalar (e.g. single string), name it after + the input schema's first field — better than 'arg0' on the wire.""" + try: + from pydantic import BaseModel + except ImportError: + self.skipTest("pydantic not installed") + from acs_adapter import _extract_arguments + + class Schema(BaseModel): + command: str + + ctx = self._ctx(modified_args=("ls -la",), input_schema=Schema) + self.assertEqual(_extract_arguments(ctx).get("command"), "ls -la") + + def test_empty_context_returns_empty(self) -> None: + """No args, no kwargs, no schema → empty dict, not a crash.""" + from acs_adapter import _extract_arguments + self.assertEqual(_extract_arguments(self._ctx()), {}) + + def test_dataclass_in_modified_args(self) -> None: + from acs_adapter import _extract_arguments + from dataclasses import dataclass + + @dataclass + class ShellInput: + command: str + + ctx = self._ctx(modified_args=(ShellInput(command="ls"),)) + self.assertEqual(_extract_arguments(ctx).get("command"), "ls") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/adapters/nat/tests/test_dispositions_live.py b/adapters/nat/tests/test_dispositions_live.py new file mode 100644 index 0000000..877f7d9 --- /dev/null +++ 
b/adapters/nat/tests/test_dispositions_live.py @@ -0,0 +1,254 @@ +"""End-to-end verification that EVERY ACS disposition (ALLOW, DENY, +MODIFY, ASK, DEFER, post_invoke DENY) is honored by the adapter when +NAT's middleware chain captures the input as `modified_args[0]` — the +shape every LangChain-based agent (react_agent, langgraph, etc.) uses +in production. The previous test suite drove `modified_kwargs` only, +masking a class of silent-bypass bugs (the `arguments: {}` envelope +we hit in the live Vertex run). + +Each test: + 1. Spawns a `ProgrammableGuardian` configured to return a specific + disposition for the toolCallRequest. + 2. Builds a real `ACSMiddleware` against it. + 3. Calls `pre_invoke` with input wrapped as a Pydantic model in + `modified_args` (the LangChain shape). + 4. Asserts the disposition was honored — function input was rewritten + for MODIFY, exception was raised for DENY/ASK/DEFER, output was + redacted for post_invoke DENY. + +Without these tests, MODIFY / output-redaction can silently drop on +the agent path and ship to production as theatre. 
+"""
+from __future__ import annotations
+
+import asyncio
+import sys
+import unittest
+from pathlib import Path
+from types import SimpleNamespace
+
+try:
+    from nat.middleware.middleware import (  # type: ignore[import-not-found]
+        InvocationContext, FunctionMiddlewareContext,
+    )
+    from pydantic import BaseModel
+    _NAT_OK = True
+except ImportError:
+    _NAT_OK = False
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "_common"))
+
+from acs_adapter import ACSMiddleware, ACSGuardianDenied  # noqa: E402
+if _NAT_OK:
+    from acs_adapter import ACSMiddlewareConfig  # noqa: E402
+
+from test_harness import ProgrammableGuardian  # noqa: E402
+
+
+# Shared secret handed to the ProgrammableGuardian and exported as
+# ACS_HMAC_SECRET for the adapter (see DispositionsLive._mw).
+HMAC = "dispositions-live-shared-secret"
+
+
+def _make_ctx_with_pydantic_input(tool_name: str, model_cls, model_instance):
+    """Build an InvocationContext that mirrors what NAT's middleware
+    chain produces when the LangChain wrapper calls
+    `Function.acall_invoke(**kwargs)` — input lives in modified_args[0]
+    as a Pydantic model, modified_kwargs is empty."""
+    return InvocationContext(
+        function_context=FunctionMiddlewareContext(
+            name=tool_name,
+            config=None,
+            description=None,
+            input_schema=model_cls,
+            single_output_schema=type(None),
+            stream_output_schema=type(None),
+        ),
+        original_args=(model_instance,),
+        original_kwargs={},
+        modified_args=[model_instance],
+        modified_kwargs={},
+    )
+
+
+@unittest.skipUnless(_NAT_OK, "nvidia-nat-core not installed")
+class DispositionsLive(unittest.TestCase):
+    """Run-once Guardian; per-test handler override."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Start one ProgrammableGuardian for the whole class and record its URL."""
+        cls.guardian = ProgrammableGuardian(hmac_secret=HMAC)
+        cls.guardian.start()
+        cls.url = f"http://127.0.0.1:{cls.guardian.port}/acs"
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """Stop the class-wide Guardian."""
+        cls.guardian.stop()
+
+    def _mw(self, *, default_deny: bool = True) -> ACSMiddleware:
+        """Return an ACSMiddleware pointed at the class Guardian.
+
+        Sets ACS_HMAC_SECRET for the duration of the calling test and
+        registers a cleanup that puts the previous value back.
+        """
+        import os
+        # Restore env after the test — adapter reads ACS_HMAC_SECRET at
+        # call time, and leaking this into adjacent tests (test_live's
+        # ACS_DEV_MODE=1 no-signing setup) makes the next test pass
+        # signed envelopes to an unsigned Guardian → response signature
+        # invalid. addCleanup runs even on test failure.
+        prior = os.environ.get("ACS_HMAC_SECRET")
+        os.environ["ACS_HMAC_SECRET"] = HMAC
+        def _restore():
+            if prior is None:
+                os.environ.pop("ACS_HMAC_SECRET", None)
+            else:
+                os.environ["ACS_HMAC_SECRET"] = prior
+        self.addCleanup(_restore)
+        return ACSMiddleware(ACSMiddlewareConfig(
+            guardian_url=self.url, default_deny=default_deny,
+            session_id="dispositions-live"))
+
+    def setUp(self) -> None:
+        """Reset the shared Guardian before every test."""
+        self.guardian.reset()
+
+    def _set_response(self, method: str, response: dict) -> None:
+        """Install a Guardian handler for `method` that answers with `response`.
+
+        The handler echoes the request's `request_id` and always sets
+        `chain_hash`, `acs_version` and `type`; those four keys are
+        written after `**response`, so they win over same-named keys in
+        `response`.
+        """
+        def handler(req):
+            params = req.get("params", {}) or {}
+            return {
+                **response,
+                "request_id": params.get("request_id", ""),
+                "chain_hash": "0" * 64,
+                "acs_version": "0.1.0",
+                "type": "final",
+            }
+        self.guardian.handlers[method] = handler
+
+    # ────────────────────────────────────────────────────────────────
+    # MODIFY — Guardian rewrites the command; the function must run
+    # with the OVERRIDE value, not the original
+    # ────────────────────────────────────────────────────────────────
+
+    def test_modify_overrides_pydantic_model_input(self) -> None:
+        """REGRESSION: Adapter wrote overrides to context.modified_kwargs,
+        but NAT's actual call uses context.modified_args[0] (the Pydantic
+        model). MODIFY was silently dropped — Guardian saying 'rewrite
+        rm -rf to echo safe' had ZERO effect. Agent ran the original
+        dangerous command. The test below confirms the override
+        actually reaches the function input."""
+
+        class ShellInput(BaseModel):
+            command: str
+
+        original = ShellInput(command="rm -rf /tmp/secret-data/")
+        ctx = _make_ctx_with_pydantic_input("Bash", ShellInput, original)
+
+        self._set_response("steps/toolCallRequest", {
+            "decision": "modify",
+            "reasoning": "rewrite to safe command",
+            "modifications": {"parameter_overrides": {"command": "echo SAFE"}},
+        })
+
+        mw = self._mw()
+        # Return value is not inspected; the assertions read the context.
+        asyncio.run(mw.pre_invoke(ctx))
+
+        # The modification must reach the input that _ainvoke would actually
+        # call the function with. NAT calls call_next(*modified_args, **modified_kwargs).
+        # The adapter must EITHER mutate modified_args[0] in place or replace it.
+        post_args = list(ctx.modified_args or [])
+        post_kwargs = dict(ctx.modified_kwargs or {})
+
+        # Flatten what _ainvoke would actually see
+        if post_args and hasattr(post_args[0], "command"):
+            effective = post_args[0].command
+        elif "command" in post_kwargs:
+            effective = post_kwargs["command"]
+        else:
+            self.fail("After MODIFY, no `command` reachable via modified_args[0] "
+                      "or modified_kwargs — Guardian's parameter_overrides "
+                      "silently dropped (this is the bug)")
+
+        self.assertEqual(effective, "echo SAFE",
+            "REGRESSION: Guardian's MODIFY override did not reach the function "
+            "input. Adapter is writing to modified_kwargs, but NAT's call "
+            "uses modified_args[0]. The agent would run the ORIGINAL "
+            "(unsafe) command, defeating the purpose of MODIFY.")
+
+    # ────────────────────────────────────────────────────────────────
+    # ASK / DEFER substitution — per docs both substitute to DENY at
+    # the middleware boundary. Verify the function does NOT execute.
+    # ────────────────────────────────────────────────────────────────
+
+    def test_ask_substituted_to_deny(self) -> None:
+        """An `ask` decision must either raise ACSGuardianDenied or leave
+        a non-None `action` on the context."""
+        class ShellInput(BaseModel):
+            command: str
+        ctx = _make_ctx_with_pydantic_input(
+            "Bash", ShellInput, ShellInput(command="ls"))
+        self._set_response("steps/toolCallRequest", {
+            "decision": "ask", "reasoning": "needs human approval",
+        })
+        mw = self._mw(default_deny=True)
+        # Either raises ACSGuardianDenied or sets context.action = SKIP.
+        # In both cases the function MUST NOT execute.
+        try:
+            asyncio.run(mw.pre_invoke(ctx))
+            # If no exception, must have signaled SKIP via action
+            action = getattr(ctx, "action", None)
+            self.assertIsNotNone(action,
+                "ASK was neither raised nor substituted to SKIP — function "
+                "would execute despite Guardian asking for human approval")
+        except ACSGuardianDenied:
+            pass  # Expected on NAT 1.7.0
+
+    def test_defer_substituted_to_deny(self) -> None:
+        """A `defer` decision must either raise ACSGuardianDenied or leave
+        a non-None `action` on the context."""
+        class ShellInput(BaseModel):
+            command: str
+        ctx = _make_ctx_with_pydantic_input(
+            "Bash", ShellInput, ShellInput(command="ls"))
+        self._set_response("steps/toolCallRequest", {
+            "decision": "defer", "reasoning": "policy postponed",
+        })
+        mw = self._mw(default_deny=True)
+        try:
+            asyncio.run(mw.pre_invoke(ctx))
+            action = getattr(ctx, "action", None)
+            self.assertIsNotNone(action,
+                "DEFER was neither raised nor substituted to SKIP")
+        except ACSGuardianDenied:
+            pass
+
+    # ────────────────────────────────────────────────────────────────
+    # post_invoke DENY — Guardian observes the tool result and demands
+    # redaction. The tool already ran; the OUTPUT must not flow.
+    # ────────────────────────────────────────────────────────────────
+
+    def test_post_invoke_deny_redacts_output(self) -> None:
+        """Guardian sees the toolCallResult and decides the output is
+        sensitive (e.g. an exfil leak). Adapter MUST clear context.output
+        so the agent doesn't see the secret. If this fails, the agent
+        gets the raw secret string anyway — silent leak."""
+        class ShellInput(BaseModel):
+            command: str
+        ctx = _make_ctx_with_pydantic_input(
+            "Bash", ShellInput, ShellInput(command="cat /etc/secret"))
+
+        # Simulate the tool already ran and produced sensitive output
+        ctx.output = "AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+
+        self._set_response("steps/toolCallResult", {
+            "decision": "deny",
+            "reasoning": "output contains a credential",
+        })
+
+        mw = self._mw()
+        asyncio.run(mw.post_invoke(ctx))
+
+        # The output must be cleared. If the secret still flows through
+        # context.output, downstream readers (the LLM, callers, logs)
+        # will see it. `output=None` is the canonical redaction signal;
+        # downstream code that needs to distinguish "no output" from
+        # "redacted" reads the ACS_AUDIT stderr log for the
+        # post_invoke_redacted event (extra attributes on
+        # InvocationContext are blocked by Pydantic validate_assignment).
+        self.assertIsNone(ctx.output,
+            "REGRESSION: post_invoke DENY did not clear context.output — "
+            "the secret/sensitive value flows through despite Guardian "
+            "demanding redaction")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/adapters/nat/tests/test_envelope_schema.py b/adapters/nat/tests/test_envelope_schema.py
new file mode 100644
index 0000000..48b8bb4
--- /dev/null
+++ b/adapters/nat/tests/test_envelope_schema.py
@@ -0,0 +1,149 @@
+"""
+Spec-validation tests for the NAT adapter.
+
+Validates the adapter's `_build_request()` output against the
+canonical v0.1.0 `request-envelope.json` and the corresponding
+per-hook schemas. Does NOT require nvidia-nat-core to be installed
+(the adapter's helpers are importable without NAT).
+"""
+from __future__ import annotations
+
+import json
+import os
+import sys
+import unittest
+from pathlib import Path
+
+from jsonschema import Draft202012Validator
+from jsonschema.validators import RefResolver
+
+
+SPEC_DIR_DEFAULT = Path("/tmp/acs-spec-source/specification/v0.1.0")
+SPEC_DIR = Path(os.environ.get("ACS_SPEC_DIR", str(SPEC_DIR_DEFAULT)))
+
+HERE = Path(__file__).resolve().parent
+ADAPTER_DIR = HERE.parent
+sys.path.insert(0, str(ADAPTER_DIR))
+
+import acs_adapter  # noqa: E402
+
+
+def _load_schema(name: str) -> dict:
+    """Load and parse the JSON schema `name` (a path relative to SPEC_DIR)."""
+    # Pin UTF-8: without it open() falls back to the locale's preferred
+    # encoding, which can mis-decode non-ASCII schema text on some hosts.
+    with open(SPEC_DIR / name, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def _validate(payload: dict, schema_name: str) -> list:
+    """Validate `payload` against the Draft 2020-12 schema `schema_name`.
+
+    Returns a list of "<dotted path>: <message>" strings, one per
+    validation error; an empty list means the payload is valid.
+    """
+    schema = _load_schema(schema_name)
+    # Resolve relative $ref targets against the schema's own file URI.
+    # NOTE(review): RefResolver appears to be deprecated in newer
+    # jsonschema releases in favour of the `referencing` package — confirm
+    # against the version pinned in requirements-test.txt.
+    resolver = RefResolver(
+        base_uri=(SPEC_DIR.as_uri() + "/" + schema_name),
+        referrer=schema,
+    )
+    validator = Draft202012Validator(
+        schema, resolver=resolver,
+        format_checker=Draft202012Validator.FORMAT_CHECKER,
+    )
+    # NOTE(review): `or ''` is a no-op, so a root-level error is rendered
+    # with an empty path prefix — confirm whether a placeholder was meant.
+    return [
+        f"{'.'.join(str(p) for p in err.absolute_path) or ''}: {err.message}"
+        for err in validator.iter_errors(payload)
+    ]
+
+
+class _StubConfig:
+    """Minimal stand-in for ACSMiddlewareConfig so the test runs without NAT installed."""
+    guardian_url = "http://127.0.0.1:8787/acs"
+    default_deny = True
+    session_id = "00000000-0000-0000-0000-000000000001"
+    timeout_s = 5.0
+    target_function_or_group = "my_tools"
+    target_location = "input"
+
+
+class SpecValidationSetUp(unittest.TestCase):
+    """Shared fixture: fails (never skips) when the spec directory is
+    missing, then builds `self.mw` from the stub config."""
+
+    def setUp(self) -> None:
+        if not SPEC_DIR.exists():
+            self.fail(
+                f"Canonical spec schemas not found at {SPEC_DIR}. "
+                "Clone Agent-Control-Standard/ACS and set ACS_SPEC_DIR. "
+                "Spec validation is non-negotiable; this is not a skip."
+            )
+        # Bypass FunctionMiddleware.__init__ when NAT isn't available; instantiate
+        # the class directly with the stub config.
+        try:
+            self.mw = acs_adapter.ACSMiddleware(_StubConfig())
+        except TypeError:
+            # NAT base class accepts no args in some versions; retry with default ctor
+            self.mw = object.__new__(acs_adapter.ACSMiddleware)
+            self.mw.__init__(_StubConfig())
+
+
+class EnvelopeMatchesV010Schema(SpecValidationSetUp):
+    """The full envelope from `_build_request()` must satisfy request-envelope.json."""
+
+    def test_toolcallrequest_envelope_validates(self) -> None:
+        env = self.mw._build_request(
+            method="steps/toolCallRequest",
+            tool_name="search_web",
+            tool_arguments={"query": "ACS spec"},
+        )
+        errors = _validate(env, "request-envelope.json")
+        self.assertEqual(errors, [],
+            "envelope FAILS request-envelope.json:\n - " + "\n - ".join(errors))
+
+    def test_toolcallresult_envelope_validates(self) -> None:
+        env = self.mw._build_request(
+            method="steps/toolCallResult",
+            tool_name="search_web",
+            tool_arguments={"query": "ACS spec"},
+            result={"hits": 3},
+        )
+        errors = _validate(env, "request-envelope.json")
+        self.assertEqual(errors, [],
+            "envelope FAILS request-envelope.json:\n - " + "\n - ".join(errors))
+
+    def test_timestamp_is_iso8601(self) -> None:
+        env = self.mw._build_request("steps/toolCallRequest", "x", {})
+        ts = env["params"]["timestamp"]
+        self.assertIsInstance(ts, str)
+        import datetime as _dt
+        # fromisoformat raises ValueError on a malformed timestamp, which
+        # fails the test; a trailing "Z" is rewritten to an explicit offset first.
+        _dt.datetime.fromisoformat(ts.replace("Z", "+00:00"))
+
+    def test_metadata_has_required_fields(self) -> None:
+        env = self.mw._build_request("steps/toolCallRequest", "x", {})
+        meta = env["params"]["metadata"]
+        self.assertIn("agent_id", meta)
+        self.assertIn("session_id", meta)
+        self.assertTrue(meta["agent_id"])
+        self.assertTrue(meta["session_id"])
+
+
+class PayloadMatchesHookSchema(SpecValidationSetUp):
+    """The `params.payload` member must satisfy the per-hook schema."""
+
+    def test_toolcallrequest_payload_validates(self) -> None:
+        env = self.mw._build_request(
+            method="steps/toolCallRequest",
+            tool_name="search_web",
+            tool_arguments={"query": "ACS", "limit": 10},
+        )
+        payload = env["params"]["payload"]
+        errors = _validate(payload, "hooks/tool-call-request.json")
+        self.assertEqual(errors, [],
+            "tool-call-request payload FAILS:\n - " + "\n - ".join(errors))
+        for arg_name, arg_val in payload["arguments"].items():
+            self.assertIn("value", arg_val,
+                f"argument '{arg_name}' missing 'value' wrapper")
+
+    def test_toolcallresult_payload_validates(self) -> None:
+        env = self.mw._build_request(
+            method="steps/toolCallResult",
+            tool_name="search_web",
+            tool_arguments={"query": "ACS"},
+            result={"hits": 3, "titles": ["spec", "core"]},
+        )
+        payload = env["params"]["payload"]
+        errors = _validate(payload, "hooks/tool-call-result.json")
+        self.assertEqual(errors, [],
+            "tool-call-result payload FAILS:\n - " + "\n - ".join(errors))
+        self.assertIn(payload["exit_status"], {"success", "failure", "timeout", "blocked"})
+        self.assertIsInstance(payload["outputs"], list)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/adapters/nat/tests/test_failure_modes.py b/adapters/nat/tests/test_failure_modes.py
new file mode 100644
index 0000000..ab28508
--- /dev/null
+++ b/adapters/nat/tests/test_failure_modes.py
@@ -0,0 +1,384 @@
+"""
+Tests that target the 3 most-likely production failure modes identified
+in the harsh-reviewer audit.
+
+Each test names the failure mode in plain English, exercises the exact
+production scenario that would trigger it, and asserts the safe
+behavior. A regression on any of these is a real outage waiting to
+happen.
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import socket
+import subprocess
+import sys
+import threading
+import time
+import unittest
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from pathlib import Path
+
+# Import probe: _NAT_OK gates the NAT-dependent test classes below.
+try:
+    from nat.builder.context import Context, ContextState
+    from nat.builder.intermediate_step_manager import IntermediateStepManager
+    from nat.data_models.intermediate_step import (
+        IntermediateStepPayload, IntermediateStepType, StreamEventData,
+    )
+    _NAT_OK = True
+except ImportError:
+    _NAT_OK = False
+
+# The adapter package directory must be on sys.path before acs_adapter is imported.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from acs_adapter import ACSMiddleware  # noqa: E402
+
+if _NAT_OK:
+    from acs_adapter import ACSMiddlewareConfig  # noqa: E402
+
+HERE = Path(__file__).resolve().parent
+GUARDIAN_SCRIPT = HERE.parent.parent / "example-guardian" / "example_guardian.py"
+
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "_common"))
+from test_harness import free_port as _free_port, wait_port as _wait  # noqa: E402
+
+
+class RecordingGuardian(BaseHTTPRequestHandler):
+    """Test Guardian that records every received method + tracks
+    duplicate request_ids per session (so we can assert no replay)."""
+    # Class-level state, always accessed as RecordingGuardian.<attr>, so it
+    # is shared by every handler instance; tests clear it in setUp.
+    recorded: list = []
+    seen_per_session: dict = {}
+    lock = threading.Lock()
+
+    def do_POST(self) -> None:  # noqa: N802
+        """Record the JSON-RPC request and answer `allow`, or a -32005
+        error when a non-empty request_id repeats within its session."""
+        length = int(self.headers.get("Content-Length", "0"))
+        body = json.loads(self.rfile.read(length).decode("utf-8"))
+        method = body.get("method", "")
+        params = body.get("params") or {}
+        meta = params.get("metadata") or {}
+        sid = meta.get("session_id", "")
+        rid = params.get("request_id", "")
+        with RecordingGuardian.lock:
+            RecordingGuardian.recorded.append({"method": method, "session_id": sid, "request_id": rid})
+            seen = RecordingGuardian.seen_per_session.setdefault(sid, set())
+            if rid in seen and rid:
+                # Simulate the real Guardian's REPLAY_DETECTED behavior
+                reply = json.dumps({
+                    "jsonrpc": "2.0", "id": body.get("id"),
+                    "error": {"code": -32005, "message": f"REPLAY_DETECTED: {rid}"},
+                }).encode("utf-8")
+            else:
+                # An empty request_id is never remembered, so it can never
+                # be reported as a replay.
+                if rid:
+                    seen.add(rid)
+                reply = json.dumps({
+                    "jsonrpc": "2.0", "id": body.get("id"),
+                    "result": {"type": "final", "acs_version": "0.1.0",
+                               "request_id": rid, "decision": "allow",
+                               "chain_hash": "0" * 64},
+                }).encode("utf-8")
+        # Both outcomes are sent as HTTP 200; the error travels in the JSON body.
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(reply)))
+        self.end_headers()
+        self.wfile.write(reply)
+
+    def log_message(self, *args, **kwargs):
+        # Overridden to do nothing, which silences per-request logging.
+        return
+
+
+@unittest.skipUnless(_NAT_OK, "nvidia-nat-core not installed")
+class FailureMode1_DuplicateToolCallReplayDetected(unittest.TestCase):
+    """FAILURE MODE #1: NAT _correlation_request_id is uuid5-deterministic
+    from (session, function-name, kwargs-hash). Real workflows call the
+    same tool with the same args multiple times (list_files, get_status,
+    repeated lookups, parallel fanout). All such calls get the SAME
+    ACS request_id; the Guardian's per-session replay protection
+    rejects every call after the first with REPLAY_DETECTED (-32005).
+    The user's agent breaks: 'list_files' works on first call, fails
+    on every subsequent call in the same session.
+    """
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Serve RecordingGuardian on a free local port from a daemon thread."""
+        cls.port = _free_port()
+        cls.url = f"http://127.0.0.1:{cls.port}/acs"
+        cls.server = HTTPServer(("127.0.0.1", cls.port), RecordingGuardian)
+        cls.thread = threading.Thread(target=cls.server.serve_forever, daemon=True)
+        cls.thread.start()
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """Stop the serve loop and close the listening socket."""
+        cls.server.shutdown()
+        cls.server.server_close()
+
+    def setUp(self) -> None:
+        """Clear the shared recorder and set ACS_HANDSHAKE=0 for this test only."""
+        RecordingGuardian.recorded.clear()
+        RecordingGuardian.seen_per_session.clear()
+        # Put the previous value back afterwards so the override does not
+        # leak into whichever test runs next in this process. addCleanup
+        # runs even when the test fails.
+        prior = os.environ.get("ACS_HANDSHAKE")
+        os.environ["ACS_HANDSHAKE"] = "0"
+
+        def _restore() -> None:
+            if prior is None:
+                os.environ.pop("ACS_HANDSHAKE", None)
+            else:
+                os.environ["ACS_HANDSHAKE"] = prior
+
+        self.addCleanup(_restore)
+
+    def _ctx(self, tool_name: str, kwargs: dict):
+        """Build an InvocationContext for a kwargs-only call to `tool_name`.
+
+        `modified_kwargs` is a copy, so changing it leaves the dict passed
+        as `original_kwargs` untouched.
+        """
+        from nat.middleware.middleware import (
+            InvocationContext, FunctionMiddlewareContext)
+        return InvocationContext(
+            function_context=FunctionMiddlewareContext(
+                name=tool_name, config=None, description=None,
+                input_schema=None,
+                single_output_schema=type(None),
+                stream_output_schema=type(None),
+            ),
+            original_args=(), original_kwargs=kwargs,
+            modified_args=(), modified_kwargs=dict(kwargs),
+        )
+
+    def test_repeat_tool_call_does_not_get_replay_detected(self) -> None:
+        """Two calls to list_files() within the same session — the SECOND
+        call MUST NOT be rejected by Guardian replay protection."""
+        cfg = ACSMiddlewareConfig(
+            guardian_url=self.url, default_deny=False,
+            session_id="repeat-tool-session",
+        )
+        mw = ACSMiddleware(cfg)
+
+        # Return values are not inspected; the assertions read what the
+        # Guardian recorded.
+        ctx1 = self._ctx("list_files", {})
+        asyncio.run(mw.pre_invoke(ctx1))
+        ctx2 = self._ctx("list_files", {})
+        asyncio.run(mw.pre_invoke(ctx2))
+
+        # Both pre_invoke calls should hit the Guardian
+        tool_call_records = [r for r in RecordingGuardian.recorded
+                             if r["method"] == "steps/toolCallRequest"]
+        self.assertEqual(len(tool_call_records), 2,
+            f"expected 2 toolCallRequest sends, got {tool_call_records}")
+
+        # The two requests MUST have different request_ids; otherwise the
+        # Guardian's replay protection rejects the second call.
+        rid1, rid2 = tool_call_records[0]["request_id"], tool_call_records[1]["request_id"]
+        self.assertNotEqual(rid1, rid2,
+            "BUG #1: NAT adapter sent the same request_id for two distinct "
+            "calls to the same tool with the same args. Guardian replay "
+            "protection rejects the second call with REPLAY_DETECTED. "
+            "Repeat tool calls in real workflows (list_files, get_status, "
+            "etc.) will break in production.")
+
+    def test_pre_post_correlation_preserved_when_request_ids_differ(self) -> None:
+        """Bug-fix verification: pre_invoke and post_invoke for the same
+        wrapped call MUST correlate. The fix that makes request_ids
+        unique-per-call MUST also ensure post_invoke can read back the
+        ID that pre_invoke generated, so request_id_ref on the
+        toolCallResult equals the request_id on the toolCallRequest.
+        Captures recorded request bodies and checks the cross-reference."""
+        # Augment RecordingGuardian to capture bodies for this test
+        class BodyCapturingGuardian(BaseHTTPRequestHandler):
+            bodies: list = []
+
+            def do_POST(self) -> None:  # noqa: N802
+                length = int(self.headers.get("Content-Length", "0"))
+                body = json.loads(self.rfile.read(length).decode("utf-8"))
+                BodyCapturingGuardian.bodies.append(body)
+                reply = json.dumps({
+                    "jsonrpc": "2.0", "id": body.get("id"),
+                    "result": {"type": "final", "acs_version": "0.1.0",
+                               "request_id": body.get("params", {}).get("request_id", ""),
+                               "decision": "allow", "chain_hash": "0" * 64},
+                }).encode("utf-8")
+                self.send_response(200)
+                self.send_header("Content-Type", "application/json")
+                self.send_header("Content-Length", str(len(reply)))
+                self.end_headers()
+                self.wfile.write(reply)
+            def log_message(self, *a, **kw): return
+
+        # A dedicated server on its own port, torn down in the finally below.
+        port = _free_port()
+        srv = HTTPServer(("127.0.0.1", port), BodyCapturingGuardian)
+        t = threading.Thread(target=srv.serve_forever, daemon=True)
+        t.start()
+        try:
+            BodyCapturingGuardian.bodies.clear()
+            cfg = ACSMiddlewareConfig(
+                guardian_url=f"http://127.0.0.1:{port}/acs", default_deny=False,
+                session_id="correlation-session",
+            )
+            mw = ACSMiddleware(cfg)
+            ctx = self._ctx("get_weather", {"city": "Tel Aviv"})
+
+            asyncio.run(mw.pre_invoke(ctx))
+            ctx.output = "sunny"
+            asyncio.run(mw.post_invoke(ctx))
+
+            reqs = [b for b in BodyCapturingGuardian.bodies
+                    if b["method"] == "steps/toolCallRequest"]
+            results = [b for b in BodyCapturingGuardian.bodies
+                       if b["method"] == "steps/toolCallResult"]
+            self.assertEqual(len(reqs), 1)
+            self.assertEqual(len(results), 1)
+            req_id = reqs[0]["params"]["request_id"]
+            result_ref = results[0]["params"]["payload"].get("request_id_ref")
+            self.assertEqual(req_id, result_ref,
+                f"BUG: post_invoke must populate request_id_ref equal to "
+                f"the pre_invoke's request_id. Got req_id={req_id}, "
+                f"result_ref={result_ref}. Without this correlation the "
+                f"Guardian can't link a result to its originating request "
+                f"and tool-call-result.json:19-23 is violated.")
+        finally:
+            srv.shutdown()
+            srv.server_close()
+
+
+@unittest.skipUnless(_NAT_OK, "nvidia-nat-core not installed")
+class FailureMode2_GuardianRestartReplayWindow(unittest.TestCase):
+    """FAILURE MODE #2: GuardianState is in-process memory only. On
+    Guardian restart (deploy, OOM, crash, container roll), the
+    seen-request-id set is empty. Every envelope sent before the restart
+    is now replayable — the §10.3 MUST is silently disabled.
+
+    Real deployments restart Guardians continuously. This test sends an
+    envelope, restarts the Guardian process, re-sends the same envelope,
+    and asserts the replay is still rejected.
+    """
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Pick a free port and create the state directory handed to the Guardian."""
+        cls.port = _free_port()
+        cls.url = f"http://127.0.0.1:{cls.port}/acs"
+        cls.tmpdir = Path(__file__).resolve().parent / "_guardian_state_tmp"
+        cls.tmpdir.mkdir(exist_ok=True)
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """Remove the state directory, ignoring errors."""
+        import shutil
+        shutil.rmtree(cls.tmpdir, ignore_errors=True)
+
+    def _start_guardian(self):
+        """Launch the example Guardian on `self.port` and return the Popen
+        handle once the port wait returns.
+
+        The child runs with ACS_DEV_MODE=1, no HMAC secret variables, and
+        ACS_GUARDIAN_STATE_DIR pointing at the class tmpdir.
+        """
+        env = os.environ.copy()
+        env["ACS_DEV_MODE"] = "1"
+        env.pop("ACS_HMAC_SECRET", None)
+        env.pop("ACS_HMAC_SECRET_FILE", None)
+        env["ACS_GUARDIAN_STATE_DIR"] = str(self.tmpdir)  # for the fix
+        # stderr goes to DEVNULL rather than PIPE: nothing here reads the
+        # pipe, so a chatty child could block once the OS buffer fills,
+        # and the unread pipe would never be closed.
+        proc = subprocess.Popen(
+            [sys.executable, str(GUARDIAN_SCRIPT), "--port", str(self.port)], env=env,
+            stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
+        )
+        try:
+            _wait("127.0.0.1", self.port)
+        except BaseException:
+            # If the port wait raises, don't leave an orphaned Guardian behind.
+            proc.kill()
+            proc.wait()
+            raise
+        return proc
+
+    @staticmethod
+    def _stop_guardian(proc) -> None:
+        """Terminate `proc`; kill it after 2 s, and reap it either way."""
+        proc.terminate()
+        try:
+            proc.wait(timeout=2.0)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            # Wait for the killed process to exit so the restart on the
+            # same port is not racing a process that is still shutting down.
+            proc.wait()
+
+    def _send_envelope(self, sid: str, rid: str):
+        """POST a `steps/sessionStart` envelope for session `sid` with
+        request id `rid`; return the decoded JSON-RPC response."""
+        import urllib.request
+        import uuid as _uuid
+        from datetime import datetime, timezone
+        body = json.dumps({
+            "jsonrpc": "2.0", "id": str(_uuid.uuid4()),
+            "method": "steps/sessionStart",
+            "params": {
+                "acs_version": "0.1.0", "request_id": rid,
+                "timestamp": datetime.now(timezone.utc)
+                    .isoformat(timespec="milliseconds").replace("+00:00", "Z"),
+                "metadata": {"agent_id": "test", "session_id": sid, "platform": "test"},
+                "payload": {},
+            },
+        }).encode()
+        req = urllib.request.Request(self.url, data=body,
+            headers={"Content-Type": "application/json"}, method="POST")
+        with urllib.request.urlopen(req, timeout=5.0) as resp:
+            return json.loads(resp.read().decode())
+
+    def test_replay_protection_survives_guardian_restart(self) -> None:
+        """Send an envelope, restart the Guardian, resend the identical
+        envelope, and require a -32005 error on the second send."""
+        import uuid as _uuid
+        sid = str(_uuid.uuid4())
+        rid = str(_uuid.uuid4())
+
+        proc = self._start_guardian()
+        try:
+            r1 = self._send_envelope(sid, rid)
+            self.assertIn("result", r1, "first send must succeed")
+        finally:
+            self._stop_guardian(proc)
+
+        # Restart Guardian fresh — same port, same data dir if the fix is
+        # in place. Without the fix, seen_request_ids is empty.
+        proc = self._start_guardian()
+        try:
+            r2 = self._send_envelope(sid, rid)
+            self.assertIn("error", r2,
+                "BUG #2: replayed envelope was accepted after Guardian restart. "
+                "§10.3 says Guardians MUST reject duplicate request_ids — but "
+                "RAM-only state means every restart opens a replay window. "
+                "Any deployment with autoscaling, deploys, or crash-restart "
+                "loses replay protection on every restart.")
+            self.assertEqual(r2["error"]["code"], -32005,
+                f"expected REPLAY_DETECTED (-32005), got {r2['error']}")
+        finally:
+            self._stop_guardian(proc)
+
+
+@unittest.skipUnless(_NAT_OK, "nvidia-nat-core not installed")
+class FailureMode3_LifecycleSubscriptionRace(unittest.TestCase):
+    """FAILURE MODE #3: _ensure_lifecycle_subscribed is check-then-set
+    with no lock. Two parallel pre_invoke calls (normal in NAT — LLM and
+    tool calls overlap) both see _lifecycle_subscribed=False, both
+    subscribe. Every subsequent WORKFLOW_START / WORKFLOW_END fires its
+    ACS lifecycle hooks TWICE.
+    """
+
+    def test_concurrent_subscribe_calls_subscribe_only_once(self) -> None:
+        """Force the race window with a slow fake subscribe(); count
+        actual calls. Bug = > 1."""
+        import acs_adapter as adapter_mod
+
+        # One-element list so the nested class can mutate the counter.
+        subscribe_calls = [0]
+        subscribe_lock = threading.Lock()
+
+        class FakeSubscription:
+            def unsubscribe(self): pass
+
+        class FakeMgr:
+            def subscribe(self, on_next, on_error=None, on_complete=None):
+                # Widen the race window — both racing threads sleep
+                # inside subscribe(), so the second one cannot find a
+                # post-set _lifecycle_subscribed=True flag if there's no
+                # mutual exclusion before subscribe was called.
+                time.sleep(0.05)
+                with subscribe_lock:
+                    subscribe_calls[0] += 1
+                return FakeSubscription()
+
+        class FakeCtx:
+            intermediate_step_manager = FakeMgr()
+
+        cfg = ACSMiddlewareConfig(
+            guardian_url="http://127.0.0.1:1/dead",
+            default_deny=False, session_id="race-session-3",
+        )
+        mw = ACSMiddleware(cfg)
+
+        # Patch Context.get() so both threads see our FakeCtx
+        import unittest.mock as mock
+        with mock.patch.object(adapter_mod, "_NATContext") as patched_ctx:
+            patched_ctx.get.return_value = FakeCtx()
+
+            barrier = threading.Barrier(2)
+            def runner():
+                barrier.wait()  # release both threads simultaneously
+                mw._ensure_lifecycle_subscribed()
+
+            t1 = threading.Thread(target=runner)
+            t2 = threading.Thread(target=runner)
+            t1.start(); t2.start()
+            t1.join(); t2.join()
+
+        self.assertEqual(subscribe_calls[0], 1,
+            f"BUG #3: lifecycle subscribe() was called {subscribe_calls[0]} "
+            f"times instead of 1. Two threads raced through "
+            f"_ensure_lifecycle_subscribed's check-then-set with no lock. "
+            f"Every subsequent WORKFLOW event will fire its ACS lifecycle "
+            f"hook {subscribe_calls[0]} times: duplicate sessionStart, "
+            f"duplicate sessionEnd, duplicated audit chain entries.")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/adapters/nat/tests/test_lifecycle.py b/adapters/nat/tests/test_lifecycle.py
new file mode 100644
index 0000000..1f93380
--- /dev/null
+++ b/adapters/nat/tests/test_lifecycle.py
@@ -0,0 +1,173 @@
+"""
+Lifecycle-observer integration test.
+
+Subscribes the ACSMiddleware to NAT's IntermediateStepManager, pushes
+synthetic WORKFLOW_START / WORKFLOW_END events, and asserts the Guardian
+received ACS lifecycle hooks (sessionStart, userMessage, agentResponse,
+sessionEnd) in addition to the function-level toolCallRequest/Result.
+
+Without this middleware, NAT alone emits only function-call hooks and
+does not satisfy ACS-Core's 6-hook taxonomy minimum
+(`conformance.md:19`).
This test proves the lifecycle middleware closes
+that gap.
+
+Requires nvidia-nat-core 1.7.0+ for IntermediateStepManager.
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import socket
+import subprocess
+import sys
+import threading
+import time
+import unittest
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from pathlib import Path
+
+try:
+    from nat.builder.context import Context
+    from nat.builder.intermediate_step_manager import IntermediateStepManager
+    from nat.builder.context import ContextState
+    from nat.data_models.intermediate_step import (
+        IntermediateStepPayload, IntermediateStepType, StreamEventData,
+    )
+    _NAT_OK = True
+except ImportError:
+    _NAT_OK = False
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from acs_adapter import ACSMiddleware  # noqa: E402
+
+if _NAT_OK:
+    from acs_adapter import ACSMiddlewareConfig  # noqa: E402
+
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "_common"))
+from test_harness import free_port as _free_port  # noqa: E402
+
+
+class RecordingGuardian(BaseHTTPRequestHandler):
+    """Tiny HTTP server that records every method it receives."""
+    # Class-level list shared by every handler instance; cleared in setUp.
+    recorded: list = []
+
+    def do_POST(self) -> None:  # noqa: N802
+        """Record the JSON-RPC method name and reply with an `allow` decision."""
+        length = int(self.headers.get("Content-Length", "0"))
+        body = json.loads(self.rfile.read(length).decode("utf-8"))
+        RecordingGuardian.recorded.append(body.get("method", ""))
+        reply = json.dumps({
+            "jsonrpc": "2.0", "id": body.get("id"),
+            "result": {
+                "type": "final",
+                "acs_version": "0.1.0",
+                "request_id": body.get("params", {}).get("request_id", ""),
+                "decision": "allow",
+                "chain_hash": "0" * 64,
+            },
+        }).encode("utf-8")
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(reply)))
+        self.end_headers()
+        self.wfile.write(reply)
+
+    def log_message(self, *args, **kwargs):
+        # Overridden to do nothing, which silences per-request logging.
+        return
+
+
+@unittest.skipUnless(_NAT_OK, "nvidia-nat-core not installed")
+class LifecycleObserver(unittest.TestCase):
+    """Feeds synthetic intermediate-step events to the middleware and
+    checks which ACS methods reach the recording Guardian."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Serve RecordingGuardian on a free local port from a daemon thread."""
+        cls.port = _free_port()
+        cls.url = f"http://127.0.0.1:{cls.port}/acs"
+        cls.server = HTTPServer(("127.0.0.1", cls.port), RecordingGuardian)
+        cls.thread = threading.Thread(target=cls.server.serve_forever, daemon=True)
+        cls.thread.start()
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """Stop the serve loop and close the listening socket."""
+        cls.server.shutdown()
+        cls.server.server_close()
+
+    def setUp(self) -> None:
+        RecordingGuardian.recorded.clear()
+
+    def _disable_handshake(self) -> None:
+        """Set ACS_HANDSHAKE=0 for the current test and restore the
+        previous value afterwards, so the override does not leak into
+        later tests in the same process."""
+        prior = os.environ.get("ACS_HANDSHAKE")
+        os.environ["ACS_HANDSHAKE"] = "0"
+
+        def _restore() -> None:
+            if prior is None:
+                os.environ.pop("ACS_HANDSHAKE", None)
+            else:
+                os.environ["ACS_HANDSHAKE"] = prior
+
+        # addCleanup runs even when the test fails.
+        self.addCleanup(_restore)
+
+    def _push(self, mgr, event_type, name="test", input=None, output=None, uuid_=None):
+        """Push one IntermediateStepPayload through `mgr` and return its UUID.
+
+        `data` is attached whenever `input` or `output` is supplied.
+        The check is `is not None`, not truthiness, so falsy payloads
+        such as "" or 0 are still delivered. Pass `uuid_` to reuse the
+        UUID of a matching START event.
+        """
+        has_data = input is not None or output is not None
+        data = StreamEventData(input=input, output=output) if has_data else None
+        kwargs = {"event_type": event_type, "name": name, "data": data}
+        if uuid_:
+            kwargs["UUID"] = uuid_
+        payload = IntermediateStepPayload(**kwargs)
+        mgr.push_intermediate_step(payload)
+        return payload.UUID
+
+    def test_workflow_lifecycle_fires_acs_hooks(self) -> None:
+        """A workflow's WORKFLOW_START and WORKFLOW_END events MUST
+        produce sessionStart + userMessage + agentResponse + sessionEnd
+        on the ACS wire, satisfying conformance.md:19 minimum."""
+        cfg = ACSMiddlewareConfig(
+            guardian_url=self.url, default_deny=False,
+            session_id="lifecycle-test",
+        )
+        mw = ACSMiddleware(cfg)
+        self._disable_handshake()
+        # Set up a real NAT Context + IntermediateStepManager
+        ctx_state = ContextState.get()
+        mgr = IntermediateStepManager(ctx_state)
+        # Subscribe via the middleware's lifecycle hook
+        sub = mgr.subscribe(on_next=mw._on_intermediate_step)
+        try:
+            # START and END of a span share the same UUID so the manager
+            # pairs them; otherwise END is dropped with a warning.
+            wf_uuid = self._push(mgr, IntermediateStepType.WORKFLOW_START,
+                                 name="my_workflow", input="what is the weather?")
+            self._push(mgr, IntermediateStepType.WORKFLOW_END,
+                       name="my_workflow", output="It's sunny", uuid_=wf_uuid)
+            # Allow async dispatch to flush
+            time.sleep(0.2)
+        finally:
+            sub.unsubscribe()
+
+        recorded = list(RecordingGuardian.recorded)
+        self.assertIn("steps/sessionStart", recorded,
+            f"WORKFLOW_START must fire sessionStart; got {recorded}")
+        self.assertIn("steps/userMessage", recorded,
+            f"WORKFLOW_START with input must fire userMessage; got {recorded}")
+        self.assertIn("steps/agentResponse", recorded,
+            f"WORKFLOW_END with output must fire agentResponse; got {recorded}")
+        self.assertIn("steps/sessionEnd", recorded,
+            f"WORKFLOW_END must fire sessionEnd; got {recorded}")
+
+    def test_intermediate_function_events_do_not_fire_lifecycle_hooks(self) -> None:
+        """FUNCTION_START / TOOL_START etc are covered by FunctionMiddleware's
+        pre_invoke, NOT by the lifecycle observer. Lifecycle observer must
+        ignore them."""
+        cfg = ACSMiddlewareConfig(
+            guardian_url=self.url, default_deny=False,
+            session_id="lifecycle-non-trigger-test",
+        )
+        mw = ACSMiddleware(cfg)
+        self._disable_handshake()
+        ctx_state = ContextState.get()
+        mgr = IntermediateStepManager(ctx_state)
+        sub = mgr.subscribe(on_next=mw._on_intermediate_step)
+        try:
+            fn_uuid = self._push(mgr, IntermediateStepType.FUNCTION_START, name="weather_tool", input="x")
+            self._push(mgr, IntermediateStepType.FUNCTION_END, name="weather_tool", output="sunny", uuid_=fn_uuid)
+            llm_uuid = self._push(mgr, IntermediateStepType.LLM_START, name="llm", input="prompt")
+            self._push(mgr, IntermediateStepType.LLM_END, name="llm", output="completion", uuid_=llm_uuid)
+            time.sleep(0.2)
+        finally:
+            sub.unsubscribe()
+
+        self.assertEqual(RecordingGuardian.recorded, [],
+            "function/llm-level events MUST NOT fire lifecycle hooks "
+            "(those events are FunctionMiddleware's responsibility); "
+            f"got {RecordingGuardian.recorded}")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/adapters/nat/tests/test_live.py b/adapters/nat/tests/test_live.py
new file mode 100644
index 0000000..ab89255
--- /dev/null
+++ b/adapters/nat/tests/test_live.py
@@ -0,0 +1,208 @@
+"""
+Live NAT workflow test: invoke the ACS middleware exactly the way NAT's
+runtime invokes wrapped functions.
+
+Uses `FunctionMiddleware.function_middleware_invoke()` (the actual
+orchestration method NAT's runtime calls). The test creates a real
+target function, wraps it via the middleware's invoke method with a
+real `FunctionMiddlewareContext`, and asserts:
+  - Allow path: the function executes and its return value is propagated.
+  - Deny path: the function does NOT execute (no side effect observed)
+    and the block is signaled per NAT 1.7.0's contract.
+  - Modify path: the function receives modified kwargs.
+
+This exercises the same code path as a full NAT workflow run with the
+middleware attached -- the runtime constructs the same context, calls
+the same `function_middleware_invoke`, and respects the same block /
+modify outcomes. It does not load YAML or instantiate a Builder; those
+are NAT's responsibility, not the middleware's.
+
+Requires nvidia-nat-core. Skipped cleanly otherwise.
+"""
+from __future__ import annotations
+
+import asyncio
+import os
+import socket
+import subprocess
+import sys
+import time
+import unittest
+from pathlib import Path
+
+try:
+    from nat.middleware.middleware import FunctionMiddlewareContext
+    _NAT_OK = True
+except ImportError:
+    _NAT_OK = False
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from acs_adapter import ACSMiddleware, ACSGuardianDenied  # noqa: E402
+
+if _NAT_OK:
+    from acs_adapter import ACSMiddlewareConfig  # noqa: E402
+
+HERE = Path(__file__).resolve().parent
+GUARDIAN = HERE.parent.parent / "example-guardian" / "example_guardian.py"
+
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "_common"))
+from test_harness import free_port as _free_port, wait_port as _wait  # noqa: E402
+
+
+@unittest.skipUnless(_NAT_OK, "nvidia-nat-core not installed")
+class LiveNATWorkflow(unittest.TestCase):
+    """Drives `function_middleware_invoke` against an example Guardian
+    subprocess started once for the class."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Start the example Guardian on a free port with ACS_DEV_MODE=1
+        and no HMAC secret variables, then wait for the port."""
+        cls.port = _free_port()
+        env = os.environ.copy()
+        env["ACS_DEV_MODE"] = "1"
+        env.pop("ACS_HMAC_SECRET", None)
+        env.pop("ACS_HMAC_SECRET_FILE", None)
+        # stderr goes to DEVNULL rather than PIPE: nothing here reads the
+        # pipe, so a chatty child could block once the OS buffer fills,
+        # and the unread pipe would never be closed.
+        cls.guardian_proc = subprocess.Popen(
+            [sys.executable, str(GUARDIAN), "--port", str(cls.port)], env=env,
+            stderr=subprocess.DEVNULL,
+            stdout=subprocess.DEVNULL,
+        )
+        try:
+            _wait("127.0.0.1", cls.port)
+        except BaseException:
+            # unittest does not call tearDownClass when setUpClass fails,
+            # so stop the child here if the port wait raises.
+            cls.guardian_proc.kill()
+            cls.guardian_proc.wait()
+            raise
+        cls.guardian_url = f"http://127.0.0.1:{cls.port}/acs"
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """Terminate the Guardian; kill it after 2 s, and reap it either way."""
+        cls.guardian_proc.terminate()
+        try:
+            cls.guardian_proc.wait(timeout=2.0)
+        except subprocess.TimeoutExpired:
+            cls.guardian_proc.kill()
+            cls.guardian_proc.wait()
+
+    def _make_middleware(self, default_deny: bool = True) -> ACSMiddleware:
+        """Return an ACSMiddleware pointed at the class Guardian."""
+        return ACSMiddleware(ACSMiddlewareConfig(
+            guardian_url=self.guardian_url, default_deny=default_deny,
+            session_id="nat-live",
+        ))
+
+    def _ctx(self, tool_name: str) -> "FunctionMiddlewareContext":
+        """Build a FunctionMiddlewareContext for `tool_name` with no
+        input schema and `type(None)` output schemas."""
+        return FunctionMiddlewareContext(
+            name=tool_name, config=None, description=None,
+            input_schema=None,
+            single_output_schema=type(None),
+            stream_output_schema=type(None),
+        )
+
+    # ----- ALLOW path: function executes, return value propagated -----
+
+    def test_allow_function_executes(self) -> None:
+        """Guardian allows -> function runs -> return value flows through middleware."""
+        executed = {"count": 0, "args": None}
+
+        async def target(command: str) -> str:
+            executed["count"] += 1
+            executed["args"] = command
+            return f"ran: {command}"
+
+        mw = self._make_middleware()
+        result = asyncio.run(
+            mw.function_middleware_invoke(
+                command="ls -la",
+                call_next=target,
+                context=self._ctx("Bash"),
+            )
+        )
+        self.assertEqual(executed["count"], 1, "allowed function should have run exactly once")
+        self.assertEqual(executed["args"], "ls -la")
+        self.assertEqual(result, "ran: ls -la")
+
+    # ----- DENY path: function does NOT execute, block surfaced -----
+
+    def test_deny_function_does_not_execute(self) -> None:
+        """Guardian denies destructive Bash -> function MUST NOT run.
+
+        This is the load-bearing property of the middleware:
+        a function blocked by ACS must not produce its side effect.
+        """
+        executed = {"count": 0}
+
+        async def target(command: str) -> str:
+            executed["count"] += 1
+            return "should not see this"
+
+        mw = self._make_middleware()
+        with self.assertRaises(ACSGuardianDenied) as cm:
+            asyncio.run(
+                mw.function_middleware_invoke(
+                    command="rm -rf /home/u",
+                    call_next=target,
+                    context=self._ctx("Bash"),
+                )
+            )
+        self.assertEqual(
+            executed["count"], 0,
+            "denied function MUST NOT execute; side-effect counter would expose the bug",
+        )
+        self.assertIn("destructive", str(cm.exception).lower())
+
+    def test_deny_write_to_protected_path(self) -> None:
+        """A Write aimed at /etc/passwd must raise ACSGuardianDenied
+        without running the target."""
+        executed = {"count": 0}
+
+        async def target(file_path: str, content: str) -> str:
+            executed["count"] += 1
+            return "wrote"
+
+        mw = self._make_middleware()
+        with self.assertRaises(ACSGuardianDenied):
+            asyncio.run(
+                mw.function_middleware_invoke(
+                    file_path="/etc/passwd", content="x",
+                    call_next=target,
+                    context=self._ctx("Write"),
+                )
+            )
+        self.assertEqual(executed["count"], 0)
+
+    # ----- Fail-closed posture: Guardian unreachable -> function blocked -----
+
+    def test_guardian_unreachable_default_deny_blocks_function(self) -> None:
+        """With default_deny=True and a dead Guardian URL the call must
+        raise ACSGuardianDenied and the target must not run."""
+        executed = {"count": 0}
+
+        async def target(command: str) -> str:
+            executed["count"] += 1
+            return "ran"
+
+        mw = ACSMiddleware(ACSMiddlewareConfig(
+            guardian_url="http://127.0.0.1:1/dead",
+            default_deny=True, session_id="nat-live",
+        ))
+        with self.assertRaises(ACSGuardianDenied):
+            asyncio.run(
+                mw.function_middleware_invoke(
+                    command="ls",
+                    call_next=target,
+                    context=self._ctx("Bash"),
+                )
+            )
+        self.assertEqual(executed["count"], 0,
+            "fail-closed: function must not execute when Guardian unreachable")
+
+    # ----- Fail-open posture: function runs when Guardian unreachable -----
+
+    def test_guardian_unreachable_fail_open_runs_function(self) -> None:
+        executed = {"count": 0}
+
+        async def target(command: str) -> str:
+            executed["count"] += 1
+            return "ran"
+
+        mw = ACSMiddleware(ACSMiddlewareConfig(
guardian_url="http://127.0.0.1:1/dead", + default_deny=False, session_id="nat-live", + )) + result = asyncio.run( + mw.function_middleware_invoke( + command="ls", + call_next=target, + context=self._ctx("Bash"), + ) + ) + self.assertEqual(executed["count"], 1) + self.assertEqual(result, "ran") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/adapters/nat/wire.py b/adapters/nat/wire.py new file mode 100644 index 0000000..43f8a48 --- /dev/null +++ b/adapters/nat/wire.py @@ -0,0 +1,519 @@ +#!/usr/bin/env python3 +""" +Wire (or unwire / lint) the ACS adapter into a NAT `workflow.yml`. + +NAT's middleware is opt-in per attachment point: the user has to list +`acs_guardian` under `middleware:` on the workflow, every function_group, +and any function that overrides its group's middleware. Miss one and +that path bypasses the Guardian. This CLI walks the YAML, finds every +attachment point, and either reports gaps (`--check`) or fills them in +(`--write`). It is idempotent: every line we add carries a +`# acs-adapter-wired` comment so a re-run is a no-op and `--unwire` +removes exactly what we added (and nothing else). + +NAT's framework-wide lifecycle stream (`IntermediateStepManager`, +subscribed in acs_adapter.py:_ensure_lifecycle_subscribed) gives the +Guardian observability of every call even when middleware wiring is +incomplete — that's a backstop, not a substitute for wire+lint, because +lifecycle subscribers can only OBSERVE calls, not BLOCK them. Enforcement +still needs the middleware wired. + +Modes +===== + + python3 wire.py --workflow=path/to/workflow.yml + Dry-run preview: show the unified diff that --write would apply. + + python3 wire.py --workflow=path/to/workflow.yml --write + Apply the wiring (backup at workflow.yml.bak.). + + python3 wire.py --workflow=path/to/workflow.yml --check + Lint-only. Print every attachment point that is not wired and + exit non-zero if any gap is found. Suitable for CI. 
+ + python3 wire.py --workflow=path/to/workflow.yml --unwire --write + Remove every line we previously added (carries the marker). + +Coverage caveat +=============== +This wires what is in the YAML at wire-time. Coverage gaps it cannot +close: + - Functions registered dynamically in Python (not in the YAML). + - Sub-workflows loaded at runtime from other files. + - Custom middleware classes that fork their own call path. + +For those, rely on the lifecycle-subscription backstop the adapter +emits and audit the Guardian for "saw call via lifecycle but never via +middleware" findings. +""" +from __future__ import annotations + +import argparse +import datetime +import difflib +import io +import shutil +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +try: + from ruamel.yaml import YAML + from ruamel.yaml.comments import CommentedMap, CommentedSeq +except ImportError: + sys.stderr.write( + "wire.py needs `ruamel.yaml` for comment-preserving YAML round-trip.\n" + "Install: python3 -m pip install ruamel.yaml\n" + ) + sys.exit(2) + + +WIRE_MARKER = "acs-adapter-wired" +MIDDLEWARE_NAME = "acs_guardian" +MIDDLEWARE_TYPE = "acs_guardian" + + +# ────────────────────────────────────────────────────────────────────── +# Gap model — shared by lint + wire +# ────────────────────────────────────────────────────────────────────── + +@dataclass +class Gap: + """One missing-wire finding. Reused by --check (print) and + default/--write (mutate).""" + severity: str # "error" | "warn" + kind: str # "workflow" | "function_group" | "function" | "middleware_block" | "config" + path: str # dotted location e.g. 
"function_groups.my_tools" + line: int | None + detail: str + + +def _yaml() -> YAML: + """Round-trip parser preserving comments, formatting, and order.""" + y = YAML(typ="rt") + y.preserve_quotes = True + y.indent(mapping=2, sequence=4, offset=2) + return y + + +# ────────────────────────────────────────────────────────────────────── +# Walker — single source of truth for "what attachment points exist +# and which are wired". Used by both lint and wire to guarantee they +# never diverge. +# ────────────────────────────────────────────────────────────────────── + +def _line_of(node: Any) -> int | None: + """ruamel exposes .lc.line on CommentedMap / CommentedSeq nodes + (1-based after we +1). Returns None if absent.""" + lc = getattr(node, "lc", None) + if lc is None: + return None + return (lc.line or 0) + 1 + + +def _list_includes_middleware(seq: Any) -> bool: + """Treat both ['acs_guardian'] and ['acs_guardian', ...] as wired.""" + if not isinstance(seq, (list, CommentedSeq)): + return False + return any(str(item) == MIDDLEWARE_NAME for item in seq) + + +def find_attachment_points(doc: Any) -> list[Gap]: + """Walk the loaded workflow document and return every gap. 
+ Empty list means fully wired.""" + gaps: list[Gap] = [] + if not isinstance(doc, (dict, CommentedMap)): + gaps.append(Gap("error", "config", "", None, + "workflow file root is not a mapping")) + return gaps + + # 1) The middleware block must define acs_guardian with _type acs_guardian + mw_block = doc.get("middleware") + if not isinstance(mw_block, (dict, CommentedMap)) or MIDDLEWARE_NAME not in mw_block: + gaps.append(Gap("error", "middleware_block", "middleware", + _line_of(mw_block) if mw_block is not None else None, + f"top-level `middleware:` block is missing the " + f"`{MIDDLEWARE_NAME}:` definition")) + else: + mw_def = mw_block[MIDDLEWARE_NAME] + if not isinstance(mw_def, (dict, CommentedMap)) or mw_def.get("_type") != MIDDLEWARE_TYPE: + gaps.append(Gap( + "error", "config", f"middleware.{MIDDLEWARE_NAME}", + _line_of(mw_def), + f"`middleware.{MIDDLEWARE_NAME}._type` must be " + f"`{MIDDLEWARE_TYPE}` (got {mw_def.get('_type') if isinstance(mw_def, dict) else 'non-mapping'!r})")) + + # 2) The workflow attachment point + wf = doc.get("workflow") + if isinstance(wf, (dict, CommentedMap)): + if not _list_includes_middleware(wf.get("middleware")): + gaps.append(Gap("error", "workflow", "workflow", + _line_of(wf), + f"`workflow.middleware` does not include `{MIDDLEWARE_NAME}` — the agent's top-level execution will not be gated")) + + # 3) Every function_group attachment point + fgs = doc.get("function_groups") + if isinstance(fgs, (dict, CommentedMap)): + for name, fg in fgs.items(): + if not isinstance(fg, (dict, CommentedMap)): + continue + if not _list_includes_middleware(fg.get("middleware")): + gaps.append(Gap( + "error", "function_group", + f"function_groups.{name}", _line_of(fg), + f"function_group `{name}` does not list `{MIDDLEWARE_NAME}` — every function in this group will bypass the Guardian")) + + # 4) Any individual function with its own middleware: list — that + # override punches a hole through its group's wiring if it omits us. 
def _ensure_middleware_block(doc: CommentedMap, guardian_url: str,
                             default_deny: bool, timeout_s: float) -> bool:
    """Insert `middleware.acs_guardian` if missing. Returns True iff mutated.

    If the document has no top-level `middleware` mapping, one is created at
    position 0 and tagged with the WIRE_MARKER "(block created)" comment.
    If the mapping lacks the `acs_guardian` key, the definition built by
    `_make_middleware_def` is added and tagged with WIRE_MARKER.
    An existing `acs_guardian` entry is left as it is.
    """
    mw = doc.get("middleware")
    mutated = False
    if not isinstance(mw, (dict, CommentedMap)):
        # `middleware` may exist with a non-mapping value (null, scalar, list).
        # Remove it explicitly instead of relying on insert() to overwrite an
        # existing key; otherwise the stale value can survive and the new
        # mapping built below would never be attached to `doc`.
        if "middleware" in doc:
            del doc["middleware"]
        mw = CommentedMap()
        # Insert at top of file (before workflow if possible)
        doc.insert(0, "middleware", mw,
                   comment=WIRE_MARKER + " (block created)")
        mutated = True
    if MIDDLEWARE_NAME not in mw:
        mw[MIDDLEWARE_NAME] = _make_middleware_def(guardian_url, default_deny, timeout_s)
        mw.yaml_add_eol_comment(WIRE_MARKER, key=MIDDLEWARE_NAME)
        mutated = True
    return mutated
def install(doc: CommentedMap, *, guardian_url: str, default_deny: bool,
            timeout_s: float) -> list[str]:
    """Wire the adapter into `doc` in place and describe what changed.

    Visits, in order: the top-level middleware definition, the workflow,
    every function group, and every function that already declares its own
    `middleware` key. Returns one human-readable line per mutation; an empty
    list means nothing had to change.
    """
    mapping_types = (dict, CommentedMap)
    log: list[str] = []

    if _ensure_middleware_block(doc, guardian_url, default_deny, timeout_s):
        log.append(f"middleware.{MIDDLEWARE_NAME}: defined ({guardian_url})")

    workflow = doc.get("workflow")
    if isinstance(workflow, mapping_types) and _ensure_middleware_listed(workflow, "workflow"):
        log.append(f"workflow.middleware: added `{MIDDLEWARE_NAME}`")

    groups = doc.get("function_groups")
    if isinstance(groups, mapping_types):
        for group_name, group in groups.items():
            if not isinstance(group, mapping_types):
                continue
            if _ensure_middleware_listed(group, f"function_groups.{group_name}"):
                log.append(f"function_groups.{group_name}.middleware: added `{MIDDLEWARE_NAME}`")

    functions = doc.get("functions")
    if isinstance(functions, mapping_types):
        for fn_name, fn in functions.items():
            # Only functions that override `middleware:` are attachment points.
            if not isinstance(fn, mapping_types) or "middleware" not in fn:
                continue
            if _ensure_middleware_listed(fn, f"functions.{fn_name}"):
                log.append(f"functions.{fn_name}.middleware: added `{MIDDLEWARE_NAME}`")

    return log
def unwire(text: str) -> tuple[str, list[str]]:
    """Remove every node previously annotated with WIRE_MARKER.

    Operates on the parsed document rather than on raw text, so a removed key
    takes its whole subtree with it.

    Args:
        text: the workflow YAML source.

    Returns:
        `(new_text, removed)` where `removed` holds one description per
        removal. When nothing was removed, or the document root is not a
        mapping, the input text is returned unchanged.
    """
    removed: list[str] = []
    doc = _yaml().load(text)
    if not isinstance(doc, (dict, CommentedMap)):
        return text, removed

    # 1) middleware block (and its acs_guardian subkey)
    mw = doc.get("middleware")
    if isinstance(mw, (dict, CommentedMap)):
        # Subkey we added: middleware.acs_guardian (marker on that key)
        sub_comment = _eol_comment_text(mw, MIDDLEWARE_NAME)
        if MIDDLEWARE_NAME in mw and WIRE_MARKER in sub_comment:
            del mw[MIDDLEWARE_NAME]
            removed.append(f"middleware.{MIDDLEWARE_NAME}: removed")

        # Drop the top-level block only when we created it ("(block created)"
        # marker) AND it is now empty. A block we created may since have
        # gained user-defined middleware, and an empty block without our
        # marker was never ours; both must survive unwire.
        top_comment = _eol_comment_text(doc, "middleware")
        if "(block created)" in top_comment and len(mw) == 0:
            del doc["middleware"]
            removed.append("middleware: removed block")

    # 2) workflow.middleware
    wf = doc.get("workflow")
    if isinstance(wf, (dict, CommentedMap)):
        _strip_listed_middleware(wf, "workflow", removed)

    # 3) function_groups.*.middleware
    fgs = doc.get("function_groups")
    if isinstance(fgs, (dict, CommentedMap)):
        for name, fg in fgs.items():
            if isinstance(fg, (dict, CommentedMap)):
                _strip_listed_middleware(fg, f"function_groups.{name}", removed)

    # 4) functions.*.middleware
    funcs = doc.get("functions")
    if isinstance(funcs, (dict, CommentedMap)):
        for name, fn in funcs.items():
            if isinstance(fn, (dict, CommentedMap)):
                _strip_listed_middleware(fn, f"functions.{name}", removed)

    if not removed:
        # Nothing of ours was found: return the caller's text untouched
        # rather than a round-tripped copy.
        return text, removed

    buf = io.StringIO()
    _yaml().dump(doc, buf)
    return buf.getvalue(), removed
def render(doc: Any) -> str:
    """Serialize `doc` to YAML text using the shared round-trip configuration."""
    sink = io.StringIO()
    dumper = _yaml()
    dumper.dump(doc, sink)
    return sink.getvalue()
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: lint (`--check`), unwire (`--unwire`) or wire (default).

    Mode precedence is `--check`, then `--unwire`, then install. Without
    `--write`, the unwire and install modes only print a unified diff.

    Args:
        argv: argument list; None lets argparse read `sys.argv[1:]`.

    Returns:
        Process exit code. 0 on success or when there is nothing to do;
        1 when `--check` finds at least one error-severity gap; 2 when the
        workflow file is missing, is not UTF-8, or is not valid YAML.
    """
    p = argparse.ArgumentParser(
        description="Wire / lint / unwire the ACS adapter in a NAT workflow.yml.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__)
    p.add_argument("--workflow", required=True,
                   help="Path to the workflow.yml to operate on.")
    p.add_argument("--guardian-url", default="http://127.0.0.1:8787/acs",
                   help="ACS Guardian endpoint (default: http://127.0.0.1:8787/acs).")
    p.add_argument("--default-deny", action="store_true",
                   help="Set the middleware's default_deny: true (fail-closed when "
                        "Guardian is unreachable). Default false matches §6.4 fail-open + audit.")
    p.add_argument("--timeout-s", type=float, default=5.0,
                   help="Per-request Guardian timeout (default 5.0s).")
    p.add_argument("--write", action="store_true",
                   help="Apply changes. Without this, runs as dry-run (diff to stdout).")
    p.add_argument("--unwire", action="store_true",
                   help="Remove every line we previously added (WIRE_MARKER-tagged).")
    p.add_argument("--check", action="store_true",
                   help="Lint-only: print gaps and exit non-zero if any found.")
    args = p.parse_args(argv)

    yaml_path = Path(args.workflow).expanduser().resolve()
    if not yaml_path.exists():
        sys.stderr.write(f"FATAL: workflow file not found: {yaml_path}\n")
        return 2

    # Read and write explicitly as UTF-8 so the result does not depend on
    # the platform's default encoding.
    try:
        before_text = yaml_path.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        sys.stderr.write(f"FATAL: workflow file is not valid UTF-8: {yaml_path} ({exc})\n")
        return 2

    def _write_with_backup(new_text: str) -> Path:
        """Copy the current file to a timestamped .bak, then overwrite it."""
        backup = yaml_path.with_suffix(
            yaml_path.suffix + ".bak."
            + datetime.datetime.now().strftime("%Y%m%dT%H%M%S"))
        shutil.copyfile(yaml_path, backup)
        yaml_path.write_text(new_text, encoding="utf-8")
        return backup

    # Local import: only the parse-error type is needed, and only from here on.
    from ruamel.yaml.error import YAMLError

    try:
        if args.check:
            doc = _yaml().load(before_text)
            gaps = find_attachment_points(doc)
            print(format_gaps(gaps, yaml_path))
            return 1 if any(g.severity == "error" for g in gaps) else 0

        if args.unwire:
            after_text, removed = unwire(before_text)
            if not removed:
                print(f"{yaml_path}: nothing to unwire (no `{WIRE_MARKER}` markers found).")
                return 0
            if args.write:
                backup = _write_with_backup(after_text)
                print(f"unwired {len(removed)} line(s) from {yaml_path}")
                print(f"backup: {backup}")
                for r in removed:
                    print(f" - {r}")
                return 0
            diff = render_diff(before_text, after_text, str(yaml_path))
            sys.stdout.write(diff)
            print(f"\n(dry-run; re-run with --write to apply. {len(removed)} line(s) would be removed.)")
            return 0

        # Default: install / re-install
        doc = _yaml().load(before_text)
        changes = install(doc, guardian_url=args.guardian_url,
                          default_deny=args.default_deny,
                          timeout_s=args.timeout_s)
        after_text = render(doc)
    except YAMLError as exc:
        # Report unparseable input as a CLI error, not a traceback.
        sys.stderr.write(f"FATAL: {yaml_path} is not valid YAML: {exc}\n")
        return 2

    if not changes:
        print(f"{yaml_path}: already fully wired — no changes needed.")
        return 0

    if args.write:
        backup = _write_with_backup(after_text)
        print(f"wired {len(changes)} attachment point(s) in {yaml_path}")
        print(f"backup: {backup}")
        for c in changes:
            print(f" + {c}")
        return 0

    diff = render_diff(before_text, after_text, str(yaml_path))
    sys.stdout.write(diff)
    print(f"\n{len(changes)} attachment point(s) would be wired (dry-run; re-run with --write to apply):")
    for c in changes:
        print(f" + {c}")
    return 0
+jsonschema>=4.20,<5 +rfc8785>=0.1,<1 +# jsonschema's date-time / uri / hostname / email format checkers +# delegate to optional companion packages. Without these pinned, the +# checkers silently no-op and tests like test_timestamp_is_iso8601 +# false-pass (the invalid value goes through, an "expected to fail" +# assertion sees an empty error list, and CI shows green on a real +# wire-format bug). Pinning rfc3339-validator + the format extras +# turns the checker back on so the suite catches the regression. +rfc3339-validator>=0.1,<1 +jsonschema[format]>=4.20,<5 diff --git a/adapters/test_acs_core_conformance.py b/adapters/test_acs_core_conformance.py new file mode 100644 index 0000000..3cfa48b --- /dev/null +++ b/adapters/test_acs_core_conformance.py @@ -0,0 +1,1355 @@ +""" +ACS-Core conformance test suite. + +ONE test per MUST in `docs/spec/conformance.md` ACS-Core (lines 13-26), +plus the normative requirements in the §-cited sections it references. +Each test docstring quotes the exact spec text it falsifies. + +Run from the adapters/ directory: + + python -m unittest test_acs_core_conformance + +Result: a single "OK" with all-pass means this reference implementation +is conformant against the ACS-Core v0.1.0 baseline **minus full +Wrapped MCP**. The MCP namespace is validated for wire-format shape +(envelope validates, Guardian returns a structured response, no +crash) but the reference Guardian does not implement full MCP +request wrapping — that is a documented v0.2 deferral. See +`Core10_WrappedMcp` for the exact scope of what is and is not +checked. Deployments needing full MCP wrapping must extend the +Guardian. + +Result: any FAIL/ERROR names the specific MUST that broke, with the +spec citation in the test docstring. + +Adopter workflow: copy our adapters, modify for your stack, run this +file. If it still passes, your fork is still ACS-Core (with the +same Wrapped-MCP caveat). If it fails, the failure message tells +you which spec line you broke. 
class CoreHarness(unittest.TestCase):
    """Base class — spawns a Guardian with HMAC signing required.

    Each test class inherits and adds tests. setUpClass spawns one
    Guardian for the class; tests share it. Each test creates a fresh
    session_id so per-session state (replay set, chain head) doesn't
    cross-contaminate.
    """

    HMAC_SECRET: str | None = TEST_HMAC_SECRET  # subclass can null to disable

    @classmethod
    def setUpClass(cls) -> None:
        """Check the test prerequisites, then start the class-wide Guardian.

        Raises:
            RuntimeError: the canonical schemas are missing, or jsonschema's
                date-time format checker accepts an invalid value.
        """
        if not SPEC_DIR.exists():
            raise RuntimeError(
                f"Canonical ACS schemas not found at {SPEC_DIR}. "
                "ACS-Core conformance tests REQUIRE the canonical v0.1.0 "
                "schemas. Set ACS_SPEC_DIR to a clone of "
                "Agent-Control-Standard/ACS/specification/v0.1.0/. "
                "This is a hard fail — schema validation is non-negotiable."
            )
        # Hard-fail if jsonschema's format checkers are silently degraded.
        # Without `rfc3339-validator` installed, the `date-time` checker
        # is a no-op and tests like `test_timestamp_is_iso8601` false-
        # pass: an invalid timestamp goes through, the "must fail
        # validation" assertion sees an empty error list, suite shows
        # green on a real wire-format bug. Pin in requirements-test.txt
        # and assert here so a future drop of the dep can't reintroduce
        # the silent-pass mode.
        from jsonschema import Draft202012Validator
        _fc = Draft202012Validator.FORMAT_CHECKER
        if _fc.conforms("not-a-date", "date-time"):
            raise RuntimeError(
                "jsonschema date-time format checker is degraded — "
                "'not-a-date' was accepted as a valid date-time. "
                "Install `rfc3339-validator` (pin in adapters/requirements-test.txt). "
                "Without it, conformance tests that assert invalid "
                "timestamps must fail validation will silently pass."
            )
        cls.port = _free_port()
        cls.url = f"http://127.0.0.1:{cls.port}/acs"
        env = os.environ.copy()
        cls.statedir = tempfile.mkdtemp(prefix="acs-core-conformance-")
        env["ACS_GUARDIAN_STATE_DIR"] = cls.statedir
        if cls.HMAC_SECRET:
            env["ACS_HMAC_SECRET"] = cls.HMAC_SECRET
            env.pop("ACS_DEV_MODE", None)
        else:
            env["ACS_DEV_MODE"] = "1"
            env.pop("ACS_HMAC_SECRET", None)
        # Never let a secret file inherited from the caller's environment
        # reach the Guardian: in signing mode the tests sign with
        # HMAC_SECRET, in dev mode no secret should be configured at all.
        env.pop("ACS_HMAC_SECRET_FILE", None)
        try:
            cls.guardian_proc = subprocess.Popen(
                [sys.executable, str(GUARDIAN_SCRIPT), "--port", str(cls.port)],
                env=env,
                stderr=subprocess.PIPE, stdout=subprocess.DEVNULL,
            )
            # Nothing in this harness reads the Guardian's stderr. An unread
            # PIPE eventually fills and blocks the child on write, so drain
            # it in the background.
            cls._stderr_drain = threading.Thread(
                target=cls._drain, args=(cls.guardian_proc.stderr,), daemon=True)
            cls._stderr_drain.start()
            _wait_port("127.0.0.1", cls.port)
        except BaseException:
            # unittest skips tearDownClass when setUpClass raises; clean up
            # here so the child process and the state dir are not leaked.
            cls._shutdown()
            raise

    @classmethod
    def tearDownClass(cls) -> None:
        """Stop the Guardian and remove its state directory."""
        cls._shutdown()

    @staticmethod
    def _drain(stream) -> None:
        """Read and discard `stream` until EOF so the writer never blocks."""
        try:
            for _ in iter(stream.readline, b""):
                pass
        except (ValueError, OSError):
            # Stream was closed underneath us during shutdown.
            pass

    @classmethod
    def _shutdown(cls) -> None:
        """Terminate the Guardian (if it was started) and delete the state dir."""
        proc = getattr(cls, "guardian_proc", None)
        if proc is not None:
            proc.terminate()
            try:
                proc.wait(timeout=2.0)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()  # reap, otherwise the killed child stays a zombie
            drain = getattr(cls, "_stderr_drain", None)
            if drain is not None:
                drain.join(timeout=2.0)
            if proc.stderr is not None:
                proc.stderr.close()
        import shutil
        shutil.rmtree(cls.statedir, ignore_errors=True)

    def _make_envelope(self, method: str, payload: dict | None = None, *,
                       session_id: str | None = None,
                       request_id: str | None = None,
                       timestamp: str | None = None,
                       sign: bool = True) -> dict:
        """Build a JSON-RPC request envelope for `method`.

        Unset `session_id` / `request_id` default to fresh UUID4 strings and
        an unset `timestamp` to `acs_common.iso8601_now()`. The envelope is
        signed with a per-session key derived from HMAC_SECRET only when
        `sign` is true and HMAC_SECRET is set.
        """
        sid = session_id or str(uuid.uuid4())
        env = {
            "jsonrpc": "2.0",
            "id": str(uuid.uuid4()),
            "method": method,
            "params": {
                "acs_version": "0.1.0",
                "request_id": request_id or str(uuid.uuid4()),
                "timestamp": timestamp or acs_common.iso8601_now(),
                "metadata": {
                    "agent_id": "conformance-test",
                    "session_id": sid,
                    "platform": "test",
                },
                "payload": payload or {},
            },
        }
        if sign and self.HMAC_SECRET:
            key = acs_common.derive_session_key(self.HMAC_SECRET.encode(), sid)
            acs_common.sign_envelope(env, key=key, session_id=sid)
        return env

    def _post(self, envelope: dict) -> dict:
        """POST `envelope` as JSON to the Guardian and return the decoded JSON reply."""
        body = json.dumps(envelope).encode()
        req = urllib.request.Request(self.url, data=body,
                                     headers={"Content-Type": "application/json"}, method="POST")
        with urllib.request.urlopen(req, timeout=5.0) as resp:
            return json.loads(resp.read().decode())
class Core01_Handshake(CoreHarness):
    """CORE-01: handshake/hello exchange (conformance.md:17, §4)."""

    def test_handshake_returns_server_hello(self) -> None:
        """conformance.md:17 — 'Handshake — handshake/hello with
        ClientHello/ServerHello'. A Guardian MUST respond to
        handshake/hello with a ServerHello in result.payload, AND
        the response envelope itself MUST validate against
        response-envelope.json."""
        client_hello = {
            "acs_versions_supported": ["0.1.0"],
            "methods_implemented": ["steps/toolCallRequest"],
            "transports_supported": ["http"],
            "provenance_producer": "none",
            "profiles_supported": ["acs-core"],
        }
        request = self._make_envelope("handshake/hello", payload=client_hello, sign=False)
        reply = self._post(request)
        self.assertIn("result", reply,
                      f"handshake/hello must return a result; got {reply}")

        # Response envelope MUST validate against response-envelope.json
        schema_errors = _validate_response_envelope(reply)
        self.assertEqual(schema_errors, [],
                         f"handshake response fails response-envelope.json:\n - "
                         + "\n - ".join(schema_errors))

        server_hello = reply["result"].get("payload", {})
        # handshake.json:70 — ServerHello required
        required_fields = ("negotiated_version", "methods_evaluated",
                           "selected_transport", "timeout_config")
        for required_field in required_fields:
            self.assertIn(required_field, server_hello,
                          f"ServerHello missing required field {required_field!r}; "
                          f"got {server_hello}")
        self.assertEqual(server_hello["negotiated_version"], "0.1.0")
        self.assertIn("default_ms", server_hello["timeout_config"])

    def test_version_mismatch_returns_unsupported_version(self) -> None:
        """§4: 'Version mismatch terminates with UNSUPPORTED_VERSION
        (-32001)'."""
        client_hello = {
            "acs_versions_supported": ["99.0.0"],  # unsupported
            "methods_implemented": ["steps/toolCallRequest"],
            "transports_supported": ["http"],
            "provenance_producer": "none",
        }
        request = self._make_envelope("handshake/hello", payload=client_hello, sign=False)
        reply = self._post(request)
        self.assertIn("error", reply,
                      f"version-mismatch handshake must error; got {reply}")
        error = reply["error"]
        self.assertEqual(error["code"], -32001,
                         f"§4: code must be -32001 UNSUPPORTED_VERSION; got {error}")
A correctly-built envelope + MUST pass request-envelope.json validation including + format-checker (uuid, date-time).""" + env = self._make_envelope("steps/sessionStart", payload={}) + errors = _validate_request_envelope(env) + self.assertEqual(errors, [], + f"Conformant envelope FAILS request-envelope.json validation:\n - " + + "\n - ".join(errors)) + + def test_contradiction_validator_actually_works(self) -> None: + """Falsifier check: a deliberately broken envelope MUST be + rejected. Without this, a no-op validator would pass every + positive-case test.""" + broken = {"jsonrpc": "2.0"} # missing method, id, params entirely + errors = _validate_request_envelope(broken) + self.assertNotEqual(errors, [], + "validator did not reject an envelope missing method/id/params — " + "the schema check is a no-op") + + def test_jsonrpc_field_is_literal_2_0(self) -> None: + """request-envelope.json:10 — `jsonrpc` is the literal string + "2.0"; any other value MUST be rejected by schema validation.""" + env = self._make_envelope("steps/sessionStart", payload={}) + env["jsonrpc"] = "1.0" # tamper + errors = _validate_request_envelope(env) + self.assertTrue(any("jsonrpc" in e for e in errors), + f"jsonrpc != '2.0' must fail validation; got errors {errors}") + + def test_no_additional_top_level_fields_allowed(self) -> None: + """request-envelope.json:8 — `additionalProperties: false` at + envelope root. 
Any extra top-level key MUST be rejected.""" + env = self._make_envelope("steps/sessionStart", payload={}) + env["unknown_field"] = "should be rejected" + errors = _validate_request_envelope(env) + self.assertTrue(any("unknown_field" in e or "Additional" in e for e in errors), + f"Extra top-level field must be rejected; got {errors}") + + def test_acs_params_all_required_fields_present(self) -> None: + """request-envelope.json:25 — AcsParams MUST contain + acs_version, request_id, timestamp, metadata, payload.""" + env = self._make_envelope("steps/sessionStart", payload={}) + for required in ("acs_version", "request_id", "timestamp", + "metadata", "payload"): + self.assertIn(required, env["params"], + f"params must contain {required!r}") + # Now drop each in turn; validator must reject every variant. + for required in ("acs_version", "request_id", "timestamp", + "metadata", "payload"): + broken = json.loads(json.dumps(env)) + del broken["params"][required] + errors = _validate_request_envelope(broken) + self.assertTrue(errors, + f"envelope missing required params.{required} must fail; " + f"validator passed instead") + + def test_metadata_required_agent_and_session_id(self) -> None: + """request-envelope.json:62 — metadata MUST contain agent_id + and session_id.""" + env = self._make_envelope("steps/sessionStart", payload={}) + for required in ("agent_id", "session_id"): + self.assertIn(required, env["params"]["metadata"]) + # Drop each; validator rejects. 
+ for required in ("agent_id", "session_id"): + broken = json.loads(json.dumps(env)) + del broken["params"]["metadata"][required] + errors = _validate_request_envelope(broken) + self.assertTrue(any(required in e for e in errors), + f"envelope missing metadata.{required} must fail validation") + + def test_request_id_is_uuid(self) -> None: + """request-envelope.json:32-35 — request_id format: uuid.""" + env = self._make_envelope("steps/sessionStart", payload={}) + env["params"]["request_id"] = "not-a-uuid" + errors = _validate_request_envelope(env) + self.assertTrue(any("request_id" in e for e in errors), + f"non-UUID request_id must fail validation; got {errors}") + + def test_timestamp_is_iso8601(self) -> None: + """request-envelope.json:38-40 — timestamp format: date-time.""" + env = self._make_envelope("steps/sessionStart", payload={}) + env["params"]["timestamp"] = "yesterday" + errors = _validate_request_envelope(env) + self.assertTrue(any("timestamp" in e for e in errors), + f"non-ISO timestamp must fail validation; got {errors}") + + def test_acs_version_matches_semver(self) -> None: + """request-envelope.json:27-30 — acs_version pattern ^\\d+\\.\\d+\\.\\d+$.""" + env = self._make_envelope("steps/sessionStart", payload={}) + env["params"]["acs_version"] = "v1" # not semver + errors = _validate_request_envelope(env) + self.assertTrue(any("acs_version" in e for e in errors), + f"non-semver acs_version must fail validation; got {errors}") + + def test_method_namespace_pattern(self) -> None: + """request-envelope.json:13-14 — method MUST match + ^(steps/|protocols/|agbom/|trace/|system/|handshake/|wrapped:).+""" + env = self._make_envelope("arbitrary/method", payload={}) + errors = _validate_request_envelope(env) + self.assertTrue(any("method" in e for e in errors), + f"method outside reserved namespaces must fail validation") + + +# ============================================================================= +# CORE-03 — Hook taxonomy minimum 
# (conformance.md:19)
# =============================================================================
#
# "At minimum: sessionStart, userMessage or agentTrigger, toolCallRequest,
# toolCallResult, agentResponse, sessionEnd"
# =============================================================================

class Core03_HookTaxonomyMinimum(CoreHarness):
    """Each of the 6 minimum hooks must be accepted with a valid
    disposition (positive case) AND a malformed payload for that hook
    must be rejected by Guardian-side schema validation (contradiction).
    Without the contradiction, a Guardian that returns 'allow' for any
    payload — including malformed ones — would pass."""

    # (method, valid_payload, broken_payload, payload_schema_file)
    # Broken payloads exploit per-hook schema constraints — wrong types
    # on enum-constrained fields, missing-required fields, malformed
    # nested shapes. Each broken payload MUST fail validation; if it
    # doesn't, the schema isn't actually enforcing what it advertises.
    HOOKS = [
        ("steps/sessionStart", {},
         # policy_mode is enum strict/moderate/permissive; 123 is wrong type AND not in enum
         {"policy_mode": 123},
         "hooks/session-start.json"),
        ("steps/userMessage",
         {"content": [{"type": "text", "value": "hi"}]},
         {"content": "not-an-array"},  # user-message.json requires content to be array
         "hooks/user-message.json"),
        ("steps/toolCallRequest",
         {"tool": {"name": "Read"}, "arguments": {"file_path": {"value": "/tmp/x"}}},
         {"tool": {"name": "Read"}},  # missing required `arguments`
         "hooks/tool-call-request.json"),
        ("steps/toolCallResult",
         {"tool": {"name": "Read"}, "exit_status": "success",
          "outputs": [{"value": "ok"}]},
         {"tool": {"name": "Read"}, "exit_status": "magical"},  # bad enum value + missing outputs
         "hooks/tool-call-result.json"),
        ("steps/agentResponse",
         {"content": [{"type": "text", "value": "ok"}]},
         {},  # missing required content
         "hooks/agent-response.json"),
        ("steps/sessionEnd", {"reason": "completed"},
         {"reason": "nonsense"},  # not in enum
         "hooks/session-end.json"),
    ]

    def _send(self, method: str, payload: dict) -> dict:
        """Wrap ``payload`` in a fresh envelope for ``method`` and POST it."""
        return self._post(self._make_envelope(method, payload))

    def _validate_hook_payload(self, payload: dict, schema_file: str) -> list:
        """Validate ``payload`` against the hook schema in ``schema_file``.

        Returns one ``"<location>: <message>"`` string per schema
        violation; an empty list means the payload is valid. The location
        is the dotted path to the offending element, or ``(root)`` when
        the violation is on the payload itself (previously that case
        produced an empty location).
        """
        from jsonschema import Draft202012Validator
        schema, resolver = _build_local_resolver(schema_file)
        validator = Draft202012Validator(
            schema, resolver=resolver,
            format_checker=Draft202012Validator.FORMAT_CHECKER,
        )
        findings = []
        for err in validator.iter_errors(payload):
            location = ".".join(str(p) for p in err.absolute_path) or "(root)"
            findings.append(f"{location}: {err.message}")
        return findings

    def test_each_minimum_hook_returns_known_disposition(self) -> None:
        """conformance.md:19 — each of the 6 minimum hooks must produce
        a *known* disposition. Positive case + sanity: result.decision
        is one of allow/deny/modify/ask/defer, not garbage."""
        KNOWN = {"allow", "deny", "modify", "ask", "defer"}
        for method, payload, _broken, _schema in self.HOOKS:
            with self.subTest(method=method):
                resp = self._send(method, payload)
                self.assertIn("result", resp,
                    f"{method} must be accepted; got {resp}")
                self.assertIn(resp["result"].get("decision"), KNOWN,
                    f"{method} returned non-spec disposition "
                    f"{resp['result'].get('decision')!r}")

    def test_each_minimum_hooks_malformed_payload_fails_schema(self) -> None:
        """Contradiction check: a malformed payload for each minimum hook
        MUST fail the canonical hooks/*.json schema. Verifies the per-hook
        schemas actually constrain shape — not just rubber-stamp anything."""
        for method, _payload, broken, schema_file in self.HOOKS:
            with self.subTest(method=method):
                errors = self._validate_hook_payload(broken, schema_file)
                self.assertNotEqual(errors, [],
                    f"{method}: a deliberately broken payload {broken!r} "
                    f"was accepted by {schema_file} — schema is not "
                    f"actually constraining shape")


# =============================================================================
# CORE-04 — Dispositions (conformance.md:20, §6)
# =============================================================================
#
# "All five (ALLOW, DENY, MODIFY, ASK, DEFER) with required fields per §6"
# response-envelope.json:107-110 — conditional requirements:
#   deny   -> reasoning required
#   modify -> reasoning + modifications required
#   ask    -> reasoning + ask_details required
#   defer  -> reasoning + defer_details required
# =============================================================================

class Core04_Dispositions(CoreHarness):
    """CORE-04: disposition responses checked against response-envelope.json.

    ALLOW and DENY are exercised against the live Guardian; MODIFY, ASK
    and DEFER are checked with hand-built responses.
    """

    # Fixed request_id used by every synthesized response below.
    _SAMPLE_REQUEST_ID = "00000000-0000-4000-8000-000000000001"

    def _assert_conditional_field_required(self, decision: str,
                                           missing_field: str,
                                           reasoning: str) -> None:
        """Build a ``decision`` response that carries ``reasoning`` but not
        ``missing_field`` and require the schema validator to name
        ``missing_field`` in at least one error."""
        broken = {
            "jsonrpc": "2.0", "id": "x",
            "result": {
                "type": "final", "acs_version": "0.1.0",
                "request_id": self._SAMPLE_REQUEST_ID,
                "decision": decision,
                "reasoning": reasoning,
            },
        }
        errors = _validate_response_envelope(broken)
        self.assertTrue(any(missing_field in e for e in errors),
            f"{decision}-without-{missing_field} must fail validation; got {errors}")

    def test_allow_response_validates(self) -> None:
        """§6 — ALLOW: no required fields beyond decision."""
        resp = self._post(self._make_envelope("steps/sessionStart", {}))
        # Guard first: a JSON-RPC error reply should be a readable failure,
        # not a KeyError on resp["result"] further down.
        self.assertIn("result", resp,
            f"steps/sessionStart must be accepted; got {resp}")
        errors = _validate_response_envelope(resp)
        self.assertEqual(errors, [],
            "ALLOW response fails response-envelope.json:\n - "
            + "\n - ".join(errors))
        self.assertEqual(resp["result"]["decision"], "allow")

    def test_allow_response_without_required_envelope_fields_rejected(self) -> None:
        """Contradiction: an allow response missing AcsResult required
        fields (type, acs_version, request_id, decision) MUST fail
        schema validation. Otherwise positive-case tests are tautological."""
        complete = {
            "type": "final", "acs_version": "0.1.0",
            "request_id": self._SAMPLE_REQUEST_ID,
            "decision": "allow",
        }
        # One variant per required field with that field removed ...
        variants = {
            f"missing {field}": {k: v for k, v in complete.items() if k != field}
            for field in complete
        }
        # ... plus one whose decision is outside the enum.
        variants["bogus decision"] = {**complete, "decision": "maybe"}
        for label, result in variants.items():
            broken = {"jsonrpc": "2.0", "id": "x", "result": result}
            with self.subTest(case=label):
                errors = _validate_response_envelope(broken)
                self.assertNotEqual(errors, [],
                    f"broken allow response {broken!r} (case {label}) was "
                    f"accepted by schema — validator is a no-op")

    def test_deny_response_includes_reasoning(self) -> None:
        """response-envelope.json:107 — 'if decision const deny, then
        required: [reasoning]'. The Guardian's destructive-bash deny
        path MUST include reasoning."""
        env = self._make_envelope("steps/toolCallRequest",
            {"tool": {"name": "Bash"},
             "arguments": {"command": {"value": "rm -rf /home/u"}}})
        resp = self._post(env)
        self.assertIn("result", resp,
            f"destructive Bash request must yield a result; got {resp}")
        self.assertEqual(resp["result"]["decision"], "deny")
        self.assertIn("reasoning", resp["result"],
            "§6 + response-envelope.json:107 — DENY MUST include reasoning")
        self.assertTrue(resp["result"]["reasoning"])
        # The response itself MUST validate
        errors = _validate_response_envelope(resp)
        self.assertEqual(errors, [],
            "DENY response fails response-envelope.json:\n - "
            + "\n - ".join(errors))

    def test_modify_without_modifications_rejected_by_schema(self) -> None:
        """response-envelope.json:108 — 'if decision const modify, then
        required: [reasoning, modifications]'. A response that claims
        modify but lacks modifications MUST fail schema validation."""
        # Synthesize a broken response (Guardian doesn't emit modify in
        # our example, so we construct one manually and validate it).
        self._assert_conditional_field_required(
            "modify", "modifications", "but no modifications field")

    def test_ask_without_ask_details_rejected_by_schema(self) -> None:
        """response-envelope.json:109 — 'if decision const ask, then
        required: [reasoning, ask_details]'."""
        self._assert_conditional_field_required(
            "ask", "ask_details", "missing ask_details")

    def test_defer_without_defer_details_rejected_by_schema(self) -> None:
        """response-envelope.json:110 — 'if decision const defer, then
        required: [reasoning, defer_details]'."""
        self._assert_conditional_field_required(
            "defer", "defer_details", "missing defer_details")


# =============================================================================
# CORE-05 — SessionContext + chain head (conformance.md:21, §8)
# =============================================================================
#
# "session_id, chain_hash (rolling SHA-256), append-only ContextEntry chain,
# with the Guardian publishing the chain head (chain_hash) on responses for
# content-bearing steps"
# §8.2 —
#   entry_hash = SHA-256(JCS(entry minus entry_hash/previous_hash) || prev_hash_bytes)
# =============================================================================

class Core05_SessionContext(CoreHarness):
    """CORE-05: the chain head (``chain_hash``) published on Guardian
    responses."""

    def _chain_head(self, envelope: dict) -> str:
        """POST ``envelope`` and return ``result.chain_hash``.

        Fails the test with the full response when the Guardian did not
        return a result or the result has no chain_hash, instead of
        raising KeyError from a bare subscript.
        """
        resp = self._post(envelope)
        self.assertIn("result", resp,
            f"request must be accepted to publish a chain head; got {resp}")
        self.assertIn("chain_hash", resp["result"],
            f"response missing chain_hash; got {resp['result']}")
        return resp["result"]["chain_hash"]

    def test_response_carries_chain_hash(self) -> None:
        """conformance.md:21 — 'Guardian publishing the chain head
        (chain_hash) on responses for content-bearing steps'."""
        self._chain_head(self._make_envelope("steps/sessionStart", {}))

    def test_chain_hash_is_lowercase_hex_sha256(self) -> None:
        """response-envelope.json:82-85 — chain_hash pattern
        ^[0-9a-f]{64}$ (lowercase hex SHA-256)."""
        h = self._chain_head(self._make_envelope("steps/sessionStart", {}))
        self.assertRegex(h, r"^[0-9a-f]{64}$",
            f"chain_hash must be lowercase 64-hex SHA-256; got {h!r}")

    def test_chain_links_consecutive_entries(self) -> None:
        """§8.2 normative — consecutive entries in a session must be
        chained, i.e. entry[i+1].previous_hash = entry[i].entry_hash.

        This test only checks the observable consequence that two
        consecutive chain heads in one session differ; the byte-for-byte
        link is checked by
        test_chain_externally_recomputable_across_3_entries."""
        sid = str(uuid.uuid4())
        h1 = self._chain_head(self._make_envelope(
            "steps/sessionStart", {}, session_id=sid))
        h2 = self._chain_head(self._make_envelope(
            "steps/toolCallRequest",
            {"tool": {"name": "Read"},
             "arguments": {"file_path": {"value": "/tmp/x"}}},
            session_id=sid))
        self.assertNotEqual(h1, h2,
            "consecutive chain_hashes must differ — a fake chain reuses hashes")

    def test_distinct_sessions_have_distinct_chain_heads(self) -> None:
        """§8.2 — chain is per-session; two different sessions must
        produce different chain heads from the same first event."""
        # No session_id is passed, so _make_envelope generates a new one
        # for each envelope.
        h1 = self._chain_head(self._make_envelope("steps/sessionStart", {}))
        h2 = self._chain_head(self._make_envelope("steps/sessionStart", {}))
        self.assertNotEqual(h1, h2,
            "two distinct sessions published the same chain head")

    def test_chain_externally_recomputable_across_3_entries(self) -> None:
        """§8.2 normative — entry_hash = SHA-256(JCS(entry minus
        entry_hash/previous_hash) || prev_hash_bytes). An external
        observer with the request stream MUST recompute the published
        chain head byte-for-byte across multiple entries; this is what
        catches a 'chain that doesn't actually chain' mutation.

        Testing 3 entries because a chain that returned sha256(entry)
        (ignoring previous_hash) would still produce the right value
        for the first entry (no previous_hash to ignore). The second
        and third entries are where the chain link is actually
        observable."""

        def expected(req: dict, prev_hash: str | None) -> str:
            # Rebuild the entry from the request alone, then hash its JCS
            # form followed by the raw bytes of the previous head (none
            # for the root entry).
            params = req["params"]
            entry = {
                "entry_id": params["request_id"],
                "step_id": params["request_id"],
                "step_type": req["method"],
                "request_hash": hashlib.sha256(
                    acs_common.jcs_canonicalize(params)).hexdigest(),
                "timestamp": params["timestamp"],
            }
            content_bytes = acs_common.jcs_canonicalize(entry)
            prev_bytes = bytes.fromhex(prev_hash) if prev_hash else b""
            return hashlib.sha256(content_bytes + prev_bytes).hexdigest()

        sid = str(uuid.uuid4())
        req1 = self._make_envelope("steps/sessionStart", {}, session_id=sid)
        h1 = self._chain_head(req1)
        self.assertEqual(h1, expected(req1, None),
            "entry 1 (root): published chain_hash != externally-computed hash")

        req2 = self._make_envelope("steps/userMessage",
            {"content": [{"type": "text", "value": "hi"}]}, session_id=sid)
        h2 = self._chain_head(req2)
        self.assertEqual(h2, expected(req2, h1),
            "entry 2: published chain_hash != externally-computed hash. "
            "Either previous_hash is not folded in or JCS canonicalization differs.")
        # Falsifier: same h2 computed WITHOUT prev_hash MUST differ — i.e. chain
        # actually depends on the previous hash, not just the entry content.
        self.assertNotEqual(h2, expected(req2, None),
            "entry 2's hash matches the no-previous_hash computation — "
            "the chain is not actually chained, just hashed.")

        req3 = self._make_envelope("steps/toolCallRequest",
            {"tool": {"name": "Read"},
             "arguments": {"file_path": {"value": "/tmp/x"}}},
            session_id=sid)
        h3 = self._chain_head(req3)
        self.assertEqual(h3, expected(req3, h2),
            "entry 3: chain breaks at depth 2 — not a transitive chain")


# =============================================================================
# CORE-06 — Replay protection (conformance.md:22, §10.3)
# =============================================================================
#
# "request_id (UUID) and timestamp on every request; Guardians MUST reject
# replays per §10.3"
# §10.3: "Guardians MUST reject duplicate request_id values within the
#        session with REPLAY_DETECTED (-32005)"
# §10.3: "Guardians MUST reject requests whose timestamp is more than the
#        negotiated skew window in the past or future, returning
#        TIMESTAMP_OUT_OF_WINDOW (-32006)"
# =============================================================================

class Core06_ReplayProtection(CoreHarness):
    """CORE-06: duplicate request_id and out-of-window timestamp rejection."""

    def test_duplicate_request_id_rejected_with_32005(self) -> None:
        """§10.3 — 'Guardians MUST reject duplicate request_id values
        within the session with REPLAY_DETECTED (-32005)'."""
        sid = str(uuid.uuid4())
        rid = str(uuid.uuid4())
        r1 = self._post(self._make_envelope("steps/sessionStart", {},
            session_id=sid, request_id=rid))
        self.assertIn("result", r1)
        r2 = self._post(self._make_envelope("steps/userMessage",
            {"content": [{"type": "text", "value": "hi"}]},
            session_id=sid, request_id=rid))
        self.assertIn("error", r2, f"replay must be rejected; got {r2}")
        self.assertEqual(r2["error"]["code"], -32005,
            f"§10.3 — code must be -32005 REPLAY_DETECTED; got {r2['error']}")

    def test_timestamp_outside_window_rejected_with_32006(self) -> None:
        """§10.3 — 'Guardians MUST reject requests whose timestamp is
        more than the negotiated skew window in the past or future,
        returning TIMESTAMP_OUT_OF_WINDOW (-32006)'. Tests BOTH
        directions: an ancient timestamp and a future one. Without
        future-side coverage, a clock-skewed client gets
        Heisenberg-ish behavior — sometimes accepted, sometimes not."""
        ancient = datetime.datetime(2010, 1, 1, tzinfo=datetime.timezone.utc).isoformat()
        future = (datetime.datetime.now(datetime.timezone.utc)
                  + datetime.timedelta(hours=1)).isoformat()
        # Separate subTests so a failure on the past side cannot hide the
        # result of the future side.
        for direction, stamp in (("past", ancient), ("future", future)):
            with self.subTest(direction=direction):
                resp = self._post(self._make_envelope("steps/sessionStart", {},
                    timestamp=stamp))
                self.assertIn("error", resp,
                    "§10.3 says 'past or future' — future-skewed timestamps must "
                    "also be rejected, not silently accepted")
                self.assertEqual(resp["error"]["code"], -32006,
                    f"§10.3 — code must be -32006 TIMESTAMP_OUT_OF_WINDOW "
                    f"({direction}); got {resp['error']}")

    def test_error_response_envelope_validates(self) -> None:
        """Every Guardian response — including ERROR responses — MUST
        validate against response-envelope.json. The disposition tests
        cover allow/deny envelopes; this one covers the error branch
        of the JSON-RPC oneOf (result OR error)."""
        sid = str(uuid.uuid4())
        rid = str(uuid.uuid4())
        # First request: accepted
        first = self._post(self._make_envelope("steps/sessionStart", {},
            session_id=sid, request_id=rid))
        self.assertIn("result", first,
            f"setup request must be accepted before the replay; got {first}")
        # Second request: same (sid, rid) → -32005 REPLAY_DETECTED error
        resp = self._post(self._make_envelope("steps/sessionStart", {},
            session_id=sid, request_id=rid))
        self.assertIn("error", resp)
        errors = _validate_response_envelope(resp)
        self.assertEqual(errors, [],
            "Guardian error response fails response-envelope.json:\n - "
            + "\n - ".join(errors))

    def test_same_request_id_across_sessions_is_fine(self) -> None:
        """§10.3 — replay protection is PER-SESSION. The same
        request_id used in two different sessions MUST both be accepted."""
        rid = str(uuid.uuid4())
        # No session_id is passed, so each envelope gets its own session.
        r1 = self._post(self._make_envelope("steps/sessionStart", {},
            request_id=rid))
        r2 = self._post(self._make_envelope("steps/sessionStart", {},
            request_id=rid))
        self.assertIn("result", r1)
        self.assertIn("result", r2,
            "cross-session same-request_id must be accepted; "
            "replay protection scope is per-session")


# =============================================================================
# CORE-07 — Baseline integrity (conformance.md:23, §10)
# =============================================================================
#
# "every request and response carries a signature over the canonical
# envelope. HMAC-SHA256 with an HKDF-derived per-session key from
# deployment-provided key material is the baseline"
# §10: "The signed input ...
is the RFC 8785 (JCS) canonicalization of +# the request or response envelope with the signature field removed" +# ============================================================================= + +class Core07_BaselineIntegrity(CoreHarness): + + def test_signed_request_accepted(self) -> None: + """conformance.md:23 — signed request with HMAC-SHA256 baseline + MUST be accepted by a Guardian that requires signing.""" + resp = self._post(self._make_envelope("steps/sessionStart", {})) + self.assertIn("result", resp, + f"signed request was rejected; got {resp}") + + def test_unsigned_request_rejected_when_secret_configured(self) -> None: + """conformance.md:23 — when signing is required, an unsigned + request MUST be rejected.""" + env = self._make_envelope("steps/sessionStart", {}, sign=False) + resp = self._post(env) + self.assertIn("error", resp, + f"unsigned request was accepted; got {resp}") + self.assertEqual(resp["error"]["code"], -32004, + f"unsigned-request error must be -32004 SIGNATURE_INVALID") + + def test_tampered_request_signature_invalid(self) -> None: + """§10 — 'signed input ... canonicalization of the envelope with + the signature field removed' — any post-sign tamper MUST fail + verification.""" + env = self._make_envelope("steps/sessionStart", {}) + # Tamper with method AFTER signing + env["method"] = "steps/userMessage" + resp = self._post(env) + self.assertIn("error", resp) + self.assertEqual(resp["error"]["code"], -32004) + + def test_response_is_signed_and_verifies(self) -> None: + """conformance.md:23 — 'every request and response carries a + signature'. The Guardian's response MUST be signed; a client + MUST be able to verify it with the same HKDF-derived key.""" + sid = str(uuid.uuid4()) + env = self._make_envelope("steps/sessionStart", {}, session_id=sid) + resp = self._post(env) + self.assertIn("result", resp) + # The result body must include signature, and signature must verify. 
+ sig = resp["result"].get("signature") + self.assertIsNotNone(sig, + "Guardian response missing `signature` field per §10") + key = acs_common.derive_session_key(self.HMAC_SECRET.encode(), sid) + self.assertTrue(acs_common.verify_signature(resp, key=key), + "Guardian's response signature must verify with the " + "HKDF-derived per-session key") + + def test_per_session_key_derivation(self) -> None: + """§10 — 'HKDF-derived per-session key from deployment-provided + key material'. The derived key MUST differ between sessions + with the same secret.""" + secret = self.HMAC_SECRET.encode() + k1 = acs_common.derive_session_key(secret, "session-A") + k2 = acs_common.derive_session_key(secret, "session-B") + self.assertNotEqual(k1, k2, + "per-session HKDF MUST produce distinct keys for distinct sessions") + # Same session_id → same key + k1b = acs_common.derive_session_key(secret, "session-A") + self.assertEqual(k1, k1b, + "HKDF must be deterministic for the same (secret, session_id)") + + def test_signature_covers_session_id(self) -> None: + """§10 — 'binds the signature to the whole envelope, including + method, metadata.session_id, request_id, and timestamp, so a + captured signature cannot be lifted into a different envelope'. 
+ + Verifies by: take a valid signed envelope, change session_id, + Guardian MUST reject (the signature was over the old session_id).""" + sid_a = str(uuid.uuid4()) + sid_b = str(uuid.uuid4()) + env = self._make_envelope("steps/sessionStart", {}, session_id=sid_a) + # Lift signature to a different session + env["params"]["metadata"]["session_id"] = sid_b + resp = self._post(env) + self.assertIn("error", resp, + "signature lifted into a different session_id MUST be rejected") + self.assertEqual(resp["error"]["code"], -32004) + + +# ============================================================================= +# CORE-08 — Decision honoring (conformance.md:24, §6.4) +# ============================================================================= +# +# Adapter-side property — covered end-to-end in the per-adapter test +# suites because it depends on how the framework (Claude Code, Cursor, +# NAT) routes the verdict. The wire-level property "Guardian responds +# in time" is covered here; "framework actually waits and applies" is +# covered in adapters/{claude-code,cursor,nat}/tests/. +# ============================================================================= + +class Core08_DecisionHonoringAdapter(unittest.TestCase): + """§6.4 is an adapter-side property: 'the Observed Agent MUST wait + for the Guardian's decision up to the negotiated timeout and apply + it'. We falsify this by: + + 1. The adapter MUST apply DENY when the Guardian returns DENY + (positive: deny shows up as `permissionDecision: deny`). + 2. The adapter MUST wait for the response, not proceed before it + arrives (a slow-but-responsive Guardian still gets honored). + 3. On no-response, the adapter MUST fall to its fail posture and + emit an audit event (contradiction: silent bypass is a §6.4 violation). 
+ """ + + def _run_claude_adapter(self, *, guardian_url: str, + env_overrides: dict | None = None, + timeout: float = 10.0) -> subprocess.CompletedProcess: + adapter = HERE / "claude-code" / "acs_adapter.py" + env = os.environ.copy() + env["ACS_GUARDIAN_URL"] = guardian_url + env["ACS_HANDSHAKE"] = "0" + env.pop("ACS_DEFAULT_DENY", None) + if env_overrides: + env.update(env_overrides) + return subprocess.run( + [sys.executable, str(adapter)], + input=json.dumps({ + "session_id": "00000000-0000-4000-8000-000000000001", + "transcript_path": "/tmp/t", "cwd": "/tmp", + "permission_mode": "default", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_input": {"command": "rm -rf /home/u"}, + }), + capture_output=True, text=True, env=env, timeout=timeout, + ) + + def test_adapter_actually_applies_guardian_deny(self) -> None: + """§6.4 positive — when the Guardian returns DENY, the adapter + MUST translate it to the framework's deny shape. A framework + that gets `allow` for a destructive Bash would execute it.""" + port = _free_port() + env = os.environ.copy() + env["ACS_DEV_MODE"] = "1" + env.pop("ACS_HMAC_SECRET", None) + env.pop("ACS_HMAC_SECRET_FILE", None) + env["ACS_GUARDIAN_STATE_DIR"] = tempfile.mkdtemp() + guardian = subprocess.Popen( + [sys.executable, str(GUARDIAN_SCRIPT), "--port", str(port)], + env=env, stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + _wait_port("127.0.0.1", port) + try: + proc = self._run_claude_adapter( + guardian_url=f"http://127.0.0.1:{port}/acs") + self.assertEqual(proc.returncode, 0, + f"adapter exited non-zero: {proc.stderr}") + payload = json.loads(proc.stdout) + hso = payload["hookSpecificOutput"] + self.assertEqual(hso["permissionDecision"], "deny", + f"Guardian returned DENY for `rm -rf /home/u` but adapter " + f"emitted {hso!r}. 
§6.4 — 'DENY blocks the action' violated.") + finally: + guardian.terminate() + try: guardian.wait(timeout=2.0) + except subprocess.TimeoutExpired: guardian.kill() + + def test_adapter_waits_for_a_slow_guardian(self) -> None: + """§6.4 — 'wait for the Guardian's decision up to the negotiated + timeout'. The adapter MUST NOT proceed before the response + arrives. We run a deliberately-slow Guardian (1s delay) and + check the adapter took at least that long AND honored the result.""" + delay_s = 1.0 + + class SlowGuardian(http.server.BaseHTTPRequestHandler): + def do_POST(self_h): # noqa: N802 + length = int(self_h.headers.get("Content-Length", "0")) + body = json.loads(self_h.rfile.read(length).decode()) + time.sleep(delay_s) + reply = json.dumps({ + "jsonrpc": "2.0", "id": body.get("id"), + "result": {"type": "final", "acs_version": "0.1.0", + "request_id": body.get("params", {}).get("request_id", ""), + "decision": "deny", + "reasoning": "slow guardian denied"}, + }).encode() + self_h.send_response(200) + self_h.send_header("Content-Length", str(len(reply))) + self_h.send_header("Content-Type", "application/json") + self_h.end_headers() + self_h.wfile.write(reply) + def log_message(self, *a, **kw): return + + port = _free_port() + srv = http.server.HTTPServer(("127.0.0.1", port), SlowGuardian) + t = threading.Thread(target=srv.serve_forever, daemon=True) + t.start() + try: + start = time.monotonic() + proc = self._run_claude_adapter( + guardian_url=f"http://127.0.0.1:{port}/acs") + elapsed = time.monotonic() - start + self.assertGreaterEqual(elapsed, delay_s, + f"adapter returned in {elapsed:.2f}s but Guardian deliberately " + f"slept {delay_s}s — the adapter proceeded WITHOUT waiting. 
" + f"§6.4 violated.") + self.assertEqual(proc.returncode, 0, proc.stderr) + payload = json.loads(proc.stdout) + self.assertEqual(payload["hookSpecificOutput"]["permissionDecision"], "deny", + "adapter waited but failed to apply the verdict") + finally: + srv.shutdown() + srv.server_close() + + def test_fail_open_emits_audit_event(self) -> None: + """§6.4 — 'Every step that proceeds without a decision MUST be + recorded as an audit event, so the bypass is visible rather + than silent'.""" + proc = self._run_claude_adapter(guardian_url="http://127.0.0.1:1/dead") + self.assertIn("ACS_AUDIT", proc.stderr, + f"§6.4 — fail-open path must emit ACS_AUDIT event; stderr was:\n{proc.stderr}") + self.assertIn("fail_open_bypass", proc.stderr, + "audit event type must be 'fail_open_bypass'") + # Cause field must identify this as transport failure (Guardian + # was unreachable), not as a Guardian-returned error. + self.assertIn("transport_failure", proc.stderr, + "audit event must carry cause=transport_failure when Guardian is unreachable; " + "without this, transport failures and Guardian-returned errors look identical " + "in the audit log") + + def test_guardian_error_response_carries_distinct_cause(self) -> None: + """Regression — found by hand-probing in a Claude session: when + the Guardian returns a JSON-RPC error (e.g. SIGNATURE_INVALID, + Invalid Request), the adapter SHOULD distinguish that in the + audit log from 'Guardian unreachable'. Both apply the same + fail posture per §6.4, but operators need to grep the cause to + tell them apart — a signature error is a client/operator bug + (fix your code), an unreachable Guardian is an ops issue (your + gate is down). + + Setup: Guardian REQUIRES signing (started with ACS_HMAC_SECRET). + Adapter is invoked WITHOUT a secret, so it sends unsigned envelopes. + Guardian responds with -32004 SIGNATURE_INVALID. 
+ """ + adapter = HERE / "claude-code" / "acs_adapter.py" + # Spin up a Guardian that requires signing + port = _free_port() + env_g = os.environ.copy() + env_g["ACS_HMAC_SECRET"] = "regression-test-secret" + env_g.pop("ACS_DEV_MODE", None) + env_g["ACS_GUARDIAN_STATE_DIR"] = tempfile.mkdtemp() + guardian = subprocess.Popen( + [sys.executable, str(GUARDIAN_SCRIPT), "--port", str(port)], env=env_g, + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + _wait_port("127.0.0.1", port) + try: + env_a = os.environ.copy() + env_a["ACS_GUARDIAN_URL"] = f"http://127.0.0.1:{port}/acs" + env_a["ACS_HANDSHAKE"] = "0" + env_a.pop("ACS_HMAC_SECRET", None) + env_a.pop("ACS_HMAC_SECRET_FILE", None) + env_a.pop("ACS_DEFAULT_DENY", None) # default fail-open + + proc = subprocess.run( + [sys.executable, str(adapter)], + input=json.dumps({ + "session_id": "00000000-0000-4000-8000-000000000001", + "transcript_path": "/tmp/t", "cwd": "/tmp", + "permission_mode": "default", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_input": {"command": "echo hi"}, + }), + capture_output=True, text=True, env=env_a, timeout=10, + ) + self.assertIn("ACS_AUDIT", proc.stderr, + "fail-open path must still emit audit; stderr was:\n" + proc.stderr) + # The disposition is fail_open (since DEFAULT_DENY=0) + self.assertIn("fail_open_bypass", proc.stderr, + "disposition must be fail_open_bypass under DEFAULT_DENY=0") + # The cause MUST be the Guardian-returned-error case, NOT + # the transport case. This is the regression: if a future + # change collapses them again, this test fires. + self.assertIn("signature_invalid_response", proc.stderr, + "REGRESSION GAP: an unsigned envelope to a signing-required " + "Guardian must audit with cause=signature_invalid_response, " + "not cause=transport_failure. 
Collapsing them is the " + "footgun a Claude probe surfaced — without the distinct " + "cause, operators can't grep their audit log for client " + "bugs (which they should fix) vs Guardian outages.") + self.assertNotIn("cause\": \"transport_failure", proc.stderr, + "Guardian-returned error must NOT be logged as transport_failure") + finally: + guardian.terminate() + try: guardian.wait(timeout=2.0) + except subprocess.TimeoutExpired: guardian.kill() + + def test_guardian_error_under_fail_closed_emits_deny(self) -> None: + """Companion regression — same setup as above, but with + ACS_DEFAULT_DENY=1. The adapter MUST emit a deny (not silently + proceed) AND must audit the specific cause.""" + adapter = HERE / "claude-code" / "acs_adapter.py" + port = _free_port() + env_g = os.environ.copy() + env_g["ACS_HMAC_SECRET"] = "regression-test-secret-2" + env_g.pop("ACS_DEV_MODE", None) + env_g["ACS_GUARDIAN_STATE_DIR"] = tempfile.mkdtemp() + guardian = subprocess.Popen( + [sys.executable, str(GUARDIAN_SCRIPT), "--port", str(port)], env=env_g, + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + _wait_port("127.0.0.1", port) + try: + env_a = os.environ.copy() + env_a["ACS_GUARDIAN_URL"] = f"http://127.0.0.1:{port}/acs" + env_a["ACS_HANDSHAKE"] = "0" + env_a["ACS_DEFAULT_DENY"] = "1" + env_a.pop("ACS_HMAC_SECRET", None) + env_a.pop("ACS_HMAC_SECRET_FILE", None) + + proc = subprocess.run( + [sys.executable, str(adapter)], + input=json.dumps({ + "session_id": "00000000-0000-4000-8000-000000000002", + "transcript_path": "/tmp/t", "cwd": "/tmp", + "permission_mode": "default", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_input": {"command": "rm -rf /home/some-fake-path"}, + }), + capture_output=True, text=True, env=env_a, timeout=10, + ) + # With DEFAULT_DENY=1, the adapter MUST emit a deny on stdout. 
+ self.assertTrue(proc.stdout.strip(), + "fail-closed mode must emit a deny on stdout, not be silent") + try: + payload = json.loads(proc.stdout) + except json.JSONDecodeError: + self.fail(f"stdout was not JSON: {proc.stdout!r}") + self.assertEqual( + payload.get("hookSpecificOutput", {}).get("permissionDecision"), + "deny", + "PreToolUse adapter under DEFAULT_DENY=1 + Guardian rejects " + "envelope must emit permissionDecision=deny. Without this, " + "the original gap stays open: an unsigned envelope produces " + "no stdout (proceed) when fail-open, hiding the policy hole." + ) + # And the audit log must record the specific cause + self.assertIn("decision_failure_fail_closed", proc.stderr, + "fail-closed audit type must appear") + self.assertIn("signature_invalid_response", proc.stderr, + "audit must carry cause=signature_invalid_response") + finally: + guardian.terminate() + try: guardian.wait(timeout=2.0) + except subprocess.TimeoutExpired: guardian.kill() + + def test_malformed_envelope_under_fail_closed_emits_deny(self) -> None: + """Companion to the signature regression — what Claude in the + other probe found FIRST: a non-UUID session_id makes the Guardian + return -32600 Invalid Request. Under fail-open the adapter + silently proceeds (the original footgun). 
Under DEFAULT_DENY=1 + the adapter MUST emit a deny AND log cause=malformed_envelope_response.""" + adapter = HERE / "claude-code" / "acs_adapter.py" + port = _free_port() + env_g = os.environ.copy() + env_g["ACS_DEV_MODE"] = "1" # no signing for this test + env_g.pop("ACS_HMAC_SECRET", None) + env_g.pop("ACS_HMAC_SECRET_FILE", None) + env_g["ACS_GUARDIAN_STATE_DIR"] = tempfile.mkdtemp() + guardian = subprocess.Popen( + [sys.executable, str(GUARDIAN_SCRIPT), "--port", str(port)], env=env_g, + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, + ) + _wait_port("127.0.0.1", port) + try: + env_a = os.environ.copy() + env_a["ACS_GUARDIAN_URL"] = f"http://127.0.0.1:{port}/acs" + env_a["ACS_HANDSHAKE"] = "0" + env_a["ACS_DEFAULT_DENY"] = "1" + env_a.pop("ACS_HMAC_SECRET", None) + env_a.pop("ACS_HMAC_SECRET_FILE", None) + + # Non-UUID session_id triggers -32600 from the Guardian's + # request-envelope.json schema validation + proc = subprocess.run( + [sys.executable, str(adapter)], + input=json.dumps({ + "session_id": "test-sess", # not a UUID — Guardian rejects + "transcript_path": "/tmp/t", "cwd": "/tmp", + "permission_mode": "default", + "hook_event_name": "PreToolUse", + "tool_name": "Bash", + "tool_input": {"command": "rm -rf /home/some-fake-path"}, + }), + capture_output=True, text=True, env=env_a, timeout=10, + ) + # Either the adapter refused to build the envelope at all + # (adapter_build_failed) because session_id isn't a UUID, + # OR the Guardian rejected with -32600. Both should result + # in a deny under DEFAULT_DENY=1. + self.assertTrue(proc.stdout.strip(), + "fail-closed must emit a deny on stdout; stdout was empty. 
" + "stderr: " + proc.stderr[:400]) + try: + payload = json.loads(proc.stdout) + except json.JSONDecodeError: + self.fail(f"stdout was not JSON: {proc.stdout!r}") + self.assertEqual( + payload.get("hookSpecificOutput", {}).get("permissionDecision"), + "deny", + "non-UUID session_id under DEFAULT_DENY=1 must produce deny, " + "not silent proceed (the original footgun)") + self.assertIn("decision_failure_fail_closed", proc.stderr) + # Cause is either adapter_build_failed (caught before the wire) + # or malformed_envelope_response (caught by Guardian). + self.assertTrue( + "adapter_build_failed" in proc.stderr + or "malformed_envelope_response" in proc.stderr, + f"cause must distinguish the malformed-envelope case from " + f"a transport failure; stderr was: {proc.stderr[:400]}") + finally: + guardian.terminate() + try: guardian.wait(timeout=2.0) + except subprocess.TimeoutExpired: guardian.kill() + + +# ============================================================================= +# CORE-09 — Liveness system/ping (conformance.md:25, §13) +# ============================================================================= +# +# §13: "Guardians MUST always return decision: allow for system/ping +# regardless of policy, signature, or session state." +# §13: "system/ping MUST NOT be written into SessionContext as a ContextEntry" +# §13: "system/ping MUST NOT require a signature even if the session +# otherwise requires signatures" +# ============================================================================= + +class Core09_SystemPing(CoreHarness): + + def test_ping_returns_allow(self) -> None: + """§13 — 'Guardians MUST always return decision: allow for + system/ping regardless of policy, signature, or session state'. 
+ Also: the response envelope MUST validate against + response-envelope.json.""" + env = self._make_envelope("system/ping", {"echo": "hi"}, sign=False) + resp = self._post(env) + self.assertIn("result", resp) + self.assertEqual(resp["result"]["decision"], "allow") + errors = _validate_response_envelope(resp) + self.assertEqual(errors, [], + f"ping response fails response-envelope.json:\n - " + + "\n - ".join(errors)) + + def test_ping_does_not_require_signature(self) -> None: + """§13 — 'system/ping MUST NOT require a signature even if the + session otherwise requires signatures, so that liveness probing + remains possible during signature-rotation or key-resolution + failures'.""" + env = self._make_envelope("system/ping", {"echo": "hi"}, sign=False) + resp = self._post(env) + self.assertIn("result", resp, + "unsigned ping must be accepted even when Guardian requires signing") + + def test_ping_payload_includes_status_echo_timestamp(self) -> None: + """§13 — 'response ... with decision: allow and a payload + object carrying {status: ok, echo: , + server_timestamp: }'.""" + env = self._make_envelope("system/ping", {"echo": "ping-test"}, sign=False) + result = self._post(env)["result"] + payload = result.get("payload", {}) + self.assertEqual(payload.get("status"), "ok") + self.assertEqual(payload.get("echo"), "ping-test") + self.assertIn("server_timestamp", payload) + + def test_ping_does_not_consume_replay_slot(self) -> None: + """§13 — 'system/ping MUST NOT be written into SessionContext + as a ContextEntry; it does not participate in the chain hash'. 
+ Two pings with the same request_id must both succeed — + otherwise ping is silently in the replay set.""" + sid = str(uuid.uuid4()) + rid = str(uuid.uuid4()) + env1 = self._make_envelope("system/ping", {"echo": "1"}, + session_id=sid, request_id=rid, sign=False) + env2 = self._make_envelope("system/ping", {"echo": "2"}, + session_id=sid, request_id=rid, sign=False) + r1 = self._post(env1) + r2 = self._post(env2) + self.assertIn("result", r1) + self.assertIn("result", r2, + "second ping with same request_id was rejected — " + "ping must not enter the replay set") + + +# ============================================================================= +# CORE-10 — Wrapped MCP (conformance.md:26) +# ============================================================================= +# +# "Wrapped MCP — protocols/MCP/*" +# +# Our example Guardian doesn't fully implement MCP wrapping (it falls +# through to unknown-method deny). But the method-namespace MUST accept +# protocols/MCP/* method names at the wire level — i.e., the envelope +# schema MUST validate such methods, and the Guardian MUST return +# either a valid result or a structured error (not crash). +# ============================================================================= + +class Core10_WrappedMcp(CoreHarness): + + def test_mcp_namespace_method_validates(self) -> None: + """conformance.md:26 — protocols/MCP/* method namespace MUST + be a valid wire-level form. request-envelope.json:13-14 + regex includes ^protocols/ so any protocols/MCP/* method + passes schema validation.""" + env = self._make_envelope("protocols/MCP/tools/call", {}) + errors = _validate_request_envelope(env) + self.assertEqual(errors, [], + f"protocols/MCP/* method MUST be valid wire-format; got {errors}") + + def test_guardian_returns_structured_response_for_mcp(self) -> None: + """The Guardian MUST not crash on a protocols/MCP/* method + AND its response MUST validate against response-envelope.json. 
+ A 'no-op' Guardian that returns an empty 200 would pass the + previous version of this test; this version requires the + response to be schema-valid. + + NOTE — this is a partial Core-10 verification. Full wrapped + MCP semantics (forwarding, MCP-specific validation, MCP error + mapping) is a separate implementation gap documented in the + adapter READMEs.""" + env = self._make_envelope("protocols/MCP/tools/call", + {"name": "echo", "arguments": {"text": "hi"}}) + resp = self._post(env) + # Must be a well-formed JSON-RPC envelope + self.assertTrue("result" in resp or "error" in resp, + f"Guardian response for MCP method lacks both result and error: {resp}") + # ResponseEnvelope schema validates — including conditional fields + # (deny -> reasoning required, etc.). A garbage response is rejected. + errors = _validate_response_envelope(resp) + self.assertEqual(errors, [], + f"response to protocols/MCP/* envelope is malformed: {errors}") + + def test_mcp_method_namespace_rejects_garbage_namespaces(self) -> None: + """Contradiction: methods OUTSIDE the reserved namespaces + (steps/, protocols/, agbom/, trace/, system/, handshake/, + wrapped:) MUST be rejected. Per request-envelope.json:14 the + regex is ^(steps/|protocols/|agbom/|trace/|system/|handshake/|wrapped:).+ + so anything not starting with one of those prefixes — and with + at least one char after — must fail.""" + bad_methods = [ + "arbitrary/method", # wrong prefix + "no-slash-at-all", # no separator + "PROTOCOLS/upper", # wrong case (prefix is case-sensitive) + "random/garbage", # wrong prefix + "step/typo", # 'step' not 'steps' + ] + for bad in bad_methods: + with self.subTest(method=bad): + env = self._make_envelope(bad, {}) + errors = _validate_request_envelope(env) + self.assertTrue(any("method" in e for e in errors), + f"method {bad!r} outside reserved namespaces was accepted") + + +# ============================================================================= +# Conformance summary — entry point. 
# =============================================================================

if __name__ == "__main__":
    # Script entry point: print a short header naming the suite and the
    # spec directory in use, then hand control to unittest's runner.
    rule = "=" * 70
    for header_line in (rule, "ACS-Core conformance check (v0.1.0)", rule):
        print(header_line)
    print("Spec source:", SPEC_DIR)
    print()
    unittest.main(verbosity=2)