hyperpolymath · hyperpolymath · May 30, 2026 · May 30, 2026
diff --git a/.github/workflows/perf-rebaseline.yml b/.github/workflows/perf-rebaseline.yml
@@ -0,0 +1,174 @@
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>
+#
+# perf-rebaseline.yml — Phase D-4 baseline-collection automation
+# (standards#99 of the standards#91 single-lane HCG channel).
+#
+# What it does
+# ────────────
+# Manual workflow_dispatch only. Runs bench/gateway_latency.exs on the
+# published reference target (ubuntu-latest, per docs/perf-contract.md
+# § Targets), pipes bench/results.json through bench/rebaseline.exs to
+# produce a regenerated bench/baseline.json with real p50/p95/p99/ips
+# per scenario, then opens a `perf: rebaseline (standards#99)` PR for
+# maintainer review.
+#
+# Why it exists
+# ─────────────
+# docs/perf-contract.md § Baseline lifecycle defines the rebaseline
+# ritual as `just bench-collect` on a CI-equivalent target. The
+# published reference is ubuntu-latest GHA, but the ritual was authored
+# as a manual local step, which requires an Elixir 1.19 / OTP 28
+# toolchain set up locally on the operator's machine. This workflow
+# moves step 2 of the ritual (the actual collection run) onto the
+# published reference target itself, so the rebaseline can be initiated
+# from any GitHub UI without a local toolchain. The numbers it produces
+# are then comparable to perf-regression.yml's gate, which runs on the
+# same target.
+#
+# The generated PR leaves bench/baseline.json `_status` as
+# "scaffold-placeholder". The maintainer reviews the numbers (steps
+# 3-4 of the ritual) and either flips `_status` → "active" in the same
+# PR (arming perf-regression.yml's gate immediately) or in a follow-up
+# after a confidence-building window. Splitting "land real numbers"
+# from "arm the gate" stays compatible with the runbook's D-4 / D-3
+# checklist split in
+# boj-server/docs/integration/hcg-tier2-rollout-runbook.md § 1.1.
+#
+# What it deliberately does NOT do
+# ────────────────────────────────
+# - Flip `_status` to "active" itself — that's a maintainer judgement
+#   on the noise/spread of the collected numbers, not an automation.
+# - Push directly to main — never; always opens a PR.
+# - Tighten tolerance ratios — also a maintainer judgement once
+#   intra-run variance is characterised.
+
+name: Perf Rebaseline
+
+on:
+  workflow_dispatch:
+    inputs:
+      ref:
+        description: 'Branch or SHA to collect from (default: main)'
+        required: false
+        default: main
+
+# Deliberately NO concurrency cancel-in-progress here: a workflow_dispatch
+# rebaseline is operator-initiated and serial dispatches should each
+# complete on their own. (perf-regression.yml has cancel-in-progress
+# because PR re-pushes obsolete prior runs; this workflow has no such
+# obsolescence relationship.)
+
+# Token scopes required by the "Open rebaseline PR" step:
+#   contents: write       — `git push` of the perf/rebaseline-<run_id> branch.
+#   pull-requests: write  — `gh pr create` against the repository.
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  rebaseline:
+    name: Collect baseline and open PR
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          ref: ${{ github.event.inputs.ref }}
+          # PR-creating workflows need fresh history so the branch push
+          # carries enough context for the merge-base computation.
+          fetch-depth: 0
+
+      - name: Setup Elixir/OTP
+        uses: erlef/setup-beam@fc68ffb90438ef2936bbb3251622353b3dcb2f93 # v1.18.2
+        with:
+          # Pinned to match .tool-versions; bump both together.
+          # MUST also match perf-regression.yml so numbers are comparable.
+          elixir-version: '1.19'
+          otp-version: '28'
+
+      - name: Cache deps
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          # Reuse perf-regression.yml's cache key so the first rebaseline
+          # after a perf-regression run primes off the warm cache.
+          path: |
+            deps
+            _build
+          key: ${{ runner.os }}-perf-${{ hashFiles('mix.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-perf-
+
+      - name: Install deps
+        run: |
+          mix local.hex --force
+          mix local.rebar --force
+          mix deps.get
+
+      - name: Compile
+        run: mix compile --warnings-as-errors
+
+      - name: Run bench harness
+        # A `run` step with no explicit `shell:` executes under
+        # `bash -e {0}`, i.e. WITHOUT pipefail. The pipeline's status would
+        # then be tee's (always 0), masking a crashed or failing harness.
+        # Enable pipefail explicitly, as the PR-opening step already does.
+        run: |
+          set -euo pipefail
+          mix run bench/gateway_latency.exs | tee bench/console.log
+
+      - name: Regenerate baseline.json from results.json
+        # bench/rebaseline.exs reads these two variables to build the
+        # `_generated_by` stamp written into bench/baseline.json. When
+        # REBASELINE_RUN_ID is absent the script records a local run instead.
+        env:
+          REBASELINE_RUN_ID: ${{ github.run_id }}
+          REBASELINE_RUN_REF: ${{ github.event.inputs.ref }}
+        run: mix run bench/rebaseline.exs
+
+      - name: Upload bench artefacts
+        # `always()` keeps this step running even when an earlier step
+        # failed, so whatever results.json / console.log were produced can
+        # still be inspected from the workflow run page.
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: perf-rebaseline-results
+          path: |
+            bench/results.json
+            bench/console.log
+          retention-days: 30
+
+      - name: Open rebaseline PR
+        # Flow: create branch perf/rebaseline-<run_id> from the checked-out
+        # ref, stage ONLY bench/baseline.json, and stop with a warning (exit
+        # 0) when nothing is staged. Otherwise commit as the
+        # github-actions bot, push the branch, and open a PR with `gh`.
+        #
+        # NOTE(review): `--base main` is fixed, but the branch is cut from
+        # whatever the `ref` input checked out. Dispatching with a non-main
+        # ref would presumably make the PR carry that ref's commits as
+        # well — confirm this is intended.
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          BRANCH="perf/rebaseline-${{ github.run_id }}"
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git checkout -b "$BRANCH"
+          git add bench/baseline.json
+          if git diff --cached --quiet; then
+            echo '::warning::No baseline.json delta from this run — nothing to PR.'
+            exit 0
+          fi
+          RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+          git commit -m 'perf: rebaseline (standards#99)' \
+            -m "Auto-collected from \`bench/gateway_latency.exs\` on \`ubuntu-latest\` (the published reference target per \`docs/perf-contract.md\` § Targets)." \
+            -m "Workflow run: $RUN_URL"
+          git push -u origin "$BRANCH"
+          gh pr create \
+            --base main \
+            --head "$BRANCH" \
+            --title 'perf: rebaseline (standards#99)' \
+            --body "$(cat <<EOF
+          ## Summary
+
+          Auto-generated by \`.github/workflows/perf-rebaseline.yml\` (workflow run [#${{ github.run_id }}]($RUN_URL)). Replaces the scaffold-placeholder TODO values in \`bench/baseline.json\` with real \`p50\` / \`p95\` / \`p99\` / \`ips\` per scenario, collected by \`bench/gateway_latency.exs\` on the published reference target (\`ubuntu-latest\`, per \`docs/perf-contract.md\` § Targets).
+
+          \`Refs hyperpolymath/standards#91\`
+          \`Refs hyperpolymath/standards#99\`
+
+          ## What's in this PR
+
+          - \`bench/baseline.json\`: real per-scenario percentiles + ips; refreshed \`_generated_at\` / \`_generated_by\`.
+          - \`_status\` remains \`scaffold-placeholder\`. The maintainer flips it to \`active\` in this PR (arming the gate immediately) or in a follow-up after a confidence-building window — see \`docs/perf-contract.md\` § Baseline lifecycle.
+          - \`tolerance\` ratios unchanged.
+
+          ## Review checklist
+
+          - [ ] Numbers are within an order of magnitude of expectations for each scenario (sanity).
+          - [ ] Spread looks reasonable (no scenario with p99 >> p95 in a way that suggests a noisy outlier; if so, consider re-running before merging).
+          - [ ] Decide whether to flip \`_status\` → \`active\` here (one-PR D-4 + D-3 close) or in a follow-up (D-4 lands; D-3 flip later).
+
+          Bench artefacts (results.json, console.log) are attached to the workflow run as the \`perf-rebaseline-results\` artefact for 30 days.
+          EOF
+          )"
diff --git a/Justfile b/Justfile
@@ -110,7 +110,18 @@ bench:
 bench-collect:
     mix run bench/gateway_latency.exs
     @echo "Results written to bench/results.json"
-    @echo "To rebaseline: review numbers, then update bench/baseline.json in a dedicated PR."
+    @echo "To rebaseline: run 'just rebaseline' or update bench/baseline.json by hand in a dedicated PR."
+
+# Run the harness and regenerate bench/baseline.json from the result
+# (Phase D-4 rebaseline ritual; see docs/perf-contract.md). Leaves
+# `_status` as scaffold-placeholder — the maintainer reviews and flips
+# to "active" in the rebaseline PR. The CI workflow
+# .github/workflows/perf-rebaseline.yml runs the same two steps on
+# the published reference target (ubuntu-latest); use this recipe to
+# preview the regeneration locally before dispatching the workflow.
+rebaseline:
+    mix run bench/gateway_latency.exs
+    mix run bench/rebaseline.exs
 
 # ═══════════════════════════════════════════════════════════════════════════════
 # LINT & FORMAT

diff --git a/bench/rebaseline.exs b/bench/rebaseline.exs
@@ -0,0 +1,183 @@
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>
+#
+# bench/rebaseline.exs — Phase D-4 baseline regeneration helper
+# (standards#99 of the standards#91 single-lane HCG channel).
+#
+# Reads bench/results.json (produced by bench/gateway_latency.exs) and
+# bench/baseline.json, then writes a regenerated bench/baseline.json
+# that replaces the per-scenario TODO values with the real p50/p95/p99/
+# ips from results.json — preserving _comment, _schema_version, tolerance,
+# per-scenario _comment_* fields, and (deliberately) _status.
+#
+# `_status` is left as "scaffold-placeholder". The rebaseline PR is the
+# review gate; the maintainer flips `_status` → "active" in the PR
+# (arming perf-regression.yml's gate) or in a follow-up. See
+# docs/perf-contract.md § Baseline lifecycle and the workflow file
+# .github/workflows/perf-rebaseline.yml for the surrounding ritual.
+#
+# Field ordering is preserved via Jason.OrderedObject so the diff
+# against the prior baseline is review-grade (numbers move; structure
+# does not).
+#
+# Runs:
+#
+#   • Driven by .github/workflows/perf-rebaseline.yml on ubuntu-latest
+#     (the published reference target per docs/perf-contract.md).
+#   • Locally, harness run and regeneration in one step:
+#
+#         just rebaseline
+#
+#     or, once `just bench-collect` has written bench/results.json,
+#     the regeneration alone:
+#
+#         mix run bench/rebaseline.exs
+
+defmodule Bench.Rebaseline do
+  @moduledoc """
+  Regenerates `bench/baseline.json` from the harness output in
+  `bench/results.json`.
+
+  Each scenario's `p50_us` / `p95_us` / `p99_us` / `ips` are replaced with
+  the collected numbers. Top-level fields other than `_generated_at`,
+  `_generated_by` and `scenarios` are written back untouched (so `_status`
+  is never flipped here), and per-scenario `_comment*` fields are carried
+  over. Scenarios already present in the baseline keep their position, so
+  the resulting diff moves numbers rather than structure.
+  """
+
+  alias Jason.OrderedObject
+
+  @results_path "bench/results.json"
+  @baseline_path "bench/baseline.json"
+
+  @doc """
+  Reads both JSON files, rewrites `bench/baseline.json` in place and prints
+  a summary to stdout.
+
+  Halts the VM with exit status 2 (after a message on stderr) when either
+  file cannot be read or decoded, or when the results carry no scenario
+  statistics. Both paths are relative to the current working directory.
+  """
+  @spec run() :: :ok | no_return()
+  def run do
+    with {:ok, results} <- read_json(@results_path, []),
+         {:ok, baseline} <- read_json(@baseline_path, objects: :ordered_objects),
+         :ok <- ensure_statistics(results) do
+      regenerated = rebaseline(baseline, results)
+      File.write!(@baseline_path, Jason.encode!(regenerated, pretty: true) <> "\n")
+      report(regenerated)
+    else
+      {:error, {path, :enoent}} ->
+        abort("ERROR: #{path} missing. Did `mix run bench/gateway_latency.exs` run first?")
+
+      {:error, {path, :no_statistics}} ->
+        abort(
+          "ERROR: #{path} has no \"statistics\" entries; " <>
+            "refusing to write a baseline with zero scenarios."
+        )
+
+      {:error, {path, reason}} ->
+        abort("ERROR reading #{path}: #{inspect(reason)}")
+    end
+  end
+
+  # ── Input handling ─────────────────────────────────────────────────────────
+
+  # Reads and decodes one JSON file. Failures are tagged with the path so the
+  # caller can report exactly which input was missing or malformed.
+  defp read_json(path, decode_opts) do
+    with {:ok, raw} <- File.read(path),
+         {:ok, decoded} <- Jason.decode(raw, decode_opts) do
+      {:ok, decoded}
+    else
+      {:error, reason} -> {:error, {path, reason}}
+    end
+  end
+
+  # An absent or empty "statistics" object would otherwise wipe every
+  # scenario from the baseline, so it is treated as a failed collection.
+  defp ensure_statistics(%{"statistics" => stats}) when map_size(stats) > 0, do: :ok
+  defp ensure_statistics(_results), do: {:error, {@results_path, :no_statistics}}
+
+  @spec abort(String.t()) :: no_return()
+  defp abort(message) do
+    IO.puts(:stderr, message)
+    System.halt(2)
+  end
+
+  # ── Rebaseline logic ───────────────────────────────────────────────────────
+
+  # Builds the new baseline object: refreshed provenance stamps plus one
+  # regenerated entry per scenario found in the results. Scenarios that are
+  # in the baseline but not in the results are dropped.
+  defp rebaseline(%OrderedObject{} = baseline, results) do
+    stats_by_name = Map.get(results, "statistics", %{})
+    existing = oget(baseline, "scenarios", %OrderedObject{values: []})
+
+    scenarios =
+      for name <- scenario_order(existing, stats_by_name) do
+        previous = oget(existing, name, %OrderedObject{values: []})
+        {name, scenario_entry(previous, Map.fetch!(stats_by_name, name))}
+      end
+
+    baseline
+    |> oput("_generated_at", DateTime.to_iso8601(DateTime.utc_now()))
+    |> oput("_generated_by", generated_by())
+    |> oput("scenarios", %OrderedObject{values: scenarios})
+  end
+
+  # Scenario names in output order: those already in the baseline keep their
+  # existing position; scenarios new to this run follow, sorted by name so the
+  # result is deterministic.
+  defp scenario_order(%OrderedObject{values: existing_pairs}, stats_by_name) do
+    known =
+      existing_pairs
+      |> Enum.map(&elem(&1, 0))
+      |> Enum.filter(&Map.has_key?(stats_by_name, &1))
+      |> Enum.uniq()
+
+    fresh =
+      stats_by_name
+      |> Map.keys()
+      |> Enum.reject(&(&1 in known))
+      |> Enum.sort()
+
+    known ++ fresh
+  end
+
+  # One scenario entry: the previous entry's `_comment*` fields first, then
+  # the four regenerated numbers. Any other previous field is discarded.
+  defp scenario_entry(%OrderedObject{values: existing_pairs}, stats) do
+    comments =
+      Enum.filter(existing_pairs, fn {key, _value} -> String.starts_with?(key, "_comment") end)
+
+    numbers = [
+      {"p50_us", us(stats, "50")},
+      {"p95_us", us(stats, "95")},
+      {"p99_us", us(stats, "99")},
+      {"ips", round2(Map.get(stats, "ips"))}
+    ]
+
+    %OrderedObject{values: comments ++ numbers}
+  end
+
+  # Percentile `p` divided by 1_000 and rounded to two decimals, or nil when
+  # the results do not carry it. The input is presumably in nanoseconds (the
+  # output keys are `*_us`) — confirm against the harness.
+  defp us(stats, p) do
+    case get_in(stats, ["percentiles", p]) do
+      nil -> nil
+      ns when is_number(ns) -> round2(ns / 1_000.0)
+    end
+  end
+
+  # `/` always yields a float, so a single clause covers integers and floats.
+  defp round2(nil), do: nil
+  defp round2(n) when is_number(n), do: Float.round(n / 1, 2)
+
+  # Provenance stamp: names the CI run when REBASELINE_RUN_ID is set,
+  # otherwise a local invocation.
+  defp generated_by do
+    case env("REBASELINE_RUN_ID") do
+      nil ->
+        "local: mix run bench/rebaseline.exs"
+
+      run ->
+        # "main" mirrors the workflow input's documented default.
+        ref = env("REBASELINE_RUN_REF") || "main"
+        ".github/workflows/perf-rebaseline.yml (ubuntu-latest; ref=#{ref}; run=#{run})"
+    end
+  end
+
+  # Value of an environment variable, with "" treated like unset. An empty
+  # workflow input arrives as an empty string, which is truthy in Elixir and
+  # would otherwise defeat the `||` fallback above.
+  defp env(name) do
+    case System.get_env(name) do
+      "" -> nil
+      value -> value
+    end
+  end
+
+  # ── OrderedObject helpers ──────────────────────────────────────────────────
+  #
+  # Jason.OrderedObject does not implement the full Access protocol, so
+  # tiny get/put helpers keep the rebaseline code shape close to the
+  # equivalent Map.get / Map.put it would otherwise use.
+
+  # Value stored under `key`, or `default` when the key is absent.
+  defp oget(%OrderedObject{values: pairs}, key, default) do
+    case List.keyfind(pairs, key, 0) do
+      {^key, value} -> value
+      nil -> default
+    end
+  end
+
+  # Replaces `key` in place when present (position preserved), else appends.
+  defp oput(%OrderedObject{values: pairs}, key, value) do
+    updated =
+      if List.keymember?(pairs, key, 0) do
+        List.keyreplace(pairs, key, 0, {key, value})
+      else
+        pairs ++ [{key, value}]
+      end
+
+    %OrderedObject{values: updated}
+  end
+
+  # ── Reporter ───────────────────────────────────────────────────────────────
+
+  # Prints the provenance stamps and one line of numbers per scenario.
+  defp report(%OrderedObject{} = baseline) do
+    IO.puts("")
+    IO.puts("bench/baseline.json regenerated.")
+    IO.puts("  _status:        #{inspect(oget(baseline, "_status", nil))}")
+    IO.puts("  _generated_at:  #{oget(baseline, "_generated_at", "?")}")
+    IO.puts("  _generated_by:  #{oget(baseline, "_generated_by", "?")}")
+    IO.puts("")
+
+    scenarios = oget(baseline, "scenarios", %OrderedObject{values: []})
+
+    IO.puts("Per-scenario numbers (µs / ips):")
+
+    Enum.each(scenarios.values, fn {name, %OrderedObject{values: pairs}} ->
+      m = Map.new(pairs)
+
+      IO.puts(
+        "  #{name}: p50=#{fmt(m["p50_us"])} p95=#{fmt(m["p95_us"])} " <>
+          "p99=#{fmt(m["p99_us"])} ips=#{fmt(m["ips"])}"
+      )
+    end)
+
+    IO.puts("")
+
+    IO.puts(
+      "Next: review numbers in the rebaseline PR; flip `_status` → \"active\" " <>
+        "to arm the perf-regression gate (see docs/perf-contract.md § Baseline lifecycle)."
+    )
+  end
+
+  defp fmt(nil), do: "—"
+  defp fmt(n) when is_number(n), do: to_string(n)
+end
+
+Bench.Rebaseline.run()