Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 174 additions & 0 deletions .github/workflows/perf-rebaseline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>
#
# perf-rebaseline.yml — Phase D-4 baseline-collection automation
# (standards#99 of the standards#91 single-lane HCG channel).
#
# What it does
# ────────────
# Manual workflow_dispatch only. Runs bench/gateway_latency.exs on the
# published reference target (ubuntu-latest, per docs/perf-contract.md
# § Targets), pipes bench/results.json through bench/rebaseline.exs to
# produce a regenerated bench/baseline.json with real p50/p95/p99/ips
# per scenario, then opens a `perf: rebaseline (standards#99)` PR for
# maintainer review.
#
# Why it exists
# ─────────────
# docs/perf-contract.md § Baseline lifecycle defines the rebaseline
# ritual as `just bench-collect` on a CI-equivalent target. The
# published reference is ubuntu-latest GHA, but the ritual was authored
# as a manual local step, which requires an Elixir 1.19 / OTP 28
# toolchain set up locally on the operator's machine. This workflow
# moves step 2 of the ritual (the actual collection run) onto the
# published reference target itself, so the rebaseline can be initiated
# from any GitHub UI without a local toolchain. The numbers it produces
# are then comparable to perf-regression.yml's gate, which runs on the
# same target.
#
# The generated PR leaves bench/baseline.json `_status` as
# "scaffold-placeholder". The maintainer reviews the numbers (steps
# 3-4 of the ritual) and either flips `_status` → "active" in the same
# PR (arming perf-regression.yml's gate immediately) or in a follow-up
# after a confidence-building window. Splitting "land real numbers"
# from "arm the gate" stays compatible with the runbook's D-4 / D-3
# checklist split in
# boj-server/docs/integration/hcg-tier2-rollout-runbook.md § 1.1.
#
# What it deliberately does NOT do
# ────────────────────────────────
# - Flip `_status` to "active" itself — that's a maintainer judgement
# on the noise/spread of the collected numbers, not an automation.
# - Push directly to main — never; always opens a PR.
# - Tighten tolerance ratios — also a maintainer judgement once
# intra-run variance is characterised.

name: Perf Rebaseline

on:
workflow_dispatch:
inputs:
ref:
description: 'Branch or SHA to collect from (default: main)'
required: false
default: main

# Deliberately NO concurrency cancel-in-progress here: a workflow_dispatch
# rebaseline is operator-initiated and serial dispatches should each
# complete on their own. (perf-regression.yml has cancel-in-progress
# because PR re-pushes obsolete prior runs; this workflow has no such
# obsolescence relationship.)

permissions:
contents: write
pull-requests: write

jobs:
rebaseline:
name: Collect baseline and open PR
runs-on: ubuntu-latest
timeout-minutes: 20

steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.ref }}
# PR-creating workflows need full (non-shallow) history so the branch
# push carries enough context for the merge-base computation.
fetch-depth: 0

- name: Setup Elixir/OTP
uses: erlef/setup-beam@fc68ffb90438ef2936bbb3251622353b3dcb2f93 # v1.18.2
with:
# Pinned to match .tool-versions; bump both together.
# MUST also match perf-regression.yml so numbers are comparable.
elixir-version: '1.19'
otp-version: '28'

- name: Cache deps
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
# Reuse perf-regression.yml's cache key so the first rebaseline
# after a perf-regression run primes off the warm cache.
path: |
deps
_build
key: ${{ runner.os }}-perf-${{ hashFiles('mix.lock') }}
restore-keys: |
${{ runner.os }}-perf-

- name: Install deps
run: |
mix local.hex --force
mix local.rebar --force
mix deps.get

- name: Compile
run: mix compile --warnings-as-errors

- name: Run bench harness
  # Name the shell explicitly: with `shell: bash` the runner invokes
  # `bash --noprofile --norc -eo pipefail {0}`, whereas the implicit
  # default is `bash -e {0}` (no pipefail). Without pipefail a failing
  # `mix run` is masked by `tee`'s zero exit status, and the job would
  # carry on to rebaseline from a stale or missing bench/results.json.
  shell: bash
  run: mix run bench/gateway_latency.exs | tee bench/console.log

- name: Regenerate baseline.json from results.json
env:
REBASELINE_RUN_ID: ${{ github.run_id }}
REBASELINE_RUN_REF: ${{ github.event.inputs.ref }}
run: mix run bench/rebaseline.exs

- name: Upload bench artefacts
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: perf-rebaseline-results
path: |
bench/results.json
bench/console.log
retention-days: 30

- name: Open rebaseline PR
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
BRANCH="perf/rebaseline-${{ github.run_id }}"
git config user.name 'github-actions[bot]'
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
git checkout -b "$BRANCH"
git add bench/baseline.json
if git diff --cached --quiet; then
echo '::warning::No baseline.json delta from this run — nothing to PR.'
exit 0
fi
RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
git commit -m 'perf: rebaseline (standards#99)' \
-m "Auto-collected from \`bench/gateway_latency.exs\` on \`ubuntu-latest\` (the published reference target per \`docs/perf-contract.md\` § Targets)." \
-m "Workflow run: $RUN_URL"
git push -u origin "$BRANCH"
gh pr create \
--base main \
--head "$BRANCH" \
--title 'perf: rebaseline (standards#99)' \
--body "$(cat <<EOF
## Summary

Auto-generated by \`.github/workflows/perf-rebaseline.yml\` (workflow run [#${{ github.run_id }}]($RUN_URL)). Replaces the scaffold-placeholder TODO values in \`bench/baseline.json\` with real \`p50\` / \`p95\` / \`p99\` / \`ips\` per scenario, collected by \`bench/gateway_latency.exs\` on the published reference target (\`ubuntu-latest\`, per \`docs/perf-contract.md\` § Targets).

\`Refs hyperpolymath/standards#91\`
\`Refs hyperpolymath/standards#99\`

## What's in this PR

- \`bench/baseline.json\`: real per-scenario percentiles + ips; refreshed \`_generated_at\` / \`_generated_by\`.
- \`_status\` remains \`scaffold-placeholder\`. The maintainer flips it to \`active\` in this PR (arming the gate immediately) or in a follow-up after a confidence-building window — see \`docs/perf-contract.md\` § Baseline lifecycle.
- \`tolerance\` ratios unchanged.

## Review checklist

- [ ] Numbers are within an order of magnitude of expectations for each scenario (sanity).
- [ ] Spread looks reasonable (no scenario with p99 >> p95 in a way that suggests a noisy outlier; if so, consider re-running before merging).
- [ ] Decide whether to flip \`_status\` → \`active\` here (one-PR D-4 + D-3 close) or in a follow-up (D-4 lands; D-3 flip later).

Bench artefacts (results.json, console.log) are attached to the workflow run as the \`perf-rebaseline-results\` artefact for 30 days.
EOF
)"
13 changes: 12 additions & 1 deletion Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,18 @@ bench:
bench-collect:
mix run bench/gateway_latency.exs
@echo "Results written to bench/results.json"
@echo "To rebaseline: review numbers, then update bench/baseline.json in a dedicated PR."
@echo "To rebaseline: run 'just rebaseline' or update bench/baseline.json by hand in a dedicated PR."

# Run the harness and regenerate bench/baseline.json from the result
# (Phase D-4 rebaseline ritual; see docs/perf-contract.md). Leaves
# `_status` as scaffold-placeholder — the maintainer reviews and flips
# to "active" in the rebaseline PR. The CI workflow
# .github/workflows/perf-rebaseline.yml runs the same two steps on
# the published reference target (ubuntu-latest); use this recipe to
# preview the regeneration locally before dispatching the workflow.
rebaseline:
mix run bench/gateway_latency.exs
mix run bench/rebaseline.exs

# ═══════════════════════════════════════════════════════════════════════════════
# LINT & FORMAT
Expand Down
183 changes: 183 additions & 0 deletions bench/rebaseline.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# SPDX-License-Identifier: MPL-2.0
# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>
#
# bench/rebaseline.exs — Phase D-4 baseline regeneration helper
# (standards#99 of the standards#91 single-lane HCG channel).
#
# Reads bench/results.json (produced by bench/gateway_latency.exs) and
# bench/baseline.json, then writes a regenerated bench/baseline.json
# that replaces the per-scenario TODO values with the real p50/p95/p99/
# ips from results.json — preserving _comment, _schema_version, tolerance,
# per-scenario _comment_* fields, and (deliberately) _status.
#
# `_status` is left as "scaffold-placeholder". The rebaseline PR is the
# review gate; the maintainer flips `_status` → "active" in the PR
# (arming perf-regression.yml's gate) or in a follow-up. See
# docs/perf-contract.md § Baseline lifecycle and the workflow file
# .github/workflows/perf-rebaseline.yml for the surrounding ritual.
#
# Field ordering is preserved via Jason.OrderedObject so the diff
# against the prior baseline is review-grade (numbers move; structure
# does not).
#
# Runs:
#
# • Driven by .github/workflows/perf-rebaseline.yml on ubuntu-latest
# (the published reference target per docs/perf-contract.md).
#   • Locally, harness run and regeneration in one go:
#
#       just rebaseline
#
#     or, when bench/results.json already exists (e.g. after
#     `just bench-collect`), regeneration only:
#
#       mix run bench/rebaseline.exs

defmodule Bench.Rebaseline do
  @moduledoc """
  Regenerates `bench/baseline.json` from `bench/results.json`.

  The baseline is decoded into `Jason.OrderedObject` structs so that key order
  survives the round trip. Top-level keys other than `_generated_at`,
  `_generated_by` and `scenarios` are written back untouched (so `_status` is
  never flipped here).
  """

  alias Jason.OrderedObject

  @results_path "bench/results.json"
  @baseline_path "bench/baseline.json"

  @doc """
  Reads both JSON files, rewrites `bench/baseline.json` in place and prints a
  summary to stdout.

  Returns `:ok` on success. On any input problem (file missing or unreadable,
  invalid JSON, no usable `"statistics"` in the results) it prints an `ERROR:`
  line to stderr and halts the VM with exit status 2 without touching the
  baseline.
  """
  @spec run() :: :ok | no_return()
  def run do
    with {:ok, results} <- read_json(@results_path, []),
         {:ok, baseline} <- read_json(@baseline_path, objects: :ordered_objects),
         {:ok, new_baseline} <- rebaseline(baseline, results) do
      json = Jason.encode!(new_baseline, pretty: true)
      File.write!(@baseline_path, json <> "\n")
      report(new_baseline)
    else
      # Every step returns a ready-to-print message, so there is no need to
      # guess here which step failed.
      {:error, message} ->
        IO.puts(:stderr, "ERROR: " <> message)
        System.halt(2)
    end
  end

  # ── Input ──────────────────────────────────────────────────────────────────

  # Reads and decodes one JSON file; errors come back as `{:error, message}`
  # with the offending path already named in the message.
  defp read_json(path, decode_opts) do
    case File.read(path) do
      {:ok, raw} ->
        decode_json(path, raw, decode_opts)

      {:error, :enoent} ->
        {:error, "#{path} missing." <> missing_hint(path)}

      {:error, reason} ->
        {:error, "cannot read #{path}: #{:file.format_error(reason)}"}
    end
  end

  defp decode_json(path, raw, decode_opts) do
    case Jason.decode(raw, decode_opts) do
      {:ok, _decoded} = ok -> ok
      {:error, error} -> {:error, "#{path} is not valid JSON: #{Exception.message(error)}"}
    end
  end

  # Only the results file is produced by the harness; the hint would be
  # misleading for a missing baseline.
  defp missing_hint(@results_path), do: " Did `mix run bench/gateway_latency.exs` run first?"
  defp missing_hint(_path), do: ""

  # ── Rebaseline logic ───────────────────────────────────────────────────────

  # Builds the new baseline. Only scenarios present in the results are
  # written; each keeps its existing `_comment*` fields.
  defp rebaseline(%OrderedObject{} = baseline, %{"statistics" => stats})
       when is_map(stats) and map_size(stats) > 0 do
    existing = oget(baseline, "scenarios", %OrderedObject{values: []})

    scenarios =
      for name <- scenario_order(existing, stats) do
        prior = oget(existing, name, %OrderedObject{values: []})
        {name, scenario_entry(prior, Map.fetch!(stats, name))}
      end

    updated =
      baseline
      |> oput("_generated_at", DateTime.utc_now() |> DateTime.to_iso8601())
      |> oput("_generated_by", generated_by())
      |> oput("scenarios", %OrderedObject{values: scenarios})

    {:ok, updated}
  end

  # An absent or empty "statistics" map would otherwise replace every scenario
  # with an empty object and still exit 0.
  defp rebaseline(%OrderedObject{}, _results) do
    {:error,
     "#{@results_path} has no non-empty \"statistics\" object; " <>
       "refusing to wipe the scenarios in #{@baseline_path}."}
  end

  defp rebaseline(_baseline, _results) do
    {:error, "#{@baseline_path} must contain a JSON object at the top level."}
  end

  # Scenario names in output order: those already in the baseline keep their
  # current position (so the diff only moves numbers), scenarios that are new
  # in the results follow in alphabetical order.
  defp scenario_order(%OrderedObject{values: pairs}, stats) do
    kept =
      for({name, _entry} <- pairs, is_map_key(stats, name), do: name)
      |> Enum.uniq()

    added = Enum.sort(Map.keys(stats) -- kept)
    kept ++ added
  end

  # One scenario object: the prior entry's `_comment*` pairs (in their
  # existing order) followed by the four freshly measured numbers.
  defp scenario_entry(%OrderedObject{values: existing_pairs}, stats) do
    comments =
      for {key, _value} = pair <- existing_pairs,
          String.starts_with?(key, "_comment"),
          do: pair

    numbers = [
      {"p50_us", us(stats, "50")},
      {"p95_us", us(stats, "95")},
      {"p99_us", us(stats, "99")},
      {"ips", round2(Map.get(stats, "ips"))}
    ]

    %OrderedObject{values: comments ++ numbers}
  end

  # Percentile `p` divided by 1000 and rounded to two decimals; `nil` when the
  # percentile is absent. NOTE(review): the division assumes the harness
  # reports nanoseconds — confirm against bench/gateway_latency.exs.
  defp us(stats, p) do
    case get_in(stats, ["percentiles", p]) do
      nil -> nil
      ns when is_number(ns) -> round2(ns / 1_000.0)
    end
  end

  # `Float.round/2` only accepts floats, so integers are widened first.
  defp round2(nil), do: nil
  defp round2(n) when is_integer(n), do: Float.round(n / 1.0, 2)
  defp round2(n) when is_float(n), do: Float.round(n, 2)

  # Provenance string for `_generated_by`. REBASELINE_RUN_ID marks a workflow
  # run; without it the run is reported as local.
  defp generated_by do
    case env("REBASELINE_RUN_ID") do
      nil ->
        "local: mix run bench/rebaseline.exs"

      run ->
        ref = env("REBASELINE_RUN_REF") || "main"
        ".github/workflows/perf-rebaseline.yml (ubuntu-latest; ref=#{ref}; run=#{run})"
    end
  end

  # Environment lookup that treats an empty or whitespace-only value like an
  # unset variable. An empty string is truthy in Elixir, so a bare
  # `System.get_env(name) || default` would never reach the default for it.
  defp env(name) do
    with value when is_binary(value) <- System.get_env(name),
         trimmed when trimmed != "" <- String.trim(value) do
      trimmed
    else
      _ -> nil
    end
  end

  # ── OrderedObject helpers ──────────────────────────────────────────────────
  #
  # Small get/put helpers that work directly on the `values` pair list, so
  # the position of every key stays explicit.

  defp oget(%OrderedObject{values: pairs}, key, default) do
    case List.keyfind(pairs, key, 0) do
      {^key, value} -> value
      nil -> default
    end
  end

  # `List.keystore/4` replaces the pair in place when the key exists and
  # appends it otherwise.
  defp oput(%OrderedObject{values: pairs}, key, value) do
    %OrderedObject{values: List.keystore(pairs, key, 0, {key, value})}
  end

  # ── Reporter ───────────────────────────────────────────────────────────────

  # Prints the provenance fields and one line of numbers per scenario.
  defp report(%OrderedObject{} = baseline) do
    IO.puts("")
    IO.puts("bench/baseline.json regenerated.")
    IO.puts("  _status:       #{inspect(oget(baseline, "_status", nil))}")
    IO.puts("  _generated_at: #{oget(baseline, "_generated_at", "?")}")
    IO.puts("  _generated_by: #{oget(baseline, "_generated_by", "?")}")
    IO.puts("")

    scenarios = oget(baseline, "scenarios", %OrderedObject{values: []})

    IO.puts("Per-scenario numbers (µs / ips):")

    Enum.each(scenarios.values, fn {name, %OrderedObject{values: pairs}} ->
      m = Map.new(pairs)

      IO.puts(
        "  #{name}: p50=#{fmt(m["p50_us"])}  p95=#{fmt(m["p95_us"])}  " <>
          "p99=#{fmt(m["p99_us"])}  ips=#{fmt(m["ips"])}"
      )
    end)

    IO.puts("")

    IO.puts(
      "Next: review numbers in the rebaseline PR; flip `_status` → \"active\" " <>
        "to arm the perf-regression gate (see docs/perf-contract.md § Baseline lifecycle)."
    )
  end

  defp fmt(nil), do: "—"
  defp fmt(n) when is_number(n), do: to_string(n)
end

# Script entry point: `mix run bench/rebaseline.exs` evaluates this file, so
# the regeneration runs as soon as the module above has been defined.
Bench.Rebaseline.run()
Loading
Loading