From d3228ecdc2977a75a4da04f4883f991706d13652 Mon Sep 17 00:00:00 2001 From: Vijit Singh Date: Thu, 18 Jun 2026 08:18:13 -0500 Subject: [PATCH] test(#284): hypothesis property tests for the money/numeric logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 7 correctness. Adds property-based tests (hypothesis) asserting invariants across a wide input range — the class of bug example tests miss (cf. the #70 anti-windup overshoot): - earnings (`xmr_per_hs_day`): non-negativity, linearity in block reward, decreasing in difficulty, zero on non-positive inputs. - metrics windowed averages: non-negativity, empty->0, and conservation (routed p2pool + xvb sum to the total when no legacy fallback). - XvB controller (`_fraction_to_ms`, `_routed_fraction`, `_max_donation_fraction`): monotonicity, unit-interval routed fraction, and the VIP/PPLNS reserve clamp staying in [0, hard cap]. - donation simulator (`run_algo`): the closed-loop controller never winds up — the donated fraction is clamped to [0, 1] every cycle, so p2pool efficiency stays in [0, 1] across randomized scenarios. hypothesis added to the test extra (uv.lock relocked). The ruff `ANN` -> `ty` typechecker on-ramp is documented as a deferred post-1.0 follow-up (dashboard README) — not a v1.1 blocker (ty is pre-1.0). Dashboard suite green: 543 passed, 94% coverage. Closes #284. Co-Authored-By: Claude Opus 4.8 (1M context) --- build/dashboard/README.md | 11 ++ build/dashboard/pyproject.toml | 2 + .../tests/service/test_numeric_properties.py | 112 ++++++++++++++++++ .../sim/test_donation_model_properties.py | 46 +++++++ build/dashboard/uv.lock | 23 ++++ docs/test-inventory.md | 23 +++- 6 files changed, 213 insertions(+), 4 deletions(-) create mode 100644 build/dashboard/tests/service/test_numeric_properties.py create mode 100644 build/dashboard/tests/sim/test_donation_model_properties.py diff --git a/build/dashboard/README.md b/build/dashboard/README.md index 3983c2f..67d5984 100644 --- a/build/dashboard/README.md +++ b/build/dashboard/README.md @@ -76,6 +76,17 @@ docker build --target test ./build/dashboard Tests are hermetic — no network, no containers, no real database (an in-memory SQLite is used via the `state_manager` fixture and the auto-applied DB-isolation fixture in `tests/conftest.py`). +Two coverage ratchets back the suite: a **total** floor (`--cov-fail-under=80`) and a **patch** gate +(`make test-patch-coverage` → `diff-cover` ≥ 90% on changed lines, #286). The money/numeric logic +(earnings, the XvB controller, the donation simulator) also has **property-based tests** +(`hypothesis`, #284) asserting invariants — non-negativity, conservation, monotonicity, clamp +bounds — across a wide input range, the class of bug example tests miss (cf. the #70 overshoot). + +**Typing roadmap (deferred):** the app is lightly annotated today, so a static type-checker gate is +premature (and `ty` is still pre-1.0). The on-ramp — turning on ruff's `ANN` ruleset as a +non-blocking annotation ratchet, then adopting `ty`/`pyright` once coverage is meaningful and `ty` +reaches 1.0 — is a post-1.0 follow-up (#284), not a v1.1 blocker. + ## Image The `Dockerfile` is multi-stage: diff --git a/build/dashboard/pyproject.toml b/build/dashboard/pyproject.toml index f21f470..de6b064 100644 --- a/build/dashboard/pyproject.toml +++ b/build/dashboard/pyproject.toml @@ -33,6 +33,8 @@ test = [ "pytest-aiohttp>=1.0", # diff-cover (#286): patch-coverage gate — new/changed lines must be >=90% covered. "diff-cover>=9", + # hypothesis (#284): property-based tests asserting invariants on the money/numeric logic. + "hypothesis>=6", ] # Developer tooling (Wave 7, #280). Pinned so local, pre-commit, and CI all run the SAME ruff # — lint output is version-sensitive, so a floor would let CI and a contributor disagree. diff --git a/build/dashboard/tests/service/test_numeric_properties.py b/build/dashboard/tests/service/test_numeric_properties.py new file mode 100644 index 0000000..f5d408c --- /dev/null +++ b/build/dashboard/tests/service/test_numeric_properties.py @@ -0,0 +1,112 @@ +"""Property-based tests (#284, hypothesis) for the money/numeric service layer. + +These assert *invariants* — non-negativity, conservation, monotonicity, clamp bounds — across a +wide input range rather than fixed examples, the class of bug example tests miss (cf. the #70 +anti-windup overshoot). Pure functions only; no I/O. +""" + +import math +from unittest.mock import MagicMock + +from hypothesis import given +from hypothesis import strategies as st + +from mining_dashboard.config.config import TIER_DEFAULTS, XVB_TIME_ALGO_MS +from mining_dashboard.service.algo_service import AlgoService +from mining_dashboard.service.earnings import xmr_per_hs_day +from mining_dashboard.service.metrics import _avg_p2pool_over_window, _avg_xvb_over_window + +_nonneg = st.floats(min_value=0, max_value=1e18, allow_nan=False, allow_infinity=False) +_pos = st.floats(min_value=1e-6, max_value=1e15, allow_nan=False, allow_infinity=False) + + +# --------------------------------------------------------------------------- earnings +@given(reward=_nonneg, diff=_nonneg) +def test_xmr_per_hs_day_non_negative(reward, diff): + assert xmr_per_hs_day(reward, diff) >= 0.0 + + +@given(reward=st.floats(max_value=0, allow_nan=False, allow_infinity=False), diff=_nonneg) +def test_xmr_per_hs_day_zero_on_nonpositive_reward(reward, diff): + assert xmr_per_hs_day(reward, diff) == 0.0 + + +@given(reward=_pos, diff=_pos, k=st.floats(min_value=0, max_value=1e6, allow_nan=False)) +def test_xmr_per_hs_day_linear_in_reward(reward, diff, k): + # Expected earnings are linear in the block reward (the model's defining property). + base = xmr_per_hs_day(reward, diff) + assert math.isclose(xmr_per_hs_day(k * reward, diff), k * base, rel_tol=1e-9, abs_tol=1e-30) + + +@given(reward=_pos, diff=_pos, bump=_pos) +def test_xmr_per_hs_day_decreases_with_difficulty(reward, diff, bump): + # Higher network difficulty -> a lower per-H/s rate. + assert xmr_per_hs_day(reward, diff + bump) <= xmr_per_hs_day(reward, diff) + + +# --------------------------------------------------------------- metrics windowed averages +@st.composite +def _in_window_rows(draw): + """History rows inside the window, with v == v_p2pool + v_xvb (no legacy fallback).""" + hr = st.floats(min_value=0, max_value=1e9, allow_nan=False, allow_infinity=False) + rows = [] + for _ in range(draw(st.integers(min_value=1, max_value=30))): + vp, vx = draw(hr), draw(hr) + rows.append({"timestamp": 10**12, "v_p2pool": vp, "v_xvb": vx, "v": vp + vx}) + return rows + + +@given(rows=_in_window_rows()) +def test_window_avgs_non_negative_and_conserve(rows): + window = 10**9 # all rows fall inside + ap = _avg_p2pool_over_window(rows, window) + ax = _avg_xvb_over_window(rows, window) + assert ap >= 0.0 and ax >= 0.0 + # Conservation: with v == vp + vx (no legacy fallback), the routed parts sum to the total avg. + avg_v = sum(r["v"] for r in rows) / len(rows) + assert math.isclose(ap + ax, avg_v, rel_tol=1e-9, abs_tol=1e-6) + + +@given(window=st.floats(min_value=1, max_value=1e9, allow_nan=False)) +def test_window_avgs_empty_history_is_zero(window): + assert _avg_p2pool_over_window([], window) == 0.0 + assert _avg_xvb_over_window([], window) == 0.0 + + +# --------------------------------------------------------------- algo_service clamp / fraction +def _algo(): + sm = MagicMock() + sm.get_tiers.return_value = dict(TIER_DEFAULTS) + return AlgoService(sm, MagicMock(), MagicMock()) + + +@given(frac=st.floats(min_value=0, max_value=1, allow_nan=False)) +def test_fraction_to_ms_monotone_and_non_negative(frac): + a = _algo() + assert a._fraction_to_ms(frac) >= 0 + # A larger fraction never yields a shorter slice. + assert a._fraction_to_ms(frac) <= a._fraction_to_ms(min(frac + 0.1, 1.0)) + + +@given(frac=st.floats(max_value=0, allow_nan=False, allow_infinity=False)) +def test_fraction_to_ms_zero_on_nonpositive(frac): + assert _algo()._fraction_to_ms(frac) == 0 + + +@given(dur=st.floats(min_value=0, max_value=XVB_TIME_ALGO_MS, allow_nan=False)) +def test_routed_fraction_in_unit_interval(dur): + assert AlgoService._routed_fraction("XVB", dur) == 1.0 + assert AlgoService._routed_fraction("P2POOL", dur) == 0.0 + assert 0.0 <= AlgoService._routed_fraction("SPLIT", dur) <= 1.0 + + +@given( + current_hr=st.floats(min_value=1, max_value=1e9, allow_nan=False), + window=st.floats(min_value=1, max_value=1e6, allow_nan=False), + diff=st.floats(min_value=0, max_value=1e15, allow_nan=False), +) +def test_max_donation_fraction_within_reserve_bounds(current_hr, window, diff): + # The VIP/PPLNS reserve clamp must keep the donatable fraction in [0, the hard cap]. + a = _algo() + f = a._max_donation_fraction(current_hr, window, {"difficulty": diff}) + assert 0.0 <= f <= a.max_donation_fraction diff --git a/build/dashboard/tests/sim/test_donation_model_properties.py b/build/dashboard/tests/sim/test_donation_model_properties.py new file mode 100644 index 0000000..559b28d --- /dev/null +++ b/build/dashboard/tests/sim/test_donation_model_properties.py @@ -0,0 +1,46 @@ +"""Property-based tests (#284, hypothesis) for the XvB donation-controller simulator. + +The closed-loop controller (Issue #70) must never *wind up*: whatever the rig hashrate, tier +target, measurement semantics, crediting, or report lag, the donated fraction stays clamped to +[0, 1] every cycle — so p2pool efficiency stays in [0, 1] and the credited rate stays non-negative. +Asserting this over randomized scenarios catches the overshoot class of bug that the fixed-example +tests in test_donation_model.py can't. +""" + +from hypothesis import given, settings +from hypothesis import strategies as st + +from mining_dashboard.sim.donation_model import CYCLES_PER_DAY, Scenario, run_algo + +_hr = st.floats(min_value=1e3, max_value=1e7, allow_nan=False, allow_infinity=False) + + +@settings(max_examples=50, deadline=None) +@given( + target_hr=_hr, + current_hr=_hr, + warm_avg=st.floats(min_value=0, max_value=1e7, allow_nan=False), + measurement=st.sampled_from(["fixed", "connected"]), + credit_factor=st.floats(min_value=0.5, max_value=2.0, allow_nan=False), + report_lag=st.integers(min_value=0, max_value=6), +) +def test_controller_never_winds_up( + target_hr, current_hr, warm_avg, measurement, credit_factor, report_lag +): + result = run_algo( + Scenario( + name="prop", + target_hr=target_hr, + current_hr=current_hr, + cycles=2 * CYCLES_PER_DAY, # 2 days — reaches steady state, stays fast + warm_avg=warm_avg, + measurement=measurement, + credit_factor=credit_factor, + report_lag_cycles=report_lag, + ) + ) + # Anti-windup (#70): the donated fraction is clamped to [0, 1] every cycle ... + assert all(0.0 <= f <= 1.0 for f in result.fraction) + # ... so the derived steady-state efficiency and the credited rate stay sane. + assert 0.0 <= result.p2pool_efficiency <= 1.0 + assert all(c >= 0.0 for c in result.credited) diff --git a/build/dashboard/uv.lock b/build/dashboard/uv.lock index 8ae0b5e..8ef4dd1 100644 --- a/build/dashboard/uv.lock +++ b/build/dashboard/uv.lock @@ -606,6 +606,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/b0/4af731ff7492c68a96e4c71bfd0f4590acde92b31c6fe4894e6465c10ff6/grpcio-1.81.1-cp314-cp314-win_amd64.whl", hash = "sha256:3768a5ff1b2125e6f552e561b6b2dca0e64982d8949689b4df145cf8b98d7821", size = 5070275, upload-time = "2026-06-11T12:46:48.486Z" }, ] +[[package]] +name = "hypothesis" +version = "6.155.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/77/13ec9b6390bce44f5badab39837dd6789bbfe6342a2ac611a71537a7756f/hypothesis-6.155.3.tar.gz", hash = "sha256:1e34b17ae9873515384312cb7640abd773eb096c7eef8c0d9c614fa2c306e9bb", size = 477961, upload-time = "2026-06-16T00:33:23.273Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/23/ce3a543935a01e478349e82f6c1440776f92d4cb346662c4d81574878fed/hypothesis-6.155.3-py3-none-any.whl", hash = "sha256:ede5a3d142d9c5c9f70cb3075541905b228d6c3a682bcec3d4fe0722e9eda127", size = 544401, upload-time = "2026-06-16T00:33:20.497Z" }, +] + [[package]] name = "identify" version = "2.6.19" @@ -738,6 +750,7 @@ dev = [ ] test = [ { name = "diff-cover" }, + { name = "hypothesis" }, { name = "pytest" }, { name = "pytest-aiohttp" }, { name = "pytest-asyncio" }, @@ -751,6 +764,7 @@ requires-dist = [ { name = "aiohttp", specifier = ">=3.10.11" }, { name = "diff-cover", marker = "extra == 'test'", specifier = ">=9" }, { name = "grpcio", specifier = ">=1.78.0" }, + { name = "hypothesis", marker = "extra == 'test'", specifier = ">=6" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4" }, { name = "protobuf", specifier = ">=6.31.1,<7" }, { name = "pytest", marker = "extra == 'test'", specifier = ">=8" }, @@ -1258,6 +1272,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/c7/c53e8dbff9c9dc4b7928773421ae294a5d28fcb8dcda1a089579d3a7e510/ruff-0.15.17-py3-none-win_arm64.whl", hash = "sha256:f3be1fbb34bcdfd146240d8fb92a709d4c2c8191348580a3c044ec60fa0b4456", size = 11355275, upload-time = "2026-06-11T17:54:43.635Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + [[package]] name = "tomli" version = "2.4.1" diff --git a/docs/test-inventory.md b/docs/test-inventory.md index 1e71478..d285113 100644 --- a/docs/test-inventory.md +++ b/docs/test-inventory.md @@ -4,7 +4,7 @@ _Generated by `make test-inventory` ([`tests/inventory.sh`](../tests/inventory.s edit by hand** — re-run the target to refresh. See [Testing Strategy](testing-strategy.md) for how the tiers fit together._ -**Totals:** 500 dashboard unit tests · 12 contract tests · 31 frontend +**Totals:** 511 dashboard unit tests · 12 contract tests · 31 frontend tests · 46 `pithead` shell sections · 17 harness self-test sections · 9 live config scenarios (17 axis values) · 6 mini-stack scenarios. @@ -14,7 +14,7 @@ tests · 46 `pithead` shell sections · 17 harness self-test sections · | Tier | Suite | Cases | |---|---|---| -| 1 — Unit | dashboard pytest | 500 | +| 1 — Unit | dashboard pytest | 511 | | 1 — Unit | frontend (node --test) | 31 | | 1 — Unit | `pithead` shell suite | 46 sections | | 1 — Unit | compose interpolation + hardening (#90) | 1 | @@ -27,7 +27,7 @@ tests · 46 `pithead` shell sections · 17 harness self-test sections · ## Tier 1 — Unit & component -### Dashboard (pytest) — 500 tests +### Dashboard (pytest) — 511 tests #### tests/client/test_docker_control.py — 6 - test_tcp_scheme_rewritten_to_http @@ -352,6 +352,18 @@ tests · 46 `pithead` shell sections · 17 harness self-test sections · - test_down_clears_only_after_recovery_window - test_healthy_requires_stable_window_from_unknown +#### tests/service/test_numeric_properties.py — 10 +- test_xmr_per_hs_day_non_negative +- test_xmr_per_hs_day_zero_on_nonpositive_reward +- test_xmr_per_hs_day_linear_in_reward +- test_xmr_per_hs_day_decreases_with_difficulty +- test_window_avgs_non_negative_and_conserve +- test_window_avgs_empty_history_is_zero +- test_fraction_to_ms_monotone_and_non_negative +- test_fraction_to_ms_zero_on_nonpositive +- test_routed_fraction_in_unit_interval +- test_max_donation_fraction_within_reserve_bounds + #### tests/service/test_storage_service.py — 30 - test_get_tiers - test_default_xvb_stats @@ -414,6 +426,9 @@ tests · 46 `pithead` shell sections · 17 harness self-test sections · - test_zero_reads_do_not_run_away - test_recovers_after_worker_drop +#### tests/sim/test_donation_model_properties.py — 1 +- test_controller_never_winds_up + #### tests/test_main.py — 1 - test_build_app_returns_wired_application @@ -807,5 +822,5 @@ tests · 46 `pithead` shell sections · 17 harness self-test sections · --- -_Grand total: **621** enumerated cases/sections across the four tiers (plus the live +_Grand total: **632** enumerated cases/sections across the four tiers (plus the live lifecycle and fault-injection phases, which are exercised on a real server)._