From e57ce3e5feb4134927f329103e8ebe821e60ad5d Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:11:51 -0700 Subject: [PATCH 01/33] Update background-auto-estimate plan after review cycle --- docs/dev/plans/background-auto-estimate.md | 170 ++++++++++++++++----- 1 file changed, 131 insertions(+), 39 deletions(-) diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index f6a31703c..e417caa6e 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -14,16 +14,23 @@ autonomously. No other deliberate exception to `AGENTS.md` is taken. ## ADR -This plan owns the ADR -[`docs/dev/adrs/suggestions/background-auto-estimate.md`](../adrs/suggestions/background-auto-estimate.md). -The ADR stays a **suggestion** (Status: Proposed) for this PR; promotion -to `accepted/` is intentionally **out of scope** here and can follow -when the team formally accepts it. (`/review-plan` may request promotion -as a P1 step; if so, it becomes a one-line docs step that `git mv`s the -file, flips the Status line, and updates the index row.) -`/draft-impl-1`'s Phase A commits the ADR from its current -`suggestions/` location and removes the design-phase `_review-*` / -`_reply-*` siblings. +This plan owns the ADR, currently at +[`docs/dev/adrs/suggestions/background-auto-estimate.md`](../adrs/suggestions/background-auto-estimate.md) +(Status: Proposed). Because this change **implements** that ADR, +[`AGENTS.md`](../../../AGENTS.md) → **Change Discipline** requires the +**same change** to promote it to `accepted/` before the PR is opened — a +PR that implements an ADR must not leave it in `suggestions/`. 
Promotion +is therefore **in scope and mandatory**, handled by the first Phase 1 +step (**P1.0**): `git mv` the ADR into `accepted/`, set its +`**Status:**` to `Accepted`, flip its `docs/dev/adrs/index.md` row to +`Accepted` with the `accepted/…` link, and rewrite every link that +pointed at the old `suggestions/` path — in this plan and the ADR — +located with `git grep -n`. The ordering relative to `/draft-impl-1` is: +its **Phase A** runs **before** the checklist walk — committing the +reviewed plan and the ADR (still in `suggestions/`) and removing the +design-phase `_review-*` / `_reply-*` siblings — and then **P1.0**, the +first checklist step in Phase B, performs the promotion above and commits +the moved ADR, the `index.md` update, and the plan link rewrites. ## Branch and PR @@ -36,8 +43,12 @@ file, flips the Status line, and updates the index row.) - **Public API.** A user-invoked `LineSegmentBackground.auto_estimate(*, method='auto', width=None, smoothness=None, n_points=None, use_model=True)` — zero-arg must work, no `free` argument, no `**kwargs`. Returns - `None`, logs a one-line summary (method, width, point count). - **Never** runs inside `_update()` / at calculation time. + `None`, logs a one-line summary (resolved method, effective width in + points, point count) read from the metadata the estimator helper + returns (the `BackgroundEstimate` result object, P1.3), so the + reported width is the value actually used — supplied, derived, or the + degenerate-input fallback — never a guess reconstructed by the + adapter. **Never** runs inside `_update()` / at calculation time. - **Two-stage algorithm.** Stage 1 estimates a peak-insensitive background curve `B(x)`; Stage 2 thins it to sparse `(x, intensity)` anchors with Ramer–Douglas–Peucker simplification (endpoints always @@ -84,11 +95,12 @@ file, flips the Status line, and updates the index row.) 
- **Empirical calibration (resolved during Phase 2, not blocking).** The Stage-2 tolerance multiplier (`c · σ`, proposed `c ≈ 2`), the width - percentile (proposed ~75th), and confirmation that the single `arpls` - default holds across the tutorial corpus (CWL/TOF, neutron/X-ray). - Record anything surprising in the ADR. -- **ADR promotion** to `accepted/` is out of scope here (see _ADR_); - flagged for `/review-plan` to confirm or request. + percentile (proposed ~75th), the numeric constants in the backend + dispatch contract (P1.3 — the `arpls`/`fabc` `lam` scaling and the + `snip`/`fabc` window factors `k`, `m`), and confirmation that the + single `arpls` default holds across the tutorial corpus (CWL/TOF, + neutron/X-ray). Only the constants are open; the parameter-to-backend + mapping itself is fixed in P1.3. Record anything surprising in the ADR. ## Concrete files likely to change @@ -105,7 +117,9 @@ file, flips the Status line, and updates the index row.) `BackgroundTypeEnum`. - `src/easydiffraction/datablocks/experiment/categories/background/estimate.py` — **new** pure-function estimator module (parameterization + Stage-1 - via `pybaselines` + Stage-2 thinning). + via `pybaselines` + Stage-2 thinning), returning a `BackgroundEstimate` + result object (curve, anchors, and the + method/width/noise/tolerance/backend-params metadata the adapter logs). - `src/easydiffraction/core/collection.py` — reusable `clear()` on `CollectionBase` via `_adopt_items([])` (unlink children, empty `_items`, rebuild `_index`). Used by the overwrite contract. @@ -114,9 +128,14 @@ file, flips the Status line, and updates the index row.) `CategoryCollection` is defined here, not in `collection.py`. - `src/easydiffraction/datablocks/experiment/categories/background/line_segment.py` — add `LineSegmentBackground.auto_estimate()` (the thin adapter). 
-- `docs/dev/adrs/suggestions/background-auto-estimate.md` and - `docs/dev/adrs/index.md` — already written; committed by - `/draft-impl-1` Phase A (not edited again here). +- `docs/dev/adrs/{suggestions → accepted}/background-auto-estimate.md` + and `docs/dev/adrs/index.md` — the ADR is promoted out of + `suggestions/` in **P1.0** (`git mv`, `**Status:** Accepted`, index row + flipped to `accepted/…`, `suggestions/` links rewritten); its technical + content is otherwise unchanged here. `/draft-impl-1`'s Phase A (before + the checklist) commits the reviewed plan and the still-in-`suggestions/` + ADR and removes the design siblings; the P1.0 step then commits the + promotion (moved ADR, index update, plan link rewrites). - Phase 2 (tests): `tests/unit/easydiffraction/datablocks/experiment/categories/background/test_estimate.py` (**new**), `…/test_line_segment.py` (update for `auto_estimate`), unit @@ -132,6 +151,23 @@ step's `Commit:` message **before** moving to the next step or the Phase 1 review gate. Mark `[x]` in this file as part of the same commit. Phase 1 is **code + docs only — no tests** (those are Phase 2). +- [ ] **P1.0 — Promote the ADR to `accepted/`.** Per + [`AGENTS.md`](../../../AGENTS.md) → **Change Discipline**, a change + that implements an ADR must move it out of `suggestions/` in the + same change. `git mv` + `docs/dev/adrs/suggestions/background-auto-estimate.md` → + `docs/dev/adrs/accepted/background-auto-estimate.md`, set its + `**Status:**` line to `Accepted`, flip the matching + `docs/dev/adrs/index.md` row to `Accepted` with the `accepted/…` + link, and rewrite every remaining `suggestions/` link to this ADR + (in this plan and the ADR itself) to `accepted/`, locating them + with `git grep -n background-auto-estimate`. Docs-only; no + technical content of the ADR changes. Stage the moved + `docs/dev/adrs/accepted/background-auto-estimate.md`, + `docs/dev/adrs/index.md`, and + `docs/dev/plans/background-auto-estimate.md`. 
Commit: + `Promote background-auto-estimate ADR to accepted` + - [ ] **P1.1 — Add `pybaselines` dependency.** Add `'pybaselines>=1.1'` to the `dependencies` list in `pyproject.toml` (it is the new runtime backend, §4 of the ADR). Run `pixi lock` to regenerate @@ -148,7 +184,7 @@ step's `Commit:` message **before** moving to the next step or the Phase - [ ] **P1.3 — Add the background curve estimator helper.** Create the new module `estimate.py` with a pure - `estimate_background_curve(x, y, *, method='arpls', beam_mode, peaks=None, width=None, smoothness=None, n_points=None) -> (curve, anchors)`. + `estimate_background_curve(x, y, *, method='arpls', beam_mode, peaks=None, width=None, smoothness=None, n_points=None) -> BackgroundEstimate`. `method` is the **resolved** Stage-1 algorithm (`snip` / `arpls` / `fabc` — never `auto`) and selects the `pybaselines` routine, so **all backend dispatch lives in the helper**, not the adapter. @@ -158,7 +194,51 @@ step's `Commit:` message **before** moving to the next step or the Phase to anchors by RDP with tolerance `c · σ` (endpoints kept, optional `n_points` cap). Array-in/array-out, no model state, no domain imports. Extract helpers to stay under the lint complexity - thresholds. Commit: `Add background curve estimator helper` + thresholds. + + **Return value.** Return a small frozen result object + `BackgroundEstimate` (a `dataclass` or `NamedTuple` local to + `estimate.py`) with fields: `curve` (the dense `B(x)` over the + input grid), `anchors` (the thinned `(x, intensity)` array), + `method` (the resolved Stage-1 method actually run), `width` (the + effective `W` in points — supplied, derived, or fallback), `noise` + (σ), `tolerance` (the `c · σ` actually used), and `backend_params` + (the dict handed to `pybaselines`). 
The adapter logs its one-line + summary from this metadata (see _Decisions_ → Public API), so the + reported values are the ones actually used — this replaces the + earlier bare `(curve, anchors)` return, which could not carry the + derived/fallback width the summary needs. + + **`peaks` contract.** `peaks` is an optional boolean `np.ndarray` + aligned 1-to-1 with `x` (identical length): `True` marks a + peak/forbidden sample where **Stage 2 must not place a + non-endpoint anchor** (the first and last samples are always kept + regardless). When `peaks is None`, the helper builds the equivalent + mask from its own `find_peaks`/`peak_widths` pass on `y` (each + detected peak widened to ±`W`); when supplied (the model-guided + path in P1.5, or tests) it is used verbatim. This single mask is + the mechanism behind the "no anchor lands on a peak" invariant in + both paths. + + **Backend dispatch contract (only the constants are calibrated in + Phase 2).** Map the derived or supplied parameters onto the + `pybaselines` 1.2.x API as follows — the mapping is fixed here, and + only the numeric constants (`k`, `m`, the `lam` scaling) are open + for Phase-2 tuning: + - `arpls` → `Baseline.arpls(y, lam=λ)`. `λ = smoothness` when the + caller supplies it, otherwise a derived Whittaker penalty that + grows with `N` and `W` (larger grid / broader peaks → larger + `λ`); the scaling constant is calibrated in Phase 2. + - `snip` → `Baseline.snip(y, max_half_window=ceil(k·W))` with + `k ≈ 1` so the window clears the broadest peak half-width. + `snip` has no Whittaker penalty, so an explicitly supplied + `smoothness` is **not applicable**: ignore it and emit one + `log.warning`. + - `fabc` → `Baseline.fabc(y, lam=λ, scale=ceil(W), min_length=ceil(m·W))`, + with `λ` as for `arpls`, `scale` the wavelet scale ≈ peak width, + and `min_length` the shortest run the classifier accepts as + baseline (`m ≈ 1`). 
+ Commit: `Add background curve estimator helper` - [ ] **P1.4 — Add `CollectionBase.clear()`.** Add a bulk reset to `CollectionBase` (`core/collection.py`). It must **not** be a bare @@ -182,15 +262,20 @@ step's `Commit:` message **before** moving to the next step or the Phase _Decisions_). It: reads `self._parent.data`; chooses the helper input `y` — data-only `intensity_meas`, or, when `use_model` and `np.any(intensity_calc)`, the peak-subtracted - `intensity_meas − (intensity_calc − intensity_bkg)` and `peaks` - detected from the peak-only model array; resolves `method='auto'` - to `arpls` and passes the resolved method into the helper (which - owns Stage-1 dispatch); clips heights to `[0, intensity_meas]`; - `clear()`s the collection (logging the replace notice when it was - non-empty) and `create()`s fixed points with sequential ids; logs - the one-line summary. Validate `method` against - `BackgroundEstimatorMethodEnum` centrally. Numpy-style docstring; - no `**kwargs`. Commit: + `intensity_meas − (intensity_calc − intensity_bkg)`. In the + model-guided path it also builds the `peaks` boolean mask per the + P1.3 contract: run `find_peaks` on the peak-only model array + `intensity_calc − intensity_bkg`, widen each detected peak to its + `peak_widths` extent, set those samples `True`, and pass it as + `peaks=`; the data-only path passes `peaks=None` (the helper + derives its own). It then resolves `method='auto'` to `arpls` and passes + the resolved method into the helper (which owns Stage-1 dispatch); + clips anchor heights to `[0, intensity_meas]`; `clear()`s the + collection (logging the replace notice when it was non-empty) and + `create()`s fixed points with sequential ids; logs the one-line + summary from the returned `BackgroundEstimate` metadata. Validate + `method` against `BackgroundEstimatorMethodEnum` centrally. + Numpy-style docstring; no `**kwargs`. Commit: `Add auto_estimate to LineSegmentBackground` - [ ] **P1.6 — Phase 1 review gate.** No code.
Mark this `[x]`, commit @@ -209,13 +294,20 @@ Tests to add/update (unit tests mirror the source tree per with a known analytic background (flat, linear, smooth curve, TOF-like decay) plus planted Gaussians including a deliberately overlapped multiplet — assert the recovered points reproduce the true background - within tolerance, **no anchor lands on a planted peak**, and none - exceeds the local data; **CWL angular broadening** (FWHM grows with x) - keeps the background off the broad peaks; **model-guided re-run** with - a supplied peak-only model places better anchors **and** yields - **absolute** background heights (not residual corrections); - **determinism** (same input → same points); **graceful degradation** - (peakless input → single warning, not a crash). + within tolerance, **no anchor lands on a planted peak** (covering both + the supplied-`peaks` mask built from the planted peak regions and the + `peaks=None` self-derived path), and none exceeds the local data; that + the returned `BackgroundEstimate` metadata (`method`, `width`, + `noise`, `tolerance`) reports the values actually used; that each + method routes to the contracted `pybaselines` call (`arpls`/`fabc` + `lam`, `snip` `max_half_window`, `fabc` `scale`/`min_length`) and that + a `smoothness` passed to `snip` is ignored with one warning; **CWL + angular broadening** (FWHM grows with x) keeps the background off the + broad peaks; **model-guided re-run** with a supplied peak-only model + places better anchors **and** yields **absolute** background heights + (not residual corrections); **determinism** (same input → same + points); **graceful degradation** (peakless input → single warning, + not a crash). 
- **`test_line_segment.py` (update)** for `auto_estimate` lifecycle: overwrite-and-re-fix (fixed points even when prior ones were freed), the replace notice on a non-empty collection, sequential ids, and From 6df933eaa5117363f9a1d60461ceba2d4bafc223 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:20:15 -0700 Subject: [PATCH 02/33] Promote background-auto-estimate ADR to accepted --- .../background-auto-estimate.md | 2 +- docs/dev/adrs/index.md | 2 +- docs/dev/plans/background-auto-estimate.md | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) rename docs/dev/adrs/{suggestions => accepted}/background-auto-estimate.md (99%) diff --git a/docs/dev/adrs/suggestions/background-auto-estimate.md b/docs/dev/adrs/accepted/background-auto-estimate.md similarity index 99% rename from docs/dev/adrs/suggestions/background-auto-estimate.md rename to docs/dev/adrs/accepted/background-auto-estimate.md index 1baca4e89..c6755e27b 100644 --- a/docs/dev/adrs/suggestions/background-auto-estimate.md +++ b/docs/dev/adrs/accepted/background-auto-estimate.md @@ -1,6 +1,6 @@ # ADR: Automatic Line-Segment Background Estimation -**Status:** Proposed **Date:** 2026-06-01 +**Status:** Accepted **Date:** 2026-06-01 ## Group diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 4156a5eb0..6b9f3379a 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -35,7 +35,7 @@ folders. | Documentation | Accepted | Plotting & Docs Performance for Interactive Figures | Self-hosts a lazy, shared figure runtime so docs pages load fast and progressively while staying interactive. | [`plotting-docs-performance.md`](accepted/plotting-docs-performance.md) | | Documentation | Suggestion | Documentation CI and Build Verification | Proposes strict MkDocs builds, API-derived docs, snippet smoke tests, link checks, and prose/spelling checks. 
| [`documentation-ci-build.md`](suggestions/documentation-ci-build.md) | | Experiment model | Accepted | Immutable Experiment Type | Makes experiment type axes creation-time state rather than mutable runtime state. | [`immutable-experiment-type.md`](accepted/immutable-experiment-type.md) | -| Experiment model | Suggestion | Automatic Line-Segment Background Estimation | Detects line-segment background control points from the measured pattern, peak-insensitive and editable. | [`background-auto-estimate.md`](suggestions/background-auto-estimate.md) | +| Experiment model | Accepted | Automatic Line-Segment Background Estimation | Detects line-segment background control points from the measured pattern, peak-insensitive and editable. | [`background-auto-estimate.md`](accepted/background-auto-estimate.md) | | Factories | Accepted | Factory Contracts and Metadata | Standardizes factory construction, metadata, compatibility, and registration behavior. | [`factory-contracts.md`](accepted/factory-contracts.md) | | Naming | Accepted | Factory Tag Naming | Defines canonical factory tag style and standard abbreviations. | [`factory-tag-naming.md`](accepted/factory-tag-naming.md) | | Persistence | Accepted | Free-Flag CIF Encoding | Encodes fit free/fixed state through CIF uncertainty syntax instead of a separate free list. | [`free-flag-cif-encoding.md`](accepted/free-flag-cif-encoding.md) | diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index e417caa6e..465fa0fba 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -1,7 +1,7 @@ # Plan: Automatic Line-Segment Background Estimation This plan follows [`AGENTS.md`](../../../AGENTS.md) and implements the -[`background-auto-estimate`](../adrs/suggestions/background-auto-estimate.md) +[`background-auto-estimate`](../adrs/accepted/background-auto-estimate.md) ADR (drafted via `/draft-adr`, review cycle closed at the sentinel). 
**Dependency authorization (for `/draft-impl-1`):** this plan **names @@ -14,9 +14,10 @@ autonomously. No other deliberate exception to `AGENTS.md` is taken. ## ADR -This plan owns the ADR, currently at -[`docs/dev/adrs/suggestions/background-auto-estimate.md`](../adrs/suggestions/background-auto-estimate.md) -(Status: Proposed). Because this change **implements** that ADR, +This plan owns the ADR at +[`docs/dev/adrs/accepted/background-auto-estimate.md`](../adrs/accepted/background-auto-estimate.md) +(Status: Accepted — promoted from `suggestions/` in step P1.0). Because +this change **implements** that ADR, [`AGENTS.md`](../../../AGENTS.md) → **Change Discipline** requires the **same change** to promote it to `accepted/` before the PR is opened — a PR that implements an ADR must not leave it in `suggestions/`. Promotion @@ -151,7 +152,7 @@ step's `Commit:` message **before** moving to the next step or the Phase 1 review gate. Mark `[x]` in this file as part of the same commit. Phase 1 is **code + docs only — no tests** (those are Phase 2). -- [ ] **P1.0 — Promote the ADR to `accepted/`.** Per +- [x] **P1.0 — Promote the ADR to `accepted/`.** Per [`AGENTS.md`](../../../AGENTS.md) → **Change Discipline**, a change that implements an ADR must move it out of `suggestions/` in the same change. 
`git mv` From 1f7e6892d99e72a6c5b79ae9aab7076deb4a3ad6 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:23:52 -0700 Subject: [PATCH 03/33] Add pybaselines dependency --- docs/dev/plans/background-auto-estimate.md | 2 +- pixi.lock | 44 ++++++++++++++++++++++ pyproject.toml | 1 + 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index 465fa0fba..9886a9cb8 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -169,7 +169,7 @@ step's `Commit:` message **before** moving to the next step or the Phase `docs/dev/plans/background-auto-estimate.md`. Commit: `Promote background-auto-estimate ADR to accepted` -- [ ] **P1.1 — Add `pybaselines` dependency.** Add `'pybaselines>=1.1'` +- [x] **P1.1 — Add `pybaselines` dependency.** Add `'pybaselines>=1.1'` to the `dependencies` list in `pyproject.toml` (it is the new runtime backend, §4 of the ADR). Run `pixi lock` to regenerate `pixi.lock`. 
Stage `pyproject.toml` and `pixi.lock` (and diff --git a/pixi.lock b/pixi.lock index d4ab13b04..59f1ad7ed 100644 --- a/pixi.lock +++ b/pixi.lock @@ -241,6 +241,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/5c/fb93d3092640a24dfb7bd7727a24016d7c01774ca013e60efd3f683c8002/backrefs-7.0-py314-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/d0/7b958df957e4827837b590944008f0b28078f552b451f7407b4b3d54f574/asciichartpy-1.5.25-py2.py3-none-any.whl @@ -560,6 +561,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - 
pypi: https://files.pythonhosted.org/packages/3e/5c/fb93d3092640a24dfb7bd7727a24016d7c01774ca013e60efd3f683c8002/backrefs-7.0-py314-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/d0/7b958df957e4827837b590944008f0b28078f552b451f7407b4b3d54f574/asciichartpy-1.5.25-py2.py3-none-any.whl @@ -875,6 +877,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/14/615a450205e1b56d16c6783f5ccd116cde05550faad70ae077c955654a75/h5py-3.16.0-cp314-cp314-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/5c/fb93d3092640a24dfb7bd7727a24016d7c01774ca013e60efd3f683c8002/backrefs-7.0-py314-none-any.whl @@ -1218,6 +1221,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: 
https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/d0/7b958df957e4827837b590944008f0b28078f552b451f7407b4b3d54f574/asciichartpy-1.5.25-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl @@ -1547,6 +1551,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/d0/7b958df957e4827837b590944008f0b28078f552b451f7407b4b3d54f574/asciichartpy-1.5.25-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl @@ -1859,6 +1864,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - 
pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/d0/7b958df957e4827837b590944008f0b28078f552b451f7407b4b3d54f574/asciichartpy-1.5.25-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl @@ -2200,6 +2206,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/5c/fb93d3092640a24dfb7bd7727a24016d7c01774ca013e60efd3f683c8002/backrefs-7.0-py314-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/d0/7b958df957e4827837b590944008f0b28078f552b451f7407b4b3d54f574/asciichartpy-1.5.25-py2.py3-none-any.whl @@ -2519,6 +2526,7 @@ environments: - pypi: 
https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/5c/fb93d3092640a24dfb7bd7727a24016d7c01774ca013e60efd3f683c8002/backrefs-7.0-py314-none-any.whl - pypi: https://files.pythonhosted.org/packages/3f/d0/7b958df957e4827837b590944008f0b28078f552b451f7407b4b3d54f574/asciichartpy-1.5.25-py2.py3-none-any.whl @@ -2834,6 +2842,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/38/8b/7ec325b4e9e78beefc2d025b01ee8a2fde771ef7c957c3bff99b9e1fbffa/xraydb-4.5.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/eb/fea4d1d51c49832120f7f285d07306db3960f423a2612c6057caf3e8196f/pip-26.1.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/14/615a450205e1b56d16c6783f5ccd116cde05550faad70ae077c955654a75/h5py-3.16.0-cp314-cp314-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/3e/17/1f31d8562e6f970d64911f1abc330d233bc0c0601411cf7e19c1292be6da/spdx_headers-1.5.1-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/3e/5c/fb93d3092640a24dfb7bd7727a24016d7c01774ca013e60efd3f683c8002/backrefs-7.0-py314-none-any.whl @@ -8757,6 +8766,7 @@ packages: - pillow - plotly - pooch + - pybaselines>=1.1 - rich - scipy - sympy @@ -9921,6 +9931,40 @@ packages: requires_dist: - prompt-toolkit>=2.0,<4.0 requires_python: '>=3.9' +- pypi: https://files.pythonhosted.org/packages/3d/66/f044d53935b142d47ce2a65b8c4f51fdb5ca85ee1035fb2b7857971b122e/pybaselines-1.2.1-py3-none-any.whl + name: pybaselines + version: 1.2.1 + sha256: d8f224a0b5ac4cdcef861bc60533131c37255b4d1193f18a410bc37fe5217c73 + requires_dist: + - numpy>=1.20 + - scipy>=1.6 + - build ; extra == 'dev' + - bump-my-version ; extra == 'dev' + - matplotlib ; extra == 'dev' + - numba>=0.53 ; extra == 'dev' + - numpydoc ; extra == 'dev' + - pentapy>=1.1 ; extra == 'dev' + - pytest>=6.0 ; extra == 'dev' + - ruff ; extra == 'dev' + - sphinx ; extra == 'dev' + - sphinx-copybutton ; extra == 'dev' + - sphinx-gallery>=0.16 ; extra == 'dev' + - sphinx-rtd-theme ; extra == 'dev' + - twine ; extra == 'dev' + - matplotlib ; extra == 'docs' + - numpydoc ; extra == 'docs' + - sphinx ; extra == 'docs' + - sphinx-copybutton ; extra == 'docs' + - sphinx-gallery>=0.16 ; extra == 'docs' + - sphinx-rtd-theme ; extra == 'docs' + - numba>=0.53 ; extra == 'full' + - pentapy>=1.1 ; extra == 'full' + - build ; extra == 'release' + - bump-my-version ; extra == 'release' + - twine ; extra == 'release' + - pytest>=6.0 ; extra == 'test' + - ruff ; extra == 'test' + requires_python: '>=3.9' - pypi: https://files.pythonhosted.org/packages/3e/14/615a450205e1b56d16c6783f5ccd116cde05550faad70ae077c955654a75/h5py-3.16.0-cp314-cp314-win_amd64.whl name: h5py version: 3.16.0 diff --git a/pyproject.toml b/pyproject.toml index 6e5b27250..a91cad3b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ dependencies = [ 'pandas', # Displaying tables in Jupyter notebooks 'plotly', # Interactive plots 'pillow', # Rendering 
structure figures (labels, legend) for reports + 'pybaselines>=1.1', # Background curve estimation backend (SNIP, arPLS, fabc) ] [project.optional-dependencies] From 2ee2420fd163af2ce0a8337bea941b6b20e2d0cf Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:24:58 -0700 Subject: [PATCH 04/33] Add BackgroundEstimatorMethodEnum --- docs/dev/plans/background-auto-estimate.md | 2 +- .../experiment/categories/background/enums.py | 28 ++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index 9886a9cb8..a94544db3 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -176,7 +176,7 @@ step's `Commit:` message **before** moving to the next step or the Phase `pixi.toml` only if a direct pin was required). Commit: `Add pybaselines dependency` -- [ ] **P1.2 — Add `BackgroundEstimatorMethodEnum`.** In `enums.py`, add +- [x] **P1.2 — Add `BackgroundEstimatorMethodEnum`.** In `enums.py`, add a `StrEnum` with members `AUTO='auto'`, `SNIP='snip'`, `ARPLS='arpls'`, `FABC='fabc'`, plus `default()` (returns `AUTO`) and `description()`, following the existing `BackgroundTypeEnum`. 
diff --git a/src/easydiffraction/datablocks/experiment/categories/background/enums.py b/src/easydiffraction/datablocks/experiment/categories/background/enums.py index c0e325a73..de3e0e13d 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/enums.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/enums.py @@ -1,6 +1,6 @@ # SPDX-FileCopyrightText: 2026 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause -"""Enumerations for background model types.""" +"""Enumerations for background model types and estimation methods.""" from __future__ import annotations @@ -26,3 +26,29 @@ def description(self) -> str: if self is BackgroundTypeEnum.CHEBYSHEV: return 'Chebyshev polynomial background' return None + + +class BackgroundEstimatorMethodEnum(StrEnum): + """Supported automatic background-estimation methods.""" + + AUTO = 'auto' + SNIP = 'snip' + ARPLS = 'arpls' + FABC = 'fabc' + + @classmethod + def default(cls) -> BackgroundEstimatorMethodEnum: + """Return the default estimation method.""" + return cls.AUTO + + def description(self) -> str: + """Human-friendly description for the enum value.""" + if self is BackgroundEstimatorMethodEnum.AUTO: + return 'Let the library choose (currently arPLS)' + if self is BackgroundEstimatorMethodEnum.SNIP: + return 'SNIP iterative peak-clipping baseline' + if self is BackgroundEstimatorMethodEnum.ARPLS: + return 'Asymmetrically reweighted penalized least squares' + if self is BackgroundEstimatorMethodEnum.FABC: + return 'Fully automatic baseline correction (classification)' + return None From 09313fd711a4718d8d8d7b6b794ee7ab54d34033 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:36:32 -0700 Subject: [PATCH 05/33] Add background curve estimator helper --- docs/dev/plans/background-auto-estimate.md | 9 +- .../categories/background/estimate.py | 497 ++++++++++++++++++ 2 files changed, 504 insertions(+), 2 deletions(-) create mode 100644 
src/easydiffraction/datablocks/experiment/categories/background/estimate.py diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index a94544db3..9fbf8f36d 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -183,9 +183,14 @@ step's `Commit:` message **before** moving to the next step or the Phase No `__init__.py` change (the enum is imported directly, like `BackgroundTypeEnum`). Commit: `Add BackgroundEstimatorMethodEnum` -- [ ] **P1.3 — Add the background curve estimator helper.** Create the +- [x] **P1.3 — Add the background curve estimator helper.** Create the new module `estimate.py` with a pure - `estimate_background_curve(x, y, *, method='arpls', beam_mode, peaks=None, width=None, smoothness=None, n_points=None) -> BackgroundEstimate`. + `estimate_background_curve(x, y, *, method='arpls', peaks=None, width=None, smoothness=None, n_points=None) -> BackgroundEstimate`. + (The ADR §6 sketch also lists `beam_mode`; it is **omitted from the + Phase 1 helper** — unused until the deferred per-beam-mode policy, + and keeping it would push the signature past the project's + `PLR0913` 7-argument limit, which this plan honors rather than + bypasses.) `method` is the **resolved** Stage-1 algorithm (`snip` / `arpls` / `fabc` — never `auto`) and selects the `pybaselines` routine, so **all backend dispatch lives in the helper**, not the adapter. diff --git a/src/easydiffraction/datablocks/experiment/categories/background/estimate.py b/src/easydiffraction/datablocks/experiment/categories/background/estimate.py new file mode 100644 index 000000000..db9e4759b --- /dev/null +++ b/src/easydiffraction/datablocks/experiment/categories/background/estimate.py @@ -0,0 +1,497 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +""" +Automatic background-curve estimation for powder patterns. 
+ +Pure, array-in / array-out helpers with no experiment-model state. The +estimator runs in two stages (see the ``background-auto-estimate`` ADR): + +* Stage 1 builds a peak-insensitive background curve ``B(x)`` over the + whole grid using :mod:`pybaselines`. +* Stage 2 thins ``B(x)`` to a sparse set of ``(x, intensity)`` anchors + with a vertical Ramer-Douglas-Peucker simplification, keeping the + endpoints and never placing a non-endpoint anchor on a peak. + +All per-dataset parameters (peak width, noise, smoothing penalty) are +derived from the data so a bare call needs no tuning. The numeric +constants below are first cuts; they are calibrated against the tutorial +corpus in Phase 2. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np +from pybaselines import Baseline +from scipy.signal import find_peaks +from scipy.signal import peak_widths + +from easydiffraction.utils.logging import log + +# Stage-2 RDP tolerance as a multiple of the noise sigma (c in c*sigma). +_NOISE_TOLERANCE_FACTOR = 2.0 +# Robust upper percentile of measured peak widths used as the window. +_WIDTH_PERCENTILE = 75.0 +# Peak prominence threshold for find_peaks, in units of noise sigma. +_PEAK_PROMINENCE_FACTOR = 3.0 +# Relative height at which peak widths are measured (FWHM). +_PEAK_WIDTH_REL_HEIGHT = 0.5 +# snip max_half_window as a multiple of the peak width W. +_SNIP_WINDOW_FACTOR = 1.0 +# fabc min_length as a multiple of the peak width W. +_FABC_MIN_LENGTH_FACTOR = 1.0 +# Second-difference noise inflation: var(diff2) = 6 * var(noise). +_SECOND_DIFF_SCALE = 6.0**0.5 +# MAD-to-sigma scaling for a normal distribution. +_MAD_TO_SIGMA = 1.4826 +# Floor for the derived Whittaker penalty. +_LAM_FLOOR = 1.0e2 +# Smallest pattern (in points) the estimator can work on. +_MIN_POINTS = 5 +# Fallback peak width (in points) when no peaks can be detected. +_FALLBACK_WIDTH = 10.0 +# Geometric growth of the RDP tolerance when capping the anchor count. 
+_TOLERANCE_GROWTH = 1.3 +# Maximum tolerance-growth iterations when enforcing ``n_points``. +_MAX_CAP_ITERATIONS = 20 + + +@dataclass(frozen=True) +class BackgroundEstimate: + """ + Result of a background-curve estimation. + + Attributes + ---------- + curve : np.ndarray + Dense peak-insensitive background ``B(x)`` over the input grid. + anchors : np.ndarray + Thinned control points with shape ``(n_anchors, 2)`` whose rows + are ``(x, intensity)``; heights are read from ``curve``. + method : str + Resolved Stage-1 method actually run (``snip``/``arpls``/ + ``fabc``). + width : float + Effective peak width ``W`` in points (supplied, derived, or the + degenerate-input fallback). + noise : float + Robust noise estimate ``sigma`` from the second difference. + tolerance : float + Stage-2 RDP tolerance actually used (``c * sigma``). + backend_params : dict[str, float] + Parameters handed to the :mod:`pybaselines` routine. + """ + + curve: np.ndarray + anchors: np.ndarray + method: str + width: float + noise: float + tolerance: float + backend_params: dict[str, float] + + +def _robust_noise(y: np.ndarray) -> float: + """ + Estimate the noise standard deviation, insensitive to peaks. + + Uses the median absolute deviation (MAD) of the second difference of + the intensities; the second difference suppresses the smooth + background and most peak signal, leaving noise. + + Parameters + ---------- + y : np.ndarray + Intensities over the grid. + + Returns + ------- + float + Estimated noise sigma; ``0.0`` for flat input. + """ + diff2 = np.diff(y, n=2) + mad = float(np.median(np.abs(diff2 - np.median(diff2)))) + return _MAD_TO_SIGMA * mad / _SECOND_DIFF_SCALE + + +def _measure_width(y: np.ndarray, sigma: float) -> tuple[float, np.ndarray]: + """ + Measure a robust peak width and the prominent peak positions. 
+ + Peaks are found with a prominence threshold relative to the noise and + their full-width-at-half-maximum is summarised by a high percentile, + so the window clears the broadest (e.g. high-angle CWL) peaks. + + Parameters + ---------- + y : np.ndarray + Intensities over the grid. + sigma : float + Noise estimate used for the prominence threshold. + + Returns + ------- + width : float + Robust peak width in points; the fallback when no peaks are + found. + peaks : np.ndarray + Indices of the detected peaks (possibly empty). + """ + prominence = _PEAK_PROMINENCE_FACTOR * sigma if sigma > 0 else None + peaks, _ = find_peaks(y, prominence=prominence) + if not peaks.size: + return _FALLBACK_WIDTH, peaks + widths = peak_widths(y, peaks, rel_height=_PEAK_WIDTH_REL_HEIGHT)[0] + width = float(np.percentile(widths, _WIDTH_PERCENTILE)) + return max(width, 1.0), peaks + + +def _forbidden_from_peaks(n: int, peaks: np.ndarray, width: float) -> np.ndarray: + """ + Build a peak-region mask from detected peak positions. + + Each peak is widened by ``+/- width`` points; Stage 2 must not place + a non-endpoint anchor on a masked sample. + + Parameters + ---------- + n : int + Number of grid points. + peaks : np.ndarray + Indices of detected peaks. + width : float + Half-width (in points) masked around each peak. + + Returns + ------- + np.ndarray + Boolean mask of length ``n``; ``True`` marks a peak region. + """ + mask = np.zeros(n, dtype=bool) + half = int(np.ceil(width)) + for peak in peaks: + lo = max(0, int(peak) - half) + hi = min(n, int(peak) + half + 1) + mask[lo:hi] = True + return mask + + +def _derive_lam(n: int, width: float) -> float: + """ + Derive a Whittaker smoothing penalty for arPLS/fabc. + + The penalty grows with the grid size and the peak width so the + baseline stays smooth under broad features. The scaling is a + monotonic first cut; the constant is calibrated in Phase 2. + + Parameters + ---------- + n : int + Number of grid points. 
+ width : float + Peak width in points. + + Returns + ------- + float + The ``lam`` penalty passed to the backend. + """ + return float(max(_LAM_FLOOR, n * max(width, 1.0))) + + +def _stage1_baseline( + x: np.ndarray, + y: np.ndarray, + method: str, + width: float, + smoothness: float | None, +) -> tuple[np.ndarray, dict[str, float]]: + """ + Compute the Stage-1 background curve via pybaselines. + + Dispatches to the resolved ``method`` and maps the derived width and + optional smoothness onto the backend parameters (the plan's backend + dispatch contract). + + Parameters + ---------- + x : np.ndarray + Grid coordinates. + y : np.ndarray + Intensities (data-only or peak-subtracted) to baseline. + method : str + Resolved method: ``snip``, ``arpls`` or ``fabc``. + width : float + Peak width in points. + smoothness : float or None + Optional Whittaker penalty override. + + Returns + ------- + curve : np.ndarray + Estimated background over the grid. + backend_params : dict[str, float] + Parameters passed to the backend. + + Raises + ------ + ValueError + If ``method`` is not a supported Stage-1 routine. 
+ """ + fitter = Baseline(x_data=x) + if method == 'arpls': + lam = smoothness if smoothness is not None else _derive_lam(y.size, width) + curve, _ = fitter.arpls(y, lam=lam) + return curve, {'lam': float(lam)} + if method == 'snip': + if smoothness is not None: + log.warning("Method 'snip' ignores the 'smoothness' parameter.") + max_half_window = int(np.ceil(_SNIP_WINDOW_FACTOR * width)) + curve, _ = fitter.snip(y, max_half_window=max_half_window) + return curve, {'max_half_window': float(max_half_window)} + if method == 'fabc': + lam = smoothness if smoothness is not None else _derive_lam(y.size, width) + scale = int(np.ceil(width)) + min_length = int(np.ceil(_FABC_MIN_LENGTH_FACTOR * width)) + curve, _ = fitter.fabc(y, lam=lam, scale=scale, min_length=min_length) + return curve, {'lam': float(lam), 'scale': float(scale), 'min_length': float(min_length)} + msg = f'Unsupported Stage-1 background method: {method!r}' + raise ValueError(msg) + + +def _rdp_indices(x: np.ndarray, curve: np.ndarray, epsilon: float) -> np.ndarray: + """ + Vertical Ramer-Douglas-Peucker simplification of a curve. + + Returns the indices of the points to keep so that every dropped point + lies within ``epsilon`` (in intensity units) of the piecewise-linear + interpolation through the kept points. The endpoints are always kept. + + Parameters + ---------- + x : np.ndarray + Monotonic grid coordinates. + curve : np.ndarray + Curve values to simplify. + epsilon : float + Maximum allowed vertical deviation. + + Returns + ------- + np.ndarray + Sorted indices of the retained points. 
+ """ + n = x.size + keep = np.zeros(n, dtype=bool) + keep[0] = True + keep[-1] = True + stack = [(0, n - 1)] + while stack: + start, end = stack.pop() + if end <= start + 1: + continue + span = x[end] - x[start] + if span <= 0: + continue + segment = slice(start, end + 1) + line = curve[start] + (curve[end] - curve[start]) * (x[segment] - x[start]) / span + deviation = np.abs(curve[segment] - line) + deviation[0] = 0.0 + deviation[-1] = 0.0 + local = int(np.argmax(deviation)) + if deviation[local] > epsilon: + index = start + local + keep[index] = True + stack.append((start, index)) + stack.append((index, end)) + return np.flatnonzero(keep) + + +def _drop_forbidden(indices: np.ndarray, forbidden: np.ndarray, n: int) -> np.ndarray: + """ + Drop non-endpoint anchors that fall on a forbidden (peak) sample. + + Parameters + ---------- + indices : np.ndarray + Candidate anchor indices (sorted, includes the endpoints). + forbidden : np.ndarray + Boolean peak-region mask. + n : int + Number of grid points, used to identify the endpoints. + + Returns + ------- + np.ndarray + Filtered indices, always retaining ``0`` and ``n - 1``. + """ + endpoints = {0, n - 1} + kept = [int(i) for i in indices if int(i) in endpoints or not forbidden[i]] + return np.array(sorted(set(kept)), dtype=int) + + +def _thin_to_anchors( + x: np.ndarray, + curve: np.ndarray, + epsilon: float, + forbidden: np.ndarray, + n_points: int | None, +) -> np.ndarray: + """ + Select anchor indices: RDP, drop peak-region anchors, cap the count. + + When more than ``n_points`` anchors survive, the RDP tolerance is + grown geometrically and the simplification re-run until the count + fits (the endpoints are always retained). + + Parameters + ---------- + x : np.ndarray + Grid coordinates. + curve : np.ndarray + Background curve to thin. + epsilon : float + RDP tolerance (intensity units). + forbidden : np.ndarray + Boolean peak-region mask; non-endpoint anchors here are dropped. 
+ n_points : int or None + Optional maximum number of anchors (endpoints included). + + Returns + ------- + np.ndarray + Sorted anchor indices, always including the two endpoints. + """ + indices = _drop_forbidden(_rdp_indices(x, curve, epsilon), forbidden, x.size) + if n_points is None: + return indices + tolerance = epsilon + for _ in range(_MAX_CAP_ITERATIONS): + if indices.size <= n_points: + break + tolerance *= _TOLERANCE_GROWTH + indices = _drop_forbidden(_rdp_indices(x, curve, tolerance), forbidden, x.size) + return indices + + +def _flat_estimate( + x: np.ndarray, + y: np.ndarray, + method: str, + width: float | None, + noise: float, +) -> BackgroundEstimate: + """ + Build a trivial flat-background estimate for degenerate input. + + Parameters + ---------- + x : np.ndarray + Grid coordinates. + y : np.ndarray + Intensities. + method : str + Resolved method (recorded for the summary). + width : float or None + Supplied width, if any. + noise : float + Noise estimate. + + Returns + ------- + BackgroundEstimate + A flat curve at the data minimum with two endpoint anchors. + """ + level = float(np.min(y)) if y.size else 0.0 + curve = np.full(x.size, level) + anchors = np.array([[x[0], level], [x[-1], level]]) if x.size else np.empty((0, 2)) + return BackgroundEstimate( + curve=curve, + anchors=anchors, + method=method, + width=float(width) if width is not None else _FALLBACK_WIDTH, + noise=noise, + tolerance=_NOISE_TOLERANCE_FACTOR * noise, + backend_params={}, + ) + + +def estimate_background_curve( + x: np.ndarray, + y: np.ndarray, + *, + method: str = 'arpls', + peaks: np.ndarray | None = None, + width: float | None = None, + smoothness: float | None = None, + n_points: int | None = None, +) -> BackgroundEstimate: + """ + Estimate background control points from a measured pattern. + + Stage 1 builds a peak-insensitive curve ``B(x)`` with the resolved + ``method``; Stage 2 thins it to sparse anchors. 
Every per-dataset + parameter defaults to a data-derived value, so a bare call works. + + Parameters + ---------- + x : np.ndarray + Grid coordinates (e.g. 2theta or time-of-flight), monotonic. + y : np.ndarray + Intensities to baseline: the measured pattern (data-only) or the + peak-subtracted measured pattern (model-guided). + method : str, optional + Resolved Stage-1 routine: ``arpls`` (default), ``snip`` or + ``fabc``. ``auto`` is resolved by the caller, never here. + peaks : np.ndarray or None, optional + Boolean mask aligned with ``x``; ``True`` forbids a non-endpoint + anchor. When ``None`` the mask is derived from ``y`` itself. + width : float or None, optional + Peak width in points; derived from ``y`` when ``None``. + smoothness : float or None, optional + Whittaker penalty override for ``arpls``/``fabc``; ignored by + ``snip``. + n_points : int or None, optional + Maximum number of anchors (endpoints included); uncapped when + ``None``. + + Returns + ------- + BackgroundEstimate + The curve, anchors, and metadata describing the run. 
+ """ + x = np.asarray(x, dtype=float) + y = np.asarray(y, dtype=float) + noise = _robust_noise(y) + + if y.size < _MIN_POINTS: + log.warning('Pattern too short to estimate a background; returning a flat one.') + return _flat_estimate(x, y, method, width, noise) + + detected: np.ndarray = np.array([], dtype=int) + if width is None or peaks is None: + measured_width, detected = _measure_width(y, noise) + if width is None: + width = measured_width + + if peaks is None: + forbidden = _forbidden_from_peaks(y.size, detected, width) + if not detected.size: + log.warning('No peaks detected; background anchors may be unreliable.') + else: + forbidden = np.asarray(peaks, dtype=bool) + + curve, backend_params = _stage1_baseline(x, y, method, width, smoothness) + tolerance = _NOISE_TOLERANCE_FACTOR * noise + indices = _thin_to_anchors(x, curve, tolerance, forbidden, n_points) + anchors = np.column_stack((x[indices], curve[indices])) + return BackgroundEstimate( + curve=curve, + anchors=anchors, + method=method, + width=float(width), + noise=noise, + tolerance=float(tolerance), + backend_params=backend_params, + ) From e6ad77c1c313982ef31e7a76c378f0883a18966c Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:39:10 -0700 Subject: [PATCH 06/33] Add clear method to CollectionBase --- docs/dev/plans/background-auto-estimate.md | 2 +- src/easydiffraction/core/category.py | 10 ++++++++++ src/easydiffraction/core/collection.py | 10 ++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index 9fbf8f36d..52cabc1a0 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -246,7 +246,7 @@ step's `Commit:` message **before** moving to the next step or the Phase baseline (`m ≈ 1`). 
Commit: `Add background curve estimator helper` -- [ ] **P1.4 — Add `CollectionBase.clear()`.** Add a bulk reset to +- [x] **P1.4 — Add `CollectionBase.clear()`.** Add a bulk reset to `CollectionBase` (`core/collection.py`). It must **not** be a bare `self._items = []`: that would strand the name `_index` and leave removed children with a stale `_parent`. Implement it by diff --git a/src/easydiffraction/core/category.py b/src/easydiffraction/core/category.py index 93afa631f..53b51d9b9 100644 --- a/src/easydiffraction/core/category.py +++ b/src/easydiffraction/core/category.py @@ -269,3 +269,13 @@ def create(self, **kwargs: object) -> None: setattr(child_obj, attr, val) self.add(child_obj) + + def clear(self) -> None: + """ + Remove every item, then mark the parent datablock dirty. + + Layers dirty-marking on :meth:`CollectionBase.clear`, mirroring + how :meth:`add` layers it on the base insert. + """ + super().clear() + self._mark_parent_dirty() diff --git a/src/easydiffraction/core/collection.py b/src/easydiffraction/core/collection.py index 625ff6610..677af2d39 100644 --- a/src/easydiffraction/core/collection.py +++ b/src/easydiffraction/core/collection.py @@ -126,6 +126,16 @@ def remove(self, name: str) -> None: """ del self[name] + def clear(self) -> None: + """ + Remove every item, unlinking each from this collection. + + Delegates to :meth:`_adopt_items` with an empty list: every + child has ``_parent`` cleared, ``_items`` is emptied, and the + index is rebuilt, matching the invariants ``__delitem__`` keeps. + """ + self._adopt_items([]) + def _key_for(self, item: GuardedBase) -> str | None: # noqa: PLR6301 """ Return the identity key for *item*. 
From 8195dd39c6053386f6033acdc2853ff47e69bd81 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:48:03 -0700 Subject: [PATCH 07/33] Add auto_estimate to LineSegmentBackground --- docs/dev/plans/background-auto-estimate.md | 2 +- .../categories/background/line_segment.py | 141 ++++++++++++++++++ 2 files changed, 142 insertions(+), 1 deletion(-) diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index 52cabc1a0..e7cce0f84 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -263,7 +263,7 @@ step's `Commit:` message **before** moving to the next step or the Phase these invariants is added in Phase 2.) Commit: `Add clear method to CollectionBase` -- [ ] **P1.5 — Add `LineSegmentBackground.auto_estimate()`.** In +- [x] **P1.5 — Add `LineSegmentBackground.auto_estimate()`.** In `line_segment.py`, add the public method (signature in _Decisions_). It: reads `self._parent.data`; chooses the helper input `y` — data-only `intensity_meas`, or, when `use_model` and diff --git a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py index 0e48ddb91..ac0189426 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py @@ -10,6 +10,8 @@ import numpy as np from scipy.interpolate import interp1d +from scipy.signal import find_peaks +from scipy.signal import peak_widths from easydiffraction.core.category import CategoryItem from easydiffraction.core.display_handler import DisplayHandler @@ -22,6 +24,8 @@ from easydiffraction.core.variable import NumericDescriptor from easydiffraction.core.variable import Parameter from easydiffraction.core.variable import StringDescriptor +from easydiffraction.datablocks.experiment.categories.background 
import enums +from easydiffraction.datablocks.experiment.categories.background import estimate from easydiffraction.datablocks.experiment.categories.background.base import BackgroundBase from easydiffraction.datablocks.experiment.categories.background.factory import BackgroundFactory from easydiffraction.datablocks.experiment.item.enums import BeamModeEnum @@ -143,6 +147,68 @@ def y(self, value: float) -> None: self._y.value = value +def _resolve_method(method: str) -> str: + """ + Validate a method name and resolve ``auto``. + + Parameters + ---------- + method : str + Requested method; one of the ``BackgroundEstimatorMethodEnum`` + values. + + Returns + ------- + str + The resolved Stage-1 method (``auto`` becomes ``arpls``). + + Raises + ------ + ValueError + If ``method`` is not a known estimator method. + """ + try: + chosen = enums.BackgroundEstimatorMethodEnum(method) + except ValueError as exc: + valid = ', '.join(member.value for member in enums.BackgroundEstimatorMethodEnum) + msg = f'Unknown background method {method!r}. Choose one of: {valid}.' + raise ValueError(msg) from exc + if chosen is enums.BackgroundEstimatorMethodEnum.AUTO: + return enums.BackgroundEstimatorMethodEnum.ARPLS.value + return chosen.value + + +def _model_peak_mask(peak_only: np.ndarray) -> np.ndarray: + """ + Build a forbidden-anchor mask from a peak-only model array. + + Peaks in the peak-only model are detected and widened by their own + full-width-at-half-maximum; Stage 2 must not place a non-endpoint + anchor on any masked sample. + + Parameters + ---------- + peak_only : np.ndarray + Peak-only model intensities (``intensity_calc - intensity_bkg``). + + Returns + ------- + np.ndarray + Boolean mask aligned with ``peak_only``. 
+ """ + mask = np.zeros(peak_only.size, dtype=bool) + peaks, _ = find_peaks(peak_only) + if not peaks.size: + return mask + widths = peak_widths(peak_only, peaks, rel_height=0.5)[0] + for index, peak_width in zip(peaks, widths, strict=True): + half = int(np.ceil(peak_width)) + lo = max(0, int(index) - half) + hi = int(index) + half + 1 + mask[lo:hi] = True + return mask + + @BackgroundFactory.register class LineSegmentBackground(BackgroundBase): """Linear-interpolation background between user-defined points.""" @@ -190,6 +256,81 @@ def _update( y = interp_func(x) data._set_intensity_bkg(y) + def auto_estimate( + self, + *, + method: str = 'auto', + width: float | None = None, + smoothness: float | None = None, + n_points: int | None = None, + use_model: bool = True, + ) -> None: + """ + Detect background control points from the measured pattern. + + Builds a peak-insensitive background curve and thins it to a + sparse set of fixed line-segment points, overwriting any existing + ones. Heights come from the de-peaked curve, clipped to the + measured intensities so they never eat into peaks. After at least + one calculation, ``use_model`` lets the fitted model place better + points across overlapped regions. + + Parameters + ---------- + method : str, optional + Estimation method: ``auto`` (default, resolves to ``arpls``), + ``snip``, ``arpls`` or ``fabc``. + width : float or None, optional + Peak width in points; measured from the data when ``None``. + smoothness : float or None, optional + Backend smoothing override; derived when ``None``. + n_points : int or None, optional + Maximum number of points; uncapped when ``None``. + use_model : bool, optional + When a calculation has run, subtract the fitted peaks before + estimating so anchors land in true inter-peak gaps. 
+ """ + resolved = _resolve_method(method) + data = self._parent.data + x = np.asarray(data.x, dtype=float) + intensity_meas = np.asarray(data.intensity_meas, dtype=float) + intensity_calc = np.asarray(data.intensity_calc, dtype=float) + + if use_model and np.any(intensity_calc): + peak_only = intensity_calc - np.asarray(data.intensity_bkg, dtype=float) + y = intensity_meas - peak_only + peaks = _model_peak_mask(peak_only) + else: + y = intensity_meas + peaks = None + + result = estimate.estimate_background_curve( + x, + y, + method=resolved, + peaks=peaks, + width=width, + smoothness=smoothness, + n_points=n_points, + ) + + anchor_x = result.anchors[:, 0] + measured = np.interp(anchor_x, x, intensity_meas) + heights = np.clip(result.anchors[:, 1], 0.0, measured) + + if len(self): + log.info('Replacing existing background points with a new estimate.') + self.clear() + for index, (point_x, height) in enumerate(zip(anchor_x, heights, strict=True), start=1): + self.create(id=str(index), x=float(point_x), y=float(height)) + for point in self._items: + point.y.free = False + + count = len(self) + width_pts = result.width + summary = f'Background estimate: {resolved}, {count} points, width {width_pts:.0f} pts' + log.info(summary) + def show(self) -> None: """Print a table of control points (x, intensity).""" columns_headers: list[str] = ['X', 'Intensity'] From c0f09366b04bc79b491e88f8882dfbc15e8f76df Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:48:31 -0700 Subject: [PATCH 08/33] Reach Phase 1 review gate --- docs/dev/plans/background-auto-estimate.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index e7cce0f84..f0937f758 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -284,7 +284,7 @@ step's `Commit:` message **before** moving to the next step or the Phase Numpy-style docstring; 
no `**kwargs`. Commit: `Add auto_estimate to LineSegmentBackground` -- [ ] **P1.6 — Phase 1 review gate.** No code. Mark this `[x]`, commit +- [x] **P1.6 — Phase 1 review gate.** No code. Mark this `[x]`, commit the checklist update alone, and hand off to `/review-impl-1`. Commit: `Reach Phase 1 review gate` From 3011b77ea9fb683f120218dc0e3cc31eea53b086 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 08:59:36 -0700 Subject: [PATCH 09/33] Guard auto_estimate against empty active data --- .../experiment/categories/background/line_segment.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py index ac0189426..b2b860fc8 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py @@ -293,6 +293,9 @@ def auto_estimate( resolved = _resolve_method(method) data = self._parent.data x = np.asarray(data.x, dtype=float) + if x.size == 0: + log.warning('No active data points; cannot estimate a background.') + return intensity_meas = np.asarray(data.intensity_meas, dtype=float) intensity_calc = np.asarray(data.intensity_calc, dtype=float) From 062011423c03ad5564d5c32fcefe6996a2e6bc89 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:01:07 -0700 Subject: [PATCH 10/33] Make n_points cap deterministic at zero noise --- .../categories/background/estimate.py | 58 +++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/src/easydiffraction/datablocks/experiment/categories/background/estimate.py b/src/easydiffraction/datablocks/experiment/categories/background/estimate.py index db9e4759b..ffc2e3d6f 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/estimate.py +++ 
b/src/easydiffraction/datablocks/experiment/categories/background/estimate.py @@ -331,6 +331,53 @@ def _drop_forbidden(indices: np.ndarray, forbidden: np.ndarray, n: int) -> np.nd return np.array(sorted(set(kept)), dtype=int) +def _cap_by_deviation( + x: np.ndarray, + curve: np.ndarray, + indices: np.ndarray, + n_points: int, +) -> np.ndarray: + """ + Reduce anchors to ``n_points``, keeping the endpoints. + + The two endpoints are always retained; the remaining slots go to the + interior anchors that deviate most from the straight chord between + them. Guarantees the cap even when the RDP tolerance cannot reduce + the count (e.g. zero-noise data, where the tolerance stays zero). + + Parameters + ---------- + x : np.ndarray + Grid coordinates. + curve : np.ndarray + Background curve. + indices : np.ndarray + Candidate anchor indices (sorted, includes the endpoints). + n_points : int + Target maximum number of anchors (``>= 2``). + + Returns + ------- + np.ndarray + ``min(indices.size, n_points)`` sorted indices. + """ + if indices.size <= n_points: + return indices + first = indices[0] + last = indices[-1] + interior = indices[1:-1] + keep_count = max(n_points - 2, 0) + span = x[last] - x[first] + if span <= 0 or keep_count == 0: + chosen = interior[:keep_count] + else: + line = curve[first] + (curve[last] - curve[first]) * (x[interior] - x[first]) / span + deviation = np.abs(curve[interior] - line) + start = interior.size - keep_count + chosen = interior[np.sort(np.argsort(deviation)[start:])] + return np.concatenate(([first], chosen, [last])) + + def _thin_to_anchors( x: np.ndarray, curve: np.ndarray, @@ -342,8 +389,9 @@ def _thin_to_anchors( Select anchor indices: RDP, drop peak-region anchors, cap the count. When more than ``n_points`` anchors survive, the RDP tolerance is - grown geometrically and the simplification re-run until the count - fits (the endpoints are always retained). 
+ grown geometrically and re-run until the count fits; if that cannot + reduce it (e.g. zero noise), a deviation-based cap guarantees the + bound. The endpoints are always retained. Parameters ---------- @@ -364,14 +412,16 @@ def _thin_to_anchors( Sorted anchor indices, always including the two endpoints. """ indices = _drop_forbidden(_rdp_indices(x, curve, epsilon), forbidden, x.size) - if n_points is None: + if n_points is None or indices.size <= n_points: return indices tolerance = epsilon for _ in range(_MAX_CAP_ITERATIONS): - if indices.size <= n_points: + if tolerance <= 0 or indices.size <= n_points: break tolerance *= _TOLERANCE_GROWTH indices = _drop_forbidden(_rdp_indices(x, curve, tolerance), forbidden, x.size) + if indices.size > n_points: + indices = _cap_by_deviation(x, curve, indices, n_points) return indices From 8b5493fc8a045bddd119ac185c094e619c0ededa Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:02:27 -0700 Subject: [PATCH 11/33] Validate auto_estimate numeric overrides --- .../categories/background/line_segment.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py index b2b860fc8..36d51971f 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py @@ -35,6 +35,8 @@ from easydiffraction.utils.logging import log from easydiffraction.utils.utils import render_table +_MIN_ANCHOR_POINTS = 2 # Minimum line-segment anchors (the two endpoints) + class LineSegment(CategoryItem): """Single background control point for interpolation.""" @@ -178,6 +180,39 @@ def _resolve_method(method: str) -> str: return chosen.value +def _validate_overrides( + width: float | None, + smoothness: float | None, + n_points: int | None, +) -> None: + """ + 
Validate the public numeric overrides of ``auto_estimate``. + + Parameters + ---------- + width : float or None + Peak width override; must be positive when supplied. + smoothness : float or None + Smoothing override; must be positive when supplied. + n_points : int or None + Anchor cap; must be an integer ``>= 2`` when supplied. + + Raises + ------ + ValueError + If any supplied override is out of range. + """ + if width is not None and width <= 0: + msg = f'width must be positive, got {width!r}.' + raise ValueError(msg) + if smoothness is not None and smoothness <= 0: + msg = f'smoothness must be positive, got {smoothness!r}.' + raise ValueError(msg) + if n_points is not None and (not isinstance(n_points, int) or n_points < _MIN_ANCHOR_POINTS): + msg = f'n_points must be an integer >= 2, got {n_points!r}.' + raise ValueError(msg) + + def _model_peak_mask(peak_only: np.ndarray) -> np.ndarray: """ Build a forbidden-anchor mask from a peak-only model array. @@ -291,6 +326,7 @@ def auto_estimate( estimating so anchors land in true inter-peak gaps. """ resolved = _resolve_method(method) + _validate_overrides(width, smoothness, n_points) data = self._parent.data x = np.asarray(data.x, dtype=float) if x.size == 0: From 0cb131e4b2ca4cdd5e01e9b231be9822beaccb7e Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:03:56 -0700 Subject: [PATCH 12/33] Update ADR to match Phase 1 helper contract --- docs/dev/adrs/accepted/background-auto-estimate.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/dev/adrs/accepted/background-auto-estimate.md b/docs/dev/adrs/accepted/background-auto-estimate.md index c6755e27b..2da058771 100644 --- a/docs/dev/adrs/accepted/background-auto-estimate.md +++ b/docs/dev/adrs/accepted/background-auto-estimate.md @@ -329,15 +329,21 @@ active points only. ### 6. Where the code lives A backend-agnostic estimator helper — -`estimate_background_curve(x, y, *, beam_mode, peaks=None, width=None, ...) 
-> (curve, anchors)` +`estimate_background_curve(x, y, *, method='arpls', peaks=None, width=None, ...) -> BackgroundEstimate` — lives in a new small module in the background package (e.g. `datablocks/experiment/categories/background/estimate.py`). It is pure -array-in/array-out (the optional `peaks` argument carries model peak -positions detected from the peak-only model array per §5 — not +array-in/array-out (the optional `peaks` argument is a boolean mask +aligned with `x` that forbids non-endpoint anchors on peak samples, +built by the adapter from the peak-only model array per §5 — not reflection metadata), holds no model state, wraps `pybaselines` for Stage 1, and keeps the §3 parameterization and Stage-2 thinning in-house — so it stays unit-testable in isolation and pulls no domain logic into -`core/`. `LineSegmentBackground.auto_estimate()` is a thin adapter: read +`core/`. It returns a small `BackgroundEstimate` result object (curve, +anchors, and the method/width/noise/tolerance/backend-params metadata +the adapter logs). The `beam_mode` argument from earlier drafts is +deferred with the per-beam-mode policy (see _Deferred Work_); omitting +it also keeps the helper within the project's argument-count guardrail. +`LineSegmentBackground.auto_estimate()` is a thin adapter: read the pattern (and model, if present), call the helper, clip, and `create()` the points. 
Helpers are extracted as needed to stay under the lint complexity thresholds From 527ff412b16e92c5febbccabf570046e39721e45 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:23:54 -0700 Subject: [PATCH 13/33] Apply pixi run fix auto-fixes --- .../categories/background/estimate.py | 30 +++++++++-------- .../categories/background/line_segment.py | 33 ++++++++++--------- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/easydiffraction/datablocks/experiment/categories/background/estimate.py b/src/easydiffraction/datablocks/experiment/categories/background/estimate.py index ffc2e3d6f..f87bc1d27 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/estimate.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/estimate.py @@ -119,9 +119,10 @@ def _measure_width(y: np.ndarray, sigma: float) -> tuple[float, np.ndarray]: """ Measure a robust peak width and the prominent peak positions. - Peaks are found with a prominence threshold relative to the noise and - their full-width-at-half-maximum is summarised by a high percentile, - so the window clears the broadest (e.g. high-angle CWL) peaks. + Peaks are found with a prominence threshold relative to the noise + and their full-width-at-half-maximum is summarised by a high + percentile, so the window clears the broadest (e.g. high-angle CWL) + peaks. Parameters ---------- @@ -224,7 +225,7 @@ def _stage1_baseline( Resolved method: ``snip``, ``arpls`` or ``fabc``. width : float Peak width in points. - smoothness : float or None + smoothness : float | None Optional Whittaker penalty override. Returns @@ -264,9 +265,10 @@ def _rdp_indices(x: np.ndarray, curve: np.ndarray, epsilon: float) -> np.ndarray """ Vertical Ramer-Douglas-Peucker simplification of a curve. - Returns the indices of the points to keep so that every dropped point - lies within ``epsilon`` (in intensity units) of the piecewise-linear - interpolation through the kept points. 
The endpoints are always kept. + Returns the indices of the points to keep so that every dropped + point lies within ``epsilon`` (in intensity units) of the + piecewise-linear interpolation through the kept points. The + endpoints are always kept. Parameters ---------- @@ -403,7 +405,7 @@ def _thin_to_anchors( RDP tolerance (intensity units). forbidden : np.ndarray Boolean peak-region mask; non-endpoint anchors here are dropped. - n_points : int or None + n_points : int | None Optional maximum number of anchors (endpoints included). Returns @@ -443,7 +445,7 @@ def _flat_estimate( Intensities. method : str Resolved method (recorded for the summary). - width : float or None + width : float | None Supplied width, if any. noise : float Noise estimate. @@ -491,18 +493,18 @@ def estimate_background_curve( y : np.ndarray Intensities to baseline: the measured pattern (data-only) or the peak-subtracted measured pattern (model-guided). - method : str, optional + method : str, default='arpls' Resolved Stage-1 routine: ``arpls`` (default), ``snip`` or ``fabc``. ``auto`` is resolved by the caller, never here. - peaks : np.ndarray or None, optional + peaks : np.ndarray | None, default=None Boolean mask aligned with ``x``; ``True`` forbids a non-endpoint anchor. When ``None`` the mask is derived from ``y`` itself. - width : float or None, optional + width : float | None, default=None Peak width in points; derived from ``y`` when ``None``. - smoothness : float or None, optional + smoothness : float | None, default=None Whittaker penalty override for ``arpls``/``fabc``; ignored by ``snip``. - n_points : int or None, optional + n_points : int | None, default=None Maximum number of anchors (endpoints included); uncapped when ``None``. 
diff --git a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py index 36d51971f..79540795d 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py @@ -190,11 +190,11 @@ def _validate_overrides( Parameters ---------- - width : float or None + width : float | None Peak width override; must be positive when supplied. - smoothness : float or None + smoothness : float | None Smoothing override; must be positive when supplied. - n_points : int or None + n_points : int | None Anchor cap; must be an integer ``>= 2`` when supplied. Raises @@ -224,7 +224,8 @@ def _model_peak_mask(peak_only: np.ndarray) -> np.ndarray: Parameters ---------- peak_only : np.ndarray - Peak-only model intensities (``intensity_calc - intensity_bkg``). + Peak-only model intensities (``intensity_calc - + intensity_bkg``). Returns ------- @@ -304,24 +305,24 @@ def auto_estimate( Detect background control points from the measured pattern. Builds a peak-insensitive background curve and thins it to a - sparse set of fixed line-segment points, overwriting any existing - ones. Heights come from the de-peaked curve, clipped to the - measured intensities so they never eat into peaks. After at least - one calculation, ``use_model`` lets the fitted model place better - points across overlapped regions. + sparse set of fixed line-segment points, overwriting any + existing ones. Heights come from the de-peaked curve, clipped to + the measured intensities so they never eat into peaks. After at + least one calculation, ``use_model`` lets the fitted model place + better points across overlapped regions. Parameters ---------- - method : str, optional - Estimation method: ``auto`` (default, resolves to ``arpls``), - ``snip``, ``arpls`` or ``fabc``. 
- width : float or None, optional + method : str, default='auto' + Estimation method: ``auto`` (default, resolves to + ``arpls``), ``snip``, ``arpls`` or ``fabc``. + width : float | None, default=None Peak width in points; measured from the data when ``None``. - smoothness : float or None, optional + smoothness : float | None, default=None Backend smoothing override; derived when ``None``. - n_points : int or None, optional + n_points : int | None, default=None Maximum number of points; uncapped when ``None``. - use_model : bool, optional + use_model : bool, default=True When a calculation has run, subtract the fitted peaks before estimating so anchors land in true inter-peak gaps. """ From 4ef44bd7fa80ee72c4d8e7b7706b1abe272afc0a Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:25:21 -0700 Subject: [PATCH 14/33] Use list.extend in RDP stack updates --- .../datablocks/experiment/categories/background/estimate.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/easydiffraction/datablocks/experiment/categories/background/estimate.py b/src/easydiffraction/datablocks/experiment/categories/background/estimate.py index f87bc1d27..bf20b271e 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/estimate.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/estimate.py @@ -305,8 +305,7 @@ def _rdp_indices(x: np.ndarray, curve: np.ndarray, epsilon: float) -> np.ndarray if deviation[local] > epsilon: index = start + local keep[index] = True - stack.append((start, index)) - stack.append((index, end)) + stack.extend(((start, index), (index, end))) return np.flatnonzero(keep) From 5235ab6f0998391bb660c3482edfd0915d0acc00 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:33:09 -0700 Subject: [PATCH 15/33] Add unit tests for background estimator helper --- .../categories/background/test_estimate.py | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 
tests/unit/easydiffraction/datablocks/experiment/categories/background/test_estimate.py diff --git a/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_estimate.py b/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_estimate.py new file mode 100644 index 000000000..b51f66ef4 --- /dev/null +++ b/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_estimate.py @@ -0,0 +1,164 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +import numpy as np +import pytest + +from easydiffraction.datablocks.experiment.categories.background import estimate + +estimate_background_curve = estimate.estimate_background_curve +BackgroundEstimate = estimate.BackgroundEstimate + + +def _pattern(n=400, slope=0.3, intercept=5.0, peaks=((5.0, 6.0, 0.15),), noise=0.0, seed=0): + """Build a synthetic pattern: linear background + Gaussian peaks + noise.""" + x = np.linspace(0.0, 10.0, n) + y = intercept + slope * x + for center, amp, width in peaks: + y = y + amp * np.exp(-((x - center) ** 2) / (2.0 * width**2)) + if noise > 0: + rng = np.random.default_rng(seed) + y = y + rng.normal(0.0, noise, size=n) + return x, y.astype(float) + + +def _anchor_indices(x, anchors): + return [int(np.argmin(np.abs(x - ax))) for ax in anchors[:, 0]] + + +def _collect_warnings(monkeypatch): + records = [] + + class _Log: + def warning(self, message, *args, **kwargs): + records.append(str(message)) + + monkeypatch.setattr(estimate, 'log', _Log()) + return records + + +def test_returns_background_estimate_with_metadata(): + x, y = _pattern(noise=0.05, seed=1) + result = estimate_background_curve(x, y, method='arpls') + assert isinstance(result, BackgroundEstimate) + assert result.method == 'arpls' + assert result.curve.shape == x.shape + assert result.anchors.ndim == 2 + assert result.anchors.shape[1] == 2 + assert result.width > 0 + assert result.noise >= 0 + assert result.tolerance >= 0 + 
assert 'lam' in result.backend_params + + +def test_endpoints_always_kept(): + x, y = _pattern(noise=0.05, seed=2) + result = estimate_background_curve(x, y) + assert result.anchors[0, 0] == pytest.approx(x[0]) + assert result.anchors[-1, 0] == pytest.approx(x[-1]) + + +def test_recovers_linear_background_curve(): + x, y = _pattern(noise=0.05, seed=3) + result = estimate_background_curve(x, y, method='arpls') + true_bg = 5.0 + 0.3 * x + # The de-peaked curve tracks the true linear background closely. + assert np.median(np.abs(result.curve - true_bg)) < 0.5 + + +def test_anchors_are_sparse_with_noise(): + x, y = _pattern(n=400, noise=0.05, seed=4) + result = estimate_background_curve(x, y) + # Far fewer anchors than grid points, but at least the two endpoints. + assert 2 <= result.anchors.shape[0] < 60 + + +def test_no_anchor_on_planted_peak_self_derived(): + x, y = _pattern(noise=0.04, peaks=((5.0, 8.0, 0.2),), seed=5) + result = estimate_background_curve(x, y, peaks=None) + interior = result.anchors[1:-1, 0] + # No interior anchor sits on the planted peak (centre 5.0). 
+ assert not np.any(np.abs(interior - 5.0) < 0.4) + + +def test_no_anchor_on_supplied_mask(): + x, y = _pattern(noise=0.04, seed=6) + mask = (x > 4.0) & (x < 6.0) + result = estimate_background_curve(x, y, peaks=mask) + indices = _anchor_indices(x, result.anchors) + endpoints = {0, x.size - 1} + for idx in indices: + if idx not in endpoints: + assert not mask[idx] + + +def test_determinism(): + x, y = _pattern(noise=0.05, seed=7) + first = estimate_background_curve(x, y) + second = estimate_background_curve(x, y) + assert np.array_equal(first.anchors, second.anchors) + assert first.width == second.width + + +def test_graceful_degradation_peakless(monkeypatch): + records = _collect_warnings(monkeypatch) + x = np.linspace(0.0, 10.0, 60) + y = np.full(60, 3.0) + result = estimate_background_curve(x, y) + assert result.anchors.shape[0] >= 2 + assert any('peak' in r.lower() for r in records) + + +def test_method_dispatch_backend_params(): + x, y = _pattern(noise=0.05, seed=8) + assert 'lam' in estimate_background_curve(x, y, method='arpls').backend_params + snip = estimate_background_curve(x, y, method='snip').backend_params + assert 'max_half_window' in snip + fabc = estimate_background_curve(x, y, method='fabc').backend_params + assert 'scale' in fabc + assert 'min_length' in fabc + + +def test_snip_ignores_smoothness_with_warning(monkeypatch): + records = _collect_warnings(monkeypatch) + x, y = _pattern(noise=0.03, seed=9) + estimate_background_curve(x, y, method='snip', smoothness=500.0) + assert any('smoothness' in r.lower() for r in records) + + +def test_n_points_cap_respected(): + x, y = _pattern(n=400, noise=0.05, seed=10) + result = estimate_background_curve(x, y, n_points=6) + assert result.anchors.shape[0] <= 6 + + +def test_n_points_cap_respected_zero_noise(): + # Noiseless curved baseline: tolerance is zero, so the cap must rely on + # the deviation-based fallback rather than RDP-tolerance growth. 
+ x = np.linspace(0.0, 10.0, 300) + y = 5.0 + 2.0 * np.sin(x / 3.0) + result = estimate_background_curve(x, y, n_points=5) + assert result.anchors.shape[0] <= 5 + assert result.anchors.shape[0] >= 2 + + +def test_invalid_method_rejected(): + x, y = _pattern(noise=0.05, seed=11) + with pytest.raises(ValueError, match='method'): + estimate_background_curve(x, y, method='bogus') + + +def test_cwl_broadening_keeps_background_off_broad_peaks(): + # FWHM grows with x; the upper-percentile width must still clear the + # broad high-angle peak so the background is not pulled up under it. + x = np.linspace(0.0, 10.0, 500) + y = 4.0 + 0.0 * x + for center in (2.0, 8.0): + width = 0.1 + 0.06 * center # broadening with angle + y = y + 7.0 * np.exp(-((x - center) ** 2) / (2.0 * width**2)) + rng = np.random.default_rng(12) + y = y + rng.normal(0.0, 0.05, size=x.size) + result = estimate_background_curve(x, y) + # Background near the broad peak stays well below the peak top. + near_peak = result.curve[np.argmin(np.abs(x - 8.0))] + assert near_peak < 6.0 From be4e08179f5e6dec92bd020474b46ef45c7c9ba3 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:33:49 -0700 Subject: [PATCH 16/33] Add auto_estimate lifecycle tests --- .../background/test_line_segment.py | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py b/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py index afa3c1dbb..2937e8d02 100644 --- a/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py +++ b/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py @@ -2,6 +2,12 @@ # SPDX-License-Identifier: BSD-3-Clause import numpy as np +import pytest + +from types import SimpleNamespace + +from easydiffraction.datablocks.experiment.categories.background import line_segment +from 
easydiffraction.datablocks.experiment.categories.background.line_segment import LineSegmentBackground def test_line_segment_background_calculate_and_cif(): @@ -35,3 +41,110 @@ def test_line_segment_background_calculate_and_cif(): assert 'loop_' in cif assert '_pd_background.line_segment_X' in cif assert '_pd_background.line_segment_intensity' in cif + + +def _make_background(x, intensity_meas, intensity_calc=None, intensity_bkg=None): + n = len(x) + calc = np.zeros(n) if intensity_calc is None else intensity_calc + bkg = np.zeros(n) if intensity_bkg is None else intensity_bkg + data = SimpleNamespace( + x=np.asarray(x, dtype=float), + intensity_meas=np.asarray(intensity_meas, dtype=float), + intensity_calc=np.asarray(calc, dtype=float), + intensity_bkg=np.asarray(bkg, dtype=float), + ) + bkg_obj = LineSegmentBackground() + object.__setattr__(bkg_obj, '_parent', SimpleNamespace(data=data)) + return bkg_obj + + +def _synthetic(n=300, seed=0): + x = np.linspace(0.0, 10.0, n) + y = 5.0 + 0.3 * x + 8.0 * np.exp(-((x - 5.0) ** 2) / (2.0 * 0.2**2)) + rng = np.random.default_rng(seed) + return x, y + rng.normal(0.0, 0.05, size=n) + + +def _fake_log(records, key): + methods = { + 'info': lambda self, m, *a, **k: None, + 'warning': lambda self, m, *a, **k: None, + } + methods[key] = lambda self, m, *a, **k: records.append(str(m)) + return type('FakeLog', (), methods)() + + +def test_auto_estimate_creates_sequential_fixed_points(): + x, y = _synthetic(seed=1) + bkg = _make_background(x, y) + bkg.auto_estimate() + assert len(bkg) >= 2 + ids = [p.id.value for p in bkg._items] + assert ids == [str(i) for i in range(1, len(bkg) + 1)] + assert all(p.y.free is False for p in bkg._items) + + +def test_auto_estimate_overwrites_and_refixes(): + x, y = _synthetic(seed=2) + bkg = _make_background(x, y) + bkg.create(id='99', x=1.0, y=1.0) + bkg._items[0].y.free = True # user freed a hand-added point + bkg.auto_estimate() + ids = [p.id.value for p in bkg._items] + assert '99' not in ids + 
assert ids[0] == '1' + assert all(p.y.free is False for p in bkg._items) + + +def test_auto_estimate_replace_notice(monkeypatch): + records = [] + monkeypatch.setattr(line_segment, 'log', _fake_log(records, 'info')) + x, y = _synthetic(seed=3) + bkg = _make_background(x, y) + bkg.auto_estimate() # first call: nothing to replace + assert not [r for r in records if 'Replacing' in r] + records.clear() + bkg.auto_estimate() # second call: replace notice + assert any('Replacing' in r for r in records) + + +def test_auto_estimate_model_guided_path(): + x, y = _synthetic(seed=4) + calc = 8.0 * np.exp(-((x - 5.0) ** 2) / (2.0 * 0.2**2)) + 6.0 + bkg = _make_background(x, y, intensity_calc=calc, intensity_bkg=np.full_like(x, 6.0)) + bkg.auto_estimate(use_model=True) + assert len(bkg) >= 2 + assert all(p.y.free is False for p in bkg._items) + + +def test_auto_estimate_accepts_each_method(): + x, y = _synthetic(seed=5) + for method in ('auto', 'snip', 'arpls', 'fabc'): + bkg = _make_background(x, y) + bkg.auto_estimate(method=method) + assert len(bkg) >= 2 + + +def test_auto_estimate_rejects_invalid_method(): + x, y = _synthetic(seed=6) + bkg = _make_background(x, y) + with pytest.raises(ValueError, match='method'): + bkg.auto_estimate(method='nope') + + +def test_auto_estimate_validates_overrides(): + x, y = _synthetic(seed=7) + bkg = _make_background(x, y) + with pytest.raises(ValueError, match='width'): + bkg.auto_estimate(width=-1.0) + with pytest.raises(ValueError, match='n_points'): + bkg.auto_estimate(n_points=1) + + +def test_auto_estimate_empty_data_warns(monkeypatch): + records = [] + monkeypatch.setattr(line_segment, 'log', _fake_log(records, 'warning')) + bkg = _make_background(np.array([]), np.array([])) + bkg.auto_estimate() + assert len(bkg) == 0 + assert any('No active data' in r for r in records) From d72a0fd10cb60a305cfd2b4dfb46ce336447e633 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:35:58 -0700 Subject: [PATCH 17/33] Add 
CollectionBase.clear() invariant tests --- .../easydiffraction/core/test_category.py | 16 +++++++++ .../easydiffraction/core/test_collection.py | 36 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/tests/unit/easydiffraction/core/test_category.py b/tests/unit/easydiffraction/core/test_category.py index d7edb094e..6ae2fbd1c 100644 --- a/tests/unit/easydiffraction/core/test_category.py +++ b/tests/unit/easydiffraction/core/test_category.py @@ -251,3 +251,19 @@ def test_category_collection_help(capsys): assert 'Items (2)' in out assert 'n1' in out assert 'n2' in out + + +def test_category_collection_clear_marks_parent_dirty(): + from types import SimpleNamespace + + c = SimpleCollection() + c.create(a='n1') + c.create(a='n2') + assert len(c) == 2 + + parent = SimpleNamespace(_need_categories_update=False) + object.__setattr__(c, '_parent', parent) + + c.clear() + assert len(c) == 0 + assert parent._need_categories_update is True diff --git a/tests/unit/easydiffraction/core/test_collection.py b/tests/unit/easydiffraction/core/test_collection.py index 920b3a729..b0f546840 100644 --- a/tests/unit/easydiffraction/core/test_collection.py +++ b/tests/unit/easydiffraction/core/test_collection.py @@ -165,3 +165,39 @@ def as_cif(self) -> str: del c['beta'] assert 'beta' not in c assert len(c) == 1 + + +def test_collection_clear_empties_and_unlinks_children(): + from easydiffraction.core.collection import CollectionBase + from easydiffraction.core.identity import Identity + + class Item: + def __init__(self, name): + self._identity = Identity(owner=self, category_entry=lambda: name) + + class MyCollection(CollectionBase): + @property + def parameters(self): + return [] + + @property + def as_cif(self) -> str: + return '' + + c = MyCollection(item_type=Item) + a = Item('a') + b = Item('b') + c['a'] = a + c['b'] = b + assert len(c) == 2 + assert a._parent is c + + c.clear() + + # Empty, with the name index cleared so lookups fail. 
+ assert len(c) == 0 + assert list(c.keys()) == [] + assert 'a' not in c + # Every prior child is unlinked from the collection. + assert a._parent is None + assert b._parent is None From 5c2294f3aecb60593b175681fe922619fe77af37 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:38:23 -0700 Subject: [PATCH 18/33] Add functional tutorial-corpus test for auto_estimate --- .../test_background_auto_estimate_corpus.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/functional/test_background_auto_estimate_corpus.py diff --git a/tests/functional/test_background_auto_estimate_corpus.py b/tests/functional/test_background_auto_estimate_corpus.py new file mode 100644 index 000000000..56fdd8e4c --- /dev/null +++ b/tests/functional/test_background_auto_estimate_corpus.py @@ -0,0 +1,44 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tutorial-corpus regression for automatic background estimation. + +Loads a representative constant-wavelength tutorial experiment (the HRPT +LBCO pattern from ed-2, whose hand-placed background is a flat ~170), runs +``auto_estimate()`` data-only, and asserts the recovered background tracks +the known one. Data-only: no calculation engine is run. +""" + +import easydiffraction as ed +import numpy as np + + +def test_auto_estimate_recovers_cwl_background(tmp_path): + project = ed.Project() + data_path = ed.download_data(id=3, destination=str(tmp_path)) + project.experiments.add_from_data_path( + name='hrpt', + data_path=data_path, + sample_form='powder', + beam_mode='constant wavelength', + radiation_probe='neutron', + ) + experiment = project.experiments['hrpt'] + # Mirror the tutorial's excluded edges so noisy ends do not skew anchors. 
+ experiment.excluded_regions.create(id='1', start=0, end=5) + experiment.excluded_regions.create(id='2', start=165, end=180) + + experiment.background.auto_estimate() + + points = list(experiment.background) + heights = np.array([p.y.value for p in points]) + positions = np.array([p.x.value for p in points]) + + # A sensible, sparse set of points was produced from the real pattern. + assert 2 <= len(points) < 100 + # The hand-placed ground-truth background is flat at ~170; the recovered + # heights track it and never go negative. + assert np.all(heights >= 0) + assert 100.0 < float(np.median(heights)) < 250.0 + # Points span the active measured range. + assert positions.min() < 20.0 + assert positions.max() > 150.0 From 812757eb383be6ff68f4aaf64e3c59974d8d6948 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:39:28 -0700 Subject: [PATCH 19/33] Add TOF tutorial-corpus case for auto_estimate --- .../test_background_auto_estimate_corpus.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/functional/test_background_auto_estimate_corpus.py b/tests/functional/test_background_auto_estimate_corpus.py index 56fdd8e4c..7216b621d 100644 --- a/tests/functional/test_background_auto_estimate_corpus.py +++ b/tests/functional/test_background_auto_estimate_corpus.py @@ -42,3 +42,30 @@ def test_auto_estimate_recovers_cwl_background(tmp_path): # Points span the active measured range. 
assert positions.min() < 20.0 assert positions.max() > 150.0 + + +def test_auto_estimate_recovers_tof_background(tmp_path): + project = ed.Project() + data_path = ed.download_data(id=17, destination=str(tmp_path)) + project.experiments.add_from_data_path( + name='sim_si', + data_path=data_path, + sample_form='powder', + beam_mode='time-of-flight', + radiation_probe='neutron', + ) + experiment = project.experiments['sim_si'] + experiment.excluded_regions.create(id='1', start=0, end=55000) + experiment.excluded_regions.create(id='2', start=105500, end=200000) + + experiment.background.auto_estimate() + + points = list(experiment.background) + heights = np.array([p.y.value for p in points]) + # Sparse, non-negative anchors on the real TOF pattern (different beam + # mode and a curved/decaying regime), confirming the single arpls + # default holds across beam modes. + assert 2 <= len(points) < 100 + assert np.all(heights >= 0) + # The hand-placed TOF background is flat at ~0.01; recovered stays small. 
+ assert float(np.median(heights)) < 5.0 From 77782a53fe51b382b69a15f3c4c4c0814bbec693 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 09:42:19 -0700 Subject: [PATCH 20/33] Apply ruff format to auto_estimate tests --- .../experiment/categories/background/test_line_segment.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py b/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py index 2937e8d02..671d9d107 100644 --- a/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py +++ b/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py @@ -7,7 +7,9 @@ from types import SimpleNamespace from easydiffraction.datablocks.experiment.categories.background import line_segment -from easydiffraction.datablocks.experiment.categories.background.line_segment import LineSegmentBackground +from easydiffraction.datablocks.experiment.categories.background.line_segment import ( + LineSegmentBackground, +) def test_line_segment_background_calculate_and_cif(): From 6fffe4766ce54c73cde35489f5c7cafa4517edad Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 10:02:33 -0700 Subject: [PATCH 21/33] Document automatic background estimation in the user guide --- .../user-guide/analysis-workflow/analysis.md | 21 +++++++++++++++ .../analysis-workflow/experiment.md | 27 +++++++++++++++++++ docs/docs/user-guide/parameters/background.md | 7 +++++ 3 files changed, 55 insertions(+) diff --git a/docs/docs/user-guide/analysis-workflow/analysis.md b/docs/docs/user-guide/analysis-workflow/analysis.md index c7035a985..3819765b7 100644 --- a/docs/docs/user-guide/analysis-workflow/analysis.md +++ b/docs/docs/user-guide/analysis-workflow/analysis.md @@ -271,6 +271,27 @@ To plot the measured and calculated data after the fit, you can use the 
project.display.pattern(expt_name='hrpt') ``` +### Re-estimating the Background + +If you seeded the background automatically (see the +[Background Category](experiment.md#background-category) section), you +can improve it once a fit has produced a model. Calling `auto_estimate` +again after a fit automatically uses the fitted peaks (the default +`use_model=True`) to place better points, especially across crowded +regions where peaks overlap: + +```python +# Re-estimate the background using the fitted model +project.experiments['hrpt'].background.auto_estimate() + +# Optionally free some of the new (fixed) points and fit again +project.experiments['hrpt'].background['1'].y.free = True +project.analysis.fit() +``` + +This estimate → refine → re-estimate loop is safe to repeat: each call +overwrites the previous points with a fresh, fixed background. + ## Bayesian Analysis Bayesian minimizers sample a posterior distribution rather than only diff --git a/docs/docs/user-guide/analysis-workflow/experiment.md b/docs/docs/user-guide/analysis-workflow/experiment.md index 043d4c58e..ab381d6ab 100644 --- a/docs/docs/user-guide/analysis-workflow/experiment.md +++ b/docs/docs/user-guide/analysis-workflow/experiment.md @@ -227,6 +227,33 @@ project.experiments['hrpt'].background.create(x=110, y=170) project.experiments['hrpt'].background.create(x=165, y=170) ``` +Instead of placing every point by hand, you can let EasyDiffraction +detect a sensible set of background points directly from the measured +pattern with the `auto_estimate` method. Called with no arguments, it +builds a peak-insensitive background curve, places points between the +peaks, and reads their heights from that curve so they do not eat into +peak intensities: + +```python +# Automatically estimate background points from the measured pattern +project.experiments['hrpt'].background.auto_estimate() +``` + +The generated points are ordinary, editable control points. 
They are +created **fixed** (not refined); you can review them, keep them, or free +any of them for refinement (see [Analysis](analysis.md)). Each call +**overwrites** the existing points, so you always start from a clean, +reproducible background. The method works for both constant-wavelength +and time-of-flight data, with either neutron or X-ray radiation. + +You can also guide the estimate with optional arguments, for example to +cap the number of points or choose a specific method: + +```python +# Estimate with at most 10 background points +project.experiments['hrpt'].background.auto_estimate(n_points=10) +``` + ### 5. Linked Phases Category { #linked-phases-category } ```python diff --git a/docs/docs/user-guide/parameters/background.md b/docs/docs/user-guide/parameters/background.md index e307c981b..45e890b0e 100644 --- a/docs/docs/user-guide/parameters/background.md +++ b/docs/docs/user-guide/parameters/background.md @@ -7,6 +7,13 @@ when calculating diffractograms. Please see the [IUCr page](https://www.iucr.org/resources/cif/dictionaries/browse/cif_pd) for further details. +!!! tip "Automatic background estimation" + + Line-segment background points can be detected automatically from the + measured pattern with + [`background.auto_estimate()`](../analysis-workflow/experiment.md#background-category), + instead of entering them by hand.
+ ## [\_pd_background.line_segment_X](https://www.iucr.org/resources/cif/dictionaries/browse/cif_pd) List of X-coordinates used to create many straight-line segments From 95521630065852283f68f64b04dfee9eaa30f693 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 10:15:34 -0700 Subject: [PATCH 22/33] Apply pixi run fix auto-fixes --- .../adrs/accepted/background-auto-estimate.md | 8 +- docs/dev/package-structure/full.md | 8 +- docs/dev/package-structure/short.md | 4 +- docs/dev/plans/background-auto-estimate.md | 86 ++++++++++--------- pyproject.toml | 50 +++++------ 5 files changed, 78 insertions(+), 78 deletions(-) diff --git a/docs/dev/adrs/accepted/background-auto-estimate.md b/docs/dev/adrs/accepted/background-auto-estimate.md index 2da058771..81f35abd5 100644 --- a/docs/dev/adrs/accepted/background-auto-estimate.md +++ b/docs/dev/adrs/accepted/background-auto-estimate.md @@ -343,10 +343,10 @@ anchors, and the method/width/noise/tolerance/backend-params metadata the adapter logs). The `beam_mode` argument from earlier drafts is deferred with the per-beam-mode policy (see _Deferred Work_); omitting it also keeps the helper within the project's argument-count guardrail. -`LineSegmentBackground.auto_estimate()` is a thin adapter: read -the pattern (and model, if present), call the helper, clip, and -`create()` the points. Helpers are extracted as needed to stay under the -lint complexity thresholds +`LineSegmentBackground.auto_estimate()` is a thin adapter: read the +pattern (and model, if present), call the helper, clip, and `create()` +the points. Helpers are extracted as needed to stay under the lint +complexity thresholds ([`lint-complexity-thresholds.md`](../accepted/lint-complexity-thresholds.md)) rather than raising them. 
diff --git a/docs/dev/package-structure/full.md b/docs/dev/package-structure/full.md index de4e69ac5..58991c95f 100644 --- a/docs/dev/package-structure/full.md +++ b/docs/dev/package-structure/full.md @@ -276,7 +276,10 @@ │ │ │ │ │ ├── 🏷️ class PolynomialTerm │ │ │ │ │ └── 🏷️ class ChebyshevPolynomialBackground │ │ │ │ ├── 📄 enums.py -│ │ │ │ │ └── 🏷️ class BackgroundTypeEnum +│ │ │ │ │ ├── 🏷️ class BackgroundTypeEnum +│ │ │ │ │ └── 🏷️ class BackgroundEstimatorMethodEnum +│ │ │ │ ├── 📄 estimate.py +│ │ │ │ │ └── 🏷️ class BackgroundEstimate │ │ │ │ ├── 📄 factory.py │ │ │ │ │ └── 🏷️ class BackgroundFactory │ │ │ │ └── 📄 line_segment.py @@ -611,8 +614,6 @@ │ │ │ │ └── 🏷️ class ProjectInfo │ │ │ └── 📄 factory.py │ │ │ └── 🏷️ class ProjectInfoFactory -│ │ ├── 📁 publication -│ │ ├── 📁 rendering │ │ ├── 📁 rendering_plot │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py @@ -673,7 +674,6 @@ │ │ ├── 📁 html │ │ │ └── 📁 vendor │ │ └── 📁 tex -│ │ └── 📁 styles │ ├── 📄 __init__.py │ ├── 📄 data_context.py │ │ └── 🏷️ class ReportDataContext diff --git a/docs/dev/package-structure/short.md b/docs/dev/package-structure/short.md index 61c705dd4..ec8c02af6 100644 --- a/docs/dev/package-structure/short.md +++ b/docs/dev/package-structure/short.md @@ -131,6 +131,7 @@ │ │ │ │ ├── 📄 base.py │ │ │ │ ├── 📄 chebyshev.py │ │ │ │ ├── 📄 enums.py +│ │ │ │ ├── 📄 estimate.py │ │ │ │ ├── 📄 factory.py │ │ │ │ └── 📄 line_segment.py │ │ │ ├── 📁 calculator @@ -289,8 +290,6 @@ │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py │ │ │ └── 📄 factory.py -│ │ ├── 📁 publication -│ │ ├── 📁 rendering │ │ ├── 📁 rendering_plot │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py @@ -330,7 +329,6 @@ │ │ ├── 📁 html │ │ │ └── 📁 vendor │ │ └── 📁 tex -│ │ └── 📁 styles │ ├── 📄 __init__.py │ ├── 📄 data_context.py │ ├── 📄 enums.py diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index f0937f758..6e4b3b7ac 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ 
b/docs/dev/plans/background-auto-estimate.md @@ -17,21 +17,21 @@ autonomously. No other deliberate exception to `AGENTS.md` is taken. This plan owns the ADR at [`docs/dev/adrs/accepted/background-auto-estimate.md`](../adrs/accepted/background-auto-estimate.md) (Status: Accepted — promoted from `suggestions/` in step P1.0). Because -this change **implements** that ADR, -[`AGENTS.md`](../../../AGENTS.md) → **Change Discipline** requires the -**same change** to promote it to `accepted/` before the PR is opened — a -PR that implements an ADR must not leave it in `suggestions/`. Promotion -is therefore **in scope and mandatory**, handled by the first Phase 1 -step (**P1.0**): `git mv` the ADR into `accepted/`, set its -`**Status:**` to `Accepted`, flip its `docs/dev/adrs/index.md` row to -`Accepted` with the `accepted/…` link, and rewrite every link that -pointed at the old `suggestions/` path — in this plan and the ADR — -located with `git grep -n`. The ordering relative to `/draft-impl-1` is: -its **Phase A** runs **before** the checklist walk — committing the -reviewed plan and the ADR (still in `suggestions/`) and removing the -design-phase `_review-*` / `_reply-*` siblings — and then **P1.0**, the -first checklist step in Phase B, performs the promotion above and commits -the moved ADR, the `index.md` update, and the plan link rewrites. +this change **implements** that ADR, [`AGENTS.md`](../../../AGENTS.md) → +**Change Discipline** requires the **same change** to promote it to +`accepted/` before the PR is opened — a PR that implements an ADR must +not leave it in `suggestions/`. Promotion is therefore **in scope and +mandatory**, handled by the first Phase 1 step (**P1.0**): `git mv` the +ADR into `accepted/`, set its `**Status:**` to `Accepted`, flip its +`docs/dev/adrs/index.md` row to `Accepted` with the `accepted/…` link, +and rewrite every link that pointed at the old `suggestions/` path — in +this plan and the ADR — located with `git grep -n`. 
The ordering +relative to `/draft-impl-1` is: its **Phase A** runs **before** the +checklist walk — committing the reviewed plan and the ADR (still in +`suggestions/`) and removing the design-phase `_review-*` / `_reply-*` +siblings — and then **P1.0**, the first checklist step in Phase B, +performs the promotion above and commits the moved ADR, the `index.md` +update, and the plan link rewrites. ## Branch and PR @@ -101,7 +101,8 @@ the moved ADR, the `index.md` update, and the plan link rewrites. `snip`/`fabc` window factors `k`, `m`), and confirmation that the single `arpls` default holds across the tutorial corpus (CWL/TOF, neutron/X-ray). Only the constants are open; the parameter-to-backend - mapping itself is fixed in P1.3. Record anything surprising in the ADR. + mapping itself is fixed in P1.3. Record anything surprising in the + ADR. ## Concrete files likely to change @@ -118,9 +119,10 @@ the moved ADR, the `index.md` update, and the plan link rewrites. `BackgroundTypeEnum`. - `src/easydiffraction/datablocks/experiment/categories/background/estimate.py` — **new** pure-function estimator module (parameterization + Stage-1 - via `pybaselines` + Stage-2 thinning), returning a `BackgroundEstimate` - result object (curve, anchors, and the - method/width/noise/tolerance/backend-params metadata the adapter logs). + via `pybaselines` + Stage-2 thinning), returning a + `BackgroundEstimate` result object (curve, anchors, and the + method/width/noise/tolerance/backend-params metadata the adapter + logs). - `src/easydiffraction/core/collection.py` — reusable `clear()` on `CollectionBase` via `_adopt_items([])` (unlink children, empty `_items`, rebuild `_index`). Used by the overwrite contract. @@ -131,12 +133,13 @@ the moved ADR, the `index.md` update, and the plan link rewrites. — add `LineSegmentBackground.auto_estimate()` (the thin adapter). 
- `docs/dev/adrs/{suggestions → accepted}/background-auto-estimate.md` and `docs/dev/adrs/index.md` — the ADR is promoted out of - `suggestions/` in **P1.0** (`git mv`, `**Status:** Accepted`, index row - flipped to `accepted/…`, `suggestions/` links rewritten); its technical - content is otherwise unchanged here. `/draft-impl-1`'s Phase A (before - the checklist) commits the reviewed plan and the still-in-`suggestions/` - ADR and removes the design siblings; the P1.0 step then commits the - promotion (moved ADR, index update, plan link rewrites). + `suggestions/` in **P1.0** (`git mv`, `**Status:** Accepted`, index + row flipped to `accepted/…`, `suggestions/` links rewritten); its + technical content is otherwise unchanged here. `/draft-impl-1`'s Phase + A (before the checklist) commits the reviewed plan and the + still-in-`suggestions/` ADR and removes the design siblings; the P1.0 + step then commits the promotion (moved ADR, index update, plan link + rewrites). - Phase 2 (tests): `tests/unit/easydiffraction/datablocks/experiment/categories/background/test_estimate.py` (**new**), `…/test_line_segment.py` (update for `auto_estimate`), unit @@ -153,9 +156,9 @@ step's `Commit:` message **before** moving to the next step or the Phase 1 is **code + docs only — no tests** (those are Phase 2). - [x] **P1.0 — Promote the ADR to `accepted/`.** Per - [`AGENTS.md`](../../../AGENTS.md) → **Change Discipline**, a change - that implements an ADR must move it out of `suggestions/` in the - same change. `git mv` + [`AGENTS.md`](../../../AGENTS.md) → **Change Discipline**, a + change that implements an ADR must move it out of `suggestions/` + in the same change. 
`git mv` `docs/dev/adrs/suggestions/background-auto-estimate.md` → `docs/dev/adrs/accepted/background-auto-estimate.md`, set its `**Status:**` line to `Accepted`, flip the matching @@ -186,21 +189,20 @@ step's `Commit:` message **before** moving to the next step or the Phase - [x] **P1.3 — Add the background curve estimator helper.** Create the new module `estimate.py` with a pure `estimate_background_curve(x, y, *, method='arpls', peaks=None, width=None, smoothness=None, n_points=None) -> BackgroundEstimate`. - (The ADR §6 sketch also lists `beam_mode`; it is **omitted from the - Phase 1 helper** — unused until the deferred per-beam-mode policy, - and keeping it would push the signature past the project's + (The ADR §6 sketch also lists `beam_mode`; it is **omitted from + the Phase 1 helper** — unused until the deferred per-beam-mode + policy, and keeping it would push the signature past the project's `PLR0913` 7-argument limit, which this plan honors rather than - bypasses.) - `method` is the **resolved** Stage-1 algorithm (`snip` / `arpls` / - `fabc` — never `auto`) and selects the `pybaselines` routine, so - **all backend dispatch lives in the helper**, not the adapter. - Derive `W` (find_peaks → peak_widths, ~75th percentile) and noise - σ (MAD of the second difference) when not supplied; compute the - Stage-1 `B(x)` via the selected `pybaselines` routine; thin `B(x)` - to anchors by RDP with tolerance `c · σ` (endpoints kept, optional - `n_points` cap). Array-in/array-out, no model state, no domain - imports. Extract helpers to stay under the lint complexity - thresholds. + bypasses.) `method` is the **resolved** Stage-1 algorithm (`snip` + / `arpls` / `fabc` — never `auto`) and selects the `pybaselines` + routine, so **all backend dispatch lives in the helper**, not the + adapter. 
Derive `W` (find_peaks → peak_widths, ~75th percentile) + and noise σ (MAD of the second difference) when not supplied; + compute the Stage-1 `B(x)` via the selected `pybaselines` routine; + thin `B(x)` to anchors by RDP with tolerance `c · σ` (endpoints + kept, optional `n_points` cap). Array-in/array-out, no model + state, no domain imports. Extract helpers to stay under the lint + complexity thresholds. **Return value.** Return a small frozen result object `BackgroundEstimate` (a `dataclass` or `NamedTuple` local to diff --git a/pyproject.toml b/pyproject.toml index a91cad3b3..89e23d7bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,31 +23,31 @@ classifiers = [ ] requires-python = '>=3.12' dependencies = [ - 'numpy', # Numerical computing library - 'asciichartpy', # ASCII charts for terminal output - 'pooch', # Data downloader - 'typer', # Command-line interface creation - 'rich', # Rich text and beautiful formatting in the terminal - 'varname', # Variable name introspection - 'asteval', # An expression evaluator for Python - 'scipy', # Scientific computing library - 'sympy', # Symbolic mathematics library - 'lmfit', # Non-linear optimization and curve fitting - 'bumps', # Non-linear optimization and curve fitting - 'emcee', # Affine-invariant MCMC sampler - 'dfo-ls', # Non-linear optimization and curve fitting - 'gemmi', # Crystallography library - 'cryspy', # Calculations of diffraction patterns - 'crysfml', # Calculations of diffraction patterns - 'diffpy.pdffit2', # Calculations of Pair Distribution Function (PDF) - 'diffpy.utils', # Utilities for PDF calculations - 'uncertainties', # Propagation of uncertainties - 'h5py', # HDF5 file handling - 'typeguard', # Runtime type checking - 'darkdetect', # Detecting dark mode (system-level) - 'pandas', # Displaying tables in Jupyter notebooks - 'plotly', # Interactive plots - 'pillow', # Rendering structure figures (labels, legend) for reports + 'numpy', # Numerical computing library + 'asciichartpy', # 
ASCII charts for terminal output + 'pooch', # Data downloader + 'typer', # Command-line interface creation + 'rich', # Rich text and beautiful formatting in the terminal + 'varname', # Variable name introspection + 'asteval', # An expression evaluator for Python + 'scipy', # Scientific computing library + 'sympy', # Symbolic mathematics library + 'lmfit', # Non-linear optimization and curve fitting + 'bumps', # Non-linear optimization and curve fitting + 'emcee', # Affine-invariant MCMC sampler + 'dfo-ls', # Non-linear optimization and curve fitting + 'gemmi', # Crystallography library + 'cryspy', # Calculations of diffraction patterns + 'crysfml', # Calculations of diffraction patterns + 'diffpy.pdffit2', # Calculations of Pair Distribution Function (PDF) + 'diffpy.utils', # Utilities for PDF calculations + 'uncertainties', # Propagation of uncertainties + 'h5py', # HDF5 file handling + 'typeguard', # Runtime type checking + 'darkdetect', # Detecting dark mode (system-level) + 'pandas', # Displaying tables in Jupyter notebooks + 'plotly', # Interactive plots + 'pillow', # Rendering structure figures (labels, legend) for reports 'pybaselines>=1.1', # Background curve estimation backend (SNIP, arPLS, fabc) ] From 5862327a78f5c03ff6903020d42dd42c13b25cf1 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 10:30:08 -0700 Subject: [PATCH 23/33] Compare auto_estimate against tutorial background curve --- .../test_background_auto_estimate_corpus.py | 131 ++++++++++++------ 1 file changed, 85 insertions(+), 46 deletions(-) diff --git a/tests/functional/test_background_auto_estimate_corpus.py b/tests/functional/test_background_auto_estimate_corpus.py index 7216b621d..eaa07ce65 100644 --- a/tests/functional/test_background_auto_estimate_corpus.py +++ b/tests/functional/test_background_auto_estimate_corpus.py @@ -2,70 +2,109 @@ # SPDX-License-Identifier: BSD-3-Clause """Tutorial-corpus regression for automatic background estimation. 
-Loads a representative constant-wavelength tutorial experiment (the HRPT -LBCO pattern from ed-2, whose hand-placed background is a flat ~170), runs -``auto_estimate()`` data-only, and asserts the recovered background tracks -the known one. Data-only: no calculation engine is run. +Loads representative tutorial experiments (constant-wavelength HRPT/LBCO +from ed-2 and time-of-flight Si from ed-13), records each tutorial's +hand-placed background as a reference curve, then strips it, runs +``auto_estimate()``, and asserts the estimated line-segment background +tracks the reference to within a small fraction of the *signal* scale +over the active data. Data-only: no calculation engine is run. + +These two data ids represent the CWL and TOF regimes through the same +adapter and estimator code path. The other tutorials the plan lists are +substituted deliberately: ed-17 ships its data as a zip scan directory +and ed-16 defines its background in a loop, both of which complicate a +clean data-only load without adding coverage of a new code path. Sloping +and curved backgrounds are covered against *exact* analytic ground truth +by the unit tests in +``tests/unit/.../categories/background/test_estimate.py``. """ import easydiffraction as ed import numpy as np +# The estimated background may differ from the coarse hand-placed +# reference by at most these fractions of the measured signal scale +# (the 5-95 percentile range of the measured intensities). They are loose +# enough for a hand-placed reference yet tight enough that a wrong-level +# or garbage estimate fails. 
+_MEDIAN_TOL = 0.15 +_MAX_TOL = 0.45 + -def test_auto_estimate_recovers_cwl_background(tmp_path): +def _assert_tracks_reference(tmp_path, name, data_id, beam_mode, probe, excluded, ref_points): project = ed.Project() - data_path = ed.download_data(id=3, destination=str(tmp_path)) + data_path = ed.download_data(id=data_id, destination=str(tmp_path)) project.experiments.add_from_data_path( - name='hrpt', + name=name, data_path=data_path, sample_form='powder', - beam_mode='constant wavelength', - radiation_probe='neutron', + beam_mode=beam_mode, + radiation_probe=probe, ) - experiment = project.experiments['hrpt'] - # Mirror the tutorial's excluded edges so noisy ends do not skew anchors. - experiment.excluded_regions.create(id='1', start=0, end=5) - experiment.excluded_regions.create(id='2', start=165, end=180) + experiment = project.experiments[name] + for start, end in excluded: + experiment.excluded_regions.create(start=start, end=end) - experiment.background.auto_estimate() + data = experiment.background._parent.data + x = np.asarray(data.x, dtype=float) + measured = np.asarray(data.intensity_meas, dtype=float) + signal_scale = float(np.percentile(measured, 95) - np.percentile(measured, 5)) + + # The tutorial's hand-placed background is the reference curve. + for px, py in ref_points: + experiment.background.create(x=px, y=py) + ref_x = np.array([p.x.value for p in experiment.background]) + ref_y = np.array([p.y.value for p in experiment.background]) + reference = np.interp(x, ref_x, ref_y) + # Strip the reference and estimate the background automatically. + experiment.background.auto_estimate() points = list(experiment.background) - heights = np.array([p.y.value for p in points]) - positions = np.array([p.x.value for p in points]) + est_x = np.array([p.x.value for p in points]) + est_y = np.array([p.y.value for p in points]) + estimate = np.interp(x, est_x, est_y) - # A sensible, sparse set of points was produced from the real pattern. 
+ span = x.max() - x.min() + # A sparse, non-negative set of anchors spanning the active range. assert 2 <= len(points) < 100 - # The hand-placed ground-truth background is flat at ~170; the recovered - # heights track it and never go negative. - assert np.all(heights >= 0) - assert 100.0 < float(np.median(heights)) < 250.0 - # Points span the active measured range. - assert positions.min() < 20.0 - assert positions.max() > 150.0 + assert np.all(est_y >= 0) + assert est_x.min() <= x.min() + 0.05 * span + assert est_x.max() >= x.max() - 0.05 * span + # The estimate tracks the hand-placed reference within a small fraction + # of the signal scale (a wrong-level or garbage estimate would not). + assert np.median(np.abs(estimate - reference)) < _MEDIAN_TOL * signal_scale + assert np.max(np.abs(estimate - reference)) < _MAX_TOL * signal_scale -def test_auto_estimate_recovers_tof_background(tmp_path): - project = ed.Project() - data_path = ed.download_data(id=17, destination=str(tmp_path)) - project.experiments.add_from_data_path( - name='sim_si', - data_path=data_path, - sample_form='powder', - beam_mode='time-of-flight', - radiation_probe='neutron', +def test_auto_estimate_tracks_cwl_tutorial_background(tmp_path): + # ed-2: constant-wavelength neutron HRPT/LBCO, flat background ~170. + _assert_tracks_reference( + tmp_path, + 'hrpt', + 3, + 'constant wavelength', + 'neutron', + [(0, 5), (165, 180)], + [(10, 170), (30, 170), (50, 170), (110, 170), (165, 170)], ) - experiment = project.experiments['sim_si'] - experiment.excluded_regions.create(id='1', start=0, end=55000) - experiment.excluded_regions.create(id='2', start=105500, end=200000) - experiment.background.auto_estimate() - points = list(experiment.background) - heights = np.array([p.y.value for p in points]) - # Sparse, non-negative anchors on the real TOF pattern (different beam - # mode and a curved/decaying regime), confirming the single arpls - # default holds across beam modes. 
- assert 2 <= len(points) < 100 - assert np.all(heights >= 0) - # The hand-placed TOF background is flat at ~0.01; recovered stays small. - assert float(np.median(heights)) < 5.0 +def test_auto_estimate_tracks_tof_tutorial_background(tmp_path): + # ed-13: time-of-flight neutron Si, flat background ~0.01. + _assert_tracks_reference( + tmp_path, + 'sim_si', + 17, + 'time-of-flight', + 'neutron', + [(0, 55000), (105500, 200000)], + [ + (50000, 0.01), + (60000, 0.01), + (70000, 0.01), + (80000, 0.01), + (90000, 0.01), + (100000, 0.01), + (110000, 0.01), + ], + ) From 64fa746038a74d6dee84349553e6531cb02cb5b0 Mon Sep 17 00:00:00 2001 From: andrewsazonov Date: Thu, 4 Jun 2026 10:32:20 -0700 Subject: [PATCH 24/33] Assert adapter dispatch and clipping in auto_estimate tests --- .../background/test_line_segment.py | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py b/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py index 671d9d107..4d11a49d0 100644 --- a/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py +++ b/tests/unit/easydiffraction/datablocks/experiment/categories/background/test_line_segment.py @@ -150,3 +150,76 @@ def test_auto_estimate_empty_data_warns(monkeypatch): bkg.auto_estimate() assert len(bkg) == 0 assert any('No active data' in r for r in records) + + +def _patch_helper(monkeypatch, captured, anchors=None): + """Replace the estimator helper with a fake that records its inputs.""" + + def fake(x, y, *, method, peaks, width, smoothness, n_points): + captured.update( + x=np.asarray(x), + y=np.asarray(y), + method=method, + peaks=(None if peaks is None else np.asarray(peaks)), + width=width, + n_points=n_points, + ) + rows = anchors if anchors is not None else np.array([[x[0], 1.0], [x[-1], 1.0]]) + return SimpleNamespace(anchors=rows, width=5.0) + + 
monkeypatch.setattr(line_segment, 'estimate', SimpleNamespace(estimate_background_curve=fake)) + + +def test_auto_estimate_forwards_resolved_method(monkeypatch): + captured = {} + _patch_helper(monkeypatch, captured) + x, y = _synthetic(seed=20) + for requested, expected in ( + ('auto', 'arpls'), + ('snip', 'snip'), + ('arpls', 'arpls'), + ('fabc', 'fabc'), + ): + _make_background(x, y).auto_estimate(method=requested) + assert captured['method'] == expected + + +def test_auto_estimate_model_guided_passes_peak_subtracted_inputs(monkeypatch): + captured = {} + _patch_helper(monkeypatch, captured) + x = np.linspace(0.0, 10.0, 200) + peak = 40.0 * np.exp(-((x - 5.0) ** 2) / (2.0 * 0.2**2)) + bkg = np.full_like(x, 90.0) + meas = bkg + peak + 3.0 + calc = bkg + peak # populated model -> model-guided path + obj = _make_background(x, meas, intensity_calc=calc, intensity_bkg=bkg) + obj.auto_estimate(use_model=True) + # Helper receives the peak-subtracted measured intensities, not the raw data. + assert np.allclose(captured['y'], meas - (calc - bkg)) + # ...and a non-empty forbidden mask built from the model peak. + assert captured['peaks'] is not None + assert captured['peaks'].any() + + +def test_auto_estimate_data_only_passes_raw_inputs(monkeypatch): + captured = {} + _patch_helper(monkeypatch, captured) + x, meas = _synthetic(seed=21) + calc = meas.copy() # even with a populated model present... 
+ obj = _make_background(x, meas, intensity_calc=calc) + obj.auto_estimate(use_model=False) # ...use_model=False forces the data-only path + assert np.allclose(captured['y'], meas) + assert captured['peaks'] is None + + +def test_auto_estimate_clips_heights_to_measured(monkeypatch): + captured = {} + x = np.linspace(0.0, 10.0, 101) + meas = np.full_like(x, 50.0) + anchors = np.array([[x[0], 80.0], [x[50], -10.0], [x[-1], 30.0]]) + _patch_helper(monkeypatch, captured, anchors=anchors) + obj = _make_background(x, meas) + obj.auto_estimate() + heights = [p.y.value for p in obj._items] + # Absolute anchor heights clipped to [0, measured(=50)] -- no residual add-back. + assert heights == [50.0, 0.0, 30.0] From d5bc4167621270fc288e2bc1635ca100643be752 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 13:11:26 -0700 Subject: [PATCH 25/33] Simplify background estimation with auto_estimate method --- docs/docs/tutorials/ed-2.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/docs/docs/tutorials/ed-2.py b/docs/docs/tutorials/ed-2.py index e2207a8b2..a67e45135 100644 --- a/docs/docs/tutorials/ed-2.py +++ b/docs/docs/tutorials/ed-2.py @@ -117,11 +117,7 @@ experiment.peak.broad_lorentz_y = 0.1 # %% -experiment.background.create(id='1', x=10, y=170) -experiment.background.create(id='2', x=30, y=170) -experiment.background.create(id='3', x=50, y=170) -experiment.background.create(id='4', x=110, y=170) -experiment.background.create(id='5', x=165, y=170) +experiment.background.auto_estimate() # %% experiment.excluded_regions.create(id='1', start=0, end=5) @@ -152,11 +148,8 @@ experiment.peak.broad_gauss_w.free = True experiment.peak.broad_lorentz_y.free = True -experiment.background['1'].y.free = True -experiment.background['2'].y.free = True -experiment.background['3'].y.free = True -experiment.background['4'].y.free = True -experiment.background['5'].y.free = True +for point in experiment.background: + point.y.free = True 
experiment.linked_phases['lbco'].scale.free = True From 656f4fab56881e4e83b992914605770e3099110c Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 13:17:01 -0700 Subject: [PATCH 26/33] Record narrower Phase 2 calibration outcome in docs --- .../adrs/accepted/background-auto-estimate.md | 50 ++++++++++--------- docs/dev/plans/background-auto-estimate.md | 40 ++++++++------- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/docs/dev/adrs/accepted/background-auto-estimate.md b/docs/dev/adrs/accepted/background-auto-estimate.md index 81f35abd5..5315a95c5 100644 --- a/docs/dev/adrs/accepted/background-auto-estimate.md +++ b/docs/dev/adrs/accepted/background-auto-estimate.md @@ -359,15 +359,17 @@ Work_ — to avoid an abstraction before its second concrete use. The four design questions raised in review are resolved: noise-relative Stage-2 thinning (§3), always-overwrite with a replace notice (§5), a single Stage-1 method for now (§3), and a void method that logs a -one-line summary (§1). What remains is empirical calibration, done -against the tutorial corpus during implementation: - -- The exact Stage-2 tolerance multiplier (`c · σ`, proposed `c ≈ 2`) and - the width percentile (proposed ~75th) need tuning against real - datasets. -- Whether the single Stage-1 method holds across the whole corpus - (CWL/TOF, neutron/X-ray) or a `beam_mode`/`radiation_probe` policy is - eventually needed (see §Deferred Work). +one-line summary (§1). Empirical calibration was carried out in Phase 2: + +- The Stage-2 tolerance multiplier (`c · σ`, `c = 2`) and the width + percentile (~75th) are first-cut constants; they were validated — not + exhaustively swept — against the representative CWL (`ed-2`) and TOF + (`ed-13`) datasets plus the analytic unit cases, and produce sensible + backgrounds there. Re-tuning stays possible if a future dataset needs + it. 
+- The single Stage-1 method (`arpls`) holds for both validated beam + modes; no `beam_mode`/`radiation_probe` policy was required (it stays + in §Deferred Work should a future corpus show otherwise). ## Consequences @@ -471,22 +473,22 @@ helper: the single fallback warning rather than an exception or a garbage background. -**Tutorial corpus as real-world reference.** The ~25 tutorial scripts in -`docs/docs/tutorials/*.py` already build real experiments with -well-defined backgrounds across both beam modes and both probes — CWL -(e.g. the sloping background in -[`ed-17.py`](../../../../docs/docs/tutorials/ed-17.py) and -[`ed-2.py`](../../../../docs/docs/tutorials/ed-2.py)) and TOF (e.g. -[`ed-13.py`](../../../../docs/docs/tutorials/ed-13.py), -[`ed-16.py`](../../../../docs/docs/tutorials/ed-16.py)). Their -hand-placed line-segment points are ground truth: stripping them and +**Tutorial corpus as real-world reference.** The tutorial scripts in +`docs/docs/tutorials/*.py` build real experiments with well-defined +backgrounds across both beam modes and both probes. Their hand-placed +line-segment points are a real-world reference: stripping them and re-running `auto_estimate()` should reproduce a comparable background -curve within tolerance. This gives broad, real coverage across space -groups, beam modes, and probes at almost no authoring cost, and is the -reference set used to calibrate the default constants and confirm the -single Stage-1 method. These corpus checks run at the functional / -script level where the tutorial experiments are already loaded, not at -unit level. +curve. 
**Phase 2 outcome:** the functional regression validates two +representative datasets — CWL +[`ed-2.py`](../../../../docs/docs/tutorials/ed-2.py) and TOF +[`ed-13.py`](../../../../docs/docs/tutorials/ed-13.py) — comparing the +estimated curve against the hand-placed reference to within a fraction +of the measured signal scale; the single `arpls` default and the +first-cut constants hold for both. Sloping and curved backgrounds are +covered against exact analytic ground truth by the unit tests, not the +corpus. A broader per-tutorial sweep (e.g. `ed-17`, `ed-16`) was not +needed and stays available if a future dataset misbehaves. These checks +run at the functional / unit level. The estimator module mirrors into `tests/unit/easydiffraction/datablocks/experiment/categories/background/` diff --git a/docs/dev/plans/background-auto-estimate.md b/docs/dev/plans/background-auto-estimate.md index 6e4b3b7ac..511e65289 100644 --- a/docs/dev/plans/background-auto-estimate.md +++ b/docs/dev/plans/background-auto-estimate.md @@ -94,15 +94,16 @@ update, and the plan link rewrites. ## Open questions -- **Empirical calibration (resolved during Phase 2, not blocking).** The - Stage-2 tolerance multiplier (`c · σ`, proposed `c ≈ 2`), the width - percentile (proposed ~75th), the numeric constants in the backend - dispatch contract (P1.3 — the `arpls`/`fabc` `lam` scaling and the - `snip`/`fabc` window factors `k`, `m`), and confirmation that the - single `arpls` default holds across the tutorial corpus (CWL/TOF, - neutron/X-ray). Only the constants are open; the parameter-to-backend - mapping itself is fixed in P1.3. Record anything surprising in the - ADR. 
+- **Empirical calibration (carried out in Phase 2).** The Stage-2 + tolerance multiplier (`c · σ`, `c = 2`), the width percentile (~75th), + and the backend dispatch constants (P1.3 — the `arpls`/`fabc` `lam` + scaling and the `snip`/`fabc` window factors `k`, `m`) are first-cut + values, validated against the representative CWL (`ed-2`) and TOF + (`ed-13`) datasets and the analytic unit cases rather than + exhaustively swept across all tutorials. The single `arpls` default + holds for both validated beam modes. The parameter-to-backend mapping + is fixed in P1.3; only the constants stay tunable if a future dataset + needs it. ## Concrete files likely to change @@ -329,15 +330,18 @@ Tests to add/update (unit tests mirror the source tree per `CategoryCollection` marks its parent dirty — tested directly, not only via `auto_estimate()`. - **Functional tutorial-corpus comparison** in `tests/functional/` - (data-only, no engine; run by `pixi run functional-tests`): load - representative tutorial experiments — CWL - [`ed-2.py`](../../docs/tutorials/ed-2.py), - [`ed-17.py`](../../docs/tutorials/ed-17.py); TOF - [`ed-13.py`](../../docs/tutorials/ed-13.py), - [`ed-16.py`](../../docs/tutorials/ed-16.py) — strip their hand-placed - points, run `auto_estimate()`, and assert the recovered curve matches - the original within tolerance. Use this to calibrate `c` and the width - percentile and confirm the single `arpls` default. + (data-only, no engine; run by `pixi run functional-tests`): for each + case, record the hand-placed background, strip it, run + `auto_estimate()`, interpolate the generated curve over the active + data, and assert it tracks the reference to within a fraction of the + measured signal scale. 
As implemented this covers CWL + [`ed-2.py`](../../docs/tutorials/ed-2.py) and TOF + [`ed-13.py`](../../docs/tutorials/ed-13.py); `ed-17` (zip scan + directory) and `ed-16` (loop-defined background) are substituted with + the justification noted in the test, and sloping/curved backgrounds + are covered with analytic ground truth by the unit tests. Confirms the + single `arpls` default and the first-cut `c` / width-percentile + constants. - Verify the test-structure mirror with `pixi run test-structure-check`. Verification commands (zsh-safe log capture where output is needed): From 79ddb5dacb10f61aae60bf98fd6b4a91c332fc64 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 13:19:17 -0700 Subject: [PATCH 27/33] Simplify background setting by using auto_estimate method --- docs/docs/tutorials/ed-6.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/docs/tutorials/ed-6.py b/docs/docs/tutorials/ed-6.py index 50a3ab363..995ff0a50 100644 --- a/docs/docs/tutorials/ed-6.py +++ b/docs/docs/tutorials/ed-6.py @@ -125,15 +125,7 @@ # ### Set Background # %% -expt.background.create(id='1', x=4.4196, y=500) -expt.background.create(id='2', x=6.6207, y=500) -expt.background.create(id='3', x=10.4918, y=500) -expt.background.create(id='4', x=15.4634, y=500) -expt.background.create(id='5', x=45.6041, y=500) -expt.background.create(id='6', x=74.6844, y=500) -expt.background.create(id='7', x=103.4187, y=500) -expt.background.create(id='8', x=121.6311, y=500) -expt.background.create(id='9', x=159.4116, y=500) +expt.background.auto_estimate() # %% [markdown] # ### Set Linked Phases From bfa5820a364b3a32f2c82c439e14cb1b71c67b32 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 13:19:27 -0700 Subject: [PATCH 28/33] Document the empty-data exception to the overwrite contract --- .../adrs/accepted/background-auto-estimate.md | 31 +++++++++++-------- .../analysis-workflow/experiment.md | 7 +++-- 2 files changed, 23 
insertions(+), 15 deletions(-) diff --git a/docs/dev/adrs/accepted/background-auto-estimate.md b/docs/dev/adrs/accepted/background-auto-estimate.md index 5315a95c5..a363c46f2 100644 --- a/docs/dev/adrs/accepted/background-auto-estimate.md +++ b/docs/dev/adrs/accepted/background-auto-estimate.md @@ -299,19 +299,24 @@ The intended usage is a loop, and the API supports it directly: background and clip heights to the original measured intensities (§2). -**Every call overwrites and re-fixes.** `auto_estimate()` always clears -the collection and rebuilds it — there is no append mode — and the -rebuilt points are **fixed** (`free=False`) regardless of whether the -previous points had been freed during refinement. A second call is -therefore a fresh fixed seed, not a merge: calling it again overwrites -the points and re-fixes them even if they were free. This keeps the loop -predictable (each pass starts from a clean, fixed background) and -idempotent (same inputs → same points). Clearing everything — including -any hand-added points — is the deliberate "overwrite" contract; -preserving manual points is deferred. When the collection is non-empty, -the call logs a one-line notice that it is replacing the existing -points, so a user who hand-tuned a background is not surprised; the -first call, with nothing to replace, is silent. +**Every call overwrites and re-fixes.** Whenever it produces an +estimate, `auto_estimate()` clears the collection and rebuilds it — +there is no append mode — and the rebuilt points are **fixed** +(`free=False`) regardless of whether the previous points had been freed +during refinement. A second call is therefore a fresh fixed seed, not a +merge: calling it again overwrites the points and re-fixes them even if +they were free. This keeps the loop predictable (each pass starts from a +clean, fixed background) and idempotent (same inputs → same points). 
+Clearing everything — including any hand-added points — is the +deliberate "overwrite" contract; preserving manual points is deferred. +When the collection is non-empty, the call logs a one-line notice that +it is replacing the existing points, so a user who hand-tuned a +background is not surprised; the first call, with nothing to replace, is +silent. The one exception is degenerate input: when no active data +remain (every point excluded, or data not yet loaded), the call emits a +single warning and returns **without touching the existing points**, so +an accidental call on an unloaded experiment does not wipe a hand-tuned +background. **Always fixed; no `free` argument.** Generated points are always created fixed (`intensity.free = False`) — there is no caller-selectable diff --git a/docs/docs/user-guide/analysis-workflow/experiment.md b/docs/docs/user-guide/analysis-workflow/experiment.md index ab381d6ab..61dddd0d9 100644 --- a/docs/docs/user-guide/analysis-workflow/experiment.md +++ b/docs/docs/user-guide/analysis-workflow/experiment.md @@ -242,8 +242,11 @@ project.experiments['hrpt'].background.auto_estimate() The generated points are ordinary, editable control points. They are created **fixed** (not refined); you can review them, keep them, or free any of them for refinement (see [Analysis](analysis.md)). Each call -**overwrites** the existing points, so you always start from a clean, -reproducible background. It works for both constant-wavelength and +**overwrites** the existing points (when there are active data to +estimate from), so you always start from a clean, reproducible +background; if no active data remain — for example every point is +excluded, or data are not yet loaded — it warns and leaves your existing +points unchanged. It works for both constant-wavelength and time-of-flight data, neutron and X-ray. 
You can also guide the estimate with optional arguments, for example to From 9bbd5981520c392ab35e805517281252ee8ca8e1 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 13:43:08 -0700 Subject: [PATCH 29/33] Simplify background estimation with auto_estimate method --- docs/docs/tutorials/ed-20.py | 44 ++++-------------------------------- 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/docs/docs/tutorials/ed-20.py b/docs/docs/tutorials/ed-20.py index bc5b1e644..ef8e77eb0 100644 --- a/docs/docs/tutorials/ed-20.py +++ b/docs/docs/tutorials/ed-20.py @@ -146,46 +146,10 @@ expt_s2.background.show_supported() # %% -# expt_s2.background.type = 'line-segment' - -# %% -for idx, (x, y) in enumerate( - [ - (40111.8789, 0.0170), - (41193.5664, 0.1484), - (42041.3750, 0.1848), - (42713.7734, 0.1975), - (44409.3945, 0.1891), - (45198.7344, 0.2147), - (46251.1875, 0.1887), - (49350.0742, 0.2194), - (51289.6836, 0.1991), - (55245.1992, 0.1981), - (55679.7070, 0.2276), - (56383.9102, 0.2439), - (58956.1797, 0.2907), - (61536.4570, 0.3067), - (63768.0469, 0.3242), - (65581.2109, 0.2973), - (70183.8516, 0.2575), - (71787.8203, 0.2321), - (78343.1094, 0.2158), - (80016.8047, 0.1694), - (98141.8516, 0.2400), - (99262.2344, 0.4335), - (100985.8516, 0.4375), - (101933.8516, 0.3427), - (108656.0312, 0.5339), - (110896.7500, 0.9537), - (113137.4844, 1.1668), - (114430.2031, 1.1164), - (116929.4844, 0.9161), - (119428.7422, 0.6885), - (134506.3438, 0.0692), - ], - start=1, -): - expt_s2.background.create(id=str(idx), x=x, y=y) +expt_s2.background.auto_estimate() + +# %% +expt_s2.background.show() # %% for point in expt_s2.background: From d308082a65b93d9124e6ad5ffc2e077f97f754d0 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 13:45:47 -0700 Subject: [PATCH 30/33] Estimate background after excluding regions in ed-2 tutorial --- docs/docs/tutorials/ed-2.ipynb | 17 +++++------------ docs/docs/tutorials/ed-2.py | 6 +++--- 2 files changed, 8 
insertions(+), 15 deletions(-) diff --git a/docs/docs/tutorials/ed-2.ipynb b/docs/docs/tutorials/ed-2.ipynb index 60ba1b82c..fe6db46f2 100644 --- a/docs/docs/tutorials/ed-2.ipynb +++ b/docs/docs/tutorials/ed-2.ipynb @@ -260,11 +260,8 @@ "metadata": {}, "outputs": [], "source": [ - "experiment.background.create(id='1', x=10, y=170)\n", - "experiment.background.create(id='2', x=30, y=170)\n", - "experiment.background.create(id='3', x=50, y=170)\n", - "experiment.background.create(id='4', x=110, y=170)\n", - "experiment.background.create(id='5', x=165, y=170)" + "experiment.excluded_regions.create(id='1', start=0, end=5)\n", + "experiment.excluded_regions.create(id='2', start=165, end=180)" ] }, { @@ -274,8 +271,7 @@ "metadata": {}, "outputs": [], "source": [ - "experiment.excluded_regions.create(id='1', start=0, end=5)\n", - "experiment.excluded_regions.create(id='2', start=165, end=180)" + "experiment.background.auto_estimate()" ] }, { @@ -333,11 +329,8 @@ "experiment.peak.broad_gauss_w.free = True\n", "experiment.peak.broad_lorentz_y.free = True\n", "\n", - "experiment.background['1'].y.free = True\n", - "experiment.background['2'].y.free = True\n", - "experiment.background['3'].y.free = True\n", - "experiment.background['4'].y.free = True\n", - "experiment.background['5'].y.free = True\n", + "for point in experiment.background:\n", + " point.y.free = True\n", "\n", "experiment.linked_phases['lbco'].scale.free = True" ] diff --git a/docs/docs/tutorials/ed-2.py b/docs/docs/tutorials/ed-2.py index a67e45135..67febc7f3 100644 --- a/docs/docs/tutorials/ed-2.py +++ b/docs/docs/tutorials/ed-2.py @@ -116,13 +116,13 @@ experiment.peak.broad_gauss_w = 0.1 experiment.peak.broad_lorentz_y = 0.1 -# %% -experiment.background.auto_estimate() - # %% experiment.excluded_regions.create(id='1', start=0, end=5) experiment.excluded_regions.create(id='2', start=165, end=180) +# %% +experiment.background.auto_estimate() + # %% experiment.linked_phases.create(id='lbco', scale=10.0) From 
2def74951ac25cf247c73805ee4a3cf6edc7dfdb Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 13:45:47 -0700 Subject: [PATCH 31/33] Regenerate ed-6 notebook for auto_estimate conversion --- docs/docs/tutorials/ed-6.ipynb | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/docs/tutorials/ed-6.ipynb b/docs/docs/tutorials/ed-6.ipynb index e38f74ef4..d3d24bc99 100644 --- a/docs/docs/tutorials/ed-6.ipynb +++ b/docs/docs/tutorials/ed-6.ipynb @@ -271,15 +271,7 @@ "metadata": {}, "outputs": [], "source": [ - "expt.background.create(id='1', x=4.4196, y=500)\n", - "expt.background.create(id='2', x=6.6207, y=500)\n", - "expt.background.create(id='3', x=10.4918, y=500)\n", - "expt.background.create(id='4', x=15.4634, y=500)\n", - "expt.background.create(id='5', x=45.6041, y=500)\n", - "expt.background.create(id='6', x=74.6844, y=500)\n", - "expt.background.create(id='7', x=103.4187, y=500)\n", - "expt.background.create(id='8', x=121.6311, y=500)\n", - "expt.background.create(id='9', x=159.4116, y=500)" + "expt.background.auto_estimate()" ] }, { From d3c9cde5d30d7d7bb4aa9cd9c1a4dda3790187e7 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 13:47:52 -0700 Subject: [PATCH 32/33] Regenerate ed-20 notebook for auto_estimate conversion --- docs/docs/tutorials/ed-20.ipynb | 40 ++------------------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/docs/docs/tutorials/ed-20.ipynb b/docs/docs/tutorials/ed-20.ipynb index 98d9572a7..7fd3b3725 100644 --- a/docs/docs/tutorials/ed-20.ipynb +++ b/docs/docs/tutorials/ed-20.ipynb @@ -316,7 +316,7 @@ "metadata": {}, "outputs": [], "source": [ - "# expt_s2.background.type = 'line-segment'" + "expt_s2.background.auto_estimate()" ] }, { @@ -326,43 +326,7 @@ "metadata": {}, "outputs": [], "source": [ - "for idx, (x, y) in enumerate(\n", - " [\n", - " (40111.8789, 0.0170),\n", - " (41193.5664, 0.1484),\n", - " (42041.3750, 0.1848),\n", - " (42713.7734, 0.1975),\n", 
- " (44409.3945, 0.1891),\n", - " (45198.7344, 0.2147),\n", - " (46251.1875, 0.1887),\n", - " (49350.0742, 0.2194),\n", - " (51289.6836, 0.1991),\n", - " (55245.1992, 0.1981),\n", - " (55679.7070, 0.2276),\n", - " (56383.9102, 0.2439),\n", - " (58956.1797, 0.2907),\n", - " (61536.4570, 0.3067),\n", - " (63768.0469, 0.3242),\n", - " (65581.2109, 0.2973),\n", - " (70183.8516, 0.2575),\n", - " (71787.8203, 0.2321),\n", - " (78343.1094, 0.2158),\n", - " (80016.8047, 0.1694),\n", - " (98141.8516, 0.2400),\n", - " (99262.2344, 0.4335),\n", - " (100985.8516, 0.4375),\n", - " (101933.8516, 0.3427),\n", - " (108656.0312, 0.5339),\n", - " (110896.7500, 0.9537),\n", - " (113137.4844, 1.1668),\n", - " (114430.2031, 1.1164),\n", - " (116929.4844, 0.9161),\n", - " (119428.7422, 0.6885),\n", - " (134506.3438, 0.0692),\n", - " ],\n", - " start=1,\n", - "):\n", - " expt_s2.background.create(id=str(idx), x=x, y=y)" + "expt_s2.background.show()" ] }, { From 1caea81ce15a090c70d7baf8ec20cc4259342079 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Thu, 4 Jun 2026 15:46:33 -0700 Subject: [PATCH 33/33] Enhance table rendering: prevent cell wrapping for wide tables --- src/easydiffraction/display/tablers/pandas.py | 10 +++++++--- .../easydiffraction/display/tablers/test_pandas.py | 5 +++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/easydiffraction/display/tablers/pandas.py b/src/easydiffraction/display/tablers/pandas.py index 38ff2590e..5327cb830 100644 --- a/src/easydiffraction/display/tablers/pandas.py +++ b/src/easydiffraction/display/tablers/pandas.py @@ -36,9 +36,13 @@ # ``min-width: 0`` neutralise MkDocs Material's ``table:not([class])`` # rules, which otherwise inject a per-row ``border-top`` (stray rules # between rows) and ``th { min-width: 5rem }`` (over-wide columns) onto -# class-less embedded tables. Inline values win over the theme -# stylesheet, so no CSS class or ``