diff --git a/.github/workflows/python-weather-diagnostics-toolkit-ci.yml b/.github/workflows/python-weather-diagnostics-toolkit-ci.yml new file mode 100644 index 0000000..cd11c83 --- /dev/null +++ b/.github/workflows/python-weather-diagnostics-toolkit-ci.yml @@ -0,0 +1,53 @@ +name: python-weather-diagnostics-toolkit-ci +run-name: python weather diagnostics toolkit ci / ${{ github.event_name }} / ${{ github.ref_name }} + +on: + workflow_dispatch: + push: + paths: + - ".github/workflows/python-weather-diagnostics-toolkit-ci.yml" + - "projects/python-weather-diagnostics-toolkit/**" + pull_request: + paths: + - ".github/workflows/python-weather-diagnostics-toolkit-ci.yml" + - "projects/python-weather-diagnostics-toolkit/**" + +permissions: {} + +env: + PYTHON_WEATHER_DIAGNOSTICS_VERSION: "3.11" + +jobs: + test: + runs-on: ubuntu-latest + permissions: + contents: read + defaults: + run: + working-directory: projects/python-weather-diagnostics-toolkit + steps: + - name: Check out repository + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ env.PYTHON_WEATHER_DIAGNOSTICS_VERSION }} + + - name: Upgrade pip + run: python -m pip install --upgrade pip + + - name: Install project + run: python -m pip install -e .[dev] + + - name: Run tests + run: python -m pytest + + - name: Compile modules and scripts + run: python -m compileall src scripts + + - name: CLI help smoke tests + run: | + python scripts/run_thermodynamic_check.py --help + python scripts/run_dynamics_summary.py --help + python scripts/run_synthetic_ensemble.py --help diff --git a/README.md b/README.md index 330e5a3..1a56f50 100644 --- a/README.md +++ b/README.md @@ -33,11 +33,22 @@ demonstration charts, and explicit data-redistribution boundaries. `sbom-diff-and-risk` remains the flagship release-facing tool in this repository. -The precipitation diagnostics projects are supporting scientific-data mini-labs. 
-They demonstrate reproducible analysis workflows, data-policy boundaries, and -reviewer-friendly interpretation, but they are not part of the -`sbom-diff-and-risk` release surface and should not be read as a separate -meteorology portfolio. +The precipitation and weather diagnostics projects are supporting +scientific-data mini-labs. They demonstrate reproducible analysis workflows, +data-policy boundaries, and reviewer-friendly interpretation, but they are not +part of the `sbom-diff-and-risk` release surface and should not be read as a +separate meteorology portfolio. + +## Supporting Weather Diagnostics Project + +[`projects/python-weather-diagnostics-toolkit`](projects/python-weather-diagnostics-toolkit/README.md) +is a public-safe Python weather-diagnostics mini-lab. It demonstrates reusable +diagnostics for ERA5-style gridded fields, including coordinate/variable +normalization, dewpoint checks, vorticity and advection diagnostics, +time-ordered baseline modeling, and synthetic ensemble summaries. + +This toolkit is a supporting atmospheric diagnostics module and is not part of +the `sbom-diff-and-risk` release surface. ## Why This Repository Exists @@ -114,6 +125,31 @@ Useful entry points: - [Inference analysis](projects/precipitation-anomaly-diagnostics-lab/docs/inference-analysis.md) - [Synthetic inference report](projects/precipitation-anomaly-diagnostics-lab/examples/synthetic-inference-report.md) +Project: +[`python-weather-diagnostics-toolkit`](projects/python-weather-diagnostics-toolkit/README.md) + +Status: +Public-safe supporting atmospheric diagnostics module. + +What to review: +Reusable Python weather-field diagnostics, synthetic examples, data-policy +boundaries, and deterministic tests for thermodynamic, dynamic, ensemble, and +baseline-model utilities. + +This toolkit is a supporting scientific-data project and is not part of the +`sbom-diff-and-risk` release surface. 
+ +Useful entry points: + +- [`python-weather-diagnostics-toolkit` README](projects/python-weather-diagnostics-toolkit/README.md) +- [Reviewer path](projects/python-weather-diagnostics-toolkit/docs/reviewer-path.md) +- [Calculation methods](projects/python-weather-diagnostics-toolkit/docs/calculation-methods.md) +- [Diagnostic analysis](projects/python-weather-diagnostics-toolkit/docs/diagnostic-analysis.md) +- [Source-to-public mapping](projects/python-weather-diagnostics-toolkit/docs/source-to-public-mapping.md) +- [Methodology](projects/python-weather-diagnostics-toolkit/docs/methodology.md) +- [Data policy](projects/python-weather-diagnostics-toolkit/docs/data-policy.md) +- [Synthetic report](projects/python-weather-diagnostics-toolkit/examples/synthetic-weather-diagnostics-report.md) + ## Verification and Release Evidence `sbom-diff-and-risk` has separate verification surfaces. They are related, but diff --git a/projects/python-weather-diagnostics-toolkit/.gitignore b/projects/python-weather-diagnostics-toolkit/.gitignore new file mode 100644 index 0000000..db4afc8 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/.gitignore @@ -0,0 +1,25 @@ +# Raw and restricted data should stay local. +data/ +raw_data/ +downloads/ +*.nc +*.grib +*.grb +*.hdf +*.h5 +*.tif + +# Generated outputs are reproducible from scripts. +outputs/ +figures/ +*.png +*.pdf + +# Local credentials and caches. +.cdsapirc +.env +__pycache__/ +*.py[cod] +.pytest_cache/ +.ruff_cache/ +*.egg-info/ diff --git a/projects/python-weather-diagnostics-toolkit/PUBLICATION_BOUNDARIES.md b/projects/python-weather-diagnostics-toolkit/PUBLICATION_BOUNDARIES.md new file mode 100644 index 0000000..82c843b --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/PUBLICATION_BOUNDARIES.md @@ -0,0 +1,34 @@ +# Publication Boundaries + +This project is published under the pseudonymous technical identity `stacknil`. +It is a public-safe supporting module inside `scientific-computing-toolkit`. 
+ +## Allowed Public Framing + +- Python scientific-computing mini-lab +- ERA5-style weather-field diagnostics +- thermodynamic and dynamic diagnostic utilities +- reproducible synthetic examples +- data-policy-aware atmospheric analysis workflow +- reviewer-friendly documentation and tests + +## Disallowed Framing + +- official institutional project +- course archive or assignment submission +- operational forecast system +- production weather service +- proof of forecast skill +- redistribution channel for raw weather datasets + +## Public Identity Boundary + +Public documentation should use `stacknil` and should not include legal names, +personal or institutional identifiers, classroom labels, local usernames, or +institutional branding. + +## Data Boundary + +Raw data must remain outside the public repository unless it is tiny, +synthetic, intentionally generated for demonstration, and clearly labeled as +synthetic. diff --git a/projects/python-weather-diagnostics-toolkit/README.md b/projects/python-weather-diagnostics-toolkit/README.md new file mode 100644 index 0000000..26607f6 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/README.md @@ -0,0 +1,198 @@ +# Python Weather Diagnostics Toolkit + +A public-safe scientific-computing mini-lab for reproducible Python weather +diagnostics on ERA5-style gridded atmospheric fields. + +Repository role: +This is a supporting atmospheric diagnostics module inside +`scientific-computing-toolkit`. It demonstrates reusable Python workflows for +weather-field analysis, data-policy discipline, and reviewer-friendly +interpretation. It is not part of the `sbom-diff-and-risk` release surface, not +an operational forecast system, and not a separate meteorology portfolio. + +## What It Does + +The toolkit preserves the technical substance of local weather-analysis +experiments while removing course, personal, local-machine, and raw-data +artifacts. 
It focuses on: + +- ERA5-style coordinate and variable-name normalization +- 2 m temperature, 10 m wind, 500 hPa height, and 850 hPa wind/temperature fields +- Magnus-formula dewpoint diagnostics and round-trip humidity checks +- geopotential-height conversion +- relative-vorticity and horizontal-advection diagnostics +- cosine-latitude regional means +- a deterministic time-ordered ridge-regression baseline for 24-hour temperature prediction +- synthetic ensemble summaries for Nino-style forecast-plume interpretation + +## Repository Structure + +```text +python-weather-diagnostics-toolkit/ ++-- configs/example.yaml ++-- docs/ +| +-- data-policy.md +| +-- calculation-methods.md +| +-- diagnostic-analysis.md +| +-- methodology.md +| +-- reproducibility.md +| +-- reviewer-path.md +| +-- source-to-public-mapping.md ++-- examples/ +| +-- sample_metadata.json +| +-- synthetic-weather-diagnostics-report.md ++-- scripts/ +| +-- run_dynamics_summary.py +| +-- run_synthetic_ensemble.py +| +-- run_thermodynamic_check.py ++-- src/python_weather_diagnostics_toolkit/ ++-- tests/ ++-- PUBLICATION_BOUNDARIES.md ++-- SANITIZATION_REPORT.md +``` + +## Installation + +From this project directory: + +```bash +python -m pip install -e .[dev] +``` + +Optional meteorological plotting and unit-aware diagnostics can use: + +```bash +python -m pip install -e .[meteo] +``` + +## Example Usage + +Run the deterministic test suite: + +```bash +python -m pytest +``` + +Inspect the public CLI surfaces: + +```bash +python scripts/run_thermodynamic_check.py --help +python scripts/run_dynamics_summary.py --help +python scripts/run_synthetic_ensemble.py --help +``` + +Generate a synthetic ensemble summary: + +```bash +python scripts/run_synthetic_ensemble.py --out outputs/synthetic_ensemble_summary.csv +``` + +## Scientific Computing Surface + +The project exposes three reviewable calculation layers. 
+ +Thermodynamic layer: + +- converts temperature and relative humidity into dewpoint with the Magnus approximation +- accepts relative humidity as either percent or 0-1 ratio +- reconstructs humidity from dewpoint as a round-trip consistency check +- keeps this calculation independent of any real dataset + +Dynamic layer: + +- converts geopotential to geopotential height using standard gravity +- estimates latitude/longitude grid spacing from spherical Earth geometry +- computes relative vorticity as `dv/dx - du/dy` +- computes horizontal scalar advection as `-(u dS/dx + v dS/dy)` +- keeps finite-difference assumptions explicit for reviewer inspection + +Statistical layer: + +- reduces gridded fields to cosine-latitude area means +- constructs time-ordered forecast tables from regional features +- fits a deterministic ridge-regression baseline without random shuffling +- reports RMSE, MAE, bias, and correlation as workflow diagnostics +- summarizes synthetic ensemble spread, quantiles, and threshold probabilities + +For formulas and numerical assumptions, see +[`docs/calculation-methods.md`](docs/calculation-methods.md). + +## Diagnostic Analysis + +The intended analysis pattern is: + +```text +normalize input metadata +-> compute derived thermodynamic or dynamic fields +-> summarize fields into small artifacts +-> interpret the pattern with explicit limits +``` + +Examples: + +- A coherent 500 hPa vorticity feature can identify rotation or shear, but it + should be interpreted with height contours and wind context. +- Negative 850 hPa temperature advection can indicate cold-air import by + horizontal flow, but it is not a complete temperature tendency budget. +- A ridge-regression baseline can test whether simple regional predictors carry + signal, but it is not a forecast-skill claim without real validation data and + comparison baselines. 
+- Ensemble threshold probabilities summarize member agreement; synthetic + probabilities in this repository are examples of mechanics, not real climate + information. + +For interpretation guidance, see +[`docs/diagnostic-analysis.md`](docs/diagnostic-analysis.md). + +## Expected Inputs + +For real analysis, users point a local copy of `configs/example.yaml` at their +own ERA5-style NetCDF files. The toolkit expects common variables such as: + +- single-level fields: `t2m`, `d2m`, `u10`, `v10`, `tp`, or their long ERA5 names +- pressure-level fields: `t`, `u`, `v`, `z`, `r`, `w`, `vo`, or their long ERA5 names +- coordinates: `time` or `valid_time`, `latitude`, `longitude`, and optionally `pressure_level` + +## Generated Outputs + +The reusable scripts and library functions can produce: + +- JSON summaries for thermodynamic and dynamic diagnostics +- CSV ensemble summaries from synthetic reviewer-safe data +- local figures or NetCDF-derived summaries when users connect their own data + +Generated outputs are intentionally ignored by Git unless they are explicitly +small, synthetic, and documentation-oriented. + +## Limitations + +- This project is a compact diagnostics mini-lab, not a production forecasting system. +- The built-in data are synthetic and should not be interpreted as climate evidence. +- The ridge baseline is a transparent benchmark, not a claim of forecast skill. +- Map rendering and MetPy/Cartopy workflows are optional because they can require heavier system dependencies. +- Scientific interpretation depends on user-supplied data provenance, spatial domain, temporal sampling, and quality control. + +## Data Policy + +This repository does not redistribute ERA5, ECMWF, GRIB, NetCDF, station +datasets, course documents, personal reports, or local-machine artifacts. Users +must obtain datasets from their original providers and follow provider access +and licensing policies. See [`docs/data-policy.md`](docs/data-policy.md). 
+ +## Reviewer Path + +Use [`docs/reviewer-path.md`](docs/reviewer-path.md) for a 30-second, +5-minute, and 15-minute review route. + +The more detailed technical route is: + +1. [`docs/calculation-methods.md`](docs/calculation-methods.md) +2. [`docs/diagnostic-analysis.md`](docs/diagnostic-analysis.md) +3. [`docs/source-to-public-mapping.md`](docs/source-to-public-mapping.md) + +## Privacy-Safe Scope + +The public version is maintained under the pseudonymous technical identity +`stacknil`. It is not an official institutional project and does not include +raw school materials, personal identifiers, local paths, provider account +material, or restricted data. diff --git a/projects/python-weather-diagnostics-toolkit/SANITIZATION_REPORT.md b/projects/python-weather-diagnostics-toolkit/SANITIZATION_REPORT.md new file mode 100644 index 0000000..8432e31 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/SANITIZATION_REPORT.md @@ -0,0 +1,69 @@ +# Sanitization Report + +This report records how the local source materials were converted into a +public-safe GitHub project. 
+ +## Files Inspected + +The source folder was inspected for: + +- Python scripts for ERA5-style downloading, plotting, dewpoint checks, dynamic diagnostics, and simple baseline modeling +- one notebook with general machine-learning teaching material +- raw NetCDF weather datasets +- station/text data and spreadsheet-like materials +- generated PNG outputs +- course PDFs, PowerPoint decks, Word documents, report templates, and compressed archives +- personal report files and submission-style artifacts + +## Identifiers Removed or Generalized + +The public project removes or generalizes: + +- course and classroom framing +- personal identifiers found in filenames +- institution-specific source-folder context +- local-machine paths and cloud-sync paths +- report-template and submission wording +- Chinese course terms in public filenames and package text + +The public version uses neutral English project language and the pseudonymous +identity `stacknil`. + +## Raw Files Excluded + +The following classes of files were intentionally excluded: + +- raw ERA5/ECMWF NetCDF files +- station data files +- original course PDFs, PPT/PPTX, DOC/DOCX files, templates, and instructions +- original compressed archives +- original generated figures with unknown metadata +- notebook outputs and teaching-material copies +- local download credentials or provider-account configuration + +## Scientific Logic Preserved + +The public project preserves the reusable calculation ideas: + +- ERA5-style variable and coordinate alias handling +- dewpoint calculation with a Magnus approximation +- humidity/dewpoint consistency checks +- geopotential-height conversion +- relative-vorticity calculation +- horizontal temperature-advection diagnostics +- cosine-latitude regional means +- time-ordered ridge-regression baseline evaluation +- synthetic ensemble summary mechanics + +## Remaining Assumptions + +- The local source folder remains private and unchanged. 
+- Public examples are synthetic and do not reproduce real weather cases. +- Users must provide their own licensed datasets for real-data runs. +- Optional plotting workflows may need system dependencies outside the default test path. + +## Unresolved Risks + +- Real-data scientific interpretation depends on user-supplied data quality and provider licensing. +- Binary artifacts generated by users should be checked for metadata before publication. +- Data-provider terms may change; users are responsible for checking current terms before downloading or redistributing derived outputs. diff --git a/projects/python-weather-diagnostics-toolkit/configs/example.yaml b/projects/python-weather-diagnostics-toolkit/configs/example.yaml new file mode 100644 index 0000000..318759e --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/configs/example.yaml @@ -0,0 +1,34 @@ +# Copy this file locally and replace placeholders with paths to data you are +# allowed to use. Do not commit raw data paths or provider access material. 
+ +project: + name: python-weather-diagnostics-toolkit + role: supporting atmospheric diagnostics module + +data: + single_level_pattern: "/path/to/era5/single-level/*.nc" + pressure_level_pattern: "/path/to/era5/pressure-level/*.nc" + output_dir: "outputs" + +domain: + longitude_min: 70.0 + longitude_max: 140.0 + latitude_min: 15.0 + latitude_max: 55.0 + +diagnostics: + surface_temperature: true + dewpoint_roundtrip: true + geopotential_height_500hpa: true + vorticity_500hpa: true + temperature_advection_850hpa: true + ridge_temperature_baseline: true + +baseline_model: + lead_steps: 24 + train_fraction: 0.8 + ridge_alpha: 1.0 + +plotting: + quiver_skip: 5 + figure_dpi: 200 diff --git a/projects/python-weather-diagnostics-toolkit/docs/calculation-methods.md b/projects/python-weather-diagnostics-toolkit/docs/calculation-methods.md new file mode 100644 index 0000000..1e1f0b6 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/calculation-methods.md @@ -0,0 +1,221 @@ +# Calculation Methods + +This document describes the scientific-computing calculations implemented in +the public toolkit. The emphasis is on transparent numerical steps that can be +reviewed independently of any private or provider-restricted dataset. + +## Data Model + +The toolkit assumes gridded atmospheric data with latitude, longitude, time, +and optionally pressure-level coordinates. ERA5-style files often use different +names for the same physical field. The public code therefore separates the +scientific operation from provider-specific naming. 
+ +Canonical coordinates: + +| Canonical name | Accepted aliases | +| --- | --- | +| `time` | `time`, `valid_time` | +| `latitude` | `latitude`, `lat` | +| `longitude` | `longitude`, `lon` | +| `pressure_level` | `pressure_level`, `level`, `isobaricInhPa` | + +Canonical variables: + +| Canonical name | Common aliases | Typical unit | +| --- | --- | --- | +| `t2m` | `t2m`, `2m_temperature`, `2t` | K | +| `d2m` | `d2m`, `2m_dewpoint_temperature`, `2d` | K | +| `u10` | `u10`, `10m_u_component_of_wind`, `10u` | m/s | +| `v10` | `v10`, `10m_v_component_of_wind`, `10v` | m/s | +| `temperature` | `t`, `temperature` | K | +| `relative_humidity` | `r`, `relative_humidity` | percent or ratio | +| `u` | `u`, `u_component_of_wind` | m/s | +| `v` | `v`, `v_component_of_wind` | m/s | +| `omega` | `w`, `omega`, `vertical_velocity` | Pa/s | +| `geopotential` | `z`, `geopotential` | m2/s2 | +| `relative_vorticity` | `vo`, `relative_vorticity` | s^-1 | + +Latitude is sorted into ascending order when needed. This keeps finite +differences consistent and avoids silently flipping north/south derivatives. + +## Thermodynamic Calculation + +The dewpoint calculation uses a Magnus-form approximation. For temperature +`T` in degrees Celsius and relative humidity ratio `RH`: + +```text +gamma = ln(RH) + aT / (b + T) +Td = b gamma / (a - gamma) +``` + +The public implementation uses: + +```text +a = 17.625 +b = 243.04 +``` + +Inputs may use either percent humidity (`68`) or ratio humidity (`0.68`). The +implementation converts percent-like values to ratios and clips the ratio to +`[1e-6, 1]` to avoid logarithm singularities. + +The inverse check reconstructs humidity from temperature and dewpoint: + +```text +RH = exp(a Td / (b + Td)) / exp(a T / (b + T)) +``` + +This is a numerical consistency check, not independent observational +validation. It confirms that the forward and inverse formulas agree. 
+ +## Geopotential Height + +ERA5 pressure-level files commonly store geopotential in `m2/s2` rather than +height in metres. The toolkit converts it to approximate geopotential height: + +```text +height = geopotential / g0 +g0 = 9.80665 m/s2 +``` + +This conversion supports 500 hPa height diagnostics and contour overlays. + +## Horizontal Grid Spacing + +For compact diagnostics on regular latitude/longitude grids, spacing is +approximated with spherical Earth geometry: + +```text +dy = R d(phi) +dx = R cos(phi) d(lambda) +R = 6,371,000 m +``` + +where `phi` is latitude in radians and `lambda` is longitude in radians. This +is sufficient for reviewer-safe synthetic tests and first-pass diagnostics. For +formal research, users should validate map projection assumptions, grid +staggering, resolution, and polar behavior. + +## Relative Vorticity + +Relative vorticity is computed as: + +```text +zeta = dv/dx - du/dy +``` + +where `u` is zonal wind and `v` is meridional wind. The implementation uses +centered finite differences through `numpy.gradient`, which falls back to +one-sided differences at the grid edges. + +Diagnostic meaning: + +- sign depends on coordinate conventions and hemisphere; with `x` eastward and `y` northward, positive `zeta` is counterclockwise (cyclonic in the Northern Hemisphere) +- coherent extrema can identify rotation or shear features +- a vorticity field should be interpreted together with height contours and wind flow + +## Horizontal Advection + +For a scalar field `S`, horizontal advection is: + +```text +advection = -(u dS/dx + v dS/dy) +``` + +For temperature, negative values indicate cooling tendency from horizontal flow +under the chosen units and coordinate assumptions; positive values indicate +warming tendency. In a full thermodynamic budget, this is only one term. The +public mini-lab intentionally keeps the default implementation to horizontal +advection so that tests remain small and dependency-light. 
+ +## Area-Weighted Regional Mean + +Regional features use cosine-latitude weighting: + +```text +weight(phi) = cos(phi) +area_mean = sum(field * weight) / sum(valid_weight) +``` + +This avoids treating all latitude rows as equal-area rows. The implementation +supports arrays with time or pressure dimensions before the final +latitude/longitude dimensions. + +## Time-Ordered Baseline Model + +The baseline model transforms gridded fields into region-mean features: + +```text +X = [Z500_mean, T850_mean, T2m_current_mean] +y = T2m_future_mean +``` + +The target is formed by shifting the current 2 m temperature series by a +configured lead: + +```text +y(t) = T2m(t + lead_steps) +``` + +The split is time ordered: + +```text +train = first train_fraction of rows +test = remaining rows +``` + +This avoids temporal leakage that would occur if samples were randomly shuffled. + +The ridge objective is: + +```text +minimize ||beta0 + X beta - y||^2 + alpha ||beta||^2 +``` + +Features are standardized using the training partition only. The intercept +`beta0` is fitted but not penalized. + +## Evaluation Metrics + +The toolkit reports: + +```text +RMSE = sqrt(mean((prediction - truth)^2)) +MAE = mean(abs(prediction - truth)) +bias = mean(prediction - truth) +correlation = corr(prediction, truth) +``` + +Metrics should be interpreted as workflow diagnostics unless the data source, +sampling design, baseline comparisons, and validation periods are documented. + +## Synthetic Ensemble Summary + +The synthetic Nino-style ensemble generates deterministic plume values with a +fixed seed. It then computes: + +```text +mean = ensemble mean +spread = ensemble standard deviation +p10 = 10th percentile +p90 = 90th percentile +warm_probability = fraction(member >= 0.5) +cold_probability = fraction(member <= -0.5) +``` + +These calculations demonstrate ensemble-summary mechanics without embedding +real forecast products. 
+ +Example synthetic summary rows: + +| Lead month | Mean | Spread | P10 | P90 | Warm probability | Cold probability | +| ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| 1 | 1.514 | 0.253 | 1.127 | 1.829 | 1.00 | 0.00 | +| 6 | 1.104 | 0.440 | 0.523 | 1.629 | 0.90 | 0.00 | +| 12 | 0.116 | 0.654 | -0.680 | 0.924 | 0.25 | 0.25 | +| 18 | -0.768 | 1.019 | -1.577 | 0.187 | 0.05 | 0.55 | +| 24 | -1.469 | 1.204 | -3.092 | 0.213 | 0.10 | 0.85 | + +These values are synthetic. They are useful for verifying table generation and +reviewer interpretation, not for climate diagnosis. diff --git a/projects/python-weather-diagnostics-toolkit/docs/data-policy.md b/projects/python-weather-diagnostics-toolkit/docs/data-policy.md new file mode 100644 index 0000000..c71aa67 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/data-policy.md @@ -0,0 +1,39 @@ +# Data Policy + +This project is designed around reproducible methods, not redistributed raw +weather datasets. + +## Included + +- reusable Python source code +- configuration templates with placeholder paths +- deterministic synthetic examples +- small text summaries and documentation +- tests that run without external datasets + +## Excluded + +- raw ERA5, ECMWF, NetCDF, GRIB, HDF, station, or forecast files +- raw course PDFs, PowerPoint decks, Word templates, assignment instructions, or reports +- local-machine paths and usernames +- personal, institutional, classroom, or collaborator identifiers +- provider account material or local access configuration +- generated binary artifacts with unknown metadata + +## User Responsibility + +Users who run this toolkit on real weather data must obtain datasets from their +original providers and follow the providers' access, citation, redistribution, +and licensing policies. For ERA5-style workflows, that usually means obtaining +data through the Copernicus Climate Data Store or another authorized provider. 
+ +The repository intentionally avoids bundling downloader access material. If a +user uses `cdsapi` or another data client, provider account material should +remain in local user configuration outside this repository. + +## Public Outputs + +Public outputs should be limited to small derived summaries, synthetic +demonstrations, or figures with metadata stripped or regenerated from public-safe +inputs. Do not commit raw data, private station files, or binary course +materials. diff --git a/projects/python-weather-diagnostics-toolkit/docs/diagnostic-analysis.md b/projects/python-weather-diagnostics-toolkit/docs/diagnostic-analysis.md new file mode 100644 index 0000000..291dcaa --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/diagnostic-analysis.md @@ -0,0 +1,168 @@ +# Diagnostic Analysis + +This document explains how to read the toolkit's diagnostics as scientific +computing outputs. The goal is to make interpretation explicit while keeping +the project clearly outside operational forecasting. + +## Analysis Chain + +The public workflow follows a conservative chain: + +```text +input metadata +-> coordinate and variable normalization +-> unit-aware or unit-documented calculation +-> derived diagnostic field or regional feature +-> small summary artifact +-> bounded interpretation +``` + +Each step is intentionally inspectable. A reviewer should be able to identify +which variables were used, which equation was applied, what assumptions entered +the calculation, and what conclusion is justified. 
+ +## Surface Field Analysis + +Typical surface diagnostics combine: + +- 2 m temperature as a filled field +- 10 m wind vectors or streamlines +- optional precipitation accumulation or pressure fields + +Interpretation pattern: + +```text +temperature gradient + wind direction -> possible thermal advection context +``` + +What this supports: + +- identifying warm/cold spatial gradients +- checking whether low-level wind is aligned with the gradient +- selecting cases for deeper pressure-level diagnostics + +What it does not support by itself: + +- official weather warnings +- attribution of a weather event +- forecast-skill claims + +## Dewpoint and Humidity Consistency + +The dewpoint check is a calculation validation surface. It compares a custom +Magnus-form calculation with an inverse humidity reconstruction. In real-data +workflows, it can also be compared against a library such as MetPy. + +Interpretation pattern: + +```text +small round-trip difference -> formula and unit handling are internally consistent +large difference -> inspect humidity scale, temperature units, missing values, or metadata +``` + +The public synthetic run returns a dewpoint of about `15.8 C` for `22 C` and +`68%` humidity, with a reconstructed humidity ratio of `0.68`. This demonstrates +calculation consistency only. + +## 500 hPa Height and Vorticity + +The 500 hPa layer is often used as a mid-tropospheric circulation diagnostic. 
+The toolkit supports: + +- geopotential-to-height conversion +- relative-vorticity computation +- contour/fill separation for interpretation + +Interpretation pattern: + +```text +height contours -> large-scale trough/ridge structure +vorticity extrema -> local rotation/shear features +co-location -> possible dynamically active region +``` + +Care points: + +- vorticity sign depends on wind gradients and coordinate orientation +- boundary rows/columns are less stable under finite differences +- height and vorticity should be read together, not as isolated fields + +## 850 hPa Temperature Advection + +The 850 hPa layer is useful for lower-tropospheric thermal advection. The public +calculation is: + +```text +-(u dT/dx + v dT/dy) +``` + +Interpretation pattern: + +```text +positive advection -> horizontal flow imports warmer air +negative advection -> horizontal flow imports colder air +``` + +This is a horizontal term only. A full temperature tendency diagnosis may also +need vertical motion, diabatic heating, boundary-layer processes, surface +fluxes, and analysis increments. The public project keeps the default +calculation narrow so it remains reproducible and testable without heavy +external data. + +## Regional Temperature Baseline + +The baseline model is intentionally simple: + +- reduce gridded fields to region-mean predictors +- use previous/current circulation and temperature features +- predict a future regional 2 m temperature target +- split by time order +- report transparent metrics + +Interpretation pattern: + +```text +low error relative to a persistence baseline -> features may carry predictive signal +high bias -> missing physics, bad sampling, or target-period drift +low correlation -> poor phase tracking +large residual tails -> weak extreme-event handling +``` + +The current public implementation does not claim skill because it does not +bundle a real validation dataset or persistence comparison. 
It provides the +calculation structure needed for such a review. + +## Ensemble Plume Interpretation + +The synthetic ensemble output demonstrates how to summarize multiple model +members: + +- ensemble mean shows the central tendency +- spread shows member disagreement +- quantiles show an uncertainty envelope +- threshold probabilities translate members into categorical risk-like summaries + +Interpretation pattern: + +```text +mean crosses threshold + high member agreement -> stronger synthetic signal +mean near zero + large spread -> uncertain phase +probability shifts over lead time -> changing member consensus +``` + +In the deterministic synthetic example, the ensemble starts warm, becomes +mixed near lead month 12, and shifts cold by lead month 24. This is an example +of interpreting an artificial plume, not a statement about the real ocean. + +## Reviewer Questions + +Useful reviewer questions: + +- Are variable aliases explicit enough to avoid silent field swaps? +- Are units documented at each calculation boundary? +- Are coordinate assumptions visible? +- Is the model split time ordered? +- Are synthetic values labeled synthetic? +- Are real-data claims avoided unless data provenance is documented? + +The intended answer should be yes for the public mini-lab. diff --git a/projects/python-weather-diagnostics-toolkit/docs/methodology.md b/projects/python-weather-diagnostics-toolkit/docs/methodology.md new file mode 100644 index 0000000..651555f --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/methodology.md @@ -0,0 +1,109 @@ +# Methodology + +This mini-lab converts local weather-analysis exercises into reusable, +public-safe scientific-computing components. The workflow is intentionally +compact: every calculation should be inspectable, deterministic, and runnable on +synthetic data before a user connects real datasets. + +## 1. 
Dataset Normalization + +ERA5-style files can use short names (`t2m`, `u`, `v`, `z`) or long names +(`2m_temperature`, `u_component_of_wind`, `geopotential`). The toolkit uses +alias maps to normalize common coordinate and variable names: + +- `valid_time` -> `time` +- `lat` / `lon` -> `latitude` / `longitude` +- `level` / `isobaricInhPa` -> `pressure_level` +- common ERA5 short and long variable names -> canonical diagnostic names + +Latitude is sorted into ascending order when needed so array derivatives are +consistent. + +## 2. Thermodynamic Diagnostics + +The public thermodynamic check uses the Magnus approximation: + +```text +gamma = ln(RH) + aT / (b + T) +Td = b gamma / (a - gamma) +``` + +where `T` is temperature in degrees Celsius, `RH` is relative humidity as a +0-1 ratio, and `Td` is dewpoint temperature. The default constants are +`a = 17.625` and `b = 243.04`. + +The reviewer-safe check computes dewpoint and then reconstructs relative +humidity from temperature and dewpoint. This confirms implementation +consistency without requiring real ERA5 files. + +## 3. Dynamic Diagnostics + +The dynamic calculations operate on regular latitude/longitude grids: + +```text +geopotential height = geopotential / g0 +relative vorticity = dv/dx - du/dy +horizontal advection = -(u dS/dx + v dS/dy) +``` + +Grid spacing is approximated from spherical Earth geometry: + +```text +dy = R d(latitude) +dx = R cos(latitude) d(longitude) +``` + +This is adequate for compact diagnostics and tests. For formal research or +forecast operations, users should validate projection assumptions, grid +staggering, units, vertical coordinates, and boundary behavior. + +## 4. Regional Features + +Region-mean features use cosine-latitude weighting so higher-latitude grid rows +do not receive the same area weight as lower-latitude rows. 
A typical feature +table can include: + +- regional 500 hPa geopotential height +- regional 850 hPa temperature +- current 2 m temperature +- future 2 m temperature target shifted by a configured lead + +## 5. Baseline Prediction + +The included baseline is a transparent ridge regression. Features are standardized with training-split statistics and the intercept is left unpenalized: + +```text +minimize ||X beta - y||^2 + alpha ||beta||^2 +``` + +The split is time ordered: the first part of the series trains the model, and +the later part evaluates it. This avoids random leakage across time and keeps +the result reproducible. Metrics include RMSE, MAE, bias, and correlation. + +The baseline is included for workflow demonstration only. It is not a claim of +forecast skill. + +## 6. Synthetic Ensemble Summary + +The synthetic Nino-style ensemble utility creates deterministic plume data with +a fixed random seed. It demonstrates: + +- multi-model spread +- ensemble mean +- 10th and 90th percentile envelope +- warm/cold threshold probabilities + +The generated values are synthetic and should be read only as an example of +summary mechanics. + +## 7. Interpretation Boundaries + +A public interpretation should say what was computed and what the diagnostic +suggests, while avoiding unsupported claims. For example: + +- A coherent vorticity maximum can identify a circulation feature in the chosen field. +- Temperature advection sign can indicate whether the local flow imports warmer or colder air. +- A ridge baseline can expose whether simple regional features carry predictive information. + +These are diagnostics, not operational warnings, official forecasts, or +production climate conclusions. 
diff --git a/projects/python-weather-diagnostics-toolkit/docs/reproducibility.md b/projects/python-weather-diagnostics-toolkit/docs/reproducibility.md new file mode 100644 index 0000000..9610369 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/reproducibility.md @@ -0,0 +1,61 @@ +# Reproducibility + +The public project is designed to reproduce without raw weather datasets. + +## Local Setup + +```bash +python -m pip install -e .[dev] +``` + +Optional meteorological plotting dependencies: + +```bash +python -m pip install -e .[meteo] +``` + +## Checks + +Run: + +```bash +python -m pytest +python -m compileall src scripts +python scripts/run_thermodynamic_check.py --help +python scripts/run_dynamics_summary.py --help +python scripts/run_synthetic_ensemble.py --help +``` + +## Synthetic Demo + +```bash +python scripts/run_thermodynamic_check.py +python scripts/run_dynamics_summary.py +python scripts/run_synthetic_ensemble.py --out outputs/synthetic_ensemble_summary.csv +``` + +These commands use deterministic synthetic values or toy fields. They verify the +public calculation paths without requiring ERA5, ECMWF, station, or local course +data. + +## Real-Data Reproduction + +To run the workflow on real data: + +1. Obtain data from the original provider. +2. Confirm the provider permits your use case. +3. Copy `configs/example.yaml` to a local untracked config file. +4. Replace placeholder paths with local dataset paths. +5. Run diagnostics locally and keep raw data outside Git. 
+ +## Final Checklist + +- no raw datasets included +- no personal identifiers +- no school identifiers +- no local paths +- no course submission artifacts +- no provider account material +- no generated binary artifacts with unknown metadata +- synthetic examples are labeled as synthetic +- baseline outputs are not presented as forecast skill claims diff --git a/projects/python-weather-diagnostics-toolkit/docs/reviewer-path.md b/projects/python-weather-diagnostics-toolkit/docs/reviewer-path.md new file mode 100644 index 0000000..9d74c48 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/reviewer-path.md @@ -0,0 +1,92 @@ +# Reviewer Path + +## 30-second orientation + +Read the README first. Confirm this is a sanitized Python weather-diagnostics +mini-lab, not a course archive, not a raw-data mirror, and not an operational +forecast system. + +Check the first screen for the project role: + +- supporting atmospheric diagnostics module +- public-safe scientific-computing workflow +- not part of the `sbom-diff-and-risk` release surface +- not a separate meteorology portfolio + +## 5-minute workflow review + +Inspect: + +- [`docs/methodology.md`](methodology.md) +- [`docs/calculation-methods.md`](calculation-methods.md) +- [`docs/diagnostic-analysis.md`](diagnostic-analysis.md) +- [`docs/data-policy.md`](data-policy.md) +- [`examples/synthetic-weather-diagnostics-report.md`](../examples/synthetic-weather-diagnostics-report.md) +- [`examples/sample_metadata.json`](../examples/sample_metadata.json) + +This pass should show how local weather-analysis scripts were converted into +reusable calculation modules and reviewer-safe examples. + +Questions to answer: + +- Are field aliases separated from scientific calculations? +- Are dewpoint, vorticity, advection, regional means, and ensemble summaries + described with equations or explicit numerical assumptions? +- Are synthetic examples clearly labeled as synthetic? 
+- Are forecast-skill claims avoided unless real validation data are supplied? + +## 15-minute reproducibility review + +Run: + +```bash +python -m pip install -e .[dev] +python -m pytest +python -m compileall src scripts +python scripts/run_thermodynamic_check.py --help +python scripts/run_dynamics_summary.py --help +python scripts/run_synthetic_ensemble.py --help +``` + +Then run one synthetic path: + +```bash +python scripts/run_synthetic_ensemble.py --out outputs/synthetic_ensemble_summary.csv +``` + +Expected result: + +- tests pass without raw ERA5, ECMWF, station, or course files +- CLI help surfaces are available +- the ensemble command writes a small CSV under ignored `outputs/` +- no generated caches or output files need to be committed + +## Boundaries + +Read: + +- [`docs/data-policy.md`](data-policy.md) +- [`PUBLICATION_BOUNDARIES.md`](../PUBLICATION_BOUNDARIES.md) +- [`SANITIZATION_REPORT.md`](../SANITIZATION_REPORT.md) +- [`docs/source-to-public-mapping.md`](source-to-public-mapping.md) + +This project is portfolio evidence for Python scientific-computing structure, +not a public redistribution of raw weather data or course material. + +## Technical Deep-Dive Route + +For a deeper review, read the project in this order: + +1. `src/python_weather_diagnostics_toolkit/aliases.py` for input-name + normalization. +2. `src/python_weather_diagnostics_toolkit/thermodynamics.py` and + `docs/calculation-methods.md` for dewpoint formulas and round-trip checks. +3. `src/python_weather_diagnostics_toolkit/dynamics.py` for grid spacing, + vorticity, and advection. +4. `src/python_weather_diagnostics_toolkit/features.py` for cosine-latitude + regional means and time-ordered baseline modeling. +5. `src/python_weather_diagnostics_toolkit/ensemble.py` for deterministic + synthetic plume summaries. + +This route should make the project reviewable as code, not just as a +documentation wrapper. 
diff --git a/projects/python-weather-diagnostics-toolkit/docs/source-to-public-mapping.md b/projects/python-weather-diagnostics-toolkit/docs/source-to-public-mapping.md new file mode 100644 index 0000000..32a9c4f --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/source-to-public-mapping.md @@ -0,0 +1,60 @@ +# Source-to-Public Mapping + +This document explains how local weather-analysis materials were converted into +a public-safe technical project. It avoids preserving original filenames, +assignment framing, or private source context. + +## Public Design Principle + +The public project keeps reusable scientific-computing logic and discards +course-specific packaging. The result should look like a small technical module, +not a submitted report archive. + +## Mapping Table + +| Local analysis theme | Public module or document | What changed | +| --- | --- | --- | +| ERA5 field loading and variable-name handling | `aliases.py`, `configs/example.yaml` | local paths replaced by placeholders; aliases made reusable | +| surface temperature and wind plotting | `README.md`, `methodology.md` | plotting task reframed as diagnostics workflow | +| dewpoint verification | `thermodynamics.py`, `run_thermodynamic_check.py`, tests | formula isolated and tested with synthetic values | +| 500 hPa height and vorticity maps | `dynamics.py`, `diagnostic-analysis.md`, tests | map-specific code converted to numerical fields | +| 850 hPa temperature advection | `dynamics.py`, `run_dynamics_summary.py` | calculation made dependency-light and synthetic-testable | +| regional feature construction | `features.py` | area weighting and target shifting made explicit | +| simple temperature prediction baseline | `features.py`, `diagnostic-analysis.md` | baseline framed as workflow sanity check, not forecast skill | +| ensemble plume exercises | `ensemble.py`, `run_synthetic_ensemble.py` | real or teaching data replaced by deterministic synthetic data | +| generated figures and 
reports | `examples/synthetic-weather-diagnostics-report.md` | binary outputs replaced by text explanation | +| course documents and templates | excluded | not suitable for public repository | +| raw NetCDF and station data | excluded | users must obtain data from providers | + +## Why Not Preserve Original Scripts? + +The original scripts mixed local paths, case-specific assumptions, plotting +side effects, and report-oriented output names. The public version separates: + +- calculation functions in `src/` +- CLI smoke paths in `scripts/` +- reproducibility checks in `tests/` +- public interpretation in `docs/` and `examples/` + +This makes the project easier to review and safer to publish. + +## What Technical Substance Was Preserved? + +Preserved: + +- ERA5-style gridded data handling +- humidity and dewpoint diagnostics +- geopotential-height conversion +- relative-vorticity and advection calculations +- regional feature engineering +- time-ordered baseline modeling +- ensemble summary interpretation + +Not preserved: + +- private filenames or source-folder structure +- raw datasets +- provider account material +- classroom prompts or report templates +- generated binary artifacts with unknown metadata +- personal or institutional identifiers diff --git a/projects/python-weather-diagnostics-toolkit/examples/sample_metadata.json b/projects/python-weather-diagnostics-toolkit/examples/sample_metadata.json new file mode 100644 index 0000000..71e5ec0 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/examples/sample_metadata.json @@ -0,0 +1,16 @@ +{ + "project": "python-weather-diagnostics-toolkit", + "identity": "stacknil", + "data_status": "synthetic reviewer-safe examples only", + "real_data_required_for_research": true, + "raw_data_included": false, + "course_materials_included": false, + "local_paths_included": false, + "example_domains": [ + "thermodynamic dewpoint checks", + "relative-vorticity diagnostics", + "temperature-advection diagnostics", 
+ "time-ordered baseline evaluation", + "synthetic ensemble summaries" + ] +} diff --git a/projects/python-weather-diagnostics-toolkit/examples/synthetic-weather-diagnostics-report.md b/projects/python-weather-diagnostics-toolkit/examples/synthetic-weather-diagnostics-report.md new file mode 100644 index 0000000..01df24c --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/examples/synthetic-weather-diagnostics-report.md @@ -0,0 +1,98 @@ +# Synthetic Weather Diagnostics Report + +This example report uses synthetic toy values only. It demonstrates the shape of +a reviewer-safe interpretation without claiming real weather results. + +## Thermodynamic Check + +A sample temperature of `22 C` with relative humidity of `68%` gives a +dewpoint near `15.8 C` using the Magnus approximation. Reconstructing relative +humidity from that dewpoint returns the original humidity ratio within numerical +precision. + +Command: + +```bash +python scripts/run_thermodynamic_check.py --temperature-c 22 --relative-humidity 68 +``` + +Interpretation: +The check verifies that the implementation is internally consistent. It does +not validate a real observation, station record, or reanalysis field. + +## Dynamic Check + +The synthetic gridded field has smooth wind and temperature gradients. The +dynamic summary computes: + +- relative vorticity from `dv/dx - du/dy` +- horizontal temperature advection from `-(u dT/dx + v dT/dy)` +- geopotential height from geopotential divided by standard gravity + +Command: + +```bash +python scripts/run_dynamics_summary.py +``` + +Interpretation: +A sign-coherent advection field can indicate whether the synthetic flow imports +warmer or colder air across the toy domain. In real analysis, this inference +would need unit checks, map-domain checks, temporal context, and source-data +quality control. + +## Baseline Prediction + +The ridge-regression baseline uses region-mean features and a time-ordered +train/test split. 
It is intentionally simple and transparent. + +Diagnostic readout: + +- `rmse` reports typical prediction error magnitude in target units +- `mae` is less sensitive to isolated large residuals than RMSE +- `bias` is the mean of prediction minus truth; positive values indicate systematic overprediction, negative values underprediction +- `correlation` indicates phase tracking, not absolute calibration + +Interpretation: +The baseline is useful as a workflow sanity check. It should not be described +as operational forecast skill without independent validation, comparison +baselines, and real-data provenance. + +## Synthetic Ensemble + +The Nino-style ensemble example summarizes deterministic synthetic plume data: + +| Field | Meaning | +| --- | --- | +| `mean` | ensemble mean by lead month | +| `spread` | ensemble sample standard deviation across members | +| `p10`, `p90` | 10th and 90th percentile envelope across members | +| `warm_probability` | fraction of members at or above `0.5` | +| `cold_probability` | fraction of members at or below `-0.5` | + +Command: + +```bash +python scripts/run_synthetic_ensemble.py --out outputs/synthetic_ensemble_summary.csv +``` + +Example interpretation: +Early synthetic lead months have high warm-threshold agreement. Middle lead +months become less certain as spread increases. Later lead months shift toward +cold-threshold agreement. This demonstrates how an ensemble plume can be +summarized as central tendency, spread, and threshold agreement. + +Interpretation: +The example shows how to report ensemble spread and threshold probabilities +without embedding real forecast products or provider-restricted files. + +## Misuse Checks + +Do not treat this report as: + +- evidence for a real historical weather event +- a forecast product +- a benchmark against operational numerical weather prediction +- proof that any data provider permits redistribution + +The report is a reviewer-safe explanation of mechanics only. 
diff --git a/projects/python-weather-diagnostics-toolkit/pyproject.toml b/projects/python-weather-diagnostics-toolkit/pyproject.toml new file mode 100644 index 0000000..6c8eda4 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/pyproject.toml @@ -0,0 +1,29 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "python-weather-diagnostics-toolkit" +version = "0.1.0" +description = "A public-safe Python mini-lab for reproducible weather-field diagnostics." +readme = "README.md" +requires-python = ">=3.10" +license = { text = "MIT" } +authors = [{ name = "stacknil" }] +dependencies = [ + "numpy>=1.24", + "pandas>=2.0", + "xarray>=2023.1", + "pyyaml>=6.0" +] + +[project.optional-dependencies] +meteo = ["matplotlib>=3.7", "metpy>=1.6", "cartopy>=0.22"] +download = ["cdsapi>=0.7"] +dev = ["pytest>=7.0", "ruff>=0.4"] + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/projects/python-weather-diagnostics-toolkit/scripts/run_dynamics_summary.py b/projects/python-weather-diagnostics-toolkit/scripts/run_dynamics_summary.py new file mode 100644 index 0000000..55843b6 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/scripts/run_dynamics_summary.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +"""Run a synthetic dynamic-diagnostics summary.""" + +from __future__ import annotations + +import argparse +import json + +import numpy as np + +from python_weather_diagnostics_toolkit.dynamics import horizontal_advection, relative_vorticity +from python_weather_diagnostics_toolkit.synthetic import make_synthetic_weather_dataset + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--pressure-level", type=int, default=850) + return parser + + +def main() -> None: + args = build_parser().parse_args() + ds = make_synthetic_weather_dataset() + layer = 
ds.sel(pressure_level=args.pressure_level) + lat = ds.latitude.values + lon = ds.longitude.values + vort = relative_vorticity(layer.u.values, layer.v.values, lat, lon) + adv = horizontal_advection(layer.temperature.values, layer.u.values, layer.v.values, lat, lon) + payload = { + "pressure_level_hpa": args.pressure_level, + "max_abs_vorticity_s-1": float(np.nanmax(np.abs(vort))), + "mean_temperature_advection_k_s-1": float(np.nanmean(adv)), + } + print(json.dumps(payload, indent=2, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/projects/python-weather-diagnostics-toolkit/scripts/run_synthetic_ensemble.py b/projects/python-weather-diagnostics-toolkit/scripts/run_synthetic_ensemble.py new file mode 100644 index 0000000..5a1b176 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/scripts/run_synthetic_ensemble.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +"""Generate a reviewer-safe synthetic ensemble summary CSV.""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from python_weather_diagnostics_toolkit.ensemble import ( + ensemble_summary, + make_synthetic_nino_ensemble, +) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--out", type=Path, default=Path("outputs/synthetic_ensemble_summary.csv")) + parser.add_argument("--models", type=int, default=20) + parser.add_argument("--leads", type=int, default=24) + parser.add_argument("--seed", type=int, default=42) + return parser + + +def main() -> None: + args = build_parser().parse_args() + df = make_synthetic_nino_ensemble( + n_models=args.models, + n_leads=args.leads, + seed=args.seed, + ) + summary = ensemble_summary(df) + args.out.parent.mkdir(parents=True, exist_ok=True) + summary.to_csv(args.out) + print(f"wrote {args.out}") + + +if __name__ == "__main__": + main() diff --git a/projects/python-weather-diagnostics-toolkit/scripts/run_thermodynamic_check.py 
b/projects/python-weather-diagnostics-toolkit/scripts/run_thermodynamic_check.py new file mode 100644 index 0000000..91be5a3 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/scripts/run_thermodynamic_check.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +"""Run a synthetic dewpoint calculation check.""" + +from __future__ import annotations + +import argparse +import json + +import numpy as np + +from python_weather_diagnostics_toolkit.thermodynamics import ( + magnus_dewpoint_celsius, + relative_humidity_from_dewpoint, +) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--temperature-c", type=float, default=22.0) + parser.add_argument("--relative-humidity", type=float, default=68.0) + return parser + + +def main() -> None: + args = build_parser().parse_args() + dewpoint = magnus_dewpoint_celsius(args.temperature_c, args.relative_humidity) + rh_back = relative_humidity_from_dewpoint(args.temperature_c, dewpoint) + payload = { + "temperature_c": args.temperature_c, + "relative_humidity_input": args.relative_humidity, + "dewpoint_c": float(np.asarray(dewpoint)), + "roundtrip_relative_humidity_ratio": float(np.asarray(rh_back)), + } + print(json.dumps(payload, indent=2, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/__init__.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/__init__.py new file mode 100644 index 0000000..c9bce48 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/__init__.py @@ -0,0 +1,22 @@ +"""Public-safe weather diagnostics utilities for gridded atmospheric fields.""" + +from .aliases import get_data_array, standardize_coordinates +from .dynamics import geopotential_to_height, horizontal_advection, relative_vorticity +from .ensemble import ensemble_summary, 
make_synthetic_nino_ensemble +from .features import area_mean, regression_metrics, ridge_regression_fit_predict +from .thermodynamics import magnus_dewpoint_celsius, relative_humidity_from_dewpoint + +__all__ = [ + "area_mean", + "ensemble_summary", + "geopotential_to_height", + "get_data_array", + "horizontal_advection", + "magnus_dewpoint_celsius", + "make_synthetic_nino_ensemble", + "regression_metrics", + "relative_humidity_from_dewpoint", + "relative_vorticity", + "ridge_regression_fit_predict", + "standardize_coordinates", +] diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/aliases.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/aliases.py new file mode 100644 index 0000000..a9b6de5 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/aliases.py @@ -0,0 +1,77 @@ +"""Coordinate and variable aliases for ERA5-style weather datasets.""" + +from __future__ import annotations + +from collections.abc import Iterable + +import xarray as xr + +COORDINATE_ALIASES: dict[str, tuple[str, ...]] = { + "time": ("time", "valid_time"), + "latitude": ("latitude", "lat"), + "longitude": ("longitude", "lon"), + "pressure_level": ("pressure_level", "level", "isobaricInhPa"), +} + +VARIABLE_ALIASES: dict[str, tuple[str, ...]] = { + "t2m": ("t2m", "2m_temperature", "2t"), + "d2m": ("d2m", "2m_dewpoint_temperature", "2d"), + "u10": ("u10", "10m_u_component_of_wind", "10u"), + "v10": ("v10", "10m_v_component_of_wind", "10v"), + "tp": ("tp", "total_precipitation"), + "temperature": ("t", "temperature"), + "relative_humidity": ("r", "relative_humidity"), + "u": ("u", "u_component_of_wind"), + "v": ("v", "v_component_of_wind"), + "omega": ("w", "omega", "vertical_velocity"), + "geopotential": ("z", "geopotential"), + "relative_vorticity": ("vo", "relative_vorticity"), + "msl": ("msl", "mean_sea_level_pressure"), +} + + +def _first_present(candidates: 
Iterable[str], names: Iterable[str]) -> str | None: + available = set(names) + for candidate in candidates: + if candidate in available: + return candidate + return None + + +def standardize_coordinates(ds: xr.Dataset) -> xr.Dataset: + """Rename common ERA5 coordinate variants to canonical names. + + The function intentionally avoids mutating data variables. It only renames + coordinates/dimensions when a canonical name is absent and a known alias is + present. + """ + + rename: dict[str, str] = {} + names = set(ds.coords) | set(ds.dims) + for canonical, aliases in COORDINATE_ALIASES.items(): + if canonical in names: + continue + found = _first_present(aliases, names) + if found is not None and found != canonical: + rename[found] = canonical + + out = ds.rename(rename) if rename else ds + if "latitude" in out.coords and out.latitude.size > 1: + values = out.latitude.values + if values[0] > values[-1]: + out = out.sortby("latitude") + return out + + +def get_data_array(ds: xr.Dataset, canonical_name: str) -> xr.DataArray: + """Return a data variable by canonical name or known alias.""" + + if canonical_name in ds.data_vars: + return ds[canonical_name] + + aliases = VARIABLE_ALIASES.get(canonical_name, (canonical_name,)) + found = _first_present(aliases, ds.data_vars) + if found is None: + available = ", ".join(sorted(ds.data_vars)) + raise KeyError(f"Missing variable '{canonical_name}'. 
Available variables: {available}") + return ds[found] diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/dynamics.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/dynamics.py new file mode 100644 index 0000000..476cdea --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/dynamics.py @@ -0,0 +1,66 @@ +"""Dynamic diagnostics for regular latitude/longitude weather fields.""" + +from __future__ import annotations + +import numpy as np + +EARTH_RADIUS_M = 6_371_000.0 +STANDARD_GRAVITY = 9.80665 + + +def geopotential_to_height(geopotential) -> np.ndarray: + """Convert geopotential from m2/s2 to geopotential height in meters.""" + + return np.asarray(geopotential, dtype=float) / STANDARD_GRAVITY + + +def _spacing_m(latitude, longitude) -> tuple[np.ndarray, np.ndarray]: + lat = np.asarray(latitude, dtype=float) + lon = np.asarray(longitude, dtype=float) + if lat.ndim != 1 or lon.ndim != 1: + raise ValueError("latitude and longitude must be one-dimensional") + if lat.size < 2 or lon.size < 2: + raise ValueError("latitude and longitude must contain at least two points") + + dlat = np.gradient(np.deg2rad(lat)) + dlon = np.gradient(np.deg2rad(lon)) + dy = EARTH_RADIUS_M * dlat + dx = EARTH_RADIUS_M * np.cos(np.deg2rad(lat))[:, None] * dlon[None, :] + return dy[:, None], dx + + +def gradient_on_latlon(field, latitude, longitude) -> tuple[np.ndarray, np.ndarray]: + """Return d(field)/dy and d(field)/dx on a regular lat/lon grid.""" + + values = np.asarray(field, dtype=float) + if values.ndim != 2: + raise ValueError("field must be a two-dimensional latitude/longitude array") + + dy, dx = _spacing_m(latitude, longitude) + if values.shape != dx.shape: + raise ValueError( + "field shape must match latitude/longitude lengths: " + f"{values.shape} != {dx.shape}" + ) + + d_dindex_y = np.gradient(values, axis=0) + d_dindex_x = np.gradient(values, axis=1) 
+ return d_dindex_y / dy, d_dindex_x / dx + + +def relative_vorticity(u_wind, v_wind, latitude, longitude) -> np.ndarray: + """Compute relative vorticity, zeta = dv/dx - du/dy, in s^-1.""" + + du_dy, _ = gradient_on_latlon(u_wind, latitude, longitude) + _, dv_dx = gradient_on_latlon(v_wind, latitude, longitude) + return dv_dx - du_dy + + +def horizontal_advection(scalar, u_wind, v_wind, latitude, longitude) -> np.ndarray: + """Compute horizontal advection, -(u dS/dx + v dS/dy).""" + + dscalar_dy, dscalar_dx = gradient_on_latlon(scalar, latitude, longitude) + return -( + np.asarray(u_wind, dtype=float) * dscalar_dx + + np.asarray(v_wind, dtype=float) * dscalar_dy + ) diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/ensemble.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/ensemble.py new file mode 100644 index 0000000..3f3004f --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/ensemble.py @@ -0,0 +1,44 @@ +"""Synthetic ensemble utilities for reviewer-safe examples.""" + +from __future__ import annotations + +import numpy as np +import pandas as pd + + +def make_synthetic_nino_ensemble( + *, + n_models: int = 20, + n_leads: int = 24, + seed: int = 42, +) -> pd.DataFrame: + """Create a deterministic synthetic Nino 3.4 ensemble plume table.""" + + if n_models <= 0 or n_leads <= 0: + raise ValueError("n_models and n_leads must be positive") + + rng = np.random.default_rng(seed) + lead = np.arange(1, n_leads + 1) + base = np.cos(np.linspace(0.0, np.pi, n_leads)) * 1.5 + values: dict[str, np.ndarray] = {} + for idx in range(n_models): + bias = rng.uniform(-0.3, 0.3) + noise = rng.normal(0.0, 0.1 + 0.05 * lead) + values[f"model_{idx + 1:02d}"] = base + bias + noise + + df = pd.DataFrame(values, index=lead) + df.index.name = "lead_month" + return df + + +def ensemble_summary(df: pd.DataFrame) -> pd.DataFrame: + """Summarize an ensemble by 
lead month.""" + + summary = pd.DataFrame(index=df.index) + summary["mean"] = df.mean(axis=1) + summary["spread"] = df.std(axis=1) + summary["p10"] = df.quantile(0.10, axis=1) + summary["p90"] = df.quantile(0.90, axis=1) + summary["warm_probability"] = (df >= 0.5).mean(axis=1) + summary["cold_probability"] = (df <= -0.5).mean(axis=1) + return summary diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/features.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/features.py new file mode 100644 index 0000000..b4d3f19 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/features.py @@ -0,0 +1,120 @@ +"""Feature extraction and deterministic baseline models for weather fields.""" + +from __future__ import annotations + +import numpy as np +import pandas as pd + + +def latitude_weights(latitude) -> np.ndarray: + """Return non-negative cosine latitude weights.""" + + weights = np.cos(np.deg2rad(np.asarray(latitude, dtype=float))) + return np.clip(weights, 0.0, None) + + +def area_mean(field, latitude) -> np.ndarray: + """Area-weight a field over its final two latitude/longitude dimensions.""" + + values = np.asarray(field, dtype=float) + weights = latitude_weights(latitude) + if values.shape[-2] != weights.size: + raise ValueError("latitude length must match the second-to-last field dimension") + reshape = (1,) * (values.ndim - 2) + (weights.size, 1) + weights_2d = weights.reshape(reshape) + numerator = np.nansum(values * weights_2d, axis=(-2, -1)) + denominator = np.nansum(np.isfinite(values) * weights_2d, axis=(-2, -1)) + return numerator / denominator + + +def build_forecast_frame( + time_index, + z500, + t850, + t2m, + *, + lead_steps: int = 24, +) -> pd.DataFrame: + """Build a simple time-ordered forecast table from region-mean features.""" + + if lead_steps <= 0: + raise ValueError("lead_steps must be positive") + df = pd.DataFrame( + { 
+ "z500": np.asarray(z500, dtype=float), + "t850": np.asarray(t850, dtype=float), + "t2m_current": np.asarray(t2m, dtype=float), + }, + index=pd.Index(time_index, name="time"), + ) + df["target_t2m_next"] = df["t2m_current"].shift(-lead_steps) + return df.dropna() + + +def ridge_regression_fit_predict( + features, + target, + *, + train_fraction: float = 0.8, + alpha: float = 1.0, +) -> dict[str, np.ndarray | float]: + """Fit a deterministic ridge baseline with time-ordered train/test split.""" + + x = np.asarray(features, dtype=float) + y = np.asarray(target, dtype=float) + if x.ndim != 2: + raise ValueError("features must be a two-dimensional array") + if y.ndim != 1 or y.size != x.shape[0]: + raise ValueError("target must be one-dimensional and aligned with features") + if not 0.0 < train_fraction < 1.0: + raise ValueError("train_fraction must be between 0 and 1") + + split = int(x.shape[0] * train_fraction) + if split <= 0 or split >= x.shape[0]: + raise ValueError("train_fraction leaves an empty train or test partition") + + x_train, x_test = x[:split], x[split:] + y_train, y_test = y[:split], y[split:] + + mean = x_train.mean(axis=0) + scale = x_train.std(axis=0) + scale = np.where(scale == 0.0, 1.0, scale) + x_train_scaled = (x_train - mean) / scale + x_test_scaled = (x_test - mean) / scale + + design = np.column_stack([np.ones(x_train_scaled.shape[0]), x_train_scaled]) + penalty = np.eye(design.shape[1]) * alpha + penalty[0, 0] = 0.0 + coef = np.linalg.solve(design.T @ design + penalty, design.T @ y_train) + + test_design = np.column_stack([np.ones(x_test_scaled.shape[0]), x_test_scaled]) + y_pred = test_design @ coef + return { + "coefficients": coef, + "feature_mean": mean, + "feature_scale": scale, + "y_test": y_test, + "y_pred": y_pred, + "split_index": float(split), + } + + +def regression_metrics(y_true, y_pred) -> dict[str, float]: + """Return RMSE, MAE, bias, and Pearson correlation.""" + + true = np.asarray(y_true, dtype=float) + pred = 
np.asarray(y_pred, dtype=float) + if true.shape != pred.shape: + raise ValueError("y_true and y_pred must have matching shapes") + + err = pred - true + if true.size < 2 or np.allclose(true.std(), 0.0) or np.allclose(pred.std(), 0.0): + corr = float("nan") + else: + corr = float(np.corrcoef(true, pred)[0, 1]) + return { + "rmse": float(np.sqrt(np.mean(err**2))), + "mae": float(np.mean(np.abs(err))), + "bias": float(np.mean(err)), + "correlation": corr, + } diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/synthetic.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/synthetic.py new file mode 100644 index 0000000..2920950 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/synthetic.py @@ -0,0 +1,41 @@ +"""Small deterministic synthetic fields for examples and tests.""" + +from __future__ import annotations + +import numpy as np +import xarray as xr + + +def make_synthetic_weather_dataset() -> xr.Dataset: + """Create a tiny ERA5-like dataset with no real climate data.""" + + lat = np.linspace(30.0, 45.0, 6) + lon = np.linspace(105.0, 125.0, 8) + lon2d, lat2d = np.meshgrid(lon, lat) + + t2m = 273.15 + 2.0 + 0.12 * (lon2d - 105.0) - 0.35 * (lat2d - 30.0) + t850 = 273.15 - 4.0 - 0.25 * (lat2d - 30.0) + z500 = 5_650.0 + 12.0 * (lat2d - 37.5) - 5.0 * (lon2d - 115.0) + u850 = 8.0 + 0.05 * (lat2d - 37.5) + v850 = -2.0 + 0.08 * (lon2d - 115.0) + relative_humidity = 65.0 + 10.0 * np.sin(np.deg2rad(lon2d)) + + return xr.Dataset( + data_vars={ + "t2m": (("latitude", "longitude"), t2m), + "temperature": (("pressure_level", "latitude", "longitude"), np.stack([z500 * 0 + t850])), + "geopotential": (("pressure_level", "latitude", "longitude"), np.stack([z500 * 9.80665])), + "u": (("pressure_level", "latitude", "longitude"), np.stack([u850])), + "v": (("pressure_level", "latitude", "longitude"), np.stack([v850])), + "relative_humidity": 
def _as_float_array(values) -> np.ndarray:
    """Convert *values* to a floating-point NumPy array."""

    return np.asarray(values, dtype=float)


def _rh_to_ratio(relative_humidity) -> np.ndarray:
    """Normalise relative humidity to a ratio clipped to ``[1e-6, 1]``.

    The unit is detected once per input rather than per element: if any
    finite value exceeds 1.5 the whole input is treated as percent and divided
    by 100, otherwise it is treated as a 0-1 ratio. Per-element detection
    would misread small percentages inside a percent array (for example 1 %
    would be taken as a saturated ratio of 1.0).
    """

    rh = _as_float_array(relative_humidity)
    is_percent = bool(np.any(np.isfinite(rh) & (rh > 1.5)))
    ratio = rh / 100.0 if is_percent else rh
    # The lower bound keeps the logarithm taken by the dewpoint formula finite.
    return np.clip(ratio, 1e-6, 1.0)


def magnus_dewpoint_celsius(
    temperature_celsius,
    relative_humidity,
    *,
    a: float = 17.625,
    b: float = 243.04,
) -> np.ndarray:
    """Estimate dewpoint temperature from temperature and relative humidity.

    Parameters
    ----------
    temperature_celsius:
        Air temperature in degrees Celsius.
    relative_humidity:
        Relative humidity as either 0-1 ratio or 0-100 percent. The unit is
        inferred for the input as a whole (percent if any finite value
        exceeds 1.5).
    a, b:
        Magnus constants. The defaults are common over-water values.

    Returns
    -------
    np.ndarray
        Dewpoint in degrees Celsius, broadcast over the inputs.
    """

    temp = _as_float_array(temperature_celsius)
    rh_ratio = _rh_to_ratio(relative_humidity)
    gamma = np.log(rh_ratio) + (a * temp) / (b + temp)
    return (b * gamma) / (a - gamma)


def relative_humidity_from_dewpoint(
    temperature_celsius,
    dewpoint_celsius,
    *,
    a: float = 17.625,
    b: float = 243.04,
) -> np.ndarray:
    """Recover relative humidity ratio from temperature and dewpoint.

    Parameters
    ----------
    temperature_celsius:
        Air temperature in degrees Celsius.
    dewpoint_celsius:
        Dewpoint temperature in degrees Celsius.
    a, b:
        Magnus constants; use the same values as for the dewpoint estimate.

    Returns
    -------
    np.ndarray
        Relative humidity as a ratio clipped to ``[0, 1]``.
    """

    temp = _as_float_array(temperature_celsius)
    dewpoint = _as_float_array(dewpoint_celsius)
    saturation = np.exp((a * temp) / (b + temp))
    vapor = np.exp((a * dewpoint) / (b + dewpoint))
    return np.clip(vapor / saturation, 0.0, 1.0)


def kelvin_to_celsius(temperature_kelvin) -> np.ndarray:
    """Convert Kelvin values to degrees Celsius."""

    return _as_float_array(temperature_kelvin) - 273.15
def test_constant_flow_has_zero_relative_vorticity():
    """A spatially uniform wind field must produce zero relative vorticity."""

    latitudes = np.linspace(30.0, 35.0, 5)
    longitudes = np.linspace(100.0, 105.0, 6)
    zonal_wind = np.full((latitudes.size, longitudes.size), 5.0)
    meridional_wind = np.full_like(zonal_wind, -2.0)

    vorticity = relative_vorticity(zonal_wind, meridional_wind, latitudes, longitudes)

    np.testing.assert_allclose(vorticity, 0.0, atol=1e-15)


def test_eastward_flow_advects_eastward_increasing_scalar_negatively():
    """Eastward wind across a scalar that grows eastward gives negative advection."""

    latitudes = np.linspace(30.0, 35.0, 5)
    longitudes = np.linspace(100.0, 105.0, 6)
    # Every row repeats the longitude values, so the scalar increases eastward.
    tracer = np.tile(longitudes, (latitudes.size, 1))
    zonal_wind = np.full_like(tracer, 10.0)
    meridional_wind = np.zeros_like(tracer)

    advection = horizontal_advection(tracer, zonal_wind, meridional_wind, latitudes, longitudes)

    assert np.all(advection < 0.0)
def test_magnus_dewpoint_roundtrip_relative_humidity():
    """Relative humidity -> dewpoint -> relative humidity returns the input ratio."""

    air_temp_c = np.array([20.0, 25.0, 30.0])
    humidity_pct = np.array([50.0, 65.0, 80.0])
    expected_ratio = humidity_pct / 100.0

    recovered_ratio = relative_humidity_from_dewpoint(
        air_temp_c,
        magnus_dewpoint_celsius(air_temp_c, humidity_pct),
    )

    np.testing.assert_allclose(recovered_ratio, expected_ratio, atol=1e-10)


def test_dewpoint_never_exceeds_temperature_for_unsaturated_air():
    """At 70 percent humidity the dewpoint must not exceed the air temperature."""

    air_temp_c = np.array([10.0, 15.0, 20.0])
    dewpoint_c = magnus_dewpoint_celsius(air_temp_c, 70.0)

    assert np.all(dewpoint_c <= air_temp_c)