From 290e6a4f8a1cd8435b18b0e709bc97851aacf7e8 Mon Sep 17 00:00:00 2001 From: stacknil Date: Sat, 23 May 2026 18:19:05 +0800 Subject: [PATCH] feat(weather): add precipitation and climate diagnostics --- .../python-weather-diagnostics-toolkit-ci.yml | 2 + .../README.md | 26 ++++- .../SANITIZATION_REPORT.md | 4 + .../configs/example.yaml | 4 + .../docs/calculation-methods.md | 57 +++++++++++ .../docs/climate-statistical-diagnostics.md | 76 +++++++++++++++ .../docs/diagnostic-analysis.md | 57 +++++++++++ .../docs/methodology.md | 34 ++++++- .../docs/reviewer-path.md | 17 +++- .../docs/source-to-public-mapping.md | 7 ++ .../docs/station-precipitation-workflows.md | 96 +++++++++++++++++++ .../examples/sample_metadata.json | 2 + .../synthetic-weather-diagnostics-report.md | 32 +++++++ .../scripts/run_climate_statistics.py | 57 +++++++++++ .../scripts/run_precipitation_workflow.py | 58 +++++++++++ .../__init__.py | 29 +++++- .../aliases.py | 1 + .../climate.py | 70 ++++++++++++++ .../dynamics.py | 19 ++++ .../interpolation.py | 80 ++++++++++++++++ .../precipitation.py | 80 ++++++++++++++++ .../tests/test_aliases_and_synthetic.py | 8 ++ .../tests/test_climate.py | 60 ++++++++++++ .../tests/test_dynamics.py | 13 +++ .../tests/test_interpolation.py | 49 ++++++++++ .../tests/test_precipitation.py | 55 +++++++++++ 26 files changed, 983 insertions(+), 10 deletions(-) create mode 100644 projects/python-weather-diagnostics-toolkit/docs/climate-statistical-diagnostics.md create mode 100644 projects/python-weather-diagnostics-toolkit/docs/station-precipitation-workflows.md create mode 100644 projects/python-weather-diagnostics-toolkit/scripts/run_climate_statistics.py create mode 100644 projects/python-weather-diagnostics-toolkit/scripts/run_precipitation_workflow.py create mode 100644 projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/climate.py create mode 100644 projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/interpolation.py create mode 100644 projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/precipitation.py create mode 100644 projects/python-weather-diagnostics-toolkit/tests/test_climate.py create mode 100644 projects/python-weather-diagnostics-toolkit/tests/test_interpolation.py create mode 100644 projects/python-weather-diagnostics-toolkit/tests/test_precipitation.py diff --git a/.github/workflows/python-weather-diagnostics-toolkit-ci.yml b/.github/workflows/python-weather-diagnostics-toolkit-ci.yml index b8f204f..62e60e5 100644 --- a/.github/workflows/python-weather-diagnostics-toolkit-ci.yml +++ b/.github/workflows/python-weather-diagnostics-toolkit-ci.yml @@ -53,4 +53,6 @@ jobs: run: | python scripts/run_thermodynamic_check.py --help python scripts/run_dynamics_summary.py --help + python scripts/run_precipitation_workflow.py --help + python scripts/run_climate_statistics.py --help python scripts/run_synthetic_ensemble.py --help diff --git a/projects/python-weather-diagnostics-toolkit/README.md b/projects/python-weather-diagnostics-toolkit/README.md index 26607f6..a6a7837 100644 --- a/projects/python-weather-diagnostics-toolkit/README.md +++ b/projects/python-weather-diagnostics-toolkit/README.md @@ -20,7 +20,9 @@ artifacts. It focuses on: - 2 m temperature, 10 m wind, 500 hPa height, and 850 hPa wind/temperature fields - Magnus-formula dewpoint diagnostics and round-trip humidity checks - geopotential-height conversion -- relative-vorticity and horizontal-advection diagnostics +- relative-vorticity, horizontal-advection, and moisture-flux diagnostics +- station-to-grid interpolation and precipitation accumulation conversion +- anomaly, composite, and grid-point correlation helpers - cosine-latitude regional means - a deterministic time-ordered ridge-regression baseline for 24-hour temperature prediction - synthetic ensemble summaries for Nino-style forecast-plume interpretation @@ -34,6 +36,8 @@ python-weather-diagnostics-toolkit/ | +-- data-policy.md | +-- calculation-methods.md | +-- diagnostic-analysis.md +| +-- station-precipitation-workflows.md +| +-- climate-statistical-diagnostics.md | +-- methodology.md | +-- reproducibility.md | +-- reviewer-path.md @@ -43,6 +47,8 @@ python-weather-diagnostics-toolkit/ | +-- synthetic-weather-diagnostics-report.md +-- scripts/ | +-- run_dynamics_summary.py +| +-- run_precipitation_workflow.py +| +-- run_climate_statistics.py | +-- run_synthetic_ensemble.py | +-- run_thermodynamic_check.py +-- src/python_weather_diagnostics_toolkit/ @@ -78,6 +84,8 @@ Inspect the public CLI surfaces: ```bash python scripts/run_thermodynamic_check.py --help python scripts/run_dynamics_summary.py --help +python scripts/run_precipitation_workflow.py --help +python scripts/run_climate_statistics.py --help python scripts/run_synthetic_ensemble.py --help ``` @@ -104,11 +112,20 @@ Dynamic layer: - estimates latitude/longitude grid spacing from spherical Earth geometry - computes relative vorticity as `dv/dx - du/dy` - computes horizontal scalar advection as `-(u dS/dx + v dS/dy)` +- computes moisture flux divergence as `d(q u)/dx + d(q v)/dy` - keeps finite-difference assumptions explicit for reviewer inspection +Station and precipitation layer: + +- replaces sentinel-coded missing values with `NaN` +- interpolates station values to a target grid with inverse-distance weighting +- converts accumulated precipitation into per-step amounts and rates +- summarizes event totals and threshold exceedance masks + Statistical layer: - reduces gridded fields to cosine-latitude area means +- computes anomalies, standardized anomalies, composites, and grid-point correlations - constructs time-ordered forecast tables from regional features - fits a deterministic ridge-regression baseline without random shuffling - reports RMSE, MAE, bias, and correlation as workflow diagnostics @@ -150,8 +167,9 @@ For real analysis, users provide their own local ERA5-style NetCDF files through `configs/example.yaml`. The toolkit expects common variables such as: - single-level fields: `t2m`, `d2m`, `u10`, `v10`, `tp`, or their long ERA5 names -- pressure-level fields: `t`, `u`, `v`, `z`, `r`, `w`, `vo`, or their long ERA5 names +- pressure-level fields: `t`, `u`, `v`, `z`, `r`, `q`, `w`, `vo`, or their long ERA5 names - coordinates: `time` or `valid_time`, `latitude`, `longitude`, and optionally `pressure_level` +- optional station tables supplied locally with longitude, latitude, and value columns ## Generated Outputs @@ -188,7 +206,9 @@ The more detailed technical route is: 1. [`docs/calculation-methods.md`](docs/calculation-methods.md) 2. [`docs/diagnostic-analysis.md`](docs/diagnostic-analysis.md) -3. [`docs/source-to-public-mapping.md`](docs/source-to-public-mapping.md) +3. [`docs/station-precipitation-workflows.md`](docs/station-precipitation-workflows.md) +4. [`docs/climate-statistical-diagnostics.md`](docs/climate-statistical-diagnostics.md) +5. [`docs/source-to-public-mapping.md`](docs/source-to-public-mapping.md) ## Privacy-Safe Scope diff --git a/projects/python-weather-diagnostics-toolkit/SANITIZATION_REPORT.md b/projects/python-weather-diagnostics-toolkit/SANITIZATION_REPORT.md index 8432e31..42aad17 100644 --- a/projects/python-weather-diagnostics-toolkit/SANITIZATION_REPORT.md +++ b/projects/python-weather-diagnostics-toolkit/SANITIZATION_REPORT.md @@ -51,6 +51,10 @@ The public project preserves the reusable calculation ideas: - geopotential-height conversion - relative-vorticity calculation - horizontal temperature-advection diagnostics +- moisture flux divergence diagnostics +- station missing-value handling and lightweight station-to-grid interpolation +- accumulated precipitation conversion and event-total summaries +- anomaly, standardized-anomaly, composite, and correlation-field diagnostics - cosine-latitude regional means - time-ordered ridge-regression baseline evaluation - synthetic ensemble summary mechanics diff --git a/projects/python-weather-diagnostics-toolkit/configs/example.yaml b/projects/python-weather-diagnostics-toolkit/configs/example.yaml index 318759e..b14c518 100644 --- a/projects/python-weather-diagnostics-toolkit/configs/example.yaml +++ b/projects/python-weather-diagnostics-toolkit/configs/example.yaml @@ -22,6 +22,10 @@ diagnostics: geopotential_height_500hpa: true vorticity_500hpa: true temperature_advection_850hpa: true + moisture_flux_divergence_850hpa: true + station_interpolation: false + precipitation_event_total: false + climate_statistics: false ridge_temperature_baseline: true baseline_model: diff --git a/projects/python-weather-diagnostics-toolkit/docs/calculation-methods.md b/projects/python-weather-diagnostics-toolkit/docs/calculation-methods.md index 9443c5f..71d3637 100644 --- a/projects/python-weather-diagnostics-toolkit/docs/calculation-methods.md +++ b/projects/python-weather-diagnostics-toolkit/docs/calculation-methods.md @@ -135,6 +135,21 @@ warming tendency. In a full thermodynamic budget, this is only one term. The public mini-lab intentionally keeps the default implementation to horizontal advection so that tests remain small and dependency-light. +## Moisture Flux Divergence + +For lower-tropospheric precipitation diagnostics, the toolkit includes a +horizontal specific-humidity flux divergence: + +```text +div(qV) = d(q u)/dx + d(q v)/dy +``` + +where `q` is specific humidity, `u` is zonal wind, and `v` is meridional wind. +Negative values are often read as moisture-flux convergence under the chosen +coordinate and unit assumptions. This diagnostic should be interpreted with +precipitation, vertical motion, and synoptic context rather than as a complete +rainfall budget. + ## Area-Weighted Regional Mean Regional features use cosine-latitude weighting: @@ -230,3 +245,45 @@ Example synthetic summary rows: These values are synthetic. They are useful for verifying table generation and reviewer interpretation, not for climate diagnosis. + +## Station And Precipitation Utilities + +The precipitation helpers cover common data-preparation steps: + +```text +missing sentinel -> NaN +accumulated precipitation -> per-step amount +per-step amount -> mm/day-equivalent rate +event total -> sum over finite event samples +threshold exceedance -> finite value >= configured threshold +``` + +Accumulated precipitation is required to be non-decreasing along the selected +lead axis. A decreasing finite sequence raises an error because it usually +indicates mixed forecast cycles, an incorrect lead dimension, or an unhandled +product reset. + +Station-to-grid examples use inverse-distance weighting: + +```text +weight = 1 / distance**power +grid_value = sum(weight * station_value) / sum(weight) +``` + +This is a transparent interpolation baseline, not a claim that IDW is optimal +for every terrain, network density, or precipitation regime. + +## Climate Statistics + +Climate-statistics helpers include anomalies, standardized anomalies, +composites, and grid-point correlations: + +```text +anomaly = value - climatology_mean +standardized = anomaly / climatology_std +composite = mean(field[event_mask]) +r = cov(index, field_point) / (std(index) * std(field_point)) +``` + +Zero-spread baselines, too-small samples, and zero-variance correlation points +return `NaN` so review surfaces show where the statistic is undefined. diff --git a/projects/python-weather-diagnostics-toolkit/docs/climate-statistical-diagnostics.md b/projects/python-weather-diagnostics-toolkit/docs/climate-statistical-diagnostics.md new file mode 100644 index 0000000..08162bd --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/climate-statistical-diagnostics.md @@ -0,0 +1,76 @@ +# Climate Statistical Diagnostics + +The source materials included basic climate-statistics workflows: anomalies, +correlation, regression, composites, and simple machine-learning baselines. The +public toolkit now exposes a small set of deterministic helpers for these +tasks, while keeping datasets and claims outside the repository. + +## Anomalies + +An anomaly is a departure from a reference baseline: + +```text +anomaly = value - climatology_mean +``` + +A standardized anomaly divides that departure by a reference spread: + +```text +standardized_anomaly = (value - climatology_mean) / climatology_std +``` + +If the baseline spread is zero, the public implementation returns `NaN` rather +than an infinite value. That makes degenerate baselines visible during review. + +## Composite Means + +Composite analysis averages samples selected by an event mask: + +```text +composite = mean(field[event_mask]) +``` + +The event mask should be defined before looking at the composite field. Useful +examples include warm-event days, heavy-precipitation days, or high-index +periods. Public examples should document: + +- how the event mask was defined +- the number of selected samples +- whether the composite is a mean field, difference field, or anomaly field +- whether statistical significance was assessed separately + +## Correlation Fields + +The helper `pearson_correlation_field` computes the Pearson correlation between +a one-dimensional index and every grid point in a field: + +```text +r = cov(index, field_point) / (std(index) * std(field_point)) +``` + +The implementation handles missing values pairwise and returns `NaN` for grid +points with too few finite pairs or zero variance. + +## Regression And Prediction Boundaries + +The toolkit already includes a ridge-regression baseline for time-ordered +temperature prediction. The climate-statistics helpers are meant to support +feature exploration before modeling: + +```text +anomaly map -> regional feature -> time-ordered split -> transparent baseline +``` + +They do not establish forecast skill on their own. A public result should +include an explicit validation period, comparison baseline, sampling design, +and error metric before making predictive claims. + +## Review Checklist + +For any climate-statistics result, verify: + +- the baseline period is stated +- missing-value handling is documented +- the sample axis is time ordered when used for prediction +- event definitions are chosen before composite interpretation +- correlation or regression output is not described as causation diff --git a/projects/python-weather-diagnostics-toolkit/docs/diagnostic-analysis.md b/projects/python-weather-diagnostics-toolkit/docs/diagnostic-analysis.md index 9d1f0f0..9a2de6a 100644 --- a/projects/python-weather-diagnostics-toolkit/docs/diagnostic-analysis.md +++ b/projects/python-weather-diagnostics-toolkit/docs/diagnostic-analysis.md @@ -109,6 +109,28 @@ fluxes, and analysis increments. The public project keeps the default calculation narrow so it remains reproducible and testable without heavy external data. +## Moisture Transport And Heavy Precipitation + +For heavy-precipitation case studies, the toolkit supports horizontal moisture +flux divergence: + +```text +d(q u)/dx + d(q v)/dy +``` + +Interpretation pattern: + +```text +moisture convergence + sustained lift + favorable circulation -> plausible rainfall support +``` + +Care points: + +- moisture convergence is not rainfall by itself +- vertically integrated transport may be more appropriate than one pressure level +- precipitation totals should be checked against observation or reanalysis products +- terrain, convection, and microphysics are outside this compact diagnostic + ## Regional Temperature Baseline The baseline model is intentionally simple: @@ -159,6 +181,41 @@ In the deterministic synthetic example, the ensemble starts warm, becomes mixed near lead month 12, and shifts cold by lead month 24. This is an example of interpreting an artificial plume, not a statement about the real ocean. +## Station, Precipitation, And Extremes + +Station observations and gridded precipitation require an explicit quality +control chain before interpretation: + +```text +missing sentinel -> finite-value mask -> interpolation or event total -> threshold check +``` + +Threshold exceedance should be described according to the threshold source: + +- absolute threshold: value meets a fixed user-supplied amount +- percentile threshold: value exceeds a local historical percentile +- standardized anomaly: value exceeds a baseline-relative spread multiple + +The public toolkit provides mechanics for these checks. It does not define +official warnings or redistribute station records. + +## Climate Statistics + +Anomaly, composite, and correlation outputs are exploratory diagnostics. + +Interpretation pattern: + +```text +well-defined baseline + transparent event mask + sufficient samples -> interpretable statistic +``` + +Care points: + +- correlation is not causation +- composite masks should be selected before interpreting the resulting field +- undefined baselines or zero-variance points should remain visible as `NaN` +- prediction workflows should keep time-ordered validation + ## Reviewer Questions Useful reviewer questions: diff --git a/projects/python-weather-diagnostics-toolkit/docs/methodology.md b/projects/python-weather-diagnostics-toolkit/docs/methodology.md index 651555f..d7e0d60 100644 --- a/projects/python-weather-diagnostics-toolkit/docs/methodology.md +++ b/projects/python-weather-diagnostics-toolkit/docs/methodology.md @@ -68,7 +68,35 @@ table can include: - current 2 m temperature - future 2 m temperature target shifted by a configured lead -## 5. Baseline Prediction +## 5. Station And Precipitation Preparation + +Station and precipitation workflows start with quality control: + +```text +sentinel-coded missing values -> NaN +station observations -> finite station rows -> IDW grid +forecast accumulations -> per-step precipitation -> rate or event total +``` + +The public implementation uses small NumPy helpers rather than provider-specific +download code. This keeps the method reviewable while leaving data access, +licensing, and provenance to the user. + +## 6. Climate Statistics + +Climate diagnostics use explicit baselines and event masks: + +```text +anomaly = value - climatology_mean +standardized anomaly = anomaly / climatology_std +composite = mean(selected event samples) +correlation field = corr(index, grid point) +``` + +Undefined or under-sampled statistics return `NaN` instead of a misleading +number. + +## 7. Baseline Prediction The included baseline is a transparent ridge regression: @@ -83,7 +111,7 @@ the result reproducible. Metrics include RMSE, MAE, bias, and correlation. The baseline is included for workflow demonstration only. It is not a claim of forecast skill. -## 6. Synthetic Ensemble Summary +## 8. Synthetic Ensemble Summary The synthetic Nino-style ensemble utility creates deterministic plume data with a fixed random seed. It demonstrates: @@ -96,7 +124,7 @@ a fixed random seed. It demonstrates: The generated values are synthetic and should be read only as an example of summary mechanics. -## 7. Interpretation Boundaries +## 9. Interpretation Boundaries A public interpretation should say what was computed and what the diagnostic suggests, while avoiding unsupported claims. For example: diff --git a/projects/python-weather-diagnostics-toolkit/docs/reviewer-path.md b/projects/python-weather-diagnostics-toolkit/docs/reviewer-path.md index 9d74c48..ce64f42 100644 --- a/projects/python-weather-diagnostics-toolkit/docs/reviewer-path.md +++ b/projects/python-weather-diagnostics-toolkit/docs/reviewer-path.md @@ -20,6 +20,8 @@ Inspect: - [`docs/methodology.md`](methodology.md) - [`docs/calculation-methods.md`](calculation-methods.md) - [`docs/diagnostic-analysis.md`](diagnostic-analysis.md) +- [`docs/station-precipitation-workflows.md`](station-precipitation-workflows.md) +- [`docs/climate-statistical-diagnostics.md`](climate-statistical-diagnostics.md) - [`docs/data-policy.md`](data-policy.md) - [`examples/synthetic-weather-diagnostics-report.md`](../examples/synthetic-weather-diagnostics-report.md) - [`examples/sample_metadata.json`](../examples/sample_metadata.json) @@ -32,6 +34,8 @@ Questions to answer: - Are field aliases separated from scientific calculations? - Are dewpoint, vorticity, advection, regional means, and ensemble summaries described with equations or explicit numerical assumptions? +- Are station interpolation, precipitation accumulation conversion, and + climate-statistics helpers documented with missing-data behavior? - Are synthetic examples clearly labeled as synthetic? - Are forecast-skill claims avoided unless real validation data are supplied? @@ -45,6 +49,8 @@ python -m pytest python -m compileall src scripts python scripts/run_thermodynamic_check.py --help python scripts/run_dynamics_summary.py --help +python scripts/run_precipitation_workflow.py --help +python scripts/run_climate_statistics.py --help python scripts/run_synthetic_ensemble.py --help ``` @@ -82,10 +88,15 @@ For a deeper review, read the project in this order: 2. `src/python_weather_diagnostics_toolkit/thermodynamics.py` and `docs/calculation-methods.md` for dewpoint formulas and round-trip checks. 3. `src/python_weather_diagnostics_toolkit/dynamics.py` for grid spacing, - vorticity, and advection. -4. `src/python_weather_diagnostics_toolkit/features.py` for cosine-latitude + vorticity, advection, and moisture flux divergence. +4. `src/python_weather_diagnostics_toolkit/precipitation.py` and + `src/python_weather_diagnostics_toolkit/interpolation.py` for station and + precipitation preparation helpers. +5. `src/python_weather_diagnostics_toolkit/climate.py` for anomaly, + composite, and correlation helpers. +6. `src/python_weather_diagnostics_toolkit/features.py` for cosine-latitude regional means and time-ordered baseline modeling. -5. `src/python_weather_diagnostics_toolkit/ensemble.py` for deterministic +7. `src/python_weather_diagnostics_toolkit/ensemble.py` for deterministic synthetic plume summaries. This route should make the project reviewable as code, not just as a diff --git a/projects/python-weather-diagnostics-toolkit/docs/source-to-public-mapping.md b/projects/python-weather-diagnostics-toolkit/docs/source-to-public-mapping.md index 32a9c4f..f77a114 100644 --- a/projects/python-weather-diagnostics-toolkit/docs/source-to-public-mapping.md +++ b/projects/python-weather-diagnostics-toolkit/docs/source-to-public-mapping.md @@ -19,6 +19,10 @@ not a submitted report archive. | dewpoint verification | `thermodynamics.py`, `run_thermodynamic_check.py`, tests | formula isolated and tested with synthetic values | | 500 hPa height and vorticity maps | `dynamics.py`, `diagnostic-analysis.md`, tests | map-specific code converted to numerical fields | | 850 hPa temperature advection | `dynamics.py`, `run_dynamics_summary.py` | calculation made dependency-light and synthetic-testable | +| moisture transport diagnostics | `dynamics.py`, `diagnostic-analysis.md` | water-vapor process reframed as flux-divergence calculation | +| station observation cleaning and interpolation | `precipitation.py`, `interpolation.py`, `station-precipitation-workflows.md` | sentinel handling and IDW interpolation made synthetic-testable | +| accumulated precipitation conversion | `precipitation.py`, `station-precipitation-workflows.md` | forecast accumulations converted without redistributing products | +| anomaly, composite, and correlation exercises | `climate.py`, `climate-statistical-diagnostics.md` | statistical methods separated from local datasets | | regional feature construction | `features.py` | area weighting and target shifting made explicit | | simple temperature prediction baseline | `features.py`, `diagnostic-analysis.md` | baseline framed as workflow sanity check, not forecast skill | | ensemble plume exercises | `ensemble.py`, `run_synthetic_ensemble.py` | real or teaching data replaced by deterministic synthetic data | @@ -46,6 +50,9 @@ Preserved: - humidity and dewpoint diagnostics - geopotential-height conversion - relative-vorticity and advection calculations +- moisture flux divergence +- station interpolation and precipitation accumulation conversion +- anomaly, composite, and correlation helpers - regional feature engineering - time-ordered baseline modeling - ensemble summary interpretation diff --git a/projects/python-weather-diagnostics-toolkit/docs/station-precipitation-workflows.md b/projects/python-weather-diagnostics-toolkit/docs/station-precipitation-workflows.md new file mode 100644 index 0000000..e6e0d3f --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/docs/station-precipitation-workflows.md @@ -0,0 +1,96 @@ +# Station And Precipitation Workflows + +The source materials included station observations, gridded reanalysis, and +forecast-style accumulated precipitation examples. The public toolkit keeps the +reusable methods but does not redistribute station tables, NetCDF files, plots, +or report artifacts. + +## Station Quality Control + +Station tables often encode missing observations with a numeric sentinel. The +public helper `mark_missing_sentinel` replaces that code with `NaN` before any +interpolation, event total, or verification step. + +Recommended order: + +```text +load local station table +-> normalize column names outside this repo +-> replace missing sentinel with NaN +-> remove non-finite longitude/latitude/value rows +-> interpolate or summarize +``` + +This keeps missingness explicit and prevents sentinel values from becoming +false heavy-precipitation or temperature extremes. + +## Station-To-Grid Interpolation + +The toolkit includes inverse-distance weighting (IDW) for lightweight +station-to-grid examples: + +```text +weight = 1 / distance**power +grid_value = sum(weight * station_value) / sum(weight) +``` + +The implementation: + +- accepts 1D station longitude, latitude, and value arrays +- accepts either 1D target longitude/latitude axes or matching 2D grids +- ignores stations with non-finite coordinates or values +- preserves exact station-grid matches +- supports a maximum search distance and minimum-neighbor count + +IDW is intentionally simple. For formal spatial analysis, compare against +ordinary kriging, objective analysis, or domain-specific interpolation methods, +and document station density, terrain effects, and validation error. + +## Accumulated Precipitation + +Forecast products can store precipitation as an accumulation over lead time. +The public helper first differences a non-decreasing accumulated series: + +```text +increment[0] = accumulated[0] +increment[t] = accumulated[t] - accumulated[t - 1] +``` + +For a 6-hour step, a per-step amount can be expressed as a mm/day-equivalent +rate: + +```text +rate = increment * 24 / 6 +``` + +The helper raises an error when a finite accumulated sequence decreases. This +is a quality-control signal: the user may have mixed forecast cycles, used the +wrong lead dimension, or encountered a product reset that should be handled +before conversion. + +## Event Totals And Thresholds + +The public precipitation utilities also provide: + +- `event_total`: sum over an event axis while returning `NaN` for all-missing + grid points +- `threshold_exceedance`: mark finite values that meet or exceed a configured + threshold + +Thresholds are intentionally user-supplied. Public examples should explain +whether a threshold is absolute, percentile-based, or standardized-anomaly +based, and should avoid official warning language unless the source authority +and rule are documented. + +## What Is Not Included + +The public project does not include: + +- station observation tables +- provider forecast products +- gridded reanalysis files +- generated maps from local analysis +- private report text or templates + +The code is a reusable calculation scaffold. Users connect their own licensed +data locally. diff --git a/projects/python-weather-diagnostics-toolkit/examples/sample_metadata.json b/projects/python-weather-diagnostics-toolkit/examples/sample_metadata.json index 71e5ec0..ac9d78d 100644 --- a/projects/python-weather-diagnostics-toolkit/examples/sample_metadata.json +++ b/projects/python-weather-diagnostics-toolkit/examples/sample_metadata.json @@ -10,6 +10,8 @@ "thermodynamic dewpoint checks", "relative-vorticity diagnostics", "temperature-advection diagnostics", + "station interpolation and precipitation accumulation conversion", + "climate anomaly, composite, and correlation diagnostics", "time-ordered baseline evaluation", "synthetic ensemble summaries" ] diff --git a/projects/python-weather-diagnostics-toolkit/examples/synthetic-weather-diagnostics-report.md b/projects/python-weather-diagnostics-toolkit/examples/synthetic-weather-diagnostics-report.md index 01df24c..f33149d 100644 --- a/projects/python-weather-diagnostics-toolkit/examples/synthetic-weather-diagnostics-report.md +++ b/projects/python-weather-diagnostics-toolkit/examples/synthetic-weather-diagnostics-report.md @@ -58,6 +58,38 @@ The baseline is useful as a workflow sanity check. It should not be described as operational forecast skill without independent validation, comparison baselines, and real-data provenance. +## Station And Precipitation Workflow + +The synthetic station workflow replaces a missing-value sentinel, interpolates +finite station values to a small grid, converts accumulated precipitation to +step rates, and counts threshold exceedance points. + +Command: + +```bash +python scripts/run_precipitation_workflow.py +``` + +Interpretation: +This checks the mechanics of a station-to-grid and accumulation-conversion +pipeline. It does not redistribute station observations, forecast products, or +official threshold definitions. + +## Climate Statistics + +The climate-statistics example computes anomalies, standardized anomalies, +event-mask composites, and grid-point correlations on synthetic arrays. + +Command: + +```bash +python scripts/run_climate_statistics.py +``` + +Interpretation: +The output demonstrates how to structure exploratory climate statistics. It +does not establish causation, event attribution, or forecast skill. + ## Synthetic Ensemble The Nino-style ensemble example summarizes deterministic synthetic plume data: diff --git a/projects/python-weather-diagnostics-toolkit/scripts/run_climate_statistics.py b/projects/python-weather-diagnostics-toolkit/scripts/run_climate_statistics.py new file mode 100644 index 0000000..2549635 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/scripts/run_climate_statistics.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +"""Run a synthetic climate-statistics summary.""" + +from __future__ import annotations + +import argparse +import json + +import numpy as np + +from python_weather_diagnostics_toolkit.climate import ( + anomaly, + composite_mean, + pearson_correlation_field, + standardized_anomaly, +) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--event-threshold", type=float, default=1.0) + return parser + + +def main() -> None: + args = build_parser().parse_args() + index = np.array([-1.0, -0.2, 0.4, 1.2, 1.6]) + baseline_mean = np.array([10.0, 10.0, 10.0]) + baseline_std = np.array([2.0, 2.5, 5.0]) + current = np.array([12.0, 8.0, 15.0]) + field = np.stack( + [ + np.array([[0.0, 4.0], [1.0, 3.0]]), + np.array([[1.0, 3.0], [2.0, 3.0]]), + np.array([[2.0, 2.0], [3.0, 3.0]]), + np.array([[3.0, 1.0], [4.0, 3.0]]), + np.array([[4.0, 0.0], [5.0, 3.0]]), + ] + ) + event_mask = index >= args.event_threshold + corr = pearson_correlation_field(index, field) + payload = { + "anomaly": anomaly(current, baseline_mean).round(3).tolist(), + "standardized_anomaly": standardized_anomaly( + current, + baseline_mean, + baseline_std, + ).round(3).tolist(), + "event_sample_count": int(event_mask.sum()), + "composite_mean_max": float(np.nanmax(composite_mean(field, event_mask))), + "max_abs_correlation": float(np.nanmax(np.abs(corr))), + } + print(json.dumps(payload, indent=2, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/projects/python-weather-diagnostics-toolkit/scripts/run_precipitation_workflow.py b/projects/python-weather-diagnostics-toolkit/scripts/run_precipitation_workflow.py new file mode 100644 index 0000000..821d09e --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/scripts/run_precipitation_workflow.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +"""Run a synthetic station and precipitation workflow summary.""" + +from __future__ import annotations + +import argparse +import json + +import numpy as np + +from python_weather_diagnostics_toolkit.interpolation import idw_interpolate_station_to_grid +from python_weather_diagnostics_toolkit.precipitation import ( + cumulative_to_rate, + event_total, + mark_missing_sentinel, + threshold_exceedance, +) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--threshold-mm", type=float, default=50.0) + parser.add_argument("--step-hours", type=float, default=6.0) + return parser + + +def main() -> None: + args = build_parser().parse_args() + station_lon = np.array([100.0, 101.0, 102.0, 103.0]) + station_lat = np.array([30.0, 31.0, 30.5, 31.5]) + station_values = mark_missing_sentinel(np.array([12.0, -32766.0, 28.0, 34.0])) + grid = idw_interpolate_station_to_grid( + station_lon, + station_lat, + station_values, + np.array([100.0, 101.0, 102.0]), + np.array([30.0, 31.0]), + min_neighbors=2, + ) + accumulated = np.array( + [ + [[0.0, 4.0, 10.0], [0.0, 8.0, 15.0]], + [[1.0, 3.0, 9.0], [2.0, 6.0, 14.0]], + ] + ) + rate = cumulative_to_rate(accumulated, step_hours=args.step_hours, axis=-1) + total = event_total(rate, axis=-1) + payload = { + "finite_interpolated_grid_points": int(np.isfinite(grid).sum()), + "max_rate_mm_day": float(np.nanmax(rate)), + "max_event_total_mm_day_equivalent": float(np.nanmax(total)), + "threshold_grid_points": int(threshold_exceedance(total, threshold=args.threshold_mm).sum()), + } + print(json.dumps(payload, indent=2, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/__init__.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/__init__.py index c9bce48..37827b3 100644 --- a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/__init__.py +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/__init__.py @@ -1,22 +1,49 @@ """Public-safe weather diagnostics utilities for gridded atmospheric fields.""" from .aliases import get_data_array, standardize_coordinates -from .dynamics import geopotential_to_height, horizontal_advection, relative_vorticity +from .climate import anomaly, composite_mean, pearson_correlation_field, standardized_anomaly +from .dynamics import ( + geopotential_to_height, + horizontal_advection, + moisture_flux_divergence, + relative_vorticity, +) from .ensemble import ensemble_summary, make_synthetic_nino_ensemble from .features import area_mean, regression_metrics, ridge_regression_fit_predict +from .interpolation import idw_interpolate_station_to_grid +from .precipitation import ( + cumulative_to_increment, + cumulative_to_rate, + event_total, + increment_to_rate, + mark_missing_sentinel, + threshold_exceedance, +) from .thermodynamics import magnus_dewpoint_celsius, relative_humidity_from_dewpoint __all__ = [ + "anomaly", "area_mean", + "composite_mean", + "cumulative_to_increment", + "cumulative_to_rate", "ensemble_summary", + "event_total", "geopotential_to_height", "get_data_array", "horizontal_advection", + "idw_interpolate_station_to_grid", + "increment_to_rate", "magnus_dewpoint_celsius", "make_synthetic_nino_ensemble", + "mark_missing_sentinel", + "moisture_flux_divergence", + "pearson_correlation_field", "regression_metrics", "relative_humidity_from_dewpoint", "relative_vorticity", "ridge_regression_fit_predict", "standardize_coordinates", + "standardized_anomaly", + "threshold_exceedance", ] diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/aliases.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/aliases.py index a9b6de5..d80aa89 100644 --- a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/aliases.py +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/aliases.py @@ -20,6 +20,7 @@ "v10": ("v10", "10m_v_component_of_wind", "10v"), "tp": ("tp", "total_precipitation"), "temperature": ("t", "temperature"), + "specific_humidity": ("q", "specific_humidity"), "relative_humidity": ("r", "relative_humidity"), "u": ("u", "u_component_of_wind"), "v": ("v", "v_component_of_wind"), diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/climate.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/climate.py new file mode 100644 index 0000000..1350e90 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/climate.py @@ -0,0 +1,70 @@ +"""Small climate-statistics helpers used by reviewer-safe examples.""" + +from __future__ import annotations + +import numpy as np + + +def anomaly(values, climatology) -> np.ndarray: + """Return departures from a provided climatological baseline.""" + + return np.asarray(values, dtype=float) - np.asarray(climatology, dtype=float) + + +def standardized_anomaly(values, climatology_mean, climatology_std) -> np.ndarray: + """Return standardized anomalies while masking zero-spread baselines.""" + + departures = anomaly(values, climatology_mean) + spread = np.asarray(climatology_std, dtype=float) + with np.errstate(divide="ignore", invalid="ignore"): + standardized = departures / spread + return np.where(spread > 0.0, standardized, np.nan) + + +def composite_mean(values, mask, *, axis: int = 0, min_count: int = 1) -> np.ndarray: + """Average samples selected by a boolean event mask.""" + + array = np.asarray(values, dtype=float) + selector = np.asarray(mask, dtype=bool) + if selector.ndim != 1: + raise ValueError("mask must be one-dimensional") + moved = np.moveaxis(array, axis, 0) + if moved.shape[0] != selector.size: + raise ValueError("mask length must match the selected sample axis") + if min_count <= 0: + raise ValueError("min_count must be positive") + + selected = moved[selector] + if selected.shape[0] < min_count: + return np.full(moved.shape[1:], np.nan, dtype=float) + return np.nanmean(selected, axis=0) + + +def pearson_correlation_field(index, field, *, min_count: int = 3) -> np.ndarray: + """Correlate a one-dimensional index with every grid point in a field.""" + + x = np.asarray(index, dtype=float) + y = np.asarray(field, dtype=float) + if x.ndim != 1: + raise ValueError("index must be one-dimensional") + if y.ndim < 1 or y.shape[0] != x.size: + raise ValueError("field first dimension must align with index") + if min_count < 2: + raise ValueError("min_count must be at least 2") + + y2 = y.reshape((x.size, -1)) + x2 = x[:, None] + valid = np.isfinite(x2) & np.isfinite(y2) + count = valid.sum(axis=0) + safe_count = np.where(count > 0, count, 1) + x_mean = np.sum(np.where(valid, x2, 0.0), axis=0) / safe_count + y_mean = np.sum(np.where(valid, y2, 0.0), axis=0) / safe_count + x_centered = np.where(valid, x2 - x_mean, 0.0) + y_centered = np.where(valid, y2 - y_mean, 0.0) + numerator = np.sum(x_centered * y_centered, axis=0) + x_var = np.sum(x_centered**2, axis=0) + y_var = np.sum(y_centered**2, axis=0) + with np.errstate(divide="ignore", invalid="ignore"): + corr = numerator / np.sqrt(x_var * y_var) + corr = np.where((count >= min_count) & (x_var > 0.0) & (y_var > 0.0), corr, np.nan) + return corr.reshape(y.shape[1:]) diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/dynamics.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/dynamics.py index fbd95cc..60c556c 100644 --- a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/dynamics.py +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/dynamics.py @@ -75,3 +75,22 @@ def horizontal_advection(scalar, u_wind, v_wind, latitude, longitude) -> np.ndar u_values = _matching_field("u_wind", u_wind, scalar_values.shape) v_values = _matching_field("v_wind", v_wind, scalar_values.shape) return -(u_values * dscalar_dx + v_values * dscalar_dy) + + +def moisture_flux_divergence( + specific_humidity, + u_wind, + v_wind, + latitude, + longitude, +) -> np.ndarray: + """Compute horizontal divergence of specific-humidity flux.""" + + q_values = np.asarray(specific_humidity, dtype=float) + if q_values.ndim != 2: + raise ValueError("specific_humidity must be a two-dimensional latitude/longitude array") + u_values = _matching_field("u_wind", u_wind, q_values.shape) + v_values = _matching_field("v_wind", v_wind, q_values.shape) + _, dqu_dx = gradient_on_latlon(q_values * u_values, latitude, longitude) + dqv_dy, _ = gradient_on_latlon(q_values * v_values, latitude, longitude) + return dqu_dx + dqv_dy diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/interpolation.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/interpolation.py new file mode 100644 index 0000000..3368c25 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/interpolation.py @@ -0,0 +1,80 @@ +"""Lightweight station-to-grid interpolation utilities.""" + +from __future__ import annotations + +import numpy as np + + +def _one_dimensional(name: str, values) -> np.ndarray: + array = np.asarray(values, dtype=float) + if array.ndim != 1: + raise ValueError(f"{name} must be one-dimensional") + return array + + +def _grid_mesh(longitude, latitude) -> tuple[np.ndarray, np.ndarray]: + lon = np.asarray(longitude, dtype=float) + lat = np.asarray(latitude, dtype=float) + if lon.ndim == 1 and lat.ndim == 1: + return np.meshgrid(lon, lat) + if lon.shape != lat.shape: + raise ValueError("grid longitude and latitude must both be 1D or have matching shapes") + return lon, lat + + +def idw_interpolate_station_to_grid( + station_longitude, + station_latitude, + station_values, + grid_longitude, + grid_latitude, + *, + power: float = 2.0, + max_distance_degrees: float | None = None, + min_neighbors: int = 1, +) -> np.ndarray: + """Interpolate station values to a regular or curvilinear grid with IDW.""" + + station_lon = _one_dimensional("station_longitude", station_longitude) + station_lat = _one_dimensional("station_latitude", station_latitude) + values = _one_dimensional("station_values", station_values) + if not (station_lon.size == station_lat.size == values.size): + raise ValueError("station longitude, latitude, and values must have matching lengths") + if power <= 0.0: + raise ValueError("power must be positive") + if min_neighbors <= 0: + raise ValueError("min_neighbors must be positive") + if max_distance_degrees is not None and max_distance_degrees <= 0.0: + raise ValueError("max_distance_degrees must be positive when provided") + + grid_lon, grid_lat = _grid_mesh(grid_longitude, grid_latitude) + output = np.full(grid_lon.shape, np.nan, dtype=float) + valid = np.isfinite(station_lon) & np.isfinite(station_lat) & np.isfinite(values) + if valid.sum() < min_neighbors: + return output + + station_lon = station_lon[valid] + station_lat = station_lat[valid] + values = values[valid] + + flat_lon = grid_lon.ravel() + flat_lat = grid_lat.ravel() + flat_output = output.ravel() + for idx, (lon, lat) in enumerate(zip(flat_lon, flat_lat, strict=True)): + if not np.isfinite(lon) or not np.isfinite(lat): + continue + dlon = (station_lon - lon) * np.cos(np.deg2rad(lat)) + dlat = station_lat - lat + distance = np.hypot(dlon, dlat) + exact = np.isclose(distance, 0.0) + if exact.any(): + flat_output[idx] = float(np.nanmean(values[exact])) + continue + neighbor = np.isfinite(distance) + if max_distance_degrees is not None: + neighbor &= distance <= max_distance_degrees + if neighbor.sum() < min_neighbors: + continue + weights = 1.0 / np.power(distance[neighbor], power) + flat_output[idx] = float(np.sum(weights * values[neighbor]) / np.sum(weights)) + return output diff --git a/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/precipitation.py b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/precipitation.py new file mode 100644 index 0000000..e488942 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/src/python_weather_diagnostics_toolkit/precipitation.py @@ -0,0 +1,80 @@ +"""Precipitation helpers for public-safe station and gridded workflows.""" + +from __future__ import annotations + +import numpy as np + + +def mark_missing_sentinel(values, *, sentinel: float = -32766.0) -> np.ndarray: + """Replace a numeric missing-value sentinel with NaN.""" + + array = np.asarray(values, dtype=float) + return np.where(np.isclose(array, sentinel), np.nan, array) + + +def cumulative_to_increment( + accumulated, + *, + axis: int = -1, + negative_tolerance: float = 1e-9, +) -> np.ndarray: + """Convert non-decreasing accumulated precipitation to per-step amounts.""" + + values = np.asarray(accumulated, dtype=float) + if values.size == 0: + raise ValueError("accumulated precipitation must not be empty") + moved = np.moveaxis(values, axis, -1) + if moved.shape[-1] == 0: + raise ValueError("accumulated precipitation axis must not be empty") + + increments = np.empty_like(moved) + increments[..., 0] = moved[..., 0] + increments[..., 1:] = np.diff(moved, axis=-1) + finite_negative = np.isfinite(increments) & (increments < -negative_tolerance) + if finite_negative.any(): + raise ValueError("accumulated precipitation must be non-decreasing along axis") + increments = np.where(np.isfinite(increments) & (increments < 0.0), 0.0, increments) + return np.moveaxis(increments, -1, axis) + + +def increment_to_rate(increment, *, step_hours: float) -> np.ndarray: + """Convert per-step precipitation amounts to mm/day-equivalent rates.""" + + if step_hours <= 0.0: + raise ValueError("step_hours must be positive") + return np.asarray(increment, dtype=float) * (24.0 / step_hours) + + +def cumulative_to_rate( + accumulated, + *, + step_hours: float, + axis: int = -1, + negative_tolerance: float = 1e-9, +) -> np.ndarray: + """Convert accumulated precipitation to mm/day-equivalent step rates.""" + + increment = cumulative_to_increment( + accumulated, + axis=axis, + negative_tolerance=negative_tolerance, + ) + return increment_to_rate(increment, step_hours=step_hours) + + +def event_total(precipitation, *, axis=0, min_count: int = 1) -> np.ndarray: + """Sum precipitation over an event axis while preserving all-missing regions.""" + + if min_count <= 0: + raise ValueError("min_count must be positive") + values = np.asarray(precipitation, dtype=float) + valid_count = np.sum(np.isfinite(values), axis=axis) + total = np.nansum(values, axis=axis) + return np.where(valid_count >= min_count, total, np.nan) + + +def threshold_exceedance(values, *, threshold: float) -> np.ndarray: + """Return a boolean mask where finite values meet or exceed a threshold.""" + + array = np.asarray(values, dtype=float) + return np.isfinite(array) & (array >= threshold) diff --git a/projects/python-weather-diagnostics-toolkit/tests/test_aliases_and_synthetic.py b/projects/python-weather-diagnostics-toolkit/tests/test_aliases_and_synthetic.py index 2cf7381..488dbff 100644 --- a/projects/python-weather-diagnostics-toolkit/tests/test_aliases_and_synthetic.py +++ b/projects/python-weather-diagnostics-toolkit/tests/test_aliases_and_synthetic.py @@ -23,6 +23,14 @@ def test_get_data_array_accepts_known_variable_aliases(): assert float(da.values[0]) == 273.15 +def test_get_data_array_accepts_specific_humidity_alias(): + ds = xr.Dataset(data_vars={"q": (("x",), [0.012])}) + + da = get_data_array(ds, "specific_humidity") + + assert float(da.values[0]) == 0.012 + + def test_synthetic_dataset_is_tiny_and_labeled_synthetic(): ds = make_synthetic_weather_dataset() diff --git a/projects/python-weather-diagnostics-toolkit/tests/test_climate.py b/projects/python-weather-diagnostics-toolkit/tests/test_climate.py new file mode 100644 index 0000000..ca2b79f --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/tests/test_climate.py @@ -0,0 +1,60 @@ +import numpy as np +import pytest + +from python_weather_diagnostics_toolkit.climate import ( + anomaly, + composite_mean, + pearson_correlation_field, + standardized_anomaly, +) + + +def test_anomaly_and_standardized_anomaly_use_supplied_baseline(): + values = np.array([12.0, 15.0, 18.0]) + mean = np.array([10.0, 10.0, 10.0]) + spread = np.array([2.0, 0.0, 4.0]) + + np.testing.assert_allclose(anomaly(values, mean), np.array([2.0, 5.0, 8.0])) + standardized = standardized_anomaly(values, mean, spread) + + assert standardized[0] == 1.0 + assert np.isnan(standardized[1]) + assert standardized[2] == 2.0 + + +def test_composite_mean_selects_event_samples(): + values = np.array( + [ + [1.0, 2.0], + [3.0, 4.0], + [5.0, 6.0], + ] + ) + mask = np.array([True, False, True]) + + composite = composite_mean(values, mask, axis=0) + + np.testing.assert_allclose(composite, np.array([3.0, 4.0])) + + +def test_composite_mean_rejects_misaligned_mask(): + with pytest.raises(ValueError, match="mask length"): + composite_mean(np.ones((3, 2)), np.array([True, False])) + + +def test_pearson_correlation_field_handles_grid_points(): + index = np.array([1.0, 2.0, 3.0, 4.0]) + field = np.stack( + [ + np.array([[1.0, 4.0], [2.0, 5.0]]), + np.array([[2.0, 3.0], [4.0, 5.0]]), + np.array([[3.0, 2.0], [6.0, 5.0]]), + np.array([[4.0, 1.0], [8.0, 5.0]]), + ] + ) + + corr = pearson_correlation_field(index, field) + + np.testing.assert_allclose(corr[0, 0], 1.0) + np.testing.assert_allclose(corr[0, 1], -1.0) + assert np.isnan(corr[1, 1]) diff --git a/projects/python-weather-diagnostics-toolkit/tests/test_dynamics.py b/projects/python-weather-diagnostics-toolkit/tests/test_dynamics.py index f563a8f..017893b 100644 --- a/projects/python-weather-diagnostics-toolkit/tests/test_dynamics.py +++ b/projects/python-weather-diagnostics-toolkit/tests/test_dynamics.py @@ -4,6 +4,7 @@ from python_weather_diagnostics_toolkit.dynamics import ( gradient_on_latlon, horizontal_advection, + moisture_flux_divergence, relative_vorticity, ) @@ -52,3 +53,15 @@ def test_horizontal_advection_rejects_mismatched_wind_shape(): with pytest.raises(ValueError, match="u_wind shape"): horizontal_advection(scalar, u, v, lat, lon) + + +def test_uniform_moisture_flux_has_zero_divergence(): + lat = np.linspace(30.0, 35.0, 5) + lon = np.linspace(100.0, 105.0, 6) + q = np.ones((lat.size, lon.size)) * 0.01 + u = np.ones_like(q) * 5.0 + v = np.ones_like(q) * -2.0 + + divergence = moisture_flux_divergence(q, u, v, lat, lon) + + np.testing.assert_allclose(divergence, 0.0, atol=1e-15) diff --git a/projects/python-weather-diagnostics-toolkit/tests/test_interpolation.py b/projects/python-weather-diagnostics-toolkit/tests/test_interpolation.py new file mode 100644 index 0000000..a20ee45 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/tests/test_interpolation.py @@ -0,0 +1,49 @@ +import numpy as np + +from python_weather_diagnostics_toolkit.interpolation import idw_interpolate_station_to_grid + + +def test_idw_interpolation_preserves_exact_station_match(): + station_lon = np.array([100.0, 101.0]) + station_lat = np.array([30.0, 31.0]) + station_values = np.array([10.0, 20.0]) + + grid = idw_interpolate_station_to_grid( + station_lon, + station_lat, + station_values, + np.array([100.0, 101.0]), + np.array([30.0, 31.0]), + ) + + assert grid[0, 0] == 10.0 + assert grid[1, 1] == 20.0 + + +def test_idw_interpolation_ignores_missing_station_values(): + station_lon = np.array([100.0, 101.0]) + station_lat = np.array([30.0, 30.0]) + station_values = np.array([np.nan, 4.0]) + + grid = idw_interpolate_station_to_grid( + station_lon, + station_lat, + station_values, + np.array([100.0]), + np.array([30.0]), + ) + + assert grid[0, 0] == 4.0 + + +def test_idw_interpolation_respects_min_neighbors(): + grid = idw_interpolate_station_to_grid( + np.array([100.0]), + np.array([30.0]), + np.array([4.0]), + np.array([100.0]), + np.array([30.0]), + min_neighbors=2, + ) + + assert np.isnan(grid[0, 0]) diff --git a/projects/python-weather-diagnostics-toolkit/tests/test_precipitation.py b/projects/python-weather-diagnostics-toolkit/tests/test_precipitation.py new file mode 100644 index 0000000..e46c660 --- /dev/null +++ b/projects/python-weather-diagnostics-toolkit/tests/test_precipitation.py @@ -0,0 +1,55 @@ +import numpy as np +import pytest + +from python_weather_diagnostics_toolkit.precipitation import ( + cumulative_to_increment, + cumulative_to_rate, + event_total, + mark_missing_sentinel, + threshold_exceedance, +) + + +def test_mark_missing_sentinel_replaces_only_missing_code(): + values = np.array([0.0, -32766.0, 12.5]) + + cleaned = mark_missing_sentinel(values) + + assert cleaned[0] == 0.0 + assert np.isnan(cleaned[1]) + assert cleaned[2] == 12.5 + + +def test_cumulative_to_rate_returns_step_mm_per_day_values(): + accumulated = np.array([0.0, 2.0, 5.0]) + + rate = cumulative_to_rate(accumulated, step_hours=6) + + np.testing.assert_allclose(rate, np.array([0.0, 8.0, 12.0])) + + +def test_cumulative_to_increment_rejects_decreasing_series(): + with pytest.raises(ValueError, match="non-decreasing"): + cumulative_to_increment(np.array([0.0, 4.0, 3.0])) + + +def test_event_total_preserves_all_missing_grid_points(): + precipitation = np.array( + [ + [1.0, np.nan], + [2.0, np.nan], + ] + ) + + total = event_total(precipitation, axis=0) + + np.testing.assert_allclose(total[0], 3.0) + assert np.isnan(total[1]) + + +def test_threshold_exceedance_ignores_missing_values(): + values = np.array([49.0, 50.0, np.nan]) + + mask = threshold_exceedance(values, threshold=50.0) + + np.testing.assert_array_equal(mask, np.array([False, True, False]))