Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions dataretrieval/nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,21 @@ def _parse_json_or_raise(response: httpx.Response) -> pd.DataFrame:
raise


def _reject_unexpected_format(
kwargs: dict[str, Any], func_name: str, expected: str
) -> None:
"""Drop ``format`` from ``kwargs`` and reject any value other than ``expected``.

These getters always request a fixed ``format`` and parse that specific body,
so a caller-supplied ``format`` would either collide with the explicit
``format=`` argument or be silently overridden; reject it explicitly instead.
"""
if kwargs.pop("format", expected) != expected:
raise ValueError(
f"{func_name} returns {expected.upper()} and does not accept a `format`."
)


def format_response(
df: pd.DataFrame, service: str | None = None, **kwargs: Any
) -> pd.DataFrame:
Expand Down Expand Up @@ -277,6 +292,7 @@ def get_discharge_peaks(
kwargs["end_date"] = kwargs.pop("end_date", end)
kwargs["multi_index"] = multi_index

_reject_unexpected_format(kwargs, "get_discharge_peaks", "rdb")
response = query_waterdata("peaks", format="rdb", ssl_check=ssl_check, **kwargs)

# Parse raw (read_rdb), not _read_rdb — the latter already runs
Expand Down Expand Up @@ -355,6 +371,8 @@ def get_stats(
"""
_check_sites_value_types(sites)

# get_stats parses an RDB body; reject a caller-supplied non-RDB `format`.
_reject_unexpected_format(kwargs, "get_stats", "rdb")
response = query_waterservices(
service="stat", sites=sites, ssl_check=ssl_check, **kwargs
)
Expand Down Expand Up @@ -392,11 +410,13 @@ def query_waterdata(
"se_latitude_va",
]

if not any(key in kwargs for key in major_params + bbox_params):
# Present *and* non-None — see query_waterservices for why membership alone
# would let an unset filter slip through.
if not any(kwargs.get(key) is not None for key in major_params + bbox_params):
raise TypeError("Query must specify a major filter: site_no, stateCd, bBox")

elif any(key in kwargs for key in bbox_params) and not all(
key in kwargs for key in bbox_params
elif any(kwargs.get(key) is not None for key in bbox_params) and not all(
kwargs.get(key) is not None for key in bbox_params
):
raise TypeError("One or more lat/long coordinates missing or invalid.")

Expand Down Expand Up @@ -453,8 +473,15 @@ def query_waterservices(
The response object from the API request to the web service

"""
# A major filter must be present *and* non-None. Membership alone is not
# enough: callers may inject an unset filter (e.g. sites=None), and
# utils.query() later strips None-valued params, so a plain `"sites" in
# kwargs` would wave a filterless request through to a confusing "Bad
# Request". Checking the value keeps that decision in one place instead of
# forcing every getter to pre-filter its own kwargs.
if not any(
key in kwargs for key in ["sites", "stateCd", "bBox", "huc", "countyCd"]
kwargs.get(key) is not None
for key in ["sites", "stateCd", "bBox", "huc", "countyCd"]
):
raise TypeError(
"Query must specify a major filter: sites, stateCd, bBox, huc, or countyCd"
Expand Down Expand Up @@ -537,6 +564,7 @@ def get_dv(
kwargs["sites"] = kwargs.pop("sites", sites)
kwargs["multi_index"] = multi_index

_reject_unexpected_format(kwargs, "get_dv", "json")
response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
df = _parse_json_or_raise(response)

Expand Down Expand Up @@ -724,6 +752,7 @@ def get_iv(
kwargs["sites"] = kwargs.pop("sites", sites)
kwargs["multi_index"] = multi_index

_reject_unexpected_format(kwargs, "get_iv", "json")
response = query_waterservices(
service="iv", format="json", ssl_check=ssl_check, **kwargs
)
Expand Down Expand Up @@ -788,6 +817,11 @@ def get_ratings(

"""
site = kwargs.pop("site_no", site)
# The ratings endpoint is per-site; without one it would issue a request
# that returns an unhelpful error page. Fail fast with a clear message
# (also covers get_record(service="ratings") called without a site).
if site is None:
raise TypeError("get_ratings requires a `site` (USGS site number).")

payload = {}
url = WATERDATA_BASE_URL + "nwisweb/get_ratings/"
Expand Down Expand Up @@ -956,6 +990,12 @@ def get_record(
if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES:
raise TypeError(f"Unrecognized service: {service}")

# Forward the documented `state` filter as the NWIS `stateCd` major filter;
# it was previously accepted but silently ignored, which produced a
# confusing "Bad Request" when used without `sites`.
if state is not None:
kwargs.setdefault("stateCd", state)

if service == "iv":
df, _ = get_iv(
sites=sites,
Expand Down
110 changes: 110 additions & 0 deletions tests/nwis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@
NWIS_Metadata,
_read_rdb,
get_discharge_measurements,
get_discharge_peaks,
get_dv,
get_gwlevels,
get_info,
get_iv,
get_pmcodes,
get_qwdata,
get_record,
get_stats,
get_water_use,
preformat_peaks_response,
what_sites,
Expand Down Expand Up @@ -120,6 +123,113 @@ def test_preformat_peaks_response():
# Removed defunct gwlevels tests.


def test_get_dv_requires_major_filter():
"""Regression: get_dv() with no major filter must raise the documented
TypeError. The getters injected ``kwargs["sites"] = kwargs.pop("sites",
sites)``, which always set the key (None when absent) and so defeated
query_waterservices' filter check, yielding a confusing Bad Request."""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(TypeError, match="major filter"):
get_dv()


def test_get_dv_rejects_non_json_format():
"""Regression: get_dv passed format= explicitly alongside **kwargs, so
get_dv(..., format="rdb") raised 'multiple values for format'. It now
rejects a non-json format with a clear ValueError (it parses JSON)."""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(ValueError, match="JSON"):
get_dv(sites="01646500", format="rdb")


def test_get_record_forwards_state_as_statecd(monkeypatch):
"""Regression: get_record's documented `state` arg was accepted but never
forwarded. It now reaches the request as the NWIS `stateCd` major filter."""
import dataretrieval.nwis as nwis_mod

captured: dict = {}

def fake_query_waterservices(service, format=None, ssl_check=True, **kw):
captured.update(kw)
raise RuntimeError("stop before network")

monkeypatch.setattr(nwis_mod, "query_waterservices", fake_query_waterservices)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(RuntimeError, match="stop"):
get_record(state="OH", service="dv")
assert captured.get("stateCd") == "OH"


def test_get_stats_requires_major_filter():
"""Regression: get_stats passed ``sites=sites`` explicitly, so the key was
always present (None when absent) and defeated query_waterservices' filter
check -- get_stats() reached the network and returned a confusing Bad
Request instead of the documented TypeError."""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(TypeError, match="major filter"):
get_stats()


@pytest.mark.parametrize("service", ["stat", "site"])
def test_get_record_requires_major_filter(service):
"""Regression: get_record(service="stat"/"site") forwarded sites=None into
get_stats/get_info, defeating the major-filter check. With no filter it must
raise the documented TypeError rather than reach the network."""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(TypeError, match="major filter"):
get_record(service=service)


def test_get_discharge_peaks_rejects_format():
"""Regression: get_discharge_peaks passed format="rdb" explicitly alongside
**kwargs, so any caller-supplied format raised 'multiple values for format'.
A non-native format is now rejected with a clear ValueError (it parses RDB)."""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(ValueError, match="RDB"):
get_discharge_peaks(sites="01491000", format="json")


def test_get_discharge_peaks_accepts_native_format(monkeypatch):
"""The reported collision was that even the *native* format="rdb" raised
'multiple values for format'. Popping it first resolves the collision, so
format="rdb" now reaches the request rather than crashing."""
import dataretrieval.nwis as nwis_mod

def fake_query_waterdata(service, format=None, ssl_check=True, **kw):
raise RuntimeError("stop before network")

monkeypatch.setattr(nwis_mod, "query_waterdata", fake_query_waterdata)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(RuntimeError, match="stop"):
get_discharge_peaks(sites="01491000", format="rdb")


def test_get_stats_rejects_non_rdb_format():
"""get_stats parses RDB; a caller-supplied non-RDB format would be requested
and then mis-parsed. It is now rejected with a clear ValueError."""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(ValueError, match="RDB"):
get_stats(sites="01646500", format="json")


def test_get_ratings_requires_site():
"""The ratings endpoint is per-site; get_ratings() / get_record(
service="ratings") with no site previously issued a request that returned an
unhelpful error page. It now fails fast with a clear TypeError."""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with pytest.raises(TypeError, match="requires a `site`"):
get_record(service="ratings")


class TestDeprecationWarnings:
"""Verify per-function DeprecationWarning fires with the right replacement.

Expand Down
Loading