Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions dataretrieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

``nldi`` requires geopandas (``pip install dataretrieval[nldi]``) and is
imported on demand: ``from dataretrieval import nldi``.

Every request failure raises a subclass of :class:`dataretrieval.DataRetrievalError`;
the taxonomy lives in ``dataretrieval.exceptions``.
"""

from importlib.metadata import PackageNotFoundError, version
Expand All @@ -25,7 +28,21 @@
except PackageNotFoundError:
__version__ = "version-unknown"

from dataretrieval.exceptions import (
BadRequestError,
DataRetrievalError,
NoSitesError,
NotFoundError,
RateLimited,
RequestTooLarge,
ServiceUnavailable,
TransientError,
Unchunkable,
URLTooLong,
)

from . import (
exceptions,
nadp,
nwis,
samples,
Expand All @@ -36,12 +53,26 @@
)

# Public API of the package: the names exported by ``from dataretrieval import *``.
# It lists the service modules, the ``exceptions`` module with its re-exported
# error classes, and ``__version__``.
__all__ = [
# service modules
"nadp",
"nwis",
"samples",
"streamstats",
"utils",
"waterdata",
"wqp",
# error taxonomy (canonical home: ``dataretrieval.exceptions``), re-exported
# so callers can ``except dataretrieval.DataRetrievalError``
"exceptions",
"BadRequestError",
"DataRetrievalError",
"NoSitesError",
"NotFoundError",
"RateLimited",
"RequestTooLarge",
"ServiceUnavailable",
"TransientError",
"URLTooLong",
"Unchunkable",
"__version__",
]
154 changes: 154 additions & 0 deletions dataretrieval/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""Exception taxonomy for ``dataretrieval``.

A failed request from any service module (``nwis``, ``wqp``, ``waterdata``,
``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can
handle any request failure with a single ``except dataretrieval.DataRetrievalError``.

The tree has two intermediate bases a caller can catch to span a whole family:
:class:`RequestTooLarge` (the request can't fit, however it was issued) and
:class:`TransientError` (a temporary failure worth retrying).

This module deliberately has no third-party dependencies, so any module can
import it without pulling in pandas/httpx.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
import httpx

# Public names of this module, listed in the order the classes are defined
# below: the common base first, then each family base followed by its
# concrete subclasses.
__all__ = [
"DataRetrievalError",
"BadRequestError",
"NotFoundError",
"RequestTooLarge",
"URLTooLong",
"Unchunkable",
"NoSitesError",
"TransientError",
"RateLimited",
"ServiceUnavailable",
]


class DataRetrievalError(Exception):
    """Root of the ``dataretrieval`` exception hierarchy.

    A failed request to a USGS or EPA web service, issued from any service
    module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...), surfaces as a
    subclass of this class, so a single handler covers every request
    failure::

        try:
            df, md = dataretrieval.wqp.get_results(...)
        except dataretrieval.DataRetrievalError:
            ...

    For backward compatibility, subclasses that replace an error this package
    historically raised as a built-in also derive from that built-in:
    :class:`ValueError` when the request cannot succeed as written (bad
    params, too large) and :class:`RuntimeError` for a transient transport
    failure. Existing ``except ValueError`` / ``except RuntimeError`` handlers
    therefore keep working.
    """


# --- Fatal client errors -------------------------------------------------
# The request can't succeed as written; retrying it unchanged won't help. Each
# is also a ``ValueError`` -- the built-in the legacy ``query`` path has always
# raised -- so existing ``except ValueError`` handlers keep working.


class BadRequestError(DataRetrievalError, ValueError):
    """Raised for HTTP 400: the service rejected the request parameters."""


class NotFoundError(DataRetrievalError, ValueError):
    """Raised for HTTP 404: the requested resource was not found.

    This is often the result of an empty query.
    """


class RequestTooLarge(DataRetrievalError, ValueError):
    """Family base for requests that exceed what the service accepts.

    Catch this class to handle both concrete cases at once:

    * :class:`URLTooLong` -- a single request that the server rejected;
    * :class:`Unchunkable` -- the Water Data chunker could not split the call
      small enough to fit.
    """


class URLTooLong(RequestTooLarge):
    """The URL of a single request was longer than the service allows.

    Corresponds to HTTP 414, or to a URL rejected client-side before it was
    sent. The legacy ``query`` path raises it, because that path issues one
    request and does no chunking. To recover, query fewer sites or split the
    call manually.
    """


class Unchunkable(RequestTooLarge):
    """The Water Data chunker found no plan that fits the URL byte limit.

    It is raised once even the smallest reducible plan -- every list axis at
    one atom per sub-request and the filter at one clause per sub-request --
    is still larger than the server's byte limit. In contrast to
    :class:`URLTooLong`, automatic splitting has therefore already been tried
    and exhausted. To recover, shrink the input lists, simplify the filter, or
    split the call manually.
    """


class NoSitesError(DataRetrievalError):
    """Raised when the selection criteria match no sites/data.

    Parameters
    ----------
    url : httpx.URL
        URL of the request that matched nothing; stored on the :attr:`url`
        attribute and shown in the error message.
    """

    def __init__(self, url: httpx.URL) -> None:
        self.url = url

    def __str__(self) -> str:
        prefix = "No sites/data found using the selection criteria specified in "
        return f"{prefix}url: {self.url}"


# --- Transient transport errors ------------------------------------------
# The service was reachable but temporarily refused the request; the same call
# may succeed if retried. Each is also a ``RuntimeError`` (the built-in the
# waterdata path has always raised). The Water Data chunker recognizes them via
# ``isinstance(exc, TransientError)`` and wraps them as resumable
# ``ChunkInterrupted`` subclasses.


class TransientError(DataRetrievalError, RuntimeError):
    """Family base for temporary HTTP failures that merit an automatic retry.

    Each recoverable HTTP status family has its own subclass
    (429 -> :class:`RateLimited`, 5xx -> :class:`ServiceUnavailable`). The
    Water Data chunker identifies them through this common base and wraps
    them as resumable interruptions.

    Parameters
    ----------
    message : str
        Human-readable error message.
    retry_after : float, optional
        Number of seconds to wait before retrying, taken from the
        ``Retry-After`` response header. It is kept on the
        :attr:`retry_after` attribute, which is ``None`` when the header is
        absent or unparseable.
    """

    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
        self.retry_after = retry_after
        super().__init__(message)


class RateLimited(TransientError):
    """Raised for HTTP 429: the request was rejected for too many requests."""


class ServiceUnavailable(TransientError):
    """Raised for HTTP 5xx: the request was rejected with a server error.

    The legacy ``query`` path and the Water Data path both raise it, so a 5xx
    shows up as the same type whichever subsystem issued the request.
    """
82 changes: 46 additions & 36 deletions dataretrieval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@

import dataretrieval
from dataretrieval.codes import tz
from dataretrieval.exceptions import (
BadRequestError,
NoSitesError,
NotFoundError,
ServiceUnavailable,
URLTooLong,
)

# Typed as ``dict[str, Any]`` (not the inferred ``dict[str, object]``) so that
# splatting it as ``**HTTPX_DEFAULTS`` into ``httpx.get`` / ``httpx.AsyncClient``
Expand Down Expand Up @@ -274,14 +281,42 @@ def __repr__(self) -> str:
data_list.append(data) # append results to list"""


def _url_too_long_error(detail: str) -> URLTooLong:
    """Build (without raising) the error for an over-long request URL.

    The error type is :class:`URLTooLong`, which is also a
    :class:`ValueError`, so handlers written for the earlier plain
    ``ValueError`` keep working.

    Parameters
    ----------
    detail : str
        Context inserted into the message, such as the server's reason
        phrase or the client-side rejection text.

    Returns
    -------
    URLTooLong
        The constructed exception; the caller is expected to ``raise`` it.
    """
    return URLTooLong(
        "Request URL too long. Modify your query to use fewer sites. "
        f"{detail}. Pseudo-code example of how to split your query: "
        f"\n {_URL_TOO_LONG_EXAMPLE}"
    )


def _raise_for_status(response: httpx.Response) -> None:
"""Map an unsuccessful HTTP status to a typed :class:`DataRetrievalError`;
return ``None`` on success.

Shared by the legacy :func:`query` path. The 4xx types stay
:class:`ValueError`-compatible (this path's historical contract), but a 5xx
raises the transient :class:`ServiceUnavailable` (a :class:`RuntimeError`),
since a server failure is retryable rather than a bad request.
"""
status = response.status_code
if status == 400:
raise BadRequestError(
f"Bad Request, check that your parameters are correct. URL: {response.url}"
)
elif status == 404:
raise NotFoundError(
"Page Not Found Error. May be the result of an empty query. "
f"URL: {response.url}"
)
elif status == 414:
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
elif 500 <= status < 600:
raise ServiceUnavailable(
f"Service Unavailable: {status} {response.reason_phrase}. "
f"The service at {response.url} may be down or experiencing issues."
)


def query(
url: str,
payload: dict[str, Any],
Expand Down Expand Up @@ -312,11 +347,14 @@ def query(

Raises
------
ValueError
If the service returns a 400, 404, 414, or 5xx status code, or if
``httpx`` rejects the URL client-side (e.g. it is too long).
NoSitesError
If the response indicates that no sites or data matched the query.
DataRetrievalError
On failure: :class:`~dataretrieval.exceptions.BadRequestError` (400),
:class:`~dataretrieval.exceptions.NotFoundError` (404),
:class:`~dataretrieval.exceptions.URLTooLong` (414 or a client-side
over-long URL), :class:`~dataretrieval.exceptions.ServiceUnavailable`
(5xx), or :class:`~dataretrieval.exceptions.NoSitesError` (no sites/data
matched). The 4xx types are also :class:`ValueError`;
``ServiceUnavailable`` is a :class:`RuntimeError`.
"""

for key, value in payload.items():
Expand All @@ -338,37 +376,9 @@ def query(
except httpx.InvalidURL as exc:
raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc

if response.status_code == 400:
raise ValueError(
f"Bad Request, check that your parameters are correct. URL: {response.url}"
)
elif response.status_code == 404:
raise ValueError(
"Page Not Found Error. May be the result of an empty query. "
+ f"URL: {response.url}"
)
elif response.status_code == 414:
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
elif 500 <= response.status_code < 600:
raise ValueError(
f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
+ f"The service at {response.url} may be down or experiencing issues."
)
_raise_for_status(response)

if response.text.startswith("No sites/data"):
raise NoSitesError(response.url)

return response


# NOTE(review): this module also imports ``NoSitesError`` from
# ``dataretrieval.exceptions``; confirm which definition is meant to be used.
class NoSitesError(Exception):
    """Custom error raised when the selection criteria return no sites/data.

    Parameters
    ----------
    url : httpx.URL
        URL of the request that matched nothing; stored on the :attr:`url`
        attribute and shown in the error message.
    """

    def __init__(self, url: httpx.URL) -> None:
        self.url = url

    def __str__(self) -> str:
        prefix = "No sites/data found using the selection criteria specified in "
        return f"{prefix}url: {self.url}"
Loading
Loading