From d46de2bf72c30db4177174018b37442766671763 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Tue, 2 Jun 2026 15:15:17 -0400
Subject: [PATCH] refactor(errors)!: unify request failures under a
 DataRetrievalError taxonomy

Before, an HTTP failure surfaced as a different exception type depending on
which module made the request -- a ValueError (or bare Exception) on the legacy
query() path, RuntimeError-based types on the waterdata path, a bare
httpx.HTTPStatusError elsewhere -- so there was no single `except` for "any
dataretrieval request failure".

Introduce dataretrieval/exceptions.py (dependency-free, re-exported at top level
as dataretrieval.<Name>), rooted at DataRetrievalError, with two intermediate
bases that name the axes a caller reasons about:

  DataRetrievalError(Exception)
  |- BadRequestError(.., ValueError)     # 400
  |- NotFoundError(.., ValueError)       # 404
  |- RequestTooLarge(.., ValueError)     # base: request too large to satisfy
  |   |- URLTooLong                      #   414 / client-side URL reject
  |   '- Unchunkable                     #   chunker planner floor
  |- NoSitesError                        # empty result
  '- TransientError(.., RuntimeError)    # base: retryable; carries retry_after
      |- RateLimited                     #   429
      '- ServiceUnavailable              #   5xx (both paths)

- One type per condition, raised by both the legacy query() path and the Water
  Data chunker. Callers can catch a whole family (`except RequestTooLarge` /
  `except TransientError`); the chunker's retry check is a single
  isinstance(exc, TransientError).
- query()'s inline status ladder is extracted into a reusable _raise_for_status().
- NoSitesError now subclasses DataRetrievalError (was Exception), and
  ChunkInterrupted gains DataRetrievalError as a base (still a RuntimeError).
- Built-in compatibility by kind: fatal client errors are also ValueError,
  transient transport errors also RuntimeError, so existing `except ValueError`
  / `except RuntimeError` handlers keep working.

BREAKING CHANGES
- The legacy query() path raises typed errors instead of ad-hoc ValueErrors
  (400 -> BadRequestError, 404 -> NotFoundError, 414/over-long URL -> URLTooLong).
- A 5xx on the legacy query() path now raises ServiceUnavailable, a RuntimeError
  (was a ValueError): a transient server failure is a runtime condition, not a
  bad value.
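- The Water Data chunker's planner-floor error is Unchunkable (a RequestTooLarge
  subclass). RequestTooLarge itself moves to dataretrieval.exceptions as the
  family base, and the private _RetryableTransportError is replaced by
  TransientError; neither is defined in dataretrieval.waterdata.chunking anymore.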
- Import the transport types/bases from dataretrieval / dataretrieval.exceptions,
  not from dataretrieval.waterdata.chunking.

Verified: 477 passed / 2 skipped, ruff clean; live API spot checks (404/400/
over-long URL raise the typed errors, 200 unaffected); all 21 example notebooks
execute end-to-end against the live API (227/227 cells, 0 errors).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 dataretrieval/__init__.py           |  31 ++++++
 dataretrieval/exceptions.py         | 154 ++++++++++++++++++++++++++++
 dataretrieval/utils.py              |  82 ++++++++-------
 dataretrieval/waterdata/chunking.py |  84 +++------------
 dataretrieval/waterdata/utils.py    |   7 +-
 tests/nldi_test.py                  |  15 +--
 tests/utils_test.py                 |  76 +++++++++++++-
 tests/waterdata_chunking_test.py    |  14 +--
 8 files changed, 338 insertions(+), 125 deletions(-)
 create mode 100644 dataretrieval/exceptions.py
diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py
index 29b184f7..4b58247e 100644
--- a/dataretrieval/__init__.py
+++ b/dataretrieval/__init__.py
@@ -16,6 +16,9 @@
 
 ``nldi`` requires geopandas (``pip install dataretrieval[nldi]``) and is
 imported on demand: ``from dataretrieval import nldi``.
+
+Request failures the package classifies raise a subclass of
+:class:`dataretrieval.DataRetrievalError`; taxonomy in ``dataretrieval.exceptions``.
 """
 
 from importlib.metadata import PackageNotFoundError, version
@@ -25,7 +28,21 @@
 except PackageNotFoundError:
     __version__ = "version-unknown"
 
+from dataretrieval.exceptions import (
+    BadRequestError,
+    DataRetrievalError,
+    NoSitesError,
+    NotFoundError,
+    RateLimited,
+    RequestTooLarge,
+    ServiceUnavailable,
+    TransientError,
+    Unchunkable,
+    URLTooLong,
+)
+
 from . import (
+    exceptions,
     nadp,
     nwis,
     samples,
@@ -36,6 +53,7 @@
 )
 
 __all__ = [
+    # service modules
     "nadp",
     "nwis",
     "samples",
@@ -43,5 +61,18 @@
     "utils",
     "waterdata",
     "wqp",
+    # error taxonomy (canonical home: ``dataretrieval.exceptions``), re-exported
+    # so callers can ``except dataretrieval.DataRetrievalError``
+    "exceptions",
+    "BadRequestError",
+    "DataRetrievalError",
+    "NoSitesError",
+    "NotFoundError",
+    "RateLimited",
+    "RequestTooLarge",
+    "ServiceUnavailable",
+    "TransientError",
+    "URLTooLong",
+    "Unchunkable",
     "__version__",
 ]
diff --git a/dataretrieval/exceptions.py b/dataretrieval/exceptions.py
new file mode 100644
index 00000000..2bb955c3
--- /dev/null
+++ b/dataretrieval/exceptions.py
@@ -0,0 +1,154 @@
+"""Exception taxonomy for ``dataretrieval``.
+
+The typed failures raised by the service modules (``nwis``, ``wqp``, ``waterdata``,
+``nldi``, ...) all subclass :class:`DataRetrievalError`, so one ``except`` spans
+them; other failures (e.g. waterdata's plain ``RuntimeError``) are not yet typed.
+
+The tree has two intermediate bases a caller can catch to span a whole family:
+:class:`RequestTooLarge` (the request can't fit, however it was issued) and
+:class:`TransientError` (a temporary failure worth retrying).
+
+This module deliberately has no third-party dependencies, so any module can
+import it without pulling in pandas/httpx.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import httpx
+
+__all__ = [
+    "DataRetrievalError",
+    "BadRequestError",
+    "NotFoundError",
+    "RequestTooLarge",
+    "URLTooLong",
+    "Unchunkable",
+    "NoSitesError",
+    "TransientError",
+    "RateLimited",
+    "ServiceUnavailable",
+]
+
+
+class DataRetrievalError(Exception):
+    """Base class for errors raised when a request to a USGS or EPA web
+    service fails.
+
+    Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...)
+    raises a subclass of this when a request fails, so a caller can handle any
+    request failure uniformly::
+
+        try:
+            df, md = dataretrieval.wqp.get_results(...)
+        except dataretrieval.DataRetrievalError:
+            ...
+
+    Most subclasses also inherit from a built-in matching the condition's
+    *kind* -- :class:`ValueError` for a request that can't succeed as written
+    (bad params, too large), :class:`RuntimeError` for a transient transport
+    failure -- so most existing ``except ValueError`` / ``except RuntimeError``
+    handlers keep working. :class:`NoSitesError` has only the package base.
+    """
+
+
+# --- Fatal client errors -------------------------------------------------
+# The request can't succeed as written; retrying it unchanged won't help. Each
+# is also a ``ValueError`` -- the built-in the legacy ``query`` path has always
+# raised -- so existing ``except ValueError`` handlers keep working.
+
+
+class BadRequestError(DataRetrievalError, ValueError):
+    """The service rejected the request parameters (HTTP 400)."""
+
+
+class NotFoundError(DataRetrievalError, ValueError):
+    """The requested resource was not found; often an empty query (HTTP 404)."""
+
+
+class RequestTooLarge(DataRetrievalError, ValueError):
+    """The request is too large for the service to satisfy.
+
+    A base for the two ways a request can exceed what the service accepts;
+    catch it to handle either. The concrete subclasses are :class:`URLTooLong`
+    (a single request the server rejected) and :class:`Unchunkable` (the Water
+    Data chunker could not split the call small enough to fit).
+    """
+
+
+class URLTooLong(RequestTooLarge):
+    """A single request URL exceeded the service's limit (HTTP 414, or rejected
+    client-side before it was sent).
+
+    Raised by the legacy ``query`` path, which issues one request without
+    chunking. Remediation: query fewer sites, or split the call manually.
+    """
+
+
+class Unchunkable(RequestTooLarge):
+    """No chunking plan fits the URL byte limit.
+
+    Raised by the Water Data chunker when even the smallest reducible plan
+    (every list axis at one atom per sub-request, the filter at one clause per
+    sub-request) still exceeds the server's byte limit -- so unlike
+    :class:`URLTooLong`, automatic splitting has already been tried and
+    exhausted. Shrink the input lists, simplify the filter, or split the call
+    manually.
+    """
+
+
+class NoSitesError(DataRetrievalError):
+    """The selection criteria matched no sites/data."""
+
+    def __init__(self, url: httpx.URL) -> None:
+        self.url = url
+
+    def __str__(self) -> str:
+        return (
+            "No sites/data found using the selection criteria specified in "
+            f"url: {self.url}"
+        )
+
+
+# --- Transient transport errors ------------------------------------------
+# The service was reachable but temporarily refused the request; the same call
+# may succeed if retried. Each is also a ``RuntimeError`` (the built-in the
+# waterdata path has always raised). The Water Data chunker recognizes them via
+# ``isinstance(exc, TransientError)`` and wraps them as resumable
+# ``ChunkInterrupted`` subclasses.
+
+
+class TransientError(DataRetrievalError, RuntimeError):
+    """Base for transient HTTP failures that are worth an automatic retry.
+
+    One subclass per recoverable HTTP status family (429 -> :class:`RateLimited`,
+    5xx -> :class:`ServiceUnavailable`); the Water Data chunker recognizes them
+    by this shared base and wraps them as resumable interruptions.
+
+    Parameters
+    ----------
+    message : str
+        Human-readable error message.
+    retry_after : float, optional
+        Seconds to wait before retrying, parsed from the ``Retry-After``
+        response header; stored on the :attr:`retry_after` attribute (``None``
+        when the header is absent or unparseable).
+    """
+
+    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
+        super().__init__(message)
+        self.retry_after = retry_after
+
+
+class RateLimited(TransientError):
+    """A request was rejected with HTTP 429 (too many requests)."""
+
+
+class ServiceUnavailable(TransientError):
+    """A request was rejected with a server error (HTTP 5xx).
+
+    Raised by both the legacy ``query`` path and the Water Data path, so a 5xx
+    surfaces as one type regardless of which subsystem issued the request.
+    """
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 154f2a4d..9751b269 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -13,6 +13,13 @@
 
 import dataretrieval
 from dataretrieval.codes import tz
+from dataretrieval.exceptions import (
+    BadRequestError,
+    NoSitesError,
+    NotFoundError,
+    ServiceUnavailable,
+    URLTooLong,
+)
 
 # Typed as ``dict[str, Any]`` (not the inferred ``dict[str, object]``) so that
 # splatting it as ``**HTTPX_DEFAULTS`` into ``httpx.get`` / ``httpx.AsyncClient``
@@ -274,14 +281,42 @@ def __repr__(self) -> str:
                         data_list.append(data)  # append results to list"""
 
 
-def _url_too_long_error(detail: str) -> ValueError:
-    return ValueError(
+def _url_too_long_error(detail: str) -> URLTooLong:
+    return URLTooLong(
         "Request URL too long. Modify your query to use fewer sites. "
         f"{detail}. Pseudo-code example of how to split your query: "
         f"\n {_URL_TOO_LONG_EXAMPLE}"
     )
 
 
+def _raise_for_status(response: httpx.Response) -> None:
+    """Raise a typed :class:`DataRetrievalError` for a 400, 404, 414, or 5xx
+    response; return ``None`` for any other status (including other 4xx).
+
+    Used by the legacy :func:`query` path. The 4xx types stay
+    :class:`ValueError`-compatible (this path's historical contract), but a 5xx
+    raises the transient :class:`ServiceUnavailable` (a :class:`RuntimeError`),
+    since a server failure is retryable rather than a bad request.
+    """
+    status = response.status_code
+    if status == 400:
+        raise BadRequestError(
+            f"Bad Request, check that your parameters are correct. URL: {response.url}"
+        )
+    elif status == 404:
+        raise NotFoundError(
+            "Page Not Found Error. May be the result of an empty query. "
+            f"URL: {response.url}"
+        )
+    elif status == 414:
+        raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
+    elif 500 <= status < 600:
+        raise ServiceUnavailable(
+            f"Service Unavailable: {status} {response.reason_phrase}. "
+            f"The service at {response.url} may be down or experiencing issues."
+        )
+
+
 def query(
     url: str,
     payload: dict[str, Any],
@@ -312,11 +347,14 @@ def query(
 
     Raises
     ------
-    ValueError
-        If the service returns a 400, 404, 414, or 5xx status code, or if
-        ``httpx`` rejects the URL client-side (e.g. it is too long).
-    NoSitesError
-        If the response indicates that no sites or data matched the query.
+    DataRetrievalError
+        On failure: :class:`~dataretrieval.exceptions.BadRequestError` (400),
+        :class:`~dataretrieval.exceptions.NotFoundError` (404),
+        :class:`~dataretrieval.exceptions.URLTooLong` (414 or a client-side
+        over-long URL), :class:`~dataretrieval.exceptions.ServiceUnavailable`
+        (5xx), or :class:`~dataretrieval.exceptions.NoSitesError` (no sites/data
+        matched). The 4xx types are also :class:`ValueError`;
+        ``ServiceUnavailable`` is a :class:`RuntimeError`.
     """
 
     for key, value in payload.items():
@@ -338,37 +376,9 @@ def query(
     except httpx.InvalidURL as exc:
         raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc
 
-    if response.status_code == 400:
-        raise ValueError(
-            f"Bad Request, check that your parameters are correct. URL: {response.url}"
-        )
-    elif response.status_code == 404:
-        raise ValueError(
-            "Page Not Found Error. May be the result of an empty query. "
-            + f"URL: {response.url}"
-        )
-    elif response.status_code == 414:
-        raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
-    elif 500 <= response.status_code < 600:
-        raise ValueError(
-            f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
-            + f"The service at {response.url} may be down or experiencing issues."
-        )
+    _raise_for_status(response)
 
     if response.text.startswith("No sites/data"):
         raise NoSitesError(response.url)
 
     return response
-
-
-class NoSitesError(Exception):
-    """Custom error class used when selection criteria return no sites/data."""
-
-    def __init__(self, url: httpx.URL) -> None:
-        self.url = url
-
-    def __str__(self) -> str:
-        return (
-            "No sites/data found using the selection criteria specified in "
-            f"url: {self.url}"
-        )
diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/waterdata/chunking.py
index 9a5e20f5..82f5a37e 100644
--- a/dataretrieval/waterdata/chunking.py
+++ b/dataretrieval/waterdata/chunking.py
@@ -66,6 +66,13 @@
 import pandas as pd
 from anyio.from_thread import start_blocking_portal
 
+from dataretrieval.exceptions import (
+    DataRetrievalError,
+    RateLimited,
+    ServiceUnavailable,
+    TransientError,
+    Unchunkable,
+)
 from dataretrieval.utils import HTTPX_DEFAULTS
 
 from . import _progress
@@ -383,70 +390,7 @@ def _passthrough_result(
     return frame, response
 
 
-class _RetryableTransportError(RuntimeError):
-    """
-    Base for typed HTTP transport failures the chunker recognizes as
-    transient.
-
-    Raised by :func:`dataretrieval.waterdata.utils._raise_for_non_200`
-    and walked by :func:`_classify_chunk_error`. One subclass per
-    recoverable HTTP status family (429 → :class:`RateLimited`,
-    5xx → :class:`ServiceUnavailable`); ``ChunkedCall`` wraps them as
-    resumable :class:`ChunkInterrupted` subclasses.
-
-    Parameters
-    ----------
-    message : str
-        Human-readable error message.
-    retry_after : float, optional
-        Seconds to wait before retrying, parsed from the
-        ``Retry-After`` response header.
-
-    Attributes
-    ----------
-    retry_after : float or None
-        Seconds to wait before retrying, parsed from the
-        ``Retry-After`` response header. ``None`` when the header was
-        absent or unparseable.
-    """
-
-    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
-        super().__init__(message)
-        self.retry_after = retry_after
-
-
-class RateLimited(_RetryableTransportError):
-    """
-    A USGS Water Data API request was rejected with HTTP 429.
-
-    Exposed as a typed exception so callers (notably the multi-value
-    chunker) can detect rate-limit failures via ``isinstance`` instead
-    of string-matching error messages.
-    """
-
-
-class ServiceUnavailable(_RetryableTransportError):
-    """
-    A USGS Water Data API request was rejected with HTTP 5xx.
-
-    Surfaced as a typed exception (parallel to :class:`RateLimited`)
-    so ``ChunkedCall`` can treat transient server failures as
-    resumable interruptions rather than fatal programmer errors.
-    """
-
-
-class RequestTooLarge(ValueError):
-    """
-    No chunking plan fits the URL byte limit.
-
-    Raised when even the smallest reducible plan (every list axis at
-    singleton chunks and the filter at one clause per sub-request)
-    still exceeds the server's byte limit. Shrink the input lists,
-    simplify the filter, or split the call manually.
-    """
-
-
-class ChunkInterrupted(RuntimeError):
+class ChunkInterrupted(DataRetrievalError, RuntimeError):
     """
     Base class for mid-stream chunk failures whose completed work is
     preserved and resumable.
@@ -854,7 +798,7 @@ class ChunkPlan:
 
     Raises
     ------
-    RequestTooLarge
+    Unchunkable
         If the request needs chunking but even the singleton plan
         doesn't fit ``url_limit``.
     """
@@ -889,7 +833,7 @@ def __init__(
                 filter_expr, args.get("filter_lang")
             ):
                 return
-            raise RequestTooLarge(
+            raise Unchunkable(
                 f"Request exceeds {url_limit} bytes (URL + body) and has no "
                 f"chunkable multi-value argument to split (e.g. a single large "
                 f"CQL `IN` clause, or one oversized value). Narrow the query, "
@@ -940,7 +884,7 @@ def _plan(
 
         Raises
         ------
-        RequestTooLarge
+        Unchunkable
             If even the singleton plan (every axis at one atom per
             chunk) still exceeds ``url_limit``.
         """
@@ -961,7 +905,7 @@ def _plan(
                         biggest_axis, biggest_idx, biggest_size = axis, idx, size
 
             if biggest_axis is None:
-                raise RequestTooLarge(
+                raise Unchunkable(
                     f"Request exceeds {url_limit} bytes (URL + body) at the "
                     f"smallest reducible plan (every axis at one atom per "
                     f"sub-request). Reduce input sizes, shorten or simplify "
@@ -1136,7 +1080,7 @@ def _retryable(exc: BaseException) -> tuple[bool, float | None]:
         ``(retryable, retry_after)`` — the server ``Retry-After`` hint
         (seconds) when the transient carried one, else ``None``.
     """
-    if isinstance(exc, (RateLimited, ServiceUnavailable)):
+    if isinstance(exc, TransientError):
         return True, exc.retry_after
     if isinstance(exc, httpx.TransportError):
         return True, None
@@ -1730,7 +1674,7 @@ def multi_value_chunked(
 
     Raises
     ------
-    RequestTooLarge
+    Unchunkable
         If no plan can fit ``url_limit``.
     ChunkInterrupted
         On a mid-execution transient — 429, 5xx, or a bare transport
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index 5c98967c..a4706c15 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -27,12 +27,11 @@
 from anyio.from_thread import start_blocking_portal
 
 from dataretrieval import __version__
+from dataretrieval.exceptions import RateLimited, ServiceUnavailable
 from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata
 from dataretrieval.waterdata import _progress, chunking
 from dataretrieval.waterdata.chunking import (
     _QUOTA_HEADER,
-    RateLimited,
-    ServiceUnavailable,
     _safe_elapsed,
     get_active_client,
 )
@@ -1104,8 +1103,8 @@ async def _paginate(
     ------
     RuntimeError
         On a non-200 initial response (typed
-        :class:`~dataretrieval.waterdata.chunking.RateLimited` /
-        :class:`~dataretrieval.waterdata.chunking.ServiceUnavailable`
+        :class:`~dataretrieval.exceptions.RateLimited` /
+        :class:`~dataretrieval.exceptions.ServiceUnavailable`
         for 429/5xx, otherwise plain ``RuntimeError`` from
         :func:`_error_body`), on an initial-page parse failure
         (wrapped via :func:`_paginated_failure_message` with the
diff --git a/tests/nldi_test.py b/tests/nldi_test.py
index 988d9672..2249e4f2 100644
--- a/tests/nldi_test.py
+++ b/tests/nldi_test.py
@@ -373,15 +373,16 @@ def test_validate_data_source_rejects_malformed_catalog(httpx_mock, monkeypatch)
         nldi._validate_data_source("WQP")
 
 
-def test_query_504_raises_value_error(httpx_mock):
-    """``utils.query`` must classify 504 Gateway Timeout as a 5xx
-    failure. Pre-fix: the membership check ``[500, 502, 503]`` missed
-    504 and returned the response unchanged, leading downstream
-    callers (e.g. ``_query_nldi``) to silently swallow the failure as
-    an empty dict via JSONDecodeError."""
+def test_query_504_raises_service_unavailable(httpx_mock):
+    """``utils.query`` must classify 504 Gateway Timeout as a 5xx failure
+    (the transient ``ServiceUnavailable``). Pre-fix: the membership check
+    ``[500, 502, 503]`` missed 504 and returned the response unchanged,
+    leading downstream callers (e.g. ``_query_nldi``) to silently swallow
+    the failure as an empty dict via JSONDecodeError."""
+    from dataretrieval.exceptions import ServiceUnavailable
     from dataretrieval.utils import query
 
     url = "https://example.invalid/x"
     httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=504)
-    with pytest.raises(ValueError, match="Service Unavailable: 504"):
+    with pytest.raises(ServiceUnavailable, match="Service Unavailable: 504"):
         query(url, {"a": "1"})
diff --git a/tests/utils_test.py b/tests/utils_test.py
index c25e1084..00cec52e 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import pytest
 
-from dataretrieval import nwis, utils
+from dataretrieval import exceptions, nwis, utils
 
 
 class Test_query:
@@ -42,6 +42,80 @@ def test_header(self):
         assert "user-agent" in response.request.headers
 
 
+class Test_error_taxonomy:
+    """The unified request-error hierarchy.
+
+    Every module's request failures are catchable as ``DataRetrievalError``,
+    while remaining backward-compatible with the built-in type each path
+    historically raised (``ValueError`` for the legacy ``query`` path,
+    ``RuntimeError`` for the waterdata retryable types).
+    """
+
+    @pytest.mark.parametrize(
+        "status, exc_name, match, builtin",
+        [
+            (400, "BadRequestError", "Bad Request", ValueError),
+            (404, "NotFoundError", "Page Not Found", ValueError),
+            (414, "URLTooLong", "Request URL too long", ValueError),
+            (503, "ServiceUnavailable", "Service Unavailable: 503", RuntimeError),
+        ],
+    )
+    def test_query_maps_status_to_typed_error(
+        self, httpx_mock, status, exc_name, match, builtin
+    ):
+        """``query`` maps each HTTP status family to a typed error that is both a
+        ``DataRetrievalError`` (new, unified) and a built-in chosen by kind --
+        ``ValueError`` for a bad request (as before), ``RuntimeError`` for a 5xx
+        (a break: this path used to raise ``ValueError``) -- message kept."""
+        exc_cls = getattr(exceptions, exc_name)
+        url = "https://example.invalid/x"
+        httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=status)
+        with pytest.raises(exc_cls, match=match) as excinfo:
+            utils.query(url, {"a": "1"})
+        assert isinstance(excinfo.value, exceptions.DataRetrievalError)
+        assert isinstance(excinfo.value, builtin)  # backward compatibility
+
+    def test_query_failure_catchable_as_base(self, httpx_mock):
+        """A bare ``except DataRetrievalError`` catches a legacy query failure."""
+        url = "https://example.invalid/y"
+        httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=400)
+        with pytest.raises(exceptions.DataRetrievalError):
+            utils.query(url, {"a": "1"})
+
+    def test_no_sites_error_is_data_retrieval_error(self):
+        """``NoSitesError`` joins the root (was a bare ``Exception``)."""
+        assert issubclass(exceptions.NoSitesError, exceptions.DataRetrievalError)
+        assert not issubclass(exceptions.NoSitesError, ValueError)  # unchanged
+
+    def test_waterdata_exceptions_share_the_root(self):
+        """waterdata's typed exceptions are ``DataRetrievalError`` too, so one
+        ``except`` clause spans the legacy and waterdata subsystems — while
+        keeping their historical ``RuntimeError`` / ``ValueError`` bases and the
+        shared family bases (``TransientError``, ``RequestTooLarge``)."""
+        from dataretrieval.waterdata.chunking import (
+            ChunkInterrupted,
+            RateLimited,
+            ServiceUnavailable,
+            Unchunkable,
+        )
+
+        for cls in (RateLimited, ServiceUnavailable, Unchunkable, ChunkInterrupted):
+            assert issubclass(cls, exceptions.DataRetrievalError)
+        # Transient transport failures: RuntimeError, under TransientError.
+        assert issubclass(RateLimited, exceptions.TransientError)
+        assert issubclass(ServiceUnavailable, exceptions.TransientError)
+        assert issubclass(ServiceUnavailable, RuntimeError)
+        # "Too large" failures: ValueError, under RequestTooLarge.
+        assert issubclass(Unchunkable, exceptions.RequestTooLarge)
+        assert issubclass(Unchunkable, ValueError)
+
+    def test_base_exported_at_top_level(self):
+        """Users can write ``except dataretrieval.DataRetrievalError``."""
+        import dataretrieval
+
+        assert dataretrieval.DataRetrievalError is exceptions.DataRetrievalError
+
+
 class Test_BaseMetadata:
     """Tests of BaseMetadata"""
 
diff --git a/tests/waterdata_chunking_test.py b/tests/waterdata_chunking_test.py
index 8fede638..4ee4b555 100644
--- a/tests/waterdata_chunking_test.py
+++ b/tests/waterdata_chunking_test.py
@@ -42,10 +42,10 @@
     ChunkPlan,
     QuotaExhausted,
     RateLimited,
-    RequestTooLarge,
     RetryPolicy,
     ServiceInterrupted,
     ServiceUnavailable,
+    Unchunkable,
     _chunked_client,
     _combine_chunk_frames,
     _combine_chunk_responses,
@@ -172,10 +172,10 @@ def test_chunk_plan_returns_passthrough_when_no_chunkable_axes():
 def test_chunk_plan_raises_when_unchunkable_request_exceeds_limit():
     """A request with nothing to chunk that still exceeds the byte limit (e.g.
     a single large CQL ``IN`` clause with no top-level ``OR``) raises
-    RequestTooLarge instead of being shipped for the server to reject with an
+    Unchunkable instead of being shipped for the server to reject with an
     opaque HTTP 414."""
     args = {"monitoring_location_id": "scalar-only"}
-    with pytest.raises(RequestTooLarge):
+    with pytest.raises(Unchunkable):
         ChunkPlan(args, _fake_build, url_limit=10)
 
 
@@ -205,11 +205,11 @@ def test_chunk_plan_greedy_halving_targets_largest_axis_chunk():
 
 
 def test_chunk_plan_raises_request_too_large_at_singleton_floor():
-    """Limit below the singleton-per-axis floor → ``RequestTooLarge``;
+    """Limit below the singleton-per-axis floor → ``Unchunkable``;
     there's nothing left to shrink."""
     args = {"monitoring_location_id": ["A", "B"]}
     # base=200 alone exceeds limit=100; chunking can't help.
-    with pytest.raises(RequestTooLarge, match="smallest reducible"):
+    with pytest.raises(Unchunkable, match="smallest reducible"):
         ChunkPlan(args, _fake_build, url_limit=100)
 
 
@@ -250,14 +250,14 @@ def test_chunk_plan_minimizes_total_sub_requests():
 def test_chunk_plan_raises_when_smallest_plan_doesnt_fit():
     """If even the most aggressive joint plan (singleton lists +
     singleton filter clauses) still exceeds the limit, surface
-    RequestTooLarge — there's nothing left to shrink."""
+    Unchunkable — there's nothing left to shrink."""
     args = {
         "monitoring_location_id": ["A" * 10, "B" * 10],
         "filter": "x='12345' OR x='67890'",  # min clause is 9 chars
     }
     # Base 200 + singleton site (10) + singleton clause (9) = 219; limit
     # below 219 → no joint plan can fit.
-    with pytest.raises(RequestTooLarge):
+    with pytest.raises(Unchunkable):
         ChunkPlan(args, _fake_build, url_limit=210)