From d46de2bf72c30db4177174018b37442766671763 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Tue, 2 Jun 2026 15:15:17 -0400 Subject: [PATCH] refactor(errors)!: unify request failures under a DataRetrievalError taxonomy Before, an HTTP failure surfaced as a different exception type depending on which module made the request -- a ValueError (or bare Exception) on the legacy query() path, RuntimeError-based types on the waterdata path, a bare httpx.HTTPStatusError elsewhere -- so there was no single `except` for "any dataretrieval request failure". Introduce dataretrieval/exceptions.py (dependency-free, with every name re-exported at the top level of the dataretrieval package), rooted at DataRetrievalError, with two intermediate bases that name the axes a caller reasons about: DataRetrievalError(Exception) |- BadRequestError(.., ValueError) # 400 |- NotFoundError(.., ValueError) # 404 |- RequestTooLarge(.., ValueError) # base: request too large to satisfy | |- URLTooLong # 414 / client-side URL reject | '- Unchunkable # chunker planner floor |- NoSitesError # empty result '- TransientError(.., RuntimeError) # base: retryable; carries retry_after |- RateLimited # 429 '- ServiceUnavailable # 5xx (both paths) - One type per condition, raised by both the legacy query() path and the Water Data chunker. Callers can catch a whole family (`except RequestTooLarge` / `except TransientError`); the chunker's retry check is a single isinstance(exc, TransientError). - query()'s inline status ladder is extracted into a reusable _raise_for_status(). - NoSitesError now subclasses DataRetrievalError (was Exception). - Built-in compatibility by kind: fatal client errors are also ValueError, transient transport errors also RuntimeError, so existing `except ValueError` / `except RuntimeError` handlers keep working. BREAKING CHANGES - The legacy query() path raises typed errors instead of ad-hoc ValueErrors (400 -> BadRequestError, 404 -> NotFoundError, 414/over-long URL -> URLTooLong). 
- A 5xx on the legacy query() path now raises ServiceUnavailable, a RuntimeError (was a ValueError): a transient server failure is a runtime condition, not a bad value. - The Water Data chunker's planner-floor error is Unchunkable (a RequestTooLarge subclass). - Import the transport types/bases from dataretrieval / dataretrieval.exceptions, not from dataretrieval.waterdata.chunking. Verified: 477 passed / 2 skipped, ruff clean; live API spot checks (404/400/ over-long URL raise the typed errors, 200 unaffected); all 21 example notebooks execute end-to-end against the live API (227/227 cells, 0 errors). Co-Authored-By: Claude Opus 4.8 (1M context) --- dataretrieval/__init__.py | 31 ++++++ dataretrieval/exceptions.py | 154 ++++++++++++++++++++++++++++ dataretrieval/utils.py | 82 ++++++++------- dataretrieval/waterdata/chunking.py | 84 +++------------ dataretrieval/waterdata/utils.py | 7 +- tests/nldi_test.py | 15 +-- tests/utils_test.py | 76 +++++++++++++- tests/waterdata_chunking_test.py | 14 +-- 8 files changed, 338 insertions(+), 125 deletions(-) create mode 100644 dataretrieval/exceptions.py diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py index 29b184f7..4b58247e 100644 --- a/dataretrieval/__init__.py +++ b/dataretrieval/__init__.py @@ -16,6 +16,9 @@ ``nldi`` requires geopandas (``pip install dataretrieval[nldi]``) and is imported on demand: ``from dataretrieval import nldi``. + +Every request failure raises a subclass of :class:`dataretrieval.DataRetrievalError`; +the taxonomy lives in ``dataretrieval.exceptions``. """ from importlib.metadata import PackageNotFoundError, version @@ -25,7 +28,21 @@ except PackageNotFoundError: __version__ = "version-unknown" +from dataretrieval.exceptions import ( + BadRequestError, + DataRetrievalError, + NoSitesError, + NotFoundError, + RateLimited, + RequestTooLarge, + ServiceUnavailable, + TransientError, + Unchunkable, + URLTooLong, +) + from . 
import ( + exceptions, nadp, nwis, samples, @@ -36,6 +53,7 @@ ) __all__ = [ + # service modules "nadp", "nwis", "samples", @@ -43,5 +61,18 @@ "utils", "waterdata", "wqp", + # error taxonomy (canonical home: ``dataretrieval.exceptions``), re-exported + # so callers can ``except dataretrieval.DataRetrievalError`` + "exceptions", + "BadRequestError", + "DataRetrievalError", + "NoSitesError", + "NotFoundError", + "RateLimited", + "RequestTooLarge", + "ServiceUnavailable", + "TransientError", + "URLTooLong", + "Unchunkable", "__version__", ] diff --git a/dataretrieval/exceptions.py b/dataretrieval/exceptions.py new file mode 100644 index 00000000..2bb955c3 --- /dev/null +++ b/dataretrieval/exceptions.py @@ -0,0 +1,154 @@ +"""Exception taxonomy for ``dataretrieval``. + +A failed request from any service module (``nwis``, ``wqp``, ``waterdata``, +``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can +handle any request failure with a single ``except dataretrieval.DataRetrievalError``. + +The tree has two intermediate bases a caller can catch to span a whole family: +:class:`RequestTooLarge` (the request can't fit, however it was issued) and +:class:`TransientError` (a temporary failure worth retrying). + +This module deliberately has no third-party dependencies, so any module can +import it without pulling in pandas/httpx. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import httpx + +__all__ = [ + "DataRetrievalError", + "BadRequestError", + "NotFoundError", + "RequestTooLarge", + "URLTooLong", + "Unchunkable", + "NoSitesError", + "TransientError", + "RateLimited", + "ServiceUnavailable", +] + + +class DataRetrievalError(Exception): + """Base class for errors raised when a request to a USGS or EPA web + service fails. + + Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...) 
+ raises a subclass of this when a request fails, so a caller can handle any + request failure uniformly:: + + try: + df, md = dataretrieval.wqp.get_results(...) + except dataretrieval.DataRetrievalError: + ... + + Subclasses also inherit from the built-in exception this package has + historically raised for the condition's *kind* -- :class:`ValueError` for a + request that can't succeed as written (bad params, too large), and + :class:`RuntimeError` for a transient transport failure -- so existing + ``except ValueError`` / ``except RuntimeError`` handlers keep working. + """ + + +# --- Fatal client errors ------------------------------------------------- +# The request can't succeed as written; retrying it unchanged won't help. Each +# is also a ``ValueError`` -- the built-in the legacy ``query`` path has always +# raised -- so existing ``except ValueError`` handlers keep working. + + +class BadRequestError(DataRetrievalError, ValueError): + """The service rejected the request parameters (HTTP 400).""" + + +class NotFoundError(DataRetrievalError, ValueError): + """The requested resource was not found; often an empty query (HTTP 404).""" + + +class RequestTooLarge(DataRetrievalError, ValueError): + """The request is too large for the service to satisfy. + + A base for the two ways a request can exceed what the service accepts; + catch it to handle either. The concrete subclasses are :class:`URLTooLong` + (a single request the server rejected) and :class:`Unchunkable` (the Water + Data chunker could not split the call small enough to fit). + """ + + +class URLTooLong(RequestTooLarge): + """A single request URL exceeded the service's limit (HTTP 414, or rejected + client-side before it was sent). + + Raised by the legacy ``query`` path, which issues one request without + chunking. Remediation: query fewer sites, or split the call manually. + """ + + +class Unchunkable(RequestTooLarge): + """No chunking plan fits the URL byte limit. 
+ + Raised by the Water Data chunker when even the smallest reducible plan + (every list axis at one atom per sub-request, the filter at one clause per + sub-request) still exceeds the server's byte limit -- so unlike + :class:`URLTooLong`, automatic splitting has already been tried and + exhausted. Shrink the input lists, simplify the filter, or split the call + manually. + """ + + +class NoSitesError(DataRetrievalError): + """The selection criteria matched no sites/data.""" + + def __init__(self, url: httpx.URL) -> None: + self.url = url + + def __str__(self) -> str: + return ( + "No sites/data found using the selection criteria specified in " + f"url: {self.url}" + ) + + +# --- Transient transport errors ------------------------------------------ +# The service was reachable but temporarily refused the request; the same call +# may succeed if retried. Each is also a ``RuntimeError`` (the built-in the +# waterdata path has always raised). The Water Data chunker recognizes them via +# ``isinstance(exc, TransientError)`` and wraps them as resumable +# ``ChunkInterrupted`` subclasses. + + +class TransientError(DataRetrievalError, RuntimeError): + """Base for transient HTTP failures that are worth an automatic retry. + + One subclass per recoverable HTTP status family (429 -> :class:`RateLimited`, + 5xx -> :class:`ServiceUnavailable`); the Water Data chunker recognizes them + by this shared base and wraps them as resumable interruptions. + + Parameters + ---------- + message : str + Human-readable error message. + retry_after : float, optional + Seconds to wait before retrying, parsed from the ``Retry-After`` + response header; stored on the :attr:`retry_after` attribute (``None`` + when the header is absent or unparseable). 
+ """ + + def __init__(self, message: str, *, retry_after: float | None = None) -> None: + super().__init__(message) + self.retry_after = retry_after + + +class RateLimited(TransientError): + """A request was rejected with HTTP 429 (too many requests).""" + + +class ServiceUnavailable(TransientError): + """A request was rejected with a server error (HTTP 5xx). + + Raised by both the legacy ``query`` path and the Water Data path, so a 5xx + surfaces as one type regardless of which subsystem issued the request. + """ diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 154f2a4d..9751b269 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -13,6 +13,13 @@ import dataretrieval from dataretrieval.codes import tz +from dataretrieval.exceptions import ( + BadRequestError, + NoSitesError, + NotFoundError, + ServiceUnavailable, + URLTooLong, +) # Typed as ``dict[str, Any]`` (not the inferred ``dict[str, object]``) so that # splatting it as ``**HTTPX_DEFAULTS`` into ``httpx.get`` / ``httpx.AsyncClient`` @@ -274,14 +281,42 @@ def __repr__(self) -> str: data_list.append(data) # append results to list""" -def _url_too_long_error(detail: str) -> ValueError: - return ValueError( +def _url_too_long_error(detail: str) -> URLTooLong: + return URLTooLong( "Request URL too long. Modify your query to use fewer sites. " f"{detail}. Pseudo-code example of how to split your query: " f"\n {_URL_TOO_LONG_EXAMPLE}" ) +def _raise_for_status(response: httpx.Response) -> None: + """Map an unsuccessful HTTP status to a typed :class:`DataRetrievalError`; + return ``None`` on success. + + Shared by the legacy :func:`query` path. The 4xx types stay + :class:`ValueError`-compatible (this path's historical contract), but a 5xx + raises the transient :class:`ServiceUnavailable` (a :class:`RuntimeError`), + since a server failure is retryable rather than a bad request. 
+ """ + status = response.status_code + if status == 400: + raise BadRequestError( + f"Bad Request, check that your parameters are correct. URL: {response.url}" + ) + elif status == 404: + raise NotFoundError( + "Page Not Found Error. May be the result of an empty query. " + f"URL: {response.url}" + ) + elif status == 414: + raise _url_too_long_error(f"API response reason: {response.reason_phrase}") + elif 500 <= status < 600: + raise ServiceUnavailable( + f"Service Unavailable: {status} {response.reason_phrase}. " + f"The service at {response.url} may be down or experiencing issues." + ) + + def query( url: str, payload: dict[str, Any], @@ -312,11 +347,14 @@ def query( Raises ------ - ValueError - If the service returns a 400, 404, 414, or 5xx status code, or if - ``httpx`` rejects the URL client-side (e.g. it is too long). - NoSitesError - If the response indicates that no sites or data matched the query. + DataRetrievalError + On failure: :class:`~dataretrieval.exceptions.BadRequestError` (400), + :class:`~dataretrieval.exceptions.NotFoundError` (404), + :class:`~dataretrieval.exceptions.URLTooLong` (414 or a client-side + over-long URL), :class:`~dataretrieval.exceptions.ServiceUnavailable` + (5xx), or :class:`~dataretrieval.exceptions.NoSitesError` (no sites/data + matched). The 4xx types are also :class:`ValueError`; + ``ServiceUnavailable`` is a :class:`RuntimeError`. """ for key, value in payload.items(): @@ -338,37 +376,9 @@ def query( except httpx.InvalidURL as exc: raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc - if response.status_code == 400: - raise ValueError( - f"Bad Request, check that your parameters are correct. URL: {response.url}" - ) - elif response.status_code == 404: - raise ValueError( - "Page Not Found Error. May be the result of an empty query. 
" - + f"URL: {response.url}" - ) - elif response.status_code == 414: - raise _url_too_long_error(f"API response reason: {response.reason_phrase}") - elif 500 <= response.status_code < 600: - raise ValueError( - f"Service Unavailable: {response.status_code} {response.reason_phrase}. " - + f"The service at {response.url} may be down or experiencing issues." - ) + _raise_for_status(response) if response.text.startswith("No sites/data"): raise NoSitesError(response.url) return response - - -class NoSitesError(Exception): - """Custom error class used when selection criteria return no sites/data.""" - - def __init__(self, url: httpx.URL) -> None: - self.url = url - - def __str__(self) -> str: - return ( - "No sites/data found using the selection criteria specified in " - f"url: {self.url}" - ) diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/waterdata/chunking.py index 9a5e20f5..82f5a37e 100644 --- a/dataretrieval/waterdata/chunking.py +++ b/dataretrieval/waterdata/chunking.py @@ -66,6 +66,13 @@ import pandas as pd from anyio.from_thread import start_blocking_portal +from dataretrieval.exceptions import ( + DataRetrievalError, + RateLimited, + ServiceUnavailable, + TransientError, + Unchunkable, +) from dataretrieval.utils import HTTPX_DEFAULTS from . import _progress @@ -383,70 +390,7 @@ def _passthrough_result( return frame, response -class _RetryableTransportError(RuntimeError): - """ - Base for typed HTTP transport failures the chunker recognizes as - transient. - - Raised by :func:`dataretrieval.waterdata.utils._raise_for_non_200` - and walked by :func:`_classify_chunk_error`. One subclass per - recoverable HTTP status family (429 → :class:`RateLimited`, - 5xx → :class:`ServiceUnavailable`); ``ChunkedCall`` wraps them as - resumable :class:`ChunkInterrupted` subclasses. - - Parameters - ---------- - message : str - Human-readable error message. 
- retry_after : float, optional - Seconds to wait before retrying, parsed from the - ``Retry-After`` response header. - - Attributes - ---------- - retry_after : float or None - Seconds to wait before retrying, parsed from the - ``Retry-After`` response header. ``None`` when the header was - absent or unparseable. - """ - - def __init__(self, message: str, *, retry_after: float | None = None) -> None: - super().__init__(message) - self.retry_after = retry_after - - -class RateLimited(_RetryableTransportError): - """ - A USGS Water Data API request was rejected with HTTP 429. - - Exposed as a typed exception so callers (notably the multi-value - chunker) can detect rate-limit failures via ``isinstance`` instead - of string-matching error messages. - """ - - -class ServiceUnavailable(_RetryableTransportError): - """ - A USGS Water Data API request was rejected with HTTP 5xx. - - Surfaced as a typed exception (parallel to :class:`RateLimited`) - so ``ChunkedCall`` can treat transient server failures as - resumable interruptions rather than fatal programmer errors. - """ - - -class RequestTooLarge(ValueError): - """ - No chunking plan fits the URL byte limit. - - Raised when even the smallest reducible plan (every list axis at - singleton chunks and the filter at one clause per sub-request) - still exceeds the server's byte limit. Shrink the input lists, - simplify the filter, or split the call manually. - """ - - -class ChunkInterrupted(RuntimeError): +class ChunkInterrupted(DataRetrievalError, RuntimeError): """ Base class for mid-stream chunk failures whose completed work is preserved and resumable. @@ -854,7 +798,7 @@ class ChunkPlan: Raises ------ - RequestTooLarge + Unchunkable If the request needs chunking but even the singleton plan doesn't fit ``url_limit``. 
""" @@ -889,7 +833,7 @@ def __init__( filter_expr, args.get("filter_lang") ): return - raise RequestTooLarge( + raise Unchunkable( f"Request exceeds {url_limit} bytes (URL + body) and has no " f"chunkable multi-value argument to split (e.g. a single large " f"CQL `IN` clause, or one oversized value). Narrow the query, " @@ -940,7 +884,7 @@ def _plan( Raises ------ - RequestTooLarge + Unchunkable If even the singleton plan (every axis at one atom per chunk) still exceeds ``url_limit``. """ @@ -961,7 +905,7 @@ def _plan( biggest_axis, biggest_idx, biggest_size = axis, idx, size if biggest_axis is None: - raise RequestTooLarge( + raise Unchunkable( f"Request exceeds {url_limit} bytes (URL + body) at the " f"smallest reducible plan (every axis at one atom per " f"sub-request). Reduce input sizes, shorten or simplify " @@ -1136,7 +1080,7 @@ def _retryable(exc: BaseException) -> tuple[bool, float | None]: ``(retryable, retry_after)`` — the server ``Retry-After`` hint (seconds) when the transient carried one, else ``None``. """ - if isinstance(exc, (RateLimited, ServiceUnavailable)): + if isinstance(exc, TransientError): return True, exc.retry_after if isinstance(exc, httpx.TransportError): return True, None @@ -1730,7 +1674,7 @@ def multi_value_chunked( Raises ------ - RequestTooLarge + Unchunkable If no plan can fit ``url_limit``. 
ChunkInterrupted On a mid-execution transient — 429, 5xx, or a bare transport diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 5c98967c..a4706c15 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -27,12 +27,11 @@ from anyio.from_thread import start_blocking_portal from dataretrieval import __version__ +from dataretrieval.exceptions import RateLimited, ServiceUnavailable from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata from dataretrieval.waterdata import _progress, chunking from dataretrieval.waterdata.chunking import ( _QUOTA_HEADER, - RateLimited, - ServiceUnavailable, _safe_elapsed, get_active_client, ) @@ -1104,8 +1103,8 @@ async def _paginate( ------ RuntimeError On a non-200 initial response (typed - :class:`~dataretrieval.waterdata.chunking.RateLimited` / - :class:`~dataretrieval.waterdata.chunking.ServiceUnavailable` + :class:`~dataretrieval.exceptions.RateLimited` / + :class:`~dataretrieval.exceptions.ServiceUnavailable` for 429/5xx, otherwise plain ``RuntimeError`` from :func:`_error_body`), on an initial-page parse failure (wrapped via :func:`_paginated_failure_message` with the diff --git a/tests/nldi_test.py b/tests/nldi_test.py index 988d9672..2249e4f2 100644 --- a/tests/nldi_test.py +++ b/tests/nldi_test.py @@ -373,15 +373,16 @@ def test_validate_data_source_rejects_malformed_catalog(httpx_mock, monkeypatch) nldi._validate_data_source("WQP") -def test_query_504_raises_value_error(httpx_mock): - """``utils.query`` must classify 504 Gateway Timeout as a 5xx - failure. Pre-fix: the membership check ``[500, 502, 503]`` missed - 504 and returned the response unchanged, leading downstream - callers (e.g. ``_query_nldi``) to silently swallow the failure as - an empty dict via JSONDecodeError.""" +def test_query_504_raises_service_unavailable(httpx_mock): + """``utils.query`` must classify 504 Gateway Timeout as a 5xx failure + (the transient ``ServiceUnavailable``). 
Pre-fix: the membership check + ``[500, 502, 503]`` missed 504 and returned the response unchanged, + leading downstream callers (e.g. ``_query_nldi``) to silently swallow + the failure as an empty dict via JSONDecodeError.""" + from dataretrieval.exceptions import ServiceUnavailable from dataretrieval.utils import query url = "https://example.invalid/x" httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=504) - with pytest.raises(ValueError, match="Service Unavailable: 504"): + with pytest.raises(ServiceUnavailable, match="Service Unavailable: 504"): query(url, {"a": "1"}) diff --git a/tests/utils_test.py b/tests/utils_test.py index c25e1084..00cec52e 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -5,7 +5,7 @@ import pandas as pd import pytest -from dataretrieval import nwis, utils +from dataretrieval import exceptions, nwis, utils class Test_query: @@ -42,6 +42,80 @@ def test_header(self): assert "user-agent" in response.request.headers +class Test_error_taxonomy: + """The unified request-error hierarchy. + + Every module's request failures are catchable as ``DataRetrievalError``, + while remaining backward-compatible with the built-in type each path + historically raised (``ValueError`` for the legacy ``query`` path, + ``RuntimeError`` for the waterdata retryable types). 
+ """ + + @pytest.mark.parametrize( + "status, exc_name, match, builtin", + [ + (400, "BadRequestError", "Bad Request", ValueError), + (404, "NotFoundError", "Page Not Found", ValueError), + (414, "URLTooLong", "Request URL too long", ValueError), + (503, "ServiceUnavailable", "Service Unavailable: 503", RuntimeError), + ], + ) + def test_query_maps_status_to_typed_error( + self, httpx_mock, status, exc_name, match, builtin + ): + """``query`` maps each HTTP status family to a typed error that is both a + ``DataRetrievalError`` (new, unified) and the built-in this path + historically raised for that kind of failure -- ``ValueError`` for a bad + request, ``RuntimeError`` for a transient 5xx -- with the message kept.""" + exc_cls = getattr(exceptions, exc_name) + url = "https://example.invalid/x" + httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=status) + with pytest.raises(exc_cls, match=match) as excinfo: + utils.query(url, {"a": "1"}) + assert isinstance(excinfo.value, exceptions.DataRetrievalError) + assert isinstance(excinfo.value, builtin) # backward compatibility + + def test_query_failure_catchable_as_base(self, httpx_mock): + """A bare ``except DataRetrievalError`` catches a legacy query failure.""" + url = "https://example.invalid/y" + httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=400) + with pytest.raises(exceptions.DataRetrievalError): + utils.query(url, {"a": "1"}) + + def test_no_sites_error_is_data_retrieval_error(self): + """``NoSitesError`` joins the root (was a bare ``Exception``).""" + assert issubclass(exceptions.NoSitesError, exceptions.DataRetrievalError) + assert not issubclass(exceptions.NoSitesError, ValueError) # unchanged + + def test_waterdata_exceptions_share_the_root(self): + """waterdata's typed exceptions are ``DataRetrievalError`` too, so one + ``except`` clause spans the legacy and waterdata subsystems — while + keeping their historical ``RuntimeError`` / ``ValueError`` bases and the + shared 
family bases (``TransientError``, ``RequestTooLarge``).""" + from dataretrieval.waterdata.chunking import ( + ChunkInterrupted, + RateLimited, + ServiceUnavailable, + Unchunkable, + ) + + for cls in (RateLimited, ServiceUnavailable, Unchunkable, ChunkInterrupted): + assert issubclass(cls, exceptions.DataRetrievalError) + # Transient transport failures: RuntimeError, under TransientError. + assert issubclass(RateLimited, exceptions.TransientError) + assert issubclass(ServiceUnavailable, exceptions.TransientError) + assert issubclass(ServiceUnavailable, RuntimeError) + # "Too large" failures: ValueError, under RequestTooLarge. + assert issubclass(Unchunkable, exceptions.RequestTooLarge) + assert issubclass(Unchunkable, ValueError) + + def test_base_exported_at_top_level(self): + """Users can write ``except dataretrieval.DataRetrievalError``.""" + import dataretrieval + + assert dataretrieval.DataRetrievalError is exceptions.DataRetrievalError + + class Test_BaseMetadata: """Tests of BaseMetadata""" diff --git a/tests/waterdata_chunking_test.py b/tests/waterdata_chunking_test.py index 8fede638..4ee4b555 100644 --- a/tests/waterdata_chunking_test.py +++ b/tests/waterdata_chunking_test.py @@ -42,10 +42,10 @@ ChunkPlan, QuotaExhausted, RateLimited, - RequestTooLarge, RetryPolicy, ServiceInterrupted, ServiceUnavailable, + Unchunkable, _chunked_client, _combine_chunk_frames, _combine_chunk_responses, @@ -172,10 +172,10 @@ def test_chunk_plan_returns_passthrough_when_no_chunkable_axes(): def test_chunk_plan_raises_when_unchunkable_request_exceeds_limit(): """A request with nothing to chunk that still exceeds the byte limit (e.g. 
a single large CQL ``IN`` clause with no top-level ``OR``) raises - RequestTooLarge instead of being shipped for the server to reject with an + Unchunkable instead of being shipped for the server to reject with an opaque HTTP 414.""" args = {"monitoring_location_id": "scalar-only"} - with pytest.raises(RequestTooLarge): + with pytest.raises(Unchunkable): ChunkPlan(args, _fake_build, url_limit=10) @@ -205,11 +205,11 @@ def test_chunk_plan_greedy_halving_targets_largest_axis_chunk(): def test_chunk_plan_raises_request_too_large_at_singleton_floor(): - """Limit below the singleton-per-axis floor → ``RequestTooLarge``; + """Limit below the singleton-per-axis floor → ``Unchunkable``; there's nothing left to shrink.""" args = {"monitoring_location_id": ["A", "B"]} # base=200 alone exceeds limit=100; chunking can't help. - with pytest.raises(RequestTooLarge, match="smallest reducible"): + with pytest.raises(Unchunkable, match="smallest reducible"): ChunkPlan(args, _fake_build, url_limit=100) @@ -250,14 +250,14 @@ def test_chunk_plan_minimizes_total_sub_requests(): def test_chunk_plan_raises_when_smallest_plan_doesnt_fit(): """If even the most aggressive joint plan (singleton lists + singleton filter clauses) still exceeds the limit, surface - RequestTooLarge — there's nothing left to shrink.""" + Unchunkable — there's nothing left to shrink.""" args = { "monitoring_location_id": ["A" * 10, "B" * 10], "filter": "x='12345' OR x='67890'", # min clause is 9 chars } # Base 200 + singleton site (10) + singleton clause (9) = 219; limit # below 219 → no joint plan can fit. - with pytest.raises(RequestTooLarge): + with pytest.raises(Unchunkable): ChunkPlan(args, _fake_build, url_limit=210)