From ece83da0ea1b217f4d210c0e4f46ba485fcb2658 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 15 Jun 2026 09:51:24 -0500 Subject: [PATCH] test(waterdata): retry every transient error in the flaky-rerun filter The suite already retries transient HTTP failures (flaky's `only_rerun`), but the patterns missed two kinds, so a transient upstream 502 failed CI instead of retrying: - a direct 5xx is raised as `ServiceUnavailable`, and - a chunked request wraps a transient 429/5xx as `QuotaExhausted` / `ServiceInterrupted`. Add patterns for both. Verified they retry these transient errors but not deterministic ones (e.g. a 404 or an assertion failure). Co-Authored-By: Claude Opus 4.8 --- tests/waterdata_test.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 34ccf4f2..783e33dd 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -53,7 +53,14 @@ reruns=2, reruns_delay=5, only_rerun=[ - r"(?:RateLimited|RuntimeError):\s*(?:429|5\d\d):", # _raise_for_non_200 output + # Transient HTTP errors (429 / 5xx) on the direct path: RateLimited / + # ServiceUnavailable carry a "{status}: ..." message (the RuntimeError + # shape is kept for any legacy call site). + r"(?:RateLimited|ServiceUnavailable|RuntimeError):\s*(?:429|5\d\d):", + # The chunked fan-out wraps a transient sub-request as a ChunkInterrupted + # subclass (QuotaExhausted for 429, ServiceInterrupted for 5xx), whose + # message has no leading status token. + r"(?:QuotaExhausted|ServiceInterrupted):", r"Connect(ion)?Error", # requests' ConnectionError + httpx' ConnectError r"ReadTimeout|ConnectTimeout|Timeout", ],