From 1db5cfa2f5ea53705dfba0707a9d74f6222c324b Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 13 Mar 2024 07:33:43 -0500 Subject: [PATCH 01/14] delay another import --- pymongo/_azure_helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymongo/_azure_helpers.py b/pymongo/_azure_helpers.py index 661e4ce37a..1ce00c0603 100644 --- a/pymongo/_azure_helpers.py +++ b/pymongo/_azure_helpers.py @@ -17,12 +17,14 @@ import json from typing import Any, Optional -from urllib.request import Request, urlopen def _get_azure_response( resource: str, object_id: Optional[str] = None, timeout: float = 5 ) -> dict[str, Any]: + # Deferred import to save overall import time. + from urllib.request import Request, urlopen + url = "http://169.254.169.254/metadata/identity/oauth2/token" url += "?api-version=2018-02-01" url += f"&resource={resource}" From c6526f841189b6c023d3b2eb0fc158e0f07dad6a Mon Sep 17 00:00:00 2001 From: Shane Harvey Date: Tue, 19 Aug 2025 12:56:02 -0700 Subject: [PATCH 02/14] PYTHON-5504 Prototype exponential backoff in with_transaction (#2492) PYTHON-5505 Prototype system overload retry loop for all operations (#2497) All commands that fail with the "Retryable" error label will be retried up to 3 times. When the error includes the "SystemOverloaded" error label we apply exponential backoff with jitter before attempting a retry. PYTHON-5506 Prototype adaptive token bucket retry (#2501) Add adaptive token bucket based retry policy. Successfully completed commands deposit 0.1 token. Failed retry attempts consume 1 token. A retry is only permitted if there is an available token. Token bucket starts full with the maximum 1000 tokens. 
PYTHON-5505 Use proper RetryableError and SystemOverloadedError labels --- pymongo/asynchronous/client_session.py | 10 ++ pymongo/asynchronous/collection.py | 6 +- pymongo/asynchronous/database.py | 6 + pymongo/asynchronous/helpers.py | 126 ++++++++++++++ pymongo/asynchronous/mongo_client.py | 69 ++++++-- pymongo/synchronous/client_session.py | 9 + pymongo/synchronous/collection.py | 6 +- pymongo/synchronous/database.py | 6 + pymongo/synchronous/mongo_client.py | 69 ++++++-- test/asynchronous/test_backpressure.py | 230 +++++++++++++++++++++++++ test/test_backpressure.py | 230 +++++++++++++++++++++++++ tools/synchro.py | 1 + 12 files changed, 730 insertions(+), 38 deletions(-) create mode 100644 test/asynchronous/test_backpressure.py create mode 100644 test/test_backpressure.py diff --git a/pymongo/asynchronous/client_session.py b/pymongo/asynchronous/client_session.py index 6ab3b39983..697de81b1c 100644 --- a/pymongo/asynchronous/client_session.py +++ b/pymongo/asynchronous/client_session.py @@ -135,7 +135,9 @@ from __future__ import annotations +import asyncio import collections +import random import time import uuid from collections.abc import Mapping as _Mapping @@ -470,6 +472,8 @@ def _max_time_expired_error(exc: PyMongoError) -> bool: # This limit is non-configurable and was chosen to be twice the 60 second # default value of MongoDB's `transactionLifetimeLimitSeconds` parameter. _WITH_TRANSACTION_RETRY_TIME_LIMIT = 120 +_BACKOFF_MAX = 1 +_BACKOFF_INITIAL = 0.050 # 50ms initial backoff def _within_time_limit(start_time: float) -> bool: @@ -703,7 +707,13 @@ async def callback(session, custom_arg, custom_kwarg=None): https://github.com/mongodb/specifications/blob/master/source/transactions-convenient-api/transactions-convenient-api.md#handling-errors-inside-the-callback """ start_time = time.monotonic() + retry = 0 while True: + if retry: # Implement exponential backoff on retry. 
+ jitter = random.random() # noqa: S311 + backoff = jitter * min(_BACKOFF_INITIAL * (2**retry), _BACKOFF_MAX) + await asyncio.sleep(backoff) + retry += 1 await self.start_transaction( read_concern, write_concern, read_preference, max_commit_time_ms ) diff --git a/pymongo/asynchronous/collection.py b/pymongo/asynchronous/collection.py index e7e2f58031..c6cb69af25 100644 --- a/pymongo/asynchronous/collection.py +++ b/pymongo/asynchronous/collection.py @@ -58,6 +58,7 @@ AsyncCursor, AsyncRawBatchCursor, ) +from pymongo.asynchronous.helpers import _retry_overload from pymongo.collation import validate_collation_or_none from pymongo.common import _ecoc_coll_name, _esc_coll_name from pymongo.errors import ( @@ -252,6 +253,7 @@ def __init__( unicode_decode_error_handler="replace", document_class=dict ) self._timeout = database.client.options.timeout + self._retry_policy = database.client._retry_policy if create or kwargs: if _IS_SYNC: @@ -2227,6 +2229,7 @@ async def create_indexes( return await self._create_indexes(indexes, session, **kwargs) @_csot.apply + @_retry_overload async def _create_indexes( self, indexes: Sequence[IndexModel], session: Optional[AsyncClientSession], **kwargs: Any ) -> list[str]: @@ -2422,7 +2425,6 @@ async def drop_indexes( kwargs["comment"] = comment await self._drop_index("*", session=session, **kwargs) - @_csot.apply async def drop_index( self, index_or_name: _IndexKeyHint, @@ -2472,6 +2474,7 @@ async def drop_index( await self._drop_index(index_or_name, session, comment, **kwargs) @_csot.apply + @_retry_overload async def _drop_index( self, index_or_name: _IndexKeyHint, @@ -3072,6 +3075,7 @@ async def aggregate_raw_batches( ) @_csot.apply + @_retry_overload async def rename( self, new_name: str, diff --git a/pymongo/asynchronous/database.py b/pymongo/asynchronous/database.py index 8e0afc9dc9..9b84869315 100644 --- a/pymongo/asynchronous/database.py +++ b/pymongo/asynchronous/database.py @@ -38,6 +38,7 @@ from 
pymongo.asynchronous.change_stream import AsyncDatabaseChangeStream from pymongo.asynchronous.collection import AsyncCollection from pymongo.asynchronous.command_cursor import AsyncCommandCursor +from pymongo.asynchronous.helpers import _retry_overload from pymongo.common import _ecoc_coll_name, _esc_coll_name from pymongo.database_shared import _check_name, _CodecDocumentType from pymongo.errors import CollectionInvalid, InvalidOperation @@ -135,6 +136,7 @@ def __init__( self._name = name self._client: AsyncMongoClient[_DocumentType] = client self._timeout = client.options.timeout + self._retry_policy = client._retry_policy @property def client(self) -> AsyncMongoClient[_DocumentType]: @@ -477,6 +479,7 @@ async def watch( return change_stream @_csot.apply + @_retry_overload async def create_collection( self, name: str, @@ -819,6 +822,7 @@ async def command( ... @_csot.apply + @_retry_overload async def command( self, command: Union[str, MutableMapping[str, Any]], @@ -950,6 +954,7 @@ async def command( ) @_csot.apply + @_retry_overload async def cursor_command( self, command: Union[str, MutableMapping[str, Any]], @@ -1265,6 +1270,7 @@ async def _drop_helper( ) @_csot.apply + @_retry_overload async def drop_collection( self, name_or_collection: Union[str, AsyncCollection[_DocumentTypeArg]], diff --git a/pymongo/asynchronous/helpers.py b/pymongo/asynchronous/helpers.py index 4a8c918133..4ff3caa104 100644 --- a/pymongo/asynchronous/helpers.py +++ b/pymongo/asynchronous/helpers.py @@ -16,7 +16,12 @@ from __future__ import annotations import asyncio +import builtins +import functools +import random import socket +import time +import time as time # noqa: PLC0414 # needed in sync version from typing import ( Any, Callable, @@ -24,10 +29,13 @@ cast, ) +from pymongo import _csot from pymongo.errors import ( OperationFailure, + PyMongoError, ) from pymongo.helpers_shared import _REAUTHENTICATION_REQUIRED_CODE +from pymongo.lock import _async_create_lock _IS_SYNC = False @@ 
-36,6 +44,7 @@ def _handle_reauth(func: F) -> F: + @functools.wraps(func) async def inner(*args: Any, **kwargs: Any) -> Any: no_reauth = kwargs.pop("no_reauth", False) from pymongo.asynchronous.pool import AsyncConnection @@ -68,6 +77,123 @@ async def inner(*args: Any, **kwargs: Any) -> Any: return cast(F, inner) +_MAX_RETRIES = 3 +_BACKOFF_INITIAL = 0.05 +_BACKOFF_MAX = 10 +# DRIVERS-3240 will determine these defaults. +DEFAULT_RETRY_TOKEN_CAPACITY = 1000.0 +DEFAULT_RETRY_TOKEN_RETURN = 0.1 + + +def _backoff( + attempt: int, initial_delay: float = _BACKOFF_INITIAL, max_delay: float = _BACKOFF_MAX +) -> float: + jitter = random.random() # noqa: S311 + return jitter * min(initial_delay * (2**attempt), max_delay) + + +class _TokenBucket: + """A token bucket implementation for rate limiting.""" + + def __init__( + self, + capacity: float = DEFAULT_RETRY_TOKEN_CAPACITY, + return_rate: float = DEFAULT_RETRY_TOKEN_RETURN, + ): + self.lock = _async_create_lock() + self.capacity = capacity + # DRIVERS-3240 will determine how full the bucket should start. + self.tokens = capacity + self.return_rate = return_rate + + async def consume(self) -> bool: + """Consume a token from the bucket if available.""" + async with self.lock: + if self.tokens >= 1: + self.tokens -= 1 + return True + return False + + async def deposit(self, retry: bool = False) -> None: + """Deposit a token back into the bucket.""" + retry_token = 1 if retry else 0 + async with self.lock: + self.tokens = min(self.capacity, self.tokens + retry_token + self.return_rate) + + +class _RetryPolicy: + """A retry limiter that performs exponential backoff with jitter. + + Retry attempts are limited by a token bucket to prevent overwhelming the server during + a prolonged outage or high load. 
+ """ + + def __init__( + self, + token_bucket: _TokenBucket, + attempts: int = _MAX_RETRIES, + backoff_initial: float = _BACKOFF_INITIAL, + backoff_max: float = _BACKOFF_MAX, + ): + self.token_bucket = token_bucket + self.attempts = attempts + self.backoff_initial = backoff_initial + self.backoff_max = backoff_max + + async def record_success(self, retry: bool) -> None: + """Record a successful operation.""" + await self.token_bucket.deposit(retry) + + def backoff(self, attempt: int) -> float: + """Return the backoff duration for the given .""" + return _backoff(max(0, attempt - 1), self.backoff_initial, self.backoff_max) + + async def should_retry(self, attempt: int, delay: float) -> bool: + """Return if we have budget to retry and how long to backoff.""" + if attempt > self.attempts: + return False + + # If the delay would exceed the deadline, bail early before consuming a token. + if _csot.get_timeout(): + if time.monotonic() + delay > _csot.get_deadline(): + return False + + # Check token bucket last since we only want to consume a token if we actually retry. + if not await self.token_bucket.consume(): + # DRIVERS-3246 Improve diagnostics when this case happens. + # We could add info to the exception and log. + return False + return True + + +def _retry_overload(func: F) -> F: + @functools.wraps(func) + async def inner(self: Any, *args: Any, **kwargs: Any) -> Any: + retry_policy = self._retry_policy + attempt = 0 + while True: + try: + res = await func(self, *args, **kwargs) + await retry_policy.record_success(retry=attempt > 0) + return res + except PyMongoError as exc: + if not exc.has_error_label("RetryableError"): + raise + attempt += 1 + delay = 0 + if exc.has_error_label("SystemOverloadedError"): + delay = retry_policy.backoff(attempt) + if not await retry_policy.should_retry(attempt, delay): + raise + + # Implement exponential backoff on retry. 
+ if delay: + await asyncio.sleep(delay) + continue + + return cast(F, inner) + + async def _getaddrinfo( host: Any, port: Any, **kwargs: Any ) -> list[ diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 2a8ff43392..486e00ae46 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -35,6 +35,7 @@ import asyncio import contextlib import os +import time as time # noqa: PLC0414 # needed in sync version import warnings import weakref from collections import defaultdict @@ -67,6 +68,11 @@ from pymongo.asynchronous.client_bulk import _AsyncClientBulk from pymongo.asynchronous.client_session import _EmptyServerSession from pymongo.asynchronous.command_cursor import AsyncCommandCursor +from pymongo.asynchronous.helpers import ( + _retry_overload, + _RetryPolicy, + _TokenBucket, +) from pymongo.asynchronous.settings import TopologySettings from pymongo.asynchronous.topology import Topology, _ErrorContext from pymongo.client_options import ClientOptions @@ -773,6 +779,7 @@ def __init__( self._timeout: float | None = None self._topology_settings: TopologySettings = None # type: ignore[assignment] self._event_listeners: _EventListeners | None = None + self._retry_policy = _RetryPolicy(_TokenBucket()) # _pool_class, _monitor_class, and _condition_class are for deep # customization of PyMongo, e.g. Motor. 
@@ -2396,6 +2403,7 @@ async def list_database_names( return [doc["name"] async for doc in res] @_csot.apply + @_retry_overload async def drop_database( self, name_or_database: Union[str, database.AsyncDatabase[_DocumentTypeArg]], @@ -2733,9 +2741,10 @@ def __init__( ): self._last_error: Optional[Exception] = None self._retrying = False + self._always_retryable = False self._multiple_retries = _csot.get_timeout() is not None self._client = mongo_client - + self._retry_policy = mongo_client._retry_policy self._func = func self._bulk = bulk self._session = session @@ -2770,7 +2779,9 @@ async def run(self) -> T: while True: self._check_last_error(check_csot=True) try: - return await self._read() if self._is_read else await self._write() + res = await self._read() if self._is_read else await self._write() + await self._retry_policy.record_success(self._attempt_number > 0) + return res except ServerSelectionTimeoutError: # The application may think the write was never attempted # if we raise ServerSelectionTimeoutError on the retry @@ -2781,14 +2792,22 @@ async def run(self) -> T: # most likely be a waste of time. 
raise except PyMongoError as exc: + always_retryable = False + overloaded = False + exc_to_check = exc # Execute specialized catch on read if self._is_read: if isinstance(exc, (ConnectionFailure, OperationFailure)): # ConnectionFailures do not supply a code property exc_code = getattr(exc, "code", None) - if self._is_not_eligible_for_retry() or ( - isinstance(exc, OperationFailure) - and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + always_retryable = exc.has_error_label("RetryableError") + overloaded = exc.has_error_label("SystemOverloadedError") + if not always_retryable and ( + self._is_not_eligible_for_retry() + or ( + isinstance(exc, OperationFailure) + and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + ) ): raise self._retrying = True @@ -2799,19 +2818,22 @@ async def run(self) -> T: # Specialized catch on write operation if not self._is_read: - if not self._retryable: + if isinstance(exc, ClientBulkWriteException) and isinstance( + exc.error, PyMongoError + ): + exc_to_check = exc.error + retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") + always_retryable = exc_to_check.has_error_label("RetryableError") + overloaded = exc_to_check.has_error_label("SystemOverloadedError") + if not self._retryable and not always_retryable: raise - if isinstance(exc, ClientBulkWriteException) and exc.error: - retryable_write_error_exc = isinstance( - exc.error, PyMongoError - ) and exc.error.has_error_label("RetryableWriteError") - else: - retryable_write_error_exc = exc.has_error_label("RetryableWriteError") - if retryable_write_error_exc: + if retryable_write_label or always_retryable: assert self._session await self._session._unpin() - if not retryable_write_error_exc or self._is_not_eligible_for_retry(): - if exc.has_error_label("NoWritesPerformed") and self._last_error: + if not always_retryable and ( + not retryable_write_label or self._is_not_eligible_for_retry() + ): + if exc_to_check.has_error_label("NoWritesPerformed") and 
self._last_error: raise self._last_error from exc else: raise @@ -2820,7 +2842,7 @@ async def run(self) -> T: self._bulk.retrying = True else: self._retrying = True - if not exc.has_error_label("NoWritesPerformed"): + if not exc_to_check.has_error_label("NoWritesPerformed"): self._last_error = exc if self._last_error is None: self._last_error = exc @@ -2828,6 +2850,17 @@ async def run(self) -> T: if self._client.topology_description.topology_type == TOPOLOGY_TYPE.Sharded: self._deprioritized_servers.append(self._server) + self._always_retryable = always_retryable + if always_retryable: + delay = self._retry_policy.backoff(self._attempt_number) if overloaded else 0 + if not await self._retry_policy.should_retry(self._attempt_number, delay): + if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error: + raise self._last_error from exc + else: + raise + if overloaded: + await asyncio.sleep(delay) + def _is_not_eligible_for_retry(self) -> bool: """Checks if the exchange is not eligible for retry""" return not self._retryable or (self._is_retrying() and not self._multiple_retries) @@ -2889,7 +2922,7 @@ async def _write(self) -> T: and conn.supports_sessions ) is_mongos = conn.is_mongos - if not sessions_supported: + if not self._always_retryable and not sessions_supported: # A retry is not possible because this server does # not support sessions raise the last error. 
self._check_last_error() @@ -2921,7 +2954,7 @@ async def _read(self) -> T: conn, read_pref, ): - if self._retrying and not self._retryable: + if self._retrying and not self._retryable and not self._always_retryable: self._check_last_error() if self._retrying: _debug_log( diff --git a/pymongo/synchronous/client_session.py b/pymongo/synchronous/client_session.py index 9b547dc946..d5a37eb108 100644 --- a/pymongo/synchronous/client_session.py +++ b/pymongo/synchronous/client_session.py @@ -136,6 +136,7 @@ from __future__ import annotations import collections +import random import time import uuid from collections.abc import Mapping as _Mapping @@ -469,6 +470,8 @@ def _max_time_expired_error(exc: PyMongoError) -> bool: # This limit is non-configurable and was chosen to be twice the 60 second # default value of MongoDB's `transactionLifetimeLimitSeconds` parameter. _WITH_TRANSACTION_RETRY_TIME_LIMIT = 120 +_BACKOFF_MAX = 1 +_BACKOFF_INITIAL = 0.050 # 50ms initial backoff def _within_time_limit(start_time: float) -> bool: @@ -702,7 +705,13 @@ def callback(session, custom_arg, custom_kwarg=None): https://github.com/mongodb/specifications/blob/master/source/transactions-convenient-api/transactions-convenient-api.md#handling-errors-inside-the-callback """ start_time = time.monotonic() + retry = 0 while True: + if retry: # Implement exponential backoff on retry. 
+ jitter = random.random() # noqa: S311 + backoff = jitter * min(_BACKOFF_INITIAL * (2**retry), _BACKOFF_MAX) + time.sleep(backoff) + retry += 1 self.start_transaction(read_concern, write_concern, read_preference, max_commit_time_ms) try: ret = callback(self) diff --git a/pymongo/synchronous/collection.py b/pymongo/synchronous/collection.py index 4e5f7d08fb..9ee8e63947 100644 --- a/pymongo/synchronous/collection.py +++ b/pymongo/synchronous/collection.py @@ -89,6 +89,7 @@ Cursor, RawBatchCursor, ) +from pymongo.synchronous.helpers import _retry_overload from pymongo.typings import _CollationIn, _DocumentType, _DocumentTypeArg, _Pipeline from pymongo.write_concern import DEFAULT_WRITE_CONCERN, WriteConcern, validate_boolean @@ -255,6 +256,7 @@ def __init__( unicode_decode_error_handler="replace", document_class=dict ) self._timeout = database.client.options.timeout + self._retry_policy = database.client._retry_policy if create or kwargs: if _IS_SYNC: @@ -2224,6 +2226,7 @@ def create_indexes( return self._create_indexes(indexes, session, **kwargs) @_csot.apply + @_retry_overload def _create_indexes( self, indexes: Sequence[IndexModel], session: Optional[ClientSession], **kwargs: Any ) -> list[str]: @@ -2419,7 +2422,6 @@ def drop_indexes( kwargs["comment"] = comment self._drop_index("*", session=session, **kwargs) - @_csot.apply def drop_index( self, index_or_name: _IndexKeyHint, @@ -2469,6 +2471,7 @@ def drop_index( self._drop_index(index_or_name, session, comment, **kwargs) @_csot.apply + @_retry_overload def _drop_index( self, index_or_name: _IndexKeyHint, @@ -3065,6 +3068,7 @@ def aggregate_raw_batches( ) @_csot.apply + @_retry_overload def rename( self, new_name: str, diff --git a/pymongo/synchronous/database.py b/pymongo/synchronous/database.py index 0d129ba972..6877854f46 100644 --- a/pymongo/synchronous/database.py +++ b/pymongo/synchronous/database.py @@ -43,6 +43,7 @@ from pymongo.synchronous.change_stream import DatabaseChangeStream from 
pymongo.synchronous.collection import Collection from pymongo.synchronous.command_cursor import CommandCursor +from pymongo.synchronous.helpers import _retry_overload from pymongo.typings import _CollationIn, _DocumentType, _DocumentTypeArg, _Pipeline if TYPE_CHECKING: @@ -135,6 +136,7 @@ def __init__( self._name = name self._client: MongoClient[_DocumentType] = client self._timeout = client.options.timeout + self._retry_policy = client._retry_policy @property def client(self) -> MongoClient[_DocumentType]: @@ -477,6 +479,7 @@ def watch( return change_stream @_csot.apply + @_retry_overload def create_collection( self, name: str, @@ -819,6 +822,7 @@ def command( ... @_csot.apply + @_retry_overload def command( self, command: Union[str, MutableMapping[str, Any]], @@ -948,6 +952,7 @@ def command( ) @_csot.apply + @_retry_overload def cursor_command( self, command: Union[str, MutableMapping[str, Any]], @@ -1258,6 +1263,7 @@ def _drop_helper( ) @_csot.apply + @_retry_overload def drop_collection( self, name_or_collection: Union[str, Collection[_DocumentTypeArg]], diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index fea2d6daef..30b8c4fc61 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -35,6 +35,7 @@ import asyncio import contextlib import os +import time as time # noqa: PLC0414 # needed in sync version import warnings import weakref from collections import defaultdict @@ -110,6 +111,11 @@ from pymongo.synchronous.client_bulk import _ClientBulk from pymongo.synchronous.client_session import _EmptyServerSession from pymongo.synchronous.command_cursor import CommandCursor +from pymongo.synchronous.helpers import ( + _retry_overload, + _RetryPolicy, + _TokenBucket, +) from pymongo.synchronous.settings import TopologySettings from pymongo.synchronous.topology import Topology, _ErrorContext from pymongo.topology_description import TOPOLOGY_TYPE, TopologyDescription @@ -773,6 +779,7 @@ def 
__init__( self._timeout: float | None = None self._topology_settings: TopologySettings = None # type: ignore[assignment] self._event_listeners: _EventListeners | None = None + self._retry_policy = _RetryPolicy(_TokenBucket()) # _pool_class, _monitor_class, and _condition_class are for deep # customization of PyMongo, e.g. Motor. @@ -2386,6 +2393,7 @@ def list_database_names( return [doc["name"] for doc in res] @_csot.apply + @_retry_overload def drop_database( self, name_or_database: Union[str, database.Database[_DocumentTypeArg]], @@ -2723,9 +2731,10 @@ def __init__( ): self._last_error: Optional[Exception] = None self._retrying = False + self._always_retryable = False self._multiple_retries = _csot.get_timeout() is not None self._client = mongo_client - + self._retry_policy = mongo_client._retry_policy self._func = func self._bulk = bulk self._session = session @@ -2760,7 +2769,9 @@ def run(self) -> T: while True: self._check_last_error(check_csot=True) try: - return self._read() if self._is_read else self._write() + res = self._read() if self._is_read else self._write() + self._retry_policy.record_success(self._attempt_number > 0) + return res except ServerSelectionTimeoutError: # The application may think the write was never attempted # if we raise ServerSelectionTimeoutError on the retry @@ -2771,14 +2782,22 @@ def run(self) -> T: # most likely be a waste of time. 
raise except PyMongoError as exc: + always_retryable = False + overloaded = False + exc_to_check = exc # Execute specialized catch on read if self._is_read: if isinstance(exc, (ConnectionFailure, OperationFailure)): # ConnectionFailures do not supply a code property exc_code = getattr(exc, "code", None) - if self._is_not_eligible_for_retry() or ( - isinstance(exc, OperationFailure) - and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + always_retryable = exc.has_error_label("RetryableError") + overloaded = exc.has_error_label("SystemOverloadedError") + if not always_retryable and ( + self._is_not_eligible_for_retry() + or ( + isinstance(exc, OperationFailure) + and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + ) ): raise self._retrying = True @@ -2789,19 +2808,22 @@ def run(self) -> T: # Specialized catch on write operation if not self._is_read: - if not self._retryable: + if isinstance(exc, ClientBulkWriteException) and isinstance( + exc.error, PyMongoError + ): + exc_to_check = exc.error + retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") + always_retryable = exc_to_check.has_error_label("RetryableError") + overloaded = exc_to_check.has_error_label("SystemOverloadedError") + if not self._retryable and not always_retryable: raise - if isinstance(exc, ClientBulkWriteException) and exc.error: - retryable_write_error_exc = isinstance( - exc.error, PyMongoError - ) and exc.error.has_error_label("RetryableWriteError") - else: - retryable_write_error_exc = exc.has_error_label("RetryableWriteError") - if retryable_write_error_exc: + if retryable_write_label or always_retryable: assert self._session self._session._unpin() - if not retryable_write_error_exc or self._is_not_eligible_for_retry(): - if exc.has_error_label("NoWritesPerformed") and self._last_error: + if not always_retryable and ( + not retryable_write_label or self._is_not_eligible_for_retry() + ): + if exc_to_check.has_error_label("NoWritesPerformed") and 
self._last_error: raise self._last_error from exc else: raise @@ -2810,7 +2832,7 @@ def run(self) -> T: self._bulk.retrying = True else: self._retrying = True - if not exc.has_error_label("NoWritesPerformed"): + if not exc_to_check.has_error_label("NoWritesPerformed"): self._last_error = exc if self._last_error is None: self._last_error = exc @@ -2818,6 +2840,17 @@ def run(self) -> T: if self._client.topology_description.topology_type == TOPOLOGY_TYPE.Sharded: self._deprioritized_servers.append(self._server) + self._always_retryable = always_retryable + if always_retryable: + delay = self._retry_policy.backoff(self._attempt_number) if overloaded else 0 + if not self._retry_policy.should_retry(self._attempt_number, delay): + if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error: + raise self._last_error from exc + else: + raise + if overloaded: + time.sleep(delay) + def _is_not_eligible_for_retry(self) -> bool: """Checks if the exchange is not eligible for retry""" return not self._retryable or (self._is_retrying() and not self._multiple_retries) @@ -2879,7 +2912,7 @@ def _write(self) -> T: and conn.supports_sessions ) is_mongos = conn.is_mongos - if not sessions_supported: + if not self._always_retryable and not sessions_supported: # A retry is not possible because this server does # not support sessions raise the last error. self._check_last_error() @@ -2911,7 +2944,7 @@ def _read(self) -> T: conn, read_pref, ): - if self._retrying and not self._retryable: + if self._retrying and not self._retryable and not self._always_retryable: self._check_last_error() if self._retrying: _debug_log( diff --git a/test/asynchronous/test_backpressure.py b/test/asynchronous/test_backpressure.py new file mode 100644 index 0000000000..11f8edde67 --- /dev/null +++ b/test/asynchronous/test_backpressure.py @@ -0,0 +1,230 @@ +# Copyright 2025-present MongoDB, Inc. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Test Client Backpressure spec."""
+from __future__ import annotations
+
+import asyncio
+import sys
+
+import pymongo
+
+sys.path[0:0] = [""]
+
+from test.asynchronous import (
+    AsyncIntegrationTest,
+    AsyncPyMongoTestCase,
+    async_client_context,
+    unittest,
+)
+
+from pymongo.asynchronous import helpers
+from pymongo.asynchronous.helpers import _MAX_RETRIES, _RetryPolicy, _TokenBucket
+from pymongo.errors import PyMongoError
+
+_IS_SYNC = False
+
+# Mock a system overload error.
+mock_overload_error = {
+    "configureFailPoint": "failCommand",
+    "mode": {"times": 1},
+    "data": {
+        "failCommands": ["find", "insert", "update"],
+        "errorCode": 462,  # IngressRequestRateLimitExceeded
+        "errorLabels": ["RetryableError"],
+    },
+}
+
+
+class TestBackpressure(AsyncIntegrationTest):
+    RUN_ON_LOAD_BALANCER = True
+
+    @async_client_context.require_failCommand_appName
+    async def test_retry_overload_error_command(self):
+        await self.db.t.insert_one({"x": 1})
+
+        # Ensure command is retried on overload error.
+        fail_many = mock_overload_error.copy()
+        fail_many["mode"] = {"times": _MAX_RETRIES}
+        async with self.fail_point(fail_many):
+            await self.db.command("find", "t")
+
+        # Ensure command stops retrying after _MAX_RETRIES.
+ fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await self.db.command("find", "t") + + self.assertIn("RetryableError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_retry_overload_error_find(self): + await self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + async with self.fail_point(fail_many): + await self.db.t.find_one() + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await self.db.t.find_one() + + self.assertIn("RetryableError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_retry_overload_error_insert_one(self): + await self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + async with self.fail_point(fail_many): + await self.db.t.find_one() + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await self.db.t.find_one() + + self.assertIn("RetryableError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_retry_overload_error_update_many(self): + # Even though update_many is not a retryable write operation, it will + # still be retried via the "RetryableError" error label. + await self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. 
+ fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + async with self.fail_point(fail_many): + await self.db.t.update_many({}, {"$set": {"x": 2}}) + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await self.db.t.update_many({}, {"$set": {"x": 2}}) + + self.assertIn("RetryableError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_retry_overload_error_getMore(self): + coll = self.db.t + await coll.insert_many([{"x": 1} for _ in range(10)]) + + # Ensure command is retried on overload error. + fail_many = { + "configureFailPoint": "failCommand", + "mode": {"times": _MAX_RETRIES}, + "data": { + "failCommands": ["getMore"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError"], + }, + } + cursor = coll.find(batch_size=2) + await cursor.next() + async with self.fail_point(fail_many): + await cursor.to_list() + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = fail_many.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + cursor = coll.find(batch_size=2) + await cursor.next() + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await cursor.to_list() + + self.assertIn("RetryableError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_limit_retry_command(self): + client = await self.async_rs_or_single_client() + client._retry_policy.token_bucket.tokens = 1 + db = client.pymongo_test + await db.t.insert_one({"x": 1}) + + # Ensure command is retried once overload error. 
+ fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": 1} + async with self.fail_point(fail_many): + await db.command("find", "t") + + # Ensure command stops retrying when there are no tokens left. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": 2} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await db.command("find", "t") + + self.assertIn("RetryableError", str(error.exception)) + + +class TestRetryPolicy(AsyncPyMongoTestCase): + async def test_retry_policy(self): + capacity = 10 + retry_policy = _RetryPolicy(_TokenBucket(capacity=capacity)) + self.assertEqual(retry_policy.attempts, helpers._MAX_RETRIES) + self.assertEqual(retry_policy.backoff_initial, helpers._BACKOFF_INITIAL) + self.assertEqual(retry_policy.backoff_max, helpers._BACKOFF_MAX) + for i in range(1, helpers._MAX_RETRIES + 1): + self.assertTrue(await retry_policy.should_retry(i, 0)) + self.assertFalse(await retry_policy.should_retry(helpers._MAX_RETRIES + 1, 0)) + for i in range(capacity - helpers._MAX_RETRIES): + self.assertTrue(await retry_policy.should_retry(1, 0)) + # No tokens left, should not retry. + self.assertFalse(await retry_policy.should_retry(1, 0)) + self.assertEqual(retry_policy.token_bucket.tokens, 0) + + # record_success should generate tokens. + for _ in range(int(2 / helpers.DEFAULT_RETRY_TOKEN_RETURN)): + await retry_policy.record_success(retry=False) + self.assertAlmostEqual(retry_policy.token_bucket.tokens, 2) + for i in range(2): + self.assertTrue(await retry_policy.should_retry(1, 0)) + self.assertFalse(await retry_policy.should_retry(1, 0)) + + # Recording a successful retry should return 1 additional token. 
+ await retry_policy.record_success(retry=True) + self.assertAlmostEqual( + retry_policy.token_bucket.tokens, 1 + helpers.DEFAULT_RETRY_TOKEN_RETURN + ) + self.assertTrue(await retry_policy.should_retry(1, 0)) + self.assertFalse(await retry_policy.should_retry(1, 0)) + self.assertAlmostEqual(retry_policy.token_bucket.tokens, helpers.DEFAULT_RETRY_TOKEN_RETURN) + + async def test_retry_policy_csot(self): + retry_policy = _RetryPolicy(_TokenBucket()) + self.assertTrue(await retry_policy.should_retry(1, 0.5)) + with pymongo.timeout(0.5): + self.assertTrue(await retry_policy.should_retry(1, 0)) + self.assertTrue(await retry_policy.should_retry(1, 0.1)) + # Would exceed the timeout, should not retry. + self.assertFalse(await retry_policy.should_retry(1, 1.0)) + self.assertTrue(await retry_policy.should_retry(1, 1.0)) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_backpressure.py b/test/test_backpressure.py new file mode 100644 index 0000000000..fac1d6236d --- /dev/null +++ b/test/test_backpressure.py @@ -0,0 +1,230 @@ +# Copyright 2025-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Test Client Backpressure spec.""" +from __future__ import annotations + +import asyncio +import sys + +import pymongo + +sys.path[0:0] = [""] + +from test import ( + IntegrationTest, + PyMongoTestCase, + client_context, + unittest, +) + +from pymongo.errors import PyMongoError +from pymongo.synchronous import helpers +from pymongo.synchronous.helpers import _MAX_RETRIES, _RetryPolicy, _TokenBucket + +_IS_SYNC = True + +# Mock an system overload error. +mock_overload_error = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find", "insert", "update"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError"], + }, +} + + +class TestBackpressure(IntegrationTest): + RUN_ON_LOAD_BALANCER = True + + @client_context.require_failCommand_appName + def test_retry_overload_error_command(self): + self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + with self.fail_point(fail_many): + self.db.command("find", "t") + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + self.db.command("find", "t") + + self.assertIn("RetryableError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_retry_overload_error_find(self): + self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + with self.fail_point(fail_many): + self.db.t.find_one() + + # Ensure command stops retrying after _MAX_RETRIES. 
+ fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + self.db.t.find_one() + + self.assertIn("RetryableError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_retry_overload_error_insert_one(self): + self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + with self.fail_point(fail_many): + self.db.t.find_one() + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + self.db.t.find_one() + + self.assertIn("RetryableError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_retry_overload_error_update_many(self): + # Even though update_many is not a retryable write operation, it will + # still be retried via the "RetryableError" error label. + self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + with self.fail_point(fail_many): + self.db.t.update_many({}, {"$set": {"x": 2}}) + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + self.db.t.update_many({}, {"$set": {"x": 2}}) + + self.assertIn("RetryableError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_retry_overload_error_getMore(self): + coll = self.db.t + coll.insert_many([{"x": 1} for _ in range(10)]) + + # Ensure command is retried on overload error. 
+ fail_many = { + "configureFailPoint": "failCommand", + "mode": {"times": _MAX_RETRIES}, + "data": { + "failCommands": ["getMore"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError"], + }, + } + cursor = coll.find(batch_size=2) + cursor.next() + with self.fail_point(fail_many): + cursor.to_list() + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = fail_many.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + cursor = coll.find(batch_size=2) + cursor.next() + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + cursor.to_list() + + self.assertIn("RetryableError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_limit_retry_command(self): + client = self.rs_or_single_client() + client._retry_policy.token_bucket.tokens = 1 + db = client.pymongo_test + db.t.insert_one({"x": 1}) + + # Ensure command is retried once overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": 1} + with self.fail_point(fail_many): + db.command("find", "t") + + # Ensure command stops retrying when there are no tokens left. 
+ fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": 2} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + db.command("find", "t") + + self.assertIn("RetryableError", str(error.exception)) + + +class TestRetryPolicy(PyMongoTestCase): + def test_retry_policy(self): + capacity = 10 + retry_policy = _RetryPolicy(_TokenBucket(capacity=capacity)) + self.assertEqual(retry_policy.attempts, helpers._MAX_RETRIES) + self.assertEqual(retry_policy.backoff_initial, helpers._BACKOFF_INITIAL) + self.assertEqual(retry_policy.backoff_max, helpers._BACKOFF_MAX) + for i in range(1, helpers._MAX_RETRIES + 1): + self.assertTrue(retry_policy.should_retry(i, 0)) + self.assertFalse(retry_policy.should_retry(helpers._MAX_RETRIES + 1, 0)) + for i in range(capacity - helpers._MAX_RETRIES): + self.assertTrue(retry_policy.should_retry(1, 0)) + # No tokens left, should not retry. + self.assertFalse(retry_policy.should_retry(1, 0)) + self.assertEqual(retry_policy.token_bucket.tokens, 0) + + # record_success should generate tokens. + for _ in range(int(2 / helpers.DEFAULT_RETRY_TOKEN_RETURN)): + retry_policy.record_success(retry=False) + self.assertAlmostEqual(retry_policy.token_bucket.tokens, 2) + for i in range(2): + self.assertTrue(retry_policy.should_retry(1, 0)) + self.assertFalse(retry_policy.should_retry(1, 0)) + + # Recording a successful retry should return 1 additional token. 
+ retry_policy.record_success(retry=True) + self.assertAlmostEqual( + retry_policy.token_bucket.tokens, 1 + helpers.DEFAULT_RETRY_TOKEN_RETURN + ) + self.assertTrue(retry_policy.should_retry(1, 0)) + self.assertFalse(retry_policy.should_retry(1, 0)) + self.assertAlmostEqual(retry_policy.token_bucket.tokens, helpers.DEFAULT_RETRY_TOKEN_RETURN) + + def test_retry_policy_csot(self): + retry_policy = _RetryPolicy(_TokenBucket()) + self.assertTrue(retry_policy.should_retry(1, 0.5)) + with pymongo.timeout(0.5): + self.assertTrue(retry_policy.should_retry(1, 0)) + self.assertTrue(retry_policy.should_retry(1, 0.1)) + # Would exceed the timeout, should not retry. + self.assertFalse(retry_policy.should_retry(1, 1.0)) + self.assertTrue(retry_policy.should_retry(1, 1.0)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/synchro.py b/tools/synchro.py index 1444b22994..492fbf4287 100644 --- a/tools/synchro.py +++ b/tools/synchro.py @@ -209,6 +209,7 @@ def async_only_test(f: str) -> bool: "test_auth_oidc.py", "test_auth_spec.py", "test_bulk.py", + "test_backpressure.py", "test_change_stream.py", "test_client.py", "test_client_bulk_write.py", From 0010325aa893d450e11955e21441ca3a93235c67 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Thu, 9 Oct 2025 11:43:03 -0500 Subject: [PATCH 03/14] PYTHON-5536 Avoid clearing the connection pool when the server connection rate limiter triggers (#2509) Co-authored-by: Iris <58442094+sleepyStick@users.noreply.github.com> Co-authored-by: Noah Stapp Co-authored-by: Shane Harvey Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> PYTHON-5629 Increase max overload retries from 3 to 5 and initial delay from 50ms to 100ms (#2599) PYTHON-5517 Simplify pool backpressure behavior (#2611) synchro update network_layer update pool shared update pool shared update run-tests --- justfile | 16 +- pymongo/asynchronous/helpers.py | 6 +- pymongo/asynchronous/pool.py | 38 ++++- 
pymongo/asynchronous/topology.py | 4 +- pymongo/logger.py | 2 + pymongo/monitoring.py | 3 + pymongo/synchronous/helpers.py | 124 +++++++++++++++ pymongo/synchronous/pool.py | 38 ++++- pymongo/synchronous/topology.py | 4 +- .../test_discovery_and_monitoring.py | 62 +++++++- test/asynchronous/test_pooling.py | 34 +++++ .../pool-create-min-size-error.json | 6 +- .../backpressure-network-error-fail.json | 140 +++++++++++++++++ .../backpressure-network-timeout-fail.json | 143 ++++++++++++++++++ ...ged-on-min-pool-size-population-error.json | 106 +++++++++++++ test/load_balancer/sdam-error-handling.json | 4 +- test/test_discovery_and_monitoring.py | 60 +++++++- test/test_pooling.py | 34 +++++ tools/synchro.py | 2 +- 19 files changed, 788 insertions(+), 38 deletions(-) create mode 100644 test/discovery_and_monitoring/unified/backpressure-network-error-fail.json create mode 100644 test/discovery_and_monitoring/unified/backpressure-network-timeout-fail.json create mode 100644 test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json diff --git a/justfile b/justfile index 92bdee5be3..7ac5bd33ff 100644 --- a/justfile +++ b/justfile @@ -1,5 +1,7 @@ # See https://just.systems/man/en/ for instructions set shell := ["bash", "-c"] +# Do not modify the lock file when running justfile commands. +export UV_FROZEN := "1" # Commonly used command segments. 
typing_run := "uv run --group typing --extra aws --extra encryption --extra ocsp --extra snappy --extra test --extra zstd" @@ -14,7 +16,7 @@ default: [private] resync: - @uv sync --quiet + @uv sync --quiet --frozen install: bash .evergreen/scripts/setup-dev-env.sh @@ -48,12 +50,12 @@ typing-pyright: && resync {{typing_run}} pyright -p strict_pyrightconfig.json test/test_typing_strict.py [group('lint')] -lint *args="": && resync - uvx pre-commit run --all-files {{args}} +lint: && resync + uv run pre-commit run --all-files [group('lint')] -lint-manual *args="": && resync - uvx pre-commit run --all-files --hook-stage manual {{args}} +lint-manual: && resync + uv run pre-commit run --all-files --hook-stage manual [group('test')] test *args="-v --durations=5 --maxfail=10": && resync @@ -71,10 +73,6 @@ setup-tests *args="": teardown-tests: bash .evergreen/scripts/teardown-tests.sh -[group('test')] -integration-tests: - bash integration_tests/run.sh - [group('server')] run-server *args="": bash .evergreen/scripts/run-server.sh {{args}} diff --git a/pymongo/asynchronous/helpers.py b/pymongo/asynchronous/helpers.py index 4ff3caa104..9dc87ad285 100644 --- a/pymongo/asynchronous/helpers.py +++ b/pymongo/asynchronous/helpers.py @@ -16,11 +16,9 @@ from __future__ import annotations import asyncio -import builtins import functools import random import socket -import time import time as time # noqa: PLC0414 # needed in sync version from typing import ( Any, @@ -77,8 +75,8 @@ async def inner(*args: Any, **kwargs: Any) -> Any: return cast(F, inner) -_MAX_RETRIES = 3 -_BACKOFF_INITIAL = 0.05 +_MAX_RETRIES = 5 +_BACKOFF_INITIAL = 0.1 _BACKOFF_MAX = 10 # DRIVERS-3240 will determine these defaults. 
DEFAULT_RETRY_TOKEN_CAPACITY = 1000.0 diff --git a/pymongo/asynchronous/pool.py b/pymongo/asynchronous/pool.py index f521091e3c..2c9c70cd46 100644 --- a/pymongo/asynchronous/pool.py +++ b/pymongo/asynchronous/pool.py @@ -19,6 +19,8 @@ import contextlib import logging import os +import socket +import ssl import sys import time import weakref @@ -52,10 +54,12 @@ DocumentTooLarge, ExecutionTimeout, InvalidOperation, + NetworkTimeout, NotPrimaryError, OperationFailure, PyMongoError, WaitQueueTimeoutError, + _CertificateError, ) from pymongo.hello import Hello, HelloCompat from pymongo.helpers_shared import _get_timeout_details, format_timeout_details @@ -769,8 +773,8 @@ def __init__( # Enforces: maxConnecting # Also used for: clearing the wait queue self._max_connecting_cond = _async_create_condition(self.lock) - self._max_connecting = self.opts.max_connecting self._pending = 0 + self._max_connecting = self.opts.max_connecting self._client_id = client_id if self.enabled_for_cmap: assert self.opts._event_listeners is not None @@ -1003,6 +1007,21 @@ async def remove_stale_sockets(self, reference_generation: int) -> None: self.requests -= 1 self.size_cond.notify() + def _handle_connection_error(self, error: BaseException) -> None: + # Handle system overload condition for non-sdam pools. + # Look for errors of type AutoReconnect and add error labels if appropriate. + if self.is_sdam or type(error) not in (AutoReconnect, NetworkTimeout): + return + assert isinstance(error, AutoReconnect) # Appease type checker. + # If the original error was a DNS, certificate, or SSL error, ignore it. + if isinstance(error.__cause__, (_CertificateError, SSLErrors, socket.gaierror)): + # End of file errors are excluded, because the server may have disconnected + # during the handshake. 
+ if not isinstance(error.__cause__, (ssl.SSLEOFError, ssl.SSLZeroReturnError)): + return + error._add_error_label("SystemOverloadedError") + error._add_error_label("RetryableError") + async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> AsyncConnection: """Connect to Mongo and return a new AsyncConnection. @@ -1054,10 +1073,10 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR), error=ConnectionClosedReason.ERROR, ) + self._handle_connection_error(error) if isinstance(error, (IOError, OSError, *SSLErrors)): details = _get_timeout_details(self.opts) _raise_connection_failure(self.address, error, timeout_details=details) - raise conn = AsyncConnection(networking_interface, self, self.address, conn_id, self.is_sdam) # type: ignore[arg-type] @@ -1066,18 +1085,22 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A self.active_contexts.discard(tmp_context) if tmp_context.cancelled: conn.cancel_context.cancel() + completed_hello = False try: if not self.is_sdam: await conn.hello() + completed_hello = True self.is_writable = conn.is_writable if handler: handler.contribute_socket(conn, completed_handshake=False) await conn.authenticate() # Catch KeyboardInterrupt, CancelledError, etc. and cleanup. - except BaseException: + except BaseException as e: async with self.lock: self.active_contexts.discard(conn.cancel_context) + if not completed_hello: + self._handle_connection_error(e) await conn.close_conn(ConnectionClosedReason.ERROR) raise @@ -1406,8 +1429,8 @@ async def _perished(self, conn: AsyncConnection) -> bool: :class:`~pymongo.errors.AutoReconnect` exceptions on server hiccups, etc. We only check if the socket was closed by an external error if it has been > 1 second since the socket was checked into the - pool, to keep performance reasonable - we can't avoid AutoReconnects - completely anyway. 
+ pool to keep performance reasonable - + we can't avoid AutoReconnects completely anyway. """ idle_time_seconds = conn.idle_time_seconds() # If socket is idle, open a new one. @@ -1418,8 +1441,9 @@ async def _perished(self, conn: AsyncConnection) -> bool: await conn.close_conn(ConnectionClosedReason.IDLE) return True - if self._check_interval_seconds is not None and ( - self._check_interval_seconds == 0 or idle_time_seconds > self._check_interval_seconds + check_interval_seconds = self._check_interval_seconds + if check_interval_seconds is not None and ( + check_interval_seconds == 0 or idle_time_seconds > check_interval_seconds ): if conn.conn_closed(): await conn.close_conn(ConnectionClosedReason.ERROR) diff --git a/pymongo/asynchronous/topology.py b/pymongo/asynchronous/topology.py index 283aabc690..a2b354f7cc 100644 --- a/pymongo/asynchronous/topology.py +++ b/pymongo/asynchronous/topology.py @@ -890,7 +890,9 @@ async def _handle_error(self, address: _Address, err_ctx: _ErrorContext) -> None # Clear the pool. await server.reset(service_id) elif isinstance(error, ConnectionFailure): - if isinstance(error, WaitQueueTimeoutError): + if isinstance(error, WaitQueueTimeoutError) or ( + error.has_error_label("SystemOverloadedError") + ): return # "Client MUST replace the server's description with type Unknown # ... MUST NOT request an immediate check of the server." 
diff --git a/pymongo/logger.py b/pymongo/logger.py index 1b3fe43b86..ccfc45ed88 100644 --- a/pymongo/logger.py +++ b/pymongo/logger.py @@ -42,6 +42,7 @@ class _ConnectionStatusMessage(str, enum.Enum): POOL_READY = "Connection pool ready" POOL_CLOSED = "Connection pool closed" POOL_CLEARED = "Connection pool cleared" + POOL_BACKOFF = "Connection pool backoff" CONN_CREATED = "Connection created" CONN_READY = "Connection ready" @@ -88,6 +89,7 @@ class _SDAMStatusMessage(str, enum.Enum): _VERBOSE_CONNECTION_ERROR_REASONS = { ConnectionClosedReason.POOL_CLOSED: "Connection pool was closed", ConnectionCheckOutFailedReason.POOL_CLOSED: "Connection pool was closed", + ConnectionClosedReason.POOL_BACKOFF: "Connection pool is in backoff", ConnectionClosedReason.STALE: "Connection pool was stale", ConnectionClosedReason.ERROR: "An error occurred while using the connection", ConnectionCheckOutFailedReason.CONN_ERROR: "An error occurred while trying to establish a new connection", diff --git a/pymongo/monitoring.py b/pymongo/monitoring.py index 46a78aea0b..0dfbbb915a 100644 --- a/pymongo/monitoring.py +++ b/pymongo/monitoring.py @@ -934,6 +934,9 @@ class ConnectionClosedReason: POOL_CLOSED = "poolClosed" """The pool was closed, making the connection no longer valid.""" + POOL_BACKOFF = "poolBackoff" + """The pool is in backoff mode.""" + class ConnectionCheckOutFailedReason: """An enum that defines values for `reason` on a diff --git a/pymongo/synchronous/helpers.py b/pymongo/synchronous/helpers.py index c1b75a3c95..2c57b7a597 100644 --- a/pymongo/synchronous/helpers.py +++ b/pymongo/synchronous/helpers.py @@ -16,7 +16,10 @@ from __future__ import annotations import asyncio +import functools +import random import socket +import time as time # noqa: PLC0414 # needed in sync version from typing import ( Any, Callable, @@ -24,10 +27,13 @@ cast, ) +from pymongo import _csot from pymongo.errors import ( OperationFailure, + PyMongoError, ) from pymongo.helpers_shared import 
_REAUTHENTICATION_REQUIRED_CODE +from pymongo.lock import _create_lock _IS_SYNC = True @@ -36,6 +42,7 @@ def _handle_reauth(func: F) -> F: + @functools.wraps(func) def inner(*args: Any, **kwargs: Any) -> Any: no_reauth = kwargs.pop("no_reauth", False) from pymongo.message import _BulkWriteContext @@ -68,6 +75,123 @@ def inner(*args: Any, **kwargs: Any) -> Any: return cast(F, inner) +_MAX_RETRIES = 5 +_BACKOFF_INITIAL = 0.1 +_BACKOFF_MAX = 10 +# DRIVERS-3240 will determine these defaults. +DEFAULT_RETRY_TOKEN_CAPACITY = 1000.0 +DEFAULT_RETRY_TOKEN_RETURN = 0.1 + + +def _backoff( + attempt: int, initial_delay: float = _BACKOFF_INITIAL, max_delay: float = _BACKOFF_MAX +) -> float: + jitter = random.random() # noqa: S311 + return jitter * min(initial_delay * (2**attempt), max_delay) + + +class _TokenBucket: + """A token bucket implementation for rate limiting.""" + + def __init__( + self, + capacity: float = DEFAULT_RETRY_TOKEN_CAPACITY, + return_rate: float = DEFAULT_RETRY_TOKEN_RETURN, + ): + self.lock = _create_lock() + self.capacity = capacity + # DRIVERS-3240 will determine how full the bucket should start. + self.tokens = capacity + self.return_rate = return_rate + + def consume(self) -> bool: + """Consume a token from the bucket if available.""" + with self.lock: + if self.tokens >= 1: + self.tokens -= 1 + return True + return False + + def deposit(self, retry: bool = False) -> None: + """Deposit a token back into the bucket.""" + retry_token = 1 if retry else 0 + with self.lock: + self.tokens = min(self.capacity, self.tokens + retry_token + self.return_rate) + + +class _RetryPolicy: + """A retry limiter that performs exponential backoff with jitter. + + Retry attempts are limited by a token bucket to prevent overwhelming the server during + a prolonged outage or high load. 
+ """ + + def __init__( + self, + token_bucket: _TokenBucket, + attempts: int = _MAX_RETRIES, + backoff_initial: float = _BACKOFF_INITIAL, + backoff_max: float = _BACKOFF_MAX, + ): + self.token_bucket = token_bucket + self.attempts = attempts + self.backoff_initial = backoff_initial + self.backoff_max = backoff_max + + def record_success(self, retry: bool) -> None: + """Record a successful operation.""" + self.token_bucket.deposit(retry) + + def backoff(self, attempt: int) -> float: + """Return the backoff duration for the given .""" + return _backoff(max(0, attempt - 1), self.backoff_initial, self.backoff_max) + + def should_retry(self, attempt: int, delay: float) -> bool: + """Return if we have budget to retry and how long to backoff.""" + if attempt > self.attempts: + return False + + # If the delay would exceed the deadline, bail early before consuming a token. + if _csot.get_timeout(): + if time.monotonic() + delay > _csot.get_deadline(): + return False + + # Check token bucket last since we only want to consume a token if we actually retry. + if not self.token_bucket.consume(): + # DRIVERS-3246 Improve diagnostics when this case happens. + # We could add info to the exception and log. + return False + return True + + +def _retry_overload(func: F) -> F: + @functools.wraps(func) + def inner(self: Any, *args: Any, **kwargs: Any) -> Any: + retry_policy = self._retry_policy + attempt = 0 + while True: + try: + res = func(self, *args, **kwargs) + retry_policy.record_success(retry=attempt > 0) + return res + except PyMongoError as exc: + if not exc.has_error_label("RetryableError"): + raise + attempt += 1 + delay = 0 + if exc.has_error_label("SystemOverloadedError"): + delay = retry_policy.backoff(attempt) + if not retry_policy.should_retry(attempt, delay): + raise + + # Implement exponential backoff on retry. 
+ if delay: + time.sleep(delay) + continue + + return cast(F, inner) + + def _getaddrinfo( host: Any, port: Any, **kwargs: Any ) -> list[ diff --git a/pymongo/synchronous/pool.py b/pymongo/synchronous/pool.py index 66258fda18..9e50fa5903 100644 --- a/pymongo/synchronous/pool.py +++ b/pymongo/synchronous/pool.py @@ -19,6 +19,8 @@ import contextlib import logging import os +import socket +import ssl import sys import time import weakref @@ -49,10 +51,12 @@ DocumentTooLarge, ExecutionTimeout, InvalidOperation, + NetworkTimeout, NotPrimaryError, OperationFailure, PyMongoError, WaitQueueTimeoutError, + _CertificateError, ) from pymongo.hello import Hello, HelloCompat from pymongo.helpers_shared import _get_timeout_details, format_timeout_details @@ -767,8 +771,8 @@ def __init__( # Enforces: maxConnecting # Also used for: clearing the wait queue self._max_connecting_cond = _create_condition(self.lock) - self._max_connecting = self.opts.max_connecting self._pending = 0 + self._max_connecting = self.opts.max_connecting self._client_id = client_id if self.enabled_for_cmap: assert self.opts._event_listeners is not None @@ -999,6 +1003,21 @@ def remove_stale_sockets(self, reference_generation: int) -> None: self.requests -= 1 self.size_cond.notify() + def _handle_connection_error(self, error: BaseException) -> None: + # Handle system overload condition for non-sdam pools. + # Look for errors of type AutoReconnect and add error labels if appropriate. + if self.is_sdam or type(error) not in (AutoReconnect, NetworkTimeout): + return + assert isinstance(error, AutoReconnect) # Appease type checker. + # If the original error was a DNS, certificate, or SSL error, ignore it. + if isinstance(error.__cause__, (_CertificateError, SSLErrors, socket.gaierror)): + # End of file errors are excluded, because the server may have disconnected + # during the handshake. 
+ if not isinstance(error.__cause__, (ssl.SSLEOFError, ssl.SSLZeroReturnError)): + return + error._add_error_label("SystemOverloadedError") + error._add_error_label("RetryableError") + def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connection: """Connect to Mongo and return a new Connection. @@ -1050,10 +1069,10 @@ def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connect reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR), error=ConnectionClosedReason.ERROR, ) + self._handle_connection_error(error) if isinstance(error, (IOError, OSError, *SSLErrors)): details = _get_timeout_details(self.opts) _raise_connection_failure(self.address, error, timeout_details=details) - raise conn = Connection(networking_interface, self, self.address, conn_id, self.is_sdam) # type: ignore[arg-type] @@ -1062,18 +1081,22 @@ def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connect self.active_contexts.discard(tmp_context) if tmp_context.cancelled: conn.cancel_context.cancel() + completed_hello = False try: if not self.is_sdam: conn.hello() + completed_hello = True self.is_writable = conn.is_writable if handler: handler.contribute_socket(conn, completed_handshake=False) conn.authenticate() # Catch KeyboardInterrupt, CancelledError, etc. and cleanup. - except BaseException: + except BaseException as e: with self.lock: self.active_contexts.discard(conn.cancel_context) + if not completed_hello: + self._handle_connection_error(e) conn.close_conn(ConnectionClosedReason.ERROR) raise @@ -1402,8 +1425,8 @@ def _perished(self, conn: Connection) -> bool: :class:`~pymongo.errors.AutoReconnect` exceptions on server hiccups, etc. We only check if the socket was closed by an external error if it has been > 1 second since the socket was checked into the - pool, to keep performance reasonable - we can't avoid AutoReconnects - completely anyway. 
+ pool to keep performance reasonable - + we can't avoid AutoReconnects completely anyway. """ idle_time_seconds = conn.idle_time_seconds() # If socket is idle, open a new one. @@ -1414,8 +1437,9 @@ def _perished(self, conn: Connection) -> bool: conn.close_conn(ConnectionClosedReason.IDLE) return True - if self._check_interval_seconds is not None and ( - self._check_interval_seconds == 0 or idle_time_seconds > self._check_interval_seconds + check_interval_seconds = self._check_interval_seconds + if check_interval_seconds is not None and ( + check_interval_seconds == 0 or idle_time_seconds > check_interval_seconds ): if conn.conn_closed(): conn.close_conn(ConnectionClosedReason.ERROR) diff --git a/pymongo/synchronous/topology.py b/pymongo/synchronous/topology.py index a4ca0e6e0f..e967c2089f 100644 --- a/pymongo/synchronous/topology.py +++ b/pymongo/synchronous/topology.py @@ -888,7 +888,9 @@ def _handle_error(self, address: _Address, err_ctx: _ErrorContext) -> None: # Clear the pool. server.reset(service_id) elif isinstance(error, ConnectionFailure): - if isinstance(error, WaitQueueTimeoutError): + if isinstance(error, WaitQueueTimeoutError) or ( + error.has_error_label("SystemOverloadedError") + ): return # "Client MUST replace the server's description with type Unknown # ... MUST NOT request an immediate check of the server." 
diff --git a/test/asynchronous/test_discovery_and_monitoring.py b/test/asynchronous/test_discovery_and_monitoring.py index 5820d00c48..67df478b9f 100644 --- a/test/asynchronous/test_discovery_and_monitoring.py +++ b/test/asynchronous/test_discovery_and_monitoring.py @@ -25,8 +25,10 @@ from pathlib import Path from test.asynchronous.helpers import ConcurrentRunner from test.asynchronous.utils import flaky +from test.utils_shared import delay from pymongo.asynchronous.pool import AsyncConnection +from pymongo.errors import ConnectionFailure from pymongo.operations import _Op from pymongo.server_selectors import writable_server_selector @@ -70,7 +72,12 @@ ) from pymongo.hello import Hello, HelloCompat from pymongo.helpers_shared import _check_command_response, _check_write_command_response -from pymongo.monitoring import ServerHeartbeatFailedEvent, ServerHeartbeatStartedEvent +from pymongo.monitoring import ( + ConnectionCheckOutFailedEvent, + PoolClearedEvent, + ServerHeartbeatFailedEvent, + ServerHeartbeatStartedEvent, +) from pymongo.server_description import SERVER_TYPE, ServerDescription from pymongo.topology_description import TOPOLOGY_TYPE @@ -446,6 +453,59 @@ async def mock_close(self, reason): AsyncConnection.close_conn = original_close +class TestPoolBackpressure(AsyncIntegrationTest): + @async_client_context.require_version_min(7, 0, 0) + async def test_connection_pool_is_not_cleared(self): + listener = CMAPListener() + + # Create a client that listens to CMAP events, with maxConnecting=100. + client = await self.async_rs_or_single_client(maxConnecting=100, event_listeners=[listener]) + + # Enable the ingress rate limiter. 
+ await client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=True + ) + await client.admin.command("setParameter", 1, ingressConnectionEstablishmentRatePerSec=20) + await client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentBurstCapacitySecs=1 + ) + await client.admin.command("setParameter", 1, ingressConnectionEstablishmentMaxQueueDepth=1) + + # Disable the ingress rate limiter on teardown. + # Sleep for 1 second before disabling to avoid the rate limiter. + async def teardown(): + await asyncio.sleep(1) + await client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=False + ) + + self.addAsyncCleanup(teardown) + + # Make sure the collection has at least one document. + await client.test.test.delete_many({}) + await client.test.test.insert_one({}) + + # Run a slow operation to tie up the connection. + async def target(): + try: + await client.test.test.find_one({"$where": delay(0.1)}) + except ConnectionFailure: + pass + + # Run 100 parallel operations that contend for connections. + tasks = [] + for _ in range(100): + tasks.append(ConcurrentRunner(target=target)) + for t in tasks: + await t.start() + for t in tasks: + await t.join() + + # Verify there were at least 10 connection checkout failed event but no pool cleared events. 
+ self.assertGreater(len(listener.events_by_type(ConnectionCheckOutFailedEvent)), 10) + self.assertEqual(len(listener.events_by_type(PoolClearedEvent)), 0) + + class TestServerMonitoringMode(AsyncIntegrationTest): @async_client_context.require_no_load_balancer async def asyncSetUp(self): diff --git a/test/asynchronous/test_pooling.py b/test/asynchronous/test_pooling.py index 3193d9e3d5..2f0d5fc962 100644 --- a/test/asynchronous/test_pooling.py +++ b/test/asynchronous/test_pooling.py @@ -29,6 +29,7 @@ from pymongo.errors import AutoReconnect, ConnectionFailure, DuplicateKeyError from pymongo.hello import HelloCompat from pymongo.lock import _async_create_lock +from pymongo.read_preferences import ReadPreference sys.path[0:0] = [""] @@ -513,6 +514,39 @@ async def test_connection_timeout_message(self): str(error.exception), ) + @async_client_context.require_failCommand_appName + async def test_pool_backpressure_preserves_existing_connections(self): + client = await self.async_rs_or_single_client() + coll = client.pymongo_test.t + pool = await async_get_pool(client) + await coll.insert_many([{"x": 1} for _ in range(10)]) + t = SocketGetter(self.c, pool) + await t.start() + while t.state != "connection": + await asyncio.sleep(0.1) + + assert not t.sock.conn_closed() + + # Mock a session establishment overload. + mock_connection_fail = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "closeConnection": True, + }, + } + + async with self.fail_point(mock_connection_fail): + await coll.find_one({}) + + # Make sure the existing socket was not affected. 
+ assert not t.sock.conn_closed() + + # Cleanup + await t.release_conn() + await t.join() + await pool.close() + class TestPoolMaxSize(_TestPoolingBase): async def test_max_pool_size(self): diff --git a/test/connection_monitoring/pool-create-min-size-error.json b/test/connection_monitoring/pool-create-min-size-error.json index 1c744b850c..4334ce2571 100644 --- a/test/connection_monitoring/pool-create-min-size-error.json +++ b/test/connection_monitoring/pool-create-min-size-error.json @@ -9,15 +9,13 @@ ], "failPoint": { "configureFailPoint": "failCommand", - "mode": { - "times": 50 - }, + "mode": "alwaysOn", "data": { "failCommands": [ "isMaster", "hello" ], - "closeConnection": true, + "errorCode": 91, "appName": "poolCreateMinSizeErrorTest" } }, diff --git a/test/discovery_and_monitoring/unified/backpressure-network-error-fail.json b/test/discovery_and_monitoring/unified/backpressure-network-error-fail.json new file mode 100644 index 0000000000..f41b76459c --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-network-error-fail.json @@ -0,0 +1,140 @@ +{ + "description": "backpressure-network-error-fail", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "single", + "replicaset", + "sharded" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": "backpressure-network-error-fail", + "databaseName": "sdam-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ], + "tests": [ + { + "description": "apply backpressure on network connection errors during connection establishment", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "serverHeartbeatSucceededEvent", + "poolClearedEvent" + ], + "uriOptions": 
{ + "retryWrites": false, + "heartbeatFrequencyMS": 1000000, + "serverMonitoringMode": "poll", + "appname": "backpressureNetworkErrorFailTest" + } + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "sdam-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "backpressure-network-error-fail" + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverHeartbeatSucceededEvent": {} + }, + "count": 1 + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "isMaster", + "hello" + ], + "appName": "backpressureNetworkErrorFailTest", + "closeConnection": true + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 3 + }, + { + "_id": 4 + } + ] + }, + "expectError": { + "isError": true, + "errorLabelsContain": [ + "SystemOverloadedError", + "RetryableError" + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + } + ] +} diff --git a/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail.json b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail.json new file mode 100644 index 0000000000..a97c7a329f --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail.json @@ -0,0 +1,143 @@ +{ + "description": "backpressure-network-timeout-error", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "single", + "replicaset", + "sharded" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": 
"backpressure-network-timeout-error", + "databaseName": "sdam-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ], + "tests": [ + { + "description": "apply backpressure on network timeout error during connection establishment", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "serverDescriptionChangedEvent", + "poolClearedEvent" + ], + "uriOptions": { + "retryWrites": false, + "heartbeatFrequencyMS": 1000000, + "appname": "backpressureNetworkTimeoutErrorTest", + "serverMonitoringMode": "poll", + "connectTimeoutMS": 250, + "socketTimeoutMS": 250 + } + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "sdam-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "backpressure-network-timeout-error" + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": {} + }, + "count": 1 + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "isMaster", + "hello" + ], + "blockConnection": true, + "blockTimeMS": 500, + "appName": "backpressureNetworkTimeoutErrorTest" + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 3 + }, + { + "_id": 4 + } + ] + }, + "expectError": { + "isError": true, + "errorLabelsContain": [ + "SystemOverloadedError", + "RetryableError" + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + } + ] +} diff --git 
a/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json b/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json new file mode 100644 index 0000000000..35a49c1323 --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json @@ -0,0 +1,106 @@ +{ + "description": "backpressure-server-description-unchanged-on-min-pool-size-population-error", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "single" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "tests": [ + { + "description": "the server description is not changed on handshake error during minPoolSize population", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "observeEvents": [ + "serverDescriptionChangedEvent", + "connectionClosedEvent" + ], + "uriOptions": { + "appname": "authErrorTest", + "minPoolSize": 5, + "maxConnecting": 1, + "serverMonitoringMode": "poll", + "heartbeatFrequencyMS": 1000000 + } + } + } + ] + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "skip": 1 + }, + "data": { + "failCommands": [ + "hello", + "isMaster" + ], + "appName": "authErrorTest", + "closeConnection": true + } + } + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": {} + }, + "count": 1 + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "connectionClosedEvent": {} + }, + "count": 1 + } + } 
+ ], + "expectEvents": [ + { + "client": "client", + "eventType": "sdam", + "events": [ + { + "serverDescriptionChangedEvent": {} + } + ] + } + ] + } + ] +} diff --git a/test/load_balancer/sdam-error-handling.json b/test/load_balancer/sdam-error-handling.json index 5892dcacd6..41bc90be7f 100644 --- a/test/load_balancer/sdam-error-handling.json +++ b/test/load_balancer/sdam-error-handling.json @@ -282,7 +282,7 @@ "isMaster", "hello" ], - "closeConnection": true, + "errorCode": 11600, "appName": "lbSDAMErrorTestClient" } } @@ -297,7 +297,7 @@ } }, "expectError": { - "isClientError": true + "isError": true } } ], diff --git a/test/test_discovery_and_monitoring.py b/test/test_discovery_and_monitoring.py index 67a82996bd..2318f259ed 100644 --- a/test/test_discovery_and_monitoring.py +++ b/test/test_discovery_and_monitoring.py @@ -25,7 +25,9 @@ from pathlib import Path from test.helpers import ConcurrentRunner from test.utils import flaky +from test.utils_shared import delay +from pymongo.errors import ConnectionFailure from pymongo.operations import _Op from pymongo.server_selectors import writable_server_selector from pymongo.synchronous.pool import Connection @@ -67,7 +69,12 @@ ) from pymongo.hello import Hello, HelloCompat from pymongo.helpers_shared import _check_command_response, _check_write_command_response -from pymongo.monitoring import ServerHeartbeatFailedEvent, ServerHeartbeatStartedEvent +from pymongo.monitoring import ( + ConnectionCheckOutFailedEvent, + PoolClearedEvent, + ServerHeartbeatFailedEvent, + ServerHeartbeatStartedEvent, +) from pymongo.server_description import SERVER_TYPE, ServerDescription from pymongo.synchronous.settings import TopologySettings from pymongo.synchronous.topology import Topology, _ErrorContext @@ -444,6 +451,57 @@ def mock_close(self, reason): Connection.close_conn = original_close +class TestPoolBackpressure(IntegrationTest): + @client_context.require_version_min(7, 0, 0) + def test_connection_pool_is_not_cleared(self): + 
listener = CMAPListener() + + # Create a client that listens to CMAP events, with maxConnecting=100. + client = self.rs_or_single_client(maxConnecting=100, event_listeners=[listener]) + + # Enable the ingress rate limiter. + client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=True + ) + client.admin.command("setParameter", 1, ingressConnectionEstablishmentRatePerSec=20) + client.admin.command("setParameter", 1, ingressConnectionEstablishmentBurstCapacitySecs=1) + client.admin.command("setParameter", 1, ingressConnectionEstablishmentMaxQueueDepth=1) + + # Disable the ingress rate limiter on teardown. + # Sleep for 1 second before disabling to avoid the rate limiter. + def teardown(): + time.sleep(1) + client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=False + ) + + self.addCleanup(teardown) + + # Make sure the collection has at least one document. + client.test.test.delete_many({}) + client.test.test.insert_one({}) + + # Run a slow operation to tie up the connection. + def target(): + try: + client.test.test.find_one({"$where": delay(0.1)}) + except ConnectionFailure: + pass + + # Run 100 parallel operations that contend for connections. + tasks = [] + for _ in range(100): + tasks.append(ConcurrentRunner(target=target)) + for t in tasks: + t.start() + for t in tasks: + t.join() + + # Verify there were at least 10 connection checkout failed event but no pool cleared events. 
+ self.assertGreater(len(listener.events_by_type(ConnectionCheckOutFailedEvent)), 10) + self.assertEqual(len(listener.events_by_type(PoolClearedEvent)), 0) + + class TestServerMonitoringMode(IntegrationTest): @client_context.require_no_load_balancer def setUp(self): diff --git a/test/test_pooling.py b/test/test_pooling.py index cb5b206996..0f7ef144f6 100644 --- a/test/test_pooling.py +++ b/test/test_pooling.py @@ -29,6 +29,7 @@ from pymongo.errors import AutoReconnect, ConnectionFailure, DuplicateKeyError from pymongo.hello import HelloCompat from pymongo.lock import _create_lock +from pymongo.read_preferences import ReadPreference sys.path[0:0] = [""] @@ -511,6 +512,39 @@ def test_connection_timeout_message(self): str(error.exception), ) + @client_context.require_failCommand_appName + def test_pool_backpressure_preserves_existing_connections(self): + client = self.rs_or_single_client() + coll = client.pymongo_test.t + pool = get_pool(client) + coll.insert_many([{"x": 1} for _ in range(10)]) + t = SocketGetter(self.c, pool) + t.start() + while t.state != "connection": + time.sleep(0.1) + + assert not t.sock.conn_closed() + + # Mock a session establishment overload. + mock_connection_fail = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "closeConnection": True, + }, + } + + with self.fail_point(mock_connection_fail): + coll.find_one({}) + + # Make sure the existing socket was not affected. 
+ assert not t.sock.conn_closed() + + # Cleanup + t.release_conn() + t.join() + pool.close() + class TestPoolMaxSize(_TestPoolingBase): def test_max_pool_size(self): diff --git a/tools/synchro.py b/tools/synchro.py index 492fbf4287..661d8988cb 100644 --- a/tools/synchro.py +++ b/tools/synchro.py @@ -349,7 +349,7 @@ def translate_async_sleeps(lines: list[str]) -> list[str]: sleeps = [line for line in lines if "asyncio.sleep" in line] for line in sleeps: - res = re.search(r"asyncio.sleep\(([^()]*)\)", line) + res = re.search(r"asyncio\.sleep\(\s*(.*?)\)", line) if res: old = res[0] index = lines.index(line) From d767759ff4b6015e5a90007727cd7cdaa79107c4 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Mon, 1 Dec 2025 10:20:29 -0600 Subject: [PATCH 04/14] PYTHON-5517 update SDAM error handling tests to ignore handshake failures (#2631) --- .../test_discovery_and_monitoring.py | 3 +++ .../errors/error_handling_handshake.json | 16 ++++++++++++---- test/test_discovery_and_monitoring.py | 3 +++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/test/asynchronous/test_discovery_and_monitoring.py b/test/asynchronous/test_discovery_and_monitoring.py index 67df478b9f..e50ed78fb8 100644 --- a/test/asynchronous/test_discovery_and_monitoring.py +++ b/test/asynchronous/test_discovery_and_monitoring.py @@ -145,6 +145,9 @@ async def got_app_error(topology, app_error): raise AssertionError except (AutoReconnect, NotPrimaryError, OperationFailure) as e: if when == "beforeHandshakeCompletes": + # The pool would have added the SystemOverloadedError in this case. 
+ if isinstance(e, AutoReconnect): + e._add_error_label("SystemOverloadedError") completed_handshake = False elif when == "afterHandshakeCompletes": completed_handshake = True diff --git a/test/discovery_and_monitoring/errors/error_handling_handshake.json b/test/discovery_and_monitoring/errors/error_handling_handshake.json index 56ca7d1132..bf83f46f6a 100644 --- a/test/discovery_and_monitoring/errors/error_handling_handshake.json +++ b/test/discovery_and_monitoring/errors/error_handling_handshake.json @@ -97,14 +97,22 @@ "outcome": { "servers": { "a:27017": { - "type": "Unknown", - "topologyVersion": null, + "type": "RSPrimary", + "setName": "rs", + "topologyVersion": { + "processId": { + "$oid": "000000000000000000000001" + }, + "counter": { + "$numberLong": "1" + } + }, "pool": { - "generation": 1 + "generation": 0 } } }, - "topologyType": "ReplicaSetNoPrimary", + "topologyType": "ReplicaSetWithPrimary", "logicalSessionTimeoutMinutes": null, "setName": "rs" } diff --git a/test/test_discovery_and_monitoring.py b/test/test_discovery_and_monitoring.py index 2318f259ed..3a96db0abe 100644 --- a/test/test_discovery_and_monitoring.py +++ b/test/test_discovery_and_monitoring.py @@ -145,6 +145,9 @@ def got_app_error(topology, app_error): raise AssertionError except (AutoReconnect, NotPrimaryError, OperationFailure) as e: if when == "beforeHandshakeCompletes": + # The pool would have added the SystemOverloadedError in this case. 
+ if isinstance(e, AutoReconnect): + e._add_error_label("SystemOverloadedError") completed_handshake = False elif when == "afterHandshakeCompletes": completed_handshake = True From 88f05fae8deeed4ff93551601ce30a7f6078e748 Mon Sep 17 00:00:00 2001 From: Iris <58442094+sleepyStick@users.noreply.github.com> Date: Tue, 2 Dec 2025 14:28:33 -0800 Subject: [PATCH 05/14] PYTHON-5518: withTransaction API retries too frequently (#2600) Co-authored-by: Shane Harvey Co-authored-by: Steven Silvester Co-authored-by: Noah Stapp Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pymongo/asynchronous/client_session.py | 16 +++++- pymongo/synchronous/client_session.py | 16 +++++- test/asynchronous/test_transactions.py | 78 ++++++++++++++++++++++++-- test/test_transactions.py | 74 ++++++++++++++++++++++-- 4 files changed, 168 insertions(+), 16 deletions(-) diff --git a/pymongo/asynchronous/client_session.py b/pymongo/asynchronous/client_session.py index 697de81b1c..27f26c35b2 100644 --- a/pymongo/asynchronous/client_session.py +++ b/pymongo/asynchronous/client_session.py @@ -472,8 +472,8 @@ def _max_time_expired_error(exc: PyMongoError) -> bool: # This limit is non-configurable and was chosen to be twice the 60 second # default value of MongoDB's `transactionLifetimeLimitSeconds` parameter. 
_WITH_TRANSACTION_RETRY_TIME_LIMIT = 120 -_BACKOFF_MAX = 1 -_BACKOFF_INITIAL = 0.050 # 50ms initial backoff +_BACKOFF_MAX = 0.500 # 500ms max backoff +_BACKOFF_INITIAL = 0.005 # 5ms initial backoff def _within_time_limit(start_time: float) -> bool: @@ -481,6 +481,11 @@ def _within_time_limit(start_time: float) -> bool: return time.monotonic() - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT +def _would_exceed_time_limit(start_time: float, backoff: float) -> bool: + """Is the backoff within the with_transaction retry limit?""" + return time.monotonic() + backoff - start_time >= _WITH_TRANSACTION_RETRY_TIME_LIMIT + + _T = TypeVar("_T") if TYPE_CHECKING: @@ -708,10 +713,14 @@ async def callback(session, custom_arg, custom_kwarg=None): """ start_time = time.monotonic() retry = 0 + last_error: Optional[BaseException] = None while True: if retry: # Implement exponential backoff on retry. jitter = random.random() # noqa: S311 - backoff = jitter * min(_BACKOFF_INITIAL * (2**retry), _BACKOFF_MAX) + backoff = jitter * min(_BACKOFF_INITIAL * (1.5**retry), _BACKOFF_MAX) + if _would_exceed_time_limit(start_time, backoff): + assert last_error is not None + raise last_error await asyncio.sleep(backoff) retry += 1 await self.start_transaction( @@ -721,6 +730,7 @@ async def callback(session, custom_arg, custom_kwarg=None): ret = await callback(self) # Catch KeyboardInterrupt, CancelledError, etc. and cleanup. except BaseException as exc: + last_error = exc if self.in_transaction: await self.abort_transaction() if ( diff --git a/pymongo/synchronous/client_session.py b/pymongo/synchronous/client_session.py index d5a37eb108..28999bcd62 100644 --- a/pymongo/synchronous/client_session.py +++ b/pymongo/synchronous/client_session.py @@ -470,8 +470,8 @@ def _max_time_expired_error(exc: PyMongoError) -> bool: # This limit is non-configurable and was chosen to be twice the 60 second # default value of MongoDB's `transactionLifetimeLimitSeconds` parameter. 
_WITH_TRANSACTION_RETRY_TIME_LIMIT = 120 -_BACKOFF_MAX = 1 -_BACKOFF_INITIAL = 0.050 # 50ms initial backoff +_BACKOFF_MAX = 0.500 # 500ms max backoff +_BACKOFF_INITIAL = 0.005 # 5ms initial backoff def _within_time_limit(start_time: float) -> bool: @@ -479,6 +479,11 @@ def _within_time_limit(start_time: float) -> bool: return time.monotonic() - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT +def _would_exceed_time_limit(start_time: float, backoff: float) -> bool: + """Is the backoff within the with_transaction retry limit?""" + return time.monotonic() + backoff - start_time >= _WITH_TRANSACTION_RETRY_TIME_LIMIT + + _T = TypeVar("_T") if TYPE_CHECKING: @@ -706,10 +711,14 @@ def callback(session, custom_arg, custom_kwarg=None): """ start_time = time.monotonic() retry = 0 + last_error: Optional[BaseException] = None while True: if retry: # Implement exponential backoff on retry. jitter = random.random() # noqa: S311 - backoff = jitter * min(_BACKOFF_INITIAL * (2**retry), _BACKOFF_MAX) + backoff = jitter * min(_BACKOFF_INITIAL * (1.5**retry), _BACKOFF_MAX) + if _would_exceed_time_limit(start_time, backoff): + assert last_error is not None + raise last_error time.sleep(backoff) retry += 1 self.start_transaction(read_concern, write_concern, read_preference, max_commit_time_ms) @@ -717,6 +726,7 @@ def callback(session, custom_arg, custom_kwarg=None): ret = callback(self) # Catch KeyboardInterrupt, CancelledError, etc. and cleanup. 
except BaseException as exc: + last_error = exc if self.in_transaction: self.abort_transaction() if ( diff --git a/test/asynchronous/test_transactions.py b/test/asynchronous/test_transactions.py index 29c5d26423..18f9778463 100644 --- a/test/asynchronous/test_transactions.py +++ b/test/asynchronous/test_transactions.py @@ -16,7 +16,9 @@ from __future__ import annotations import asyncio +import random import sys +import time from io import BytesIO from test.asynchronous.utils_spec_runner import AsyncSpecRunner @@ -441,7 +443,7 @@ async def set_fail_point(self, command_args): await self.configure_fail_point(client, command_args) @async_client_context.require_transactions - async def test_callback_raises_custom_error(self): + async def test_1_callback_raises_custom_error(self): class _MyException(Exception): pass @@ -453,7 +455,7 @@ async def raise_error(_): await s.with_transaction(raise_error) @async_client_context.require_transactions - async def test_callback_returns_value(self): + async def test_2_callback_returns_value(self): async def callback(_): return "Foo" @@ -481,7 +483,7 @@ def callback(_): self.assertEqual(await s.with_transaction(callback), "Foo") @async_client_context.require_transactions - async def test_callback_not_retried_after_timeout(self): + async def test_3_1_callback_not_retried_after_timeout(self): listener = OvertCommandListener() client = await self.async_rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -509,7 +511,7 @@ async def callback(session): @async_client_context.require_test_commands @async_client_context.require_transactions - async def test_callback_not_retried_after_commit_timeout(self): + async def test_3_2_callback_not_retried_after_commit_timeout(self): listener = OvertCommandListener() client = await self.async_rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -543,7 +545,7 @@ async def callback(session): @async_client_context.require_test_commands 
@async_client_context.require_transactions - async def test_commit_not_retried_after_timeout(self): + async def test_3_3_commit_not_retried_after_timeout(self): listener = OvertCommandListener() client = await self.async_rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -613,6 +615,72 @@ async def callback(session): await s.with_transaction(callback) self.assertFalse(s.in_transaction) + @async_client_context.require_test_commands + @async_client_context.require_transactions + async def test_4_retry_backoff_is_enforced(self): + client = async_client_context.client + coll = client[self.db.name].test + # patch random to make it deterministic -- once to effectively have + # no backoff and the second time with "max" backoff (always waiting the longest + # possible time) + _original_random_random = random.random + + def always_one(): + return 1 + + def always_zero(): + return 0 + + random.random = always_zero + # set fail point to trigger transaction failure and trigger backoff + await self.set_fail_point( + { + "configureFailPoint": "failCommand", + "mode": {"times": 13}, + "data": { + "failCommands": ["commitTransaction"], + "errorCode": 251, + }, + } + ) + self.addAsyncCleanup( + self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"} + ) + + async def callback(session): + await coll.insert_one({}, session=session) + + start = time.monotonic() + async with self.client.start_session() as s: + await s.with_transaction(callback) + end = time.monotonic() + no_backoff_time = end - start + + random.random = always_one + # set fail point to trigger transaction failure and trigger backoff + await self.set_fail_point( + { + "configureFailPoint": "failCommand", + "mode": { + "times": 13 + }, # sufficiently high enough such that the time effect of backoff is noticeable + "data": { + "failCommands": ["commitTransaction"], + "errorCode": 251, + }, + } + ) + self.addAsyncCleanup( + self.set_fail_point, {"configureFailPoint": "failCommand", 
"mode": "off"} + ) + start = time.monotonic() + async with self.client.start_session() as s: + await s.with_transaction(callback) + end = time.monotonic() + self.assertLess(abs(end - start - (no_backoff_time + 2.2)), 1) # sum of 13 backoffs is 2.2 + + random.random = _original_random_random + class TestOptionsInsideTransactionProse(AsyncTransactionsBase): @async_client_context.require_transactions diff --git a/test/test_transactions.py b/test/test_transactions.py index 37e1a249e0..94d70396fc 100644 --- a/test/test_transactions.py +++ b/test/test_transactions.py @@ -16,7 +16,9 @@ from __future__ import annotations import asyncio +import random import sys +import time from io import BytesIO from test.utils_spec_runner import SpecRunner @@ -433,7 +435,7 @@ def set_fail_point(self, command_args): self.configure_fail_point(client, command_args) @client_context.require_transactions - def test_callback_raises_custom_error(self): + def test_1_callback_raises_custom_error(self): class _MyException(Exception): pass @@ -445,7 +447,7 @@ def raise_error(_): s.with_transaction(raise_error) @client_context.require_transactions - def test_callback_returns_value(self): + def test_2_callback_returns_value(self): def callback(_): return "Foo" @@ -473,7 +475,7 @@ def callback(_): self.assertEqual(s.with_transaction(callback), "Foo") @client_context.require_transactions - def test_callback_not_retried_after_timeout(self): + def test_3_1_callback_not_retried_after_timeout(self): listener = OvertCommandListener() client = self.rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -501,7 +503,7 @@ def callback(session): @client_context.require_test_commands @client_context.require_transactions - def test_callback_not_retried_after_commit_timeout(self): + def test_3_2_callback_not_retried_after_commit_timeout(self): listener = OvertCommandListener() client = self.rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -533,7 +535,7 @@ def 
callback(session): @client_context.require_test_commands @client_context.require_transactions - def test_commit_not_retried_after_timeout(self): + def test_3_3_commit_not_retried_after_timeout(self): listener = OvertCommandListener() client = self.rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -601,6 +603,68 @@ def callback(session): s.with_transaction(callback) self.assertFalse(s.in_transaction) + @client_context.require_test_commands + @client_context.require_transactions + def test_4_retry_backoff_is_enforced(self): + client = client_context.client + coll = client[self.db.name].test + # patch random to make it deterministic -- once to effectively have + # no backoff and the second time with "max" backoff (always waiting the longest + # possible time) + _original_random_random = random.random + + def always_one(): + return 1 + + def always_zero(): + return 0 + + random.random = always_zero + # set fail point to trigger transaction failure and trigger backoff + self.set_fail_point( + { + "configureFailPoint": "failCommand", + "mode": {"times": 13}, + "data": { + "failCommands": ["commitTransaction"], + "errorCode": 251, + }, + } + ) + self.addCleanup(self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"}) + + def callback(session): + coll.insert_one({}, session=session) + + start = time.monotonic() + with self.client.start_session() as s: + s.with_transaction(callback) + end = time.monotonic() + no_backoff_time = end - start + + random.random = always_one + # set fail point to trigger transaction failure and trigger backoff + self.set_fail_point( + { + "configureFailPoint": "failCommand", + "mode": { + "times": 13 + }, # sufficiently high enough such that the time effect of backoff is noticeable + "data": { + "failCommands": ["commitTransaction"], + "errorCode": 251, + }, + } + ) + self.addCleanup(self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"}) + start = time.monotonic() + with 
self.client.start_session() as s: + s.with_transaction(callback) + end = time.monotonic() + self.assertLess(abs(end - start - (no_backoff_time + 2.2)), 1) # sum of 13 backoffs is 2.2 + + random.random = _original_random_random + class TestOptionsInsideTransactionProse(TransactionsBase): @client_context.require_transactions From e1249d091ff3217b5160095320fb24739a75d2bc Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 17 Dec 2025 20:21:07 -0600 Subject: [PATCH 06/14] PYTHON-5675 Fix Backpressure-related racy tests (#2653) --- ...ged-on-min-pool-size-population-error.json | 42 +++++++++---------- .../unified/minPoolSize-error.json | 5 ++- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json b/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json index 35a49c1323..f0597124b7 100644 --- a/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json +++ b/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json @@ -22,6 +22,27 @@ { "description": "the server description is not changed on handshake error during minPoolSize population", "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "skip": 1 + }, + "data": { + "failCommands": [ + "hello", + "isMaster" + ], + "appName": "authErrorTest", + "closeConnection": true + } + } + } + }, { "name": "createEntities", "object": "testRunner", @@ -46,27 +67,6 @@ ] } }, - { - "name": "failPoint", - "object": "testRunner", - "arguments": { - "client": "setupClient", - "failPoint": { - "configureFailPoint": "failCommand", - "mode": { - "skip": 1 - }, - "data": { - "failCommands": [ - 
"hello", - "isMaster" - ], - "appName": "authErrorTest", - "closeConnection": true - } - } - } - }, { "name": "waitForEvent", "object": "testRunner", diff --git a/test/discovery_and_monitoring/unified/minPoolSize-error.json b/test/discovery_and_monitoring/unified/minPoolSize-error.json index bd9e9fcdec..0ce2bdd6c1 100644 --- a/test/discovery_and_monitoring/unified/minPoolSize-error.json +++ b/test/discovery_and_monitoring/unified/minPoolSize-error.json @@ -27,7 +27,7 @@ ], "tests": [ { - "description": "Network error on minPoolSize background creation", + "description": "Server error on minPoolSize background creation", "operations": [ { "name": "failPoint", @@ -45,7 +45,7 @@ "isMaster" ], "appName": "SDAMminPoolSizeError", - "closeConnection": true + "errorCode": 91 } } } @@ -68,6 +68,7 @@ "heartbeatFrequencyMS": 10000, "appname": "SDAMminPoolSizeError", "minPoolSize": 10, + "serverMonitoringMode": "poll", "serverSelectionTimeoutMS": 1000 } } From 27a9f477a9c3c6ddac61af0234d4e9804c8b8827 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 21 Jan 2026 08:21:55 -0600 Subject: [PATCH 07/14] PYTHON-5696 Fix racy backpressure-network tests (#2678) --- ...ressure-network-error-fail-replicaset.json | 142 +++++++++++++++++ ...ckpressure-network-error-fail-single.json} | 14 +- ...ssure-network-timeout-fail-replicaset.json | 145 ++++++++++++++++++ ...pressure-network-timeout-fail-single.json} | 12 +- 4 files changed, 302 insertions(+), 11 deletions(-) create mode 100644 test/discovery_and_monitoring/unified/backpressure-network-error-fail-replicaset.json rename test/discovery_and_monitoring/unified/{backpressure-network-error-fail.json => backpressure-network-error-fail-single.json} (91%) create mode 100644 test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-replicaset.json rename test/discovery_and_monitoring/unified/{backpressure-network-timeout-fail.json => backpressure-network-timeout-fail-single.json} (93%) diff --git 
a/test/discovery_and_monitoring/unified/backpressure-network-error-fail-replicaset.json b/test/discovery_and_monitoring/unified/backpressure-network-error-fail-replicaset.json new file mode 100644 index 0000000000..ccaea8d135 --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-network-error-fail-replicaset.json @@ -0,0 +1,142 @@ +{ + "description": "backpressure-network-error-fail-replicaset", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "replicaset" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": "backpressure-network-error-fail", + "databaseName": "sdam-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ], + "tests": [ + { + "description": "apply backpressure on network connection errors during connection establishment", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "serverDescriptionChangedEvent", + "poolClearedEvent" + ], + "uriOptions": { + "retryWrites": false, + "heartbeatFrequencyMS": 1000000, + "serverMonitoringMode": "poll", + "appname": "backpressureNetworkErrorFailTest" + } + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "sdam-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "backpressure-network-error-fail" + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": { + "newDescription": { + "type": "RSPrimary" + } + } + }, + "count": 1 + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + 
"configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "isMaster", + "hello" + ], + "appName": "backpressureNetworkErrorFailTest", + "closeConnection": true + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 3 + }, + { + "_id": 4 + } + ] + }, + "expectError": { + "isError": true, + "errorLabelsContain": [ + "SystemOverloadedError", + "RetryableError" + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + } + ] +} diff --git a/test/discovery_and_monitoring/unified/backpressure-network-error-fail.json b/test/discovery_and_monitoring/unified/backpressure-network-error-fail-single.json similarity index 91% rename from test/discovery_and_monitoring/unified/backpressure-network-error-fail.json rename to test/discovery_and_monitoring/unified/backpressure-network-error-fail-single.json index f41b76459c..c1ff67c732 100644 --- a/test/discovery_and_monitoring/unified/backpressure-network-error-fail.json +++ b/test/discovery_and_monitoring/unified/backpressure-network-error-fail-single.json @@ -1,14 +1,12 @@ { - "description": "backpressure-network-error-fail", + "description": "backpressure-network-error-fail-single", "schemaVersion": "1.17", "runOnRequirements": [ { "minServerVersion": "4.4", "serverless": "forbid", "topologies": [ - "single", - "replicaset", - "sharded" + "single" ] } ], @@ -48,7 +46,7 @@ "id": "client", "useMultipleMongoses": false, "observeEvents": [ - "serverHeartbeatSucceededEvent", + "serverDescriptionChangedEvent", "poolClearedEvent" ], "uriOptions": { @@ -82,7 +80,11 @@ "arguments": { "client": "client", "event": { - "serverHeartbeatSucceededEvent": {} + "serverDescriptionChangedEvent": { + "newDescription": { + "type": "Standalone" + } + } }, "count": 1 } diff --git a/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-replicaset.json 
b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-replicaset.json new file mode 100644 index 0000000000..35b088f422 --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-replicaset.json @@ -0,0 +1,145 @@ +{ + "description": "backpressure-network-timeout-error-replicaset", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "replicaset" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": "backpressure-network-timeout-error", + "databaseName": "sdam-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ], + "tests": [ + { + "description": "apply backpressure on network timeout error during connection establishment", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "serverDescriptionChangedEvent", + "poolClearedEvent" + ], + "uriOptions": { + "retryWrites": false, + "heartbeatFrequencyMS": 1000000, + "appname": "backpressureNetworkTimeoutErrorTest", + "serverMonitoringMode": "poll", + "connectTimeoutMS": 250, + "socketTimeoutMS": 250 + } + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "sdam-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "backpressure-network-timeout-error" + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": { + "newDescription": { + "type": "RSPrimary" + } + } + }, + "count": 1 + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": 
"failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "isMaster", + "hello" + ], + "blockConnection": true, + "blockTimeMS": 500, + "appName": "backpressureNetworkTimeoutErrorTest" + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 3 + }, + { + "_id": 4 + } + ] + }, + "expectError": { + "isError": true, + "errorLabelsContain": [ + "SystemOverloadedError", + "RetryableError" + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + } + ] +} diff --git a/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail.json b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-single.json similarity index 93% rename from test/discovery_and_monitoring/unified/backpressure-network-timeout-fail.json rename to test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-single.json index a97c7a329f..54b11d4d5b 100644 --- a/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail.json +++ b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-single.json @@ -1,14 +1,12 @@ { - "description": "backpressure-network-timeout-error", + "description": "backpressure-network-timeout-error-single", "schemaVersion": "1.17", "runOnRequirements": [ { "minServerVersion": "4.4", "serverless": "forbid", "topologies": [ - "single", - "replicaset", - "sharded" + "single" ] } ], @@ -84,7 +82,11 @@ "arguments": { "client": "client", "event": { - "serverDescriptionChangedEvent": {} + "serverDescriptionChangedEvent": { + "newDescription": { + "type": "Standalone" + } + } }, "count": 1 } From 8dbf90372b0168ac231354f167bc76401a8f1a3e Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 4 Feb 2026 12:12:42 -0600 Subject: [PATCH 08/14] PYTHON-5528 & PYTHON-5651 Add exponential backoff to operation retry loop for server overloaded errors (#2635) Co-authored-by: Kevin Albertson 
Co-authored-by: Casey Clements --- .evergreen/resync-specs.sh | 3 + justfile | 16 +- pymongo/asynchronous/client_bulk.py | 15 +- pymongo/asynchronous/client_session.py | 4 + pymongo/asynchronous/collection.py | 86 +- pymongo/asynchronous/database.py | 54 +- pymongo/asynchronous/helpers.py | 29 - pymongo/asynchronous/mongo_client.py | 51 +- pymongo/asynchronous/pool.py | 1 + pymongo/synchronous/client_bulk.py | 15 +- pymongo/synchronous/client_session.py | 4 + pymongo/synchronous/collection.py | 82 +- pymongo/synchronous/database.py | 50 +- pymongo/synchronous/helpers.py | 29 - pymongo/synchronous/mongo_client.py | 51 +- pymongo/synchronous/pool.py | 1 + ...ressure.py => test_client_backpressure.py} | 100 +- test/asynchronous/test_client_metadata.py | 13 + .../backpressure-retry-loop.json | 2759 +++++++++++++ .../backpressure-retry-max-attempts.json | 3448 +++++++++++++++++ test/client-backpressure/getMore-retried.json | 291 ++ ...ressure.py => test_client_backpressure.py} | 100 +- test/test_client_metadata.py | 13 + .../unified/backpressure-retryable-abort.json | 357 ++ .../backpressure-retryable-commit.json | 374 ++ .../unified/backpressure-retryable-reads.json | 328 ++ .../backpressure-retryable-writes.json | 440 +++ tools/synchro.py | 2 +- 28 files changed, 8506 insertions(+), 210 deletions(-) rename test/asynchronous/{test_backpressure.py => test_client_backpressure.py} (70%) create mode 100644 test/client-backpressure/backpressure-retry-loop.json create mode 100644 test/client-backpressure/backpressure-retry-max-attempts.json create mode 100644 test/client-backpressure/getMore-retried.json rename test/{test_backpressure.py => test_client_backpressure.py} (70%) create mode 100644 test/transactions/unified/backpressure-retryable-abort.json create mode 100644 test/transactions/unified/backpressure-retryable-commit.json create mode 100644 test/transactions/unified/backpressure-retryable-reads.json create mode 100644 
test/transactions/unified/backpressure-retryable-writes.json diff --git a/.evergreen/resync-specs.sh b/.evergreen/resync-specs.sh index d2bd89c781..4bb9c86304 100755 --- a/.evergreen/resync-specs.sh +++ b/.evergreen/resync-specs.sh @@ -94,6 +94,9 @@ do change-streams|change_streams) cpjson change-streams/tests/ change_streams/ ;; + client-backpressure|client_backpressure) + cpjson client-backpressure/tests client-backpressure + ;; client-side-encryption|csfle|fle) cpjson client-side-encryption/tests/ client-side-encryption/spec cpjson client-side-encryption/corpus/ client-side-encryption/corpus diff --git a/justfile b/justfile index 0ef05365d5..082b6ea170 100644 --- a/justfile +++ b/justfile @@ -1,7 +1,5 @@ # See https://just.systems/man/en/ for instructions set shell := ["bash", "-c"] -# Do not modify the lock file when running justfile commands. -export UV_FROZEN := "1" # Commonly used command segments. typing_run := "uv run --group typing --extra aws --extra encryption --with numpy --extra ocsp --extra snappy --extra test --extra zstd" @@ -16,7 +14,7 @@ default: [private] resync: - @uv sync --quiet --frozen + @uv sync --quiet install: bash .evergreen/scripts/setup-dev-env.sh @@ -50,12 +48,12 @@ typing-pyright: && resync {{typing_run}} python -m pyright -p strict_pyrightconfig.json test/test_typing_strict.py [group('lint')] -lint: && resync - uv run pre-commit run --all-files +lint *args="": && resync + uvx pre-commit run --all-files {{args}} [group('lint')] -lint-manual: && resync - uv run pre-commit run --all-files --hook-stage manual +lint-manual *args="": && resync + uvx pre-commit run --all-files --hook-stage manual {{args}} [group('test')] test *args="-v --durations=5 --maxfail=10": && resync @@ -77,6 +75,10 @@ setup-tests *args="": teardown-tests: bash .evergreen/scripts/teardown-tests.sh +[group('test')] +integration-tests: + bash integration_tests/run.sh + [group('server')] run-server *args="": bash .evergreen/scripts/run-server.sh {{args}} diff --git 
a/pymongo/asynchronous/client_bulk.py b/pymongo/asynchronous/client_bulk.py index 151942c8a8..bda374e9b9 100644 --- a/pymongo/asynchronous/client_bulk.py +++ b/pymongo/asynchronous/client_bulk.py @@ -563,9 +563,22 @@ async def _execute_command( error, ConnectionFailure ) and not isinstance(error, (NotPrimaryError, WaitQueueTimeoutError)) + retryable_label_error = ( + hasattr(error, "details") + and isinstance(error.details, dict) + and "errorLabels" in error.details + and isinstance(error.details["errorLabels"], list) + and "RetryableError" in error.details["errorLabels"] + and "SystemOverloadedError" in error.details["errorLabels"] + ) + # Synthesize the full bulk result without modifying the # current one because this write operation may be retried. - if retryable and (retryable_top_level_error or retryable_network_error): + if retryable and ( + retryable_top_level_error + or retryable_network_error + or retryable_label_error + ): full = copy.deepcopy(full_result) _merge_command(self.ops, self.idx_offset, full, result) _throw_client_bulk_write_exception(full, self.verbose_results) diff --git a/pymongo/asynchronous/client_session.py b/pymongo/asynchronous/client_session.py index 27f26c35b2..c74d91154a 100644 --- a/pymongo/asynchronous/client_session.py +++ b/pymongo/asynchronous/client_session.py @@ -406,6 +406,7 @@ def __init__(self, opts: Optional[TransactionOptions], client: AsyncMongoClient[ self.recovery_token = None self.attempt = 0 self.client = client + self.has_completed_command = False def active(self) -> bool: return self.state in (_TxnState.STARTING, _TxnState.IN_PROGRESS) @@ -413,6 +414,9 @@ def active(self) -> bool: def starting(self) -> bool: return self.state == _TxnState.STARTING + def set_starting(self) -> None: + self.state = _TxnState.STARTING + @property def pinned_conn(self) -> Optional[AsyncConnection]: if self.active() and self.conn_mgr: diff --git a/pymongo/asynchronous/collection.py b/pymongo/asynchronous/collection.py index 
eefce63a7b..4de72af47b 100644 --- a/pymongo/asynchronous/collection.py +++ b/pymongo/asynchronous/collection.py @@ -20,7 +20,6 @@ from typing import ( TYPE_CHECKING, Any, - AsyncContextManager, Callable, Coroutine, Generic, @@ -58,7 +57,6 @@ AsyncCursor, AsyncRawBatchCursor, ) -from pymongo.asynchronous.helpers import _retry_overload from pymongo.collation import validate_collation_or_none from pymongo.common import _ecoc_coll_name, _esc_coll_name from pymongo.errors import ( @@ -573,11 +571,6 @@ async def watch( await change_stream._initialize_cursor() return change_stream - async def _conn_for_writes( - self, session: Optional[AsyncClientSession], operation: str - ) -> AsyncContextManager[AsyncConnection]: - return await self._database.client._conn_for_writes(session, operation) - async def _command( self, conn: AsyncConnection, @@ -654,7 +647,10 @@ async def _create_helper( if "size" in options: options["size"] = float(options["size"]) cmd.update(options) - async with await self._conn_for_writes(session, operation=_Op.CREATE) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> None: if qev2_required and conn.max_wire_version < 21: raise ConfigurationError( "Driver support of Queryable Encryption is incompatible with server. " @@ -671,6 +667,8 @@ async def _create_helper( session=session, ) + await self.database.client._retryable_write(False, inner, session, _Op.CREATE) + async def _create( self, options: MutableMapping[str, Any], @@ -2229,7 +2227,6 @@ async def create_indexes( return await self._create_indexes(indexes, session, **kwargs) @_csot.apply - @_retry_overload async def _create_indexes( self, indexes: Sequence[IndexModel], session: Optional[AsyncClientSession], **kwargs: Any ) -> list[str]: @@ -2243,7 +2240,10 @@ async def _create_indexes( command (like maxTimeMS) can be passed as keyword arguments. 
""" names = [] - async with await self._conn_for_writes(session, operation=_Op.CREATE_INDEXES) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> list[str]: supports_quorum = conn.max_wire_version >= 9 def gen_indexes() -> Iterator[Mapping[str, Any]]: @@ -2272,7 +2272,11 @@ def gen_indexes() -> Iterator[Mapping[str, Any]]: write_concern=self._write_concern_for(session), session=session, ) - return names + return names + + return await self.database.client._retryable_write( + False, inner, session, _Op.CREATE_INDEXES + ) async def create_index( self, @@ -2474,7 +2478,6 @@ async def drop_index( await self._drop_index(index_or_name, session, comment, **kwargs) @_csot.apply - @_retry_overload async def _drop_index( self, index_or_name: _IndexKeyHint, @@ -2493,7 +2496,10 @@ async def _drop_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - async with await self._conn_for_writes(session, operation=_Op.DROP_INDEXES) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> None: await self._command( conn, cmd, @@ -2503,6 +2509,8 @@ async def _drop_index( session=session, ) + await self.database.client._retryable_write(False, inner, session, _Op.DROP_INDEXES) + async def list_indexes( self, session: Optional[AsyncClientSession] = None, @@ -2766,17 +2774,22 @@ def gen_indexes() -> Iterator[Mapping[str, Any]]: cmd = {"createSearchIndexes": self.name, "indexes": list(gen_indexes())} cmd.update(kwargs) - async with await self._conn_for_writes( - session, operation=_Op.CREATE_SEARCH_INDEXES - ) as conn: + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> list[str]: resp = await self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) return [index["name"] for index in 
resp["indexesCreated"]] + return await self.database.client._retryable_write( + False, inner, session, _Op.CREATE_SEARCH_INDEXES + ) + async def drop_search_index( self, name: str, @@ -2802,15 +2815,21 @@ async def drop_search_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - async with await self._conn_for_writes(session, operation=_Op.DROP_SEARCH_INDEXES) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> None: await self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, allowable_errors=["ns not found", 26], codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) + await self.database.client._retryable_write(False, inner, session, _Op.DROP_SEARCH_INDEXES) + async def update_search_index( self, name: str, @@ -2838,15 +2857,21 @@ async def update_search_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - async with await self._conn_for_writes(session, operation=_Op.UPDATE_SEARCH_INDEX) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> None: await self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, allowable_errors=["ns not found", 26], codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) + await self.database.client._retryable_write(False, inner, session, _Op.UPDATE_SEARCH_INDEX) + async def options( self, session: Optional[AsyncClientSession] = None, @@ -3075,7 +3100,6 @@ async def aggregate_raw_batches( ) @_csot.apply - @_retry_overload async def rename( self, new_name: str, @@ -3127,17 +3151,21 @@ async def rename( if comment is not None: cmd["comment"] = comment write_concern = self._write_concern_for_cmd(cmd, session) + client = self._database.client - async with await self._conn_for_writes(session, operation=_Op.RENAME) as conn: - async with self._database.client._tmp_session(session) as s: - return await 
conn.command( - "admin", - cmd, - write_concern=write_concern, - parse_write_concern_error=True, - session=s, - client=self._database.client, - ) + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> MutableMapping[str, Any]: + return await conn.command( + "admin", + cmd, + write_concern=write_concern, + parse_write_concern_error=True, + session=session, + client=client, + ) + + return await client._retryable_write(False, inner, session, _Op.RENAME) async def distinct( self, diff --git a/pymongo/asynchronous/database.py b/pymongo/asynchronous/database.py index 5e34e52e69..3af77ffe15 100644 --- a/pymongo/asynchronous/database.py +++ b/pymongo/asynchronous/database.py @@ -38,7 +38,6 @@ from pymongo.asynchronous.change_stream import AsyncDatabaseChangeStream from pymongo.asynchronous.collection import AsyncCollection from pymongo.asynchronous.command_cursor import AsyncCommandCursor -from pymongo.asynchronous.helpers import _retry_overload from pymongo.common import _ecoc_coll_name, _esc_coll_name from pymongo.database_shared import _check_name, _CodecDocumentType from pymongo.errors import CollectionInvalid, InvalidOperation @@ -479,7 +478,6 @@ async def watch( return change_stream @_csot.apply - @_retry_overload async def create_collection( self, name: str, @@ -822,7 +820,6 @@ async def command( ... 
@_csot.apply - @_retry_overload async def command( self, command: Union[str, MutableMapping[str, Any]], @@ -935,14 +932,15 @@ async def command( if read_preference is None: read_preference = (session and session._txn_read_preference()) or ReadPreference.PRIMARY - async with await self._client._conn_for_reads( - read_preference, session, operation=command_name - ) as ( - connection, - read_preference, - ): + + async def inner( + session: Optional[AsyncClientSession], + _server: Server, + conn: AsyncConnection, + read_preference: _ServerMode, + ) -> Union[dict[str, Any], _CodecDocumentType]: return await self._command( - connection, + conn, command, value, check, @@ -953,8 +951,11 @@ async def command( **kwargs, ) + return await self._client._retryable_read( + inner, read_preference, session, command_name, None, False + ) + @_csot.apply - @_retry_overload async def cursor_command( self, command: Union[str, MutableMapping[str, Any]], @@ -1021,17 +1022,17 @@ async def cursor_command( async with self._client._tmp_session(session) as tmp_session: opts = codec_options or DEFAULT_CODEC_OPTIONS - if read_preference is None: read_preference = ( tmp_session and tmp_session._txn_read_preference() ) or ReadPreference.PRIMARY - async with await self._client._conn_for_reads( - read_preference, tmp_session, command_name - ) as ( - conn, - read_preference, - ): + + async def inner( + session: Optional[AsyncClientSession], + _server: Server, + conn: AsyncConnection, + read_preference: _ServerMode, + ) -> AsyncCommandCursor[_DocumentType]: response = await self._command( conn, command, @@ -1040,7 +1041,7 @@ async def cursor_command( None, read_preference, opts, - session=tmp_session, + session=session, **kwargs, ) coll = self.get_collection("$cmd", read_preference=read_preference) @@ -1050,7 +1051,7 @@ async def cursor_command( response["cursor"], conn.address, max_await_time_ms=max_await_time_ms, - session=tmp_session, + session=session, comment=comment, ) await 
cmd_cursor._maybe_pin_connection(conn) @@ -1058,6 +1059,10 @@ async def cursor_command( else: raise InvalidOperation("Command does not return a cursor.") + return await self.client._retryable_read( + inner, read_preference, tmp_session, command_name, None, False + ) + async def _retryable_read_command( self, command: Union[str, MutableMapping[str, Any]], @@ -1259,9 +1264,11 @@ async def _drop_helper( if comment is not None: command["comment"] = comment - async with await self._client._conn_for_writes(session, operation=_Op.DROP) as connection: + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> dict[str, Any]: return await self._command( - connection, + conn, command, allowable_errors=["ns not found", 26], write_concern=self._write_concern_for(session), @@ -1269,8 +1276,9 @@ async def _drop_helper( session=session, ) + return await self.client._retryable_write(False, inner, session, _Op.DROP) + @_csot.apply - @_retry_overload async def drop_collection( self, name_or_collection: Union[str, AsyncCollection[_DocumentTypeArg]], diff --git a/pymongo/asynchronous/helpers.py b/pymongo/asynchronous/helpers.py index a389455e24..cc9fbfb2fc 100644 --- a/pymongo/asynchronous/helpers.py +++ b/pymongo/asynchronous/helpers.py @@ -32,7 +32,6 @@ from pymongo import _csot from pymongo.errors import ( OperationFailure, - PyMongoError, ) from pymongo.helpers_shared import _REAUTHENTICATION_REQUIRED_CODE from pymongo.lock import _async_create_lock @@ -166,34 +165,6 @@ async def should_retry(self, attempt: int, delay: float) -> bool: return True -def _retry_overload(func: F) -> F: - @functools.wraps(func) - async def inner(self: Any, *args: Any, **kwargs: Any) -> Any: - retry_policy = self._retry_policy - attempt = 0 - while True: - try: - res = await func(self, *args, **kwargs) - await retry_policy.record_success(retry=attempt > 0) - return res - except PyMongoError as exc: - if not exc.has_error_label("RetryableError"): - 
raise - attempt += 1 - delay = 0 - if exc.has_error_label("SystemOverloadedError"): - delay = retry_policy.backoff(attempt) - if not await retry_policy.should_retry(attempt, delay): - raise - - # Implement exponential backoff on retry. - if delay: - await asyncio.sleep(delay) - continue - - return cast(F, inner) - - async def _getaddrinfo( host: Any, port: Any, **kwargs: Any ) -> list[ diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 1581d52e59..4a8d41aca1 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -69,7 +69,6 @@ from pymongo.asynchronous.client_session import _EmptyServerSession from pymongo.asynchronous.command_cursor import AsyncCommandCursor from pymongo.asynchronous.helpers import ( - _retry_overload, _RetryPolicy, _TokenBucket, ) @@ -2403,7 +2402,6 @@ async def list_database_names( return [doc["name"] async for doc in res] @_csot.apply - @_retry_overload async def drop_database( self, name_or_database: Union[str, database.AsyncDatabase[_DocumentTypeArg]], @@ -2446,15 +2444,13 @@ async def drop_database( f"name_or_database must be an instance of str or a AsyncDatabase, not {type(name)}" ) - async with await self._conn_for_writes(session, operation=_Op.DROP_DATABASE) as conn: - await self[name]._command( - conn, - {"dropDatabase": 1, "comment": comment}, - read_preference=ReadPreference.PRIMARY, - write_concern=self._write_concern_for(session), - parse_write_concern_error=True, - session=session, - ) + await self[name].command( + {"dropDatabase": 1, "comment": comment}, + read_preference=ReadPreference.PRIMARY, + write_concern=self._write_concern_for(session), + parse_write_concern_error=True, + session=session, + ) @_csot.apply async def bulk_write( @@ -2781,6 +2777,11 @@ async def run(self) -> T: try: res = await self._read() if self._is_read else await self._write() await self._retry_policy.record_success(self._attempt_number > 0) + # Track whether the 
transaction has completed a command. + # If we need to apply backpressure to the first command, + # we will need to revert back to starting state. + if self._session is not None and self._session.in_transaction: + self._session._transaction.has_completed_command = True return res except ServerSelectionTimeoutError: # The application may think the write was never attempted @@ -2800,8 +2801,8 @@ async def run(self) -> T: if isinstance(exc, (ConnectionFailure, OperationFailure)): # ConnectionFailures do not supply a code property exc_code = getattr(exc, "code", None) - always_retryable = exc.has_error_label("RetryableError") overloaded = exc.has_error_label("SystemOverloadedError") + always_retryable = exc.has_error_label("RetryableError") and overloaded if not always_retryable and ( self._is_not_eligible_for_retry() or ( @@ -2813,6 +2814,18 @@ async def run(self) -> T: self._retrying = True self._last_error = exc self._attempt_number += 1 + + # Revert back to starting state if we're in a transaction but haven't completed the first + # command. 
+ if ( + overloaded + and self._session is not None + and self._session.in_transaction + ): + transaction = self._session._transaction + if not transaction.has_completed_command: + transaction.set_starting() + transaction.attempt = 0 else: raise @@ -2823,8 +2836,8 @@ async def run(self) -> T: ): exc_to_check = exc.error retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") - always_retryable = exc_to_check.has_error_label("RetryableError") overloaded = exc_to_check.has_error_label("SystemOverloadedError") + always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded if not self._retryable and not always_retryable: raise if retryable_write_label or always_retryable: @@ -2846,20 +2859,26 @@ async def run(self) -> T: self._last_error = exc if self._last_error is None: self._last_error = exc + # Revert back to starting state if we're in a transaction but haven't completed the first + # command. + if overloaded and self._session is not None and self._session.in_transaction: + transaction = self._session._transaction + if not transaction.has_completed_command: + transaction.set_starting() + transaction.attempt = 0 if self._server is not None: self._deprioritized_servers.append(self._server) self._always_retryable = always_retryable - if always_retryable: + if overloaded: delay = self._retry_policy.backoff(self._attempt_number) if overloaded else 0 if not await self._retry_policy.should_retry(self._attempt_number, delay): if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error: raise self._last_error from exc else: raise - if overloaded: - await asyncio.sleep(delay) + await asyncio.sleep(delay) def _is_not_eligible_for_retry(self) -> bool: """Checks if the exchange is not eligible for retry""" diff --git a/pymongo/asynchronous/pool.py b/pymongo/asynchronous/pool.py index f30e8fd9f3..3c1a85246e 100644 --- a/pymongo/asynchronous/pool.py +++ b/pymongo/asynchronous/pool.py @@ -254,6 +254,7 @@ async def _hello( cmd = 
self.hello_cmd() performing_handshake = not self.performed_handshake awaitable = False + cmd["backpressure"] = True if performing_handshake: self.performed_handshake = True cmd["client"] = self.opts.metadata diff --git a/pymongo/synchronous/client_bulk.py b/pymongo/synchronous/client_bulk.py index a606d028e1..30f32488ec 100644 --- a/pymongo/synchronous/client_bulk.py +++ b/pymongo/synchronous/client_bulk.py @@ -561,9 +561,22 @@ def _execute_command( error, ConnectionFailure ) and not isinstance(error, (NotPrimaryError, WaitQueueTimeoutError)) + retryable_label_error = ( + hasattr(error, "details") + and isinstance(error.details, dict) + and "errorLabels" in error.details + and isinstance(error.details["errorLabels"], list) + and "RetryableError" in error.details["errorLabels"] + and "SystemOverloadedError" in error.details["errorLabels"] + ) + # Synthesize the full bulk result without modifying the # current one because this write operation may be retried. - if retryable and (retryable_top_level_error or retryable_network_error): + if retryable and ( + retryable_top_level_error + or retryable_network_error + or retryable_label_error + ): full = copy.deepcopy(full_result) _merge_command(self.ops, self.idx_offset, full, result) _throw_client_bulk_write_exception(full, self.verbose_results) diff --git a/pymongo/synchronous/client_session.py b/pymongo/synchronous/client_session.py index 28999bcd62..d67bf4424b 100644 --- a/pymongo/synchronous/client_session.py +++ b/pymongo/synchronous/client_session.py @@ -404,6 +404,7 @@ def __init__(self, opts: Optional[TransactionOptions], client: MongoClient[Any]) self.recovery_token = None self.attempt = 0 self.client = client + self.has_completed_command = False def active(self) -> bool: return self.state in (_TxnState.STARTING, _TxnState.IN_PROGRESS) @@ -411,6 +412,9 @@ def active(self) -> bool: def starting(self) -> bool: return self.state == _TxnState.STARTING + def set_starting(self) -> None: + self.state = _TxnState.STARTING 
+ @property def pinned_conn(self) -> Optional[Connection]: if self.active() and self.conn_mgr: diff --git a/pymongo/synchronous/collection.py b/pymongo/synchronous/collection.py index c08d7c8163..73207d6027 100644 --- a/pymongo/synchronous/collection.py +++ b/pymongo/synchronous/collection.py @@ -21,7 +21,6 @@ TYPE_CHECKING, Any, Callable, - ContextManager, Generic, Iterable, Iterator, @@ -89,7 +88,6 @@ Cursor, RawBatchCursor, ) -from pymongo.synchronous.helpers import _retry_overload from pymongo.typings import _CollationIn, _DocumentType, _DocumentTypeArg, _Pipeline from pymongo.write_concern import DEFAULT_WRITE_CONCERN, WriteConcern, validate_boolean @@ -574,11 +572,6 @@ def watch( change_stream._initialize_cursor() return change_stream - def _conn_for_writes( - self, session: Optional[ClientSession], operation: str - ) -> ContextManager[Connection]: - return self._database.client._conn_for_writes(session, operation) - def _command( self, conn: Connection, @@ -655,7 +648,10 @@ def _create_helper( if "size" in options: options["size"] = float(options["size"]) cmd.update(options) - with self._conn_for_writes(session, operation=_Op.CREATE) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> None: if qev2_required and conn.max_wire_version < 21: raise ConfigurationError( "Driver support of Queryable Encryption is incompatible with server. " @@ -672,6 +668,8 @@ def _create_helper( session=session, ) + self.database.client._retryable_write(False, inner, session, _Op.CREATE) + def _create( self, options: MutableMapping[str, Any], @@ -2226,7 +2224,6 @@ def create_indexes( return self._create_indexes(indexes, session, **kwargs) @_csot.apply - @_retry_overload def _create_indexes( self, indexes: Sequence[IndexModel], session: Optional[ClientSession], **kwargs: Any ) -> list[str]: @@ -2240,7 +2237,10 @@ def _create_indexes( command (like maxTimeMS) can be passed as keyword arguments. 
""" names = [] - with self._conn_for_writes(session, operation=_Op.CREATE_INDEXES) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> list[str]: supports_quorum = conn.max_wire_version >= 9 def gen_indexes() -> Iterator[Mapping[str, Any]]: @@ -2269,7 +2269,9 @@ def gen_indexes() -> Iterator[Mapping[str, Any]]: write_concern=self._write_concern_for(session), session=session, ) - return names + return names + + return self.database.client._retryable_write(False, inner, session, _Op.CREATE_INDEXES) def create_index( self, @@ -2471,7 +2473,6 @@ def drop_index( self._drop_index(index_or_name, session, comment, **kwargs) @_csot.apply - @_retry_overload def _drop_index( self, index_or_name: _IndexKeyHint, @@ -2490,7 +2491,10 @@ def _drop_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - with self._conn_for_writes(session, operation=_Op.DROP_INDEXES) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> None: self._command( conn, cmd, @@ -2500,6 +2504,8 @@ def _drop_index( session=session, ) + self.database.client._retryable_write(False, inner, session, _Op.DROP_INDEXES) + def list_indexes( self, session: Optional[ClientSession] = None, @@ -2763,15 +2769,22 @@ def gen_indexes() -> Iterator[Mapping[str, Any]]: cmd = {"createSearchIndexes": self.name, "indexes": list(gen_indexes())} cmd.update(kwargs) - with self._conn_for_writes(session, operation=_Op.CREATE_SEARCH_INDEXES) as conn: + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> list[str]: resp = self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) return [index["name"] for index in resp["indexesCreated"]] + return self.database.client._retryable_write( + False, inner, session, _Op.CREATE_SEARCH_INDEXES + ) + def drop_search_index( self, name: str, @@ -2797,15 
+2810,21 @@ def drop_search_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - with self._conn_for_writes(session, operation=_Op.DROP_SEARCH_INDEXES) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> None: self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, allowable_errors=["ns not found", 26], codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) + self.database.client._retryable_write(False, inner, session, _Op.DROP_SEARCH_INDEXES) + def update_search_index( self, name: str, @@ -2833,15 +2852,21 @@ def update_search_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - with self._conn_for_writes(session, operation=_Op.UPDATE_SEARCH_INDEX) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> None: self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, allowable_errors=["ns not found", 26], codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) + self.database.client._retryable_write(False, inner, session, _Op.UPDATE_SEARCH_INDEX) + def options( self, session: Optional[ClientSession] = None, @@ -3068,7 +3093,6 @@ def aggregate_raw_batches( ) @_csot.apply - @_retry_overload def rename( self, new_name: str, @@ -3120,17 +3144,21 @@ def rename( if comment is not None: cmd["comment"] = comment write_concern = self._write_concern_for_cmd(cmd, session) + client = self._database.client - with self._conn_for_writes(session, operation=_Op.RENAME) as conn: - with self._database.client._tmp_session(session) as s: - return conn.command( - "admin", - cmd, - write_concern=write_concern, - parse_write_concern_error=True, - session=s, - client=self._database.client, - ) + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> MutableMapping[str, Any]: + return conn.command( + "admin", + cmd, + write_concern=write_concern, + 
parse_write_concern_error=True, + session=session, + client=client, + ) + + return client._retryable_write(False, inner, session, _Op.RENAME) def distinct( self, diff --git a/pymongo/synchronous/database.py b/pymongo/synchronous/database.py index 27cc102d05..60a55276c6 100644 --- a/pymongo/synchronous/database.py +++ b/pymongo/synchronous/database.py @@ -43,7 +43,6 @@ from pymongo.synchronous.change_stream import DatabaseChangeStream from pymongo.synchronous.collection import Collection from pymongo.synchronous.command_cursor import CommandCursor -from pymongo.synchronous.helpers import _retry_overload from pymongo.typings import _CollationIn, _DocumentType, _DocumentTypeArg, _Pipeline if TYPE_CHECKING: @@ -479,7 +478,6 @@ def watch( return change_stream @_csot.apply - @_retry_overload def create_collection( self, name: str, @@ -822,7 +820,6 @@ def command( ... @_csot.apply - @_retry_overload def command( self, command: Union[str, MutableMapping[str, Any]], @@ -935,12 +932,15 @@ def command( if read_preference is None: read_preference = (session and session._txn_read_preference()) or ReadPreference.PRIMARY - with self._client._conn_for_reads(read_preference, session, operation=command_name) as ( - connection, - read_preference, - ): + + def inner( + session: Optional[ClientSession], + _server: Server, + conn: Connection, + read_preference: _ServerMode, + ) -> Union[dict[str, Any], _CodecDocumentType]: return self._command( - connection, + conn, command, value, check, @@ -951,8 +951,11 @@ def command( **kwargs, ) + return self._client._retryable_read( + inner, read_preference, session, command_name, None, False + ) + @_csot.apply - @_retry_overload def cursor_command( self, command: Union[str, MutableMapping[str, Any]], @@ -1019,15 +1022,17 @@ def cursor_command( with self._client._tmp_session(session) as tmp_session: opts = codec_options or DEFAULT_CODEC_OPTIONS - if read_preference is None: read_preference = ( tmp_session and tmp_session._txn_read_preference() ) 
or ReadPreference.PRIMARY - with self._client._conn_for_reads(read_preference, tmp_session, command_name) as ( - conn, - read_preference, - ): + + def inner( + session: Optional[ClientSession], + _server: Server, + conn: Connection, + read_preference: _ServerMode, + ) -> CommandCursor[_DocumentType]: response = self._command( conn, command, @@ -1036,7 +1041,7 @@ def cursor_command( None, read_preference, opts, - session=tmp_session, + session=session, **kwargs, ) coll = self.get_collection("$cmd", read_preference=read_preference) @@ -1046,7 +1051,7 @@ def cursor_command( response["cursor"], conn.address, max_await_time_ms=max_await_time_ms, - session=tmp_session, + session=session, comment=comment, ) cmd_cursor._maybe_pin_connection(conn) @@ -1054,6 +1059,10 @@ def cursor_command( else: raise InvalidOperation("Command does not return a cursor.") + return self.client._retryable_read( + inner, read_preference, tmp_session, command_name, None, False + ) + def _retryable_read_command( self, command: Union[str, MutableMapping[str, Any]], @@ -1252,9 +1261,11 @@ def _drop_helper( if comment is not None: command["comment"] = comment - with self._client._conn_for_writes(session, operation=_Op.DROP) as connection: + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> dict[str, Any]: return self._command( - connection, + conn, command, allowable_errors=["ns not found", 26], write_concern=self._write_concern_for(session), @@ -1262,8 +1273,9 @@ def _drop_helper( session=session, ) + return self.client._retryable_write(False, inner, session, _Op.DROP) + @_csot.apply - @_retry_overload def drop_collection( self, name_or_collection: Union[str, Collection[_DocumentTypeArg]], diff --git a/pymongo/synchronous/helpers.py b/pymongo/synchronous/helpers.py index 95be8b7d0a..9d93f9c47f 100644 --- a/pymongo/synchronous/helpers.py +++ b/pymongo/synchronous/helpers.py @@ -32,7 +32,6 @@ from pymongo import _csot from pymongo.errors import ( 
OperationFailure, - PyMongoError, ) from pymongo.helpers_shared import _REAUTHENTICATION_REQUIRED_CODE from pymongo.lock import _create_lock @@ -166,34 +165,6 @@ def should_retry(self, attempt: int, delay: float) -> bool: return True -def _retry_overload(func: F) -> F: - @functools.wraps(func) - def inner(self: Any, *args: Any, **kwargs: Any) -> Any: - retry_policy = self._retry_policy - attempt = 0 - while True: - try: - res = func(self, *args, **kwargs) - retry_policy.record_success(retry=attempt > 0) - return res - except PyMongoError as exc: - if not exc.has_error_label("RetryableError"): - raise - attempt += 1 - delay = 0 - if exc.has_error_label("SystemOverloadedError"): - delay = retry_policy.backoff(attempt) - if not retry_policy.should_retry(attempt, delay): - raise - - # Implement exponential backoff on retry. - if delay: - time.sleep(delay) - continue - - return cast(F, inner) - - def _getaddrinfo( host: Any, port: Any, **kwargs: Any ) -> list[ diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 19077fb5d4..a6a07a6c9f 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -112,7 +112,6 @@ from pymongo.synchronous.client_session import _EmptyServerSession from pymongo.synchronous.command_cursor import CommandCursor from pymongo.synchronous.helpers import ( - _retry_overload, _RetryPolicy, _TokenBucket, ) @@ -2393,7 +2392,6 @@ def list_database_names( return [doc["name"] for doc in res] @_csot.apply - @_retry_overload def drop_database( self, name_or_database: Union[str, database.Database[_DocumentTypeArg]], @@ -2436,15 +2434,13 @@ def drop_database( f"name_or_database must be an instance of str or a Database, not {type(name)}" ) - with self._conn_for_writes(session, operation=_Op.DROP_DATABASE) as conn: - self[name]._command( - conn, - {"dropDatabase": 1, "comment": comment}, - read_preference=ReadPreference.PRIMARY, - write_concern=self._write_concern_for(session), - 
parse_write_concern_error=True, - session=session, - ) + self[name].command( + {"dropDatabase": 1, "comment": comment}, + read_preference=ReadPreference.PRIMARY, + write_concern=self._write_concern_for(session), + parse_write_concern_error=True, + session=session, + ) @_csot.apply def bulk_write( @@ -2771,6 +2767,11 @@ def run(self) -> T: try: res = self._read() if self._is_read else self._write() self._retry_policy.record_success(self._attempt_number > 0) + # Track whether the transaction has completed a command. + # If we need to apply backpressure to the first command, + # we will need to revert back to starting state. + if self._session is not None and self._session.in_transaction: + self._session._transaction.has_completed_command = True return res except ServerSelectionTimeoutError: # The application may think the write was never attempted @@ -2790,8 +2791,8 @@ def run(self) -> T: if isinstance(exc, (ConnectionFailure, OperationFailure)): # ConnectionFailures do not supply a code property exc_code = getattr(exc, "code", None) - always_retryable = exc.has_error_label("RetryableError") overloaded = exc.has_error_label("SystemOverloadedError") + always_retryable = exc.has_error_label("RetryableError") and overloaded if not always_retryable and ( self._is_not_eligible_for_retry() or ( @@ -2803,6 +2804,18 @@ def run(self) -> T: self._retrying = True self._last_error = exc self._attempt_number += 1 + + # Revert back to starting state if we're in a transaction but haven't completed the first + # command. 
+ if ( + overloaded + and self._session is not None + and self._session.in_transaction + ): + transaction = self._session._transaction + if not transaction.has_completed_command: + transaction.set_starting() + transaction.attempt = 0 else: raise @@ -2813,8 +2826,8 @@ def run(self) -> T: ): exc_to_check = exc.error retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") - always_retryable = exc_to_check.has_error_label("RetryableError") overloaded = exc_to_check.has_error_label("SystemOverloadedError") + always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded if not self._retryable and not always_retryable: raise if retryable_write_label or always_retryable: @@ -2836,20 +2849,26 @@ def run(self) -> T: self._last_error = exc if self._last_error is None: self._last_error = exc + # Revert back to starting state if we're in a transaction but haven't completed the first + # command. + if overloaded and self._session is not None and self._session.in_transaction: + transaction = self._session._transaction + if not transaction.has_completed_command: + transaction.set_starting() + transaction.attempt = 0 if self._server is not None: self._deprioritized_servers.append(self._server) self._always_retryable = always_retryable - if always_retryable: + if overloaded: delay = self._retry_policy.backoff(self._attempt_number) if overloaded else 0 if not self._retry_policy.should_retry(self._attempt_number, delay): if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error: raise self._last_error from exc else: raise - if overloaded: - time.sleep(delay) + time.sleep(delay) def _is_not_eligible_for_retry(self) -> bool: """Checks if the exchange is not eligible for retry""" diff --git a/pymongo/synchronous/pool.py b/pymongo/synchronous/pool.py index 2683b03630..d33cb59a98 100644 --- a/pymongo/synchronous/pool.py +++ b/pymongo/synchronous/pool.py @@ -254,6 +254,7 @@ def _hello( cmd = self.hello_cmd() performing_handshake = not 
self.performed_handshake awaitable = False + cmd["backpressure"] = True if performing_handshake: self.performed_handshake = True cmd["client"] = self.opts.metadata diff --git a/test/asynchronous/test_backpressure.py b/test/asynchronous/test_client_backpressure.py similarity index 70% rename from test/asynchronous/test_backpressure.py rename to test/asynchronous/test_client_backpressure.py index 11f8edde67..c82d84e181 100644 --- a/test/asynchronous/test_backpressure.py +++ b/test/asynchronous/test_client_backpressure.py @@ -15,10 +15,11 @@ """Test Client Backpressure spec.""" from __future__ import annotations -import asyncio +import os +import pathlib import sys - -import pymongo +from time import perf_counter +from unittest.mock import patch sys.path[0:0] = [""] @@ -28,10 +29,13 @@ async_client_context, unittest, ) +from test.asynchronous.unified_format import generate_test_classes +from test.utils_shared import EventListener, OvertCommandListener +import pymongo from pymongo.asynchronous import helpers from pymongo.asynchronous.helpers import _MAX_RETRIES, _RetryPolicy, _TokenBucket -from pymongo.errors import PyMongoError +from pymongo.errors import OperationFailure, PyMongoError _IS_SYNC = False @@ -42,7 +46,7 @@ "data": { "failCommands": ["find", "insert", "update"], "errorCode": 462, # IngressRequestRateLimitExceeded - "errorLabels": ["RetryableError"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], }, } @@ -68,6 +72,7 @@ async def test_retry_overload_error_command(self): await self.db.command("find", "t") self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @async_client_context.require_failCommand_appName async def test_retry_overload_error_find(self): @@ -87,6 +92,7 @@ async def test_retry_overload_error_find(self): await self.db.t.find_one() self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) 
@async_client_context.require_failCommand_appName async def test_retry_overload_error_insert_one(self): @@ -106,6 +112,7 @@ async def test_retry_overload_error_insert_one(self): await self.db.t.find_one() self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @async_client_context.require_failCommand_appName async def test_retry_overload_error_update_many(self): @@ -127,6 +134,7 @@ async def test_retry_overload_error_update_many(self): await self.db.t.update_many({}, {"$set": {"x": 2}}) self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @async_client_context.require_failCommand_appName async def test_retry_overload_error_getMore(self): @@ -140,7 +148,7 @@ async def test_retry_overload_error_getMore(self): "data": { "failCommands": ["getMore"], "errorCode": 462, # IngressRequestRateLimitExceeded - "errorLabels": ["RetryableError"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], }, } cursor = coll.find(batch_size=2) @@ -158,6 +166,7 @@ async def test_retry_overload_error_getMore(self): await cursor.to_list() self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @async_client_context.require_failCommand_appName async def test_limit_retry_command(self): @@ -180,6 +189,7 @@ async def test_limit_retry_command(self): await db.command("find", "t") self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) class TestRetryPolicy(AsyncPyMongoTestCase): @@ -226,5 +236,83 @@ async def test_retry_policy_csot(self): self.assertTrue(await retry_policy.should_retry(1, 1.0)) +# Prose tests. 
+class AsyncTestClientBackpressure(AsyncIntegrationTest): + listener: EventListener + + @classmethod + def setUpClass(cls) -> None: + cls.listener = OvertCommandListener() + + @async_client_context.require_connection + async def asyncSetUp(self) -> None: + await super().asyncSetUp() + self.listener.reset() + self.app_name = self.__class__.__name__.lower() + self.client = await self.async_rs_or_single_client( + event_listeners=[self.listener], retryWrites=False, appName=self.app_name + ) + + @patch("random.random") + @async_client_context.require_failCommand_appName + async def test_01_operation_retry_uses_exponential_backoff(self, random_func): + # Drivers should test that retries do not occur immediately when a SystemOverloadedError is encountered. + + # 1. let `client` be a `MongoClient` + client = self.client + + # 2. let `collection` be a collection + collection = client.test.test + + # 3. Now, run transactions without backoff: + + # a. Configure the random number generator used for jitter to always return `0` -- this effectively disables backoff. + random_func.return_value = 0 + + # b. Configure the following failPoint: + fail_point = dict( + mode="alwaysOn", + data=dict( + failCommands=["insert"], + errorCode=2, + errorLabels=["SystemOverloadedError", "RetryableError"], + appName=self.app_name, + ), + ) + async with self.fail_point(fail_point): + # c. Execute the following command. Expect that the command errors. Measure the duration of the command execution. + start0 = perf_counter() + with self.assertRaises(OperationFailure): + await collection.insert_one({"a": 1}) + end0 = perf_counter() + + # d. Configure the random number generator used for jitter to always return `1`. + random_func.return_value = 1 + + # e. Execute step c again. + start1 = perf_counter() + with self.assertRaises(OperationFailure): + await collection.insert_one({"a": 1}) + end1 = perf_counter() + + # f. Compare the two time between the two runs. + # The sum of 5 backoffs is 3.1 seconds. 
There is a 1-second window to account for potential variance between the two + # runs. + self.assertTrue(abs((end1 - start1) - (end0 - start0 + 3.1)) < 1) + + +# Location of JSON test specifications. +if _IS_SYNC: + _TEST_PATH = os.path.join(pathlib.Path(__file__).resolve().parent, "client-backpressure") +else: + _TEST_PATH = os.path.join(pathlib.Path(__file__).resolve().parent.parent, "client-backpressure") + +globals().update( + generate_test_classes( + _TEST_PATH, + module=__name__, + ) +) + if __name__ == "__main__": unittest.main() diff --git a/test/asynchronous/test_client_metadata.py b/test/asynchronous/test_client_metadata.py index 45c1bd1b3b..d4f887b1fc 100644 --- a/test/asynchronous/test_client_metadata.py +++ b/test/asynchronous/test_client_metadata.py @@ -219,6 +219,19 @@ async def test_duplicate_driver_name_no_op(self): # add same metadata again await self.check_metadata_added(client, "Framework", None, None) + async def test_handshake_documents_include_backpressure(self): + # Create a `MongoClient` that is configured to record all handshake documents sent to the server as a part of + # connection establishment. + client = await self.async_rs_or_single_client("mongodb://" + self.server.address_string) + + # Send a `ping` command to the server and verify that the command succeeds. This ensure that a connection is + # established on all topologies. Note: MockupDB only supports standalone servers. + await client.admin.command("ping") + + # Assert that for every handshake document intercepted: + # the document has a field `backpressure` whose value is `true`. 
+ self.assertEqual(self.handshake_req["backpressure"], True) + if __name__ == "__main__": unittest.main() diff --git a/test/client-backpressure/backpressure-retry-loop.json b/test/client-backpressure/backpressure-retry-loop.json new file mode 100644 index 0000000000..2542344b38 --- /dev/null +++ b/test/client-backpressure/backpressure-retry-loop.json @@ -0,0 +1,2759 @@ +{ + "description": "tests that operations respect overload backoff retry loop", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent" + ] + } + }, + { + "client": { + "id": "internal_client", + "useMultipleMongoses": false + } + }, + { + "database": { + "id": "internal_db", + "client": "internal_client", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "retryable-writes-tests", + "database": "internal_db", + "collectionName": "coll" + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "retryable-writes-tests", + "documents": [] + } + ], + "_yamlAnchors": { + "bulWriteInsertNamespace": "retryable-writes-tests.coll" + }, + "tests": [ + { + "description": "client.listDatabases retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + 
"errorCode": 2 + } + } + } + }, + { + "object": "client", + "name": "listDatabases", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandSucceededEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": "client.listDatabaseNames retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "client", + "name": "listDatabaseNames" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandSucceededEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": 
"client.createChangeStream retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "client", + "name": "createChangeStream", + "arguments": { + "pipeline": [] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "client.clientBulkWrite retries using operation loop", + "runOnRequirements": [ + { + "minServerVersion": "8.0" + } + ], + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "bulkWrite" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "client", + "name": "clientBulkWrite", + "arguments": { + "models": [ + { + "insertOne": { + "namespace": "retryable-writes-tests.coll", + "document": { + "_id": 8, + "x": 88 + } + } + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + 
}, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandSucceededEvent": { + "commandName": "bulkWrite" + } + } + ] + } + ] + }, + { + "description": "database.aggregate retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "aggregate", + "arguments": { + "pipeline": [ + { + "$listLocalSessions": {} + }, + { + "$limit": 1 + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "database.listCollections retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + 
}, + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "listCollections", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandSucceededEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.listCollectionNames retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "listCollectionNames", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + 
}, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandSucceededEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.runCommand retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "ping" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "runCommand", + "arguments": { + "command": { + "ping": 1 + }, + "commandName": "ping" + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandSucceededEvent": { + "commandName": "ping" + } + } + ] + } + ] + }, + { + "description": "database.createChangeStream retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "createChangeStream", + "arguments": { + "pipeline": [] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + 
}, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "aggregate", + "arguments": { + "pipeline": [] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.countDocuments retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + 
"errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "countDocuments", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.estimatedDocumentCount retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "count" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "estimatedDocumentCount" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandSucceededEvent": { + "commandName": "count" + } + } + ] + } + ] + }, + { + "description": "collection.distinct retries using 
operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "distinct" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "distinct", + "arguments": { + "fieldName": "x", + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandSucceededEvent": { + "commandName": "distinct" + } + } + ] + } + ] + }, + { + "description": "collection.find retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "find", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + 
} + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.findOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "findOne", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexes retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "listIndexes" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + 
"commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexNames retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "listIndexNames" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.createChangeStream retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + 
"RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "createChangeStream", + "arguments": { + "pipeline": [] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.insertOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "insertOne", + "arguments": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": 
"collection.insertMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "insertMany", + "arguments": { + "documents": [ + { + "_id": 2, + "x": 22 + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.deleteOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "deleteOne", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, 
+ { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandSucceededEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "deleteMany", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandSucceededEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.replaceOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "replaceOne", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + } + } + ], + "expectEvents": [ 
+ { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "updateOne", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": 
"failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "updateMany", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndDelete retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "findOneAndDelete", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + 
"commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndReplace retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "findOneAndReplace", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndUpdate retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "findOneAndUpdate", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + } + } + ], + 
"expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.bulkWrite retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "bulkWrite", + "arguments": { + "requests": [ + { + "insertOne": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.createIndex retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": 
"testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "createIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "createIndex", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "createIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndex retries using operation loop", + "operations": [ + { + "object": "retryable-writes-tests", + "name": "createIndex", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "dropIndex", + "arguments": { + "name": "x_11" + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { 
+ "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndexes retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "dropIndexes" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + } + ] +} diff --git a/test/client-backpressure/backpressure-retry-max-attempts.json b/test/client-backpressure/backpressure-retry-max-attempts.json new file mode 100644 index 0000000000..1de8cb38d4 --- /dev/null +++ b/test/client-backpressure/backpressure-retry-max-attempts.json @@ -0,0 +1,3448 @@ +{ + "description": "tests that operations retry at most maxAttempts=5 times", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + 
"minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent" + ] + } + }, + { + "client": { + "id": "fail_point_client", + "useMultipleMongoses": false + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ], + "_yamlAnchors": { + "bulkWriteInsertNamespace": "retryable-writes-tests.coll" + }, + "initialData": [ + { + "collectionName": "coll", + "databaseName": "retryable-writes-tests", + "documents": [ + { + "_id": 1, + "x": 11 + }, + { + "_id": 2, + "x": 22 + } + ] + } + ], + "tests": [ + { + "description": "client.listDatabases retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "client", + "name": "listDatabases", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, 
+ { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": "client.listDatabaseNames retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "client", + "name": "listDatabaseNames", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": 
"client.createChangeStream retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "client", + "name": "createChangeStream", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "client.clientBulkWrite retries at most maxAttempts=5 times", + "runOnRequirements": [ + { + "minServerVersion": "8.0" + } + ], + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "bulkWrite" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + 
"object": "client", + "name": "clientBulkWrite", + "arguments": { + "models": [ + { + "insertOne": { + "namespace": "retryable-writes-tests.coll", + "document": { + "_id": 8, + "x": 88 + } + } + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + } + ] + } + ] + }, + { + "description": "database.aggregate retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "aggregate", + "arguments": { + "pipeline": [ + { + "$listLocalSessions": {} + }, + { + "$limit": 1 + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { 
+ "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "database.listCollections retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "listCollections", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": 
"listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.listCollectionNames retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "listCollectionNames", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.runCommand retries at most 
maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "ping" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "runCommand", + "arguments": { + "command": { + "ping": 1 + }, + "commandName": "ping" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + } + ] + } + ] + }, + { + "description": "database.createChangeStream retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "database", + "name": "createChangeStream", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + 
"isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "aggregate", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" 
+ } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.countDocuments retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "countDocuments", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": 
"collection.estimatedDocumentCount retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "count" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "estimatedDocumentCount", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + } + ] + } + ] + }, + { + "description": "collection.distinct retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "distinct" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "distinct", + "arguments": { + "fieldName": "x", + "filter": {} + }, + "expectError": { + "isError": true, + 
"isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + } + ] + } + ] + }, + { + "description": "collection.find retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "find", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": 
"find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.findOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "findOne", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexes retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { 
+ "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "listIndexes", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexNames retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "listIndexNames", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + 
"commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.createChangeStream retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "createChangeStream", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": 
"aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.insertOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "insertOne", + "arguments": { + "document": { + "_id": 2, + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.insertMany retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": 
"fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "insertMany", + "arguments": { + "documents": [ + { + "_id": 2, + "x": 22 + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.deleteOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "deleteOne", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + 
"commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteMany retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "deleteMany", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + 
"commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.replaceOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "replaceOne", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + 
"configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "updateOne", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateMany retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "updateMany", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + 
"commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndDelete retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "findOneAndDelete", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + 
"commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndReplace retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "findOneAndReplace", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": 
"collection.findOneAndUpdate retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "findOneAndUpdate", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.bulkWrite retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + 
"errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "bulkWrite", + "arguments": { + "requests": [ + { + "insertOne": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.createIndex retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "createIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "createIndex", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + 
"commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndex retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "dropIndex", + "arguments": { + "name": "x_11" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + 
"commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndexes retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "object": "collection", + "name": "dropIndexes", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + } + ] +} diff --git a/test/client-backpressure/getMore-retried.json b/test/client-backpressure/getMore-retried.json new file mode 100644 index 0000000000..70eff84612 --- /dev/null +++ b/test/client-backpressure/getMore-retried.json @@ -0,0 
+1,291 @@ +{ + "description": "getMore-retries-backpressure", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4" + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "observeEvents": [ + "commandStartedEvent", + "commandFailedEvent", + "commandSucceededEvent" + ] + } + }, + { + "client": { + "id": "failPointClient" + } + }, + { + "database": { + "id": "db", + "client": "client0", + "databaseName": "default" + } + }, + { + "collection": { + "id": "coll", + "database": "db", + "collectionName": "default" + } + } + ], + "initialData": [ + { + "databaseName": "default", + "collectionName": "default", + "documents": [ + { + "a": 1 + }, + { + "a": 2 + }, + { + "a": 3 + } + ] + } + ], + "tests": [ + { + "description": "getMores are retried", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "getMore" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "arguments": { + "batchSize": 2, + "filter": {}, + "sort": { + "a": 1 + } + }, + "object": "coll", + "expectResult": [ + { + "a": 1 + }, + { + "a": 2 + }, + { + "a": 3 + } + ] + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": 
"getMore" + } + }, + { + "commandSucceededEvent": { + "commandName": "getMore" + } + } + ] + } + ] + }, + { + "description": "getMores are retried maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "getMore" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "arguments": { + "batchSize": 2, + "filter": {} + }, + "object": "coll", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "killCursors" + } + }, + { + "commandSucceededEvent": { + "commandName": "killCursors" + } + } + ] + } + ] + } + ] +} diff --git a/test/test_backpressure.py b/test/test_client_backpressure.py similarity index 70% rename from test/test_backpressure.py rename to 
test/test_client_backpressure.py index fac1d6236d..40ea5eb8e1 100644 --- a/test/test_backpressure.py +++ b/test/test_client_backpressure.py @@ -15,10 +15,11 @@ """Test Client Backpressure spec.""" from __future__ import annotations -import asyncio +import os +import pathlib import sys - -import pymongo +from time import perf_counter +from unittest.mock import patch sys.path[0:0] = [""] @@ -28,8 +29,11 @@ client_context, unittest, ) +from test.unified_format import generate_test_classes +from test.utils_shared import EventListener, OvertCommandListener -from pymongo.errors import PyMongoError +import pymongo +from pymongo.errors import OperationFailure, PyMongoError from pymongo.synchronous import helpers from pymongo.synchronous.helpers import _MAX_RETRIES, _RetryPolicy, _TokenBucket @@ -42,7 +46,7 @@ "data": { "failCommands": ["find", "insert", "update"], "errorCode": 462, # IngressRequestRateLimitExceeded - "errorLabels": ["RetryableError"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], }, } @@ -68,6 +72,7 @@ def test_retry_overload_error_command(self): self.db.command("find", "t") self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @client_context.require_failCommand_appName def test_retry_overload_error_find(self): @@ -87,6 +92,7 @@ def test_retry_overload_error_find(self): self.db.t.find_one() self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @client_context.require_failCommand_appName def test_retry_overload_error_insert_one(self): @@ -106,6 +112,7 @@ def test_retry_overload_error_insert_one(self): self.db.t.find_one() self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @client_context.require_failCommand_appName def test_retry_overload_error_update_many(self): @@ -127,6 +134,7 @@ def test_retry_overload_error_update_many(self): 
self.db.t.update_many({}, {"$set": {"x": 2}}) self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @client_context.require_failCommand_appName def test_retry_overload_error_getMore(self): @@ -140,7 +148,7 @@ def test_retry_overload_error_getMore(self): "data": { "failCommands": ["getMore"], "errorCode": 462, # IngressRequestRateLimitExceeded - "errorLabels": ["RetryableError"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], }, } cursor = coll.find(batch_size=2) @@ -158,6 +166,7 @@ def test_retry_overload_error_getMore(self): cursor.to_list() self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) @client_context.require_failCommand_appName def test_limit_retry_command(self): @@ -180,6 +189,7 @@ def test_limit_retry_command(self): db.command("find", "t") self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) class TestRetryPolicy(PyMongoTestCase): @@ -226,5 +236,83 @@ def test_retry_policy_csot(self): self.assertTrue(retry_policy.should_retry(1, 1.0)) +# Prose tests. +class TestClientBackpressure(IntegrationTest): + listener: EventListener + + @classmethod + def setUpClass(cls) -> None: + cls.listener = OvertCommandListener() + + @client_context.require_connection + def setUp(self) -> None: + super().setUp() + self.listener.reset() + self.app_name = self.__class__.__name__.lower() + self.client = self.rs_or_single_client( + event_listeners=[self.listener], retryWrites=False, appName=self.app_name + ) + + @patch("random.random") + @client_context.require_failCommand_appName + def test_01_operation_retry_uses_exponential_backoff(self, random_func): + # Drivers should test that retries do not occur immediately when a SystemOverloadedError is encountered. + + # 1. let `client` be a `MongoClient` + client = self.client + + # 2. 
let `collection` be a collection + collection = client.test.test + + # 3. Now, run transactions without backoff: + + # a. Configure the random number generator used for jitter to always return `0` -- this effectively disables backoff. + random_func.return_value = 0 + + # b. Configure the following failPoint: + fail_point = dict( + mode="alwaysOn", + data=dict( + failCommands=["insert"], + errorCode=2, + errorLabels=["SystemOverloadedError", "RetryableError"], + appName=self.app_name, + ), + ) + with self.fail_point(fail_point): + # c. Execute the following command. Expect that the command errors. Measure the duration of the command execution. + start0 = perf_counter() + with self.assertRaises(OperationFailure): + collection.insert_one({"a": 1}) + end0 = perf_counter() + + # d. Configure the random number generator used for jitter to always return `1`. + random_func.return_value = 1 + + # e. Execute step c again. + start1 = perf_counter() + with self.assertRaises(OperationFailure): + collection.insert_one({"a": 1}) + end1 = perf_counter() + + # f. Compare the two time between the two runs. + # The sum of 5 backoffs is 3.1 seconds. There is a 1-second window to account for potential variance between the two + # runs. + self.assertTrue(abs((end1 - start1) - (end0 - start0 + 3.1)) < 1) + + +# Location of JSON test specifications. 
+if _IS_SYNC: + _TEST_PATH = os.path.join(pathlib.Path(__file__).resolve().parent, "client-backpressure") +else: + _TEST_PATH = os.path.join(pathlib.Path(__file__).resolve().parent.parent, "client-backpressure") + +globals().update( + generate_test_classes( + _TEST_PATH, + module=__name__, + ) +) + if __name__ == "__main__": unittest.main() diff --git a/test/test_client_metadata.py b/test/test_client_metadata.py index 5f103f739a..8cdb728ea2 100644 --- a/test/test_client_metadata.py +++ b/test/test_client_metadata.py @@ -219,6 +219,19 @@ def test_duplicate_driver_name_no_op(self): # add same metadata again self.check_metadata_added(client, "Framework", None, None) + def test_handshake_documents_include_backpressure(self): + # Create a `MongoClient` that is configured to record all handshake documents sent to the server as a part of + # connection establishment. + client = self.rs_or_single_client("mongodb://" + self.server.address_string) + + # Send a `ping` command to the server and verify that the command succeeds. This ensure that a connection is + # established on all topologies. Note: MockupDB only supports standalone servers. + client.admin.command("ping") + + # Assert that for every handshake document intercepted: + # the document has a field `backpressure` whose value is `true`. 
+ self.assertEqual(self.handshake_req["backpressure"], True) + if __name__ == "__main__": unittest.main() diff --git a/test/transactions/unified/backpressure-retryable-abort.json b/test/transactions/unified/backpressure-retryable-abort.json new file mode 100644 index 0000000000..53fc9c6f09 --- /dev/null +++ b/test/transactions/unified/backpressure-retryable-abort.json @@ -0,0 +1,357 @@ +{ + "description": "backpressure-retryable-abort", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent" + ] + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "transaction-tests" + } + }, + { + "collection": { + "id": "collection0", + "database": "database0", + "collectionName": "test" + } + }, + { + "session": { + "id": "session0", + "client": "client0" + } + } + ], + "initialData": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ], + "tests": [ + { + "description": "abortTransaction retries if backpressure labels are added", + "operations": [ + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 2 + }, + "data": { + "failCommands": [ + "abortTransaction" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": 
[ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + }, + { + "description": "abortTransaction is retried maxAttempts=5 times if backpressure labels are added", + "operations": [ + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + 
"abortTransaction" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + } + ] +} diff --git a/test/transactions/unified/backpressure-retryable-commit.json b/test/transactions/unified/backpressure-retryable-commit.json new 
file mode 100644 index 0000000000..ae873561a9 --- /dev/null +++ b/test/transactions/unified/backpressure-retryable-commit.json @@ -0,0 +1,374 @@ +{ + "description": "backpressure-retryable-commit", + "schemaVersion": "1.4", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "sharded", + "replicaset", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent" + ] + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "transaction-tests" + } + }, + { + "collection": { + "id": "collection0", + "database": "database0", + "collectionName": "test" + } + }, + { + "session": { + "id": "session0", + "client": "client0" + } + } + ], + "initialData": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ], + "tests": [ + { + "description": "commitTransaction retries if backpressure labels are added", + "runOnRequirements": [ + { + "serverless": "forbid" + } + ], + "operations": [ + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 2 + }, + "data": { + "failCommands": [ + "commitTransaction" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "session0", + "name": "commitTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + 
"readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "commitTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "command": { + "commitTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "command": { + "commitTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [ + { + "_id": 1 + } + ] + } + ] + }, + { + "description": "commitTransaction is retried maxAttempts=5 times if backpressure labels are added", + "runOnRequirements": [ + { + "serverless": "forbid" + } + ], + "operations": [ + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "commitTransaction" + ], + "errorLabels": [ + "RetryableError", + 
"SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "session0", + "name": "commitTransaction", + "expectError": { + "isError": true + } + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "commitTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + } + ] +} diff --git a/test/transactions/unified/backpressure-retryable-reads.json b/test/transactions/unified/backpressure-retryable-reads.json new file mode 100644 
index 0000000000..731762830e --- /dev/null +++ b/test/transactions/unified/backpressure-retryable-reads.json @@ -0,0 +1,328 @@ +{ + "description": "backpressure-retryable-reads", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent" + ] + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "transaction-tests" + } + }, + { + "collection": { + "id": "collection0", + "database": "database0", + "collectionName": "test" + } + }, + { + "session": { + "id": "session0", + "client": "client0" + } + } + ], + "initialData": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ], + "tests": [ + { + "description": "reads are retried if backpressure labels are added", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "find", + "arguments": { + "filter": {}, + "session": "session0" + } + }, + { + "object": "session0", + "name": "commitTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": 
true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "find": "test", + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "find", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "find": "test", + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "find", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + } + ] + } + ] + }, + { + "description": "reads are retried maxAttempts=5 times if backpressure labels are added", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + 
"failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "find", + "arguments": { + "filter": {}, + "session": "session0" + }, + "expectError": { + "isError": true + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + } + ] + } + ] + } + ] +} diff --git a/test/transactions/unified/backpressure-retryable-writes.json b/test/transactions/unified/backpressure-retryable-writes.json new file mode 100644 index 0000000000..0817e03f2f --- /dev/null +++ b/test/transactions/unified/backpressure-retryable-writes.json @@ -0,0 +1,440 @@ +{ + "description": "backpressure-retryable-writes", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent" + ] + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "transaction-tests" + } + }, + { + "collection": { + "id": "collection0", + "database": "database0", + "collectionName": "test" + } + }, + { + "session": { + "id": "session0", + "client": "client0" + } + } + ], + "initialData": [ + { + "collectionName": "test", + "databaseName": 
"transaction-tests", + "documents": [] + } + ], + "tests": [ + { + "description": "writes are retried if backpressure labels are added", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 2 + } + } + }, + { + "object": "session0", + "name": "commitTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 2 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 2 + } + ], + 
"ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ] + }, + { + "description": "writes are retried maxAttempts=5 times if backpressure labels are added", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 2 + } + }, + "expectError": { + "isError": true + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + 
"commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + }, + { + "description": "retry succeeds if backpressure labels are added to the first operation in a transaction", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 2 + } + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + } + ] +} diff --git a/tools/synchro.py b/tools/synchro.py index 661d8988cb..91820644e3 100644 --- a/tools/synchro.py +++ b/tools/synchro.py @@ -209,9 +209,9 @@ def async_only_test(f: str) -> bool: "test_auth_oidc.py", "test_auth_spec.py", "test_bulk.py", - "test_backpressure.py", "test_change_stream.py", 
"test_client.py", + "test_client_backpressure.py", "test_client_bulk_write.py", "test_client_context.py", "test_client_metadata.py", From 1d219a90023543089754448807b80ab29395c252 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 18 Feb 2026 11:00:20 -0600 Subject: [PATCH 09/14] PYTHON-5695 Clarify NoWritesPerformed error label behavior when multiple retries occur (#2707) --- test/asynchronous/test_retryable_writes.py | 191 ++++++++++++++++++++- test/test_retryable_writes.py | 191 ++++++++++++++++++++- 2 files changed, 380 insertions(+), 2 deletions(-) diff --git a/test/asynchronous/test_retryable_writes.py b/test/asynchronous/test_retryable_writes.py index ddb1d39eb7..d1568b3ec3 100644 --- a/test/asynchronous/test_retryable_writes.py +++ b/test/asynchronous/test_retryable_writes.py @@ -43,14 +43,16 @@ from bson.int64 import Int64 from bson.raw_bson import RawBSONDocument from bson.son import SON +from pymongo import MongoClient from pymongo.errors import ( AutoReconnect, ConnectionFailure, - OperationFailure, + NotPrimaryError, ServerSelectionTimeoutError, WriteConcernError, ) from pymongo.monitoring import ( + CommandFailedEvent, CommandSucceededEvent, ConnectionCheckedOutEvent, ConnectionCheckOutFailedEvent, @@ -601,5 +603,192 @@ def raise_connection_err_select_server(*args, **kwargs): self.assertEqual(sent_txn_id, final_txn_id, msg) +class TestErrorPropagationAfterEncounteringMultipleErrors(AsyncIntegrationTest): + # Only run against replica sets as mongos does not propagate the NoWritesPerformed label to the drivers. + @async_client_context.require_replica_set + # Run against server versions 6.0 and above. 
+ @async_client_context.require_version_min(6, 0) # type: ignore[untyped-decorator] + async def asyncSetUp(self) -> None: + await super().asyncSetUp() + self.setup_client = MongoClient(**async_client_context.default_client_options) + self.addCleanup(self.setup_client.close) + + # TODO: After PYTHON-4595 we can use async event handlers and remove this workaround. + def configure_fail_point_sync(self, command_args, off=False) -> None: + cmd = {"configureFailPoint": "failCommand"} + cmd.update(command_args) + if off: + cmd["mode"] = "off" + cmd.pop("data", None) + self.setup_client.admin.command(cmd) + + async def test_01_drivers_return_the_correct_error_when_receiving_only_errors_without_NoWritesPerformed( + self + ) -> None: + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 91, + }, + } + + # Via the command monitoring CommandFailedEvent, configure a fail point with error code 10107 (NotWritablePrimary). + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 10107, + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the 10107 fail point command only if the the failed event is for the 91 error configured in step 2. 
+ if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = await self.async_rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + with self.assertRaises(NotPrimaryError) as exc: + await client.test.test.insert_one({}) + + # Assert that the error code of the server error is 10107. + assert exc.exception.errors["code"] == 10107 # type:ignore[call-overload] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + async def test_02_drivers_return_the_correct_error_when_receiving_only_errors_with_NoWritesPerformed( + self + ) -> None: + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + "errorCode": 91, + }, + } + + # Via the command monitoring CommandFailedEvent, configure a fail point with error code `10107` (NotWritablePrimary) + # and a NoWritesPerformed label. + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 10107, + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + if listener.failed_events: + return + # Configure the 10107 fail point command only if the the failed event is for the 91 error configured in step 2. 
+ assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = await self.async_rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + with self.assertRaises(NotPrimaryError) as exc: + await client.test.test.insert_one({}) + + # Assert that the error code of the server error is 91. + assert exc.exception.errors["code"] == 91 # type:ignore[call-overload] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + async def test_03_drivers_return_the_correct_error_when_receiving_some_errors_with_NoWritesPerformed_and_some_without_NoWritesPerformed( + self + ) -> None: + # TODO: read the expected behavior and add breakpoint() to the retry loop + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error + # code `91` (NotWritablePrimary) and the `NoWritesPerformed`, `RetryableError` and `SystemOverloadedError` labels. + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + "errorCode": 91, + }, + } + + # Configure a fail point with error code `91` (ShutdownInProgress) with the `RetryableError` and + # `SystemOverloadedError` error labels but without the `NoWritesPerformed` error label. 
+ command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorCode": 91, + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the fail point command only if the the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = await self.async_rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + from pymongo.errors import OperationFailure + + with self.assertRaises(Exception) as exc: + await client.test.test.insert_one({}) + + # Assert that the error code of the server error is 91. + assert exc.exception.errors["code"] == 91 + # Assert that the error does not contain the error label `NoWritesPerformed`. + assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"] + + # Disable the fail point. 
+ self.configure_fail_point_sync({}, off=True) + + if __name__ == "__main__": unittest.main() diff --git a/test/test_retryable_writes.py b/test/test_retryable_writes.py index a74a3e8030..8763c89de0 100644 --- a/test/test_retryable_writes.py +++ b/test/test_retryable_writes.py @@ -43,14 +43,16 @@ from bson.int64 import Int64 from bson.raw_bson import RawBSONDocument from bson.son import SON +from pymongo import MongoClient from pymongo.errors import ( AutoReconnect, ConnectionFailure, - OperationFailure, + NotPrimaryError, ServerSelectionTimeoutError, WriteConcernError, ) from pymongo.monitoring import ( + CommandFailedEvent, CommandSucceededEvent, ConnectionCheckedOutEvent, ConnectionCheckOutFailedEvent, @@ -597,5 +599,192 @@ def raise_connection_err_select_server(*args, **kwargs): self.assertEqual(sent_txn_id, final_txn_id, msg) +class TestErrorPropagationAfterEncounteringMultipleErrors(IntegrationTest): + # Only run against replica sets as mongos does not propagate the NoWritesPerformed label to the drivers. + @client_context.require_replica_set + # Run against server versions 6.0 and above. + @client_context.require_version_min(6, 0) # type: ignore[untyped-decorator] + def setUp(self) -> None: + super().setUp() + self.setup_client = MongoClient(**client_context.default_client_options) + self.addCleanup(self.setup_client.close) + + # TODO: After PYTHON-4595 we can use async event handlers and remove this workaround. + def configure_fail_point_sync(self, command_args, off=False) -> None: + cmd = {"configureFailPoint": "failCommand"} + cmd.update(command_args) + if off: + cmd["mode"] = "off" + cmd.pop("data", None) + self.setup_client.admin.command(cmd) + + def test_01_drivers_return_the_correct_error_when_receiving_only_errors_without_NoWritesPerformed( + self + ) -> None: + # Create a client with retryWrites=true. 
+ listener = OvertCommandListener() + + # Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 91, + }, + } + + # Via the command monitoring CommandFailedEvent, configure a fail point with error code 10107 (NotWritablePrimary). + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 10107, + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the 10107 fail point command only if the the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = self.rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + with self.assertRaises(NotPrimaryError) as exc: + client.test.test.insert_one({}) + + # Assert that the error code of the server error is 10107. + assert exc.exception.errors["code"] == 10107 # type:ignore[call-overload] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + def test_02_drivers_return_the_correct_error_when_receiving_only_errors_with_NoWritesPerformed( + self + ) -> None: + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels. 
+ command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + "errorCode": 91, + }, + } + + # Via the command monitoring CommandFailedEvent, configure a fail point with error code `10107` (NotWritablePrimary) + # and a NoWritesPerformed label. + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 10107, + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + if listener.failed_events: + return + # Configure the 10107 fail point command only if the the failed event is for the 91 error configured in step 2. + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = self.rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + with self.assertRaises(NotPrimaryError) as exc: + client.test.test.insert_one({}) + + # Assert that the error code of the server error is 91. + assert exc.exception.errors["code"] == 91 # type:ignore[call-overload] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + def test_03_drivers_return_the_correct_error_when_receiving_some_errors_with_NoWritesPerformed_and_some_without_NoWritesPerformed( + self + ) -> None: + # TODO: read the expected behavior and add breakpoint() to the retry loop + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure the client to listen to CommandFailedEvents. 
In the attached listener, configure a fail point with error + # code `91` (NotWritablePrimary) and the `NoWritesPerformed`, `RetryableError` and `SystemOverloadedError` labels. + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + "errorCode": 91, + }, + } + + # Configure a fail point with error code `91` (ShutdownInProgress) with the `RetryableError` and + # `SystemOverloadedError` error labels but without the `NoWritesPerformed` error label. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorCode": 91, + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the fail point command only if the the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = self.rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + from pymongo.errors import OperationFailure + + with self.assertRaises(Exception) as exc: + client.test.test.insert_one({}) + + # Assert that the error code of the server error is 91. + assert exc.exception.errors["code"] == 91 + # Assert that the error does not contain the error label `NoWritesPerformed`. + assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"] + + # Disable the fail point. 
+ self.configure_fail_point_sync({}, off=True) + + if __name__ == "__main__": unittest.main() From e7a5247bed99c6c1875634ac11c056c9d00f6cc7 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 26 Feb 2026 09:59:44 -0800 Subject: [PATCH 10/14] PYTHON-5528 - Token buckets disabled by default (#2713) Co-authored-by: Sergey Zelenov --- pymongo/asynchronous/helpers.py | 11 ++- pymongo/asynchronous/mongo_client.py | 15 ++- pymongo/client_options.py | 13 +++ pymongo/common.py | 5 + pymongo/synchronous/helpers.py | 11 ++- pymongo/synchronous/mongo_client.py | 15 ++- test/asynchronous/test_client.py | 15 +++ test/asynchronous/test_client_backpressure.py | 93 ++++++++++++++----- test/test_client.py | 15 +++ test/test_client_backpressure.py | 91 +++++++++++++----- .../client-backpressure-options.json | 35 +++++++ 11 files changed, 259 insertions(+), 60 deletions(-) create mode 100644 test/uri_options/client-backpressure-options.json diff --git a/pymongo/asynchronous/helpers.py b/pymongo/asynchronous/helpers.py index cc9fbfb2fc..2c01c19b7a 100644 --- a/pymongo/asynchronous/helpers.py +++ b/pymongo/asynchronous/helpers.py @@ -79,7 +79,6 @@ async def inner(*args: Any, **kwargs: Any) -> Any: _MAX_RETRIES = 5 _BACKOFF_INITIAL = 0.1 _BACKOFF_MAX = 10 -# DRIVERS-3240 will determine these defaults. DEFAULT_RETRY_TOKEN_CAPACITY = 1000.0 DEFAULT_RETRY_TOKEN_RETURN = 0.1 @@ -101,7 +100,6 @@ def __init__( ): self.lock = _async_create_lock() self.capacity = capacity - # DRIVERS-3240 will determine how full the bucket should start. self.tokens = capacity self.return_rate = return_rate @@ -123,7 +121,7 @@ async def deposit(self, retry: bool = False) -> None: class _RetryPolicy: """A retry limiter that performs exponential backoff with jitter. - Retry attempts are limited by a token bucket to prevent overwhelming the server during + When adaptive retries are enabled, retry attempts are limited by a token bucket to prevent overwhelming the server during a prolonged outage or high load. 
""" @@ -133,15 +131,18 @@ def __init__( attempts: int = _MAX_RETRIES, backoff_initial: float = _BACKOFF_INITIAL, backoff_max: float = _BACKOFF_MAX, + adaptive_retry: bool = False, ): self.token_bucket = token_bucket self.attempts = attempts self.backoff_initial = backoff_initial self.backoff_max = backoff_max + self.adaptive_retry = adaptive_retry async def record_success(self, retry: bool) -> None: """Record a successful operation.""" - await self.token_bucket.deposit(retry) + if self.adaptive_retry: + await self.token_bucket.deposit(retry) def backoff(self, attempt: int) -> float: """Return the backoff duration for the given .""" @@ -158,7 +159,7 @@ async def should_retry(self, attempt: int, delay: float) -> bool: return False # Check token bucket last since we only want to consume a token if we actually retry. - if not await self.token_bucket.consume(): + if self.adaptive_retry and not await self.token_bucket.consume(): # DRIVERS-3246 Improve diagnostics when this case happens. # We could add info to the exception and log. return False diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index adab640fd2..a7ca7b0144 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -615,8 +615,18 @@ def __init__( client to use Stable API. See `versioned API `_ for details. + | **Adaptive retry options:** + | (If not enabled explicitly, adaptive retries will not be enabled.) + + - `adaptive_retries`: (boolean) Whether the adaptive retry mechanism is enabled for this client. + If enabled, server overload errors will use a token-bucket based system to mitigate further overload. + Defaults to ``False``. + .. seealso:: The MongoDB documentation on `connections `_. + .. versionchanged:: 4.17 + Added the ``adaptive_retries`` URI and keyword argument. + .. versionchanged:: 4.5 Added the ``serverMonitoringMode`` keyword argument. 
@@ -778,7 +788,6 @@ def __init__( self._timeout: float | None = None self._topology_settings: TopologySettings = None # type: ignore[assignment] self._event_listeners: _EventListeners | None = None - self._retry_policy = _RetryPolicy(_TokenBucket()) # _pool_class, _monitor_class, and _condition_class are for deep # customization of PyMongo, e.g. Motor. @@ -890,6 +899,10 @@ def __init__( self._opened = False self._closed = False self._loop: Optional[asyncio.AbstractEventLoop] = None + + self._retry_policy = _RetryPolicy( + _TokenBucket(), adaptive_retry=self._options.adaptive_retries + ) if not is_srv: self._init_background() diff --git a/pymongo/client_options.py b/pymongo/client_options.py index 8b4eea7e65..1e488c2b8f 100644 --- a/pymongo/client_options.py +++ b/pymongo/client_options.py @@ -235,6 +235,11 @@ def __init__( self.__server_monitoring_mode = options.get( "servermonitoringmode", common.SERVER_MONITORING_MODE ) + self.__adaptive_retries = ( + options.get("adaptive_retries", common.ADAPTIVE_RETRIES) + if "adaptive_retries" in options + else options.get("adaptiveretries", common.ADAPTIVE_RETRIES) + ) @property def _options(self) -> Mapping[str, Any]: @@ -346,3 +351,11 @@ def server_monitoring_mode(self) -> str: .. versionadded:: 4.5 """ return self.__server_monitoring_mode + + @property + def adaptive_retries(self) -> bool: + """The configured adaptiveRetries option. + + .. versionadded:: 4.17 + """ + return self.__adaptive_retries diff --git a/pymongo/common.py b/pymongo/common.py index e23adac426..8b9797682f 100644 --- a/pymongo/common.py +++ b/pymongo/common.py @@ -140,6 +140,9 @@ # Default value for serverMonitoringMode SERVER_MONITORING_MODE = "auto" # poll/stream/auto +# Default value for adaptiveRetries +ADAPTIVE_RETRIES = False + # Auth mechanism properties that must raise an error instead of warning if they invalidate. 
_MECH_PROP_MUST_RAISE = ["CANONICALIZE_HOST_NAME"] @@ -738,6 +741,7 @@ def validate_server_monitoring_mode(option: str, value: str) -> str: "srvmaxhosts": validate_non_negative_integer, "timeoutms": validate_timeoutms, "servermonitoringmode": validate_server_monitoring_mode, + "adaptiveretries": validate_boolean_or_string, } # Dictionary where keys are the names of URI options specific to pymongo, @@ -771,6 +775,7 @@ def validate_server_monitoring_mode(option: str, value: str) -> str: "server_selector": validate_is_callable_or_none, "auto_encryption_opts": validate_auto_encryption_opts_or_none, "authoidcallowedhosts": validate_list, + "adaptive_retries": validate_boolean_or_string, } # Dictionary where keys are any URI option name, and values are the diff --git a/pymongo/synchronous/helpers.py b/pymongo/synchronous/helpers.py index 9d93f9c47f..1a27fc11a5 100644 --- a/pymongo/synchronous/helpers.py +++ b/pymongo/synchronous/helpers.py @@ -79,7 +79,6 @@ def inner(*args: Any, **kwargs: Any) -> Any: _MAX_RETRIES = 5 _BACKOFF_INITIAL = 0.1 _BACKOFF_MAX = 10 -# DRIVERS-3240 will determine these defaults. DEFAULT_RETRY_TOKEN_CAPACITY = 1000.0 DEFAULT_RETRY_TOKEN_RETURN = 0.1 @@ -101,7 +100,6 @@ def __init__( ): self.lock = _create_lock() self.capacity = capacity - # DRIVERS-3240 will determine how full the bucket should start. self.tokens = capacity self.return_rate = return_rate @@ -123,7 +121,7 @@ def deposit(self, retry: bool = False) -> None: class _RetryPolicy: """A retry limiter that performs exponential backoff with jitter. - Retry attempts are limited by a token bucket to prevent overwhelming the server during + When adaptive retries are enabled, retry attempts are limited by a token bucket to prevent overwhelming the server during a prolonged outage or high load. 
""" @@ -133,15 +131,18 @@ def __init__( attempts: int = _MAX_RETRIES, backoff_initial: float = _BACKOFF_INITIAL, backoff_max: float = _BACKOFF_MAX, + adaptive_retry: bool = False, ): self.token_bucket = token_bucket self.attempts = attempts self.backoff_initial = backoff_initial self.backoff_max = backoff_max + self.adaptive_retry = adaptive_retry def record_success(self, retry: bool) -> None: """Record a successful operation.""" - self.token_bucket.deposit(retry) + if self.adaptive_retry: + self.token_bucket.deposit(retry) def backoff(self, attempt: int) -> float: """Return the backoff duration for the given .""" @@ -158,7 +159,7 @@ def should_retry(self, attempt: int, delay: float) -> bool: return False # Check token bucket last since we only want to consume a token if we actually retry. - if not self.token_bucket.consume(): + if self.adaptive_retry and not self.token_bucket.consume(): # DRIVERS-3246 Improve diagnostics when this case happens. # We could add info to the exception and log. return False diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 4e3d178f89..36f432c67d 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -615,8 +615,18 @@ def __init__( client to use Stable API. See `versioned API `_ for details. + | **Adaptive retry options:** + | (If not enabled explicitly, adaptive retries will not be enabled.) + + - `adaptive_retries`: (boolean) Whether the adaptive retry mechanism is enabled for this client. + If enabled, server overload errors will use a token-bucket based system to mitigate further overload. + Defaults to ``False``. + .. seealso:: The MongoDB documentation on `connections `_. + .. versionchanged:: 4.17 + Added the ``adaptive_retries`` URI and keyword argument. + .. versionchanged:: 4.5 Added the ``serverMonitoringMode`` keyword argument. 
@@ -778,7 +788,6 @@ def __init__( self._timeout: float | None = None self._topology_settings: TopologySettings = None # type: ignore[assignment] self._event_listeners: _EventListeners | None = None - self._retry_policy = _RetryPolicy(_TokenBucket()) # _pool_class, _monitor_class, and _condition_class are for deep # customization of PyMongo, e.g. Motor. @@ -890,6 +899,10 @@ def __init__( self._opened = False self._closed = False self._loop: Optional[asyncio.AbstractEventLoop] = None + + self._retry_policy = _RetryPolicy( + _TokenBucket(), adaptive_retry=self._options.adaptive_retries + ) if not is_srv: self._init_background() diff --git a/test/asynchronous/test_client.py b/test/asynchronous/test_client.py index 5511765bae..f3cee70e15 100644 --- a/test/asynchronous/test_client.py +++ b/test/asynchronous/test_client.py @@ -652,6 +652,21 @@ async def test_detected_environment_warning(self, mock_get_hosts): with self.assertWarns(UserWarning): self.simple_client(multi_host) + async def test_adaptive_retries(self): + # Assert that adaptive retries are disabled by default. + c = self.simple_client(connect=False) + self.assertFalse(c.options.adaptive_retries) + + # Assert that adaptive retries can be enabled through connection or client options. 
+ c = self.simple_client(connect=False, adaptive_retries=True) + self.assertTrue(c.options.adaptive_retries) + + c = self.simple_client(connect=False, adaptiveRetries=True) + self.assertTrue(c.options.adaptive_retries) + + c = self.simple_client(host="mongodb://localhost/?adaptiveretries=true", connect=False) + self.assertTrue(c.options.adaptive_retries) + class TestClient(AsyncIntegrationTest): def test_multiple_uris(self): diff --git a/test/asynchronous/test_client_backpressure.py b/test/asynchronous/test_client_backpressure.py index c82d84e181..3100d5064e 100644 --- a/test/asynchronous/test_client_backpressure.py +++ b/test/asynchronous/test_client_backpressure.py @@ -168,34 +168,11 @@ async def test_retry_overload_error_getMore(self): self.assertIn("RetryableError", str(error.exception)) self.assertIn("SystemOverloadedError", str(error.exception)) - @async_client_context.require_failCommand_appName - async def test_limit_retry_command(self): - client = await self.async_rs_or_single_client() - client._retry_policy.token_bucket.tokens = 1 - db = client.pymongo_test - await db.t.insert_one({"x": 1}) - - # Ensure command is retried once overload error. - fail_many = mock_overload_error.copy() - fail_many["mode"] = {"times": 1} - async with self.fail_point(fail_many): - await db.command("find", "t") - - # Ensure command stops retrying when there are no tokens left. 
- fail_too_many = mock_overload_error.copy() - fail_too_many["mode"] = {"times": 2} - async with self.fail_point(fail_too_many): - with self.assertRaises(PyMongoError) as error: - await db.command("find", "t") - - self.assertIn("RetryableError", str(error.exception)) - self.assertIn("SystemOverloadedError", str(error.exception)) - class TestRetryPolicy(AsyncPyMongoTestCase): async def test_retry_policy(self): capacity = 10 - retry_policy = _RetryPolicy(_TokenBucket(capacity=capacity)) + retry_policy = _RetryPolicy(_TokenBucket(capacity=capacity), adaptive_retry=True) self.assertEqual(retry_policy.attempts, helpers._MAX_RETRIES) self.assertEqual(retry_policy.backoff_initial, helpers._BACKOFF_INITIAL) self.assertEqual(retry_policy.backoff_max, helpers._BACKOFF_MAX) @@ -300,6 +277,74 @@ async def test_01_operation_retry_uses_exponential_backoff(self, random_func): # runs. self.assertTrue(abs((end1 - start1) - (end0 - start0 + 3.1)) < 1) + @async_client_context.require_failCommand_appName + async def test_03_overload_retries_limited(self): + # Drivers should test that without adaptive retries enabled, overload errors are retried a maximum of five times. + + # 1. Let `client` be a `MongoClient`. + client = self.client + # 2. Let `coll` be a collection. + coll = client.pymongo_test.coll + + # 3. Configure the following failpoint: + failpoint = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["find"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + # 4. Perform a find operation with `coll` that fails. + async with self.fail_point(failpoint): + with self.assertRaises(PyMongoError) as error: + await coll.find_one({}) + + # 5. Assert that the raised error contains both the `RetryableError` and `SystemOverLoadedError` error labels. 
+ self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + # 6. Assert that the total number of started commands is MAX_RETRIES + 1. + self.assertEqual(len(self.listener.started_events), _MAX_RETRIES + 1) + + @async_client_context.require_failCommand_appName + async def test_04_adaptive_retries_limited_by_tokens(self): + # Drivers should test that when enabled, adaptive retries are limited by the number of tokens in the bucket. + + # 1. Let `client` be a `MongoClient` with adaptiveRetries=True. + client = await self.async_rs_or_single_client( + adaptive_retries=True, event_listeners=[self.listener] + ) + # 2. Set `client`'s retry token bucket to have 2 tokens. + client._retry_policy.token_bucket.tokens = 2 + # 3. Let `coll` be a collection. + coll = client.pymongo_test.coll + + # 4. Configure the following failpoint: + failpoint = { + "configureFailPoint": "failCommand", + "mode": {"times": 3}, + "data": { + "failCommands": ["find"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + # 5. Perform a find operation with `coll` that fails. + async with self.fail_point(failpoint): + with self.assertRaises(PyMongoError) as error: + await coll.find_one({}) + + # 6. Assert that the raised error contains both the `RetryableError` and `SystemOverLoadedError` error labels. + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + # 7. Assert that the total number of started commands is 3: one for the initial attempt and two for the retries. + self.assertEqual(len(self.listener.started_events), 3) + # Location of JSON test specifications. 
if _IS_SYNC: diff --git a/test/test_client.py b/test/test_client.py index 737b3afe60..9cfa36b2cd 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -645,6 +645,21 @@ def test_detected_environment_warning(self, mock_get_hosts): with self.assertWarns(UserWarning): self.simple_client(multi_host) + def test_adaptive_retries(self): + # Assert that adaptive retries are disabled by default. + c = self.simple_client(connect=False) + self.assertFalse(c.options.adaptive_retries) + + # Assert that adaptive retries can be enabled through connection or client options. + c = self.simple_client(connect=False, adaptive_retries=True) + self.assertTrue(c.options.adaptive_retries) + + c = self.simple_client(connect=False, adaptiveRetries=True) + self.assertTrue(c.options.adaptive_retries) + + c = self.simple_client(host="mongodb://localhost/?adaptiveretries=true", connect=False) + self.assertTrue(c.options.adaptive_retries) + class TestClient(IntegrationTest): def test_multiple_uris(self): diff --git a/test/test_client_backpressure.py b/test/test_client_backpressure.py index 40ea5eb8e1..f3146c9450 100644 --- a/test/test_client_backpressure.py +++ b/test/test_client_backpressure.py @@ -168,34 +168,11 @@ def test_retry_overload_error_getMore(self): self.assertIn("RetryableError", str(error.exception)) self.assertIn("SystemOverloadedError", str(error.exception)) - @client_context.require_failCommand_appName - def test_limit_retry_command(self): - client = self.rs_or_single_client() - client._retry_policy.token_bucket.tokens = 1 - db = client.pymongo_test - db.t.insert_one({"x": 1}) - - # Ensure command is retried once overload error. - fail_many = mock_overload_error.copy() - fail_many["mode"] = {"times": 1} - with self.fail_point(fail_many): - db.command("find", "t") - - # Ensure command stops retrying when there are no tokens left. 
- fail_too_many = mock_overload_error.copy() - fail_too_many["mode"] = {"times": 2} - with self.fail_point(fail_too_many): - with self.assertRaises(PyMongoError) as error: - db.command("find", "t") - - self.assertIn("RetryableError", str(error.exception)) - self.assertIn("SystemOverloadedError", str(error.exception)) - class TestRetryPolicy(PyMongoTestCase): def test_retry_policy(self): capacity = 10 - retry_policy = _RetryPolicy(_TokenBucket(capacity=capacity)) + retry_policy = _RetryPolicy(_TokenBucket(capacity=capacity), adaptive_retry=True) self.assertEqual(retry_policy.attempts, helpers._MAX_RETRIES) self.assertEqual(retry_policy.backoff_initial, helpers._BACKOFF_INITIAL) self.assertEqual(retry_policy.backoff_max, helpers._BACKOFF_MAX) @@ -300,6 +277,72 @@ def test_01_operation_retry_uses_exponential_backoff(self, random_func): # runs. self.assertTrue(abs((end1 - start1) - (end0 - start0 + 3.1)) < 1) + @client_context.require_failCommand_appName + def test_03_overload_retries_limited(self): + # Drivers should test that without adaptive retries enabled, overload errors are retried a maximum of five times. + + # 1. Let `client` be a `MongoClient`. + client = self.client + # 2. Let `coll` be a collection. + coll = client.pymongo_test.coll + + # 3. Configure the following failpoint: + failpoint = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["find"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + # 4. Perform a find operation with `coll` that fails. + with self.fail_point(failpoint): + with self.assertRaises(PyMongoError) as error: + coll.find_one({}) + + # 5. Assert that the raised error contains both the `RetryableError` and `SystemOverLoadedError` error labels. + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + # 6. 
Assert that the total number of started commands is MAX_RETRIES + 1. + self.assertEqual(len(self.listener.started_events), _MAX_RETRIES + 1) + + @client_context.require_failCommand_appName + def test_04_adaptive_retries_limited_by_tokens(self): + # Drivers should test that when enabled, adaptive retries are limited by the number of tokens in the bucket. + + # 1. Let `client` be a `MongoClient` with adaptiveRetries=True. + client = self.rs_or_single_client(adaptive_retries=True, event_listeners=[self.listener]) + # 2. Set `client`'s retry token bucket to have 2 tokens. + client._retry_policy.token_bucket.tokens = 2 + # 3. Let `coll` be a collection. + coll = client.pymongo_test.coll + + # 4. Configure the following failpoint: + failpoint = { + "configureFailPoint": "failCommand", + "mode": {"times": 3}, + "data": { + "failCommands": ["find"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + # 5. Perform a find operation with `coll` that fails. + with self.fail_point(failpoint): + with self.assertRaises(PyMongoError) as error: + coll.find_one({}) + + # 6. Assert that the raised error contains both the `RetryableError` and `SystemOverLoadedError` error labels. + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + # 7. Assert that the total number of started commands is 3: one for the initial attempt and two for the retries. + self.assertEqual(len(self.listener.started_events), 3) + # Location of JSON test specifications. 
if _IS_SYNC: diff --git a/test/uri_options/client-backpressure-options.json b/test/uri_options/client-backpressure-options.json new file mode 100644 index 0000000000..3fcf2c86b0 --- /dev/null +++ b/test/uri_options/client-backpressure-options.json @@ -0,0 +1,35 @@ +{ + "tests": [ + { + "description": "adaptiveRetries=true is parsed correctly", + "uri": "mongodb://example.com/?adaptiveRetries=true", + "valid": true, + "warning": false, + "hosts": null, + "auth": null, + "options": { + "adaptiveRetries": true + } + }, + { + "description": "adaptiveRetries=false is parsed correctly", + "uri": "mongodb://example.com/?adaptiveRetries=false", + "valid": true, + "warning": false, + "hosts": null, + "auth": null, + "options": { + "adaptiveRetries": false + } + }, + { + "description": "adaptiveRetries with invalid value causes a warning", + "uri": "mongodb://example.com/?adaptiveRetries=invalid", + "valid": true, + "warning": true, + "hosts": null, + "auth": null, + "options": null + } + ] +} From 359ddfaad751eab9a5efb134a3f537af4e1b7fb9 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Wed, 4 Mar 2026 10:07:54 -0800 Subject: [PATCH 11/14] PYTHON-5741 - Transaction state is preserved across retries (#2718) --- pymongo/asynchronous/mongo_client.py | 7 ++++--- pymongo/synchronous/mongo_client.py | 7 ++++--- .../backpressure-retryable-writes.json | 20 ++++++++++++++++--- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index a7ca7b0144..27d2544693 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -894,15 +894,16 @@ def __init__( self._options.read_concern, ) + self._retry_policy = _RetryPolicy( + _TokenBucket(), adaptive_retry=self._options.adaptive_retries + ) + self._init_based_on_options(self._seeds, srv_max_hosts, srv_service_name) self._opened = False self._closed = False self._loop: Optional[asyncio.AbstractEventLoop] = None - 
self._retry_policy = _RetryPolicy( - _TokenBucket(), adaptive_retry=self._options.adaptive_retries - ) if not is_srv: self._init_background() diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 36f432c67d..12892e52ff 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -894,15 +894,16 @@ def __init__( self._options.read_concern, ) + self._retry_policy = _RetryPolicy( + _TokenBucket(), adaptive_retry=self._options.adaptive_retries + ) + self._init_based_on_options(self._seeds, srv_max_hosts, srv_service_name) self._opened = False self._closed = False self._loop: Optional[asyncio.AbstractEventLoop] = None - self._retry_policy = _RetryPolicy( - _TokenBucket(), adaptive_retry=self._options.adaptive_retries - ) if not is_srv: self._init_background() diff --git a/test/transactions/unified/backpressure-retryable-writes.json b/test/transactions/unified/backpressure-retryable-writes.json index 0817e03f2f..eea0e6b5da 100644 --- a/test/transactions/unified/backpressure-retryable-writes.json +++ b/test/transactions/unified/backpressure-retryable-writes.json @@ -412,17 +412,31 @@ "events": [ { "commandStartedEvent": { - "commandName": "insert" + "command": { + "startTransaction": true + }, + "commandName": "insert", + "databaseName": "transaction-tests" } }, { "commandStartedEvent": { - "commandName": "insert" + "command": { + "startTransaction": true + }, + "commandName": "insert", + "databaseName": "transaction-tests" } }, { "commandStartedEvent": { - "commandName": "abortTransaction" + "command": { + "startTransaction": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" } } ] From cc5b9c44437753a52fa47b3ce54863cb124af0fa Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 10 Mar 2026 12:24:53 -0400 Subject: [PATCH 12/14] =?UTF-8?q?PYTHON-5716=20-=20Clarify=20expected=20er?= =?UTF-8?q?ror=20if=20backoff=20exceeds=20CSOT's=20deadli=E2=80=A6=20(#271?= 
=?UTF-8?q?9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pymongo/asynchronous/client_session.py | 49 +++++++++++++++----------- pymongo/synchronous/client_session.py | 49 +++++++++++++++----------- test/asynchronous/test_transactions.py | 41 +++++++++++++++++++-- test/test_transactions.py | 41 +++++++++++++++++++-- 4 files changed, 132 insertions(+), 48 deletions(-) diff --git a/pymongo/asynchronous/client_session.py b/pymongo/asynchronous/client_session.py index 5967651b53..c72e828849 100644 --- a/pymongo/asynchronous/client_session.py +++ b/pymongo/asynchronous/client_session.py @@ -163,7 +163,9 @@ from pymongo.errors import ( ConfigurationError, ConnectionFailure, + ExecutionTimeout, InvalidOperation, + NetworkTimeout, OperationFailure, PyMongoError, WTimeoutError, @@ -480,14 +482,20 @@ def _max_time_expired_error(exc: PyMongoError) -> bool: _BACKOFF_INITIAL = 0.005 # 5ms initial backoff -def _within_time_limit(start_time: float) -> bool: +def _within_time_limit(start_time: float, backoff: float = 0) -> bool: """Are we within the with_transaction retry limit?""" - return time.monotonic() - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT + remaining = _csot.remaining() + if remaining is not None and remaining <= 0: + return False + return time.monotonic() + backoff - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT -def _would_exceed_time_limit(start_time: float, backoff: float) -> bool: - """Is the backoff within the with_transaction retry limit?""" - return time.monotonic() + backoff - start_time >= _WITH_TRANSACTION_RETRY_TIME_LIMIT +def _make_timeout_error(error: BaseException) -> PyMongoError: + """Convert error to a NetworkTimeout or ExecutionTimeout as appropriate.""" + if _csot.remaining() is not None: + return ExecutionTimeout(str(error), 50, {"ok": 0, "errmsg": str(error), "code": 50}) + else: + return NetworkTimeout(str(error)) _T = TypeVar("_T") @@ -722,9 +730,9 @@ async def callback(session, 
custom_arg, custom_kwarg=None): if retry: # Implement exponential backoff on retry. jitter = random.random() # noqa: S311 backoff = jitter * min(_BACKOFF_INITIAL * (1.5**retry), _BACKOFF_MAX) - if _would_exceed_time_limit(start_time, backoff): + if not _within_time_limit(start_time, backoff): assert last_error is not None - raise last_error + raise _make_timeout_error(last_error) from last_error await asyncio.sleep(backoff) retry += 1 await self.start_transaction( @@ -737,13 +745,13 @@ async def callback(session, custom_arg, custom_kwarg=None): last_error = exc if self.in_transaction: await self.abort_transaction() - if ( - isinstance(exc, PyMongoError) - and exc.has_error_label("TransientTransactionError") - and _within_time_limit(start_time) + if isinstance(exc, PyMongoError) and exc.has_error_label( + "TransientTransactionError" ): - # Retry the entire transaction. - continue + if _within_time_limit(start_time): + # Retry the entire transaction. + continue + raise _make_timeout_error(last_error) from exc raise if not self.in_transaction: @@ -754,17 +762,16 @@ async def callback(session, custom_arg, custom_kwarg=None): try: await self.commit_transaction() except PyMongoError as exc: - if ( - exc.has_error_label("UnknownTransactionCommitResult") - and _within_time_limit(start_time) - and not _max_time_expired_error(exc) - ): + last_error = exc + if not _within_time_limit(start_time): + raise _make_timeout_error(last_error) from exc + if exc.has_error_label( + "UnknownTransactionCommitResult" + ) and not _max_time_expired_error(exc): # Retry the commit. continue - if exc.has_error_label("TransientTransactionError") and _within_time_limit( - start_time - ): + if exc.has_error_label("TransientTransactionError"): # Retry the entire transaction. 
break raise diff --git a/pymongo/synchronous/client_session.py b/pymongo/synchronous/client_session.py index dcda05dc46..2467bc71b3 100644 --- a/pymongo/synchronous/client_session.py +++ b/pymongo/synchronous/client_session.py @@ -160,7 +160,9 @@ from pymongo.errors import ( ConfigurationError, ConnectionFailure, + ExecutionTimeout, InvalidOperation, + NetworkTimeout, OperationFailure, PyMongoError, WTimeoutError, @@ -478,14 +480,20 @@ def _max_time_expired_error(exc: PyMongoError) -> bool: _BACKOFF_INITIAL = 0.005 # 5ms initial backoff -def _within_time_limit(start_time: float) -> bool: +def _within_time_limit(start_time: float, backoff: float = 0) -> bool: """Are we within the with_transaction retry limit?""" - return time.monotonic() - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT + remaining = _csot.remaining() + if remaining is not None and remaining <= 0: + return False + return time.monotonic() + backoff - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT -def _would_exceed_time_limit(start_time: float, backoff: float) -> bool: - """Is the backoff within the with_transaction retry limit?""" - return time.monotonic() + backoff - start_time >= _WITH_TRANSACTION_RETRY_TIME_LIMIT +def _make_timeout_error(error: BaseException) -> PyMongoError: + """Convert error to a NetworkTimeout or ExecutionTimeout as appropriate.""" + if _csot.remaining() is not None: + return ExecutionTimeout(str(error), 50, {"ok": 0, "errmsg": str(error), "code": 50}) + else: + return NetworkTimeout(str(error)) _T = TypeVar("_T") @@ -720,9 +728,9 @@ def callback(session, custom_arg, custom_kwarg=None): if retry: # Implement exponential backoff on retry. 
jitter = random.random() # noqa: S311 backoff = jitter * min(_BACKOFF_INITIAL * (1.5**retry), _BACKOFF_MAX) - if _would_exceed_time_limit(start_time, backoff): + if not _within_time_limit(start_time, backoff): assert last_error is not None - raise last_error + raise _make_timeout_error(last_error) from last_error time.sleep(backoff) retry += 1 self.start_transaction(read_concern, write_concern, read_preference, max_commit_time_ms) @@ -733,13 +741,13 @@ def callback(session, custom_arg, custom_kwarg=None): last_error = exc if self.in_transaction: self.abort_transaction() - if ( - isinstance(exc, PyMongoError) - and exc.has_error_label("TransientTransactionError") - and _within_time_limit(start_time) + if isinstance(exc, PyMongoError) and exc.has_error_label( + "TransientTransactionError" ): - # Retry the entire transaction. - continue + if _within_time_limit(start_time): + # Retry the entire transaction. + continue + raise _make_timeout_error(last_error) from exc raise if not self.in_transaction: @@ -750,17 +758,16 @@ def callback(session, custom_arg, custom_kwarg=None): try: self.commit_transaction() except PyMongoError as exc: - if ( - exc.has_error_label("UnknownTransactionCommitResult") - and _within_time_limit(start_time) - and not _max_time_expired_error(exc) - ): + last_error = exc + if not _within_time_limit(start_time): + raise _make_timeout_error(last_error) from exc + if exc.has_error_label( + "UnknownTransactionCommitResult" + ) and not _max_time_expired_error(exc): # Retry the commit. continue - if exc.has_error_label("TransientTransactionError") and _within_time_limit( - start_time - ): + if exc.has_error_label("TransientTransactionError"): # Retry the entire transaction. 
break raise diff --git a/test/asynchronous/test_transactions.py b/test/asynchronous/test_transactions.py index 4e26c29618..95a07a743c 100644 --- a/test/asynchronous/test_transactions.py +++ b/test/asynchronous/test_transactions.py @@ -21,6 +21,7 @@ import time from io import BytesIO +import pymongo from gridfs.asynchronous.grid_file import AsyncGridFS, AsyncGridFSBucket from pymongo.asynchronous.pool import PoolState from pymongo.server_selectors import writable_server_selector @@ -47,7 +48,9 @@ CollectionInvalid, ConfigurationError, ConnectionFailure, + ExecutionTimeout, InvalidOperation, + NetworkTimeout, OperationFailure, ) from pymongo.operations import IndexModel, InsertOne @@ -497,7 +500,7 @@ async def callback(session): listener.reset() async with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(OperationFailure): + with self.assertRaises(NetworkTimeout): await s.with_transaction(callback) self.assertEqual(listener.started_command_names(), ["insert", "abortTransaction"]) @@ -531,7 +534,7 @@ async def callback(session): async with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(OperationFailure): + with self.assertRaises(NetworkTimeout): await s.with_transaction(callback) self.assertEqual(listener.started_command_names(), ["insert", "commitTransaction"]) @@ -562,7 +565,7 @@ async def callback(session): async with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(ConnectionFailure): + with self.assertRaises(NetworkTimeout): await s.with_transaction(callback) # One insert for the callback and two commits (includes the automatic @@ -571,6 +574,38 @@ async def callback(session): listener.started_command_names(), ["insert", "commitTransaction", "commitTransaction"] ) + @async_client_context.require_transactions + async def test_callback_not_retried_after_csot_timeout(self): + listener = OvertCommandListener() + client = await 
self.async_rs_client(event_listeners=[listener]) + coll = client[self.db.name].test + + async def callback(session): + await coll.insert_one({}, session=session) + err: dict = { + "ok": 0, + "errmsg": "Transaction 7819 has been aborted.", + "code": 251, + "codeName": "NoSuchTransaction", + "errorLabels": ["TransientTransactionError"], + } + raise OperationFailure(err["errmsg"], err["code"], err) + + # Create the collection. + await coll.insert_one({}) + listener.reset() + async with client.start_session() as s: + with pymongo.timeout(1.0): + with self.assertRaises(ExecutionTimeout): + await s.with_transaction(callback) + + # At least two attempts: the original and one or more retries. + inserts = len([x for x in listener.started_command_names() if x == "insert"]) + aborts = len([x for x in listener.started_command_names() if x == "abortTransaction"]) + + self.assertGreaterEqual(inserts, 2) + self.assertGreaterEqual(aborts, 2) + # Tested here because this supports Motor's convenient transactions API. 
@async_client_context.require_transactions async def test_in_transaction_property(self): diff --git a/test/test_transactions.py b/test/test_transactions.py index ff80745edc..9e370294ef 100644 --- a/test/test_transactions.py +++ b/test/test_transactions.py @@ -21,6 +21,7 @@ import time from io import BytesIO +import pymongo from gridfs.synchronous.grid_file import GridFS, GridFSBucket from pymongo.server_selectors import writable_server_selector from pymongo.synchronous.pool import PoolState @@ -42,7 +43,9 @@ CollectionInvalid, ConfigurationError, ConnectionFailure, + ExecutionTimeout, InvalidOperation, + NetworkTimeout, OperationFailure, ) from pymongo.operations import IndexModel, InsertOne @@ -489,7 +492,7 @@ def callback(session): listener.reset() with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(OperationFailure): + with self.assertRaises(NetworkTimeout): s.with_transaction(callback) self.assertEqual(listener.started_command_names(), ["insert", "abortTransaction"]) @@ -521,7 +524,7 @@ def callback(session): with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(OperationFailure): + with self.assertRaises(NetworkTimeout): s.with_transaction(callback) self.assertEqual(listener.started_command_names(), ["insert", "commitTransaction"]) @@ -550,7 +553,7 @@ def callback(session): with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(ConnectionFailure): + with self.assertRaises(NetworkTimeout): s.with_transaction(callback) # One insert for the callback and two commits (includes the automatic @@ -559,6 +562,38 @@ def callback(session): listener.started_command_names(), ["insert", "commitTransaction", "commitTransaction"] ) + @client_context.require_transactions + def test_callback_not_retried_after_csot_timeout(self): + listener = OvertCommandListener() + client = self.rs_client(event_listeners=[listener]) + coll = client[self.db.name].test + + def 
callback(session): + coll.insert_one({}, session=session) + err: dict = { + "ok": 0, + "errmsg": "Transaction 7819 has been aborted.", + "code": 251, + "codeName": "NoSuchTransaction", + "errorLabels": ["TransientTransactionError"], + } + raise OperationFailure(err["errmsg"], err["code"], err) + + # Create the collection. + coll.insert_one({}) + listener.reset() + with client.start_session() as s: + with pymongo.timeout(1.0): + with self.assertRaises(ExecutionTimeout): + s.with_transaction(callback) + + # At least two attempts: the original and one or more retries. + inserts = len([x for x in listener.started_command_names() if x == "insert"]) + aborts = len([x for x in listener.started_command_names() if x == "abortTransaction"]) + + self.assertGreaterEqual(inserts, 2) + self.assertGreaterEqual(aborts, 2) + # Tested here because this supports Motor's convenient transactions API. @client_context.require_transactions def test_in_transaction_property(self): From 0a47a19e13af56a108967a19ac53c9ad037f6718 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 12 Mar 2026 15:35:08 -0400 Subject: [PATCH 13/14] PYTHON-5752 - Only retry overload errors if retries are enabled (#2726) --- pymongo/asynchronous/collection.py | 1 + pymongo/asynchronous/database.py | 2 +- pymongo/asynchronous/mongo_client.py | 45 +- pymongo/synchronous/collection.py | 1 + pymongo/synchronous/database.py | 2 +- pymongo/synchronous/mongo_client.py | 45 +- test/asynchronous/test_client_backpressure.py | 2 +- .../backpressure-connection-checkin.json | 141 + .../backpressure-retry-loop.json | 3560 +++++++++++++---- .../backpressure-retry-max-attempts.json | 175 +- test/test_client_backpressure.py | 2 +- 11 files changed, 3210 insertions(+), 766 deletions(-) create mode 100644 test/client-backpressure/backpressure-connection-checkin.json diff --git a/pymongo/asynchronous/collection.py b/pymongo/asynchronous/collection.py index 4de72af47b..127136dd48 100644 --- a/pymongo/asynchronous/collection.py +++ 
b/pymongo/asynchronous/collection.py @@ -2946,6 +2946,7 @@ async def _aggregate( session, retryable=not cmd._performs_write, operation=_Op.AGGREGATE, + is_aggregate_write=cmd._performs_write, ) async def aggregate( diff --git a/pymongo/asynchronous/database.py b/pymongo/asynchronous/database.py index 3af77ffe15..2c482f0415 100644 --- a/pymongo/asynchronous/database.py +++ b/pymongo/asynchronous/database.py @@ -952,7 +952,7 @@ async def inner( ) return await self._client._retryable_read( - inner, read_preference, session, command_name, None, False + inner, read_preference, session, command_name, None, False, is_run_command=True ) @_csot.apply diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 27d2544693..5c8251b849 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -2010,6 +2010,8 @@ async def _retry_internal( read_pref: Optional[_ServerMode] = None, retryable: bool = False, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ) -> T: """Internal retryable helper for all client transactions. @@ -2021,6 +2023,8 @@ async def _retry_internal( :param address: Server Address, defaults to None :param read_pref: Topology of read operation, defaults to None :param retryable: If the operation should be retried once, defaults to None + :param is_run_command: If this is a runCommand operation, defaults to False + :param is_aggregate_write: If this is a aggregate operation with a write, defaults to False. 
:return: Output of the calling func() """ @@ -2035,6 +2039,8 @@ async def _retry_internal( address=address, retryable=retryable, operation_id=operation_id, + is_run_command=is_run_command, + is_aggregate_write=is_aggregate_write, ).run() async def _retryable_read( @@ -2046,6 +2052,8 @@ async def _retryable_read( address: Optional[_Address] = None, retryable: bool = True, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ) -> T: """Execute an operation with consecutive retries if possible @@ -2061,6 +2069,8 @@ async def _retryable_read( :param address: Optional address when sending a message, defaults to None :param retryable: if we should attempt retries (may not always be supported even if supplied), defaults to False + :param is_run_command: If this is a runCommand operation, defaults to False. + :param is_aggregate_write: If this is a aggregate operation with a write, defaults to False. """ # Ensure that the client supports retrying on reads and there is no session in @@ -2079,6 +2089,8 @@ async def _retryable_read( read_pref=read_pref, retryable=retryable, operation_id=operation_id, + is_run_command=is_run_command, + is_aggregate_write=is_aggregate_write, ) async def _retryable_write( @@ -2748,6 +2760,8 @@ def __init__( address: Optional[_Address] = None, retryable: bool = False, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ): self._last_error: Optional[Exception] = None self._retrying = False @@ -2770,6 +2784,8 @@ def __init__( self._operation = operation self._operation_id = operation_id self._attempt_number = 0 + self._is_run_command = is_run_command + self._is_aggregate_write = is_aggregate_write async def run(self) -> T: """Runs the supplied func() and attempts a retry @@ -2810,6 +2826,14 @@ async def run(self) -> T: always_retryable = False overloaded = False exc_to_check = exc + + if self._is_run_command and not ( + 
self._client.options.retry_reads and self._client.options.retry_writes + ): + raise + if self._is_aggregate_write and not self._client.options.retry_writes: + raise + # Execute specialized catch on read if self._is_read: if isinstance(exc, (ConnectionFailure, OperationFailure)): @@ -2817,11 +2841,15 @@ async def run(self) -> T: exc_code = getattr(exc, "code", None) overloaded = exc.has_error_label("SystemOverloadedError") always_retryable = exc.has_error_label("RetryableError") and overloaded - if not always_retryable and ( - self._is_not_eligible_for_retry() - or ( - isinstance(exc, OperationFailure) - and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + if ( + not self._client.options.retry_reads + or not always_retryable + and ( + self._is_not_eligible_for_retry() + or ( + isinstance(exc, OperationFailure) + and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + ) ) ): raise @@ -2852,7 +2880,12 @@ async def run(self) -> T: retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") overloaded = exc_to_check.has_error_label("SystemOverloadedError") always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded - if not self._retryable and not always_retryable: + + # Always retry abortTransaction and commitTransaction up to once + if self._operation not in ["abortTransaction", "commitTransaction"] and ( + not self._client.options.retry_writes + or not (self._retryable or always_retryable) + ): raise if retryable_write_label or always_retryable: assert self._session diff --git a/pymongo/synchronous/collection.py b/pymongo/synchronous/collection.py index 73207d6027..34fd7190d1 100644 --- a/pymongo/synchronous/collection.py +++ b/pymongo/synchronous/collection.py @@ -2939,6 +2939,7 @@ def _aggregate( session, retryable=not cmd._performs_write, operation=_Op.AGGREGATE, + is_aggregate_write=cmd._performs_write, ) def aggregate( diff --git a/pymongo/synchronous/database.py b/pymongo/synchronous/database.py index 
60a55276c6..cc041a2e30 100644 --- a/pymongo/synchronous/database.py +++ b/pymongo/synchronous/database.py @@ -952,7 +952,7 @@ def inner( ) return self._client._retryable_read( - inner, read_preference, session, command_name, None, False + inner, read_preference, session, command_name, None, False, is_run_command=True ) @_csot.apply diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 12892e52ff..9cec36a752 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -2006,6 +2006,8 @@ def _retry_internal( read_pref: Optional[_ServerMode] = None, retryable: bool = False, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ) -> T: """Internal retryable helper for all client transactions. @@ -2017,6 +2019,8 @@ def _retry_internal( :param address: Server Address, defaults to None :param read_pref: Topology of read operation, defaults to None :param retryable: If the operation should be retried once, defaults to None + :param is_run_command: If this is a runCommand operation, defaults to False + :param is_aggregate_write: If this is a aggregate operation with a write, defaults to False. 
:return: Output of the calling func() """ @@ -2031,6 +2035,8 @@ def _retry_internal( address=address, retryable=retryable, operation_id=operation_id, + is_run_command=is_run_command, + is_aggregate_write=is_aggregate_write, ).run() def _retryable_read( @@ -2042,6 +2048,8 @@ def _retryable_read( address: Optional[_Address] = None, retryable: bool = True, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ) -> T: """Execute an operation with consecutive retries if possible @@ -2057,6 +2065,8 @@ def _retryable_read( :param address: Optional address when sending a message, defaults to None :param retryable: if we should attempt retries (may not always be supported even if supplied), defaults to False + :param is_run_command: If this is a runCommand operation, defaults to False. + :param is_aggregate_write: If this is a aggregate operation with a write, defaults to False. """ # Ensure that the client supports retrying on reads and there is no session in @@ -2075,6 +2085,8 @@ def _retryable_read( read_pref=read_pref, retryable=retryable, operation_id=operation_id, + is_run_command=is_run_command, + is_aggregate_write=is_aggregate_write, ) def _retryable_write( @@ -2738,6 +2750,8 @@ def __init__( address: Optional[_Address] = None, retryable: bool = False, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ): self._last_error: Optional[Exception] = None self._retrying = False @@ -2760,6 +2774,8 @@ def __init__( self._operation = operation self._operation_id = operation_id self._attempt_number = 0 + self._is_run_command = is_run_command + self._is_aggregate_write = is_aggregate_write def run(self) -> T: """Runs the supplied func() and attempts a retry @@ -2800,6 +2816,14 @@ def run(self) -> T: always_retryable = False overloaded = False exc_to_check = exc + + if self._is_run_command and not ( + self._client.options.retry_reads and self._client.options.retry_writes + ): 
+ raise + if self._is_aggregate_write and not self._client.options.retry_writes: + raise + # Execute specialized catch on read if self._is_read: if isinstance(exc, (ConnectionFailure, OperationFailure)): @@ -2807,11 +2831,15 @@ def run(self) -> T: exc_code = getattr(exc, "code", None) overloaded = exc.has_error_label("SystemOverloadedError") always_retryable = exc.has_error_label("RetryableError") and overloaded - if not always_retryable and ( - self._is_not_eligible_for_retry() - or ( - isinstance(exc, OperationFailure) - and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + if ( + not self._client.options.retry_reads + or not always_retryable + and ( + self._is_not_eligible_for_retry() + or ( + isinstance(exc, OperationFailure) + and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + ) ) ): raise @@ -2842,7 +2870,12 @@ def run(self) -> T: retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") overloaded = exc_to_check.has_error_label("SystemOverloadedError") always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded - if not self._retryable and not always_retryable: + + # Always retry abortTransaction and commitTransaction up to once + if self._operation not in ["abortTransaction", "commitTransaction"] and ( + not self._client.options.retry_writes + or not (self._retryable or always_retryable) + ): raise if retryable_write_label or always_retryable: assert self._session diff --git a/test/asynchronous/test_client_backpressure.py b/test/asynchronous/test_client_backpressure.py index 3100d5064e..9e617d74e6 100644 --- a/test/asynchronous/test_client_backpressure.py +++ b/test/asynchronous/test_client_backpressure.py @@ -227,7 +227,7 @@ async def asyncSetUp(self) -> None: self.listener.reset() self.app_name = self.__class__.__name__.lower() self.client = await self.async_rs_or_single_client( - event_listeners=[self.listener], retryWrites=False, appName=self.app_name + event_listeners=[self.listener], 
appName=self.app_name ) @patch("random.random") diff --git a/test/client-backpressure/backpressure-connection-checkin.json b/test/client-backpressure/backpressure-connection-checkin.json new file mode 100644 index 0000000000..307fc9fb53 --- /dev/null +++ b/test/client-backpressure/backpressure-connection-checkin.json @@ -0,0 +1,141 @@ +{ + "description": "tests that connections are returned to the pool on retry attempts for overload errors", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "connectionCheckedOutEvent", + "connectionCheckedInEvent" + ] + } + }, + { + "client": { + "id": "fail_point_client", + "useMultipleMongoses": false + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "retryable-writes-tests", + "documents": [ + { + "_id": 1, + "x": 11 + } + ] + } + ], + "tests": [ + { + "description": "overload error retry attempts return connections to the pool", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + 
"connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + } + ] +} diff --git a/test/client-backpressure/backpressure-retry-loop.json b/test/client-backpressure/backpressure-retry-loop.json index 2542344b38..0e8840f523 100644 --- a/test/client-backpressure/backpressure-retry-loop.json +++ b/test/client-backpressure/backpressure-retry-loop.json @@ -20,6 +20,9 @@ "commandStartedEvent", "commandSucceededEvent", "commandFailedEvent" + ], + "ignoreCommandMonitoringEvents": [ + "killCursors" ] } }, @@ -56,6 +59,68 @@ "database": "database", "collectionName": "coll" } + }, + { + "client": { + "id": "client_retryReads_false", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent" + ], + "ignoreCommandMonitoringEvents": [ + "killCursors" + ], + "uriOptions": { + "retryReads": false + } + } + }, + { + "database": { + "id": "database_retryReads_false", + "client": "client_retryReads_false", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "collection_retryReads_false", + "database": "database_retryReads_false", + "collectionName": "coll" + } + }, + { + "client": { + "id": "client_retryWrites_false", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent" + ], + "ignoreCommandMonitoringEvents": [ + "killCursors" + ], + "uriOptions": { + "retryWrites": false + } + } + }, + { + "database": { + "id": "database_retryWrites_false", + "client": "client_retryWrites_false", + "databaseName": 
"backpressure-db" + } + }, + { + "collection": { + "id": "collection_retryWrites_false", + "database": "database_retryWrites_false", + "collectionName": "coll" + } } ], "initialData": [ @@ -96,8 +161,8 @@ } }, { - "object": "client", "name": "listDatabases", + "object": "client", "arguments": { "filter": {} } @@ -151,6 +216,62 @@ } ] }, + { + "description": "client.listDatabases (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listDatabases", + "object": "client_retryReads_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, { "description": "client.listDatabaseNames retries using operation loop", "operations": [ @@ -178,8 +299,8 @@ } }, { - "object": "client", - "name": "listDatabaseNames" + "name": "listDatabaseNames", + "object": "client" } ], "expectEvents": [ @@ -230,6 +351,59 @@ } ] }, + { + "description": "client.listDatabaseNames (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listDatabaseNames", + "object": 
"client_retryReads_false", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, { "description": "client.createChangeStream retries using operation loop", "operations": [ @@ -257,8 +431,8 @@ } }, { - "object": "client", "name": "createChangeStream", + "object": "client", "arguments": { "pipeline": [] } @@ -312,6 +486,62 @@ } ] }, + { + "description": "client.createChangeStream (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "client_retryReads_false", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, { "description": "client.clientBulkWrite retries using operation loop", "runOnRequirements": [ @@ -344,8 +574,8 @@ } }, { - "object": "client", "name": "clientBulkWrite", + "object": "client", "arguments": { "models": [ { @@ -409,6 +639,77 @@ } ] }, + { + "description": "client.clientBulkWrite (write) does not retry if retryWrites=false", + "runOnRequirements": [ + { + "minServerVersion": "8.0" + } + ], + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + 
"failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "bulkWrite" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "clientBulkWrite", + "object": "client_retryWrites_false", + "arguments": { + "models": [ + { + "insertOne": { + "namespace": "retryable-writes-tests.coll", + "document": { + "_id": 8, + "x": 88 + } + } + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + } + ] + } + ] + }, { "description": "database.aggregate retries using operation loop", "operations": [ @@ -436,8 +737,8 @@ } }, { - "object": "database", "name": "aggregate", + "object": "database", "arguments": { "pipeline": [ { @@ -499,7 +800,7 @@ ] }, { - "description": "database.listCollections retries using operation loop", + "description": "database.aggregate (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -509,11 +810,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "listCollections" + "aggregate" ], "errorLabels": [ "RetryableError", @@ -525,55 +826,36 @@ } }, { - "object": "database", - "name": "listCollections", + "name": "aggregate", + "object": "database_retryReads_false", "arguments": { - "filter": {} + "pipeline": [ + { + "$listLocalSessions": {} + }, + { + "$limit": 1 + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "listCollections" - } - }, - { - "commandFailedEvent": { - "commandName": "listCollections" - } - }, - { - 
"commandStartedEvent": { - "commandName": "listCollections" - } - }, - { - "commandFailedEvent": { - "commandName": "listCollections" - } - }, - { - "commandStartedEvent": { - "commandName": "listCollections" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "listCollections" - } - }, - { - "commandStartedEvent": { - "commandName": "listCollections" - } - }, - { - "commandSucceededEvent": { - "commandName": "listCollections" + "commandName": "aggregate" } } ] @@ -581,7 +863,7 @@ ] }, { - "description": "database.listCollectionNames retries using operation loop", + "description": "database.listCollections retries using operation loop", "operations": [ { "name": "failPoint", @@ -607,8 +889,8 @@ } }, { + "name": "listCollections", "object": "database", - "name": "listCollectionNames", "arguments": { "filter": {} } @@ -663,7 +945,7 @@ ] }, { - "description": "database.runCommand retries using operation loop", + "description": "database.listCollections (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -673,11 +955,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "ping" + "listCollections" ], "errorLabels": [ "RetryableError", @@ -689,58 +971,29 @@ } }, { - "object": "database", - "name": "runCommand", + "name": "listCollections", + "object": "database_retryReads_false", "arguments": { - "command": { - "ping": 1 - }, - "commandName": "ping" + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "ping" - } - }, - { - "commandFailedEvent": { - "commandName": "ping" - } - }, - { - "commandStartedEvent": { - "commandName": "ping" - } - }, - { - "commandFailedEvent": { - "commandName": "ping" - } - }, - { - "commandStartedEvent": { - "commandName": "ping" + 
"commandName": "listCollections" } }, { "commandFailedEvent": { - "commandName": "ping" - } - }, - { - "commandStartedEvent": { - "commandName": "ping" - } - }, - { - "commandSucceededEvent": { - "commandName": "ping" + "commandName": "listCollections" } } ] @@ -748,7 +1001,7 @@ ] }, { - "description": "database.createChangeStream retries using operation loop", + "description": "database.listCollectionNames retries using operation loop", "operations": [ { "name": "failPoint", @@ -762,7 +1015,7 @@ }, "data": { "failCommands": [ - "aggregate" + "listCollections" ], "errorLabels": [ "RetryableError", @@ -774,10 +1027,10 @@ } }, { + "name": "listCollectionNames", "object": "database", - "name": "createChangeStream", "arguments": { - "pipeline": [] + "filter": {} } } ], @@ -787,42 +1040,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } }, { "commandFailedEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } }, { "commandStartedEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } }, { "commandFailedEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } }, { "commandStartedEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } }, { "commandFailedEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } }, { "commandStartedEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } }, { "commandSucceededEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } } ] @@ -830,7 +1083,7 @@ ] }, { - "description": "collection.aggregate retries using operation loop", + "description": "database.listCollectionNames (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -840,11 +1093,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "aggregate" + "listCollections" ], 
"errorLabels": [ "RetryableError", @@ -856,55 +1109,29 @@ } }, { - "object": "collection", - "name": "aggregate", + "name": "listCollectionNames", + "object": "database_retryReads_false", "arguments": { - "pipeline": [] + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "aggregate" - } - }, - { - "commandFailedEvent": { - "commandName": "aggregate" - } - }, - { - "commandStartedEvent": { - "commandName": "aggregate" - } - }, - { - "commandFailedEvent": { - "commandName": "aggregate" - } - }, - { - "commandStartedEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } }, { "commandFailedEvent": { - "commandName": "aggregate" - } - }, - { - "commandStartedEvent": { - "commandName": "aggregate" - } - }, - { - "commandSucceededEvent": { - "commandName": "aggregate" + "commandName": "listCollections" } } ] @@ -912,7 +1139,7 @@ ] }, { - "description": "collection.countDocuments retries using operation loop", + "description": "database.runCommand retries using operation loop", "operations": [ { "name": "failPoint", @@ -926,7 +1153,7 @@ }, "data": { "failCommands": [ - "aggregate" + "ping" ], "errorLabels": [ "RetryableError", @@ -938,10 +1165,13 @@ } }, { - "object": "collection", - "name": "countDocuments", + "name": "runCommand", + "object": "database", "arguments": { - "filter": {} + "command": { + "ping": 1 + }, + "commandName": "ping" } } ], @@ -951,42 +1181,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "aggregate" + "commandName": "ping" } }, { "commandFailedEvent": { - "commandName": "aggregate" + "commandName": "ping" } }, { "commandStartedEvent": { - "commandName": "aggregate" + "commandName": "ping" } }, { "commandFailedEvent": { - "commandName": "aggregate" + "commandName": "ping" } }, { "commandStartedEvent": { - "commandName": "aggregate" + 
"commandName": "ping" } }, { "commandFailedEvent": { - "commandName": "aggregate" + "commandName": "ping" } }, { "commandStartedEvent": { - "commandName": "aggregate" + "commandName": "ping" } }, { "commandSucceededEvent": { - "commandName": "aggregate" + "commandName": "ping" } } ] @@ -994,7 +1224,7 @@ ] }, { - "description": "collection.estimatedDocumentCount retries using operation loop", + "description": "database.runCommand (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -1004,11 +1234,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "count" + "ping" ], "errorLabels": [ "RetryableError", @@ -1020,52 +1250,32 @@ } }, { - "object": "collection", - "name": "estimatedDocumentCount" + "name": "runCommand", + "object": "database_retryReads_false", + "arguments": { + "command": { + "ping": 1 + }, + "commandName": "ping" + }, + "expectError": { + "isError": true, + "isClientError": false + } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "count" - } - }, - { - "commandFailedEvent": { - "commandName": "count" - } - }, - { - "commandStartedEvent": { - "commandName": "count" - } - }, - { - "commandFailedEvent": { - "commandName": "count" - } - }, - { - "commandStartedEvent": { - "commandName": "count" + "commandName": "ping" } }, { "commandFailedEvent": { - "commandName": "count" - } - }, - { - "commandStartedEvent": { - "commandName": "count" - } - }, - { - "commandSucceededEvent": { - "commandName": "count" + "commandName": "ping" } } ] @@ -1073,7 +1283,7 @@ ] }, { - "description": "collection.distinct retries using operation loop", + "description": "database.runCommand (write) does not retry if retryWrites=false", "operations": [ { "name": "failPoint", @@ -1083,11 +1293,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 
1 }, "data": { "failCommands": [ - "distinct" + "ping" ], "errorLabels": [ "RetryableError", @@ -1099,56 +1309,32 @@ } }, { - "object": "collection", - "name": "distinct", + "name": "runCommand", + "object": "database_retryWrites_false", "arguments": { - "fieldName": "x", - "filter": {} + "command": { + "ping": 1 + }, + "commandName": "ping" + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryWrites_false", "events": [ { "commandStartedEvent": { - "commandName": "distinct" - } - }, - { - "commandFailedEvent": { - "commandName": "distinct" - } - }, - { - "commandStartedEvent": { - "commandName": "distinct" - } - }, - { - "commandFailedEvent": { - "commandName": "distinct" - } - }, - { - "commandStartedEvent": { - "commandName": "distinct" + "commandName": "ping" } }, { "commandFailedEvent": { - "commandName": "distinct" - } - }, - { - "commandStartedEvent": { - "commandName": "distinct" - } - }, - { - "commandSucceededEvent": { - "commandName": "distinct" + "commandName": "ping" } } ] @@ -1156,7 +1342,7 @@ ] }, { - "description": "collection.find retries using operation loop", + "description": "database.createChangeStream retries using operation loop", "operations": [ { "name": "failPoint", @@ -1170,7 +1356,7 @@ }, "data": { "failCommands": [ - "find" + "aggregate" ], "errorLabels": [ "RetryableError", @@ -1182,10 +1368,10 @@ } }, { - "object": "collection", - "name": "find", + "name": "createChangeStream", + "object": "database", "arguments": { - "filter": {} + "pipeline": [] } } ], @@ -1195,42 +1381,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "find" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "find" + "commandName": "aggregate" } }, { "commandStartedEvent": { - "commandName": "find" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "find" + "commandName": "aggregate" } }, { "commandStartedEvent": { 
- "commandName": "find" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "find" + "commandName": "aggregate" } }, { "commandStartedEvent": { - "commandName": "find" + "commandName": "aggregate" } }, { "commandSucceededEvent": { - "commandName": "find" + "commandName": "aggregate" } } ] @@ -1238,7 +1424,7 @@ ] }, { - "description": "collection.findOne retries using operation loop", + "description": "database.createChangeStream (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -1248,11 +1434,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "find" + "aggregate" ], "errorLabels": [ "RetryableError", @@ -1264,55 +1450,29 @@ } }, { - "object": "collection", - "name": "findOne", + "name": "createChangeStream", + "object": "database_retryReads_false", "arguments": { - "filter": {} + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "find" - } - }, - { - "commandFailedEvent": { - "commandName": "find" - } - }, - { - "commandStartedEvent": { - "commandName": "find" - } - }, - { - "commandFailedEvent": { - "commandName": "find" - } - }, - { - "commandStartedEvent": { - "commandName": "find" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "find" - } - }, - { - "commandStartedEvent": { - "commandName": "find" - } - }, - { - "commandSucceededEvent": { - "commandName": "find" + "commandName": "aggregate" } } ] @@ -1320,7 +1480,7 @@ ] }, { - "description": "collection.listIndexes retries using operation loop", + "description": "collection.aggregate retries using operation loop", "operations": [ { "name": "failPoint", @@ -1334,7 +1494,7 @@ }, "data": { "failCommands": [ - "listIndexes" + "aggregate" ], "errorLabels": [ "RetryableError", @@ 
-1346,8 +1506,11 @@ } }, { + "name": "aggregate", "object": "collection", - "name": "listIndexes" + "arguments": { + "pipeline": [] + } } ], "expectEvents": [ @@ -1356,42 +1519,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } }, { "commandStartedEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } }, { "commandStartedEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } }, { "commandStartedEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } }, { "commandSucceededEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } } ] @@ -1399,7 +1562,7 @@ ] }, { - "description": "collection.listIndexNames retries using operation loop", + "description": "collection.aggregate (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -1409,11 +1572,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "listIndexes" + "aggregate" ], "errorLabels": [ "RetryableError", @@ -1425,52 +1588,29 @@ } }, { - "object": "collection", - "name": "listIndexNames" + "name": "aggregate", + "object": "collection_retryReads_false", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "listIndexes" - } - }, - { - "commandFailedEvent": { - "commandName": "listIndexes" - } - }, - { - "commandStartedEvent": { - "commandName": "listIndexes" - } - }, - { - "commandFailedEvent": { - "commandName": "listIndexes" - } - }, 
- { - "commandStartedEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "listIndexes" - } - }, - { - "commandStartedEvent": { - "commandName": "listIndexes" - } - }, - { - "commandSucceededEvent": { - "commandName": "listIndexes" + "commandName": "aggregate" } } ] @@ -1478,7 +1618,7 @@ ] }, { - "description": "collection.createChangeStream retries using operation loop", + "description": "collection.countDocuments retries using operation loop", "operations": [ { "name": "failPoint", @@ -1504,10 +1644,10 @@ } }, { + "name": "countDocuments", "object": "collection", - "name": "createChangeStream", "arguments": { - "pipeline": [] + "filter": {} } } ], @@ -1560,7 +1700,7 @@ ] }, { - "description": "collection.insertOne retries using operation loop", + "description": "collection.countDocuments (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -1570,11 +1710,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "insert" + "aggregate" ], "errorLabels": [ "RetryableError", @@ -1586,58 +1726,29 @@ } }, { - "object": "collection", - "name": "insertOne", + "name": "countDocuments", + "object": "collection_retryReads_false", "arguments": { - "document": { - "_id": 2, - "x": 22 - } + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "insert" - } - }, - { - "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandStartedEvent": { - "commandName": "insert" - } - }, - { - "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandStartedEvent": { - "commandName": "insert" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandStartedEvent": { - 
"commandName": "insert" - } - }, - { - "commandSucceededEvent": { - "commandName": "insert" + "commandName": "aggregate" } } ] @@ -1645,7 +1756,7 @@ ] }, { - "description": "collection.insertMany retries using operation loop", + "description": "collection.estimatedDocumentCount retries using operation loop", "operations": [ { "name": "failPoint", @@ -1659,7 +1770,7 @@ }, "data": { "failCommands": [ - "insert" + "count" ], "errorLabels": [ "RetryableError", @@ -1671,16 +1782,8 @@ } }, { - "object": "collection", - "name": "insertMany", - "arguments": { - "documents": [ - { - "_id": 2, - "x": 22 - } - ] - } + "name": "estimatedDocumentCount", + "object": "collection" } ], "expectEvents": [ @@ -1689,42 +1792,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "insert" + "commandName": "count" } }, { "commandFailedEvent": { - "commandName": "insert" + "commandName": "count" } }, { "commandStartedEvent": { - "commandName": "insert" + "commandName": "count" } }, { "commandFailedEvent": { - "commandName": "insert" + "commandName": "count" } }, { "commandStartedEvent": { - "commandName": "insert" + "commandName": "count" } }, { "commandFailedEvent": { - "commandName": "insert" + "commandName": "count" } }, { "commandStartedEvent": { - "commandName": "insert" + "commandName": "count" } }, { "commandSucceededEvent": { - "commandName": "insert" + "commandName": "count" } } ] @@ -1732,7 +1835,7 @@ ] }, { - "description": "collection.deleteOne retries using operation loop", + "description": "collection.estimatedDocumentCount (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -1742,11 +1845,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "delete" + "count" ], "errorLabels": [ "RetryableError", @@ -1758,55 +1861,26 @@ } }, { - "object": "collection", - "name": "deleteOne", - "arguments": { - "filter": {} + "name": "estimatedDocumentCount", + "object": 
"collection_retryReads_false", + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "delete" - } - }, - { - "commandFailedEvent": { - "commandName": "delete" - } - }, - { - "commandStartedEvent": { - "commandName": "delete" - } - }, - { - "commandFailedEvent": { - "commandName": "delete" - } - }, - { - "commandStartedEvent": { - "commandName": "delete" + "commandName": "count" } }, { "commandFailedEvent": { - "commandName": "delete" - } - }, - { - "commandStartedEvent": { - "commandName": "delete" - } - }, - { - "commandSucceededEvent": { - "commandName": "delete" + "commandName": "count" } } ] @@ -1814,7 +1888,7 @@ ] }, { - "description": "collection.deleteMany retries using operation loop", + "description": "collection.distinct retries using operation loop", "operations": [ { "name": "failPoint", @@ -1828,7 +1902,7 @@ }, "data": { "failCommands": [ - "delete" + "distinct" ], "errorLabels": [ "RetryableError", @@ -1840,9 +1914,10 @@ } }, { + "name": "distinct", "object": "collection", - "name": "deleteMany", "arguments": { + "fieldName": "x", "filter": {} } } @@ -1853,42 +1928,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "delete" + "commandName": "distinct" } }, { "commandFailedEvent": { - "commandName": "delete" + "commandName": "distinct" } }, { "commandStartedEvent": { - "commandName": "delete" + "commandName": "distinct" } }, { "commandFailedEvent": { - "commandName": "delete" + "commandName": "distinct" } }, { "commandStartedEvent": { - "commandName": "delete" + "commandName": "distinct" } }, { "commandFailedEvent": { - "commandName": "delete" + "commandName": "distinct" } }, { "commandStartedEvent": { - "commandName": "delete" + "commandName": "distinct" } }, { "commandSucceededEvent": { - "commandName": "delete" + "commandName": "distinct" } } ] @@ -1896,7 +1971,7 @@ ] }, { - 
"description": "collection.replaceOne retries using operation loop", + "description": "collection.distinct (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -1906,11 +1981,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "update" + "distinct" ], "errorLabels": [ "RetryableError", @@ -1922,58 +1997,30 @@ } }, { - "object": "collection", - "name": "replaceOne", + "name": "distinct", + "object": "collection_retryReads_false", "arguments": { - "filter": {}, - "replacement": { - "x": 22 - } + "fieldName": "x", + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "update" - } - }, - { - "commandFailedEvent": { - "commandName": "update" - } - }, - { - "commandStartedEvent": { - "commandName": "update" - } - }, - { - "commandFailedEvent": { - "commandName": "update" - } - }, - { - "commandStartedEvent": { - "commandName": "update" + "commandName": "distinct" } }, { "commandFailedEvent": { - "commandName": "update" - } - }, - { - "commandStartedEvent": { - "commandName": "update" - } - }, - { - "commandSucceededEvent": { - "commandName": "update" + "commandName": "distinct" } } ] @@ -1981,7 +2028,7 @@ ] }, { - "description": "collection.updateOne retries using operation loop", + "description": "collection.find retries using operation loop", "operations": [ { "name": "failPoint", @@ -1995,7 +2042,7 @@ }, "data": { "failCommands": [ - "update" + "find" ], "errorLabels": [ "RetryableError", @@ -2007,15 +2054,10 @@ } }, { + "name": "find", "object": "collection", - "name": "updateOne", "arguments": { - "filter": {}, - "update": { - "$set": { - "x": 22 - } - } + "filter": {} } } ], @@ -2025,42 +2067,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "update" + "commandName": "find" } 
}, { "commandFailedEvent": { - "commandName": "update" + "commandName": "find" } }, { "commandStartedEvent": { - "commandName": "update" + "commandName": "find" } }, { "commandFailedEvent": { - "commandName": "update" + "commandName": "find" } }, { "commandStartedEvent": { - "commandName": "update" + "commandName": "find" } }, { "commandFailedEvent": { - "commandName": "update" + "commandName": "find" } }, { "commandStartedEvent": { - "commandName": "update" + "commandName": "find" } }, { "commandSucceededEvent": { - "commandName": "update" + "commandName": "find" } } ] @@ -2068,7 +2110,7 @@ ] }, { - "description": "collection.updateMany retries using operation loop", + "description": "collection.find (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -2078,11 +2120,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "update" + "find" ], "errorLabels": [ "RetryableError", @@ -2094,60 +2136,29 @@ } }, { - "object": "collection", - "name": "updateMany", + "name": "find", + "object": "collection_retryReads_false", "arguments": { - "filter": {}, - "update": { - "$set": { - "x": 22 - } - } + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "update" - } - }, - { - "commandFailedEvent": { - "commandName": "update" - } - }, - { - "commandStartedEvent": { - "commandName": "update" - } - }, - { - "commandFailedEvent": { - "commandName": "update" - } - }, - { - "commandStartedEvent": { - "commandName": "update" + "commandName": "find" } }, { "commandFailedEvent": { - "commandName": "update" - } - }, - { - "commandStartedEvent": { - "commandName": "update" - } - }, - { - "commandSucceededEvent": { - "commandName": "update" + "commandName": "find" } } ] @@ -2155,7 +2166,7 @@ ] }, { - 
"description": "collection.findOneAndDelete retries using operation loop", + "description": "collection.findOne retries using operation loop", "operations": [ { "name": "failPoint", @@ -2169,7 +2180,7 @@ }, "data": { "failCommands": [ - "findAndModify" + "find" ], "errorLabels": [ "RetryableError", @@ -2181,8 +2192,8 @@ } }, { + "name": "findOne", "object": "collection", - "name": "findOneAndDelete", "arguments": { "filter": {} } @@ -2194,42 +2205,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "find" } }, { "commandFailedEvent": { - "commandName": "findAndModify" + "commandName": "find" } }, { "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "find" } }, { "commandFailedEvent": { - "commandName": "findAndModify" + "commandName": "find" } }, { "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "find" } }, { "commandFailedEvent": { - "commandName": "findAndModify" + "commandName": "find" } }, { "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "find" } }, { "commandSucceededEvent": { - "commandName": "findAndModify" + "commandName": "find" } } ] @@ -2237,7 +2248,7 @@ ] }, { - "description": "collection.findOneAndReplace retries using operation loop", + "description": "collection.findOne (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -2247,11 +2258,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "findAndModify" + "find" ], "errorLabels": [ "RetryableError", @@ -2263,58 +2274,29 @@ } }, { - "object": "collection", - "name": "findOneAndReplace", + "name": "findOne", + "object": "collection_retryReads_false", "arguments": { - "filter": {}, - "replacement": { - "x": 22 - } + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": 
"client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "findAndModify" - } - }, - { - "commandFailedEvent": { - "commandName": "findAndModify" - } - }, - { - "commandStartedEvent": { - "commandName": "findAndModify" - } - }, - { - "commandFailedEvent": { - "commandName": "findAndModify" - } - }, - { - "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "find" } }, { "commandFailedEvent": { - "commandName": "findAndModify" - } - }, - { - "commandStartedEvent": { - "commandName": "findAndModify" - } - }, - { - "commandSucceededEvent": { - "commandName": "findAndModify" + "commandName": "find" } } ] @@ -2322,7 +2304,7 @@ ] }, { - "description": "collection.findOneAndUpdate retries using operation loop", + "description": "collection.listIndexes retries using operation loop", "operations": [ { "name": "failPoint", @@ -2336,7 +2318,7 @@ }, "data": { "failCommands": [ - "findAndModify" + "listIndexes" ], "errorLabels": [ "RetryableError", @@ -2348,16 +2330,8 @@ } }, { - "object": "collection", - "name": "findOneAndUpdate", - "arguments": { - "filter": {}, - "update": { - "$set": { - "x": 22 - } - } - } + "name": "listIndexes", + "object": "collection" } ], "expectEvents": [ @@ -2366,42 +2340,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "listIndexes" } }, { "commandFailedEvent": { - "commandName": "findAndModify" + "commandName": "listIndexes" } }, { "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "listIndexes" } }, { "commandFailedEvent": { - "commandName": "findAndModify" + "commandName": "listIndexes" } }, { "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "listIndexes" } }, { "commandFailedEvent": { - "commandName": "findAndModify" + "commandName": "listIndexes" } }, { "commandStartedEvent": { - "commandName": "findAndModify" + "commandName": "listIndexes" } }, { "commandSucceededEvent": { - "commandName": 
"findAndModify" + "commandName": "listIndexes" } } ] @@ -2409,7 +2383,7 @@ ] }, { - "description": "collection.bulkWrite retries using operation loop", + "description": "collection.listIndexes (read) does not retry if retryReads=false", "operations": [ { "name": "failPoint", @@ -2419,11 +2393,11 @@ "failPoint": { "configureFailPoint": "failCommand", "mode": { - "times": 3 + "times": 1 }, "data": { "failCommands": [ - "insert" + "listIndexes" ], "errorLabels": [ "RetryableError", @@ -2435,64 +2409,26 @@ } }, { - "object": "collection", - "name": "bulkWrite", - "arguments": { - "requests": [ - { - "insertOne": { - "document": { - "_id": 2, - "x": 22 - } - } - } - ] + "name": "listIndexes", + "object": "collection_retryReads_false", + "expectError": { + "isError": true, + "isClientError": false } } ], "expectEvents": [ { - "client": "client", + "client": "client_retryReads_false", "events": [ { "commandStartedEvent": { - "commandName": "insert" - } - }, - { - "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandStartedEvent": { - "commandName": "insert" - } - }, - { - "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandStartedEvent": { - "commandName": "insert" + "commandName": "listIndexes" } }, { "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandStartedEvent": { - "commandName": "insert" - } - }, - { - "commandSucceededEvent": { - "commandName": "insert" + "commandName": "listIndexes" } } ] @@ -2500,7 +2436,7 @@ ] }, { - "description": "collection.createIndex retries using operation loop", + "description": "collection.listIndexNames retries using operation loop", "operations": [ { "name": "failPoint", @@ -2514,7 +2450,7 @@ }, "data": { "failCommands": [ - "createIndexes" + "listIndexes" ], "errorLabels": [ "RetryableError", @@ -2526,14 +2462,8 @@ } }, { - "object": "collection", - "name": "createIndex", - "arguments": { - "keys": { - "x": 11 - }, - "name": "x_11" - } + "name": "listIndexNames", + 
"object": "collection" } ], "expectEvents": [ @@ -2542,42 +2472,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "createIndexes" + "commandName": "listIndexes" } }, { "commandFailedEvent": { - "commandName": "createIndexes" + "commandName": "listIndexes" } }, { "commandStartedEvent": { - "commandName": "createIndexes" + "commandName": "listIndexes" } }, { "commandFailedEvent": { - "commandName": "createIndexes" + "commandName": "listIndexes" } }, { "commandStartedEvent": { - "commandName": "createIndexes" + "commandName": "listIndexes" } }, { "commandFailedEvent": { - "commandName": "createIndexes" + "commandName": "listIndexes" } }, { "commandStartedEvent": { - "commandName": "createIndexes" + "commandName": "listIndexes" } }, { "commandSucceededEvent": { - "commandName": "createIndexes" + "commandName": "listIndexes" } } ] @@ -2585,18 +2515,61 @@ ] }, { - "description": "collection.dropIndex retries using operation loop", + "description": "collection.listIndexNames (read) does not retry if retryReads=false", "operations": [ { - "object": "retryable-writes-tests", - "name": "createIndex", + "name": "failPoint", + "object": "testRunner", "arguments": { - "keys": { - "x": 11 - }, - "name": "x_11" + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } } }, + { + "name": "listIndexNames", + "object": "collection_retryReads_false", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.createChangeStream retries using operation loop", + "operations": [ { "name": 
"failPoint", "object": "testRunner", @@ -2609,7 +2582,7 @@ }, "data": { "failCommands": [ - "dropIndexes" + "aggregate" ], "errorLabels": [ "RetryableError", @@ -2621,10 +2594,10 @@ } }, { + "name": "createChangeStream", "object": "collection", - "name": "dropIndex", "arguments": { - "name": "x_11" + "pipeline": [] } } ], @@ -2634,42 +2607,42 @@ "events": [ { "commandStartedEvent": { - "commandName": "dropIndexes" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "dropIndexes" + "commandName": "aggregate" } }, { "commandStartedEvent": { - "commandName": "dropIndexes" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "dropIndexes" + "commandName": "aggregate" } }, { "commandStartedEvent": { - "commandName": "dropIndexes" + "commandName": "aggregate" } }, { "commandFailedEvent": { - "commandName": "dropIndexes" + "commandName": "aggregate" } }, { "commandStartedEvent": { - "commandName": "dropIndexes" + "commandName": "aggregate" } }, { "commandSucceededEvent": { - "commandName": "dropIndexes" + "commandName": "aggregate" } } ] @@ -2677,7 +2650,63 @@ ] }, { - "description": "collection.dropIndexes retries using operation loop", + "description": "collection.createChangeStream (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "collection_retryReads_false", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + 
"commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.insertOne retries using operation loop", "operations": [ { "name": "failPoint", @@ -2691,7 +2720,7 @@ }, "data": { "failCommands": [ - "dropIndexes" + "insert" ], "errorLabels": [ "RetryableError", @@ -2703,8 +2732,14 @@ } }, { + "name": "insertOne", "object": "collection", - "name": "dropIndexes" + "arguments": { + "document": { + "_id": 2, + "x": 22 + } + } } ], "expectEvents": [ @@ -2713,32 +2748,1922 @@ "events": [ { "commandStartedEvent": { - "commandName": "dropIndexes" + "commandName": "insert" } }, { "commandFailedEvent": { - "commandName": "dropIndexes" + "commandName": "insert" } }, { "commandStartedEvent": { - "commandName": "dropIndexes" + "commandName": "insert" } }, { "commandFailedEvent": { - "commandName": "dropIndexes" + "commandName": "insert" } }, { "commandStartedEvent": { - "commandName": "dropIndexes" + "commandName": "insert" } }, { "commandFailedEvent": { - "commandName": "dropIndexes" + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.insertOne (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "insertOne", + "object": "collection_retryWrites_false", + "arguments": { + "document": { + "_id": 2, + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + 
"commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.insertMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 2, + "x": 22 + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.insertMany (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "insertMany", + "object": "collection_retryWrites_false", + "arguments": { + "documents": [ + { + "_id": 2, + "x": 22 + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": 
"client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.deleteOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteOne", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandSucceededEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteOne (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteOne", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": 
"client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteMany", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandSucceededEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteMany (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteMany", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": 
"client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.replaceOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "replaceOne", + "object": "collection", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.replaceOne (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "replaceOne", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + }, + "expectError": { + "isError": true, + 
"isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateOne", + "object": "collection", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateOne (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateOne", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "update": 
{ + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateMany", + "object": "collection", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateMany (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateMany", + 
"object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndDelete retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndDelete", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndDelete (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + 
"errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndDelete", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndReplace retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndReplace", + "object": "collection", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndReplace (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + 
"failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndReplace", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndUpdate retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndUpdate", + "object": "collection", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": 
"collection.findOneAndUpdate (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndUpdate", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.bulkWrite retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "bulkWrite", + "object": "collection", + "arguments": { + "requests": [ + { + "insertOne": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + 
"commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.bulkWrite (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "bulkWrite", + "object": "collection_retryWrites_false", + "arguments": { + "requests": [ + { + "insertOne": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.createIndex retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "createIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createIndex", + "object": "collection", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + 
"commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "createIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.createIndex (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "createIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createIndex", + "object": "collection_retryWrites_false", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndex retries using operation loop", + "operations": [ + { + "name": "createIndex", + "object": "retryable-writes-tests", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndex", + "object": "collection", + "arguments": { + "name": "x_11" + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": 
"dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndex (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "createIndex", + "object": "retryable-writes-tests", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndex", + "object": "collection_retryWrites_false", + "arguments": { + "name": "x_11" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndexes retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": 
"dropIndexes", + "object": "collection" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" } }, { @@ -2754,6 +4679,205 @@ ] } ] + }, + { + "description": "collection.dropIndexes (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndexes", + "object": "collection_retryWrites_false", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection", + "arguments": { + "pipeline": [ + { + "$out": "output" + } + ] + } + } + ], + "expectEvents": [ + { + 
"client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection_retryWrites_false", + "arguments": { + "pipeline": [ + { + "$out": "output" + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] } ] } diff --git a/test/client-backpressure/backpressure-retry-max-attempts.json b/test/client-backpressure/backpressure-retry-max-attempts.json index 1de8cb38d4..1e6f46f076 100644 --- a/test/client-backpressure/backpressure-retry-max-attempts.json +++ b/test/client-backpressure/backpressure-retry-max-attempts.json @@ -20,6 +20,9 @@ "commandStartedEvent", "commandSucceededEvent", "commandFailedEvent" + ], + "ignoreCommandMonitoringEvents": [ + "killCursors" ] } }, @@ -89,8 +92,8 @@ } }, { - "object": 
"client", "name": "listDatabases", + "object": "client", "arguments": { "filter": {} }, @@ -193,8 +196,8 @@ } }, { - "object": "client", "name": "listDatabaseNames", + "object": "client", "expectError": { "isError": true, "isClientError": false @@ -294,8 +297,8 @@ } }, { - "object": "client", "name": "createChangeStream", + "object": "client", "arguments": { "pipeline": [] }, @@ -403,8 +406,8 @@ } }, { - "object": "client", "name": "clientBulkWrite", + "object": "client", "arguments": { "models": [ { @@ -517,8 +520,8 @@ } }, { - "object": "database", "name": "aggregate", + "object": "database", "arguments": { "pipeline": [ { @@ -628,8 +631,8 @@ } }, { - "object": "database", "name": "listCollections", + "object": "database", "arguments": { "filter": {} }, @@ -732,8 +735,8 @@ } }, { - "object": "database", "name": "listCollectionNames", + "object": "database", "arguments": { "filter": {} }, @@ -836,8 +839,8 @@ } }, { - "object": "database", "name": "runCommand", + "object": "database", "arguments": { "command": { "ping": 1 @@ -943,8 +946,8 @@ } }, { - "object": "database", "name": "createChangeStream", + "object": "database", "arguments": { "pipeline": [] }, @@ -1047,8 +1050,8 @@ } }, { - "object": "collection", "name": "aggregate", + "object": "collection", "arguments": { "pipeline": [] }, @@ -1151,8 +1154,8 @@ } }, { - "object": "collection", "name": "countDocuments", + "object": "collection", "arguments": { "filter": {} }, @@ -1255,8 +1258,8 @@ } }, { - "object": "collection", "name": "estimatedDocumentCount", + "object": "collection", "expectError": { "isError": true, "isClientError": false @@ -1356,8 +1359,8 @@ } }, { - "object": "collection", "name": "distinct", + "object": "collection", "arguments": { "fieldName": "x", "filter": {} @@ -1461,8 +1464,8 @@ } }, { - "object": "collection", "name": "find", + "object": "collection", "arguments": { "filter": {} }, @@ -1565,8 +1568,8 @@ } }, { - "object": "collection", "name": "findOne", + "object": "collection", 
"arguments": { "filter": {} }, @@ -1669,8 +1672,8 @@ } }, { - "object": "collection", "name": "listIndexes", + "object": "collection", "expectError": { "isError": true, "isClientError": false @@ -1770,8 +1773,8 @@ } }, { - "object": "collection", "name": "listIndexNames", + "object": "collection", "expectError": { "isError": true, "isClientError": false @@ -1871,8 +1874,8 @@ } }, { - "object": "collection", "name": "createChangeStream", + "object": "collection", "arguments": { "pipeline": [] }, @@ -1975,8 +1978,8 @@ } }, { - "object": "collection", "name": "insertOne", + "object": "collection", "arguments": { "document": { "_id": 2, @@ -2082,8 +2085,8 @@ } }, { - "object": "collection", "name": "insertMany", + "object": "collection", "arguments": { "documents": [ { @@ -2191,8 +2194,8 @@ } }, { - "object": "collection", "name": "deleteOne", + "object": "collection", "arguments": { "filter": {} }, @@ -2295,8 +2298,8 @@ } }, { - "object": "collection", "name": "deleteMany", + "object": "collection", "arguments": { "filter": {} }, @@ -2399,8 +2402,8 @@ } }, { - "object": "collection", "name": "replaceOne", + "object": "collection", "arguments": { "filter": {}, "replacement": { @@ -2506,8 +2509,8 @@ } }, { - "object": "collection", "name": "updateOne", + "object": "collection", "arguments": { "filter": {}, "update": { @@ -2615,8 +2618,8 @@ } }, { - "object": "collection", "name": "updateMany", + "object": "collection", "arguments": { "filter": {}, "update": { @@ -2724,8 +2727,8 @@ } }, { - "object": "collection", "name": "findOneAndDelete", + "object": "collection", "arguments": { "filter": {} }, @@ -2828,8 +2831,8 @@ } }, { - "object": "collection", "name": "findOneAndReplace", + "object": "collection", "arguments": { "filter": {}, "replacement": { @@ -2935,8 +2938,8 @@ } }, { - "object": "collection", "name": "findOneAndUpdate", + "object": "collection", "arguments": { "filter": {}, "update": { @@ -3044,8 +3047,8 @@ } }, { - "object": "collection", "name": 
"bulkWrite", + "object": "collection", "arguments": { "requests": [ { @@ -3157,8 +3160,8 @@ } }, { - "object": "collection", "name": "createIndex", + "object": "collection", "arguments": { "keys": { "x": 11 @@ -3264,8 +3267,8 @@ } }, { - "object": "collection", "name": "dropIndex", + "object": "collection", "arguments": { "name": "x_11" }, @@ -3368,8 +3371,8 @@ } }, { - "object": "collection", "name": "dropIndexes", + "object": "collection", "expectError": { "isError": true, "isClientError": false @@ -3443,6 +3446,114 @@ ] } ] + }, + { + "description": "collection.aggregate retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection", + "arguments": { + "pipeline": [ + { + "$out": "output" + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + 
"commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] } ] } diff --git a/test/test_client_backpressure.py b/test/test_client_backpressure.py index f3146c9450..0a2e609413 100644 --- a/test/test_client_backpressure.py +++ b/test/test_client_backpressure.py @@ -227,7 +227,7 @@ def setUp(self) -> None: self.listener.reset() self.app_name = self.__class__.__name__.lower() self.client = self.rs_or_single_client( - event_listeners=[self.listener], retryWrites=False, appName=self.app_name + event_listeners=[self.listener], appName=self.app_name ) @patch("random.random") From a7fc68f3ee84889945d487493daf2067293c05f4 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Fri, 13 Mar 2026 10:21:59 -0400 Subject: [PATCH 14/14] SH + CP review --- pymongo/asynchronous/client_bulk.py | 1 - pymongo/synchronous/client_bulk.py | 1 - test/asynchronous/test_client_backpressure.py | 6 ++---- test/test_client_backpressure.py | 6 ++---- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pymongo/asynchronous/client_bulk.py b/pymongo/asynchronous/client_bulk.py index bda374e9b9..508b8e41c3 100644 --- a/pymongo/asynchronous/client_bulk.py +++ b/pymongo/asynchronous/client_bulk.py @@ -569,7 +569,6 @@ async def _execute_command( and "errorLabels" in error.details and isinstance(error.details["errorLabels"], list) and "RetryableError" in error.details["errorLabels"] - and "SystemOverloadedError" in error.details["errorLabels"] ) # Synthesize the full bulk result without modifying the diff --git a/pymongo/synchronous/client_bulk.py b/pymongo/synchronous/client_bulk.py index 30f32488ec..e8167bcedc 100644 --- a/pymongo/synchronous/client_bulk.py +++ b/pymongo/synchronous/client_bulk.py @@ -567,7 +567,6 @@ def _execute_command( and "errorLabels" in error.details and isinstance(error.details["errorLabels"], list) and "RetryableError" in error.details["errorLabels"] - and "SystemOverloadedError" in error.details["errorLabels"] ) # Synthesize the full bulk result without 
modifying the diff --git a/test/asynchronous/test_client_backpressure.py b/test/asynchronous/test_client_backpressure.py index 9e617d74e6..d79a9114dc 100644 --- a/test/asynchronous/test_client_backpressure.py +++ b/test/asynchronous/test_client_backpressure.py @@ -96,20 +96,18 @@ async def test_retry_overload_error_find(self): @async_client_context.require_failCommand_appName async def test_retry_overload_error_insert_one(self): - await self.db.t.insert_one({"x": 1}) - # Ensure command is retried on overload error. fail_many = mock_overload_error.copy() fail_many["mode"] = {"times": _MAX_RETRIES} async with self.fail_point(fail_many): - await self.db.t.find_one() + await self.db.t.insert_one({"x": 1}) # Ensure command stops retrying after _MAX_RETRIES. fail_too_many = mock_overload_error.copy() fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} async with self.fail_point(fail_too_many): with self.assertRaises(PyMongoError) as error: - await self.db.t.find_one() + await self.db.t.insert_one({"x": 1}) self.assertIn("RetryableError", str(error.exception)) self.assertIn("SystemOverloadedError", str(error.exception)) diff --git a/test/test_client_backpressure.py b/test/test_client_backpressure.py index 0a2e609413..b82846d35d 100644 --- a/test/test_client_backpressure.py +++ b/test/test_client_backpressure.py @@ -96,20 +96,18 @@ def test_retry_overload_error_find(self): @client_context.require_failCommand_appName def test_retry_overload_error_insert_one(self): - self.db.t.insert_one({"x": 1}) - # Ensure command is retried on overload error. fail_many = mock_overload_error.copy() fail_many["mode"] = {"times": _MAX_RETRIES} with self.fail_point(fail_many): - self.db.t.find_one() + self.db.t.insert_one({"x": 1}) # Ensure command stops retrying after _MAX_RETRIES. 
fail_too_many = mock_overload_error.copy() fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} with self.fail_point(fail_too_many): with self.assertRaises(PyMongoError) as error: - self.db.t.find_one() + self.db.t.insert_one({"x": 1}) self.assertIn("RetryableError", str(error.exception)) self.assertIn("SystemOverloadedError", str(error.exception))