From 50c10b12151651abac666f99aa47792b65aadb32 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 08:51:10 +0000 Subject: [PATCH 01/14] Adding more scenario-based tests --- .../CUSTOMER_WORKFLOW_COVERAGE_MAP.md | 83 +++++ sdk/cosmos/azure-cosmos-tests/pom.xml | 54 +++ .../com/azure/cosmos/rx/TestSuiteBase.java | 4 +- ...erWorkflowAvailabilityFaultMatrixTest.java | 194 +++++++++++ ...stomerWorkflowChangeFeedProcessorTest.java | 150 ++++++++ ...ustomerWorkflowDaoStyleOperationsTest.java | 147 ++++++++ .../CustomerWorkflowHighE2ETimeoutTest.java | 217 ++++++++++++ .../CustomerWorkflowLatestCommittedTest.java | 169 +++++++++ ...kflowPartitionLevelCircuitBreakerTest.java | 124 +++++++ .../CustomerWorkflowRequestOptionsTest.java | 157 +++++++++ .../CustomerWorkflowSessionTokenTest.java | 91 +++++ ...rWorkflowSingleMasterAvailabilityTest.java | 249 ++++++++++++++ .../CustomerWorkflowStoredProcedureTest.java | 96 ++++++ .../customer/CustomerWorkflowTestBase.java | 320 ++++++++++++++++++ .../fi-customer-workflows-testng.xml | 38 +++ .../fi-sm-customer-workflows-testng.xml | 38 +++ sdk/cosmos/live-platform-matrix.json | 20 +- 17 files changed, 2148 insertions(+), 3 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-tests/CUSTOMER_WORKFLOW_COVERAGE_MAP.md create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java create mode 100644 
sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/resources/fi-customer-workflows-testng.xml create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/resources/fi-sm-customer-workflows-testng.xml diff --git a/sdk/cosmos/azure-cosmos-tests/CUSTOMER_WORKFLOW_COVERAGE_MAP.md b/sdk/cosmos/azure-cosmos-tests/CUSTOMER_WORKFLOW_COVERAGE_MAP.md new file mode 100644 index 000000000000..52d55708292f --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/CUSTOMER_WORKFLOW_COVERAGE_MAP.md @@ -0,0 +1,83 @@ +# Customer Workflow Coverage Map + +This file tracks the customer release-validation workflows from `e:\benchmark-tests` against the runnable Cosmos SDK tests in this module. Keep it updated as customer-derived workflows are ported, disabled, or marked duplicate. + +## Classification + +| Classification | Meaning | +|---|---| +| `gap` | Customer workflow has no equivalent SDK scenario coverage. | +| `partial` | SDK covers the primitives, but not the customer-style operation chain or diagnostics assertion. 
| +| `duplicate` | Existing SDK tests already cover the behavior with enough release-signal fidelity. | +| `wrapper-specific` | Assertion belongs to the customer's wrapper defaults, not an SDK contract. | +| `deferred` | Candidate workflow, but not enabled until runtime/flakiness/account-shape trade-offs are reviewed. | + +## Initial Coverage Triage + +| Customer source area | Customer workflow signal | Existing SDK references | Classification | Initial action | +|---|---|---|---|---| +| `test/CosmosDaoTest.java` | Create/read/query/upsert/delete, readAll, bulk/batch, custom serializer through a DAO wrapper | `CosmosItemTest`, `DocumentCrudTest`, `CosmosBulkTest`, `CosmosBulkAsyncTest`, `TransactionalBatchTest`, `CosmosItemSerializerTest` | `partial` / `wrapper-specific` | Port only customer-style operation chains with diagnostics assertions; mark DAO cache/session-map/default policy checks wrapper-specific. | +| `test/CosmosMultiFeatureTests.java` | App-style create/read/query/upsert/delete, keyword identifiers, invalid session token, no preferred region default routing | `CosmosDiagnosticsTest`, `ExcludeRegionTests`, `SessionConsistencyWithRegionScopingTests` | `partial` | Start with keyword identifier and region-routing workflows in `fi-customer-workflows`. | +| `test/CosmosDriverDynamicRequestOptionTest.java` | Dynamic operation policy changes request options per operation and validates request options through diagnostics | `OperationPoliciesTest`, `GatewayReadConsistencyStrategyE2ETest`, `CosmosLatestCommittedItemTests` | `partial` | Add customer workflow tests that combine create/read/query/readMany/upsert chains and diagnostics request-option validation. 
| +| `test/Latest_Committed_Tests.java` | Latest-committed with excluded regions, consistency combinations, RU and contacted-region expectations | `CosmosLatestCommittedItemTests`, `GatewayReadConsistencyStrategyE2ETest`, `ClientRetryPolicyE2ETests` | `partial` | Add focused live multi-region workflow rows; keep primitive latest-committed behavior as duplicate references. | +| `regression/direct/*.java` | Latest-committed direct-mode regression matrix for change feed, read, query, readMany, session/eventual combinations | `CosmosLatestCommittedItemTests`, change feed processor tests | `partial` | Port only variants that add multi-region workflow signal beyond existing latest-committed tests. | +| `regression/gateway/*.java` | Gateway latest-committed regression matrix | `GatewayReadConsistencyStrategyE2ETest`, `GatewayReadConsistencyStrategySpyWireTest` | `duplicate` / `partial` | Keep as duplicate unless the coverage table identifies a workflow assertion not present in gateway tests. | +| `test/CosmosHighE2ETimeoutTest.java` | E2E timeout behavior under response delay and partition migrating faults for create/query/readMany/batch | `EndToEndTimeOutValidationTests`, `EndToEndTimeOutWithAvailabilityTest`, `FaultInjectionWithAvailabilityStrategyTestsBase` | `partial` | Add one customer-style chain after request-option workflows stabilize. | +| `test/CosmosStoredProcedureTest.java` | Stored procedure create/read/update diagnostics under response delay and read-session-not-available faults | `StoredProcedureCrudTest`, `StoredProcedureQueryTest`, `StoredProcedureUpsertReplaceTest`, `CosmosSyncStoredProcTest` | `gap` / `partial` | Add targeted stored-procedure fault workflow that deploys scripts in setup. 
| +| `test/ChangeFeedProcessorTest.java` | CFP start/stop, latest-version handler, current state, restart, and fault-injected read feed | `IncrementalChangeFeedProcessorTest`, `FullFidelityChangeFeedProcessorTest`, `CosmosContainerChangeFeedTest` | `partial` | Add a small CFP workflow and replace fixed sleeps with polling. | +| `test/PartitionLevelCircuitBreakerTests.java` | PCLB app chain and query-plan behavior under regional faults | `PerPartitionCircuitBreakerE2ETests`, `PerPartitionAutomaticFailoverE2ETests` | `partial` | Add one PCLB-enabled workflow row after first live suite run. | +| `test/CosmosConflictResolutionTest.java` | Multi-client conflict detection and conflict query | `CosmosConflictsTest`, `ConflictTests`, `MultiMasterConflictResolutionTest` | `duplicate` / `partial` | Document existing coverage first; port only if customer ordering/diagnostics differs. | +| `test/Cosmos429test.java` | 429 and connection delay behavior in app-shaped calls | `RetryThrottleTest`, `ResourceThrottleRetryPolicyTest`, `FaultInjectionServerErrorRuleOnDirectTests`, `FaultInjectionServerErrorRuleOnGatewayTests` | `duplicate` / `partial` | Prefer parameterized FI rows; do not create a standalone clone. | +| `singlemaster/direct/*.java` | Single-write account availability strategies in direct mode | `EndToEndTimeOutWithAvailabilityTest`, `ExcludeRegionTests`, `FITests_*` | `deferred` | Document rows first; add a single-write multi-region matrix only if unique customer coverage remains. | +| `singlemaster/gateway/*.java` | Single-write account availability strategies in gateway mode | Gateway retry/fault-injection tests | `deferred` | Same as singlemaster/direct. 
| +| `multimaster/direct/*.java` | Multi-write direct availability strategy matrix across fault/status/operation combinations | `FaultInjectionWithAvailabilityStrategyTestsBase`, `FITests_*`, `PerPartitionAutomaticFailoverE2ETests` | `partial` | Port representative workflow matrix with TestNG data providers instead of one class per customer file. | +| `multimaster/gateway/*.java` | Multi-write gateway availability strategy matrix | `FaultInjectionServerErrorRuleOnGatewayTests`, `FaultInjectionServerErrorRuleOnGatewayV2Tests`, `FITests_*` | `partial` | Port selected gateway workflow rows after direct-mode baseline. | + +## Enabled Suite + +The initial implementation adds TestNG group `fi-customer-workflows`, Maven profile `-Pfi-customer-workflows`, and live matrix display name `FaultInjectionCustomerWorkflows`. The suite is intended to run only through the existing on-demand Cosmos live test path. + +Single-write multi-region customer workflows use TestNG group `fi-sm-customer-workflows`, Maven profile `-Pfi-sm-customer-workflows`, and live matrix display name `FaultInjectionSingleMasterCustomerWorkflows`. + +## Implemented Workflow Classes + +| Workflow class | Customer coverage areas represented | +|---|---| +| `CustomerWorkflowRequestOptionsTest` | Dynamic request options, keyword identifiers, excluded regions, create/read/query/readMany/upsert/delete diagnostics. | +| `CustomerWorkflowDaoStyleOperationsTest` | DAO-style CRUD chain, readAll, patch, transactional batch, bulk read/patch with max micro-batch sizing, and request-level serializer propagation. | +| `CustomerWorkflowLatestCommittedTest` | Latest-committed point read, query, readMany, change feed, excluded regions, diagnostics request-option propagation, regional lease-not-found fault coverage, and direct/gateway client variants. | +| `CustomerWorkflowSessionTokenTest` | ReadMany with valid and advanced user session tokens, validating read-session-not-available behavior. 
| +| `CustomerWorkflowStoredProcedureTest` | Stored procedure create/read/execute with script logging and metadata fault-rule coverage. | +| `CustomerWorkflowChangeFeedProcessorTest` | Latest-version CFP start, restart, current state/lag, and read-feed fault recovery. | +| `CustomerWorkflowAvailabilityFaultMatrixTest` | Expanded multi-master direct/gateway fault matrix for read, query, readMany, create, upsert, replace, delete, and patch operations across representative 404/408/410/429/449/500/503 families. | +| `CustomerWorkflowHighE2ETimeoutTest` | Response-delay workflow with E2E timeout and availability strategy for create, read, query, readMany, upsert, batch, patch, and partition-migrating read. | +| `CustomerWorkflowPartitionLevelCircuitBreakerTest` | PCLB-oriented point read, query-plan diagnostics/query, and patch app-chain workflow under the PCLB-enabled live matrix leg. | +| `CustomerWorkflowSingleMasterAvailabilityTest` | Single-write multi-region excluded-readable-region reads, local readable-region read faults, write faults constrained to the single writable region, and representative direct/gateway read/create fault matrices. | + +## Remaining Gap Summary + +| Remaining area | Current status after `fi-customer-workflows` | Importance of adding more | +|---|---|---| +| Exhaustive dynamic request-option matrix | Core app-style create/read/query/readMany/upsert/delete request-option propagation is covered; the exhaustive per-option matrix remains in existing SDK primitive tests. | `nice to have` / mostly duplicate. Add only if release owners want customer-style chaining for every option combination. | +| Latest-committed RU comparison variants | Point read, query, readMany, change feed, excluded regions, diagnostics propagation, and a regional lease-not-found fault are covered; strict RU comparison checks remain. | `nice to have`. RU comparisons are service-sensitive and less valuable than the diagnostics/routing checks now covered. 
| +| Gateway latest-committed regression variants | Direct and gateway latest-committed workflow variants are covered by `CustomerWorkflowLatestCommittedTest`; existing gateway read-consistency tests remain the primitive anchor. | `covered enough`. No further action unless strict one-class-per-customer-file parity is required. | +| Stored procedure exact fault parity | Stored procedure create/read/execute/script-log and metadata fault-rule coverage are added; exact response-delay/read-session-not-available stored-procedure fault parity is not fully represented because fault injection has no stored-procedure-specific operation type. | `significant partial gap`. Closing it may require deeper test-infra support or a carefully scoped metadata/data-plane proxy scenario. | +| CFP full customer matrix | Latest-version CFP start, restart, current state/lag, and read-feed fault recovery are covered; full-fidelity/all-versions, side-cart, and deeper lease recovery variants remain. | `nice to have`. Current workflow covers the highest-signal CFP behavior without copying the large CFP matrix. | +| Full multi-write availability matrix | Expanded direct/gateway multi-write fault rows now cover read/query/readMany/create/upsert/replace/delete/patch across representative 404/408/410/429/449/500/503 families. The only unported portion is exact one-class-per-customer-file parity and every operation/error permutation. | `runtime-heavy duplicate`. Stop here unless parity is required over runtime. | +| Single-write direct/gateway availability matrix | Dedicated single-write multi-region live leg and representative direct/gateway read/create fault matrices are added through `fi-sm-customer-workflows`; exact one-class-per-error-file parity remains. | `runtime-heavy duplicate`. Stop here unless strict customer-suite parity is required. 
| +| High E2E timeout extended fault variants | Response-delay E2E timeout with availability strategy now covers create/read/query/readMany/upsert/batch/patch plus partition-migrating read; deeper customer-specific timing/RU assertions remain. | `nice to have`. The main workflow gap is covered; remaining work is runtime-sensitive strict parity. | +| PCLB exact regional circuit-breaker assertions | PCLB-oriented read/query-plan diagnostics/query/patch app-chain workflow is added; exact circuit-breaker state transitions remain in existing PCLB tests. | `nice to have` for customer parity; existing SDK PCLB tests already cover the lower-level behavior. | +| 429 and connection-delay app-shaped calls | 429-style rows are now represented in multi-write and single-write matrices; connection-delay/connect-reset style network transport variants remain in existing transport/FI tests and selected timeout workflows. | `runtime-heavy duplicate`. Add only if network-fault parity is explicitly required. | +| Conflict resolution and conflict query | Not added to the new workflow suite; existing conflict tests cover core SDK behavior. | `nice to have` / duplicate. Add only if customer multi-client ordering or diagnostics are materially different. | +| Basic multi-write behavior and feature-validation classes | Covered indirectly by CRUD/request-options/latest-committed/session-token workflows and existing multi-master tests. | `completely duplicate` for this suite unless a specific uncovered assertion is identified. | +| Custom serializer standalone tests | Request-level serializer propagation is represented in the DAO-style workflow; existing serializer tests cover normal and exception behavior. | `completely duplicate`. Keep deeper standalone serializer tests out of this workflow suite. | +| Customer wrapper defaults, caches, DAO session maps, and configuration defaults | Not ported by design because these are not SDK contracts. 
| `completely useless for SDK coverage` / wrapper-specific. Keep documented only. | + +## Porting Rules + +- Use SDK-native tests in `azure-cosmos-tests`; do not copy customer-specific package dependencies. +- Do not copy hardcoded customer endpoints, account keys, database names, or container names. +- Prefer dynamic account-region discovery over hardcoded region order. +- Replace fixed sleeps with polling or retry loops. +- Preserve customer workflow shape where it adds release signal: operation chains, contacted-region diagnostics, effective consistency, effective read-consistency strategy, retry counts, and request-option propagation. +- Mark wrapper default assertions as `wrapper-specific` unless the SDK owns the behavior. diff --git a/sdk/cosmos/azure-cosmos-tests/pom.xml b/sdk/cosmos/azure-cosmos-tests/pom.xml index 680f9470e6bf..18cb8e9cd2d1 100644 --- a/sdk/cosmos/azure-cosmos-tests/pom.xml +++ b/sdk/cosmos/azure-cosmos-tests/pom.xml @@ -659,6 +659,60 @@ Licensed under the MIT License. 
+ + + fi-customer-workflows + + fi-customer-workflows + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.5.3 + + + src/test/resources/fi-customer-workflows-testng.xml + + + true + 1 + 256 + paranoid + + + + + + + + + fi-sm-customer-workflows + + fi-sm-customer-workflows + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.5.3 + + + src/test/resources/fi-sm-customer-workflows-testng.xml + + + true + 1 + 256 + paranoid + + + + + + multi-region diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index 66b6e6314ad0..4f9f54f2fdb9 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -302,7 +302,7 @@ public CosmosAsyncDatabase getDatabase(String id) { @BeforeSuite(groups = {"thinclient", "fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "emulator-vnext", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", - "circuit-breaker-read-all-read-many", "fi-multi-master", "long-emulator", "fi-thinclient-multi-region", "fi-thinclient-multi-master", "multi-region-strong", "manual-http-network-fault"}, timeOut = SUITE_SETUP_TIMEOUT) + "circuit-breaker-read-all-read-many", "fi-multi-master", "fi-customer-workflows", "fi-sm-customer-workflows", "long-emulator", "fi-thinclient-multi-region", "fi-thinclient-multi-master", "multi-region-strong", "manual-http-network-fault"}, timeOut = SUITE_SETUP_TIMEOUT) public void beforeSuite() { logger.info("beforeSuite Started"); @@ -353,7 +353,7 @@ private static DocumentCollection getInternalDocumentCollection(CosmosAsyncConta @AfterSuite(groups = {"thinclient", "fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", 
"circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", - "circuit-breaker-read-all-read-many", "fi-multi-master", "long-emulator", "fi-thinclient-multi-region", "fi-thinclient-multi-master", "multi-region-strong", "manual-http-network-fault"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) + "circuit-breaker-read-all-read-many", "fi-multi-master", "fi-customer-workflows", "fi-sm-customer-workflows", "long-emulator", "fi-thinclient-multi-region", "fi-thinclient-multi-master", "multi-region-strong", "manual-http-network-fault"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) public void afterSuite() { logger.info("afterSuite Started"); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java new file mode 100644 index 000000000000..1d5300249d66 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java @@ -0,0 +1,194 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+package com.azure.cosmos.workflows.customer;
+
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.CosmosDiagnosticsContext;
+import com.azure.cosmos.CosmosException;
+import com.azure.cosmos.TestObject;
+import com.azure.cosmos.implementation.HttpConstants;
+import com.azure.cosmos.models.CosmosItemIdentity;
+import com.azure.cosmos.models.CosmosItemRequestOptions;
+import com.azure.cosmos.models.CosmosItemResponse;
+import com.azure.cosmos.models.CosmosPatchOperations;
+import com.azure.cosmos.models.CosmosQueryRequestOptions;
+import com.azure.cosmos.models.CosmosReadManyRequestOptions;
+import com.azure.cosmos.models.FeedResponse;
+import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType;
+import com.azure.cosmos.test.faultinjection.FaultInjectionRule;
+import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Factory;
+import org.testng.annotations.Test;
+
+import java.util.Collections;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Customer-style availability workflow: for every row of {@link #availabilityFaultScenarios()} a server-error
+ * fault rule is configured against the first writable region, one operation is executed, and the diagnostics
+ * context of the outcome (success or {@link CosmosException}) is checked for basic population.
+ */
+public class CustomerWorkflowAvailabilityFaultMatrixTest extends CustomerWorkflowTestBase {
+
+    @Factory(dataProvider = "clientBuildersWithSessionConsistency")
+    public CustomerWorkflowAvailabilityFaultMatrixTest(CosmosClientBuilder clientBuilder) {
+        super(clientBuilder);
+    }
+
+    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
+    public void beforeClass() {
+        initializeSharedSinglePartitionContainer("Customer availability fault workflow tests");
+    }
+
+    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
+    public void afterClass() {
+        closeClient();
+    }
+
+    /**
+     * Rows of {@code {operation name, fault-injection operation type, injected server error}}.
+     * The operation name must be one of the names handled by {@code executeOperation}; readMany rows
+     * inject on {@code QUERY_ITEM}.
+     */
+    @DataProvider(name = "availabilityFaultScenarios")
+    public Object[][] availabilityFaultScenarios() {
+        return new Object[][]{
+            {"read", FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.GONE},
+            {"read", FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.TIMEOUT},
+            {"read", FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE},
+            {"read", FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR},
+            {"query", FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE},
+            {"query", FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.GONE},
+            {"query", FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.TIMEOUT},
+            {"query", FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR},
+            {"readMany", FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.GONE},
+            {"readMany", FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE},
+            {"readMany", FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.TOO_MANY_REQUEST},
+            {"create", FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR},
+            {"create", FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.TOO_MANY_REQUEST},
+            {"create", FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.TIMEOUT},
+            {"create", FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.RETRY_WITH},
+            {"create", FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.PARTITION_IS_MIGRATING},
+            {"upsert", FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE},
+            {"upsert", FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.PARTITION_IS_MIGRATING},
+            {"upsert", FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.TOO_MANY_REQUEST},
+            {"replace", FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.GONE},
+            {"replace", FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.TIMEOUT},
+            {"replace", FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE},
+            {"delete", FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE},
+            {"delete", FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.GONE},
+            {"delete", FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.TIMEOUT},
+            {"patch", FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR},
+            {"patch", FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE},
+            {"patch", FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.GONE}
+        };
+    }
+
+    /**
+     * Runs one fault scenario and asserts that the resulting diagnostics context carries a status code,
+     * contacted-region names and a duration.
+     *
+     * NOTE(review): the method name says "Direct", but the connection type is taken from
+     * currentFaultInjectionConnectionType(), so this presumably also runs for any gateway builders the
+     * factory data provider supplies - confirm and consider renaming.
+     *
+     * @param operation operation name from the data provider
+     * @param faultInjectionOperationType operation type the fault rule is scoped to
+     * @param errorType server error injected by the rule
+     */
+    @Test(groups = {"fi-customer-workflows"}, dataProvider = "availabilityFaultScenarios", timeOut = TIMEOUT)
+    public void representativeDirectMultiMasterFaultWorkflow(
+        String operation,
+        FaultInjectionOperationType faultInjectionOperationType,
+        FaultInjectionServerErrorType errorType) {
+
+        TestObject item = TestObject.create();
+        // Every operation except "create" needs the document to exist before the fault rule is armed.
+        if (!"create".equals(operation)) {
+            this.container.createItem(item).block();
+        }
+
+        // The trailing 1 is presumably the rule's hit limit - confirm against CustomerWorkflowTestBase.
+        FaultInjectionRule faultRule = configureServerErrorRule(
+            this.container,
+            faultInjectionOperationType,
+            errorType,
+            this.writableRegions.get(0),
+            currentFaultInjectionConnectionType(),
+            1);
+
+        try {
+            CosmosDiagnosticsContext diagnosticsContext = executeOperation(operation, item);
+
+            assertThat(diagnosticsContext).isNotNull();
+            assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0);
+            assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull();
+            assertThat(diagnosticsContext.getDuration()).isNotNull();
+        } finally {
+            // Always disarm the rule so it cannot leak into the next data-provider row.
+            faultRule.disable();
+        }
+    }
+
+    /**
+     * Executes the named operation against the shared container and returns the diagnostics context of the
+     * response. When the SDK surfaces a {@link CosmosException}, its status code is asserted to be at least
+     * {@code BADREQUEST} and the diagnostics context of the exception is returned instead.
+     *
+     * @param operation one of read, query, readMany, upsert, replace, delete, patch, create
+     * @param item document the operation targets
+     * @return diagnostics context of the response or of the caught exception
+     * @throws IllegalArgumentException when the operation name is not recognised (previously such names
+     *     silently ran the create path)
+     */
+    private CosmosDiagnosticsContext executeOperation(String operation, TestObject item) {
+        try {
+            switch (operation) {
+                case "read":
+                    return itemDiagnostics(this.container
+                        .readItem(item.getId(), partitionKey(item), new CosmosItemRequestOptions(), TestObject.class)
+                        .block());
+
+                case "query":
+                    return feedDiagnostics(this.container
+                        .queryItems(
+                            String.format("SELECT * FROM c WHERE c.id = '%s'", item.getId()),
+                            new CosmosQueryRequestOptions().setQueryName("AvailabilityFaultWorkflowQuery"),
+                            TestObject.class)
+                        .byPage()
+                        .blockFirst());
+
+                case "readMany":
+                    return feedDiagnostics(this.container
+                        .readMany(
+                            Collections.singletonList(new CosmosItemIdentity(partitionKey(item), item.getId())),
+                            new CosmosReadManyRequestOptions(),
+                            TestObject.class)
+                        .block());
+
+                case "upsert":
+                    item.setStringProp("fault-upsert-" + item.getStringProp());
+                    return itemDiagnostics(this.container
+                        .upsertItem(item, new CosmosItemRequestOptions().setContentResponseOnWriteEnabled(true))
+                        .block());
+
+                case "replace":
+                    item.setStringProp("fault-replace-" + item.getStringProp());
+                    return itemDiagnostics(this.container
+                        .replaceItem(item, item.getId(), partitionKey(item), new CosmosItemRequestOptions())
+                        .block());
+
+                case "delete":
+                    return itemDiagnostics(this.container
+                        .deleteItem(item.getId(), partitionKey(item), new CosmosItemRequestOptions())
+                        .block());
+
+                case "patch":
+                    return itemDiagnostics(this.container
+                        .patchItem(
+                            item.getId(),
+                            partitionKey(item),
+                            CosmosPatchOperations.create().set("/stringProp", "fault-patch-" + item.getStringProp()),
+                            TestObject.class)
+                        .block());
+
+                case "create":
+                    // The item was deliberately not pre-created for this scenario, so it can be created here.
+                    return itemDiagnostics(this.container
+                        .createItem(item, new CosmosItemRequestOptions().setContentResponseOnWriteEnabled(true))
+                        .block());
+
+                default:
+                    throw new IllegalArgumentException(
+                        "Unsupported availability fault workflow operation: " + operation);
+            }
+        } catch (CosmosException error) {
+            // Assert on the status first so a missing diagnostics object cannot hide the real failure.
+            assertThat(error.getStatusCode()).isGreaterThanOrEqualTo(HttpConstants.StatusCodes.BADREQUEST);
+            assertThat(error.getDiagnostics())
+                .as("CosmosException for operation '%s' carried no diagnostics", operation)
+                .isNotNull();
+            return error.getDiagnostics().getDiagnosticsContext();
+        }
+    }
+
+    /** Returns the diagnostics context of a point-operation response, failing clearly on a null response. */
+    private static CosmosDiagnosticsContext itemDiagnostics(CosmosItemResponse<?> response) {
+        if (response == null) {
+            throw new AssertionError("Point operation completed without a response.");
+        }
+        return response.getDiagnostics().getDiagnosticsContext();
+    }
+
+    /** Returns the diagnostics context of a feed page, failing clearly on a null page. */
+    private static CosmosDiagnosticsContext feedDiagnostics(FeedResponse<?> response) {
+        if (response == null) {
+            throw new AssertionError("Feed operation completed without a response page.");
+        }
+        return response.getCosmosDiagnostics().getDiagnosticsContext();
+    }
+}
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java
new file mode 100644
index 000000000000..4a7e033a7fbf
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java
@@ -0,0 +1,150 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
package com.azure.cosmos.workflows.customer;

import com.azure.cosmos.ChangeFeedProcessor;
import com.azure.cosmos.ChangeFeedProcessorBuilder;
import com.azure.cosmos.CosmosAsyncContainer;
import com.azure.cosmos.CosmosClientBuilder;
import com.azure.cosmos.TestObject;
import com.azure.cosmos.models.ChangeFeedProcessorItem;
import com.azure.cosmos.models.ChangeFeedProcessorOptions;
import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType;
import com.azure.cosmos.test.faultinjection.FaultInjectionRule;
import com.fasterxml.jackson.databind.JsonNode;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Factory;
import org.testng.annotations.Test;

import java.time.Duration;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Customer-style workflow that starts a latest-version change feed processor, stops it,
 * writes another document, and starts a second processor while a response-delay fault is
 * configured for read-feed operations.
 */
public class CustomerWorkflowChangeFeedProcessorTest extends CustomerWorkflowTestBase {

    @Factory(dataProvider = "clientBuildersWithDirectTcpSession")
    public CustomerWorkflowChangeFeedProcessorTest(CosmosClientBuilder clientBuilder) {
        super(clientBuilder);
    }

    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
    public void beforeClass() {
        initializeSharedSinglePartitionContainer("Customer change feed processor workflow tests");
    }

    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
    public void afterClass() {
        closeClient();
    }

    /**
     * Runs the processor twice against the same feed and lease containers.
     *
     * <p>The first run must deliver the two documents written before it started and must report
     * a non-empty current state. The second run is started after an additional document was
     * written and with a read-feed response delay rule configured; it must deliver that
     * document and return an estimated lag.
     *
     * <p>NOTE(review): the second processor is built with lease prefix {@code customer-restart}
     * while the first one used {@code customer-initial}, and both start from the beginning, so
     * the second run presumably does not resume from the leases of the first one - confirm that
     * this is the intended "restart" semantic.
     *
     * @throws InterruptedException if the thread is interrupted while waiting on a latch
     */
    @Test(groups = {"fi-customer-workflows"}, timeOut = 2 * TIMEOUT)
    public void latestVersionProcessorRestartAndReadFeedFaultWorkflow() throws InterruptedException {
        CosmosAsyncContainer feedContainer = null;
        CosmosAsyncContainer leaseContainer = null;
        ChangeFeedProcessor processor = null;
        FaultInjectionRule readFeedDelayRule = null;

        try {
            // Created inside the try block so that a failure while creating the lease container
            // still cleans up the feed container.
            feedContainer = createTemporaryContainer("customer-cfp-feed", "/mypk");
            leaseContainer = createTemporaryContainer("customer-cfp-lease", "/id");

            Set<String> expectedIds = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
            Set<String> receivedIds = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
            CountDownLatch initialLatch = new CountDownLatch(2);

            createFeedItem(feedContainer, expectedIds, "cfp-initial-1");
            createFeedItem(feedContainer, expectedIds, "cfp-initial-2");

            processor = createLatestVersionProcessor(
                feedContainer, leaseContainer, expectedIds, receivedIds, initialLatch, "initial");
            processor.start().block();
            // Effectively-final alias so the lambda below can capture the first processor.
            ChangeFeedProcessor initialProcessor = processor;

            assertThat(processor.isStarted()).isTrue();
            assertThat(initialLatch.await(30, TimeUnit.SECONDS)).isTrue();
            assertThat(receivedIds).containsAll(expectedIds);

            awaitCondition(
                () -> !initialProcessor.getCurrentState().block().isEmpty(),
                Duration.ofSeconds(20),
                "Change feed processor did not acquire leases.");

            processor.stop().block();
            assertThat(processor.isStarted()).isFalse();

            CountDownLatch restartLatch = new CountDownLatch(1);
            TestObject restartedItem = createFeedItem(feedContainer, expectedIds, "cfp-restart");
            readFeedDelayRule = configureResponseDelayRule(
                feedContainer, FaultInjectionOperationType.READ_FEED_ITEM, Duration.ofMillis(100), 1);

            processor = createLatestVersionProcessor(
                feedContainer, leaseContainer, expectedIds, receivedIds, restartLatch, "restart");
            processor.start().block();

            assertThat(processor.isStarted()).isTrue();
            assertThat(restartLatch.await(30, TimeUnit.SECONDS)).isTrue();
            assertThat(receivedIds).contains(restartedItem.getId());
            assertThat(processor.getEstimatedLag().block()).isNotNull();
        } finally {
            // Nested finally blocks: a failure while disabling the rule or stopping the
            // processor must not prevent the temporary containers from being deleted.
            try {
                if (readFeedDelayRule != null) {
                    readFeedDelayRule.disable();
                }
                if (processor != null && processor.isStarted()) {
                    processor.stop().block();
                }
            } finally {
                try {
                    if (feedContainer != null) {
                        deleteTemporaryContainer(feedContainer);
                    }
                } finally {
                    if (leaseContainer != null) {
                        deleteTemporaryContainer(leaseContainer);
                    }
                }
            }
        }
    }

    /**
     * Creates a document in the feed container and registers its id as expected.
     *
     * @param feedContainer container the document is written to
     * @param expectedIds set that collects the ids the change handler should count
     * @param partitionKey prefix of the value passed to {@code TestObject.create}; a random UUID is appended
     * @return the document that was written
     */
    private TestObject createFeedItem(CosmosAsyncContainer feedContainer, Set<String> expectedIds, String partitionKey) {
        TestObject item = TestObject.create(partitionKey + "-" + UUID.randomUUID());
        // Register the id before the write: the change handler ignores ids it does not know,
        // so registering afterwards could drop a change delivered by a running processor.
        expectedIds.add(item.getId());
        feedContainer.createItem(item).block();
        return item;
    }

    /**
     * Builds (but does not start) a latest-version change feed processor.
     *
     * @param feedContainer monitored container
     * @param leaseContainer container holding the leases
     * @param expectedIds ids the handler is allowed to count
     * @param receivedIds ids the handler has already seen
     * @param latch latch counted down once per newly received expected id
     * @param leasePrefix suffix used for both the host name and the lease prefix
     * @return the configured processor
     */
    private ChangeFeedProcessor createLatestVersionProcessor(
        CosmosAsyncContainer feedContainer,
        CosmosAsyncContainer leaseContainer,
        Set<String> expectedIds,
        Set<String> receivedIds,
        CountDownLatch latch,
        String leasePrefix) {

        ChangeFeedProcessorOptions processorOptions = new ChangeFeedProcessorOptions()
            .setStartFromBeginning(true)
            .setFeedPollDelay(Duration.ofMillis(500))
            .setLeaseAcquireInterval(Duration.ofSeconds(1))
            .setLeaseRenewInterval(Duration.ofSeconds(2))
            .setLeaseExpirationInterval(Duration.ofSeconds(6))
            .setMaxItemCount(10)
            .setLeasePrefix("customer-" + leasePrefix);

        return new ChangeFeedProcessorBuilder()
            .hostName("customer-workflow-" + leasePrefix + "-" + UUID.randomUUID())
            .feedContainer(feedContainer)
            .leaseContainer(leaseContainer)
            .handleLatestVersionChanges(items -> recordLatestVersionItems(items, expectedIds, receivedIds, latch))
            .options(processorOptions)
            .buildChangeFeedProcessor();
    }

    /**
     * Change handler: counts the latch down once for every expected id seen for the first time.
     * Changes without a current document or without an {@code id} property are ignored.
     */
    private static void recordLatestVersionItems(
        List<ChangeFeedProcessorItem> items,
        Set<String> expectedIds,
        Set<String> receivedIds,
        CountDownLatch latch) {

        for (ChangeFeedProcessorItem item : items) {
            JsonNode current = item.getCurrent();
            if (current == null || !current.has("id")) {
                continue;
            }

            String id = current.get("id").asText();
            // Set.add returns false for duplicates, so redelivered changes are not counted twice.
            if (expectedIds.contains(id) && receivedIds.add(id)) {
                latch.countDown();
            }
        }
    }
}
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.azure.cosmos.workflows.customer;

import com.azure.cosmos.CosmosClientBuilder;
import com.azure.cosmos.CosmosDiagnosticsContext;
import com.azure.cosmos.CosmosItemSerializer;
import com.azure.cosmos.TestObject;
import com.azure.cosmos.implementation.HttpConstants;
import com.azure.cosmos.models.CosmosBatch;
import com.azure.cosmos.models.CosmosBatchResponse;
import com.azure.cosmos.models.CosmosBulkExecutionOptions;
import com.azure.cosmos.models.CosmosBulkOperationResponse;
import com.azure.cosmos.models.CosmosBulkOperations;
import com.azure.cosmos.models.CosmosItemOperation;
import com.azure.cosmos.models.CosmosItemRequestOptions;
import com.azure.cosmos.models.CosmosItemResponse;
import com.azure.cosmos.models.CosmosPatchOperations;
import com.azure.cosmos.models.CosmosQueryRequestOptions;
import com.azure.cosmos.models.FeedResponse;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Factory;
import org.testng.annotations.Test;
import reactor.core.publisher.Flux;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.UUID;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Customer DAO-style workflow: one test chains create, read, read-all, patch, transactional
 * batch and bulk operations against the shared container and checks status codes plus the
 * request options reflected in the diagnostics.
 */
public class CustomerWorkflowDaoStyleOperationsTest extends CustomerWorkflowTestBase {

    @Factory(dataProvider = "clientBuildersWithDirectTcpSession")
    public CustomerWorkflowDaoStyleOperationsTest(CosmosClientBuilder clientBuilder) {
        super(clientBuilder);
    }

    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
    public void beforeClass() {
        initializeSharedSinglePartitionContainer("Customer DAO-style workflow tests");
    }

    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
    public void afterClass() {
        closeClient();
    }

    /**
     * Executes the full operation chain. Each step asserts on its own response before the next
     * step runs, so a failure points at the first operation that misbehaved.
     */
    @Test(groups = {"fi-customer-workflows"}, timeOut = TIMEOUT)
    public void crudReadAllPatchBatchAndBulkWorkflow() {
        List<String> excludedRegions = excludeFirstWritableRegion();
        TestObject item = TestObject.create();

        // --- create: keyword identifier, excluded regions and custom serializer must round-trip.
        CosmosItemRequestOptions createOptions = new CosmosItemRequestOptions()
            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("workflow-crud-create")))
            .setExcludedRegions(excludedRegions)
            .setCustomItemSerializer(CosmosItemSerializer.DEFAULT_SERIALIZER)
            .setContentResponseOnWriteEnabled(true);

        CosmosItemResponse<TestObject> createResponse = this.container
            .createItem(item, createOptions)
            .block();

        assertThat(createResponse).isNotNull();
        assertThat(createResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.CREATED);
        CosmosDiagnosticsContext createDiagnostics = createResponse.getDiagnostics().getDiagnosticsContext();
        assertKeywordIdentifier(createDiagnostics, "workflow-crud-create");
        assertThat(getRequestOptions(createDiagnostics).getCustomItemSerializer())
            .isSameAs(CosmosItemSerializer.DEFAULT_SERIALIZER);
        assertDidNotContactExcludedRegions(createDiagnostics, excludedRegions);

        // --- point read.
        CosmosItemResponse<TestObject> readResponse = this.container
            .readItem(
                item.getId(),
                partitionKey(item),
                new CosmosItemRequestOptions().setExcludedRegions(excludedRegions),
                TestObject.class)
            .block();

        assertThat(readResponse).isNotNull();
        assertThat(readResponse.getItem()).isEqualTo(item);

        // --- read-all within the item's logical partition (first page only).
        CosmosQueryRequestOptions readAllOptions = new CosmosQueryRequestOptions()
            .setExcludedRegions(excludedRegions)
            .setCustomItemSerializer(CosmosItemSerializer.DEFAULT_SERIALIZER);

        FeedResponse<TestObject> readAllResponse = this.container
            .readAllItems(partitionKey(item), readAllOptions, TestObject.class)
            .byPage()
            .blockFirst();

        assertThat(readAllResponse).isNotNull();
        assertThat(readAllResponse.getResults()).extracting(TestObject::getId).contains(item.getId());
        CosmosDiagnosticsContext readAllDiagnostics = readAllResponse.getCosmosDiagnostics().getDiagnosticsContext();
        assertExcludedRegions(readAllDiagnostics, excludedRegions);
        assertThat(getRequestOptions(readAllDiagnostics).getCustomItemSerializer())
            .isSameAs(CosmosItemSerializer.DEFAULT_SERIALIZER);

        // --- patch.
        CosmosPatchOperations patchOperations = CosmosPatchOperations.create()
            .set("/stringProp", "patched-" + item.getStringProp());

        CosmosItemResponse<TestObject> patchResponse = this.container
            .patchItem(item.getId(), partitionKey(item), patchOperations, TestObject.class)
            .block();

        assertThat(patchResponse).isNotNull();
        assertThat(patchResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.OK);
        assertThat(patchResponse.getItem().getStringProp()).startsWith("patched-");

        // --- transactional batch: create + read of the same document.
        String batchPk = "batch-" + UUID.randomUUID();
        TestObject batchItem = TestObject.create(batchPk);
        CosmosBatch batch = CosmosBatch.createCosmosBatch(partitionKey(batchItem));
        batch.createItemOperation(batchItem);
        batch.readItemOperation(batchItem.getId());

        CosmosBatchResponse batchResponse = this.container.executeCosmosBatch(batch).block();

        assertThat(batchResponse).isNotNull();
        assertThat(batchResponse.isSuccessStatusCode()).isTrue();
        assertThat(batchResponse.size()).isEqualTo(2);
        assertThat(batchResponse.getDiagnostics()).isNotNull();

        // --- bulk: read + patch of a document created up front.
        TestObject bulkItem = TestObject.create();
        this.container.createItem(bulkItem).block();
        CosmosPatchOperations bulkPatchOperations = CosmosPatchOperations.create()
            .set("/stringProp", "bulk-patched-" + bulkItem.getStringProp());

        List<CosmosItemOperation> bulkOperations = new ArrayList<>();
        bulkOperations.add(CosmosBulkOperations.getReadItemOperation(bulkItem.getId(), partitionKey(bulkItem)));
        bulkOperations.add(
            CosmosBulkOperations.getPatchItemOperation(bulkItem.getId(), partitionKey(bulkItem), bulkPatchOperations));

        CosmosBulkExecutionOptions bulkExecutionOptions = new CosmosBulkExecutionOptions()
            .setMaxMicroBatchSize(2)
            .setExcludedRegions(excludedRegions)
            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("workflow-bulk")));

        List<CosmosBulkOperationResponse<Object>> bulkResponses = this.container
            .executeBulkOperations(Flux.fromIterable(bulkOperations), bulkExecutionOptions)
            .collectList()
            .block();

        assertThat(bulkResponses).isNotNull();
        assertThat(bulkResponses).hasSize(2);
        assertThat(bulkResponses).allSatisfy(response -> {
            // The exception is checked first; getResponse() is only dereferenced afterwards.
            assertThat(response.getException()).isNull();
            assertThat(response.getResponse().getStatusCode())
                .isIn(HttpConstants.StatusCodes.OK, HttpConstants.StatusCodes.CREATED);
            assertThat(response.getResponse().getCosmosDiagnostics()).isNotNull();
        });
    }
}

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.azure.cosmos.workflows.customer;

import com.azure.cosmos.CosmosClientBuilder;
import com.azure.cosmos.CosmosDiagnosticsContext;
import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig;
import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder;
import com.azure.cosmos.CosmosException;
import com.azure.cosmos.TestObject;
import com.azure.cosmos.ThresholdBasedAvailabilityStrategy;
import com.azure.cosmos.implementation.ImplementationBridgeHelpers;
import com.azure.cosmos.models.CosmosBatch;
import com.azure.cosmos.models.CosmosBatchRequestOptions;
import com.azure.cosmos.models.CosmosBatchResponse;
import com.azure.cosmos.models.CosmosItemIdentity;
import com.azure.cosmos.models.CosmosItemRequestOptions;
import com.azure.cosmos.models.CosmosItemResponse;
import com.azure.cosmos.models.CosmosPatchItemRequestOptions;
import com.azure.cosmos.models.CosmosPatchOperations;
import com.azure.cosmos.models.CosmosQueryRequestOptions;
import com.azure.cosmos.models.CosmosReadManyRequestOptions;
import com.azure.cosmos.models.FeedResponse;
import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType;
import com.azure.cosmos.test.faultinjection.FaultInjectionRule;
import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Factory;
import org.testng.annotations.Test;

import java.time.Duration;
import java.util.Collections;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Customer workflow that runs item operations with an end-to-end latency policy (4 second
 * timeout plus a threshold-based availability strategy) while a fault injection rule is active,
 * and checks that every operation finishes with usable diagnostics.
 */
public class CustomerWorkflowHighE2ETimeoutTest extends CustomerWorkflowTestBase {

    /** End-to-end timeout configured on every operation of this class. */
    private static final Duration E2E_TIMEOUT = Duration.ofSeconds(4);

    /** Upper bound asserted on the duration reported by the diagnostics context. */
    private static final Duration MAX_OBSERVED_DURATION = Duration.ofSeconds(10);

    @Factory(dataProvider = "clientBuildersWithDirectTcpSession")
    public CustomerWorkflowHighE2ETimeoutTest(CosmosClientBuilder clientBuilder) {
        super(clientBuilder);
    }

    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
    public void beforeClass() {
        initializeSharedSinglePartitionContainer("Customer high E2E timeout workflow tests");
    }

    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
    public void afterClass() {
        closeClient();
    }

    /**
     * Pairs each operation name understood by {@code executeWithE2EPolicy} with the fault
     * injection operation type the delay rule is configured for.
     *
     * <p>NOTE(review): {@code readMany} is paired with {@code QUERY_ITEM}; the test issues it
     * for a single identity, so verify that the rule is actually hit on that code path.
     */
    @DataProvider(name = "timeoutWorkflowOperations")
    public Object[][] timeoutWorkflowOperations() {
        return new Object[][]{
            {"create", FaultInjectionOperationType.CREATE_ITEM},
            {"read", FaultInjectionOperationType.READ_ITEM},
            {"query", FaultInjectionOperationType.QUERY_ITEM},
            {"readMany", FaultInjectionOperationType.QUERY_ITEM},
            {"upsert", FaultInjectionOperationType.UPSERT_ITEM},
            {"batch", FaultInjectionOperationType.BATCH_ITEM},
            {"patch", FaultInjectionOperationType.PATCH_ITEM}
        };
    }

    /**
     * Runs one operation while a 1.5 second response delay is injected once for the given
     * operation type.
     */
    @Test(groups = {"fi-customer-workflows"}, dataProvider = "timeoutWorkflowOperations", timeOut = 2 * TIMEOUT)
    public void responseDelayWithAvailabilityStrategyWorkflow(String operation, FaultInjectionOperationType faultInjectionOperationType) {
        TestObject item = TestObject.create();
        this.container.createItem(item).block();

        CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy = createE2EPolicy();
        FaultInjectionRule delayRule =
            configureResponseDelayRule(this.container, faultInjectionOperationType, Duration.ofMillis(1500), 1);

        try {
            assertCompletedWithDiagnostics(executeWithE2EPolicy(operation, item, e2ePolicy));
        } finally {
            delayRule.disable();
        }
    }

    /** Reads an item while a {@code PARTITION_IS_MIGRATING} server error is injected once. */
    @Test(groups = {"fi-customer-workflows"}, timeOut = 2 * TIMEOUT)
    public void partitionMigratingFaultWithE2EPolicyWorkflow() {
        TestObject item = TestObject.create();
        this.container.createItem(item).block();

        CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy = createE2EPolicy();
        FaultInjectionRule migratingRule = configureServerErrorRule(
            this.container,
            FaultInjectionOperationType.READ_ITEM,
            FaultInjectionServerErrorType.PARTITION_IS_MIGRATING,
            1);

        try {
            assertCompletedWithDiagnostics(executeWithE2EPolicy("read", item, e2ePolicy));
        } finally {
            migratingRule.disable();
        }
    }

    /** Builds the latency policy shared by both tests. */
    private static CosmosEndToEndOperationLatencyPolicyConfig createE2EPolicy() {
        return new CosmosEndToEndOperationLatencyPolicyConfigBuilder(E2E_TIMEOUT)
            .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200)))
            .build();
    }

    /** Assertions shared by both tests: a status code, a bounded duration and region names. */
    private static void assertCompletedWithDiagnostics(CosmosDiagnosticsContext diagnosticsContext) {
        assertThat(diagnosticsContext).isNotNull();
        assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0);
        assertThat(diagnosticsContext.getDuration()).isLessThan(MAX_OBSERVED_DURATION);
        assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull();
    }

    /**
     * Executes the named operation with the given latency policy.
     *
     * @param operation one of {@code create}, {@code read}, {@code query}, {@code readMany},
     *     {@code upsert}, {@code batch} or {@code patch}
     * @param item existing document targeted by every operation except {@code create} and {@code batch}
     * @param e2ePolicy latency policy applied to the request options
     * @return the diagnostics context of the response, or of the {@link CosmosException} when
     *     the operation failed
     * @throws IllegalArgumentException if {@code operation} is not one of the supported names
     * @throws CosmosException if the operation failed and the exception carries no diagnostics
     */
    private CosmosDiagnosticsContext executeWithE2EPolicy(
        String operation,
        TestObject item,
        CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {

        try {
            switch (operation) {
                case "create":
                    return createWithPolicy(e2ePolicy);
                case "read":
                    return readWithPolicy(item, e2ePolicy);
                case "query":
                    return queryWithPolicy(item, e2ePolicy);
                case "readMany":
                    return readManyWithPolicy(item, e2ePolicy);
                case "upsert":
                    return upsertWithPolicy(item, e2ePolicy);
                case "batch":
                    return batchWithPolicy(e2ePolicy);
                case "patch":
                    return patchWithPolicy(item, e2ePolicy);
                default:
                    // A typo in the data provider must fail loudly instead of silently
                    // exercising the patch path.
                    throw new IllegalArgumentException("Unsupported workflow operation: " + operation);
            }
        } catch (CosmosException error) {
            if (error.getDiagnostics() == null) {
                // Without diagnostics there is nothing to assert on; surface the real failure
                // rather than a NullPointerException.
                throw error;
            }
            return error.getDiagnostics().getDiagnosticsContext();
        }
    }

    /** Creates a fresh document with the policy applied. */
    private CosmosDiagnosticsContext createWithPolicy(CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
        CosmosItemRequestOptions options = new CosmosItemRequestOptions()
            .setContentResponseOnWriteEnabled(true)
            .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);

        return this.container
            .createItem(TestObject.create(), options)
            .block()
            .getDiagnostics()
            .getDiagnosticsContext();
    }

    /** Point-reads the given document with the policy applied. */
    private CosmosDiagnosticsContext readWithPolicy(TestObject item, CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
        CosmosItemRequestOptions options = new CosmosItemRequestOptions()
            .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);

        return this.container
            .readItem(item.getId(), partitionKey(item), options, TestObject.class)
            .block()
            .getDiagnostics()
            .getDiagnosticsContext();
    }

    /** Queries the given document by id and returns the diagnostics of the first page. */
    private CosmosDiagnosticsContext queryWithPolicy(TestObject item, CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
        CosmosQueryRequestOptions options = new CosmosQueryRequestOptions()
            .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy)
            .setQueryName("HighE2ETimeoutWorkflowQuery");

        FeedResponse<TestObject> response = this.container
            .queryItems(String.format("SELECT * FROM c WHERE c.id = '%s'", item.getId()), options, TestObject.class)
            .byPage()
            .blockFirst();

        return response.getCosmosDiagnostics().getDiagnosticsContext();
    }

    /** Issues a readMany for the single identity of the given document. */
    private CosmosDiagnosticsContext readManyWithPolicy(TestObject item, CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
        CosmosReadManyRequestOptions options = new CosmosReadManyRequestOptions()
            .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);

        FeedResponse<TestObject> response = this.container
            .readMany(
                Collections.singletonList(new CosmosItemIdentity(partitionKey(item), item.getId())),
                options,
                TestObject.class)
            .block();

        return response.getCosmosDiagnostics().getDiagnosticsContext();
    }

    /** Upserts the given document after changing its string property. */
    private CosmosDiagnosticsContext upsertWithPolicy(TestObject item, CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
        item.setStringProp("timeout-upsert-" + item.getStringProp());
        CosmosItemRequestOptions options = new CosmosItemRequestOptions()
            .setContentResponseOnWriteEnabled(true)
            .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);

        return this.container
            .upsertItem(item, options)
            .block()
            .getDiagnostics()
            .getDiagnosticsContext();
    }

    /** Executes a create + read transactional batch for a new document. */
    private CosmosDiagnosticsContext batchWithPolicy(CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
        TestObject batchItem = TestObject.create("timeout-batch");
        CosmosBatch batch = CosmosBatch.createCosmosBatch(partitionKey(batchItem));
        batch.createItemOperation(batchItem);
        batch.readItemOperation(batchItem.getId());

        // The policy is set on the batch options through the implementation bridge accessor.
        CosmosBatchRequestOptions batchOptions = new CosmosBatchRequestOptions();
        ImplementationBridgeHelpers.CosmosBatchRequestOptionsHelper
            .getCosmosBatchRequestOptionsAccessor()
            .setEndToEndOperationLatencyPolicyConfig(batchOptions, e2ePolicy);

        CosmosBatchResponse response = this.container.executeCosmosBatch(batch, batchOptions).block();

        return response.getDiagnostics().getDiagnosticsContext();
    }

    /** Patches the string property of the given document. */
    private CosmosDiagnosticsContext patchWithPolicy(TestObject item, CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
        CosmosPatchItemRequestOptions options = new CosmosPatchItemRequestOptions();
        options.setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);

        CosmosItemResponse<TestObject> response = this.container
            .patchItem(
                item.getId(),
                partitionKey(item),
                CosmosPatchOperations.create().set("/stringProp", "timeout-patched-" + item.getStringProp()),
                options,
                TestObject.class)
            .block();

        return response.getDiagnostics().getDiagnosticsContext();
    }
}

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.azure.cosmos.workflows.customer;

import com.azure.cosmos.CosmosClientBuilder;
import com.azure.cosmos.CosmosDiagnosticsContext;
import com.azure.cosmos.CosmosException;
import com.azure.cosmos.ReadConsistencyStrategy;
import com.azure.cosmos.TestObject;
import com.azure.cosmos.implementation.HttpConstants;
import com.azure.cosmos.models.CosmosChangeFeedRequestOptions;
import com.azure.cosmos.models.CosmosItemIdentity;
import com.azure.cosmos.models.CosmosItemRequestOptions;
import com.azure.cosmos.models.CosmosItemResponse;
import com.azure.cosmos.models.CosmosQueryRequestOptions;
import com.azure.cosmos.models.CosmosReadManyRequestOptions;
import com.azure.cosmos.models.FeedRange;
import com.azure.cosmos.models.FeedResponse;
import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType;
import com.azure.cosmos.test.faultinjection.FaultInjectionRule;
import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Factory;
import org.testng.annotations.Test;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Customer workflow covering the {@code LATEST_COMMITTED} read consistency strategy together
 * with excluded regions across point read, query, readMany and change feed, plus a point read
 * under an injected {@code LEASE_NOT_FOUND} fault.
 */
public class CustomerWorkflowLatestCommittedTest extends CustomerWorkflowTestBase {

    @Factory(dataProvider = "clientBuildersWithSessionConsistency")
    public CustomerWorkflowLatestCommittedTest(CosmosClientBuilder clientBuilder) {
        super(clientBuilder);
    }

    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
    public void beforeClass() {
        initializeSharedSinglePartitionContainer("Customer latest-committed workflow tests");
    }

    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
    public void afterClass() {
        closeClient();
    }

    /**
     * Creates a document and reads it back through four read paths, asserting for each one that
     * the diagnostics report {@code LATEST_COMMITTED} as the effective read consistency strategy
     * and reflect the excluded regions. The create itself is expected to report {@code DEFAULT}.
     */
    @Test(groups = {"fi-customer-workflows"}, timeOut = TIMEOUT)
    public void latestCommittedAndExcludedRegionsFlowAcrossReadOperations() {
        List<String> excludedRegions = excludeFirstWritableRegion();
        TestObject item = TestObject.create();

        // --- create (write path keeps the default strategy).
        CosmosItemResponse<TestObject> createResponse = this.container
            .createItem(item, new CosmosItemRequestOptions().setExcludedRegions(excludedRegions))
            .block();

        assertThat(createResponse).isNotNull();
        CosmosDiagnosticsContext createDiagnostics = createResponse.getDiagnostics().getDiagnosticsContext();
        assertThat(createResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.CREATED);
        assertThat(createDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.DEFAULT);
        assertExcludedRegions(createDiagnostics, excludedRegions);
        assertDidNotContactExcludedRegions(createDiagnostics, excludedRegions);

        // --- point read.
        CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions()
            .setExcludedRegions(excludedRegions)
            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("latest-committed-read")))
            .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED);

        CosmosItemResponse<TestObject> readResponse = this.container
            .readItem(item.getId(), partitionKey(item), readOptions, TestObject.class)
            .block();

        assertThat(readResponse).isNotNull();
        CosmosDiagnosticsContext readDiagnostics = readResponse.getDiagnostics().getDiagnosticsContext();
        assertThat(readResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.OK);
        assertThat(readDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED);
        assertThat(readDiagnostics.getTotalRequestCharge()).isGreaterThan(0);
        assertKeywordIdentifier(readDiagnostics, "latest-committed-read");
        assertExcludedRegions(readDiagnostics, excludedRegions);
        assertDidNotContactExcludedRegions(readDiagnostics, excludedRegions);

        // --- query (first page only).
        CosmosQueryRequestOptions queryOptions = new CosmosQueryRequestOptions()
            .setExcludedRegions(excludedRegions)
            .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED)
            .setQueryName("LatestCommittedCustomerWorkflowQuery");

        FeedResponse<TestObject> queryResponse = this.container
            .queryItems(String.format("SELECT * FROM c WHERE c.id = '%s'", item.getId()), queryOptions, TestObject.class)
            .byPage()
            .blockFirst();

        assertThat(queryResponse).isNotNull();
        assertThat(queryResponse.getResults()).hasSize(1);
        CosmosDiagnosticsContext queryDiagnostics = queryResponse.getCosmosDiagnostics().getDiagnosticsContext();
        assertThat(queryDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED);
        assertExcludedRegions(queryDiagnostics, excludedRegions);

        // --- readMany for the single identity.
        CosmosReadManyRequestOptions readManyOptions = new CosmosReadManyRequestOptions()
            .setExcludedRegions(excludedRegions)
            .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED);

        FeedResponse<TestObject> readManyResponse = this.container
            .readMany(
                Collections.singletonList(new CosmosItemIdentity(partitionKey(item), item.getId())),
                readManyOptions,
                TestObject.class)
            .block();

        assertThat(readManyResponse).isNotNull();
        assertThat(readManyResponse.getResults()).hasSize(1);
        CosmosDiagnosticsContext readManyDiagnostics = readManyResponse.getCosmosDiagnostics().getDiagnosticsContext();
        assertThat(readManyDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED);
        assertExcludedRegions(readManyDiagnostics, excludedRegions);
        assertDidNotContactExcludedRegions(readManyDiagnostics, excludedRegions);

        // --- change feed from the beginning of the item's logical partition (first page only).
        CosmosChangeFeedRequestOptions changeFeedOptions = CosmosChangeFeedRequestOptions
            .createForProcessingFromBeginning(FeedRange.forLogicalPartition(partitionKey(item)))
            .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED)
            .setExcludedRegions(excludedRegions);

        FeedResponse<TestObject> changeFeedResponse = this.container
            .queryChangeFeed(changeFeedOptions, TestObject.class)
            .byPage()
            .blockFirst();

        assertThat(changeFeedResponse).isNotNull();
        CosmosDiagnosticsContext changeFeedDiagnostics = changeFeedResponse.getCosmosDiagnostics().getDiagnosticsContext();
        assertThat(changeFeedDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED);
        assertExcludedRegions(changeFeedDiagnostics, excludedRegions);
    }

    /**
     * Injects a {@code LEASE_NOT_FOUND} server error once for point reads in the first writable
     * region and performs a {@code LATEST_COMMITTED} read. Both outcomes (success or
     * {@link CosmosException}) are accepted as long as diagnostics are available.
     */
    @Test(groups = {"fi-customer-workflows"}, timeOut = TIMEOUT)
    public void latestCommittedReadWithRegionalLeaseNotFoundFault() {
        TestObject item = TestObject.create();
        this.container.createItem(item).block();

        FaultInjectionRule leaseNotFoundRule = configureServerErrorRule(
            this.container,
            FaultInjectionOperationType.READ_ITEM,
            FaultInjectionServerErrorType.LEASE_NOT_FOUND,
            this.writableRegions.get(0),
            currentFaultInjectionConnectionType(),
            1);

        try {
            CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions()
                .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED)
                .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("latest-committed-fault-read")));

            CosmosDiagnosticsContext diagnosticsContext;
            try {
                CosmosItemResponse<TestObject> readResponse = this.container
                    .readItem(item.getId(), partitionKey(item), readOptions, TestObject.class)
                    .block();

                assertThat(readResponse).isNotNull();
                diagnosticsContext = readResponse.getDiagnostics().getDiagnosticsContext();
            } catch (CosmosException error) {
                if (error.getDiagnostics() == null) {
                    // Nothing to assert on; rethrow so the real failure is reported instead of
                    // a NullPointerException.
                    throw error;
                }
                diagnosticsContext = error.getDiagnostics().getDiagnosticsContext();
            }

            assertThat(diagnosticsContext).isNotNull();
            assertThat(diagnosticsContext.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED);
            assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull();
            assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0);
        } finally {
            leaseNotFoundRule.disable();
        }
    }
}
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java
new file mode 100644
index 000000000000..35ae4799940d
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java
@@ -0,0 +1,124 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.cosmos.workflows.customer;
+
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.CosmosDiagnosticsContext;
+import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig;
+import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder;
+import com.azure.cosmos.CosmosException;
+import com.azure.cosmos.TestObject;
+import com.azure.cosmos.ThresholdBasedAvailabilityStrategy;
+import com.azure.cosmos.models.CosmosItemRequestOptions;
+import com.azure.cosmos.models.CosmosItemResponse;
+import com.azure.cosmos.models.CosmosPatchItemRequestOptions;
+import com.azure.cosmos.models.CosmosPatchOperations;
+import com.azure.cosmos.models.CosmosQueryRequestOptions;
+import com.azure.cosmos.models.FeedResponse;
+import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType;
+import com.azure.cosmos.test.faultinjection.FaultInjectionRule;
+import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Factory;
+import org.testng.annotations.Test;
+
+import java.time.Duration;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class CustomerWorkflowPartitionLevelCircuitBreakerTest extends CustomerWorkflowTestBase {
+
+    @Factory(dataProvider = "clientBuildersWithDirectTcpSession")
+    public CustomerWorkflowPartitionLevelCircuitBreakerTest(CosmosClientBuilder clientBuilder) {
+        super(clientBuilder);
+    }
+
+    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
+    public void beforeClass() {
+        initializeSharedSinglePartitionContainer("Customer PCLB workflow tests");
+    }
+
+    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
+    public void afterClass() {
+        closeClient();
+    }
+
+    @Test(groups = {"fi-customer-workflows"}, timeOut = 2 * TIMEOUT)
+    public void pointOperationCircuitBreakerAndQueryPlanWorkflow() { // faulted read, then query, then patch, all under one e2e policy
+        TestObject item = TestObject.create();
+        this.container.createItem(item).block();
+
+        CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(3))
+            .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200)))
+            .build();
+
+        FaultInjectionRule readFaultRule = configureServerErrorRule(
+            this.container,
+            FaultInjectionOperationType.READ_ITEM,
+            FaultInjectionServerErrorType.SERVICE_UNAVAILABLE,
+            1);
+
+        try {
+            CosmosDiagnosticsContext readDiagnostics = readWithPolicy(item, e2ePolicy);
+
+            assertThat(readDiagnostics).isNotNull();
+            assertThat(readDiagnostics.getStatusCode()).isGreaterThan(0);
+            assertThat(readDiagnostics.getContactedRegionNames()).isNotNull();
+        } finally {
+            readFaultRule.disable(); // the query and patch below run without the read fault
+        }
+
+        CosmosDiagnosticsContext queryDiagnostics = queryWithPolicy(item, e2ePolicy);
+        assertThat(queryDiagnostics).isNotNull();
+        assertThat(queryDiagnostics.getStatusCode()).isGreaterThan(0);
+        assertThat(queryDiagnostics.getContactedRegionNames()).isNotNull();
+        assertThat(queryDiagnostics.toJson()).contains("queryPlanDiagnosticsContext");
+
+        CosmosPatchItemRequestOptions patchOptions = new CosmosPatchItemRequestOptions();
+        patchOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);
+        CosmosItemResponse<TestObject> patchResponse = this.container
+            .patchItem(
+                item.getId(),
+                partitionKey(item),
+                CosmosPatchOperations.create().set("/stringProp", "pclb-patched-" + item.getStringProp()),
+                patchOptions,
+                TestObject.class)
+            .block();
+
+        assertThat(patchResponse).isNotNull();
+        assertThat(patchResponse.getDiagnostics()).isNotNull();
+    }
+
+    private CosmosDiagnosticsContext readWithPolicy(TestObject item, CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
+        try {
+            CosmosItemRequestOptions options = new CosmosItemRequestOptions()
+                .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);
+
+            return this.container
+                .readItem(item.getId(), partitionKey(item), options, TestObject.class)
+                .block()
+                .getDiagnostics()
+                .getDiagnosticsContext();
+        } catch (CosmosException error) { // a failed read still yields diagnostics for the caller to assert on
+            return error.getDiagnostics().getDiagnosticsContext();
+        }
+    }
+
+    private CosmosDiagnosticsContext queryWithPolicy(TestObject item, CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) {
+        try {
+            CosmosQueryRequestOptions queryOptions = new CosmosQueryRequestOptions()
+                .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy)
+                .setQueryName("PclbCustomerWorkflowQuery");
+
+            FeedResponse<TestObject> response = this.container
+                .queryItems(String.format("SELECT * FROM c WHERE c.id = '%s'", item.getId()), queryOptions, TestObject.class)
+                .byPage()
+                .blockFirst(); // only the first page's diagnostics are inspected
+
+            return response.getCosmosDiagnostics().getDiagnosticsContext();
+        } catch (CosmosException error) {
+            return error.getDiagnostics().getDiagnosticsContext();
+        }
+    }
+}
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java
new file mode 100644
index 000000000000..114079da6c11
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java
@@ -0,0 +1,157 @@
+// Copyright (c) Microsoft
Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.cosmos.workflows.customer;
+
+import com.azure.cosmos.ConsistencyLevel;
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.CosmosDiagnosticsContext;
+import com.azure.cosmos.ReadConsistencyStrategy;
+import com.azure.cosmos.TestObject;
+import com.azure.cosmos.implementation.OverridableRequestOptions;
+import com.azure.cosmos.models.CosmosItemIdentity;
+import com.azure.cosmos.models.CosmosItemRequestOptions;
+import com.azure.cosmos.models.CosmosItemResponse;
+import com.azure.cosmos.models.CosmosQueryRequestOptions;
+import com.azure.cosmos.models.CosmosReadManyRequestOptions;
+import com.azure.cosmos.models.FeedResponse;
+import com.azure.cosmos.models.PartitionKey;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Factory;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class CustomerWorkflowRequestOptionsTest extends CustomerWorkflowTestBase {
+    @Factory(dataProvider = "clientBuildersWithDirectTcpSession")
+    public CustomerWorkflowRequestOptionsTest(CosmosClientBuilder clientBuilder) {
+        super(clientBuilder);
+    }
+
+    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
+    public void beforeClass() {
+        initializeSharedSinglePartitionContainer("Customer workflow request option tests");
+    }
+
+    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
+    public void afterClass() {
+        closeClient();
+    }
+
+    @Test(groups = {"fi-customer-workflows"}, timeOut = TIMEOUT)
+    public void excludedRegionAndKeywordIdentifiersFlowAcrossOperations() { // create, read, query, readMany, upsert, delete on one item
+        String excludedRegion = this.writableRegions.get(0);
+        List<String> excludedRegions = Collections.singletonList(excludedRegion);
+        TestObject item = TestObject.create();
+
+        CosmosItemRequestOptions createOptions = new CosmosItemRequestOptions()
+            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-create")))
+            .setContentResponseOnWriteEnabled(true)
+            .setExcludedRegions(excludedRegions);
+
+        CosmosItemResponse<TestObject> createResponse = this.container
+            .createItem(item, createOptions)
+            .block();
+
+        assertThat(createResponse).isNotNull();
+        assertThat(createResponse.getStatusCode()).isEqualTo(201);
+        assertKeywordIdentifier(createResponse.getDiagnostics().getDiagnosticsContext(), "customer-create");
+        assertExcludedRegions(createResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions);
+        assertDidNotContactExcludedRegions(createResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions);
+
+        CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions()
+            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-read")))
+            .setExcludedRegions(excludedRegions)
+            .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED);
+
+        CosmosItemResponse<TestObject> readResponse = this.container
+            .readItem(item.getId(), new PartitionKey(item.getMypk()), readOptions, TestObject.class)
+            .block();
+
+        assertThat(readResponse).isNotNull();
+        CosmosDiagnosticsContext readDiagnostics = readResponse.getDiagnostics().getDiagnosticsContext();
+        assertThat(readResponse.getStatusCode()).isEqualTo(200);
+        assertThat(readDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED);
+        assertKeywordIdentifier(readDiagnostics, "customer-read");
+        assertExcludedRegions(readDiagnostics, excludedRegions);
+        assertDidNotContactExcludedRegions(readDiagnostics, excludedRegions);
+
+        CosmosQueryRequestOptions queryOptions = new CosmosQueryRequestOptions()
+            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-query")))
+            .setExcludedRegions(excludedRegions)
+            .setConsistencyLevel(ConsistencyLevel.EVENTUAL)
+            .setQueryMetricsEnabled(true)
+            .setQueryName("CustomerWorkflowQuery");
+
+        String query = String.format("SELECT * FROM c WHERE c.id = '%s'", item.getId());
+        FeedResponse<TestObject> queryResponse = this.container
+            .queryItems(query, queryOptions, TestObject.class)
+            .byPage()
+            .blockFirst();
+
+        assertThat(queryResponse).isNotNull();
+        assertThat(queryResponse.getResults()).hasSize(1);
+        CosmosDiagnosticsContext queryDiagnostics = queryResponse.getCosmosDiagnostics().getDiagnosticsContext();
+        assertKeywordIdentifier(queryDiagnostics, "customer-query");
+        assertExcludedRegions(queryDiagnostics, excludedRegions);
+        OverridableRequestOptions queryRequestOptions = getRequestOptions(queryDiagnostics);
+        assertThat(queryRequestOptions.getConsistencyLevel()).isEqualTo(ConsistencyLevel.EVENTUAL);
+        assertThat(queryRequestOptions.isQueryMetricsEnabled()).isTrue();
+        assertThat(queryRequestOptions.getQueryNameOrDefault(null)).isEqualTo("CustomerWorkflowQuery");
+
+        CosmosReadManyRequestOptions readManyOptions = new CosmosReadManyRequestOptions()
+            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-read-many")))
+            .setExcludedRegions(excludedRegions)
+            .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED);
+
+        FeedResponse<TestObject> readManyResponse = this.container
+            .readMany(
+                Arrays.asList(new CosmosItemIdentity(new PartitionKey(item.getMypk()), item.getId())),
+                readManyOptions,
+                TestObject.class)
+            .block();
+
+        assertThat(readManyResponse).isNotNull();
+        assertThat(readManyResponse.getResults()).hasSize(1);
+        CosmosDiagnosticsContext readManyDiagnostics = readManyResponse.getCosmosDiagnostics().getDiagnosticsContext();
+        assertThat(readManyDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED);
+        assertKeywordIdentifier(readManyDiagnostics, "customer-read-many");
+        assertExcludedRegions(readManyDiagnostics, excludedRegions);
+        assertDidNotContactExcludedRegions(readManyDiagnostics, excludedRegions);
+
+        item.setStringProp("updated-" + item.getStringProp());
+        CosmosItemRequestOptions upsertOptions = new CosmosItemRequestOptions()
+            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-upsert")))
+            .setExcludedRegions(excludedRegions)
+            .setContentResponseOnWriteEnabled(true);
+
+        CosmosItemResponse<TestObject> upsertResponse = this.container
+            .upsertItem(item, upsertOptions)
+            .block();
+
+        assertThat(upsertResponse).isNotNull();
+        assertThat(upsertResponse.getStatusCode()).isEqualTo(200); // item already exists, so the upsert is a replace
+        assertKeywordIdentifier(upsertResponse.getDiagnostics().getDiagnosticsContext(), "customer-upsert");
+        assertExcludedRegions(upsertResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions);
+        assertDidNotContactExcludedRegions(upsertResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions);
+
+        CosmosItemRequestOptions deleteOptions = new CosmosItemRequestOptions()
+            .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-delete")))
+            .setExcludedRegions(excludedRegions);
+
+        CosmosItemResponse<Object> deleteResponse = this.container
+            .deleteItem(item.getId(), new PartitionKey(item.getMypk()), deleteOptions)
+            .block();
+
+        assertThat(deleteResponse).isNotNull();
+        assertThat(deleteResponse.getStatusCode()).isEqualTo(204);
+        assertKeywordIdentifier(deleteResponse.getDiagnostics().getDiagnosticsContext(), "customer-delete");
+        assertExcludedRegions(deleteResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions);
+        assertDidNotContactExcludedRegions(deleteResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions);
+    }
+}
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java
new file mode 100644
index 000000000000..57bc2c5b6a21
--- /dev/null
+++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java
@@ -0,0 +1,91 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.cosmos.workflows.customer;
+
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.CosmosException;
+import com.azure.cosmos.TestObject;
+import com.azure.cosmos.implementation.ConsistencyTestsBase;
+import com.azure.cosmos.implementation.HttpConstants;
+import com.azure.cosmos.implementation.ISessionToken;
+import com.azure.cosmos.implementation.SessionTokenHelper;
+import com.azure.cosmos.implementation.Utils;
+import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
+import com.azure.cosmos.models.CosmosItemIdentity;
+import com.azure.cosmos.models.CosmosItemResponse;
+import com.azure.cosmos.models.FeedResponse;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Factory;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.fail;
+
+public class CustomerWorkflowSessionTokenTest extends CustomerWorkflowTestBase {
+
+    @Factory(dataProvider = "clientBuildersWithDirectTcpSession")
+    public CustomerWorkflowSessionTokenTest(CosmosClientBuilder clientBuilder) {
+        super(clientBuilder);
+    }
+
+    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
+    public void beforeClass() {
+        initializeSharedSinglePartitionContainer("Customer session-token workflow tests");
+    }
+
+    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
+    public void afterClass() {
+        closeClient();
+    }
+
+    @Test(groups = {"fi-customer-workflows"}, timeOut = TIMEOUT)
+    public void readManyWithAdvancedSessionTokenReturnsReadSessionNotAvailable() throws Exception {
+        List<CosmosItemIdentity> itemIdentities = new ArrayList<>();
+        String lastSessionToken = null;
+
+        for (int index = 0; index < 3; index++) {
+            TestObject item = TestObject.create("session-token-workflow");
+            CosmosItemResponse<TestObject> createResponse = this.container.createItem(item).block();
+
+            assertThat(createResponse).isNotNull();
+            lastSessionToken = createResponse.getSessionToken(); // token of the most recent write is reused below
+            itemIdentities.add(new CosmosItemIdentity(partitionKey(item), item.getId()));
+        }
+
+        FeedResponse<TestObject> validReadManyResponse = this.container
+            .readMany(itemIdentities, lastSessionToken, TestObject.class)
+            .block();
+
+        assertThat(validReadManyResponse).isNotNull();
+        assertThat(validReadManyResponse.getResults()).hasSize(3);
+
+        String advancedSessionToken = advanceSessionToken(lastSessionToken);
+
+        try {
+            this.container
+                .readMany(itemIdentities, advancedSessionToken, TestObject.class)
+                .block();
+
+            fail("Should have hit read session not available error."); // AssertionError is not an Exception, so the catch below does not swallow it
+        } catch (Exception error) {
+            CosmosException cosmosException = Utils.as(error, CosmosException.class);
+
+            assertThat(cosmosException).isNotNull();
+            assertThat(cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.NOTFOUND);
+            assertThat(cosmosException.getSubStatusCode()).isEqualTo(HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE);
+            assertThat(cosmosException.getDiagnostics()).isNotNull();
+        }
+    }
+
+    private static String advanceSessionToken(String originalSessionToken) throws Exception {
+        String[] tokenParts = StringUtils.split(originalSessionToken, ":"); // expects the token as "<prefix>:<session token>"
+        ISessionToken sessionToken = SessionTokenHelper.parse(tokenParts[1]);
+        ISessionToken modifiedSessionToken = ConsistencyTestsBase.createSessionToken(sessionToken, sessionToken.getLSN() + 1000000);
+
+        return tokenParts[0] + ":" + modifiedSessionToken.convertToString();
+    }
+}
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java
new file mode 100644
index 000000000000..c9ab2d761932
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java
@@ -0,0 +1,249 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.cosmos.workflows.customer;
+
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.CosmosDiagnosticsContext;
+import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig;
+import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder;
+import com.azure.cosmos.CosmosException;
+import com.azure.cosmos.ReadConsistencyStrategy;
+import com.azure.cosmos.TestObject;
+import com.azure.cosmos.ThresholdBasedAvailabilityStrategy;
+import com.azure.cosmos.implementation.HttpConstants;
+import com.azure.cosmos.models.CosmosItemRequestOptions;
+import com.azure.cosmos.models.CosmosItemResponse;
+import com.azure.cosmos.models.PartitionKey;
+import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType;
+import com.azure.cosmos.test.faultinjection.FaultInjectionRule;
+import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Factory;
+import org.testng.annotations.Test;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class CustomerWorkflowSingleMasterAvailabilityTest extends CustomerWorkflowTestBase {
+
+    @Factory(dataProvider = "clientBuildersWithSessionConsistency")
+    public CustomerWorkflowSingleMasterAvailabilityTest(CosmosClientBuilder clientBuilder) {
+        super(clientBuilder);
+    }
+
+    @BeforeClass(groups = {"fi-sm-customer-workflows"}, timeOut = SETUP_TIMEOUT)
+    public void beforeClass() {
+        initializeSharedSingleWriteMultiRegionContainer("Customer single-master workflow tests");
+    }
+
+    @AfterClass(groups = {"fi-sm-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
+    public void afterClass() {
+        closeClient();
+    }
+
+    @Test(groups = {"fi-sm-customer-workflows"}, timeOut = TIMEOUT)
+    public void excludedReadableRegionRoutesReadToRemainingReadableRegion() {
+        TestObject item = TestObject.create();
+        this.container.createItem(item).block();
+
+        List<String> excludedRegions = excludeFirstReadableRegion();
+        CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions()
+            .setExcludedRegions(excludedRegions)
+            .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED);
+
+        CosmosItemResponse<TestObject> readResponse = this.container
+            .readItem(item.getId(), partitionKey(item), readOptions, TestObject.class)
+            .block();
+
+        assertThat(readResponse).isNotNull();
+        CosmosDiagnosticsContext diagnosticsContext = readResponse.getDiagnostics().getDiagnosticsContext();
+        assertThat(readResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.OK);
+        assertThat(diagnosticsContext.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED);
+        assertExcludedRegions(diagnosticsContext, excludedRegions);
+        assertDidNotContactExcludedRegions(diagnosticsContext, excludedRegions);
+    }
+
+    @Test(groups = {"fi-sm-customer-workflows"}, timeOut = TIMEOUT)
+    public void readFaultInPreferredReadableRegionCanUseRemoteReadableRegion() {
+        TestObject item = TestObject.create();
+        this.container.createItem(item).block();
+
+        FaultInjectionRule readSessionNotAvailableRule = configureServerErrorRule(
+            this.container,
+            FaultInjectionOperationType.READ_ITEM,
+            FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE,
+            this.readableRegions.get(0),
+            currentFaultInjectionConnectionType(),
+            1);
+
+        CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(5))
+            .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200)))
+            .build();
+
+        try {
+            CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions()
+                .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);
+
+            CosmosDiagnosticsContext diagnosticsContext = readWithDiagnostics(item, readOptions);
+
+            assertThat(diagnosticsContext).isNotNull();
+            assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0);
+            assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull();
+            if (diagnosticsContext.getStatusCode() < HttpConstants.StatusCodes.BADREQUEST) { // read succeeded
+                assertThat(diagnosticsContext.getContactedRegionNames()).isNotEmpty();
+            } else { // the only accepted failure is the injected 404 / read-session-not-available
+                assertThat(diagnosticsContext.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.NOTFOUND);
+                assertThat(diagnosticsContext.getSubStatusCode()).isEqualTo(HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE);
+            }
+        } finally {
+            readSessionNotAvailableRule.disable();
+        }
+    }
+
+    @Test(groups = {"fi-sm-customer-workflows"}, timeOut = TIMEOUT)
+    public void writeFaultStaysOnSingleWritableRegion() {
+        FaultInjectionRule partitionMigratingRule = configureServerErrorRule(
+            this.container,
+            FaultInjectionOperationType.CREATE_ITEM,
+            FaultInjectionServerErrorType.PARTITION_IS_MIGRATING,
+            this.writableRegions.get(0),
+            currentFaultInjectionConnectionType(),
+            1);
+
+        try {
+            CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(5))
+                .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200)))
+                .build();
+            CosmosItemRequestOptions createOptions = new CosmosItemRequestOptions()
+                .setContentResponseOnWriteEnabled(true)
+                .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy);
+
+            CosmosDiagnosticsContext diagnosticsContext = createWithDiagnostics(TestObject.create(), createOptions);
+
+            assertThat(diagnosticsContext).isNotNull();
+            assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0);
+            assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull();
+
+            Set<String> readOnlyRegions = this.readableRegions
+                .stream()
+                .map(region -> region.toLowerCase(Locale.ROOT))
+                .filter(region -> !region.equals(this.writableRegions.get(0).toLowerCase(Locale.ROOT))) // drop the write region itself
+                .collect(Collectors.toSet());
+            assertThat(diagnosticsContext.getContactedRegionNames()).doesNotContainAnyElementsOf(readOnlyRegions);
+        } finally {
+            partitionMigratingRule.disable();
+        }
+    }
+
+    @DataProvider(name = "singleWriteReadFaultScenarios")
+    public Object[][] singleWriteReadFaultScenarios() {
+        return new Object[][]{
+            {FaultInjectionServerErrorType.GONE},
+            {FaultInjectionServerErrorType.TIMEOUT},
+            {FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE},
+            {FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR},
+            {FaultInjectionServerErrorType.SERVICE_UNAVAILABLE}
+        };
+    }
+
+    @Test(groups = {"fi-sm-customer-workflows"}, dataProvider = "singleWriteReadFaultScenarios", timeOut = TIMEOUT)
+    public void singleWriteReadFaultMatrix(FaultInjectionServerErrorType errorType) {
+        TestObject item = TestObject.create();
+        this.container.createItem(item).block();
+
+        FaultInjectionRule faultRule = configureServerErrorRule(
+            this.container,
+            FaultInjectionOperationType.READ_ITEM,
+            errorType,
+            this.readableRegions.get(0),
+            currentFaultInjectionConnectionType(),
+            1);
+
+        try {
+            CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions()
+                .setCosmosEndToEndOperationLatencyPolicyConfig(
+                    new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(5))
+                        .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200)))
+                        .build());
+
+            CosmosDiagnosticsContext diagnosticsContext = readWithDiagnostics(item, readOptions);
+
+            assertThat(diagnosticsContext).isNotNull();
+            assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0);
+            assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull();
+        } finally {
+            faultRule.disable();
+        }
+    }
+
+    @DataProvider(name = "singleWriteMutationFaultScenarios")
+    public Object[][] singleWriteMutationFaultScenarios() {
+        return new Object[][]{
+            {FaultInjectionServerErrorType.PARTITION_IS_MIGRATING},
+            {FaultInjectionServerErrorType.TIMEOUT},
+            {FaultInjectionServerErrorType.TOO_MANY_REQUEST},
+            {FaultInjectionServerErrorType.RETRY_WITH},
+            {FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR},
+            {FaultInjectionServerErrorType.SERVICE_UNAVAILABLE}
+        };
+    }
+
+    @Test(groups = {"fi-sm-customer-workflows"}, dataProvider = "singleWriteMutationFaultScenarios", timeOut = TIMEOUT)
+    public void singleWriteCreateFaultMatrix(FaultInjectionServerErrorType errorType) {
+        FaultInjectionRule faultRule = configureServerErrorRule(
+            this.container,
+            FaultInjectionOperationType.CREATE_ITEM,
+            errorType,
+            this.writableRegions.get(0),
+            currentFaultInjectionConnectionType(),
+            1);
+
+        try {
+            CosmosItemRequestOptions createOptions = new CosmosItemRequestOptions()
+                .setContentResponseOnWriteEnabled(true)
+                .setCosmosEndToEndOperationLatencyPolicyConfig(
+                    new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(5))
+                        .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200)))
+                        .build());
+
+            CosmosDiagnosticsContext diagnosticsContext = createWithDiagnostics(TestObject.create(), createOptions);
+
+            assertThat(diagnosticsContext).isNotNull();
+            assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0);
+            assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull();
+        } finally {
+            faultRule.disable();
+        }
+    }
+
+    private CosmosDiagnosticsContext readWithDiagnostics(TestObject item, CosmosItemRequestOptions options) {
+        try {
+            return this.container
+                .readItem(item.getId(), partitionKey(item), options, TestObject.class)
+                .block()
+                .getDiagnostics()
+                .getDiagnosticsContext();
+        } catch (CosmosException error) { // failure diagnostics are returned so callers can assert on either outcome
+            return error.getDiagnostics().getDiagnosticsContext();
+        }
+    }
+
+    private CosmosDiagnosticsContext createWithDiagnostics(TestObject item, CosmosItemRequestOptions options) {
+        try {
+            return this.container
+                .createItem(item, new PartitionKey(item.getMypk()), options)
+                .block()
+                .getDiagnostics()
+                .getDiagnosticsContext();
+        } catch (CosmosException error) {
+            return error.getDiagnostics().getDiagnosticsContext();
+        }
+    }
+}
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java
new file mode 100644
index 000000000000..d68cd5981daf
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java
@@ -0,0 +1,96 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.cosmos.workflows.customer;
+
+import com.azure.cosmos.CosmosClientBuilder;
+import com.azure.cosmos.implementation.HttpConstants;
+import com.azure.cosmos.models.CosmosStoredProcedureProperties;
+import com.azure.cosmos.models.CosmosStoredProcedureRequestOptions;
+import com.azure.cosmos.models.CosmosStoredProcedureResponse;
+import com.azure.cosmos.models.PartitionKey;
+import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType;
+import com.azure.cosmos.test.faultinjection.FaultInjectionRule;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Factory;
+import org.testng.annotations.Test;
+
+import java.time.Duration;
+import java.util.Collections;
+import java.util.UUID;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class CustomerWorkflowStoredProcedureTest extends CustomerWorkflowTestBase {
+
+    @Factory(dataProvider = "clientBuildersWithDirectTcpSession")
+    public CustomerWorkflowStoredProcedureTest(CosmosClientBuilder clientBuilder) {
+        super(clientBuilder);
+    }
+
+    @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT)
+    public void beforeClass() {
+        initializeSharedSinglePartitionContainer("Customer stored procedure workflow tests");
+    }
+
+    @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
+    public void afterClass() {
+        closeClient();
+    }
+
+    @Test(groups = {"fi-customer-workflows"}, timeOut = TIMEOUT)
+    public void storedProcedureCreateReadExecuteWithMetadataFaultRule() { // create the sproc, then read and execute it while a metadata delay rule is active
+        String storedProcedureId = "customer-sproc-" + UUID.randomUUID(); // unique id per run
+        CosmosStoredProcedureProperties storedProcedureProperties = new CosmosStoredProcedureProperties(
+            storedProcedureId,
+            "function(input) {" +
+                "  var value = input || 'workflow';" +
+                "  console.log('stored procedure workflow ' + value);" +
+                "  getContext().getResponse().setBody('sproc-ok:' + value);" +
+                "}");
+
+        CosmosStoredProcedureResponse createResponse = this.container
+            .getScripts()
+            .createStoredProcedure(storedProcedureProperties)
+            .block();
+
+        assertThat(createResponse).isNotNull();
+        assertThat(createResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.CREATED);
+        assertThat(createResponse.getDiagnostics()).isNotNull();
+
+        FaultInjectionRule metadataDelayRule = configureResponseDelayRule( // configured after the create, so only the read and execute run under it
+            this.container,
+            FaultInjectionOperationType.METADATA_REQUEST_CONTAINER,
+            Duration.ofMillis(100),
+            1);
+
+        try {
+            CosmosStoredProcedureRequestOptions options = new CosmosStoredProcedureRequestOptions();
+            options.setPartitionKey(new PartitionKey("sproc-workflow"));
+            options.setScriptLoggingEnabled(true); // the getScriptLog() assertion below relies on this
+
+            CosmosStoredProcedureResponse readResponse = this.container
+                .getScripts()
+                .getStoredProcedure(storedProcedureId)
+                .read()
+                .block();
+
+            assertThat(readResponse).isNotNull();
+            assertThat(readResponse.getProperties().getId()).isEqualTo(storedProcedureId);
+
+            CosmosStoredProcedureResponse executeResponse = this.container
+                .getScripts()
+                .getStoredProcedure(storedProcedureId)
+                .execute(Collections.singletonList("workflow"), options)
+                .block();
+
+            assertThat(executeResponse).isNotNull();
+            assertThat(executeResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.OK);
+            assertThat(executeResponse.getResponseAsString()).contains("sproc-ok:workflow");
+            assertThat(executeResponse.getScriptLog()).contains("stored procedure workflow workflow");
+            assertThat(executeResponse.getDiagnostics()).isNotNull();
+        } finally {
+            metadataDelayRule.disable();
+        }
+    }
+}
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java
new file mode 100644
index 000000000000..e12db077dec1
--- /dev/null
+++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java @@ -0,0 +1,320 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.cosmos.workflows.customer; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosAsyncDatabase; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.CosmosDiagnosticsContext; +import com.azure.cosmos.ConnectionMode; +import com.azure.cosmos.TestObject; +import com.azure.cosmos.implementation.AsyncDocumentClient; +import com.azure.cosmos.implementation.DatabaseAccount; +import com.azure.cosmos.implementation.DatabaseAccountLocation; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.OverridableRequestOptions; +import com.azure.cosmos.implementation.RxDocumentClientImpl; +import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; +import com.azure.cosmos.models.ThroughputProperties; +import com.azure.cosmos.rx.TestSuiteBase; +import com.azure.cosmos.test.faultinjection.CosmosFaultInjectionHelper; +import com.azure.cosmos.test.faultinjection.FaultInjectionCondition; +import com.azure.cosmos.test.faultinjection.FaultInjectionConditionBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionConnectionType; +import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType; +import com.azure.cosmos.test.faultinjection.FaultInjectionResultBuilders; +import com.azure.cosmos.test.faultinjection.FaultInjectionRule; +import com.azure.cosmos.test.faultinjection.FaultInjectionRuleBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType; +import com.azure.cosmos.test.faultinjection.IFaultInjectionResult; +import org.testng.SkipException; + +import java.time.Duration; 
+import java.util.ArrayList; +import java.util.Collections; +import java.util.Collection; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.UUID; +import java.util.function.BooleanSupplier; +import java.util.stream.Collectors; + +import static org.assertj.core.api.Assertions.assertThat; + +public abstract class CustomerWorkflowTestBase extends TestSuiteBase { + protected CosmosAsyncClient client; + protected CosmosAsyncContainer container; + protected List writableRegions; + protected List readableRegions; + + protected CustomerWorkflowTestBase(CosmosClientBuilder clientBuilder) { + super(clientBuilder); + } + + protected final void initializeSharedSinglePartitionContainer(String scenarioName) { + CosmosAsyncClient discoveryClient = null; + + try { + discoveryClient = getClientBuilder().buildAsyncClient(); + this.writableRegions = discoverWritableRegions(discoveryClient); + skipIfInsufficientRegions(this.writableRegions, scenarioName); + + this.client = getClientBuilder() + .preferredRegions(this.writableRegions) + .multipleWriteRegionsEnabled(true) + .contentResponseOnWriteEnabled(true) + .buildAsyncClient(); + this.container = getSharedSinglePartitionCosmosContainer(this.client); + } finally { + safeClose(discoveryClient); + } + } + + protected final void closeClient() { + safeClose(this.client); + this.client = null; + this.container = null; + this.writableRegions = null; + this.readableRegions = null; + } + + protected final void initializeSharedSingleWriteMultiRegionContainer(String scenarioName) { + CosmosAsyncClient discoveryClient = null; + + try { + CosmosClientBuilder clientBuilder = getClientBuilder() + .multipleWriteRegionsEnabled(false) + .contentResponseOnWriteEnabled(true); + + discoveryClient = clientBuilder.buildAsyncClient(); + this.writableRegions = discoverWritableRegions(discoveryClient); + this.readableRegions = discoverReadableRegions(discoveryClient); + 
skipIfInsufficientReadableRegions(this.readableRegions, scenarioName); + skipIfNotSingleWriteRegion(this.writableRegions, scenarioName); + + this.client = clientBuilder + .preferredRegions(this.readableRegions) + .multipleWriteRegionsEnabled(false) + .buildAsyncClient(); + this.container = getSharedSinglePartitionCosmosContainer(this.client); + } finally { + safeClose(discoveryClient); + } + } + + protected final List excludeFirstWritableRegion() { + return Collections.singletonList(this.writableRegions.get(0)); + } + + protected final List excludeFirstReadableRegion() { + return Collections.singletonList(this.readableRegions.get(0)); + } + + protected static com.azure.cosmos.models.PartitionKey partitionKey(TestObject item) { + return new com.azure.cosmos.models.PartitionKey(item.getMypk()); + } + + protected final CosmosAsyncContainer createTemporaryContainer(String prefix, String partitionKeyPath) { + CosmosAsyncDatabase database = getSharedCosmosDatabase(this.client); + String containerId = prefix + "-" + UUID.randomUUID(); + + database + .createContainerIfNotExists(containerId, partitionKeyPath, ThroughputProperties.createManualThroughput(400)) + .block(); + + return database.getContainer(containerId); + } + + protected static void deleteTemporaryContainer(CosmosAsyncContainer container) { + safeDeleteCollection(container); + } + + protected static void awaitCondition(BooleanSupplier condition, Duration timeout, String failureMessage) { + long deadline = System.nanoTime() + timeout.toNanos(); + + while (System.nanoTime() < deadline) { + if (condition.getAsBoolean()) { + return; + } + + try { + Thread.sleep(250); + } catch (InterruptedException error) { + Thread.currentThread().interrupt(); + throw new AssertionError(failureMessage, error); + } + } + + throw new AssertionError(failureMessage); + } + + protected final FaultInjectionRule configureServerErrorRule( + CosmosAsyncContainer targetContainer, + FaultInjectionOperationType operationType, + 
FaultInjectionServerErrorType errorType, + int hitLimit) { + + return configureServerErrorRule(targetContainer, operationType, errorType, this.writableRegions.get(0), hitLimit); + } + + protected final FaultInjectionRule configureServerErrorRule( + CosmosAsyncContainer targetContainer, + FaultInjectionOperationType operationType, + FaultInjectionServerErrorType errorType, + String region, + int hitLimit) { + + return configureServerErrorRule(targetContainer, operationType, errorType, region, FaultInjectionConnectionType.DIRECT, hitLimit); + } + + protected final FaultInjectionRule configureServerErrorRule( + CosmosAsyncContainer targetContainer, + FaultInjectionOperationType operationType, + FaultInjectionServerErrorType errorType, + String region, + FaultInjectionConnectionType connectionType, + int hitLimit) { + + FaultInjectionConditionBuilder conditionBuilder = new FaultInjectionConditionBuilder() + .operationType(operationType) + .connectionType(connectionType); + + if (region != null) { + conditionBuilder.region(region); + } + + FaultInjectionRule rule = new FaultInjectionRuleBuilder("customer-workflow-" + errorType + "-" + UUID.randomUUID()) + .condition(conditionBuilder.build()) + .result(FaultInjectionResultBuilders.getResultBuilder(errorType).build()) + .duration(Duration.ofMinutes(5)) + .hitLimit(hitLimit) + .build(); + + CosmosFaultInjectionHelper.configureFaultInjectionRules(targetContainer, Collections.singletonList(rule)).block(); + return rule; + } + + protected final FaultInjectionConnectionType currentFaultInjectionConnectionType() { + if (getConnectionPolicy().getConnectionMode() == ConnectionMode.GATEWAY) { + return FaultInjectionConnectionType.GATEWAY; + } + + return FaultInjectionConnectionType.DIRECT; + } + + protected final FaultInjectionRule configureResponseDelayRule( + CosmosAsyncContainer targetContainer, + FaultInjectionOperationType operationType, + Duration delay, + int hitLimit) { + + FaultInjectionCondition condition = new 
FaultInjectionConditionBuilder() + .operationType(operationType) + .connectionType(FaultInjectionConnectionType.DIRECT) + .build(); + + IFaultInjectionResult result = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY) + .delay(delay) + .times(hitLimit) + .build(); + + FaultInjectionRule rule = new FaultInjectionRuleBuilder("customer-workflow-response-delay-" + UUID.randomUUID()) + .condition(condition) + .result(result) + .duration(Duration.ofMinutes(5)) + .hitLimit(hitLimit) + .build(); + + CosmosFaultInjectionHelper.configureFaultInjectionRules(targetContainer, Collections.singletonList(rule)).block(); + return rule; + } + + protected static List discoverWritableRegions(CosmosAsyncClient client) { + AsyncDocumentClient asyncDocumentClient = ReflectionUtils.getAsyncDocumentClient(client); + RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) asyncDocumentClient; + GlobalEndpointManager globalEndpointManager = ReflectionUtils.getGlobalEndpointManager(rxDocumentClient); + DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + + List writableRegions = new ArrayList<>(); + for (DatabaseAccountLocation accountLocation : databaseAccount.getWritableLocations()) { + writableRegions.add(accountLocation.getName()); + } + + return writableRegions; + } + + protected static List discoverReadableRegions(CosmosAsyncClient client) { + AsyncDocumentClient asyncDocumentClient = ReflectionUtils.getAsyncDocumentClient(client); + RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) asyncDocumentClient; + GlobalEndpointManager globalEndpointManager = ReflectionUtils.getGlobalEndpointManager(rxDocumentClient); + DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + + List readableRegions = new ArrayList<>(); + for (DatabaseAccountLocation accountLocation : databaseAccount.getReadableLocations()) { + readableRegions.add(accountLocation.getName()); + } + + return 
readableRegions; + } + + protected static void skipIfInsufficientRegions(List regions, String scenarioName) { + if (regions == null || regions.size() < 2) { + throw new SkipException(scenarioName + " requires a live multi-region account."); + } + } + + protected static void skipIfInsufficientReadableRegions(List regions, String scenarioName) { + if (regions == null || regions.size() < 2) { + throw new SkipException(scenarioName + " requires a live multi-region single-write account."); + } + } + + protected static void skipIfNotSingleWriteRegion(List regions, String scenarioName) { + if (regions == null || regions.size() != 1) { + throw new SkipException(scenarioName + " requires exactly one write region."); + } + } + + protected static OverridableRequestOptions getRequestOptions(CosmosDiagnosticsContext diagnosticsContext) { + assertThat(diagnosticsContext).isNotNull(); + return ImplementationBridgeHelpers + .CosmosDiagnosticsContextHelper + .getCosmosDiagnosticsContextAccessor() + .getRequestOptions(diagnosticsContext); + } + + protected static void assertKeywordIdentifier(CosmosDiagnosticsContext diagnosticsContext, String expectedKeywordIdentifier) { + OverridableRequestOptions requestOptions = getRequestOptions(diagnosticsContext); + + assertThat(requestOptions.getKeywordIdentifiers()) + .containsExactly(expectedKeywordIdentifier); + } + + protected static void assertExcludedRegions( + CosmosDiagnosticsContext diagnosticsContext, + List expectedExcludedRegions) { + + OverridableRequestOptions requestOptions = getRequestOptions(diagnosticsContext); + + assertThat(requestOptions.getExcludedRegions()) + .containsExactlyElementsOf(expectedExcludedRegions); + } + + protected static void assertDidNotContactExcludedRegions( + CosmosDiagnosticsContext diagnosticsContext, + Collection excludedRegions) { + + Set contactedRegionNames = diagnosticsContext.getContactedRegionNames(); + Set normalizedExcludedRegions = excludedRegions + .stream() + .map(region -> 
region.toLowerCase(Locale.ROOT)) + .collect(Collectors.toSet()); + + assertThat(contactedRegionNames).isNotNull(); + assertThat(contactedRegionNames).doesNotContainAnyElementsOf(normalizedExcludedRegions); + } +} \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/fi-customer-workflows-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/fi-customer-workflows-testng.xml new file mode 100644 index 000000000000..edfa8a57770f --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/fi-customer-workflows-testng.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/fi-sm-customer-workflows-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/fi-sm-customer-workflows-testng.xml new file mode 100644 index 000000000000..976b8fbdc204 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/fi-sm-customer-workflows-testng.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index e5771c249c1c..8c9144b412e0 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -8,6 +8,8 @@ "-Pdirect": "Direct", "-Pmulti-master": "MultiMaster", "-Pfi-multi-master": "FaultInjectionMultiMaster", + "-Pfi-customer-workflows": "FaultInjectionCustomerWorkflows", + "-Pfi-sm-customer-workflows": "FaultInjectionSingleMasterCustomerWorkflows", "-Pflaky-multi-master": "FlakyMultiMaster", "-Pcircuit-breaker-misc-direct": "CircuitBreakerMiscDirect", "-Pcircuit-breaker-misc-gateway": "CircuitBreakerMiscGateway", @@ -22,6 +24,7 @@ "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session' }": "", "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }": "", "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; 
enablePartitionMerge = $true }": "", + "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }": "", "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; enablePartitionMerge = $true}": "", "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong'; enableMultipleRegions = $true }": "" }, @@ -126,7 +129,7 @@ } }, "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pfi-multi-master" ], + "ProfileFlag": [ "-Pfi-multi-master", "-Pfi-customer-workflows" ], "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=TRUE\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } @@ -164,6 +167,21 @@ "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "SingleMaster_MultiRegion_FI_CustomerWorkflows": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[\"East US 2\"]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pfi-sm-customer-workflows" ], + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", From 716e2295d8c9b464608eb636cfa68b51f0556232 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 09:05:26 +0000 Subject: [PATCH 02/14] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../cosmos/workflows/customer/CustomerWorkflowTestBase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java index e12db077dec1..2d23044d8660 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java @@ -167,7 +167,7 @@ protected final FaultInjectionRule configureServerErrorRule( String region, int hitLimit) { - return configureServerErrorRule(targetContainer, operationType, errorType, region, FaultInjectionConnectionType.DIRECT, hitLimit); + return configureServerErrorRule(targetContainer, operationType, errorType, region, currentFaultInjectionConnectionType(), hitLimit); } protected final FaultInjectionRule configureServerErrorRule( From e68f6533ca1018e193c0098679486f666de8aa4c Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 09:15:03 +0000 Subject: [PATCH 03/14] Update live-platform-matrix.json --- sdk/cosmos/live-platform-matrix.json | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 8c9144b412e0..38e5bfc71686 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -135,6 +135,22 @@ "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "MultiMaster_MultiRegion_FI_CustomerWorkflows_ThinClient_Http2": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pfi-customer-workflows" ], + "AdditionalArgs": "-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true -DACCOUNT_HOST=$(thin-client-canary-multi-writer-session-endpoint) 
-DACCOUNT_KEY=$(thin-client-canary-multi-writer-session-key) -DCOSMOS.THINCLIENT_ENABLED=true -DCOSMOS.HTTP2_ENABLED=true", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", @@ -182,6 +198,22 @@ "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "SingleMaster_MultiRegion_FI_CustomerWorkflows_ThinClient_Http2": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pfi-sm-customer-workflows" ], + "AdditionalArgs": "-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true -DACCOUNT_HOST=$(thin-client-canary-multi-region-session-endpoint) -DACCOUNT_KEY=$(thin-client-canary-multi-region-session-key) -DCOSMOS.THINCLIENT_ENABLED=true -DCOSMOS.HTTP2_ENABLED=true", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", From 0b236f146e182bbf9a4de7a5dd60d795a3632270 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 11:14:15 +0000 Subject: [PATCH 04/14] Addressing code review feedback --- ...erWorkflowAvailabilityFaultMatrixTest.java | 26 +-- ...stomerWorkflowChangeFeedProcessorTest.java | 69 ++++++- ...ustomerWorkflowDaoStyleOperationsTest.java | 10 +- .../CustomerWorkflowHighE2ETimeoutTest.java | 28 ++- .../CustomerWorkflowLatestCommittedTest.java | 16 +- ...kflowPartitionLevelCircuitBreakerTest.java | 14 +- .../CustomerWorkflowRequestOptionsTest.java | 16 +- .../CustomerWorkflowSessionTokenTest.java | 1 + ...rWorkflowSingleMasterAvailabilityTest.java | 25 ++- .../CustomerWorkflowStoredProcedureTest.java | 5 + 
.../customer/CustomerWorkflowTestBase.java | 173 ++++++++++++++++-- 11 files changed, 307 insertions(+), 76 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java index 1d5300249d66..5cccbb7c06f5 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java @@ -24,6 +24,7 @@ import org.testng.annotations.Test; import java.util.Collections; +import java.util.List; import static org.assertj.core.api.Assertions.assertThat; @@ -84,28 +85,31 @@ public void representativeDirectMultiMasterFaultWorkflow( FaultInjectionOperationType faultInjectionOperationType, FaultInjectionServerErrorType errorType) { + skipIfNotDirectMode("Customer availability fault workflow (direct multi-master)"); + TestObject item = TestObject.create(); if (!"create".equals(operation)) { this.container.createItem(item).block(); + registerForCleanup(item); } - FaultInjectionRule faultRule = configureServerErrorRule( - this.container, - faultInjectionOperationType, - errorType, - this.writableRegions.get(0), - currentFaultInjectionConnectionType(), - 1); + List faultRules = "readMany".equals(operation) + ? 
configureReadManyServerErrorRules(this.container, errorType, this.writableRegions.get(0), 1) + : Collections.singletonList(configureServerErrorRule( + this.container, + faultInjectionOperationType, + errorType, + this.writableRegions.get(0), + currentFaultInjectionConnectionType(), + 1)); try { CosmosDiagnosticsContext diagnosticsContext = executeOperation(operation, item); - assertThat(diagnosticsContext).isNotNull(); - assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0); - assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); + assertFaultInjectedOperation(diagnosticsContext, faultRules); assertThat(diagnosticsContext.getDuration()).isNotNull(); } finally { - faultRule.disable(); + faultRules.forEach(FaultInjectionRule::disable); } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java index 4a7e033a7fbf..8f7ca6389800 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java @@ -9,6 +9,7 @@ import com.azure.cosmos.TestObject; import com.azure.cosmos.models.ChangeFeedProcessorItem; import com.azure.cosmos.models.ChangeFeedProcessorOptions; +import com.azure.cosmos.models.ChangeFeedProcessorState; import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType; import com.azure.cosmos.test.faultinjection.FaultInjectionRule; import com.fasterxml.jackson.databind.JsonNode; @@ -19,7 +20,6 @@ import java.time.Duration; import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.UUID; @@ -47,7 +47,7 @@ public void afterClass() { } @Test(groups = 
{"fi-customer-workflows"}, timeOut = 2 * TIMEOUT) - public void latestVersionProcessorRestartAndReadFeedFaultWorkflow() throws InterruptedException { + public void latestVersionProcessorRestartResumesFromLeasesWorkflow() throws InterruptedException { CosmosAsyncContainer feedContainer = createTemporaryContainer("customer-cfp-feed", "/mypk"); CosmosAsyncContainer leaseContainer = createTemporaryContainer("customer-cfp-lease", "/id"); ChangeFeedProcessor processor = null; @@ -61,7 +61,10 @@ public void latestVersionProcessorRestartAndReadFeedFaultWorkflow() throws Inter createFeedItem(feedContainer, expectedIds, "cfp-initial-1"); createFeedItem(feedContainer, expectedIds, "cfp-initial-2"); - processor = createLatestVersionProcessor(feedContainer, leaseContainer, expectedIds, receivedIds, initialLatch, "initial"); + // Use a single, stable lease prefix so the second processor instance resumes from the persisted + // continuation instead of reprocessing from the beginning - this validates a genuine restart. 
+ String leasePrefix = "resume"; + processor = createLatestVersionProcessor(feedContainer, leaseContainer, expectedIds, receivedIds, initialLatch, leasePrefix); processor.start().block(); ChangeFeedProcessor initialProcessor = processor; @@ -70,7 +73,7 @@ public void latestVersionProcessorRestartAndReadFeedFaultWorkflow() throws Inter assertThat(receivedIds).containsAll(expectedIds); awaitCondition( - () -> !initialProcessor.getCurrentState().block().isEmpty(), + () -> hasAcquiredLeases(initialProcessor), Duration.ofSeconds(20), "Change feed processor did not acquire leases."); @@ -81,7 +84,7 @@ public void latestVersionProcessorRestartAndReadFeedFaultWorkflow() throws Inter TestObject restartedItem = createFeedItem(feedContainer, expectedIds, "cfp-restart"); readFeedDelayRule = configureResponseDelayRule(feedContainer, FaultInjectionOperationType.READ_FEED_ITEM, Duration.ofMillis(100), 1); - processor = createLatestVersionProcessor(feedContainer, leaseContainer, expectedIds, receivedIds, restartLatch, "restart"); + processor = createLatestVersionProcessor(feedContainer, leaseContainer, expectedIds, receivedIds, restartLatch, leasePrefix); processor.start().block(); assertThat(processor.isStarted()).isTrue(); @@ -100,6 +103,62 @@ public void latestVersionProcessorRestartAndReadFeedFaultWorkflow() throws Inter } } + @Test(groups = {"fi-customer-workflows"}, timeOut = 2 * TIMEOUT) + public void latestVersionProcessorWithNewLeasePrefixReprocessesFromBeginningWorkflow() throws InterruptedException { + CosmosAsyncContainer feedContainer = createTemporaryContainer("customer-cfp-feed", "/mypk"); + CosmosAsyncContainer leaseContainer = createTemporaryContainer("customer-cfp-lease", "/id"); + ChangeFeedProcessor processor = null; + + try { + Set expectedIds = Collections.newSetFromMap(new ConcurrentHashMap()); + Set initialReceivedIds = Collections.newSetFromMap(new ConcurrentHashMap()); + CountDownLatch initialLatch = new CountDownLatch(2); + + 
createFeedItem(feedContainer, expectedIds, "cfp-initial-1"); + createFeedItem(feedContainer, expectedIds, "cfp-initial-2"); + + processor = createLatestVersionProcessor(feedContainer, leaseContainer, expectedIds, initialReceivedIds, initialLatch, "initial"); + processor.start().block(); + ChangeFeedProcessor initialProcessor = processor; + + assertThat(processor.isStarted()).isTrue(); + assertThat(initialLatch.await(30, TimeUnit.SECONDS)).isTrue(); + assertThat(initialReceivedIds).containsAll(expectedIds); + + awaitCondition( + () -> hasAcquiredLeases(initialProcessor), + Duration.ofSeconds(20), + "Change feed processor did not acquire leases."); + + processor.stop().block(); + assertThat(processor.isStarted()).isFalse(); + + // A different lease prefix creates a fresh lease set, so a from-beginning processor reprocesses all + // existing items. A separate received-id set is required because the original set already contains them. + Set reprocessedIds = Collections.newSetFromMap(new ConcurrentHashMap()); + CountDownLatch reprocessLatch = new CountDownLatch(expectedIds.size()); + + processor = createLatestVersionProcessor(feedContainer, leaseContainer, expectedIds, reprocessedIds, reprocessLatch, "fresh"); + processor.start().block(); + + assertThat(processor.isStarted()).isTrue(); + assertThat(reprocessLatch.await(30, TimeUnit.SECONDS)).isTrue(); + assertThat(reprocessedIds).containsAll(expectedIds); + assertThat(processor.getEstimatedLag().block()).isNotNull(); + } finally { + if (processor != null && processor.isStarted()) { + processor.stop().block(); + } + deleteTemporaryContainer(feedContainer); + deleteTemporaryContainer(leaseContainer); + } + } + + private static boolean hasAcquiredLeases(ChangeFeedProcessor processor) { + List currentState = processor.getCurrentState().block(); + return currentState != null && !currentState.isEmpty(); + } + private TestObject createFeedItem(CosmosAsyncContainer feedContainer, Set expectedIds, String partitionKey) { 
TestObject item = TestObject.create(partitionKey + "-" + UUID.randomUUID()); feedContainer.createItem(item).block(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java index bc55ec5681f8..0abfd8531096 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java @@ -25,7 +25,6 @@ import java.util.ArrayList; import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.UUID; @@ -40,7 +39,7 @@ public CustomerWorkflowDaoStyleOperationsTest(CosmosClientBuilder clientBuilder) @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT) public void beforeClass() { - initializeSharedSinglePartitionContainer("Customer DAO-style workflow tests"); + initializeSharedSinglePartitionContainer("Customer DAO-style workflow tests", true); } @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) @@ -54,7 +53,7 @@ public void crudReadAllPatchBatchAndBulkWorkflow() { TestObject item = TestObject.create(); CosmosItemRequestOptions createOptions = new CosmosItemRequestOptions() - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("workflow-crud-create"))) + .setKeywordIdentifiers(Collections.singleton("workflow-crud-create")) .setExcludedRegions(excludedRegions) .setCustomItemSerializer(CosmosItemSerializer.DEFAULT_SERIALIZER) .setContentResponseOnWriteEnabled(true); @@ -64,6 +63,7 @@ public void crudReadAllPatchBatchAndBulkWorkflow() { .block(); assertThat(createResponse).isNotNull(); + registerForCleanup(item); 
assertThat(createResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.CREATED); assertKeywordIdentifier(createResponse.getDiagnostics().getDiagnosticsContext(), "workflow-crud-create"); assertThat(getRequestOptions(createResponse.getDiagnostics().getDiagnosticsContext()).getCustomItemSerializer()) @@ -113,12 +113,14 @@ public void crudReadAllPatchBatchAndBulkWorkflow() { CosmosBatchResponse batchResponse = this.container.executeCosmosBatch(batch).block(); assertThat(batchResponse).isNotNull(); + registerForCleanup(batchItem); assertThat(batchResponse.isSuccessStatusCode()).isTrue(); assertThat(batchResponse.size()).isEqualTo(2); assertThat(batchResponse.getDiagnostics()).isNotNull(); TestObject bulkItem = TestObject.create(); this.container.createItem(bulkItem).block(); + registerForCleanup(bulkItem); CosmosPatchOperations bulkPatchOperations = CosmosPatchOperations.create() .set("/stringProp", "bulk-patched-" + bulkItem.getStringProp()); @@ -129,7 +131,7 @@ public void crudReadAllPatchBatchAndBulkWorkflow() { CosmosBulkExecutionOptions bulkExecutionOptions = new CosmosBulkExecutionOptions() .setMaxMicroBatchSize(2) .setExcludedRegions(excludedRegions) - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("workflow-bulk"))); + .setKeywordIdentifiers(Collections.singleton("workflow-bulk")); List> bulkResponses = this.container .executeBulkOperations(Flux.fromIterable(bulkOperations), bulkExecutionOptions) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java index 5624fafdd732..55c36288e7e3 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java @@ 
-31,7 +31,9 @@ import org.testng.annotations.Test; import java.time.Duration; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import static org.assertj.core.api.Assertions.assertThat; @@ -68,23 +70,32 @@ public Object[][] timeoutWorkflowOperations() { @Test(groups = {"fi-customer-workflows"}, dataProvider = "timeoutWorkflowOperations", timeOut = 2 * TIMEOUT) public void responseDelayWithAvailabilityStrategyWorkflow(String operation, FaultInjectionOperationType faultInjectionOperationType) { TestObject item = TestObject.create(); - this.container.createItem(item).block(); + if (!"create".equals(operation)) { + this.container.createItem(item).block(); + registerForCleanup(item); + } CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(4)) .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200))) .build(); - FaultInjectionRule delayRule = configureResponseDelayRule(this.container, faultInjectionOperationType, Duration.ofMillis(1500), 1); + // readMany resolves to a point read for a single item, so the QUERY_ITEM data-provider value alone would not + // exercise the fault - inject the delay for both the point-read and query operation types. 
+ List delayRules = new ArrayList<>(); + if ("readMany".equals(operation)) { + delayRules.add(configureResponseDelayRule(this.container, FaultInjectionOperationType.READ_ITEM, Duration.ofMillis(1500), 1)); + delayRules.add(configureResponseDelayRule(this.container, FaultInjectionOperationType.QUERY_ITEM, Duration.ofMillis(1500), 1)); + } else { + delayRules.add(configureResponseDelayRule(this.container, faultInjectionOperationType, Duration.ofMillis(1500), 1)); + } try { CosmosDiagnosticsContext diagnosticsContext = executeWithE2EPolicy(operation, item, e2ePolicy); - assertThat(diagnosticsContext).isNotNull(); - assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0); + assertFaultInjectedOperation(diagnosticsContext, delayRules); assertThat(diagnosticsContext.getDuration()).isLessThan(Duration.ofSeconds(10)); - assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); } finally { - delayRule.disable(); + delayRules.forEach(FaultInjectionRule::disable); } } @@ -92,6 +103,7 @@ public void responseDelayWithAvailabilityStrategyWorkflow(String operation, Faul public void partitionMigratingFaultWithE2EPolicyWorkflow() { TestObject item = TestObject.create(); this.container.createItem(item).block(); + registerForCleanup(item); CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(4)) .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200))) @@ -106,10 +118,8 @@ public void partitionMigratingFaultWithE2EPolicyWorkflow() { try { CosmosDiagnosticsContext diagnosticsContext = executeWithE2EPolicy("read", item, e2ePolicy); - assertThat(diagnosticsContext).isNotNull(); - assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0); + assertFaultInjectedOperation(diagnosticsContext, migratingRule); assertThat(diagnosticsContext.getDuration()).isLessThan(Duration.ofSeconds(10)); - 
assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); } finally { migratingRule.disable(); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java index cd1b9cb7a05f..1d661ec282a6 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java @@ -25,7 +25,6 @@ import org.testng.annotations.Test; import java.util.Collections; -import java.util.HashSet; import java.util.List; import static org.assertj.core.api.Assertions.assertThat; @@ -39,7 +38,7 @@ public CustomerWorkflowLatestCommittedTest(CosmosClientBuilder clientBuilder) { @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT) public void beforeClass() { - initializeSharedSinglePartitionContainer("Customer latest-committed workflow tests"); + initializeSharedSinglePartitionContainer("Customer latest-committed workflow tests", true); } @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) @@ -57,6 +56,7 @@ public void latestCommittedAndExcludedRegionsFlowAcrossReadOperations() { .block(); assertThat(createResponse).isNotNull(); + registerForCleanup(item); CosmosDiagnosticsContext createDiagnostics = createResponse.getDiagnostics().getDiagnosticsContext(); assertThat(createResponse.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.CREATED); assertThat(createDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.DEFAULT); @@ -65,7 +65,7 @@ public void latestCommittedAndExcludedRegionsFlowAcrossReadOperations() { CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions() 
.setExcludedRegions(excludedRegions) - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("latest-committed-read"))) + .setKeywordIdentifiers(Collections.singleton("latest-committed-read")) .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED); CosmosItemResponse readResponse = this.container @@ -122,7 +122,9 @@ public void latestCommittedAndExcludedRegionsFlowAcrossReadOperations() { .byPage() .blockFirst(); - assertThat(changeFeedResponse).isNotNull(); + assertThat(changeFeedResponse) + .as("change feed query should return at least one page before reading diagnostics") + .isNotNull(); CosmosDiagnosticsContext changeFeedDiagnostics = changeFeedResponse.getCosmosDiagnostics().getDiagnosticsContext(); assertThat(changeFeedDiagnostics.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED); assertExcludedRegions(changeFeedDiagnostics, excludedRegions); @@ -132,6 +134,7 @@ public void latestCommittedAndExcludedRegionsFlowAcrossReadOperations() { public void latestCommittedReadWithRegionalLeaseNotFoundFault() { TestObject item = TestObject.create(); this.container.createItem(item).block(); + registerForCleanup(item); FaultInjectionRule leaseNotFoundRule = configureServerErrorRule( this.container, @@ -144,7 +147,7 @@ public void latestCommittedReadWithRegionalLeaseNotFoundFault() { try { CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions() .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED) - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("latest-committed-fault-read"))); + .setKeywordIdentifiers(Collections.singleton("latest-committed-fault-read")); CosmosDiagnosticsContext diagnosticsContext; try { @@ -160,8 +163,7 @@ public void latestCommittedReadWithRegionalLeaseNotFoundFault() { assertThat(diagnosticsContext).isNotNull(); assertThat(diagnosticsContext.getEffectiveReadConsistencyStrategy()).isEqualTo(ReadConsistencyStrategy.LATEST_COMMITTED); - 
assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); - assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0); + assertFaultInjectedOperation(diagnosticsContext, leaseNotFoundRule); } finally { leaseNotFoundRule.disable(); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java index 35ae4799940d..4f929557a136 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java @@ -48,6 +48,7 @@ public void afterClass() { public void pointOperationCircuitBreakerAndQueryPlanWorkflow() { TestObject item = TestObject.create(); this.container.createItem(item).block(); + registerForCleanup(item); CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(3)) .availabilityStrategy(new ThresholdBasedAvailabilityStrategy(Duration.ofMillis(100), Duration.ofMillis(200))) @@ -62,16 +63,14 @@ public void pointOperationCircuitBreakerAndQueryPlanWorkflow() { try { CosmosDiagnosticsContext readDiagnostics = readWithPolicy(item, e2ePolicy); - assertThat(readDiagnostics).isNotNull(); - assertThat(readDiagnostics.getStatusCode()).isGreaterThan(0); - assertThat(readDiagnostics.getContactedRegionNames()).isNotNull(); + assertFaultInjectedOperation(readDiagnostics, readFaultRule); } finally { readFaultRule.disable(); } CosmosDiagnosticsContext queryDiagnostics = queryWithPolicy(item, e2ePolicy); assertThat(queryDiagnostics).isNotNull(); - assertThat(queryDiagnostics.getStatusCode()).isGreaterThan(0); + 
assertThat(queryDiagnostics.getStatusCode()).isBetween(200, 599); assertThat(queryDiagnostics.getContactedRegionNames()).isNotNull(); assertThat(queryDiagnostics.toJson()).contains("queryPlanDiagnosticsContext"); @@ -111,8 +110,13 @@ private CosmosDiagnosticsContext queryWithPolicy(TestObject item, CosmosEndToEnd .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy) .setQueryName("PclbCustomerWorkflowQuery"); + // ORDER BY forces the gateway query-plan round-trip so the queryPlanDiagnosticsContext is always present, + // independent of single-partition / ServiceInterop query-plan optimizations. FeedResponse response = this.container - .queryItems(String.format("SELECT * FROM c WHERE c.id = '%s'", item.getId()), queryOptions, TestObject.class) + .queryItems( + String.format("SELECT * FROM c WHERE c.id = '%s' ORDER BY c.id", item.getId()), + queryOptions, + TestObject.class) .byPage() .blockFirst(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java index 114079da6c11..a10b7442930c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java @@ -22,7 +22,6 @@ import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; import java.util.List; import static org.assertj.core.api.Assertions.assertThat; @@ -35,7 +34,7 @@ public CustomerWorkflowRequestOptionsTest(CosmosClientBuilder clientBuilder) { @BeforeClass(groups = {"fi-customer-workflows"}, timeOut = SETUP_TIMEOUT) public void beforeClass() { - initializeSharedSinglePartitionContainer("Customer workflow request option tests"); + initializeSharedSinglePartitionContainer("Customer workflow request 
option tests", true); } @AfterClass(groups = {"fi-customer-workflows"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) @@ -50,7 +49,7 @@ public void excludedRegionAndKeywordIdentifiersFlowAcrossOperations() { TestObject item = TestObject.create(); CosmosItemRequestOptions createOptions = new CosmosItemRequestOptions() - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-create"))) + .setKeywordIdentifiers(Collections.singleton("customer-create")) .setContentResponseOnWriteEnabled(true) .setExcludedRegions(excludedRegions); @@ -59,13 +58,14 @@ public void excludedRegionAndKeywordIdentifiersFlowAcrossOperations() { .block(); assertThat(createResponse).isNotNull(); + registerForCleanup(item); assertThat(createResponse.getStatusCode()).isEqualTo(201); assertKeywordIdentifier(createResponse.getDiagnostics().getDiagnosticsContext(), "customer-create"); assertExcludedRegions(createResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions); assertDidNotContactExcludedRegions(createResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions); CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions() - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-read"))) + .setKeywordIdentifiers(Collections.singleton("customer-read")) .setExcludedRegions(excludedRegions) .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED); @@ -82,7 +82,7 @@ public void excludedRegionAndKeywordIdentifiersFlowAcrossOperations() { assertDidNotContactExcludedRegions(readDiagnostics, excludedRegions); CosmosQueryRequestOptions queryOptions = new CosmosQueryRequestOptions() - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-query"))) + .setKeywordIdentifiers(Collections.singleton("customer-query")) .setExcludedRegions(excludedRegions) .setConsistencyLevel(ConsistencyLevel.EVENTUAL) .setQueryMetricsEnabled(true) @@ -105,7 +105,7 @@ public void 
excludedRegionAndKeywordIdentifiersFlowAcrossOperations() { assertThat(queryRequestOptions.getQueryNameOrDefault(null)).isEqualTo("CustomerWorkflowQuery"); CosmosReadManyRequestOptions readManyOptions = new CosmosReadManyRequestOptions() - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-read-many"))) + .setKeywordIdentifiers(Collections.singleton("customer-read-many")) .setExcludedRegions(excludedRegions) .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED); @@ -126,7 +126,7 @@ public void excludedRegionAndKeywordIdentifiersFlowAcrossOperations() { item.setStringProp("updated-" + item.getStringProp()); CosmosItemRequestOptions upsertOptions = new CosmosItemRequestOptions() - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-upsert"))) + .setKeywordIdentifiers(Collections.singleton("customer-upsert")) .setExcludedRegions(excludedRegions) .setContentResponseOnWriteEnabled(true); @@ -141,7 +141,7 @@ public void excludedRegionAndKeywordIdentifiersFlowAcrossOperations() { assertDidNotContactExcludedRegions(upsertResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions); CosmosItemRequestOptions deleteOptions = new CosmosItemRequestOptions() - .setKeywordIdentifiers(new HashSet<>(Collections.singletonList("customer-delete"))) + .setKeywordIdentifiers(Collections.singleton("customer-delete")) .setExcludedRegions(excludedRegions); CosmosItemResponse deleteResponse = this.container diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java index 57bc2c5b6a21..ef45b0a83240 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java @@ -52,6 +52,7 @@ public void readManyWithAdvancedSessionTokenReturnsReadSessionNotAvailable() thr CosmosItemResponse createResponse = this.container.createItem(item).block(); assertThat(createResponse).isNotNull(); + registerForCleanup(item); lastSessionToken = createResponse.getSessionToken(); itemIdentities.add(new CosmosItemIdentity(partitionKey(item), item.getId())); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java index c9ab2d761932..d439aaba6a2e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java @@ -52,6 +52,7 @@ public void afterClass() { public void excludedReadableRegionRoutesReadToRemainingReadableRegion() { TestObject item = TestObject.create(); this.container.createItem(item).block(); + registerForCleanup(item); List excludedRegions = excludeFirstReadableRegion(); CosmosItemRequestOptions readOptions = new CosmosItemRequestOptions() @@ -74,6 +75,7 @@ public void excludedReadableRegionRoutesReadToRemainingReadableRegion() { public void readFaultInPreferredReadableRegionCanUseRemoteReadableRegion() { TestObject item = TestObject.create(); this.container.createItem(item).block(); + registerForCleanup(item); FaultInjectionRule readSessionNotAvailableRule = configureServerErrorRule( this.container, @@ -94,7 +96,10 @@ public void readFaultInPreferredReadableRegionCanUseRemoteReadableRegion() { CosmosDiagnosticsContext diagnosticsContext = readWithDiagnostics(item, readOptions); 
assertThat(diagnosticsContext).isNotNull(); - assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0); + assertThat(readSessionNotAvailableRule.getHitCount()) + .as("the injected read-session-not-available fault should have been hit in the preferred readable region") + .isGreaterThanOrEqualTo(1); + assertThat(diagnosticsContext.getStatusCode()).isBetween(HttpConstants.StatusCodes.OK, 599); assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); if (diagnosticsContext.getStatusCode() < HttpConstants.StatusCodes.BADREQUEST) { assertThat(diagnosticsContext.getContactedRegionNames()).isNotEmpty(); @@ -128,9 +133,14 @@ public void writeFaultStaysOnSingleWritableRegion() { CosmosDiagnosticsContext diagnosticsContext = createWithDiagnostics(TestObject.create(), createOptions); assertThat(diagnosticsContext).isNotNull(); - assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0); + assertThat(partitionMigratingRule.getHitCount()) + .as("the injected write fault should have been hit in the single writable region") + .isGreaterThanOrEqualTo(1); + assertThat(diagnosticsContext.getStatusCode()).isBetween(HttpConstants.StatusCodes.OK, 599); assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); + // A single-write account cannot hedge writes to another region, so even with an availability strategy + // configured the write must never be routed to a read-only region. 
Set readOnlyRegions = this.readableRegions .stream() .map(region -> region.toLowerCase(Locale.ROOT)) @@ -157,6 +167,7 @@ public Object[][] singleWriteReadFaultScenarios() { public void singleWriteReadFaultMatrix(FaultInjectionServerErrorType errorType) { TestObject item = TestObject.create(); this.container.createItem(item).block(); + registerForCleanup(item); FaultInjectionRule faultRule = configureServerErrorRule( this.container, @@ -175,9 +186,7 @@ public void singleWriteReadFaultMatrix(FaultInjectionServerErrorType errorType) CosmosDiagnosticsContext diagnosticsContext = readWithDiagnostics(item, readOptions); - assertThat(diagnosticsContext).isNotNull(); - assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0); - assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); + assertFaultInjectedOperation(diagnosticsContext, faultRule); } finally { faultRule.disable(); } @@ -215,9 +224,9 @@ public void singleWriteCreateFaultMatrix(FaultInjectionServerErrorType errorType CosmosDiagnosticsContext diagnosticsContext = createWithDiagnostics(TestObject.create(), createOptions); - assertThat(diagnosticsContext).isNotNull(); - assertThat(diagnosticsContext.getStatusCode()).isGreaterThan(0); - assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); + // The availability strategy cannot hedge writes on a single-write account; the assertion below confirms + // the injected write fault was still exercised and produced a real HTTP outcome. 
+ assertFaultInjectedOperation(diagnosticsContext, faultRule); } finally { faultRule.disable(); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java index d68cd5981daf..096bd7041fc8 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java @@ -91,6 +91,11 @@ public void storedProcedureCreateReadExecuteWithMetadataFaultRule() { assertThat(executeResponse.getDiagnostics()).isNotNull(); } finally { metadataDelayRule.disable(); + try { + this.container.getScripts().getStoredProcedure(storedProcedureId).delete().block(); + } catch (Exception error) { + // best-effort cleanup of the stored procedure created by this test + } } } } \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java index 2d23044d8660..acb5d1c3e2f3 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java @@ -8,15 +8,19 @@ import com.azure.cosmos.CosmosClientBuilder; import com.azure.cosmos.CosmosDiagnosticsContext; import com.azure.cosmos.ConnectionMode; +import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.TestObject; import com.azure.cosmos.implementation.AsyncDocumentClient; import com.azure.cosmos.implementation.DatabaseAccount; import com.azure.cosmos.implementation.DatabaseAccountLocation; import 
com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.OverridableRequestOptions; import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; +import com.azure.cosmos.models.CosmosItemIdentity; +import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.ThroughputProperties; import com.azure.cosmos.rx.TestSuiteBase; import com.azure.cosmos.test.faultinjection.CosmosFaultInjectionHelper; @@ -49,12 +53,21 @@ public abstract class CustomerWorkflowTestBase extends TestSuiteBase { protected CosmosAsyncContainer container; protected List writableRegions; protected List readableRegions; + private final List itemsToCleanup = Collections.synchronizedList(new ArrayList<>()); protected CustomerWorkflowTestBase(CosmosClientBuilder clientBuilder) { super(clientBuilder); } protected final void initializeSharedSinglePartitionContainer(String scenarioName) { + initializeSharedSinglePartitionContainer(scenarioName, false); + } + + protected final void initializeSharedSinglePartitionContainer(String scenarioName, boolean forceSessionConsistency) { + if (forceSessionConsistency) { + skipIfAccountConsistencyWeakerThanSession(scenarioName); + } + CosmosAsyncClient discoveryClient = null; try { @@ -62,11 +75,18 @@ protected final void initializeSharedSinglePartitionContainer(String scenarioNam this.writableRegions = discoverWritableRegions(discoveryClient); skipIfInsufficientRegions(this.writableRegions, scenarioName); - this.client = getClientBuilder() + CosmosClientBuilder clientBuilder = getClientBuilder() .preferredRegions(this.writableRegions) .multipleWriteRegionsEnabled(true) - .contentResponseOnWriteEnabled(true) - .buildAsyncClient(); + .contentResponseOnWriteEnabled(true); + + if (forceSessionConsistency) { + // 
Read-your-write across an excluded write region is only deterministic with session (or + // stronger) consistency, so pin the client to session consistency for these scenarios. + clientBuilder.consistencyLevel(ConsistencyLevel.SESSION); + } + + this.client = clientBuilder.buildAsyncClient(); this.container = getSharedSinglePartitionCosmosContainer(this.client); } finally { safeClose(discoveryClient); @@ -74,6 +94,7 @@ protected final void initializeSharedSinglePartitionContainer(String scenarioNam } protected final void closeClient() { + cleanupRegisteredItems(); safeClose(this.client); this.client = null; this.container = null; @@ -85,19 +106,19 @@ protected final void initializeSharedSingleWriteMultiRegionContainer(String scen CosmosAsyncClient discoveryClient = null; try { - CosmosClientBuilder clientBuilder = getClientBuilder() + discoveryClient = getClientBuilder() .multipleWriteRegionsEnabled(false) - .contentResponseOnWriteEnabled(true); - - discoveryClient = clientBuilder.buildAsyncClient(); + .contentResponseOnWriteEnabled(true) + .buildAsyncClient(); this.writableRegions = discoverWritableRegions(discoveryClient); this.readableRegions = discoverReadableRegions(discoveryClient); skipIfInsufficientReadableRegions(this.readableRegions, scenarioName); skipIfNotSingleWriteRegion(this.writableRegions, scenarioName); - this.client = clientBuilder + this.client = getClientBuilder() .preferredRegions(this.readableRegions) .multipleWriteRegionsEnabled(false) + .contentResponseOnWriteEnabled(true) .buildAsyncClient(); this.container = getSharedSinglePartitionCosmosContainer(this.client); } finally { @@ -105,6 +126,39 @@ protected final void initializeSharedSingleWriteMultiRegionContainer(String scen } } + /** + * Registers an item to be best-effort deleted from the shared container when the test class finishes, + * so the shared single-partition container does not accumulate items across runs. 
+ */ + protected final void registerForCleanup(TestObject item) { + if (item != null) { + this.itemsToCleanup.add(new CosmosItemIdentity(partitionKey(item), item.getId())); + } + } + + private void cleanupRegisteredItems() { + CosmosAsyncContainer cleanupContainer = this.container; + List snapshot; + synchronized (this.itemsToCleanup) { + snapshot = new ArrayList<>(this.itemsToCleanup); + this.itemsToCleanup.clear(); + } + + if (cleanupContainer == null) { + return; + } + + for (CosmosItemIdentity identity : snapshot) { + try { + cleanupContainer + .deleteItem(identity.getId(), identity.getPartitionKey(), new CosmosItemRequestOptions()) + .block(); + } catch (Exception error) { + // best-effort cleanup - ignore (for example item already deleted by the test itself) + } + } + } + protected final List excludeFirstWritableRegion() { return Collections.singletonList(this.writableRegions.get(0)); } @@ -144,7 +198,7 @@ protected static void awaitCondition(BooleanSupplier condition, Duration timeout Thread.sleep(250); } catch (InterruptedException error) { Thread.currentThread().interrupt(); - throw new AssertionError(failureMessage, error); + throw new AssertionError("Interrupted while waiting for condition: " + failureMessage, error); } } @@ -213,7 +267,7 @@ protected final FaultInjectionRule configureResponseDelayRule( FaultInjectionCondition condition = new FaultInjectionConditionBuilder() .operationType(operationType) - .connectionType(FaultInjectionConnectionType.DIRECT) + .connectionType(currentFaultInjectionConnectionType()) .build(); IFaultInjectionResult result = FaultInjectionResultBuilders @@ -234,10 +288,7 @@ protected final FaultInjectionRule configureResponseDelayRule( } protected static List discoverWritableRegions(CosmosAsyncClient client) { - AsyncDocumentClient asyncDocumentClient = ReflectionUtils.getAsyncDocumentClient(client); - RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) asyncDocumentClient; - GlobalEndpointManager 
globalEndpointManager = ReflectionUtils.getGlobalEndpointManager(rxDocumentClient); - DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + DatabaseAccount databaseAccount = readDatabaseAccount(client); List writableRegions = new ArrayList<>(); for (DatabaseAccountLocation accountLocation : databaseAccount.getWritableLocations()) { @@ -248,10 +299,7 @@ protected static List discoverWritableRegions(CosmosAsyncClient client) } protected static List discoverReadableRegions(CosmosAsyncClient client) { - AsyncDocumentClient asyncDocumentClient = ReflectionUtils.getAsyncDocumentClient(client); - RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) asyncDocumentClient; - GlobalEndpointManager globalEndpointManager = ReflectionUtils.getGlobalEndpointManager(rxDocumentClient); - DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + DatabaseAccount databaseAccount = readDatabaseAccount(client); List readableRegions = new ArrayList<>(); for (DatabaseAccountLocation accountLocation : databaseAccount.getReadableLocations()) { @@ -261,6 +309,26 @@ protected static List discoverReadableRegions(CosmosAsyncClient client) return readableRegions; } + private static DatabaseAccount readDatabaseAccount(CosmosAsyncClient client) { + AsyncDocumentClient asyncDocumentClient = ReflectionUtils.getAsyncDocumentClient(client); + RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) asyncDocumentClient; + + // Force a database account read instead of relying on the possibly not-yet-populated cached value + // returned by GlobalEndpointManager.getLatestDatabaseAccount(). 
+ DatabaseAccount databaseAccount = rxDocumentClient.getDatabaseAccount().block(); + + if (databaseAccount == null) { + GlobalEndpointManager globalEndpointManager = ReflectionUtils.getGlobalEndpointManager(rxDocumentClient); + databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + } + + assertThat(databaseAccount) + .as("database account must be available for region discovery") + .isNotNull(); + + return databaseAccount; + } + protected static void skipIfInsufficientRegions(List regions, String scenarioName) { if (regions == null || regions.size() < 2) { throw new SkipException(scenarioName + " requires a live multi-region account."); @@ -279,6 +347,73 @@ protected static void skipIfNotSingleWriteRegion(List regions, String sc } } + protected static void skipIfAccountConsistencyWeakerThanSession(String scenarioName) { + if (accountConsistency == ConsistencyLevel.EVENTUAL || accountConsistency == ConsistencyLevel.CONSISTENT_PREFIX) { + throw new SkipException( + scenarioName + " requires an account with session or stronger default consistency for deterministic read-your-write."); + } + } + + protected final void skipIfNotDirectMode(String scenarioName) { + if (getConnectionPolicy().getConnectionMode() != ConnectionMode.DIRECT) { + throw new SkipException(scenarioName + " only applies to the direct connection mode client builder."); + } + } + + protected final void skipIfNotGatewayMode(String scenarioName) { + if (getConnectionPolicy().getConnectionMode() != ConnectionMode.GATEWAY) { + throw new SkipException(scenarioName + " only applies to the gateway connection mode client builder."); + } + } + + /** + * Configures the same server-error fault for both the point-read ({@code READ_ITEM}) and query + * ({@code QUERY_ITEM}) operation types. {@code readMany} resolves to a point read for a single item in a + * partition and to a query for multiple items, so both rules are needed for the fault to reliably apply. 
+ */ + protected final List configureReadManyServerErrorRules( + CosmosAsyncContainer targetContainer, + FaultInjectionServerErrorType errorType, + String region, + int hitLimit) { + + List rules = new ArrayList<>(); + rules.add(configureServerErrorRule( + targetContainer, FaultInjectionOperationType.READ_ITEM, errorType, region, currentFaultInjectionConnectionType(), hitLimit)); + rules.add(configureServerErrorRule( + targetContainer, FaultInjectionOperationType.QUERY_ITEM, errorType, region, currentFaultInjectionConnectionType(), hitLimit)); + return rules; + } + + /** + * Asserts that a fault-injected operation produced a real HTTP outcome and that at least one of the supplied + * fault rules was actually hit, so the scenario cannot silently pass without exercising the injected fault. + */ + protected static void assertFaultInjectedOperation( + CosmosDiagnosticsContext diagnosticsContext, + FaultInjectionRule... rules) { + + assertThat(diagnosticsContext).isNotNull(); + assertThat(diagnosticsContext.getStatusCode()).isBetween(HttpConstants.StatusCodes.OK, 599); + assertThat(diagnosticsContext.getContactedRegionNames()).isNotNull(); + + long totalHits = 0; + for (FaultInjectionRule rule : rules) { + totalHits += rule.getHitCount(); + } + + assertThat(totalHits) + .as("expected at least one injected fault to be hit") + .isGreaterThanOrEqualTo(1); + } + + protected static void assertFaultInjectedOperation( + CosmosDiagnosticsContext diagnosticsContext, + List rules) { + + assertFaultInjectedOperation(diagnosticsContext, rules.toArray(new FaultInjectionRule[0])); + } + protected static OverridableRequestOptions getRequestOptions(CosmosDiagnosticsContext diagnosticsContext) { assertThat(diagnosticsContext).isNotNull(); return ImplementationBridgeHelpers @@ -291,7 +426,7 @@ protected static void assertKeywordIdentifier(CosmosDiagnosticsContext diagnosti OverridableRequestOptions requestOptions = getRequestOptions(diagnosticsContext); 
assertThat(requestOptions.getKeywordIdentifiers()) - .containsExactly(expectedKeywordIdentifier); + .contains(expectedKeywordIdentifier); } protected static void assertExcludedRegions( From e01dbdb48204f15e5b1ff988209b4732b375ac4f Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 13:23:57 +0200 Subject: [PATCH 05/14] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../CustomerWorkflowHighE2ETimeoutTest.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java index 55c36288e7e3..b9a46e97f873 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java @@ -131,17 +131,17 @@ private CosmosDiagnosticsContext executeWithE2EPolicy( CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) { try { - if ("create".equals(operation)) { + TestObject createdItem = TestObject.create(); CosmosItemRequestOptions options = new CosmosItemRequestOptions() .setContentResponseOnWriteEnabled(true) .setCosmosEndToEndOperationLatencyPolicyConfig(e2ePolicy); - return this.container - .createItem(TestObject.create(), options) - .block() - .getDiagnostics() - .getDiagnosticsContext(); - } + CosmosItemResponse response = this.container + .createItem(createdItem, options) + .block(); + + registerForCleanup(createdItem); + return response.getDiagnostics().getDiagnosticsContext(); if ("read".equals(operation)) { CosmosItemRequestOptions options = new CosmosItemRequestOptions() From 837f3da6ebf669d3b89e2af8a57a679b27222658 Mon Sep 17 
00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 11:28:30 +0000 Subject: [PATCH 06/14] Register created items for cleanup in customer workflow create paths --- .../CustomerWorkflowAvailabilityFaultMatrixTest.java | 3 ++- .../CustomerWorkflowSingleMasterAvailabilityTest.java | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java index 5cccbb7c06f5..bb76b2b8f6bd 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java @@ -185,9 +185,10 @@ private CosmosDiagnosticsContext executeOperation(String operation, TestObject i } CosmosItemResponse response = this.container - .createItem(TestObject.create(), new CosmosItemRequestOptions().setContentResponseOnWriteEnabled(true)) + .createItem(item, new CosmosItemRequestOptions().setContentResponseOnWriteEnabled(true)) .block(); + registerForCleanup(item); return response.getDiagnostics().getDiagnosticsContext(); } catch (CosmosException error) { CosmosDiagnosticsContext diagnosticsContext = error.getDiagnostics().getDiagnosticsContext(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java index d439aaba6a2e..65e4ed0b83f0 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java @@ -246,11 +246,14 @@ private CosmosDiagnosticsContext readWithDiagnostics(TestObject item, CosmosItem private CosmosDiagnosticsContext createWithDiagnostics(TestObject item, CosmosItemRequestOptions options) { try { - return this.container + CosmosDiagnosticsContext diagnosticsContext = this.container .createItem(item, new PartitionKey(item.getMypk()), options) .block() .getDiagnostics() .getDiagnosticsContext(); + + registerForCleanup(item); + return diagnosticsContext; } catch (CosmosException error) { return error.getDiagnostics().getDiagnosticsContext(); } From fb9e0dbe7282e98b46174c114c8098138e808978 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 11:35:40 +0000 Subject: [PATCH 07/14] Update CustomerWorkflowHighE2ETimeoutTest.java --- .../workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java index b9a46e97f873..667b7228956d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java @@ -131,6 +131,7 @@ private CosmosDiagnosticsContext executeWithE2EPolicy( CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy) { try { + if ("create".equals(operation)) { TestObject createdItem = TestObject.create(); CosmosItemRequestOptions options = new CosmosItemRequestOptions() .setContentResponseOnWriteEnabled(true) @@ -142,6 +143,7 @@ private CosmosDiagnosticsContext executeWithE2EPolicy( registerForCleanup(createdItem); return 
response.getDiagnostics().getDiagnosticsContext(); + } if ("read".equals(operation)) { CosmosItemRequestOptions options = new CosmosItemRequestOptions() @@ -204,6 +206,7 @@ private CosmosDiagnosticsContext executeWithE2EPolicy( CosmosBatchResponse response = this.container.executeCosmosBatch(batch, batchOptions).block(); + registerForCleanup(batchItem); return response.getDiagnostics().getDiagnosticsContext(); } From a37f5179e81ea35b015a5845cf41f2717cea18be Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 11:43:57 +0000 Subject: [PATCH 08/14] Delete CUSTOMER_WORKFLOW_COVERAGE_MAP.md --- .../CUSTOMER_WORKFLOW_COVERAGE_MAP.md | 83 ------------------- 1 file changed, 83 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos-tests/CUSTOMER_WORKFLOW_COVERAGE_MAP.md diff --git a/sdk/cosmos/azure-cosmos-tests/CUSTOMER_WORKFLOW_COVERAGE_MAP.md b/sdk/cosmos/azure-cosmos-tests/CUSTOMER_WORKFLOW_COVERAGE_MAP.md deleted file mode 100644 index 52d55708292f..000000000000 --- a/sdk/cosmos/azure-cosmos-tests/CUSTOMER_WORKFLOW_COVERAGE_MAP.md +++ /dev/null @@ -1,83 +0,0 @@ -# Customer Workflow Coverage Map - -This file tracks the customer release-validation workflows from `e:\benchmark-tests` against the runnable Cosmos SDK tests in this module. Keep it updated as customer-derived workflows are ported, disabled, or marked duplicate. - -## Classification - -| Classification | Meaning | -|---|---| -| `gap` | Customer workflow has no equivalent SDK scenario coverage. | -| `partial` | SDK covers the primitives, but not the customer-style operation chain or diagnostics assertion. | -| `duplicate` | Existing SDK tests already cover the behavior with enough release-signal fidelity. | -| `wrapper-specific` | Assertion belongs to the customer's wrapper defaults, not an SDK contract. | -| `deferred` | Candidate workflow, but not enabled until runtime/flakiness/account-shape trade-offs are reviewed. 
| - -## Initial Coverage Triage - -| Customer source area | Customer workflow signal | Existing SDK references | Classification | Initial action | -|---|---|---|---|---| -| `test/CosmosDaoTest.java` | Create/read/query/upsert/delete, readAll, bulk/batch, custom serializer through a DAO wrapper | `CosmosItemTest`, `DocumentCrudTest`, `CosmosBulkTest`, `CosmosBulkAsyncTest`, `TransactionalBatchTest`, `CosmosItemSerializerTest` | `partial` / `wrapper-specific` | Port only customer-style operation chains with diagnostics assertions; mark DAO cache/session-map/default policy checks wrapper-specific. | -| `test/CosmosMultiFeatureTests.java` | App-style create/read/query/upsert/delete, keyword identifiers, invalid session token, no preferred region default routing | `CosmosDiagnosticsTest`, `ExcludeRegionTests`, `SessionConsistencyWithRegionScopingTests` | `partial` | Start with keyword identifier and region-routing workflows in `fi-customer-workflows`. | -| `test/CosmosDriverDynamicRequestOptionTest.java` | Dynamic operation policy changes request options per operation and validates request options through diagnostics | `OperationPoliciesTest`, `GatewayReadConsistencyStrategyE2ETest`, `CosmosLatestCommittedItemTests` | `partial` | Add customer workflow tests that combine create/read/query/readMany/upsert chains and diagnostics request-option validation. | -| `test/Latest_Committed_Tests.java` | Latest-committed with excluded regions, consistency combinations, RU and contacted-region expectations | `CosmosLatestCommittedItemTests`, `GatewayReadConsistencyStrategyE2ETest`, `ClientRetryPolicyE2ETests` | `partial` | Add focused live multi-region workflow rows; keep primitive latest-committed behavior as duplicate references. 
| -| `regression/direct/*.java` | Latest-committed direct-mode regression matrix for change feed, read, query, readMany, session/eventual combinations | `CosmosLatestCommittedItemTests`, change feed processor tests | `partial` | Port only variants that add multi-region workflow signal beyond existing latest-committed tests. | -| `regression/gateway/*.java` | Gateway latest-committed regression matrix | `GatewayReadConsistencyStrategyE2ETest`, `GatewayReadConsistencyStrategySpyWireTest` | `duplicate` / `partial` | Keep as duplicate unless the coverage table identifies a workflow assertion not present in gateway tests. | -| `test/CosmosHighE2ETimeoutTest.java` | E2E timeout behavior under response delay and partition migrating faults for create/query/readMany/batch | `EndToEndTimeOutValidationTests`, `EndToEndTimeOutWithAvailabilityTest`, `FaultInjectionWithAvailabilityStrategyTestsBase` | `partial` | Add one customer-style chain after request-option workflows stabilize. | -| `test/CosmosStoredProcedureTest.java` | Stored procedure create/read/update diagnostics under response delay and read-session-not-available faults | `StoredProcedureCrudTest`, `StoredProcedureQueryTest`, `StoredProcedureUpsertReplaceTest`, `CosmosSyncStoredProcTest` | `gap` / `partial` | Add targeted stored-procedure fault workflow that deploys scripts in setup. | -| `test/ChangeFeedProcessorTest.java` | CFP start/stop, latest-version handler, current state, restart, and fault-injected read feed | `IncrementalChangeFeedProcessorTest`, `FullFidelityChangeFeedProcessorTest`, `CosmosContainerChangeFeedTest` | `partial` | Add a small CFP workflow and replace fixed sleeps with polling. | -| `test/PartitionLevelCircuitBreakerTests.java` | PCLB app chain and query-plan behavior under regional faults | `PerPartitionCircuitBreakerE2ETests`, `PerPartitionAutomaticFailoverE2ETests` | `partial` | Add one PCLB-enabled workflow row after first live suite run. 
| -| `test/CosmosConflictResolutionTest.java` | Multi-client conflict detection and conflict query | `CosmosConflictsTest`, `ConflictTests`, `MultiMasterConflictResolutionTest` | `duplicate` / `partial` | Document existing coverage first; port only if customer ordering/diagnostics differs. | -| `test/Cosmos429test.java` | 429 and connection delay behavior in app-shaped calls | `RetryThrottleTest`, `ResourceThrottleRetryPolicyTest`, `FaultInjectionServerErrorRuleOnDirectTests`, `FaultInjectionServerErrorRuleOnGatewayTests` | `duplicate` / `partial` | Prefer parameterized FI rows; do not create a standalone clone. | -| `singlemaster/direct/*.java` | Single-write account availability strategies in direct mode | `EndToEndTimeOutWithAvailabilityTest`, `ExcludeRegionTests`, `FITests_*` | `deferred` | Document rows first; add a single-write multi-region matrix only if unique customer coverage remains. | -| `singlemaster/gateway/*.java` | Single-write account availability strategies in gateway mode | Gateway retry/fault-injection tests | `deferred` | Same as singlemaster/direct. | -| `multimaster/direct/*.java` | Multi-write direct availability strategy matrix across fault/status/operation combinations | `FaultInjectionWithAvailabilityStrategyTestsBase`, `FITests_*`, `PerPartitionAutomaticFailoverE2ETests` | `partial` | Port representative workflow matrix with TestNG data providers instead of one class per customer file. | -| `multimaster/gateway/*.java` | Multi-write gateway availability strategy matrix | `FaultInjectionServerErrorRuleOnGatewayTests`, `FaultInjectionServerErrorRuleOnGatewayV2Tests`, `FITests_*` | `partial` | Port selected gateway workflow rows after direct-mode baseline. | - -## Enabled Suite - -The initial implementation adds TestNG group `fi-customer-workflows`, Maven profile `-Pfi-customer-workflows`, and live matrix display name `FaultInjectionCustomerWorkflows`. The suite is intended to run only through the existing on-demand Cosmos live test path. 
- -Single-write multi-region customer workflows use TestNG group `fi-sm-customer-workflows`, Maven profile `-Pfi-sm-customer-workflows`, and live matrix display name `FaultInjectionSingleMasterCustomerWorkflows`. - -## Implemented Workflow Classes - -| Workflow class | Customer coverage areas represented | -|---|---| -| `CustomerWorkflowRequestOptionsTest` | Dynamic request options, keyword identifiers, excluded regions, create/read/query/readMany/upsert/delete diagnostics. | -| `CustomerWorkflowDaoStyleOperationsTest` | DAO-style CRUD chain, readAll, patch, transactional batch, bulk read/patch with max micro-batch sizing, and request-level serializer propagation. | -| `CustomerWorkflowLatestCommittedTest` | Latest-committed point read, query, readMany, change feed, excluded regions, diagnostics request-option propagation, regional lease-not-found fault coverage, and direct/gateway client variants. | -| `CustomerWorkflowSessionTokenTest` | ReadMany with valid and advanced user session tokens, validating read-session-not-available behavior. | -| `CustomerWorkflowStoredProcedureTest` | Stored procedure create/read/execute with script logging and metadata fault-rule coverage. | -| `CustomerWorkflowChangeFeedProcessorTest` | Latest-version CFP start, restart, current state/lag, and read-feed fault recovery. | -| `CustomerWorkflowAvailabilityFaultMatrixTest` | Expanded multi-master direct/gateway fault matrix for read, query, readMany, create, upsert, replace, delete, and patch operations across representative 404/408/410/429/449/500/503 families. | -| `CustomerWorkflowHighE2ETimeoutTest` | Response-delay workflow with E2E timeout and availability strategy for create, read, query, readMany, upsert, batch, patch, and partition-migrating read. | -| `CustomerWorkflowPartitionLevelCircuitBreakerTest` | PCLB-oriented point read, query-plan diagnostics/query, and patch app-chain workflow under the PCLB-enabled live matrix leg. 
| -| `CustomerWorkflowSingleMasterAvailabilityTest` | Single-write multi-region excluded-readable-region reads, local readable-region read faults, write faults constrained to the single writable region, and representative direct/gateway read/create fault matrices. | - -## Remaining Gap Summary - -| Remaining area | Current status after `fi-customer-workflows` | Importance of adding more | -|---|---|---| -| Exhaustive dynamic request-option matrix | Core app-style create/read/query/readMany/upsert/delete request-option propagation is covered; the exhaustive per-option matrix remains in existing SDK primitive tests. | `nice to have` / mostly duplicate. Add only if release owners want customer-style chaining for every option combination. | -| Latest-committed RU comparison variants | Point read, query, readMany, change feed, excluded regions, diagnostics propagation, and a regional lease-not-found fault are covered; strict RU comparison checks remain. | `nice to have`. RU comparisons are service-sensitive and less valuable than the diagnostics/routing checks now covered. | -| Gateway latest-committed regression variants | Direct and gateway latest-committed workflow variants are covered by `CustomerWorkflowLatestCommittedTest`; existing gateway read-consistency tests remain the primitive anchor. | `covered enough`. No further action unless strict one-class-per-customer-file parity is required. | -| Stored procedure exact fault parity | Stored procedure create/read/execute/script-log and metadata fault-rule coverage are added; exact response-delay/read-session-not-available stored-procedure fault parity is not fully represented because fault injection has no stored-procedure-specific operation type. | `addressing significant partial gap`, but may require deeper test-infra support or a carefully scoped metadata/data-plane proxy scenario. 
| -| CFP full customer matrix | Latest-version CFP start, restart, current state/lag, and read-feed fault recovery are covered; full-fidelity/all-versions, side-cart, and deeper lease recovery variants remain. | `nice to have`. Current workflow covers the highest-signal CFP behavior without copying the large CFP matrix. | -| Full multi-write availability matrix | Expanded direct/gateway multi-write fault rows now cover read/query/readMany/create/upsert/replace/delete/patch across representative 404/408/410/429/449/500/503 families. The only unported portion is exact one-class-per-customer-file parity and every operation/error permutation. | `runtime-heavy duplicate`. Stop here unless parity is required over runtime. | -| Single-write direct/gateway availability matrix | Dedicated single-write multi-region live leg and representative direct/gateway read/create fault matrices are added through `fi-sm-customer-workflows`; exact one-class-per-error-file parity remains. | `runtime-heavy duplicate`. Stop here unless strict customer-suite parity is required. | -| High E2E timeout extended fault variants | Response-delay E2E timeout with availability strategy now covers create/read/query/readMany/upsert/batch/patch plus partition-migrating read; deeper customer-specific timing/RU assertions remain. | `nice to have`. The main workflow gap is covered; remaining work is runtime-sensitive strict parity. | -| PCLB exact regional circuit-breaker assertions | PCLB-oriented read/query-plan diagnostics/query/patch app-chain workflow is added; exact circuit-breaker state transitions remain in existing PCLB tests. | `nice to have` for customer parity; existing SDK PCLB tests already cover the lower-level behavior. | -| 429 and connection-delay app-shaped calls | 429-style rows are now represented in multi-write and single-write matrices; connection-delay/connect-reset style network transport variants remain in existing transport/FI tests and selected timeout workflows. 
| `runtime-heavy duplicate`. Add only if network-fault parity is explicitly required. | -| Conflict resolution and conflict query | Not added to the new workflow suite; existing conflict tests cover core SDK behavior. | `nice to have` / duplicate. Add only if customer multi-client ordering or diagnostics are materially different. | -| Basic multi-write behavior and feature-validation classes | Covered indirectly by CRUD/request-options/latest-committed/session-token workflows and existing multi-master tests. | `completely duplicate` for this suite unless a specific uncovered assertion is identified. | -| Custom serializer standalone tests | Request-level serializer propagation is represented in the DAO-style workflow; existing serializer tests cover normal and exception behavior. | `completely duplicate`. Keep deeper standalone serializer tests out of this workflow suite. | -| Customer wrapper defaults, caches, DAO session maps, and configuration defaults | Not ported by design because these are not SDK contracts. | `completely useless for SDK coverage` / wrapper-specific. Keep documented only. | - -## Porting Rules - -- Use SDK-native tests in `azure-cosmos-tests`; do not copy customer-specific package dependencies. -- Do not copy hardcoded customer endpoints, account keys, database names, or container names. -- Prefer dynamic account-region discovery over hardcoded region order. -- Replace fixed sleeps with polling or retry loops. -- Preserve customer workflow shape where it adds release signal: operation chains, contacted-region diagnostics, effective consistency, effective read-consistency strategy, retry counts, and request-option propagation. -- Mark wrapper default assertions as `wrapper-specific` unless the SDK owns the behavior. 
From 9f6c651a76a919d694a671bc121f7aa6ad499f7f Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 12:02:16 +0000 Subject: [PATCH 09/14] Refactoring customer tests into separate pipelines --- ...erWorkflowAvailabilityFaultMatrixTest.java | 2 +- ...stomerWorkflowChangeFeedProcessorTest.java | 2 +- ...ustomerWorkflowDaoStyleOperationsTest.java | 2 +- .../CustomerWorkflowHighE2ETimeoutTest.java | 2 +- .../CustomerWorkflowLatestCommittedTest.java | 2 +- ...kflowPartitionLevelCircuitBreakerTest.java | 2 +- .../CustomerWorkflowRequestOptionsTest.java | 2 +- .../CustomerWorkflowSessionTokenTest.java | 2 +- .../CustomerWorkflowStoredProcedureTest.java | 2 +- .../customer/CustomerWorkflowTestBase.java | 2 +- ...fi-customer-workflows-platform-matrix.json | 42 ++++++++++++ ...sm-customer-workflows-platform-matrix.json | 41 ++++++++++++ sdk/cosmos/live-platform-matrix.json | 52 +-------------- sdk/cosmos/tests.yml | 64 +++++++++++++++++++ 14 files changed, 158 insertions(+), 61 deletions(-) create mode 100644 sdk/cosmos/live-fi-customer-workflows-platform-matrix.json create mode 100644 sdk/cosmos/live-fi-sm-customer-workflows-platform-matrix.json diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java index bb76b2b8f6bd..176715cd5a2a 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowAvailabilityFaultMatrixTest.java @@ -196,4 +196,4 @@ private CosmosDiagnosticsContext executeOperation(String operation, TestObject i return diagnosticsContext; } } -} \ No newline at end of file +} diff --git 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java index 8f7ca6389800..c7bb684b066b 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java @@ -206,4 +206,4 @@ private static void recordLatestVersionItems( } } } -} \ No newline at end of file +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java index 0abfd8531096..2356468005ce 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowDaoStyleOperationsTest.java @@ -146,4 +146,4 @@ public void crudReadAllPatchBatchAndBulkWorkflow() { assertThat(response.getResponse().getCosmosDiagnostics()).isNotNull(); }); } -} \ No newline at end of file +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java index 667b7228956d..3946d0324b1e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowHighE2ETimeoutTest.java @@ -227,4 +227,4 @@ private 
CosmosDiagnosticsContext executeWithE2EPolicy( return error.getDiagnostics().getDiagnosticsContext(); } } -} \ No newline at end of file +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java index 1d661ec282a6..a5eeaca9822d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowLatestCommittedTest.java @@ -168,4 +168,4 @@ public void latestCommittedReadWithRegionalLeaseNotFoundFault() { leaseNotFoundRule.disable(); } } -} \ No newline at end of file +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java index 4f929557a136..8ffaf5571295 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowPartitionLevelCircuitBreakerTest.java @@ -125,4 +125,4 @@ private CosmosDiagnosticsContext queryWithPolicy(TestObject item, CosmosEndToEnd return error.getDiagnostics().getDiagnosticsContext(); } } -} \ No newline at end of file +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java index a10b7442930c..f46b517813a6 100644 --- 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowRequestOptionsTest.java @@ -154,4 +154,4 @@ public void excludedRegionAndKeywordIdentifiersFlowAcrossOperations() { assertExcludedRegions(deleteResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions); assertDidNotContactExcludedRegions(deleteResponse.getDiagnostics().getDiagnosticsContext(), excludedRegions); } -} \ No newline at end of file +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java index ef45b0a83240..2ad18a9c09a7 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSessionTokenTest.java @@ -89,4 +89,4 @@ private static String advanceSessionToken(String originalSessionToken) throws Ex return tokenParts[0] + ":" + modifiedSessionToken.convertToString(); } -} \ No newline at end of file +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java index 096bd7041fc8..d18fb5d9f9f9 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowStoredProcedureTest.java @@ -98,4 +98,4 @@ public void storedProcedureCreateReadExecuteWithMetadataFaultRule() { } } } -} \ No newline at end of 
file +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java index acb5d1c3e2f3..80bf2e67dae6 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java @@ -452,4 +452,4 @@ protected static void assertDidNotContactExcludedRegions( assertThat(contactedRegionNames).isNotNull(); assertThat(contactedRegionNames).doesNotContainAnyElementsOf(normalizedExcludedRegions); } -} \ No newline at end of file +} diff --git a/sdk/cosmos/live-fi-customer-workflows-platform-matrix.json b/sdk/cosmos/live-fi-customer-workflows-platform-matrix.json new file mode 100644 index 000000000000..c3b1e2b841fe --- /dev/null +++ b/sdk/cosmos/live-fi-customer-workflows-platform-matrix.json @@ -0,0 +1,42 @@ +{ + "displayNames": { + "-Pfi-customer-workflows": "FaultInjectionCustomerWorkflows", + "Session": "", + "ubuntu": "", + "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }": "" + }, + "include": [ + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "MultiMaster_MultiRegion_FI_CustomerWorkflows": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[\"East US 2\"]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pfi-customer-workflows" ], + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=TRUE\"", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", 
+ "ArmConfig": { + "MultiMaster_MultiRegion_FI_CustomerWorkflows_ThinClient_Http2": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pfi-customer-workflows" ], + "AdditionalArgs": "-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true -DACCOUNT_HOST=$(thin-client-canary-multi-writer-session-endpoint) -DACCOUNT_KEY=$(thin-client-canary-multi-writer-session-key) -DCOSMOS.THINCLIENT_ENABLED=true -DCOSMOS.HTTP2_ENABLED=true", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + } + ] +} diff --git a/sdk/cosmos/live-fi-sm-customer-workflows-platform-matrix.json b/sdk/cosmos/live-fi-sm-customer-workflows-platform-matrix.json new file mode 100644 index 000000000000..cd56d0a830a5 --- /dev/null +++ b/sdk/cosmos/live-fi-sm-customer-workflows-platform-matrix.json @@ -0,0 +1,41 @@ +{ + "displayNames": { + "-Pfi-sm-customer-workflows": "FaultInjectionSingleMasterCustomerWorkflows", + "Session": "", + "ubuntu": "", + "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }": "" + }, + "include": [ + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "SingleMaster_MultiRegion_FI_CustomerWorkflows": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[\"East US 2\"]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pfi-sm-customer-workflows" ], + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "SingleMaster_MultiRegion_FI_CustomerWorkflows_ThinClient_Http2": { + "ArmTemplateParameters": "@{ 
enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pfi-sm-customer-workflows" ], + "AdditionalArgs": "-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true -DACCOUNT_HOST=$(thin-client-canary-multi-region-session-endpoint) -DACCOUNT_KEY=$(thin-client-canary-multi-region-session-key) -DCOSMOS.THINCLIENT_ENABLED=true -DCOSMOS.HTTP2_ENABLED=true", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + } + ] +} diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 38e5bfc71686..e5771c249c1c 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -8,8 +8,6 @@ "-Pdirect": "Direct", "-Pmulti-master": "MultiMaster", "-Pfi-multi-master": "FaultInjectionMultiMaster", - "-Pfi-customer-workflows": "FaultInjectionCustomerWorkflows", - "-Pfi-sm-customer-workflows": "FaultInjectionSingleMasterCustomerWorkflows", "-Pflaky-multi-master": "FlakyMultiMaster", "-Pcircuit-breaker-misc-direct": "CircuitBreakerMiscDirect", "-Pcircuit-breaker-misc-gateway": "CircuitBreakerMiscGateway", @@ -24,7 +22,6 @@ "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session' }": "", "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }": "", "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }": "", - "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }": "", "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; enablePartitionMerge = $true}": "", "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong'; enableMultipleRegions = $true }": "" }, @@ -129,28 +126,12 @@ } }, "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ 
"-Pfi-multi-master", "-Pfi-customer-workflows" ], + "ProfileFlag": [ "-Pfi-multi-master" ], "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=TRUE\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "ArmConfig": { - "MultiMaster_MultiRegion_FI_CustomerWorkflows_ThinClient_Http2": { - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", - "PREFERRED_LOCATIONS": "[]" - } - }, - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pfi-customer-workflows" ], - "AdditionalArgs": "-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true -DACCOUNT_HOST=$(thin-client-canary-multi-writer-session-endpoint) -DACCOUNT_KEY=$(thin-client-canary-multi-writer-session-key) -DCOSMOS.THINCLIENT_ENABLED=true -DCOSMOS.HTTP2_ENABLED=true", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", @@ -183,37 +164,6 @@ "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "ArmConfig": { - "SingleMaster_MultiRegion_FI_CustomerWorkflows": { - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", - "PREFERRED_LOCATIONS": "[\"East US 2\"]" - } - }, - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pfi-sm-customer-workflows" ], - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "ArmConfig": { - "SingleMaster_MultiRegion_FI_CustomerWorkflows_ThinClient_Http2": { - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; 
defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", - "PREFERRED_LOCATIONS": "[]" - } - }, - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pfi-sm-customer-workflows" ], - "AdditionalArgs": "-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true -DACCOUNT_HOST=$(thin-client-canary-multi-region-session-endpoint) -DACCOUNT_KEY=$(thin-client-canary-multi-region-session-key) -DCOSMOS.THINCLIENT_ENABLED=true -DCOSMOS.HTTP2_ENABLED=true", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", diff --git a/sdk/cosmos/tests.yml b/sdk/cosmos/tests.yml index 0eecf42166ea..a03d1638ab89 100644 --- a/sdk/cosmos/tests.yml +++ b/sdk/cosmos/tests.yml @@ -233,6 +233,70 @@ extends: - name: AdditionalArgs value: '-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true -DACCOUNT_HOST=$(gsi-pipeline-uri) -DACCOUNT_KEY=$(gsi-pipeline-key)' + - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml + parameters: + TestName: 'Cosmos_Live_Test_FaultInjectionCustomerWorkflows' + CloudConfig: + Public: + ServiceConnection: azure-sdk-tests-cosmos + MatrixConfigs: + - Name: Cosmos_live_test_fi_customer_workflows + Path: sdk/cosmos/live-fi-customer-workflows-platform-matrix.json + Selection: all + GenerateVMJobs: true + MatrixReplace: + - .*Version=1.2(1|5)/1.17 + ServiceDirectory: cosmos + Artifacts: + - name: azure-cosmos + groupId: com.azure + safeName: azurecosmos + AdditionalModules: + - name: azure-cosmos-tests + groupId: com.azure + - name: azure-cosmos-benchmark + groupId: com.azure + TimeoutInMinutes: 210 + MaxParallel: 20 + TestGoals: 'verify' + TestOptions: '$(ProfileFlag) $(AdditionalArgs) -DskipCompile=true -DskipTestCompile=true -DcreateSourcesJar=false' + TestResultsFiles: '**/junitreports/TEST-*.xml' + AdditionalVariables: + - name: AdditionalArgs + value: '-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true' + + - template: 
/eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml + parameters: + TestName: 'Cosmos_Live_Test_FaultInjectionSingleMasterCustomerWorkflows' + CloudConfig: + Public: + ServiceConnection: azure-sdk-tests-cosmos + MatrixConfigs: + - Name: Cosmos_live_test_fi_sm_customer_workflows + Path: sdk/cosmos/live-fi-sm-customer-workflows-platform-matrix.json + Selection: all + GenerateVMJobs: true + MatrixReplace: + - .*Version=1.2(1|5)/1.17 + ServiceDirectory: cosmos + Artifacts: + - name: azure-cosmos + groupId: com.azure + safeName: azurecosmos + AdditionalModules: + - name: azure-cosmos-tests + groupId: com.azure + - name: azure-cosmos-benchmark + groupId: com.azure + TimeoutInMinutes: 210 + MaxParallel: 20 + TestGoals: 'verify' + TestOptions: '$(ProfileFlag) $(AdditionalArgs) -DskipCompile=true -DskipTestCompile=true -DcreateSourcesJar=false' + TestResultsFiles: '**/junitreports/TEST-*.xml' + AdditionalVariables: + - name: AdditionalArgs + value: '-DCOSMOS.CLIENT_LEAK_DETECTION_ENABLED=true' + - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml parameters: TestName: 'Spring_Data_Cosmos_Integration' From b946a6524f2175cbd5bd8c8e373172810a1e0937 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 14:05:53 +0000 Subject: [PATCH 10/14] Fix single master tests --- ...rWorkflowSingleMasterAvailabilityTest.java | 36 +++++++++++++++++-- .../customer/CustomerWorkflowTestBase.java | 36 +++++++++++++++---- 2 files changed, 62 insertions(+), 10 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java index 65e4ed0b83f0..3b1305d3b206 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowSingleMasterAvailabilityTest.java @@ -59,9 +59,9 @@ public void excludedReadableRegionRoutesReadToRemainingReadableRegion() { .setExcludedRegions(excludedRegions) .setReadConsistencyStrategy(ReadConsistencyStrategy.LATEST_COMMITTED); - CosmosItemResponse readResponse = this.container - .readItem(item.getId(), partitionKey(item), readOptions, TestObject.class) - .block(); + // Excluding the preferred readable region forces the read onto the remaining readable region, which may + // lag behind the just-completed write. Retry until cross-region replication catches up before asserting. + CosmosItemResponse readResponse = readWithReplicationRetry(item, readOptions); assertThat(readResponse).isNotNull(); CosmosDiagnosticsContext diagnosticsContext = readResponse.getDiagnostics().getDiagnosticsContext(); @@ -165,6 +165,8 @@ public Object[][] singleWriteReadFaultScenarios() { @Test(groups = {"fi-sm-customer-workflows"}, dataProvider = "singleWriteReadFaultScenarios", timeOut = TIMEOUT) public void singleWriteReadFaultMatrix(FaultInjectionServerErrorType errorType) { + skipIfFaultTypeUnsupportedOnGateway(errorType, "Customer single-master read fault matrix"); + TestObject item = TestObject.create(); this.container.createItem(item).block(); registerForCleanup(item); @@ -244,6 +246,34 @@ private CosmosDiagnosticsContext readWithDiagnostics(TestObject item, CosmosItem } } + private CosmosItemResponse readWithReplicationRetry(TestObject item, CosmosItemRequestOptions options) { + Duration deadline = Duration.ofSeconds(30); + long deadlineNanos = System.nanoTime() + deadline.toNanos(); + CosmosException lastNotFound = null; + + while (System.nanoTime() < deadlineNanos) { + try { + return this.container + .readItem(item.getId(), partitionKey(item), options, TestObject.class) + .block(); + } catch (CosmosException error) { + if (error.getStatusCode() != 
HttpConstants.StatusCodes.NOTFOUND) { + throw error; + } + // Item not yet replicated to the remaining readable region - wait and retry. + lastNotFound = error; + try { + Thread.sleep(500); + } catch (InterruptedException interrupted) { + Thread.currentThread().interrupt(); + throw new AssertionError("Interrupted while waiting for cross-region replication.", interrupted); + } + } + } + + throw new AssertionError("Item was not replicated to the remaining readable region within " + deadline, lastNotFound); + } + private CosmosDiagnosticsContext createWithDiagnostics(TestObject item, CosmosItemRequestOptions options) { try { CosmosDiagnosticsContext diagnosticsContext = this.container diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java index 80bf2e67dae6..1afb56bd9eb2 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java @@ -312,13 +312,20 @@ protected static List discoverReadableRegions(CosmosAsyncClient client) private static DatabaseAccount readDatabaseAccount(CosmosAsyncClient client) { AsyncDocumentClient asyncDocumentClient = ReflectionUtils.getAsyncDocumentClient(client); RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) asyncDocumentClient; - - // Force a database account read instead of relying on the possibly not-yet-populated cached value - // returned by GlobalEndpointManager.getLatestDatabaseAccount(). 
- DatabaseAccount databaseAccount = rxDocumentClient.getDatabaseAccount().block(); - - if (databaseAccount == null) { - GlobalEndpointManager globalEndpointManager = ReflectionUtils.getGlobalEndpointManager(rxDocumentClient); + GlobalEndpointManager globalEndpointManager = ReflectionUtils.getGlobalEndpointManager(rxDocumentClient); + + // The latest database account is populated during client initialization. Poll briefly to defend against + // an initialization race instead of forcing a synthetic database-account read (which is not routable in + // direct connection mode). + DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + long deadlineNanos = System.nanoTime() + Duration.ofSeconds(10).toNanos(); + while (databaseAccount == null && System.nanoTime() < deadlineNanos) { + try { + Thread.sleep(200); + } catch (InterruptedException interrupted) { + Thread.currentThread().interrupt(); + throw new AssertionError("Interrupted while waiting for the database account to be available.", interrupted); + } databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); } @@ -366,6 +373,21 @@ protected final void skipIfNotGatewayMode(String scenarioName) { } } + /** + * Skips fault-injection scenarios that cannot be injected for the gateway connection type. The gateway + * internally retries 410/0, so {@code GONE} and {@code STALED_ADDRESSES_SERVER_GONE} rules are rejected at + * configuration time for gateway-mode clients. 
+ */ + protected final void skipIfFaultTypeUnsupportedOnGateway(FaultInjectionServerErrorType errorType, String scenarioName) { + if (currentFaultInjectionConnectionType() == FaultInjectionConnectionType.GATEWAY + && (errorType == FaultInjectionServerErrorType.GONE + || errorType == FaultInjectionServerErrorType.STALED_ADDRESSES_SERVER_GONE)) { + + throw new SkipException( + scenarioName + " cannot inject " + errorType + " for the gateway connection type."); + } + } + /** * Configures the same server-error fault for both the point-read ({@code READ_ITEM}) and query * ({@code QUERY_ITEM}) operation types. {@code readMany} resolves to a point read for a single item in a From c422d86f30dfc7730e28f655e6dc0656b6daa6b4 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 14:38:54 +0000 Subject: [PATCH 11/14] Update CustomerWorkflowChangeFeedProcessorTest.java --- .../CustomerWorkflowChangeFeedProcessorTest.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java index c7bb684b066b..2d5b88cacd65 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowChangeFeedProcessorTest.java @@ -90,7 +90,12 @@ public void latestVersionProcessorRestartResumesFromLeasesWorkflow() throws Inte assertThat(processor.isStarted()).isTrue(); assertThat(restartLatch.await(30, TimeUnit.SECONDS)).isTrue(); assertThat(receivedIds).contains(restartedItem.getId()); - assertThat(processor.getEstimatedLag().block()).isNotNull(); + + // getEstimatedLag() is not supported for a latest-version processor; query the per-lease 
state + // (which exposes the estimated lag) via the supported getCurrentState() API instead. + List currentState = processor.getCurrentState().block(); + assertThat(currentState).isNotNull().isNotEmpty(); + assertThat(currentState).allSatisfy(state -> assertThat(state.getEstimatedLag()).isGreaterThanOrEqualTo(0)); } finally { if (readFeedDelayRule != null) { readFeedDelayRule.disable(); @@ -144,7 +149,12 @@ public void latestVersionProcessorWithNewLeasePrefixReprocessesFromBeginningWork assertThat(processor.isStarted()).isTrue(); assertThat(reprocessLatch.await(30, TimeUnit.SECONDS)).isTrue(); assertThat(reprocessedIds).containsAll(expectedIds); - assertThat(processor.getEstimatedLag().block()).isNotNull(); + + // getEstimatedLag() is not supported for a latest-version processor; query the per-lease state + // (which exposes the estimated lag) via the supported getCurrentState() API instead. + List currentState = processor.getCurrentState().block(); + assertThat(currentState).isNotNull().isNotEmpty(); + assertThat(currentState).allSatisfy(state -> assertThat(state.getEstimatedLag()).isGreaterThanOrEqualTo(0)); } finally { if (processor != null && processor.isStarted()) { processor.stop().block(); From 3ff626d8768518b9d30d264154b2bf994088ca47 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 19 Jun 2026 15:05:57 +0000 Subject: [PATCH 12/14] Update CustomerWorkflowTestBase.java --- .../customer/CustomerWorkflowTestBase.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java index 1afb56bd9eb2..604a24b2621a 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java @@ -88,11 +88,28 @@ protected final void initializeSharedSinglePartitionContainer(String scenarioNam this.client = clientBuilder.buildAsyncClient(); this.container = getSharedSinglePartitionCosmosContainer(this.client); + awaitSharedContainerPropagation(); } finally { safeClose(discoveryClient); } } + /** + * Waits briefly after the shared container is created so its metadata fully propagates to every region of + * the multi-region account. A direct-mode write routed to a secondary region (for example via excluded + * regions) immediately after container creation can otherwise fail because the container is not yet fully + * provisioned in that region - observed as a 401 MAC-signature error from the secondary region's backend + * replica. Kept in this customer-scenario base (not TestSuiteBase) so single-region tests are not delayed. + */ + protected static void awaitSharedContainerPropagation() { + try { + Thread.sleep(Duration.ofSeconds(5).toMillis()); + } catch (InterruptedException interrupted) { + Thread.currentThread().interrupt(); + throw new AssertionError("Interrupted while waiting for shared container propagation.", interrupted); + } + } + protected final void closeClient() { cleanupRegisteredItems(); safeClose(this.client); @@ -121,6 +138,7 @@ protected final void initializeSharedSingleWriteMultiRegionContainer(String scen .contentResponseOnWriteEnabled(true) .buildAsyncClient(); this.container = getSharedSinglePartitionCosmosContainer(this.client); + awaitSharedContainerPropagation(); } finally { safeClose(discoveryClient); } From 81f1de99ae5929485987b9ea11a48ce8f61907c7 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 22 Jun 2026 08:04:20 +0000 Subject: [PATCH 13/14] Fixing test flakiness due to async container creation --- ...StreamingOrderByQueryVectorSearchTest.java | 2 +- .../cosmos/rx/OrderbyDocumentQueryTest.java | 2 +- 
.../azure/cosmos/rx/QueryValidationTests.java | 8 +- .../cosmos/rx/ReadFeedCollectionsTest.java | 2 +- .../com/azure/cosmos/rx/TestSuiteBase.java | 148 ++++++++++++++++-- .../customer/CustomerWorkflowTestBase.java | 20 +-- 6 files changed, 145 insertions(+), 37 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java index c632fd06e315..7629bbb15ae7 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java @@ -102,7 +102,7 @@ public void before_NonStreamingOrderByQueryVectorSearchTest() { database.createContainer(containerProperties).block(); largeDataContainer = database.getContainer(largeDataContainerId); - waitForCollectionToBeAvailableToRead(); + waitForCollectionToBeAvailableToRead(largeDataContainer); for (Document doc : getVectorDocs()) { flatIndexContainer.createItem(doc).block(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java index 16722fd466bd..ffe81695c2f3 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java @@ -688,7 +688,7 @@ public void before_OrderbyDocumentQueryTest() throws Exception { })) .block(); roundTripsContainer = createdDatabase.getContainer(containerName); - waitForCollectionToBeAvailableToRead(); + waitForCollectionToBeAvailableToRead(roundTripsContainer); setupRoundTripContainer(); List> keyValuePropsList = new ArrayList<>(); diff --git 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java index 966890825de4..ec2edda48f87 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java @@ -126,7 +126,7 @@ public void orderByQueryForLargeCollection() { ).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerProperties.getId()); - waitForCollectionToBeAvailableToRead(); + waitForCollectionToBeAvailableToRead(container); int partitionDocCount = 5; int pageSize = partitionDocCount + 1; @@ -382,7 +382,7 @@ public void splitQueryContinuationToken() throws Exception { CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, "/mypk"); CosmosContainerResponse containerResponse = createdDatabase.createContainer(containerProperties).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerId); - waitForCollectionToBeAvailableToRead(); + waitForCollectionToBeAvailableToRead(container); AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); //Insert some documents @@ -494,7 +494,7 @@ public void orderbyContinuationOnUndefinedAndNull() throws Exception { createdDatabase.createContainer(containerProperties, new CosmosContainerRequestOptions()).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerProperties.getId()); - waitForCollectionToBeAvailableToRead(); + waitForCollectionToBeAvailableToRead(container); CosmosContainerResponse containerResponse = container.read().block(); assert (containerResponse != null); CosmosContainerProperties properties = containerResponse.getProperties(); @@ -582,7 +582,7 @@ public void queryLargePartitionKeyOn100BPKCollection() throws Exception { CosmosContainerProperties 
containerProperties = new CosmosContainerProperties(containerId, "/id"); CosmosContainerResponse containerResponse = createdDatabase.createContainer(containerProperties).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerId); - waitForCollectionToBeAvailableToRead(); + waitForCollectionToBeAvailableToRead(container); //id as partitionkey > 100bytes String itemID1 = "cosmosdb" + "-drWarm4Z60GkknMfHLo5BwuiH7w6AffzSb9jKbvwAQwaRZd10oxnLeCueuyZ5gbm9dwVVAqJLdzrB38Dk73Q6xMErv-0"; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java index e081fee801b4..6a2cb96d3b9a 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java @@ -88,7 +88,7 @@ public CosmosAsyncContainer createCollections(CosmosAsyncDatabase database) { partitionKeyDef.setPaths(paths); CosmosContainerProperties containerProperties = new CosmosContainerProperties(UUID.randomUUID().toString(), partitionKeyDef); database.createContainer(containerProperties, new CosmosContainerRequestOptions()).block(); - waitForCollectionToBeAvailableToRead(); + waitForCollectionToBeAvailableToRead(database.getContainer(containerProperties.getId())); return database.getContainer(containerProperties.getId()); } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index 4f9f54f2fdb9..e4e45eb6b715 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -31,10 +31,13 @@ import com.azure.cosmos.implementation.Configs; import 
com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.Database; +import com.azure.cosmos.implementation.DatabaseAccount; +import com.azure.cosmos.implementation.DatabaseAccountLocation; import com.azure.cosmos.implementation.Document; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.FailureValidator; import com.azure.cosmos.implementation.FeedResponseListValidator; +import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.InternalObjectNode; @@ -43,6 +46,7 @@ import com.azure.cosmos.implementation.QueryFeedOperationState; import com.azure.cosmos.implementation.RequestOptions; import com.azure.cosmos.implementation.Resource; +import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.ResourceResponse; import com.azure.cosmos.implementation.ResourceResponseValidator; import com.azure.cosmos.implementation.TestConfigurations; @@ -51,6 +55,7 @@ import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.directconnectivity.Protocol; +import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.guava25.base.CaseFormat; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.models.CosmosClientTelemetryConfig; @@ -566,24 +571,143 @@ public static CosmosAsyncContainer createCollection(CosmosAsyncDatabase database .getCosmosAsyncClientAccessor() .getPreferredRegions(client).size() > 1; if (throughput > 6000 || isMultiRegional) { - waitForCollectionToBeAvailableToRead(); + waitForCollectionToBeAvailableToRead(database.getContainer(cosmosContainerProperties.getId())); } return 
database.getContainer(cosmosContainerProperties.getId()); } - protected static void waitForCollectionToBeAvailableToRead() { - // Creating a container is an async task - especially with multiple regions it can - // take some time until the container is available in the remote regions as well. - // When the container does not exist yet, metadata reads or item operations can - // fail with 404/1013 "Collection is not yet available for read". - // So, adding this delay after container creation to minimize risk of hitting these errors. - try { - TimeUnit.SECONDS.sleep(3); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); + protected static void waitForCollectionToBeAvailableToRead(CosmosAsyncContainer container) { + // Creating a container is asynchronous - especially on multi-region accounts the new collection can + // take time to become readable in the non-write regions. Until then, reads routed to those regions fail + // with 404/1013 ("Collection is not yet available for read"). Instead of a fixed sleep, verify - against + // every non-primary region of the account - that the collection is readable, probing each region (by + // excluding all other regions) with exponential back-off until it succeeds, bounded to two minutes total. + CosmosAsyncClient client = ImplementationBridgeHelpers + .CosmosAsyncDatabaseHelper + .getCosmosAsyncDatabaseAccessor() + .getCosmosAsyncClient(container.getDatabase()); + + DatabaseAccount databaseAccount = getLatestDatabaseAccount(client); + + // Use the account's regions (not the client's preferred regions, which may be a subset). + List allRegions = new ArrayList<>(); + for (DatabaseAccountLocation location : databaseAccount.getReadableLocations()) { + allRegions.add(location.getName()); } + + // The primary region is the first writable location; propagation lag manifests in the other regions. 
+ String primaryRegion = null; + for (DatabaseAccountLocation location : databaseAccount.getWritableLocations()) { + primaryRegion = location.getName(); + break; + } + final String primary = primaryRegion; + + List nonPrimaryRegions = allRegions + .stream() + .filter(region -> primary == null || !region.equalsIgnoreCase(primary)) + .collect(Collectors.toList()); + + Duration maxWait = Duration.ofMinutes(2); + long deadlineNanos = System.nanoTime() + maxWait.toNanos(); + + if (nonPrimaryRegions.isEmpty()) { + // Single-region account: there is no non-primary region to verify, but the collection still needs + // to be readable (for example while physical partitions are provisioned). + awaitContainerReadableInRegion(container, null, Collections.emptyList(), deadlineNanos, maxWait); + return; + } + + // Verify the collection is readable in each non-primary region. + for (String region : nonPrimaryRegions) { + final String target = region; + List excludedRegions = allRegions + .stream() + .filter(other -> !other.equalsIgnoreCase(target)) + .collect(Collectors.toList()); + awaitContainerReadableInRegion(container, region, excludedRegions, deadlineNanos, maxWait); + } + } + + private static void awaitContainerReadableInRegion( + CosmosAsyncContainer container, + String targetRegion, + List excludedRegions, + long deadlineNanos, + Duration maxWait) { + + long backoffMillis = 100; + long maxBackoffMillis = 5000; + int attempts = 0; + Throwable lastError = null; + + while (true) { + attempts++; + try { + CosmosQueryRequestOptions options = new CosmosQueryRequestOptions(); + if (!excludedRegions.isEmpty()) { + options.setExcludedRegions(excludedRegions); + } + // A successful (possibly empty) page proves the collection is resolvable/readable in the + // targeted region. 
+ container.queryItems("SELECT TOP 1 c.id FROM c", options, Object.class) + .byPage(1) + .blockFirst(); + return; + } catch (Exception error) { + lastError = error; + } + + long remainingNanos = deadlineNanos - System.nanoTime(); + if (remainingNanos <= 0) { + break; + } + + long sleepMillis = Math.max(1, Math.min(backoffMillis, TimeUnit.NANOSECONDS.toMillis(remainingNanos))); + try { + TimeUnit.MILLISECONDS.sleep(sleepMillis); + } catch (InterruptedException interrupted) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted while waiting for collection to be available to read.", interrupted); + } + backoffMillis = Math.min(backoffMillis * 2, maxBackoffMillis); + } + + throw new AssertionError( + String.format( + "Container '%s' was not available to read%s within %d seconds (%d attempts).", + container.getId(), + targetRegion != null ? " in region '" + targetRegion + "'" : "", + maxWait.getSeconds(), + attempts), + lastError); + } + + private static DatabaseAccount getLatestDatabaseAccount(CosmosAsyncClient client) { + AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(client); + GlobalEndpointManager globalEndpointManager = + ReflectionUtils.getGlobalEndpointManager((RxDocumentClientImpl) asyncDocumentClient); + + // The latest database account is populated during client initialization; poll briefly to defend against + // an initialization race. 
+ DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + long deadlineNanos = System.nanoTime() + Duration.ofSeconds(10).toNanos(); + while (databaseAccount == null && System.nanoTime() < deadlineNanos) { + try { + TimeUnit.MILLISECONDS.sleep(200); + } catch (InterruptedException interrupted) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted while resolving the database account.", interrupted); + } + databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + } + + if (databaseAccount == null) { + throw new AssertionError("Database account was not available to determine the account's regions."); + } + + return databaseAccount; } public static CosmosAsyncContainer createCollection(CosmosAsyncDatabase database, CosmosContainerProperties cosmosContainerProperties, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java index 604a24b2621a..77f2f40c1f08 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java @@ -88,28 +88,12 @@ protected final void initializeSharedSinglePartitionContainer(String scenarioNam this.client = clientBuilder.buildAsyncClient(); this.container = getSharedSinglePartitionCosmosContainer(this.client); - awaitSharedContainerPropagation(); + waitForCollectionToBeAvailableToRead(this.container); } finally { safeClose(discoveryClient); } } - /** - * Waits briefly after the shared container is created so its metadata fully propagates to every region of - * the multi-region account. 
A direct-mode write routed to a secondary region (for example via excluded - * regions) immediately after container creation can otherwise fail because the container is not yet fully - * provisioned in that region - observed as a 401 MAC-signature error from the secondary region's backend - * replica. Kept in this customer-scenario base (not TestSuiteBase) so single-region tests are not delayed. - */ - protected static void awaitSharedContainerPropagation() { - try { - Thread.sleep(Duration.ofSeconds(5).toMillis()); - } catch (InterruptedException interrupted) { - Thread.currentThread().interrupt(); - throw new AssertionError("Interrupted while waiting for shared container propagation.", interrupted); - } - } - protected final void closeClient() { cleanupRegisteredItems(); safeClose(this.client); @@ -138,7 +122,7 @@ protected final void initializeSharedSingleWriteMultiRegionContainer(String scen .contentResponseOnWriteEnabled(true) .buildAsyncClient(); this.container = getSharedSinglePartitionCosmosContainer(this.client); - awaitSharedContainerPropagation(); + waitForCollectionToBeAvailableToRead(this.container); } finally { safeClose(discoveryClient); } From f530460c544f4af718a4ca9d02266268293f3e01 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 22 Jun 2026 09:18:25 +0000 Subject: [PATCH 14/14] Fixes test failures --- ...ContainerCreateDeleteWithSameNameTest.java | 16 ++++++--- ...StreamingOrderByQueryVectorSearchTest.java | 2 +- .../cosmos/rx/OrderbyDocumentQueryTest.java | 2 +- .../azure/cosmos/rx/QueryValidationTests.java | 8 ++--- .../cosmos/rx/ReadFeedCollectionsTest.java | 2 +- .../com/azure/cosmos/rx/TestSuiteBase.java | 35 ++++++++++++++----- .../customer/CustomerWorkflowTestBase.java | 4 +-- 7 files changed, 47 insertions(+), 22 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ContainerCreateDeleteWithSameNameTest.java 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ContainerCreateDeleteWithSameNameTest.java index a6423d69bb03..e1013fe00a06 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ContainerCreateDeleteWithSameNameTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ContainerCreateDeleteWithSameNameTest.java @@ -889,6 +889,9 @@ private void createDeleteContainerWithSameName( String pkPathAfterRecreate, Function getPkAfterRecreate) throws InterruptedException { CosmosAsyncContainer container = null; + // A throwaway client runs the post-create readiness probe so it does not warm this test's main client + // collection cache - the test relies on that cache being stale after the container is recreated. + CosmosAsyncClient probeClient = getClientBuilder().buildAsyncClient(); try { // step1: create container String testContainerId = UUID.randomUUID().toString(); @@ -899,7 +902,7 @@ private void createDeleteContainerWithSameName( partitionKeyDef.setPaths(paths); CosmosContainerProperties containerProperties = getCollectionDefinition(testContainerId, partitionKeyDef); - container = createCollection(this.createdDatabase, containerProperties, new CosmosContainerRequestOptions(), ruBeforeDelete); + container = createCollection(this.createdDatabase, containerProperties, new CosmosContainerRequestOptions(), ruBeforeDelete, probeClient); // Step2: execute func validateFunc.accept(container, getPkBeforeDelete, false); @@ -912,13 +915,14 @@ private void createDeleteContainerWithSameName( partitionKeyDef.setPaths(Arrays.asList(pkPathAfterRecreate)); containerProperties = getCollectionDefinition(testContainerId, partitionKeyDef); - container = createCollection(this.createdDatabase, containerProperties, new CosmosContainerRequestOptions(), ruAfterRecreate); + container = createCollection(this.createdDatabase, containerProperties, new CosmosContainerRequestOptions(), ruAfterRecreate, probeClient); // step5: same as 
step2. // This part will confirm the cache refreshed correctly validateFunc.accept(container, getPkAfterRecreate, true); } finally { safeDeleteCollection(container); + safeClose(probeClient); } } @@ -930,6 +934,9 @@ private void changeFeedCreateDeleteContainerWithSameName( String pkPathAfterRecreate) throws InterruptedException { CosmosAsyncContainer feedContainer = null; CosmosAsyncContainer leaseContainer = null; + // A throwaway client runs the post-create readiness probe so it does not warm this test's main client + // collection cache - the test relies on that cache being stale after the feed container is recreated. + CosmosAsyncClient probeClient = getClientBuilder().buildAsyncClient(); try { // step1: create feed container and lease container @@ -937,7 +944,7 @@ private void changeFeedCreateDeleteContainerWithSameName( PartitionKeyDefinition partitionKeyDefinition = new PartitionKeyDefinition(); partitionKeyDefinition.setPaths(Arrays.asList(pkPathBeforeDelete)); CosmosContainerProperties feedContainerProperties = getCollectionDefinition(feedContainerId, partitionKeyDefinition); - feedContainer = createCollection(this.createdDatabase, feedContainerProperties, new CosmosContainerRequestOptions(), ruBeforeDelete); + feedContainer = createCollection(this.createdDatabase, feedContainerProperties, new CosmosContainerRequestOptions(), ruBeforeDelete, probeClient); String leaseContainerId = UUID.randomUUID().toString(); CosmosContainerProperties leaseContainerProperties = getCollectionDefinition(leaseContainerId); @@ -954,7 +961,7 @@ private void changeFeedCreateDeleteContainerWithSameName( // step 4: recreate the feed container with same id as step 1 partitionKeyDefinition.setPaths(Arrays.asList(pkPathAfterRecreate)); feedContainerProperties = getCollectionDefinition(feedContainerId, partitionKeyDefinition); - feedContainer = createCollection(this.createdDatabase, feedContainerProperties, new CosmosContainerRequestOptions(), ruAfterRecreate); + feedContainer = 
createCollection(this.createdDatabase, feedContainerProperties, new CosmosContainerRequestOptions(), ruAfterRecreate, probeClient); // step5: recreate the lease container and lease container with same ids as step1 leaseContainer = createLeaseContainer(leaseContainerProperties.getId()); @@ -965,6 +972,7 @@ private void changeFeedCreateDeleteContainerWithSameName( } finally { safeDeleteCollection(feedContainer); safeDeleteCollection(leaseContainer); + safeClose(probeClient); } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java index 7629bbb15ae7..bcfc6cb66bf4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java @@ -102,7 +102,7 @@ public void before_NonStreamingOrderByQueryVectorSearchTest() { database.createContainer(containerProperties).block(); largeDataContainer = database.getContainer(largeDataContainerId); - waitForCollectionToBeAvailableToRead(largeDataContainer); + waitForCollectionToBeAvailableToRead(largeDataContainer, /* probeClient */ null); for (Document doc : getVectorDocs()) { flatIndexContainer.createItem(doc).block(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java index ffe81695c2f3..1bf138fc529e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java @@ -688,7 +688,7 @@ public void before_OrderbyDocumentQueryTest() throws Exception { })) .block(); roundTripsContainer = 
createdDatabase.getContainer(containerName); - waitForCollectionToBeAvailableToRead(roundTripsContainer); + waitForCollectionToBeAvailableToRead(roundTripsContainer, /* probeClient */ null); setupRoundTripContainer(); List> keyValuePropsList = new ArrayList<>(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java index ec2edda48f87..8658d3c4a265 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java @@ -126,7 +126,7 @@ public void orderByQueryForLargeCollection() { ).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerProperties.getId()); - waitForCollectionToBeAvailableToRead(container); + waitForCollectionToBeAvailableToRead(container, /* probeClient */ null); int partitionDocCount = 5; int pageSize = partitionDocCount + 1; @@ -382,7 +382,7 @@ public void splitQueryContinuationToken() throws Exception { CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, "/mypk"); CosmosContainerResponse containerResponse = createdDatabase.createContainer(containerProperties).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerId); - waitForCollectionToBeAvailableToRead(container); + waitForCollectionToBeAvailableToRead(container, /* probeClient */ null); AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); //Insert some documents @@ -494,7 +494,7 @@ public void orderbyContinuationOnUndefinedAndNull() throws Exception { createdDatabase.createContainer(containerProperties, new CosmosContainerRequestOptions()).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerProperties.getId()); - waitForCollectionToBeAvailableToRead(container); + 
waitForCollectionToBeAvailableToRead(container, /* probeClient */ null); CosmosContainerResponse containerResponse = container.read().block(); assert (containerResponse != null); CosmosContainerProperties properties = containerResponse.getProperties(); @@ -582,7 +582,7 @@ public void queryLargePartitionKeyOn100BPKCollection() throws Exception { CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, "/id"); CosmosContainerResponse containerResponse = createdDatabase.createContainer(containerProperties).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerId); - waitForCollectionToBeAvailableToRead(container); + waitForCollectionToBeAvailableToRead(container, /* probeClient */ null); //id as partitionkey > 100bytes String itemID1 = "cosmosdb" + "-drWarm4Z60GkknMfHLo5BwuiH7w6AffzSb9jKbvwAQwaRZd10oxnLeCueuyZ5gbm9dwVVAqJLdzrB38Dk73Q6xMErv-0"; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java index 6a2cb96d3b9a..d0f9c7dee219 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java @@ -88,7 +88,7 @@ public CosmosAsyncContainer createCollections(CosmosAsyncDatabase database) { partitionKeyDef.setPaths(paths); CosmosContainerProperties containerProperties = new CosmosContainerProperties(UUID.randomUUID().toString(), partitionKeyDef); database.createContainer(containerProperties, new CosmosContainerRequestOptions()).block(); - waitForCollectionToBeAvailableToRead(database.getContainer(containerProperties.getId())); + waitForCollectionToBeAvailableToRead(database.getContainer(containerProperties.getId()), /* probeClient */ null); return database.getContainer(containerProperties.getId()); } } diff --git 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index e4e45eb6b715..a13bfe770730 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -552,6 +552,18 @@ protected static void waitIfNeededForReplicasToCatchUp(CosmosClientBuilder clien public static CosmosAsyncContainer createCollection(CosmosAsyncDatabase database, CosmosContainerProperties cosmosContainerProperties, CosmosContainerRequestOptions options, int throughput) { + return createCollection(database, cosmosContainerProperties, options, throughput, /* probeClient */ null); + } + + /** + * Overload of {@link #createCollection(CosmosAsyncDatabase, CosmosContainerProperties, CosmosContainerRequestOptions, int)} + * that runs the post-creation collection-readiness probe using {@code probeClient} instead of the caller's + * client. Tests that depend on the caller's collection cache remaining stale after a recreate (for example + * {@code ContainerCreateDeleteWithSameNameTest}) pass a throwaway client here so the probe does not refresh + * their main client's cache. When {@code probeClient} is null the caller's client is used. 
+ */ + public static CosmosAsyncContainer createCollection(CosmosAsyncDatabase database, CosmosContainerProperties cosmosContainerProperties, + CosmosContainerRequestOptions options, int throughput, CosmosAsyncClient probeClient) { database.createContainer(cosmosContainerProperties, ThroughputProperties.createManualThroughput(throughput), options) .retryWhen(Retry.fixedDelay(3, Duration.ofSeconds(5)) .filter(TestSuiteBase::isTransientCreateFailure)) @@ -571,24 +583,29 @@ public static CosmosAsyncContainer createCollection(CosmosAsyncDatabase database .getCosmosAsyncClientAccessor() .getPreferredRegions(client).size() > 1; if (throughput > 6000 || isMultiRegional) { - waitForCollectionToBeAvailableToRead(database.getContainer(cosmosContainerProperties.getId())); + waitForCollectionToBeAvailableToRead(database.getContainer(cosmosContainerProperties.getId()), probeClient); } return database.getContainer(cosmosContainerProperties.getId()); } - protected static void waitForCollectionToBeAvailableToRead(CosmosAsyncContainer container) { + protected static void waitForCollectionToBeAvailableToRead(CosmosAsyncContainer container, CosmosAsyncClient probeClient) { // Creating a container is asynchronous - especially on multi-region accounts the new collection can // take time to become readable in the non-write regions. Until then, reads routed to those regions fail // with 404/1013 ("Collection is not yet available for read"). Instead of a fixed sleep, verify - against // every non-primary region of the account - that the collection is readable, probing each region (by // excluding all other regions) with exponential back-off until it succeeds, bounded to two minutes total. 
- CosmosAsyncClient client = ImplementationBridgeHelpers - .CosmosAsyncDatabaseHelper - .getCosmosAsyncDatabaseAccessor() - .getCosmosAsyncClient(container.getDatabase()); - + // The probe is issued through probeClient when provided (so a throwaway client does not warm the caller's + // caches); otherwise the container's own client is used. + CosmosAsyncClient client = probeClient != null + ? probeClient + : ImplementationBridgeHelpers + .CosmosAsyncDatabaseHelper + .getCosmosAsyncDatabaseAccessor() + .getCosmosAsyncClient(container.getDatabase()); DatabaseAccount databaseAccount = getLatestDatabaseAccount(client); + CosmosAsyncContainer probeContainer = + client.getDatabase(container.getDatabase().getId()).getContainer(container.getId()); // Use the account's regions (not the client's preferred regions, which may be a subset). List allRegions = new ArrayList<>(); @@ -615,7 +632,7 @@ protected static void waitForCollectionToBeAvailableToRead(CosmosAsyncContainer if (nonPrimaryRegions.isEmpty()) { // Single-region account: there is no non-primary region to verify, but the collection still needs // to be readable (for example while physical partitions are provisioned). 
- awaitContainerReadableInRegion(container, null, Collections.emptyList(), deadlineNanos, maxWait); + awaitContainerReadableInRegion(probeContainer, null, Collections.emptyList(), deadlineNanos, maxWait); return; } @@ -626,7 +643,7 @@ protected static void waitForCollectionToBeAvailableToRead(CosmosAsyncContainer .stream() .filter(other -> !other.equalsIgnoreCase(target)) .collect(Collectors.toList()); - awaitContainerReadableInRegion(container, region, excludedRegions, deadlineNanos, maxWait); + awaitContainerReadableInRegion(probeContainer, region, excludedRegions, deadlineNanos, maxWait); } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java index 77f2f40c1f08..e8d57ca887e2 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/workflows/customer/CustomerWorkflowTestBase.java @@ -88,7 +88,7 @@ protected final void initializeSharedSinglePartitionContainer(String scenarioNam this.client = clientBuilder.buildAsyncClient(); this.container = getSharedSinglePartitionCosmosContainer(this.client); - waitForCollectionToBeAvailableToRead(this.container); + waitForCollectionToBeAvailableToRead(this.container, /* probeClient */ null); } finally { safeClose(discoveryClient); } @@ -122,7 +122,7 @@ protected final void initializeSharedSingleWriteMultiRegionContainer(String scen .contentResponseOnWriteEnabled(true) .buildAsyncClient(); this.container = getSharedSinglePartitionCosmosContainer(this.client); - waitForCollectionToBeAvailableToRead(this.container); + waitForCollectionToBeAvailableToRead(this.container, /* probeClient */ null); } finally { safeClose(discoveryClient); }