Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
106 commits
Select commit Hold shift + click to select a range
93f03a3
Introducing Gateway V2.0 dedicated HTTP timeout policy.
jeet1995 Feb 2, 2026
35d94f3
Introducing Gateway V2.0 dedicated HTTP timeout policy.
jeet1995 Feb 2, 2026
3c126cb
Introducing Gateway V2.0 dedicated HTTP timeout policy.
jeet1995 Feb 2, 2026
7adaacb
Introducing Gateway V2.0 dedicated HTTP timeout policy.
jeet1995 Feb 5, 2026
512d06a
Merge branch 'Azure:main' into AzCosmos_HttpTimeoutPolicyChangesGatew…
jeet1995 Feb 9, 2026
94d4cb2
Clean up.
jeet1995 Feb 9, 2026
c38f7ae
Clean up.
jeet1995 Feb 9, 2026
ab70db3
Clean up.
jeet1995 Feb 9, 2026
610ad49
FixFewTests-part2 (#47933)
xinlian12 Feb 9, 2026
c93869b
Remove test-jar dependency with copied code (#47917)
alzimmermsft Feb 9, 2026
deaa2b3
[VoiceLive]Add Foundry Agent integration, filler responses, and reaso…
xitzhang Feb 9, 2026
57bd4f8
Remove mssql-jdbc dependency and update assertj-core (#47945)
alzimmermsft Feb 9, 2026
2781d4f
[VoiceLive]Release 1.0.0-beta.4 (#47946)
xitzhang Feb 9, 2026
d2de5eb
[AutoPR azure-resourcemanager-nginx]-generated-from-SDK Generation - …
azure-sdk Feb 10, 2026
fc2da2f
Update wrong data type (#47937)
sandeepdhamija Feb 10, 2026
9d1c4cf
Deprecating azure-resourcemanager-mixedreality (#47943)
MichaelZp0 Feb 10, 2026
4ae43d4
[Automation] Generate SDK based on TypeSpec 0.39.1 (#47953)
azure-sdk Feb 10, 2026
486f7d2
Increment package versions for mixedreality releases (#47955)
azure-sdk Feb 10, 2026
96edfbb
fixFewTests - Part3 (#47939)
xinlian12 Feb 10, 2026
1999049
Re-enable spring-cloud-azure-starter-monitor for Spring Boot 4 (#47951)
Copilot Feb 10, 2026
3294b99
Clean up.
jeet1995 Feb 10, 2026
7238e33
Clean up.
jeet1995 Feb 10, 2026
d81307b
Migrate azure-search-documents to TypeSpec (#47819)
alzimmermsft Feb 10, 2026
04d1389
Increment package versions for nginx releases (#47965)
azure-sdk Feb 10, 2026
5fb1ad6
Increment package versions for ai releases (#47947)
azure-sdk Feb 10, 2026
e12c45b
escapeNonAscIIPkValueForQueryPlanAndQuery (#47881)
xinlian12 Feb 10, 2026
2ef4708
Deprecate AgriFood FarmBeats SDK and code cleanup (#47935)
samvaity Feb 10, 2026
d472305
Search TypeSpec migration - remove last few BinaryData APIs from publ…
alzimmermsft Feb 10, 2026
2b6c73d
Fix GraalVM native image compatibility for AzureIdentityEnvVars (#47940)
g2vinay Feb 10, 2026
4e7408b
Release azure-cosmos 4.78.0, azure-cosmos-encryption 2.27.0, and Spar…
xinlian12 Feb 11, 2026
2e9ea28
Fix: Include stack trace in token error logs (#47974)
g2vinay Feb 11, 2026
49294c0
[AutoPR azure-resourcemanager-resources-deploymentstacks]-generated-f…
azure-sdk Feb 11, 2026
905a8d6
Set default values for head_sha and repo_url in generate_typespec_pro…
Copilot Feb 11, 2026
7d1046e
Increment package versions for cosmos releases (#47983)
azure-sdk Feb 11, 2026
45ef854
Nregion synchronous commit feature (#47757)
mbhaskar Feb 12, 2026
a1dffe9
Add ConnectionDetails support for EventHubs (#47926)
Copilot Feb 12, 2026
88e14b0
Increment package versions for edgeactions releases (#47991)
azure-sdk Feb 12, 2026
753f744
mgmt network, update api-version to 2025-05-01 (#47831)
v-huizhu2 Feb 12, 2026
f3d135f
[AutoPR azure-resourcemanager-computebulkactions]-generated-from-SDK …
azure-sdk Feb 12, 2026
047ff77
Increment package versions for disconnectedoperations releases (#47992)
azure-sdk Feb 12, 2026
4dd4c40
Increment package versions for network releases (#47994)
azure-sdk Feb 12, 2026
a4bc25d
Deprecating azure-mixedreality-authentication (#47942)
MichaelZp0 Feb 12, 2026
276cee5
Remove all MixedReality SDKs (#47885)
MichaelZp0 Feb 12, 2026
a4cfbdd
Remove Operational Insights from CODEOWNERS (#47989)
ronniegeraghty Feb 12, 2026
54cd75d
Update service owners to AzureSdkOwners in CODEOWNERS (#47988)
ronniegeraghty Feb 12, 2026
70396e5
Fix java - spring - tests by adding Thread.sleep (#47990)
rujche Feb 12, 2026
0aeccb8
Change PRLabel from %Azure Quantum to %Quantum (#47948)
ronniegeraghty Feb 12, 2026
7f09460
Remove commented Device Provisioning Service owners (#47949)
ronniegeraghty Feb 12, 2026
55920c3
Add checkstyle rule to validate serialization method completeness (#4…
Copilot Feb 12, 2026
b4a83ce
Fix pipeline failure about linting-extensions (#48005)
rujche Feb 13, 2026
52002b1
Only publish docs.ms and github.io docs if publishing to Maven (#47997)
danieljurek Feb 13, 2026
dd32b1b
avoidExtraQuery (#47996)
xinlian12 Feb 13, 2026
17fbc78
Sync eng/common directory with azure-sdk-tools for PR 13968 (#48004)
azure-sdk Feb 13, 2026
d8550cc
Configurations: 'specification/codesigning/CodeSigning.Management/ts…
azure-sdk Feb 14, 2026
682229f
[Automation] Generate SDK based on TypeSpec 0.39.2 (#48006)
azure-sdk Feb 14, 2026
4e3c313
[VoiceLive] Update for agent V2, remove foundry tools, rename filler …
xitzhang Feb 14, 2026
81b998a
[VoiceLive] Release 1.0.0-beta.5 (#48013)
xitzhang Feb 14, 2026
416e7b5
Ignore implementation packages when generating docs (#47998)
srnagar Feb 14, 2026
7c4e3cb
Increment package versions for artifactsigning releases (#48017)
azure-sdk Feb 16, 2026
7ecbfd6
[AutoPR azure-resourcemanager-managedops]-generated-from-SDK Generati…
azure-sdk Feb 16, 2026
edcf070
[Kafka connector]AddSupportForThroughputBucket (#48009)
xinlian12 Feb 17, 2026
289c832
mgmt, trustedsigning, update to next preview (#48016)
weidongxu-microsoft Feb 17, 2026
f2e6b14
- Adding nregion feature to changelog (#47987)
mbhaskar Feb 17, 2026
9b1a477
Increment package versions for resources releases (#48020)
azure-sdk Feb 17, 2026
7c3932c
Increment package versions for managedops releases (#48027)
azure-sdk Feb 18, 2026
67073ae
Remove unused UnitSpec from fabric-cosmos-spark-auth_3 (#48010)
Copilot Feb 18, 2026
229de3f
update release date (#48029)
ryazhang-microsoft Feb 18, 2026
5b3d14c
Replace `azd config list` with `azd auth status` in TROUBLESHOOTING.m…
scottaddie Feb 18, 2026
31e1a1b
Bug 47910.count query text block (#47911)
Blackbaud-JasonBodnar Feb 18, 2026
94bc255
Add tests for LAZY indexing mode in Cosmos Java SDK (#48024)
Copilot Feb 18, 2026
c98c130
Storage - STG101 Beta Features (#48019)
ibrandes Feb 18, 2026
9f473ec
Increment package versions for ai releases (#48035)
azure-sdk Feb 18, 2026
85a90f3
Adding tests to associate channel lifecycle with Netty's ReadTimeoutE…
jeet1995 Feb 18, 2026
f21a5ab
Update changelog and README files for multiple Azure Storage SDK comp…
ibrandes Feb 18, 2026
bd4365b
Replace ThreadLocal Collator with instance Collator (#48037)
alzimmermsft Feb 19, 2026
85bd7a0
Update azd section of Identity troubleshooting guide (#48038)
scottaddie Feb 19, 2026
dce90cc
Open Storage - STG101 Beta Release Date Bump (#48045)
ibrandes Feb 19, 2026
beb5bb9
[SparkConnector]updateTransactionalBulkConfig (#48008)
xinlian12 Feb 19, 2026
38064d5
[SparkConnector]IncludeOperationStatusCodeHistoryInStaleProgressLogs …
xinlian12 Feb 19, 2026
9c8603b
Update to use JDK's deafult trust CA store for cert validations (#48046)
samvaity Feb 19, 2026
2f66b3a
Fix Netty ByteBuf leak in RxGatewayStoreModel via doFinally safety ne…
kushagraThapar Feb 20, 2026
00621cb
Adding tests to associate channel lifecycle with Netty's ReadTimeoutE…
jeet1995 Feb 20, 2026
e4bcbe1
Add Azure Artifacts Feed Setup section to CONTRIBUTING.md (#48032)
raych1 Feb 20, 2026
c8c2ec2
Use CFS as the package resolution source (#47901)
raych1 Feb 20, 2026
b44d84a
Fix: PublishDevFeedPackage runs in parallel with VerifyReleaseVersion…
Copilot Feb 20, 2026
dcf8ecd
Add e2ePolicyCfg to GatewayStatistics for timeout policy diagnostics
jeet1995 Feb 21, 2026
fe100e1
Remove implementation/TestSuiteBase.java and consolidate to rx/TestSu…
Copilot Feb 21, 2026
4548099
Part 1: Add multi-parent-channel and retry-parentChannelId tests
jeet1995 Feb 22, 2026
152d405
Part 1: Fix retryUsesConsistentParentChannelId + add evidence MD
jeet1995 Feb 22, 2026
706edd6
OpenSpec: Rectify Part 2 spec — 1s GW V2 connect/acquire timeout bifu…
jeet1995 Feb 22, 2026
1e0cc12
Part 1: ALL 7/7 PASSED — relaxed tc netem assertions for kernel TCP RST
jeet1995 Feb 22, 2026
80460bf
Generating `azure-ai-projects` from latest spec (#47875)
jpalvarezl Feb 23, 2026
2dcce8b
Clean up
jeet1995 Feb 23, 2026
400894e
Merge branch 'main' of https://github.com/jeet1995/azure-sdk-for-java…
jeet1995 Feb 23, 2026
4c2aca9
Merge branch 'Azure:main' into AzCosmos_HttpTimeoutPolicyChangesGatew…
jeet1995 Feb 23, 2026
ceb99ba
Clean up.
jeet1995 Feb 23, 2026
c582a85
Adding tests with manual packet delay tests.
jeet1995 Feb 23, 2026
2d5228f
Addressing comments.
jeet1995 Feb 26, 2026
49bfddb
Clean up.
jeet1995 Feb 27, 2026
467b8b8
Merge branch 'Azure:main' into AzCosmos_HttpTimeoutPolicyChangesGatew…
jeet1995 Feb 27, 2026
cc8c0bd
Clean up.
jeet1995 Feb 27, 2026
6460378
Merge branch 'AzCosmos_HttpTimeoutPolicyChangesGatewayV2' of https://…
jeet1995 Feb 27, 2026
af4b085
Merge branch 'Azure:main' into AzCosmos_HttpTimeoutPolicyChangesGatew…
jeet1995 Feb 28, 2026
794fbbf
Clean up.
jeet1995 Mar 3, 2026
618d088
Merge branch 'AzCosmos_HttpTimeoutPolicyChangesGatewayV2' of https://…
jeet1995 Mar 3, 2026
32dd112
Merge branch 'main' of https://github.com/jeet1995/azure-sdk-for-java…
jeet1995 Mar 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions sdk/cosmos/azure-cosmos-tests/NETWORK_DELAY_TESTING_README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Http2ConnectionLifecycleTests — Network Delay Testing

## What This Tests

`Http2ConnectionLifecycleTests` validates that HTTP/2 parent TCP connections (NioSocketChannel) survive
stream-level ReadTimeoutExceptions triggered by real network delay. Uses Linux `tc netem` to inject
kernel-level packet delay inside a Docker container.

**Key invariant proven:** A real Netty `ReadTimeoutException` on an `Http2StreamChannel` does NOT close
the parent `NioSocketChannel` — the connection pool reuses it for subsequent requests.

## Why Not SDK Fault Injection?

SDK `RESPONSE_DELAY` adds a `Mono.delay()` at the HTTP layer — bytes still flow normally on the wire.
Netty's `ReadTimeoutHandler` never fires because it monitors actual socket I/O, not application-layer delays.
Only `tc netem` creates real kernel-level packet delay that triggers the handler.

## Prerequisites

- Docker Desktop with Linux containers
- Docker memory: **8 GB+**
- A Cosmos DB account with thin client enabled
- Credentials in `sdk/cosmos/cosmos-v4.properties`:
```properties
ACCOUNT_HOST=https://<account>.documents.azure.com:443/
ACCOUNT_KEY=<primary-key>
```

## Build

```bash
cd sdk/cosmos

# Build SDK
mvn clean install -pl azure-cosmos,azure-cosmos-test,azure-cosmos-tests -am \
-DskipTests -Dgpg.skip -Dcheckstyle.skip -Dspotbugs.skip \
-Drevapi.skip -Dmaven.javadoc.skip -Denforcer.skip -Djacoco.skip

# Build Docker image
docker build -t cosmos-netem-test -f azure-cosmos-tests/Dockerfile.netem .

# Generate Linux classpath
mvn dependency:build-classpath -f azure-cosmos-tests/pom.xml -DincludeScope=test
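# Convert Windows paths → Linux paths as seen inside the container (local Maven repo → /root/.m2,
# sdk/cosmos → /workspace, ';' separators → ':'), then save to azure-cosmos-tests/target/cp-linux.txt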
```

## Run

```bash
cd sdk/cosmos

ACCOUNT_HOST=$(grep "^ACCOUNT_HOST" cosmos-v4.properties | cut -d= -f2- | tr -d ' ')
ACCOUNT_KEY=$(grep "^ACCOUNT_KEY" cosmos-v4.properties | cut -d= -f2- | tr -d ' ')

docker run --rm --cap-add=NET_ADMIN --memory 8g \
-v "$(pwd):/workspace" \
-v "$HOME/.m2:/root/.m2" \
-e "ACCOUNT_HOST=$ACCOUNT_HOST" \
-e "ACCOUNT_KEY=$ACCOUNT_KEY" \
cosmos-netem-test bash -c '
cd /workspace &&
CP=$(cat azure-cosmos-tests/target/cp-linux.txt) &&
java --add-opens java.base/java.lang=ALL-UNNAMED \
--add-opens java.base/java.util=ALL-UNNAMED \
--add-opens java.base/java.net=ALL-UNNAMED \
--add-opens java.base/java.io=ALL-UNNAMED \
--add-opens java.base/java.nio=ALL-UNNAMED \
--add-opens java.base/java.util.concurrent=ALL-UNNAMED \
--add-opens java.base/java.util.concurrent.atomic=ALL-UNNAMED \
--add-opens java.base/sun.nio.ch=ALL-UNNAMED \
--add-opens java.base/sun.nio.cs=ALL-UNNAMED \
--add-opens java.base/sun.security.action=ALL-UNNAMED \
--add-opens java.base/sun.util.calendar=ALL-UNNAMED \
-cp "$CP" \
-DACCOUNT_HOST=$ACCOUNT_HOST \
-DACCOUNT_KEY=$ACCOUNT_KEY \
-DCOSMOS.THINCLIENT_ENABLED=true \
-DCOSMOS.HTTP2_ENABLED=true \
org.testng.TestNG /workspace/azure-cosmos-tests/src/test/resources/manual-thinclient-network-delay-testng.xml \
-verbose 2
'
```

## tc netem Commands Used

### Add Global Delay

```bash
tc qdisc add dev eth0 root netem delay 8000ms
```

Delays ALL outbound packets by 8 seconds. This includes TCP SYN, data, ACKs.
The delay causes Netty's `ReadTimeoutHandler` to fire because the client's outbound request
packets and ACKs are held back, so no response bytes reach the socket within the read timeout.

### Remove Delay

```bash
tc qdisc del dev eth0 root netem
```

Restores normal networking. Called in `@AfterMethod` and `@AfterClass` as a safety net.

## Tests

| Test | What It Proves |
|------|---------------|
| `connectionReuseAfterRealNettyTimeout` | Parent NioSocketChannel survives ReadTimeoutException; recovery read uses same `parentChannelId` |
| `multiParentChannelConnectionReuse` | Under concurrent load (>30 streams), multiple parent channels are created and ALL survive timeout |
| `retryUsesConsistentParentChannelId` | Retry attempts (6s→6s→10s) use consistent parent channel(s); pool recovers post-delay |
| `connectionSurvivesE2ETimeoutWithRealDelay` | Parent survives when e2e timeout (7s) AND ReadTimeoutHandler both fire |
| `parentChannelSurvivesE2ECancelWithoutReadTimeout` | Parent survives when e2e cancel (3s) fires BEFORE ReadTimeoutHandler (6s) — stream RST only |

## Important Notes

- Tests run **sequentially** (`parallel="false" thread-count="1"`) — tc netem is interface-global
- `--cap-add=NET_ADMIN` is required for `tc` commands (Linux `CAP_NET_ADMIN` capability)
- Each test creates/closes its own client (`@BeforeMethod`/`@AfterMethod`) for connection pool isolation
- Delay cleanup runs in `finally` blocks AND `@AfterMethod` for reliability
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import com.azure.cosmos.CosmosDiagnosticsContext;
import com.azure.cosmos.CosmosDiagnosticsRequestInfo;
import com.azure.cosmos.CosmosException;
import com.azure.cosmos.DirectConnectionConfig;
import com.azure.cosmos.FlakyTestRetryAnalyzer;
import com.azure.cosmos.TestObject;
import com.azure.cosmos.implementation.AsyncDocumentClient;
Expand Down Expand Up @@ -147,6 +146,16 @@ public static Object[] preferredRegionsConfigProvider() {
return new Object[] {true, false};
}

/**
 * Supplies the operation pairs exercised by the response-delay fault injection test.
 *
 * @return one row per scenario; each row is
 *         {@code { operationType, faultInjectionOperationType }}.
 */
@DataProvider(name = "responseDelayOperationTypeProvider")
public static Object[][] responseDelayOperationTypeProvider() {
    // Each row: the SDK operation to perform, then the fault injection operation type the rule targets.
    final Object[] pointReadCase = { OperationType.Read, FaultInjectionOperationType.READ_ITEM };
    final Object[] queryCase = { OperationType.Query, FaultInjectionOperationType.QUERY_ITEM };
    final Object[] readFeedCase = { OperationType.ReadFeed, FaultInjectionOperationType.READ_FEED_ITEM };

    return new Object[][] { pointReadCase, queryCase, readFeedCase };
}

@Test(groups = {"fi-thinclient-multi-master"}, dataProvider = "faultInjectionServerErrorResponseProvider", timeOut = TIMEOUT)
public void faultInjectionServerErrorRuleTests_ServerErrorResponse(
FaultInjectionServerErrorType serverErrorType,
Expand Down Expand Up @@ -481,52 +490,65 @@ public void faultInjectionServerErrorRuleTests_Partition() throws JsonProcessing

}

@Test(groups = {"fi-thinclient-multi-master"}, timeOut = 4 * TIMEOUT)
public void faultInjectionServerErrorRuleTests_ServerResponseDelay() throws JsonProcessingException {
@Test(groups = {"fi-thinclient-multi-master"}, dataProvider = "responseDelayOperationTypeProvider", timeOut = 4 * TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)
public void faultInjectionServerErrorRuleTests_ServerResponseDelay(
OperationType operationType,
FaultInjectionOperationType faultInjectionOperationType) throws JsonProcessingException {

// define another rule which can simulate timeout
String timeoutRuleId = "serverErrorRule-responseDelay-" + UUID.randomUUID();
FaultInjectionRule timeoutRule =
new FaultInjectionRuleBuilder(timeoutRuleId)
.condition(
new FaultInjectionConditionBuilder()
.connectionType(FaultInjectionConnectionType.GATEWAY)
.operationType(FaultInjectionOperationType.READ_ITEM)
.operationType(faultInjectionOperationType)
.build()
)
.result(
FaultInjectionResultBuilders
.getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY)
.times(1)
.delay(Duration.ofSeconds(61)) // the default time out is 60s
.delay(Duration.ofSeconds(61)) // the default time out is 60s, but Gateway V2 uses 6s
.build()
)
.duration(Duration.ofMinutes(5))
.build();
try {
DirectConnectionConfig directConnectionConfig = DirectConnectionConfig.getDefaultConfig();
directConnectionConfig.setConnectTimeout(Duration.ofSeconds(1));

// create a new item to be used by read operations
TestItem createdItem = TestItem.createNewItem();
// create a new item to be used by operations
TestObject createdItem = TestObject.create();
this.cosmosAsyncContainer.createItem(createdItem).block();

CosmosFaultInjectionHelper.configureFaultInjectionRules(this.cosmosAsyncContainer, Arrays.asList(timeoutRule)).block();
CosmosItemResponse<TestItem> itemResponse =
this.cosmosAsyncContainer.readItem(createdItem.getId(), new PartitionKey(createdItem.getId()), TestItem.class).block();

// With HttpTimeoutPolicyForGatewayV2, the first attempt times out at 6s,
// but since delay is only injected once (times=1), the retry succeeds
CosmosDiagnostics cosmosDiagnostics = this.performDocumentOperation(
this.cosmosAsyncContainer,
operationType,
createdItem,
false);

AssertionsForClassTypes.assertThat(timeoutRule.getHitCount()).isEqualTo(1);
this.validateHitCount(timeoutRule, 1, OperationType.Read, ResourceType.Document);
this.validateHitCount(timeoutRule, 1, operationType, ResourceType.Document);

this.validateFaultInjectionRuleApplied(
itemResponse.getDiagnostics(),
OperationType.Read,
cosmosDiagnostics,
operationType,
HttpConstants.StatusCodes.REQUEST_TIMEOUT,
HttpConstants.SubStatusCodes.GATEWAY_ENDPOINT_READ_TIMEOUT,
timeoutRuleId,
true
);

assertThinClientEndpointUsed(itemResponse.getDiagnostics());
assertThinClientEndpointUsed(cosmosDiagnostics);

// Validate end-to-end latency and final status code from CosmosDiagnosticsContext
CosmosDiagnosticsContext diagnosticsContext = cosmosDiagnostics.getDiagnosticsContext();
AssertionsForClassTypes.assertThat(diagnosticsContext).isNotNull();
AssertionsForClassTypes.assertThat(diagnosticsContext.getDuration()).isNotNull();
AssertionsForClassTypes.assertThat(diagnosticsContext.getDuration()).isLessThan(Duration.ofSeconds(8));
AssertionsForClassTypes.assertThat(diagnosticsContext.getStatusCode()).isBetween(HttpConstants.StatusCodes.OK, HttpConstants.StatusCodes.NOT_MODIFIED);

} finally {
timeoutRule.disable();
Expand Down
Loading
Loading