diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6e71ca6ef..f1592400e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -6,6 +6,7 @@
# Triggers:
# - pull_request: when a PR targets main
# - push: when code is pushed to main
+# - milestone: when a milestone is created, edited, closed, or deleted
#
# Notes:
# Builds against Java 17, 21, and 25.
@@ -15,6 +16,8 @@ run-name: Build - ${{ github.event_name }}
on:
workflow_dispatch:
+ milestone:
+ types: [created, edited, closed, deleted]
pull_request:
branches:
- main
@@ -38,6 +41,7 @@ on:
permissions:
contents: read
+ issues: read
pages: write
id-token: write
@@ -54,6 +58,8 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v7
+ with:
+ ref: ${{ github.event_name == 'milestone' && github.event.repository.default_branch || github.ref }}
- name: Setup Java ${{ matrix.java }}
uses: actions/setup-java@v5
@@ -76,6 +82,27 @@ jobs:
if: ${{ matrix.java == 17 }}
run: mvn -B -pl sdk -am javadoc:javadoc
+      - name: Generate milestone badge
+        if: ${{ matrix.java == 17 }}
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          # Title of the open milestone with the earliest due date; empty when no open milestone has one.
+          TITLE=$(gh api repos/${{ github.repository }}/milestones \
+            --jq '[.[] | select(.state == "open" and .due_on != null)] | sort_by(.due_on) | .[0].title // empty')
+          # Percent-encode the title, then double '-' and '_' so shields.io renders them literally.
+          if [ -z "$TITLE" ]; then
+            URL="https://img.shields.io/badge/Next%20Milestone-No%20milestone-lightgrey"
+          else
+            ENCODED=$(printf '%s' "$TITLE" | jq -sRr @uri | sed 's/-/--/g; s/_/__/g')
+            URL="https://img.shields.io/badge/Next%20Milestone-${ENCODED}-blue"
+          fi
+          # -f makes curl exit non-zero on an HTTP error, so a failed download fails the step.
+          mkdir -p milestone
+          curl -sfL "$URL" -o milestone/badge.svg
+          echo "Badge generated for: ${TITLE:-No milestone}"
+
- name: Configure GitHub Pages
if: ${{ matrix.java == 17 }}
uses: actions/configure-pages@v5
@@ -83,9 +110,10 @@ jobs:
- name: Prepare GitHub Pages artifact
if: ${{ matrix.java == 17 }}
run: |
- mkdir -p github-pages/coverage github-pages/javadoc
+ mkdir -p github-pages/coverage github-pages/javadoc github-pages/milestone
cp -R coverage-report/target/site/jacoco-aggregate/. github-pages/coverage
cp -R sdk/target/reports/apidocs/. github-pages/javadoc
+ cp -R milestone/. github-pages/milestone
touch github-pages/.nojekyll
- name: Upload GitHub Pages artifact
@@ -95,7 +123,7 @@ jobs:
path: github-pages
deploy-pages:
- if: ${{ github.ref == 'refs/heads/main' }}
+ if: ${{ github.event_name == 'milestone' || github.ref == 'refs/heads/main' }}
needs: build
runs-on: ubuntu-latest
environment:
diff --git a/.github/workflows/update-milestone-badge.yml b/.github/workflows/update-milestone-badge.yml
deleted file mode 100644
index 07467a760..000000000
--- a/.github/workflows/update-milestone-badge.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-name: Update Milestone Badge
-
-on:
- milestone:
- types: [created, edited, closed, deleted]
- schedule:
- - cron: '0 6 * * *'
- workflow_dispatch:
-
-permissions:
- contents: write
-
-jobs:
- update-badge:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout docs branch
- uses: actions/checkout@v7
- with:
- ref: docs
- fetch-depth: 1
-
- - name: Fetch nearest milestone and generate badge
- env:
- GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
- set -euo pipefail
-
- # Fetch open milestones with due dates, pick the nearest one
- TITLE=$(gh api repos/${{ github.repository }}/milestones \
- --jq '[.[] | select(.state == "open" and .due_on != null)] | sort_by(.due_on) | .[0].title // empty')
-
- if [ -z "$TITLE" ]; then
- URL="https://img.shields.io/badge/Next%20Milestone-No%20milestone-lightgrey"
- else
- # URL-encode the title using jq, then escape dashes for shields.io
- ENCODED=$(echo -n "$TITLE" | jq -sRr @uri | sed 's/-/--/g')
- URL="https://img.shields.io/badge/Next%20Milestone-${ENCODED}-blue"
- fi
-
- mkdir -p milestone
- curl -sfL "$URL" -o milestone/badge.svg
- echo "Badge generated for: ${TITLE:-No milestone}"
-
- - name: Commit and push
- run: |
- git config user.name "github-actions[bot]"
- git config user.email "github-actions[bot]@users.noreply.github.com"
- git add milestone/badge.svg
- git diff --cached --quiet && echo "No changes" && exit 0
- git commit -m "chore: update milestone badge"
- git push
diff --git a/README.md b/README.md
index f664b63a7..8fe6bbf25 100644
--- a/README.md
+++ b/README.md
@@ -5,11 +5,11 @@
[](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/build.yml)
[](https://aws.github.io/aws-durable-execution-sdk-java/javadoc/)
[](https://mvnrepository.com/artifact/software.amazon.lambda.durable/aws-durable-execution-sdk-java)
-[](https://aws.github.io/aws-durable-execution-sdk-java/coverage/)
+[](https://aws.github.io/aws-durable-execution-sdk-java/coverage/)
[](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/e2e-tests.yml)
[](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/github-code-scanning/codeql)
[](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/check-spotless.yml)
-[](https://github.com/orgs/aws/projects/346/views/6)
+[](https://github.com/orgs/aws/projects/346/views/6)
Build resilient, long-running AWS Lambda functions that automatically checkpoint progress and resume after failures. Durable functions can run for up to one year while you pay only for active compute time.
@@ -110,6 +110,7 @@ See [Deploy Lambda durable functions with Infrastructure as Code](https://docs.a
- [Configuration](docs/advanced/configuration.md) - Customize SDK behaviour
- [Error Handling](docs/advanced/error-handling.md) - SDK exceptions for handling failures
- [Logging](docs/advanced/logging.md) - How to use DurableLogger
+- [Migrating from 1.x to 2.x](docs/migration-1.x-to-2.x.md) - Upgrade guide for breaking changes since `v1.2.1`
- [Testing](docs/advanced/testing.md) - Utilities for local development and cloud-based integration testing
## Related SDKs
diff --git a/docs/migration-1.x-to-2.x.md b/docs/migration-1.x-to-2.x.md
new file mode 100644
index 000000000..52e1cfaeb
--- /dev/null
+++ b/docs/migration-1.x-to-2.x.md
@@ -0,0 +1,231 @@
+# Migrating from 1.x to 2.x
+
+This guide helps teams upgrade from the `1.x` line to `2.x`.
+
+It focuses on the breaking changes introduced since `v1.2.1`, the most recent `1.x` release at the time of writing. If you are already on a newer `1.x` patch, the same migration steps still apply.
+
+## Upgrade Checklist
+
+- Replace `StepConfig.builder().semantics(...)` with the correct `2.x` equivalent for your intended behavior.
+- Update log queries, parsers, and dashboards to use `executionArn`, `operationId`, and `operationName`.
+- Rebaseline replay-sensitive logging and plugin behavior for child contexts, especially in `parallel()`, `map()`, and nested `runInChildContext(...)` workflows.
+- Update any code that expected context validation failures to throw `IllegalDurableOperationException`; in `2.x` they throw `IllegalStateException`.
+- Verify that custom `SerDes` implementations can deserialize SDK-managed values immediately after serialization, or explicitly opt out of the extra validation pass.
+
+Useful searches before upgrading:
+
+```bash
+rg -n "\.semantics\(" .
+rg -n "durableExecutionArn|contextId|contextName" .
+rg -n "replay|isReplayingChildren|onOperationStart|onOperationEnd" .
+```
+
+## 1. Rename `StepConfig.semantics(...)` to `semanticsPerRetry(...)`
+
+The deprecated `semantics(...)` builder method is removed in `2.x`.
+
+This is not always a one-line rename. In `1.x`, `semantics(StepSemantics.AT_MOST_ONCE_PER_RETRY)` behaved like `2.x` `semanticsPerRetry(StepSemantics.AT_MOST_ONCE_PER_RETRY)` plus a `NO_RETRY` policy.
+
+Before:
+
+```java
+var config = StepConfig.builder()
+ .semantics(StepSemantics.AT_MOST_ONCE_PER_RETRY)
+ .build();
+```
+
+Naive rename:
+
+```java
+var config = StepConfig.builder()
+ .semanticsPerRetry(StepSemantics.AT_MOST_ONCE_PER_RETRY)
+ .build();
+```
+
+Behavior-preserving migration for old `1.x` `AT_MOST_ONCE_PER_RETRY` usage:
+
+```java
+var config = StepConfig.builder()
+ .semanticsPerRetry(StepSemantics.AT_MOST_ONCE_PER_RETRY)
+ .retryStrategy(RetryStrategies.Presets.NO_RETRY)
+ .build();
+```
+
+Migration rules:
+
+- Old `semantics(AT_LEAST_ONCE_PER_RETRY)` maps directly to `semanticsPerRetry(AT_LEAST_ONCE_PER_RETRY)`.
+- Old `semantics(AT_MOST_ONCE_PER_RETRY)` should usually become `semanticsPerRetry(AT_MOST_ONCE_PER_RETRY)` plus `retryStrategy(RetryStrategies.Presets.NO_RETRY)` if you want to preserve the old `1.x` behavior.
+- If you intentionally want the corrected `2.x` per-retry semantics, use `semanticsPerRetry(AT_MOST_ONCE_PER_RETRY)` without forcing `NO_RETRY`.
+
+What to update:
+
+- Step configuration builders
+- Shared helper methods and wrapper APIs
+- Tests that asserted on `config.semantics()`
+
+If you expose your own configuration layer on top of the SDK, rename it now so downstream users do not inherit the removed `semantics` name.
+
+## 2. Update logger MDC field names
+
+The main user-visible breaking change in `2.x` is the rename of the logger metadata fields, which aligns the Java SDK with the other durable execution SDKs.
+
+Before:
+
+```json
+{
+ "durableExecutionArn": "arn:aws:lambda:...",
+ "contextId": "child-context-id",
+ "contextName": "inventory-check"
+}
+```
+
+After:
+
+```json
+{
+ "executionArn": "arn:aws:lambda:...",
+ "operationId": "child-context-id",
+ "operationName": "inventory-check"
+}
+```
+
+What to update:
+
+- CloudWatch Logs Insights queries
+- Metric filters and alarms
+- Log processors and index mappings
+- Dashboards and saved searches
+- Any custom JSON or MDC parsing
+
+Important: this rename only applies to logger MDC fields. The SDK API still uses `durableExecutionArn` in places such as `DurableExecutionInput` and plugin invocation records. Do not mechanically rename every `durableExecutionArn` identifier in your codebase.
+
+### Mixed-version rollout query
+
+If you need one query that works during a rolling upgrade, use `coalesce(...)`:
+
+```sql
+fields coalesce(executionArn, durableExecutionArn) as executionArn,
+ coalesce(operationId, contextId) as operationId,
+ coalesce(operationName, contextName) as operationName
+| filter executionArn = "arn:aws:lambda:..."
+```
+
+### Temporary compatibility option
+
+If you need to preserve the old MDC keys for a short rollout window, configure `LoggerConfig` with `oldKeyNames=true`:
+
+```java
+@Override
+protected DurableConfig createConfiguration() {
+ return DurableConfig.builder()
+ .withLoggerConfig(new LoggerConfig(true, true))
+ .build();
+}
+```
+
+That can reduce migration risk while dashboards and parsers are being updated, but the recommended end state for `2.x` is the new key set.
+
+## 3. Rebaseline replay-sensitive logging and replay APIs
+
+`2.x` uses per-context replay state for logging and plugin callbacks instead of relying on a single global replay view.
+
+What changes in practice:
+
+- Replay suppression is more accurate for child contexts.
+- A concurrent child context no longer looks like a fresh execution while that child is still replaying.
+- Custom plugins see replay metadata that better reflects the current child context.
+- `StepContext` does not expose replay state anymore.
+- Step logs are attempt-based and are never replay-suppressed.
+
+API impact:
+
+- `isReplaying()` is now declared on `DurableContext`, not `BaseContext`.
+- Code that assumed every context type had `isReplaying()` needs to be updated.
+- If you were checking replay state inside step lambdas, move that logic to the surrounding `DurableContext` or redesign it around attempt-based step behavior.
+
+What to review:
+
+- Tests that count log lines across replays
+- Dashboards that alert on replay log volume
+- Custom plugins using replay-sensitive hooks or `isReplayingChildren`
+- Nested workflows that use `parallel()`, `map()`, or `runInChildContext(...)`
+- Any code that called `isReplaying()` on `BaseContext` or `StepContext`
+
+The most common upgrade symptom here is not a compile error. It is changed log volume or changed replay-related assertions in tests.
+
+## 4. Update exception handling for context validation failures
+
+In `2.x`, invalid context usage now throws `IllegalStateException` instead of `IllegalDurableOperationException`.
+
+This affects validation failures such as nested durable operations from unsupported thread types, for example calling a blocking durable operation from within a step execution.
+
+What to update:
+
+- Unit and integration tests that assert exception types
+- Error classification logic
+- Alerting or telemetry that treated `IllegalDurableOperationException` as an SDK defect signal
+- Runbooks that distinguished user misuse from SDK or platform failures
+
+Before:
+
+```java
+assertThrows(IllegalDurableOperationException.class, future::get);
+```
+
+After:
+
+```java
+assertThrows(IllegalStateException.class, future::get);
+```
+
+## 5. Validate serialization round trips earlier
+
+`2.x` validates serialized results and exceptions with an immediate deserialize pass before checkpointing by default.
+
+What changes in practice:
+
+- Serialization problems now fail on first execution instead of surfacing later on replay.
+- Custom `SerDes` implementations must be able to deserialize SDK-managed values they serialize.
+- Child-context results are validated consistently, including virtual child-context paths.
+
+This is usually a correctness improvement, but it can surface previously hidden `SerDes` bugs during upgrade.
+
+### New opt-out configuration
+
+If your workload is very performance-sensitive and you need to skip the extra deserialization pass used for validation, you can opt out:
+
+```java
+@Override
+protected DurableConfig createConfiguration() {
+ return DurableConfig.builder()
+ .withSerializationRoundTripValidation(false)
+ .build();
+}
+```
+
+Use that carefully:
+
+- Disabling validation can hide serialization bugs until replay.
+- Custom `SerDes` implementations are still expected to be round-trip safe.
+
+## Recommended Validation After Upgrading
+
+1. Build and run your test suite with the `2.x` dependency.
+2. Exercise one workflow that replays after `wait()`, `waitForCondition()`, or callback resume.
+3. Exercise one workflow with child contexts or concurrency.
+4. Verify that your log queries and dashboards still resolve the correct execution and operation identifiers.
+5. Verify any code that relied on `BaseContext.isReplaying()` or replay suppression inside step lambdas.
+6. If you use custom `SerDes`, run one workflow that checkpoints both a successful result and an exception payload.
+7. If you use plugins, verify replay-sensitive metadata in at least one replayed child-context scenario.
+
+## Summary
+
+Most upgrades are straightforward:
+
+- `semantics(...)` becomes `semanticsPerRetry(...)`, and old `AT_MOST_ONCE_PER_RETRY` users may also need `RetryStrategies.Presets.NO_RETRY` to preserve `1.x` behavior
+- Logger metadata moves to `executionArn`, `operationId`, and `operationName`
+- Replay-sensitive logging becomes per-context, `isReplaying()` moves to `DurableContext`, and step logs are no longer replay-suppressed
+- Context validation failures now throw `IllegalStateException` instead of `IllegalDurableOperationException`
+- Serialization round-trip problems surface earlier by default, with an opt-out via `withSerializationRoundTripValidation(false)`
+
+If you update those areas first, the `1.x` to `2.x` migration should be low risk.