diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6e71ca6ef..f1592400e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -6,6 +6,7 @@
 # Triggers:
 # - pull_request: when a PR targets main
 # - push: when code is pushed to main
+# - milestone: when milestone metadata changes
 #
 # Notes:
 # Builds against Java 17, 21, and 25.
@@ -15,6 +16,8 @@ run-name: Build - ${{ github.event_name }}
 
 on:
   workflow_dispatch:
+  milestone:
+    types: [created, edited, closed, deleted]
   pull_request:
     branches:
       - main
@@ -38,6 +41,7 @@ on:
 
 permissions:
   contents: read
+  issues: read
   pages: write
   id-token: write
 
@@ -54,6 +58,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v7
+        with:
+          ref: ${{ github.event_name == 'milestone' && github.event.repository.default_branch || github.ref }}
 
       - name: Setup Java ${{ matrix.java }}
         uses: actions/setup-java@v5
@@ -76,6 +82,29 @@ jobs:
         if: ${{ matrix.java == 17 }}
         run: mvn -B -pl sdk -am javadoc:javadoc
 
+      - name: Generate milestone badge
+        if: ${{ matrix.java == 17 }}
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+
+          # Fetch open milestones with due dates, pick the nearest one
+          TITLE=$(gh api repos/${{ github.repository }}/milestones \
+            --jq '[.[] | select(.state == "open" and .due_on != null)] | sort_by(.due_on) | .[0].title // empty')
+
+          if [ -z "$TITLE" ]; then
+            URL="https://img.shields.io/badge/Next%20Milestone-No%20milestone-lightgrey"
+          else
+            # URL-encode the title using jq, then double '-' and '_' so shields.io renders them literally
+            ENCODED=$(echo -n "$TITLE" | jq -sRr @uri | sed 's/-/--/g; s/_/__/g')
+            URL="https://img.shields.io/badge/Next%20Milestone-${ENCODED}-blue"
+          fi
+
+          mkdir -p milestone
+          curl -sfL "$URL" -o milestone/badge.svg
+          echo "Badge generated for: ${TITLE:-No milestone}"
+
       - name: Configure GitHub Pages
         if: ${{ matrix.java == 17 }}
         uses: actions/configure-pages@v5
@@ -83,9 +112,10 @@ jobs:
       - name: Prepare GitHub Pages artifact
         if: ${{ matrix.java == 17 }}
         run: |
-          mkdir -p github-pages/coverage github-pages/javadoc
+          mkdir -p github-pages/coverage github-pages/javadoc github-pages/milestone
           cp -R coverage-report/target/site/jacoco-aggregate/. github-pages/coverage
           cp -R sdk/target/reports/apidocs/. github-pages/javadoc
+          cp -R milestone/. github-pages/milestone
           touch github-pages/.nojekyll
 
       - name: Upload GitHub Pages artifact
@@ -95,7 +125,7 @@ jobs:
           path: github-pages
 
   deploy-pages:
-    if: ${{ github.ref == 'refs/heads/main' }}
+    if: ${{ github.event_name == 'milestone' || github.ref == 'refs/heads/main' }}
     needs: build
     runs-on: ubuntu-latest
     environment:
diff --git a/.github/workflows/update-milestone-badge.yml b/.github/workflows/update-milestone-badge.yml
deleted file mode 100644
index 07467a760..000000000
--- a/.github/workflows/update-milestone-badge.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-name: Update Milestone Badge
-
-on:
-  milestone:
-    types: [created, edited, closed, deleted]
-  schedule:
-    - cron: '0 6 * * *'
-  workflow_dispatch:
-
-permissions:
-  contents: write
-
-jobs:
-  update-badge:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout docs branch
-        uses: actions/checkout@v7
-        with:
-          ref: docs
-          fetch-depth: 1
-
-      - name: Fetch nearest milestone and generate badge
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set -euo pipefail
-
-          # Fetch open milestones with due dates, pick the nearest one
-          TITLE=$(gh api repos/${{ github.repository }}/milestones \
-            --jq '[.[] | select(.state == "open" and .due_on != null)] | sort_by(.due_on) | .[0].title // empty')
-
-          if [ -z "$TITLE" ]; then
-            URL="https://img.shields.io/badge/Next%20Milestone-No%20milestone-lightgrey"
-          else
-            # URL-encode the title using jq, then escape dashes for shields.io
-            ENCODED=$(echo -n "$TITLE" | jq -sRr @uri | sed 's/-/--/g')
-            URL="https://img.shields.io/badge/Next%20Milestone-${ENCODED}-blue"
-          fi
-
-          mkdir -p milestone
-          curl -sfL "$URL" -o milestone/badge.svg
-          echo "Badge generated for: ${TITLE:-No milestone}"
-
-      - name: Commit and push
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          git add milestone/badge.svg
-          git diff --cached --quiet && echo "No changes" && exit 0
-          git commit -m "chore: update milestone badge"
-          git push
diff --git a/README.md b/README.md
index f664b63a7..8fe6bbf25 100644
--- a/README.md
+++ b/README.md
@@ -5,11 +5,11 @@
 [![Build](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/build.yml/badge.svg)](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/build.yml)
 [![Javadoc](https://img.shields.io/badge/JavaDoc-Online-green)](https://aws.github.io/aws-durable-execution-sdk-java/javadoc/)
 [![Maven](https://badges.mvnrepository.com/badge/software.amazon.lambda.durable/aws-durable-execution-sdk-java/badge.svg?label=Maven)](https://mvnrepository.com/artifact/software.amazon.lambda.durable/aws-durable-execution-sdk-java)
-[![Coverage](https://raw.githubusercontent.com/aws/aws-durable-execution-sdk-java/refs/heads/docs/coverage/jacoco.svg)](https://aws.github.io/aws-durable-execution-sdk-java/coverage/)
+[![Coverage](https://aws.github.io/aws-durable-execution-sdk-java/coverage/jacoco.svg)](https://aws.github.io/aws-durable-execution-sdk-java/coverage/)
 [![E2E Tests](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/e2e-tests.yml/badge.svg)](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/e2e-tests.yml)
 [![CodeQL](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/github-code-scanning/codeql)
 [![Spotless](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/check-spotless.yml/badge.svg)](https://github.com/aws/aws-durable-execution-sdk-java/actions/workflows/check-spotless.yml)
-[![Roadmap](https://raw.githubusercontent.com/aws/aws-durable-execution-sdk-java/refs/heads/docs/milestone/badge.svg)](https://github.com/orgs/aws/projects/346/views/6)
+[![Roadmap](https://aws.github.io/aws-durable-execution-sdk-java/milestone/badge.svg)](https://github.com/orgs/aws/projects/346/views/6) Build resilient, long-running AWS Lambda functions that automatically checkpoint progress and resume after failures. Durable functions can run for up to one year while you pay only for active compute time. @@ -110,6 +110,7 @@ See [Deploy Lambda durable functions with Infrastructure as Code](https://docs.a - [Configuration](docs/advanced/configuration.md) - Customize SDK behaviour - [Error Handling](docs/advanced/error-handling.md) - SDK exceptions for handling failures - [Logging](docs/advanced/logging.md) - How to use DurableLogger +- [Migrating from 1.x to 2.x](docs/migration-1.x-to-2.x.md) - Upgrade guide for breaking changes since `v1.2.1` - [Testing](docs/advanced/testing.md) - Utilities for local development and cloud-based integration testing ## Related SDKs diff --git a/docs/migration-1.x-to-2.x.md b/docs/migration-1.x-to-2.x.md new file mode 100644 index 000000000..52e1cfaeb --- /dev/null +++ b/docs/migration-1.x-to-2.x.md @@ -0,0 +1,231 @@ +# Migrating from 1.x to 2.x + +This guide helps teams upgrade from the `1.x` line to `2.x`. + +It focuses on the breaking changes introduced since `v1.2.1`, the most recent `1.x` release at the time of writing. If you are already on a newer `1.x` patch, the same migration steps still apply. + +## Upgrade Checklist + +- Replace `StepConfig.builder().semantics(...)` with the correct `2.x` equivalent for your intended behavior. +- Update log queries, parsers, and dashboards to use `executionArn`, `operationId`, and `operationName`. +- Rebaseline replay-sensitive logging and plugin behavior for child contexts, especially in `parallel()`, `map()`, and nested `runInChildContext(...)` workflows. +- Update any code that expected context validation failures to throw `IllegalDurableOperationException`; in `2.x` they throw `IllegalStateException`. 
+- Verify that custom `SerDes` implementations can deserialize SDK-managed values immediately after serialization, or explicitly opt out of the extra validation pass. + +Useful searches before upgrading: + +```bash +rg -n "\.semantics\(" . +rg -n "durableExecutionArn|contextId|contextName" . +rg -n "replay|isReplaying|onOperationStart|onOperationEnd" . +``` + +## 1. Rename `StepConfig.semantics(...)` to `semanticsPerRetry(...)` + +The deprecated `semantics(...)` builder method is removed in `2.x`. + +This is not always a one-line rename. In `1.x`, `semantics(StepSemantics.AT_MOST_ONCE_PER_RETRY)` behaved like `2.x` `semanticsPerRetry(StepSemantics.AT_MOST_ONCE_PER_RETRY)` plus a `NO_RETRY` policy. + +Before: + +```java +var config = StepConfig.builder() + .semantics(StepSemantics.AT_MOST_ONCE_PER_RETRY) + .build(); +``` + +Naive rename: + +```java +var config = StepConfig.builder() + .semanticsPerRetry(StepSemantics.AT_MOST_ONCE_PER_RETRY) + .build(); +``` + +Behavior-preserving migration for old `1.x` `AT_MOST_ONCE_PER_RETRY` usage: + +```java +var config = StepConfig.builder() + .semanticsPerRetry(StepSemantics.AT_MOST_ONCE_PER_RETRY) + .retryStrategy(RetryStrategies.Presets.NO_RETRY) + .build(); +``` + +Migration rules: + +- Old `semantics(AT_LEAST_ONCE_PER_RETRY)` maps directly to `semanticsPerRetry(AT_LEAST_ONCE_PER_RETRY)`. +- Old `semantics(AT_MOST_ONCE_PER_RETRY)` should usually become `semanticsPerRetry(AT_MOST_ONCE_PER_RETRY)` plus `retryStrategy(RetryStrategies.Presets.NO_RETRY)` if you want to preserve the old `1.x` behavior. +- If you intentionally want the corrected `2.x` per-retry semantics, use `semanticsPerRetry(AT_MOST_ONCE_PER_RETRY)` without forcing `NO_RETRY`. 
+ +What to update: + +- Step configuration builders +- Shared helper methods and wrapper APIs +- Tests that asserted on `config.semantics()` + +If you expose your own configuration layer on top of the SDK, rename it now so downstream users do not inherit the removed `semantics` name. + +## 2. Update logger MDC field names + +The main user-visible breaking change in `2.x` is the logger metadata rename so Java matches the other durable execution SDKs. + +Before: + +```json +{ + "durableExecutionArn": "arn:aws:lambda:...", + "contextId": "child-context-id", + "contextName": "inventory-check" +} +``` + +After: + +```json +{ + "executionArn": "arn:aws:lambda:...", + "operationId": "child-context-id", + "operationName": "inventory-check" +} +``` + +What to update: + +- CloudWatch Logs Insights queries +- Metric filters and alarms +- Log processors and index mappings +- Dashboards and saved searches +- Any custom JSON or MDC parsing + +Important: this rename only applies to logger MDC fields. The SDK API still uses `durableExecutionArn` in places such as `DurableExecutionInput` and plugin invocation records. Do not mechanically rename every `durableExecutionArn` identifier in your codebase. + +### Mixed-version rollout query + +If you need one query that works during a rolling upgrade, use `coalesce(...)`: + +```sql +fields coalesce(executionArn, durableExecutionArn) as executionArn, + coalesce(operationId, contextId) as operationId, + coalesce(operationName, contextName) as operationName +| filter executionArn = "arn:aws:lambda:..." 
+``` + +### Temporary compatibility option + +If you need to preserve the old MDC keys for a short rollout window, configure `LoggerConfig` with `oldKeyNames=true`: + +```java +@Override +protected DurableConfig createConfiguration() { + return DurableConfig.builder() + .withLoggerConfig(new LoggerConfig(true, true)) + .build(); +} +``` + +That can reduce migration risk while dashboards and parsers are being updated, but the recommended end state for `2.x` is the new key set. + +## 3. Rebaseline replay-sensitive logging and replay APIs + +`2.x` uses per-context replay state for logging and plugin callbacks instead of relying on a single global replay view. + +What changes in practice: + +- Replay suppression is more accurate for child contexts. +- A concurrent child context no longer looks like a fresh execution while it is still replaying. +- Custom plugins see replay metadata that better reflects the current child context. +- `StepContext` does not expose replay state anymore. +- Step logs are attempt-based and are never replay-suppressed. + +API impact: + +- `isReplaying()` now belongs on `DurableContext`, not `BaseContext`. +- Code that assumed every context type had `isReplaying()` needs to be updated. +- If you were checking replay state inside step lambdas, move that logic to the surrounding `DurableContext` or redesign it around attempt-based step behavior. + +What to review: + +- Tests that count log lines across replays +- Dashboards that alert on replay log volume +- Custom plugins using replay-sensitive hooks or `isReplayingChildren` +- Nested workflows that use `parallel()`, `map()`, or `runInChildContext(...)` +- Any code that called `isReplaying()` on `BaseContext` or `StepContext` + +The most common upgrade symptom here is not a compile error. It is changed log volume or changed replay-related assertions in tests. + +## 4. 
Update exception handling for context validation failures + +In `2.x`, invalid context usage now throws `IllegalStateException` instead of `IllegalDurableOperationException`. + +This affects validation failures such as nested durable operations from unsupported thread types, for example calling a blocking durable operation from within a step execution. + +What to update: + +- Unit and integration tests that assert exception types +- Error classification logic +- Alerting or telemetry that treated `IllegalDurableOperationException` as an SDK defect signal +- Runbooks that distinguished user misuse from SDK or platform failures + +Before: + +```java +assertThrows(IllegalDurableOperationException.class, future::get); +``` + +After: + +```java +assertThrows(IllegalStateException.class, future::get); +``` + +## 5. Validate serialization round trips earlier + +`2.x` validates serialized results and exceptions with an immediate deserialize pass before checkpointing by default. + +What changes in practice: + +- Serialization problems now fail on first execution instead of surfacing later on replay. +- Custom `SerDes` implementations must be able to deserialize SDK-managed values they serialize. +- Child-context results are validated consistently, including virtual child-context paths. + +This is usually a correctness improvement, but it can surface previously hidden `SerDes` bugs during upgrade. + +### New opt-out configuration + +If your workload is very performance-sensitive and you need to skip the extra validation deserialize pass, you can opt out: + +```java +@Override +protected DurableConfig createConfiguration() { + return DurableConfig.builder() + .withSerializationRoundTripValidation(false) + .build(); +} +``` + +Use that carefully: + +- Disabling validation can hide serialization bugs until replay. +- Custom `SerDes` implementations are still expected to be round-trip safe. + +## Recommended Validation After Upgrading + +1. 
Build and run your test suite with the `2.x` dependency. +2. Exercise one workflow that replays after `wait()`, `waitForCondition()`, or callback resume. +3. Exercise one workflow with child contexts or concurrency. +4. Verify that your log queries and dashboards still resolve the correct execution and operation identifiers. +5. Verify any code that relied on `BaseContext.isReplaying()` or replay suppression inside step lambdas. +6. If you use custom `SerDes`, run one workflow that checkpoints both a successful result and an exception payload. +7. If you use plugins, verify replay-sensitive metadata in at least one replayed child-context scenario. + +## Summary + +Most upgrades are straightforward: + +- `semantics(...)` becomes `semanticsPerRetry(...)`, and old `AT_MOST_ONCE_PER_RETRY` users may also need `RetryStrategies.Presets.NO_RETRY` to preserve `1.x` behavior +- Logger metadata moves to `executionArn`, `operationId`, and `operationName` +- Replay-sensitive logging becomes per-context, `isReplaying()` moves to `DurableContext`, and step logs are no longer replay-suppressed +- Context validation failures now throw `IllegalStateException` instead of `IllegalDurableOperationException` +- Serialization round-trip problems surface earlier by default, with an opt-out via `withSerializationRoundTripValidation(false)` + +If you update those areas first, the `1.x` to `2.x` migration should be low risk.