From 3c954fb6e7e7d8fd0fc0f117fcf3d5f336e0af30 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Sat, 13 Jun 2026 00:33:13 +0700
Subject: [PATCH 1/9] feat: surface gate detail in the workflow run/resume
 --json payload

A paused run was indistinguishable from any other pause in the
machine-readable outcome, and the gate's prompt/options/choice never
left the human-facing stream. Record each step's type in the run
state's step results (one engine line) and, when the run sits at a
gate, add a gate block (step_id/message/options/choice) to the payload
so orchestrators can drive review gates without parsing stdout.

Reference implementation for the proposal in #2964.

Addresses #2964
---
 src/specify_cli/__init__.py         | 26 ++++++++++++-
 src/specify_cli/workflows/engine.py |  1 +
 tests/test_workflows.py             | 57 +++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index e2d0bfb0b9..0cc4c82690 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -2091,13 +2091,37 @@ def _parse_input_values(input_values: list[str] | None) -> dict[str, Any]:
 
 def _workflow_run_payload(state: Any) -> dict[str, Any]:
     """Machine-readable summary of a run/resume outcome."""
-    return {
+    payload = {
         "run_id": state.run_id,
         "workflow_id": state.workflow_id,
         "status": state.status.value,
         "current_step_id": state.current_step_id,
         "current_step_index": state.current_step_index,
     }
+    gate = _gate_outcome(state)
+    if gate is not None:
+        payload["gate"] = gate
+    return payload
+
+
+def _gate_outcome(state: Any) -> dict[str, Any] | None:
+    """Gate detail for the structured outcome, if the run sits at a gate.
+
+    A paused run is otherwise indistinguishable from any other pause in
+    the machine-readable payload; surfacing the gate's prompt, options,
+    and (after an interactive choice) the decision lets orchestrators
+    drive review gates without parsing the human-facing stream.
+    """
+    step = (getattr(state, "step_results", None) or {}).get(state.current_step_id)
+    if not isinstance(step, dict) or step.get("type") != "gate":
+        return None
+    output = step.get("output") or {}
+    return {
+        "step_id": state.current_step_id,
+        "message": output.get("message"),
+        "options": output.get("options"),
+        "choice": output.get("choice"),
+    }
 
 
 def _run_outcome_exit_code(status_value: str) -> int:
diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py
index 0d56f7df70..f463bc66c1 100644
--- a/src/specify_cli/workflows/engine.py
+++ b/src/specify_cli/workflows/engine.py
@@ -676,6 +676,7 @@ def _execute_steps(
 
             # Record step results — prefer resolved values from step output
             step_data = {
+                "type": step_type,
                 "integration": result.output.get("integration")
                 or step_config.get("integration")
                 or context.default_integration,
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 6bd49f197a..4ea0b2c410 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5288,3 +5288,60 @@ def test_resume_failed_run_exits_nonzero(self, tmp_path, monkeypatch):
         assert resumed.exit_code == 1, resumed.stdout
         payload = _json.loads(resumed.stdout)
         assert payload["status"] == "failed"
+
+
+class TestWorkflowRunGateOutcomeJson:
+    """CLI-level tests: the --json payload surfaces gate pauses."""
+
+    _WF_GATE = """
+schema_version: "1.0"
+workflow:
+  id: "gate-json"
+  name: "Gate JSON"
+  version: "1.0.0"
+steps:
+  - id: review
+    type: gate
+    message: "Approve the thing?"
+    options: ["approve", "reject"]
+"""
+
+    _WF_PLAIN = """
+schema_version: "1.0"
+workflow:
+  id: "plain-json"
+  name: "Plain JSON"
+  version: "1.0.0"
+steps:
+  - id: fine
+    type: shell
+    run: "true"
+"""
+
+    def _run_json(self, tmp_path, monkeypatch, content):
+        import json as _json
+        from typer.testing import CliRunner
+        from specify_cli import app
+
+        path = tmp_path / "wf.yml"
+        path.write_text(content, encoding="utf-8")
+        monkeypatch.chdir(tmp_path)
+        runner = CliRunner()
+        result = runner.invoke(app, ["workflow", "run", str(path), "--json"])
+        return _json.loads(result.stdout)
+
+    def test_gate_pause_carries_gate_block(self, tmp_path, monkeypatch):
+        # CliRunner stdin is not a TTY, so the gate pauses for resume.
+        payload = self._run_json(tmp_path, monkeypatch, self._WF_GATE)
+        assert payload["status"] == "paused"
+        assert payload["gate"] == {
+            "step_id": "review",
+            "message": "Approve the thing?",
+            "options": ["approve", "reject"],
+            "choice": None,
+        }
+
+    def test_completed_run_has_no_gate_block(self, tmp_path, monkeypatch):
+        payload = self._run_json(tmp_path, monkeypatch, self._WF_PLAIN)
+        assert payload["status"] == "completed"
+        assert "gate" not in payload

From 609bd08ddfbb8a3ffef5841685bfd98a54563632 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Wed, 17 Jun 2026 09:10:07 +0700
Subject: [PATCH 2/9] fix(workflow): only surface gate detail in --json when
 the run is paused

Address review (#2965): _gate_outcome() emitted a gate block whenever current_step_id pointed at a gate step. Since RunState.current_step_id is never cleared on completion, a completed/failed run whose last step was a gate leaked stale gate detail in run/resume/status --json. Guard on status == paused. Also assert CLI success in the _run_json test helper before JSON-parsing, and add direct coverage for the suppression guard.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 src/specify_cli/__init__.py |  6 ++++++
 tests/test_workflows.py     | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 0cc4c82690..2408f4169f 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -2112,6 +2112,12 @@ def _gate_outcome(state: Any) -> dict[str, Any] | None:
     and (after an interactive choice) the decision lets orchestrators
     drive review gates without parsing the human-facing stream.
     """
+    # Only a run that is actually *paused* sits at a gate awaiting a
+    # decision. RunState.current_step_id is not cleared on completion, so
+    # without this guard a completed/failed run whose last executed step was
+    # a gate would surface stale gate details (in run/resume/status --json).
+    if getattr(state.status, "value", state.status) != "paused":
+        return None
     step = (getattr(state, "step_results", None) or {}).get(state.current_step_id)
     if not isinstance(step, dict) or step.get("type") != "gate":
         return None
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 4ea0b2c410..9c39aa4807 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5328,6 +5328,9 @@ def _run_json(self, tmp_path, monkeypatch, content):
         monkeypatch.chdir(tmp_path)
         runner = CliRunner()
         result = runner.invoke(app, ["workflow", "run", str(path), "--json"])
+        # Assert the CLI succeeded before parsing so a real failure surfaces
+        # the actual output instead of an opaque JSON decode error.
+        assert result.exit_code == 0, result.stdout
         return _json.loads(result.stdout)
 
     def test_gate_pause_carries_gate_block(self, tmp_path, monkeypatch):
@@ -5345,3 +5348,27 @@ def test_completed_run_has_no_gate_block(self, tmp_path, monkeypatch):
         payload = self._run_json(tmp_path, monkeypatch, self._WF_PLAIN)
         assert payload["status"] == "completed"
         assert "gate" not in payload
+
+    def test_gate_block_suppressed_when_run_not_paused(self):
+        # RunState.current_step_id is not cleared on completion, so a
+        # completed/failed run whose last executed step was a gate still
+        # points current_step_id at that gate. The gate block must only be
+        # emitted while the run is actually paused at it.
+        from types import SimpleNamespace
+        from specify_cli import _gate_outcome
+
+        gate_step = {
+            "type": "gate",
+            "output": {"message": "m", "options": ["approve"], "choice": "approve"},
+        }
+
+        def _state(status):
+            return SimpleNamespace(
+                status=SimpleNamespace(value=status),
+                current_step_id="review",
+                step_results={"review": gate_step},
+            )
+
+        assert _gate_outcome(_state("completed")) is None
+        assert _gate_outcome(_state("failed")) is None
+        assert _gate_outcome(_state("paused")) is not None

From 01ad2dee3646014e823559d0aa9228be59d8e025 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Wed, 17 Jun 2026 20:43:55 +0700
Subject: [PATCH 3/9] fix(workflows): surface gate block on aborted runs;
 stabilize message

Address Copilot review:
- `_gate_outcome` now also surfaces the gate block when a run is `aborted`
  by a gate rejection (`on_reject: abort`), not only when `paused`. Abort
  is the only path that sets ABORTED and it leaves current_step_id on the
  gate, so an orchestrator can read the recorded `choice` for the stop.
- Coerce `message` to a string (it may be a non-string YAML literal that
  GateStep only coerces for interpolation) so the JSON schema stays stable.
- Tests: add a CLI-level aborted-path test, a message-coercion test, and
  extend the suppression test to allow `aborted`; share the run helper via
  `_invoke_json` to avoid duplicating the invoke boilerplate.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/specify_cli/__init__.py | 27 +++++++++------
 tests/test_workflows.py     | 68 +++++++++++++++++++++++++++++++------
 2 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 2408f4169f..14679e5f3e 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -2105,26 +2105,31 @@ def _workflow_run_payload(state: Any) -> dict[str, Any]:
 
 
 def _gate_outcome(state: Any) -> dict[str, Any] | None:
-    """Gate detail for the structured outcome, if the run sits at a gate.
+    """Gate detail for the structured outcome, when the run rests at a gate.
 
-    A paused run is otherwise indistinguishable from any other pause in
-    the machine-readable payload; surfacing the gate's prompt, options,
-    and (after an interactive choice) the decision lets orchestrators
-    drive review gates without parsing the human-facing stream.
+    A paused or gate-aborted run is otherwise indistinguishable from any
+    other pause/abort in the machine-readable payload; surfacing the gate's
+    prompt, options, and (after an interactive choice) the decision lets
+    orchestrators drive review gates without parsing the human-facing stream.
     """
-    # Only a run that is actually *paused* sits at a gate awaiting a
-    # decision. RunState.current_step_id is not cleared on completion, so
-    # without this guard a completed/failed run whose last executed step was
-    # a gate would surface stale gate details (in run/resume/status --json).
-    if getattr(state.status, "value", state.status) != "paused":
+    # Two run states rest *on* a gate: `paused` (awaiting a decision) and
+    # `aborted` (a gate rejected with `on_reject: abort` — the only path that
+    # sets ABORTED, leaving current_step_id on that gate). Any other status —
+    # notably `completed`/`failed` — must be suppressed: current_step_id is
+    # not cleared when a run whose last executed step was a gate moves on, so
+    # without this guard it would surface stale detail (run/resume/status).
+    if getattr(state.status, "value", state.status) not in ("paused", "aborted"):
         return None
     step = (getattr(state, "step_results", None) or {}).get(state.current_step_id)
     if not isinstance(step, dict) or step.get("type") != "gate":
         return None
     output = step.get("output") or {}
+    # `message` may be a non-string YAML literal (GateStep coerces it only for
+    # expression interpolation), so normalise it here for a stable JSON schema.
+    message = output.get("message")
     return {
         "step_id": state.current_step_id,
-        "message": output.get("message"),
+        "message": None if message is None else str(message),
         "options": output.get("options"),
         "choice": output.get("choice"),
     }
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 9c39aa4807..3caa1c1c0f 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5318,16 +5318,19 @@ class TestWorkflowRunGateOutcomeJson:
     run: "true"
 """
 
-    def _run_json(self, tmp_path, monkeypatch, content):
-        import json as _json
+    def _invoke_json(self, tmp_path, monkeypatch, content):
         from typer.testing import CliRunner
         from specify_cli import app
 
         path = tmp_path / "wf.yml"
         path.write_text(content, encoding="utf-8")
         monkeypatch.chdir(tmp_path)
-        runner = CliRunner()
-        result = runner.invoke(app, ["workflow", "run", str(path), "--json"])
+        return CliRunner().invoke(app, ["workflow", "run", str(path), "--json"])
+
+    def _run_json(self, tmp_path, monkeypatch, content):
+        import json as _json
+
+        result = self._invoke_json(tmp_path, monkeypatch, content)
         # Assert the CLI succeeded before parsing so a real failure surfaces
         # the actual output instead of an opaque JSON decode error.
         assert result.exit_code == 0, result.stdout
@@ -5349,17 +5352,43 @@ def test_completed_run_has_no_gate_block(self, tmp_path, monkeypatch):
         assert payload["status"] == "completed"
         assert "gate" not in payload
 
-    def test_gate_block_suppressed_when_run_not_paused(self):
-        # RunState.current_step_id is not cleared on completion, so a
-        # completed/failed run whose last executed step was a gate still
-        # points current_step_id at that gate. The gate block must only be
-        # emitted while the run is actually paused at it.
+    def test_gate_abort_carries_gate_block(self, tmp_path, monkeypatch):
+        # An interactive gate the operator rejects ends the run as `aborted`
+        # (on_reject defaults to abort), not `paused`. The JSON surface must
+        # still carry the gate block with the recorded choice so an
+        # orchestrator can see *why* the run stopped.
+        import json as _json
+        from specify_cli.workflows.steps.gate import GateStep
+
+        _force_gate_stdin(monkeypatch, tty=True)
+        monkeypatch.setattr(
+            GateStep, "_prompt", staticmethod(lambda _msg, _opts: "reject")
+        )
+        result = self._invoke_json(tmp_path, monkeypatch, self._WF_GATE)
+        payload = _json.loads(result.stdout)
+        assert payload["status"] == "aborted"
+        assert payload["gate"] == {
+            "step_id": "review",
+            "message": "Approve the thing?",
+            "options": ["approve", "reject"],
+            "choice": "reject",
+        }
+
+    def test_gate_block_emitted_only_when_run_rests_at_gate(self):
+        # A run rests *on* a gate only while `paused` (awaiting a decision) or
+        # `aborted` (gate rejected with on_reject: abort). current_step_id is
+        # not cleared afterwards, so a `completed`/`failed` run whose last
+        # executed step was a gate must NOT surface a stale gate block.
         from types import SimpleNamespace
         from specify_cli import _gate_outcome
 
         gate_step = {
             "type": "gate",
-            "output": {"message": "m", "options": ["approve"], "choice": "approve"},
+            "output": {
+                "message": "m",
+                "options": ["approve", "reject"],
+                "choice": "reject",
+            },
         }
 
         def _state(status):
@@ -5372,3 +5401,22 @@ def _state(status):
         assert _gate_outcome(_state("completed")) is None
         assert _gate_outcome(_state("failed")) is None
         assert _gate_outcome(_state("paused")) is not None
+        assert _gate_outcome(_state("aborted")) is not None
+
+    def test_gate_block_message_coerced_to_string(self):
+        # message may be a non-string YAML literal (e.g. a number); the JSON
+        # surface normalises it so the emitted schema stays stable.
+        from types import SimpleNamespace
+        from specify_cli import _gate_outcome
+
+        state = SimpleNamespace(
+            status=SimpleNamespace(value="paused"),
+            current_step_id="review",
+            step_results={
+                "review": {
+                    "type": "gate",
+                    "output": {"message": 12.5, "options": ["ok"], "choice": None},
+                }
+            },
+        )
+        assert _gate_outcome(state)["message"] == "12.5"

From 4a45ec0df3b0143c52ded2c6547d0fac63a58175 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Wed, 17 Jun 2026 22:48:18 +0700
Subject: [PATCH 4/9] test(workflows): assert clean exit in gate-abort JSON
 test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address Copilot review: the gate-abort test parsed stdout without first
asserting the CLI exited cleanly, so an invoke failure would surface as an
opaque JSON decode error. Route it through `_run_json` (which asserts
exit_code == 0 before parsing) and drop the now-redundant `_invoke_json`
helper — a gate abort emits the payload and returns, so the run exits 0.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/test_workflows.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 3caa1c1c0f..b9632d0ad1 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5318,19 +5318,15 @@ class TestWorkflowRunGateOutcomeJson:
     run: "true"
 """
 
-    def _invoke_json(self, tmp_path, monkeypatch, content):
+    def _run_json(self, tmp_path, monkeypatch, content):
+        import json as _json
         from typer.testing import CliRunner
         from specify_cli import app
 
         path = tmp_path / "wf.yml"
         path.write_text(content, encoding="utf-8")
         monkeypatch.chdir(tmp_path)
-        return CliRunner().invoke(app, ["workflow", "run", str(path), "--json"])
-
-    def _run_json(self, tmp_path, monkeypatch, content):
-        import json as _json
-
-        result = self._invoke_json(tmp_path, monkeypatch, content)
+        result = CliRunner().invoke(app, ["workflow", "run", str(path), "--json"])
         # Assert the CLI succeeded before parsing so a real failure surfaces
         # the actual output instead of an opaque JSON decode error.
         assert result.exit_code == 0, result.stdout
@@ -5356,16 +5352,16 @@ def test_gate_abort_carries_gate_block(self, tmp_path, monkeypatch):
         # An interactive gate the operator rejects ends the run as `aborted`
         # (on_reject defaults to abort), not `paused`. The JSON surface must
         # still carry the gate block with the recorded choice so an
-        # orchestrator can see *why* the run stopped.
-        import json as _json
+        # orchestrator can see *why* the run stopped. The run helper asserts
+        # the CLI exited cleanly before parsing (a gate abort emits the
+        # payload and returns; it does not crash the command).
         from specify_cli.workflows.steps.gate import GateStep
 
         _force_gate_stdin(monkeypatch, tty=True)
         monkeypatch.setattr(
             GateStep, "_prompt", staticmethod(lambda _msg, _opts: "reject")
         )
-        result = self._invoke_json(tmp_path, monkeypatch, self._WF_GATE)
-        payload = _json.loads(result.stdout)
+        payload = self._run_json(tmp_path, monkeypatch, self._WF_GATE)
         assert payload["status"] == "aborted"
         assert payload["gate"] == {
             "step_id": "review",

From 63ea4d2049e95735420de05d5b632bc88dc5a0a5 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Thu, 18 Jun 2026 00:39:52 +0700
Subject: [PATCH 5/9] fix: use result.output in run-helper assert; document
 step_data shape
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address Copilot review:
- `_run_json` asserted with `result.stdout` in the message, but under
  `--json` step output is redirected off stdout — the useful diagnostics
  live on `result.output`. Switch the assertion message to `result.output`
  (the JSON parse still reads stdout), matching the other CLI tests.
- `StepContext.steps` documented a 5-key entry shape; the engine now also
  persists `type` and `status`. Update the docstring to the canonical
  7-key shape so step authors/debuggers see the real record.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/specify_cli/workflows/base.py | 7 ++++---
 tests/test_workflows.py           | 6 ++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/specify_cli/workflows/base.py b/src/specify_cli/workflows/base.py
index b144ca903d..b61fdb1a08 100644
--- a/src/specify_cli/workflows/base.py
+++ b/src/specify_cli/workflows/base.py
@@ -47,9 +47,10 @@ class StepContext:
     #: Resolved workflow inputs (from user prompts / defaults).
     inputs: dict[str, Any] = field(default_factory=dict)
 
-    #: Accumulated step results keyed by step ID.
-    #: Each entry is ``{"integration": ..., "model": ..., "options": ...,
-    #:   "input": ..., "output": ...}``.
+    #: Accumulated step results keyed by step ID. Each entry is the dict the
+    #: engine persists per step:
+    #: ``{"type": ..., "integration": ..., "model": ..., "options": ...,
+    #:   "input": ..., "output": ..., "status": ...}``.
     steps: dict[str, dict[str, Any]] = field(default_factory=dict)
 
     #: Current fan-out item (set only inside fan-out iterations).
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index b9632d0ad1..975b9b103b 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5328,8 +5328,10 @@ def _run_json(self, tmp_path, monkeypatch, content):
         monkeypatch.chdir(tmp_path)
         result = CliRunner().invoke(app, ["workflow", "run", str(path), "--json"])
         # Assert the CLI succeeded before parsing so a real failure surfaces
-        # the actual output instead of an opaque JSON decode error.
-        assert result.exit_code == 0, result.stdout
+        # the actual output instead of an opaque JSON decode error. Use
+        # ``result.output`` for the message: under ``--json`` step output is
+        # redirected off stdout, so the useful diagnostics live there.
+        assert result.exit_code == 0, result.output
         return _json.loads(result.stdout)
 
     def test_gate_pause_carries_gate_block(self, tmp_path, monkeypatch):

From 24d6e8599d485a18961e0b5a793bdc353adf7fdd Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Thu, 18 Jun 2026 00:56:19 +0700
Subject: [PATCH 6/9] =?UTF-8?q?test(workflows):=20align=20gate-abort=20JSO?=
 =?UTF-8?q?N=20test=20with=20aborted=E2=86=92exit-1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After rebasing onto main, a gate abort now emits the --json payload and
then exits non-zero (`_run_outcome_exit_code` maps aborted → 1, from the
merged exit-code work). Give `_run_json` an `expected_exit` parameter
(default 0) so the abort case asserts exit 1 while the paused/completed
cases stay at 0 — keeping a single shared helper rather than duplicating
the invoke boilerplate.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/test_workflows.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 975b9b103b..1075a5fee4 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5318,7 +5318,7 @@ class TestWorkflowRunGateOutcomeJson:
     run: "true"
 """
 
-    def _run_json(self, tmp_path, monkeypatch, content):
+    def _run_json(self, tmp_path, monkeypatch, content, *, expected_exit=0):
         import json as _json
         from typer.testing import CliRunner
         from specify_cli import app
@@ -5327,11 +5327,14 @@ def _run_json(self, tmp_path, monkeypatch, content):
         path.write_text(content, encoding="utf-8")
         monkeypatch.chdir(tmp_path)
         result = CliRunner().invoke(app, ["workflow", "run", str(path), "--json"])
-        # Assert the CLI succeeded before parsing so a real failure surfaces
-        # the actual output instead of an opaque JSON decode error. Use
-        # ``result.output`` for the message: under ``--json`` step output is
-        # redirected off stdout, so the useful diagnostics live there.
-        assert result.exit_code == 0, result.output
+        # Assert the expected exit code before parsing so a real failure
+        # surfaces the actual output instead of an opaque JSON decode error.
+        # A terminal run still emits its JSON payload, then exits non-zero on
+        # ``failed``/``aborted`` (see ``_run_outcome_exit_code``), so callers
+        # pass the expected code. Use ``result.output`` for the message:
+        # under ``--json`` step output is redirected off stdout, so the useful
+        # diagnostics live there.
+        assert result.exit_code == expected_exit, result.output
         return _json.loads(result.stdout)
 
     def test_gate_pause_carries_gate_block(self, tmp_path, monkeypatch):
@@ -5354,16 +5357,18 @@ def test_gate_abort_carries_gate_block(self, tmp_path, monkeypatch):
         # An interactive gate the operator rejects ends the run as `aborted`
         # (on_reject defaults to abort), not `paused`. The JSON surface must
         # still carry the gate block with the recorded choice so an
-        # orchestrator can see *why* the run stopped. The run helper asserts
-        # the CLI exited cleanly before parsing (a gate abort emits the
-        # payload and returns; it does not crash the command).
+        # orchestrator can see *why* the run stopped. A gate abort emits the
+        # payload and then exits non-zero (aborted → exit 1), so the helper
+        # is told to expect exit code 1.
         from specify_cli.workflows.steps.gate import GateStep
 
         _force_gate_stdin(monkeypatch, tty=True)
         monkeypatch.setattr(
             GateStep, "_prompt", staticmethod(lambda _msg, _opts: "reject")
         )
-        payload = self._run_json(tmp_path, monkeypatch, self._WF_GATE)
+        payload = self._run_json(
+            tmp_path, monkeypatch, self._WF_GATE, expected_exit=1
+        )
         assert payload["status"] == "aborted"
         assert payload["gate"] == {
             "step_id": "review",

From d322cf5b00ed4476660748fbf1471259c0e5c368 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Thu, 18 Jun 2026 01:59:40 +0700
Subject: [PATCH 7/9] fix(workflows): backward-compat gate detection +
 normalize gate options

Address Copilot review:
- A run paused by an older version has no persisted step `type`, so
  `_gate_outcome` would never surface its gate block on resume. Add
  `_is_gate_step`: prefer the `type` field, but when it is absent fall back
  to the gate's unique output signature (`on_reject`, written only by
  GateStep). A record with a different known `type` is still not a gate.
- Normalize `options` to a list of strings (mirroring the `message`
  coercion) so an unvalidated workflow with non-string options can't
  destabilize the JSON schema.
- Tests: options coercion, type-less gate detection, and a type-less
  non-gate negative case.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/specify_cli/__init__.py | 28 ++++++++++++++---
 tests/test_workflows.py     | 60 +++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 14679e5f3e..7015247c33 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -2104,6 +2104,24 @@ def _workflow_run_payload(state: Any) -> dict[str, Any]:
     return payload
 
 
+def _is_gate_step(step: dict[str, Any]) -> bool:
+    """Whether a recorded step result is a gate.
+
+    Prefers the persisted ``type`` field, but when it is absent — a run paused
+    by an older version, whose step record predates ``type`` being stored —
+    falls back to the gate's unique output signature: only ``GateStep`` writes
+    an ``on_reject`` key. A record carrying a *different* known ``type`` is not
+    a gate, so the fallback applies only when ``type`` is missing entirely.
+    """
+    step_type = step.get("type")
+    if step_type == "gate":
+        return True
+    if step_type:
+        return False
+    output = step.get("output")
+    return isinstance(output, dict) and "on_reject" in output
+
+
 def _gate_outcome(state: Any) -> dict[str, Any] | None:
     """Gate detail for the structured outcome, when the run rests at a gate.
 
@@ -2121,16 +2139,18 @@ def _gate_outcome(state: Any) -> dict[str, Any] | None:
     if getattr(state.status, "value", state.status) not in ("paused", "aborted"):
         return None
     step = (getattr(state, "step_results", None) or {}).get(state.current_step_id)
-    if not isinstance(step, dict) or step.get("type") != "gate":
+    if not isinstance(step, dict) or not _is_gate_step(step):
         return None
     output = step.get("output") or {}
-    # `message` may be a non-string YAML literal (GateStep coerces it only for
-    # expression interpolation), so normalise it here for a stable JSON schema.
+    # `message` and `options` may be non-string YAML literals in an unvalidated
+    # workflow (GateStep coerces neither for the payload), so normalise both
+    # here for a stable JSON schema: message → str, options → list[str].
     message = output.get("message")
+    options = output.get("options")
     return {
         "step_id": state.current_step_id,
         "message": None if message is None else str(message),
-        "options": output.get("options"),
+        "options": [str(o) for o in options] if isinstance(options, list) else options,
         "choice": output.get("choice"),
     }
 
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 1075a5fee4..5f0f763abb 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5423,3 +5423,63 @@ def test_gate_block_message_coerced_to_string(self):
             },
         )
         assert _gate_outcome(state)["message"] == "12.5"
+
+    def test_gate_block_options_coerced_to_strings(self):
+        # options may be non-string YAML literals in an unvalidated workflow;
+        # the JSON surface normalises them to a list of strings.
+        from types import SimpleNamespace
+        from specify_cli import _gate_outcome
+
+        state = SimpleNamespace(
+            status=SimpleNamespace(value="paused"),
+            current_step_id="review",
+            step_results={
+                "review": {
+                    "type": "gate",
+                    "output": {"message": "m", "options": [1, 2.5], "choice": None},
+                }
+            },
+        )
+        assert _gate_outcome(state)["options"] == ["1", "2.5"]
+
+    def test_gate_block_detected_without_type_field(self):
+        # A run paused by an older version has no persisted step `type`. The
+        # gate is still detected by its unique output signature (`on_reject`),
+        # so resume surfaces the gate block instead of silently dropping it.
+        from types import SimpleNamespace
+        from specify_cli import _gate_outcome
+
+        state = SimpleNamespace(
+            status=SimpleNamespace(value="paused"),
+            current_step_id="review",
+            step_results={
+                "review": {
+                    # no "type" key — pre-dates the field being persisted
+                    "output": {
+                        "message": "Approve?",
+                        "options": ["approve", "reject"],
+                        "on_reject": "abort",
+                        "choice": None,
+                    },
+                }
+            },
+        )
+        gate = _gate_outcome(state)
+        assert gate is not None
+        assert gate["step_id"] == "review"
+        assert gate["options"] == ["approve", "reject"]
+
+    def test_non_gate_step_without_type_is_not_a_gate(self):
+        # A typeless record lacking the gate signature must NOT be mistaken for
+        # a gate (the fallback keys off `on_reject`, which only GateStep writes).
+        from types import SimpleNamespace
+        from specify_cli import _gate_outcome
+
+        state = SimpleNamespace(
+            status=SimpleNamespace(value="paused"),
+            current_step_id="run-tests",
+            step_results={
+                "run-tests": {"output": {"exit_code": 0, "stdout": "ok"}},
+            },
+        )
+        assert _gate_outcome(state) is None

From a97cd1849b766064b1dbe0aa7ba3eded0314fe69 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Thu, 18 Jun 2026 02:24:53 +0700
Subject: [PATCH 8/9] fix(workflows): normalize non-list gate options to a
 stable list[str]

Address Copilot review: the prior options normalization only mapped a
`list`, returning the raw value for any other shape (scalar/tuple), which
contradicted the "stable list[str]" intent. Extract `_normalize_gate_options`:
None stays None; list/tuple maps each element through str; any other scalar
becomes a single-element list (a bare string is one option, never iterated
character-by-character). The emitted schema is now always list[str] | None.
Extend the options test to cover list, tuple, bare string, numeric scalar,
and None.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/specify_cli/__init__.py | 21 +++++++++++++++++---
 tests/test_workflows.py     | 38 ++++++++++++++++++++++++-------------
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 7015247c33..5a68a770e1 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -2144,17 +2144,32 @@ def _gate_outcome(state: Any) -> dict[str, Any] | None:
     output = step.get("output") or {}
     # `message` and `options` may be non-string YAML literals in an unvalidated
     # workflow (GateStep coerces neither for the payload), so normalise both
-    # here for a stable JSON schema: message → str, options → list[str].
+    # here for a stable JSON schema: message → str, options → list[str] | None.
     message = output.get("message")
-    options = output.get("options")
     return {
         "step_id": state.current_step_id,
         "message": None if message is None else str(message),
-        "options": [str(o) for o in options] if isinstance(options, list) else options,
+        "options": _normalize_gate_options(output.get("options")),
         "choice": output.get("choice"),
     }
 
 
+def _normalize_gate_options(options: Any) -> list[str] | None:
+    """Normalise a gate's ``options`` to a stable ``list[str]`` (or ``None``).
+
+    A valid gate stores a list, but an unvalidated workflow could leave a
+    scalar or tuple. ``None`` stays ``None`` (no options); a list/tuple maps
+    each element through ``str``; any other scalar becomes a single-element
+    list — so the emitted JSON schema is always ``list[str] | None``. A bare
+    string is treated as one option, never iterated character-by-character.
+    """
+    if options is None:
+        return None
+    if isinstance(options, (list, tuple)):
+        return [str(o) for o in options]
+    return [str(options)]
+
+
 def _run_outcome_exit_code(status_value: str) -> int:
     """Exit code for a finished run/resume: non-zero on terminal failure.
 
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 5f0f763abb..143938a2f5 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5425,22 +5425,34 @@ def test_gate_block_message_coerced_to_string(self):
         assert _gate_outcome(state)["message"] == "12.5"
 
     def test_gate_block_options_coerced_to_strings(self):
-        # options may be non-string YAML literals in an unvalidated workflow;
-        # the JSON surface normalises them to a list of strings.
+        # options may be non-string / non-list literals in an unvalidated
+        # workflow; the JSON surface always normalises them to list[str] | None
+        # so the emitted schema is stable regardless of the input shape.
         from types import SimpleNamespace
         from specify_cli import _gate_outcome
 
-        state = SimpleNamespace(
-            status=SimpleNamespace(value="paused"),
-            current_step_id="review",
-            step_results={
-                "review": {
-                    "type": "gate",
-                    "output": {"message": "m", "options": [1, 2.5], "choice": None},
-                }
-            },
-        )
-        assert _gate_outcome(state)["options"] == ["1", "2.5"]
+        def _options_payload(options):
+            state = SimpleNamespace(
+                status=SimpleNamespace(value="paused"),
+                current_step_id="review",
+                step_results={
+                    "review": {
+                        "type": "gate",
+                        "output": {
+                            "message": "m",
+                            "options": options,
+                            "choice": None,
+                        },
+                    }
+                },
+            )
+            return _gate_outcome(state)["options"]
+
+        assert _options_payload([1, 2.5]) == ["1", "2.5"]  # list
+        assert _options_payload(("approve", "reject")) == ["approve", "reject"]  # tuple
+        assert _options_payload("approve") == ["approve"]  # bare scalar, not iterated
+        assert _options_payload(7) == ["7"]  # numeric scalar
+        assert _options_payload(None) is None  # absent stays absent
 
     def test_gate_block_detected_without_type_field(self):
         # A run paused by an older version has no persisted step `type`. The

From c4297a0fe161857b47c217ab766e320ef7302b1a Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Thu, 18 Jun 2026 02:50:23 +0700
Subject: [PATCH 9/9] fix(workflows): normalize gate choice to str; portable
 plain-gate test

Address Copilot review:
- `_gate_outcome` normalized `message` and `options` but passed `choice`
  through as-is; an unvalidated gate can record a non-string `choice`,
  which contradicts the stable-schema rationale. Coerce `choice` to
  `str | None` (None still means "no decision yet"), consistent with the
  other two fields. Adds a focused choice-coercion test.
- The plain (no-gate) test workflow used `run: "true"`, which fails under
  cmd.exe on Windows (ShellStep uses shell=True). Use the cross-platform
  `run: "exit 0"` (matching the exit-code suite's workflows).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/specify_cli/__init__.py | 10 ++++++----
 tests/test_workflows.py     | 26 +++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
index 5a68a770e1..b415b9197b 100644
--- a/src/specify_cli/__init__.py
+++ b/src/specify_cli/__init__.py
@@ -2142,15 +2142,17 @@ def _gate_outcome(state: Any) -> dict[str, Any] | None:
     if not isinstance(step, dict) or not _is_gate_step(step):
         return None
     output = step.get("output") or {}
-    # `message` and `options` may be non-string YAML literals in an unvalidated
-    # workflow (GateStep coerces neither for the payload), so normalise both
-    # here for a stable JSON schema: message → str, options → list[str] | None.
+    # `message`, `options`, and `choice` may be non-string YAML literals in an
+    # unvalidated workflow (GateStep coerces none of them for the payload), so
+    # normalise all three for a stable JSON schema: message → str, options →
+    # list[str] | None, choice → str | None (None means no decision yet).
     message = output.get("message")
+    choice = output.get("choice")
     return {
         "step_id": state.current_step_id,
         "message": None if message is None else str(message),
         "options": _normalize_gate_options(output.get("options")),
-        "choice": output.get("choice"),
+        "choice": None if choice is None else str(choice),
     }
 
 
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 143938a2f5..6df0f069be 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -5315,7 +5315,7 @@ class TestWorkflowRunGateOutcomeJson:
 steps:
   - id: fine
     type: shell
-    run: "true"
+    run: "exit 0"
 """
 
     def _run_json(self, tmp_path, monkeypatch, content, *, expected_exit=0):
@@ -5454,6 +5454,30 @@ def _options_payload(options):
         assert _options_payload(7) == ["7"]  # numeric scalar
         assert _options_payload(None) is None  # absent stays absent
 
+    def test_gate_block_choice_coerced_to_string(self):
+        # An unvalidated gate can record a non-string choice; the JSON
+        # surface normalises it to str (and keeps None = no decision yet),
+        # consistent with the message/options normalization.
+        from types import SimpleNamespace
+        from specify_cli import _gate_outcome
+
+        def _choice_payload(choice):
+            state = SimpleNamespace(
+                status=SimpleNamespace(value="paused"),
+                current_step_id="review",
+                step_results={
+                    "review": {
+                        "type": "gate",
+                        "output": {"message": "m", "options": ["ok"], "choice": choice},
+                    }
+                },
+            )
+            return _gate_outcome(state)["choice"]
+
+        assert _choice_payload(None) is None  # no decision yet
+        assert _choice_payload("reject") == "reject"  # normal string passes through
+        assert _choice_payload(2) == "2"  # non-string coerced
+
     def test_gate_block_detected_without_type_field(self):
         # A run paused by an older version has no persisted step `type`. The
         # gate is still detected by its unique output signature (`on_reject`),