From 3c954fb6e7e7d8fd0fc0f117fcf3d5f336e0af30 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Sat, 13 Jun 2026 00:33:13 +0700 Subject: [PATCH 1/9] feat: surface gate detail in the workflow run/resume --json payload A paused run was indistinguishable from any other pause in the machine-readable outcome, and the gate's prompt/options/choice never left the human-facing stream. Record each step's type in the run state's step results (one engine line) and, when the run sits at a gate, add a gate block (step_id/message/options/choice) to the payload so orchestrators can drive review gates without parsing stdout. Reference implementation for the proposal in #2964. Addresses #2964 --- src/specify_cli/__init__.py | 26 ++++++++++++- src/specify_cli/workflows/engine.py | 1 + tests/test_workflows.py | 57 +++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index e2d0bfb0b9..0cc4c82690 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -2091,13 +2091,37 @@ def _parse_input_values(input_values: list[str] | None) -> dict[str, Any]: def _workflow_run_payload(state: Any) -> dict[str, Any]: """Machine-readable summary of a run/resume outcome.""" - return { + payload = { "run_id": state.run_id, "workflow_id": state.workflow_id, "status": state.status.value, "current_step_id": state.current_step_id, "current_step_index": state.current_step_index, } + gate = _gate_outcome(state) + if gate is not None: + payload["gate"] = gate + return payload + + +def _gate_outcome(state: Any) -> dict[str, Any] | None: + """Gate detail for the structured outcome, if the run sits at a gate. + + A paused run is otherwise indistinguishable from any other pause in + the machine-readable payload; surfacing the gate's prompt, options, + and (after an interactive choice) the decision lets orchestrators + drive review gates without parsing the human-facing stream. + """ + step = (getattr(state, "step_results", None) or {}).get(state.current_step_id) + if not isinstance(step, dict) or step.get("type") != "gate": + return None + output = step.get("output") or {} + return { + "step_id": state.current_step_id, + "message": output.get("message"), + "options": output.get("options"), + "choice": output.get("choice"), + } def _run_outcome_exit_code(status_value: str) -> int: diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py index 0d56f7df70..f463bc66c1 100644 --- a/src/specify_cli/workflows/engine.py +++ b/src/specify_cli/workflows/engine.py @@ -676,6 +676,7 @@ def _execute_steps( # Record step results — prefer resolved values from step output step_data = { + "type": step_type, "integration": result.output.get("integration") or step_config.get("integration") or context.default_integration, diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 6bd49f197a..4ea0b2c410 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5288,3 +5288,60 @@ def test_resume_failed_run_exits_nonzero(self, tmp_path, monkeypatch): assert resumed.exit_code == 1, resumed.stdout payload = _json.loads(resumed.stdout) assert payload["status"] == "failed" + + +class TestWorkflowRunGateOutcomeJson: + """CLI-level tests: the --json payload surfaces gate pauses.""" + + _WF_GATE = """ +schema_version: "1.0" +workflow: + id: "gate-json" + name: "Gate JSON" + version: "1.0.0" +steps: + - id: review + type: gate + message: "Approve the thing?" + options: ["approve", "reject"] +""" + + _WF_PLAIN = """ +schema_version: "1.0" +workflow: + id: "plain-json" + name: "Plain JSON" + version: "1.0.0" +steps: + - id: fine + type: shell + run: "true" +""" + + def _run_json(self, tmp_path, monkeypatch, content): + import json as _json + from typer.testing import CliRunner + from specify_cli import app + + path = tmp_path / "wf.yml" + path.write_text(content, encoding="utf-8") + monkeypatch.chdir(tmp_path) + runner = CliRunner() + result = runner.invoke(app, ["workflow", "run", str(path), "--json"]) + return _json.loads(result.stdout) + + def test_gate_pause_carries_gate_block(self, tmp_path, monkeypatch): + # CliRunner stdin is not a TTY, so the gate pauses for resume. + payload = self._run_json(tmp_path, monkeypatch, self._WF_GATE) + assert payload["status"] == "paused" + assert payload["gate"] == { + "step_id": "review", + "message": "Approve the thing?", + "options": ["approve", "reject"], + "choice": None, + } + + def test_completed_run_has_no_gate_block(self, tmp_path, monkeypatch): + payload = self._run_json(tmp_path, monkeypatch, self._WF_PLAIN) + assert payload["status"] == "completed" + assert "gate" not in payload From 609bd08ddfbb8a3ffef5841685bfd98a54563632 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Wed, 17 Jun 2026 09:10:07 +0700 Subject: [PATCH 2/9] fix(workflow): only surface gate detail in --json when the run is paused Address review (#2965): _gate_outcome() emitted a gate block whenever current_step_id pointed at a gate step. Since RunState.current_step_id is never cleared on completion, a completed/failed run whose last step was a gate leaked stale gate detail in run/resume/status --json. Guard on status == paused. Also assert CLI success in the _run_json test helper before JSON-parsing, and add direct coverage for the suppression guard. Co-Authored-By: Claude Fable 5 --- src/specify_cli/__init__.py | 6 ++++++ tests/test_workflows.py | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 0cc4c82690..2408f4169f 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -2112,6 +2112,12 @@ def _gate_outcome(state: Any) -> dict[str, Any] | None: and (after an interactive choice) the decision lets orchestrators drive review gates without parsing the human-facing stream. """ + # Only a run that is actually *paused* sits at a gate awaiting a + # decision. RunState.current_step_id is not cleared on completion, so + # without this guard a completed/failed run whose last executed step was + # a gate would surface stale gate details (in run/resume/status --json). + if getattr(state.status, "value", state.status) != "paused": + return None step = (getattr(state, "step_results", None) or {}).get(state.current_step_id) if not isinstance(step, dict) or step.get("type") != "gate": return None diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 4ea0b2c410..9c39aa4807 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5328,6 +5328,9 @@ def _run_json(self, tmp_path, monkeypatch, content): monkeypatch.chdir(tmp_path) runner = CliRunner() result = runner.invoke(app, ["workflow", "run", str(path), "--json"]) + # Assert the CLI succeeded before parsing so a real failure surfaces + # the actual output instead of an opaque JSON decode error. + assert result.exit_code == 0, result.stdout return _json.loads(result.stdout) def test_gate_pause_carries_gate_block(self, tmp_path, monkeypatch): @@ -5345,3 +5348,27 @@ def test_completed_run_has_no_gate_block(self, tmp_path, monkeypatch): payload = self._run_json(tmp_path, monkeypatch, self._WF_PLAIN) assert payload["status"] == "completed" assert "gate" not in payload + + def test_gate_block_suppressed_when_run_not_paused(self): + # RunState.current_step_id is not cleared on completion, so a + # completed/failed run whose last executed step was a gate still + # points current_step_id at that gate. The gate block must only be + # emitted while the run is actually paused at it. + from types import SimpleNamespace + from specify_cli import _gate_outcome + + gate_step = { + "type": "gate", + "output": {"message": "m", "options": ["approve"], "choice": "approve"}, + } + + def _state(status): + return SimpleNamespace( + status=SimpleNamespace(value=status), + current_step_id="review", + step_results={"review": gate_step}, + ) + + assert _gate_outcome(_state("completed")) is None + assert _gate_outcome(_state("failed")) is None + assert _gate_outcome(_state("paused")) is not None From 01ad2dee3646014e823559d0aa9228be59d8e025 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Wed, 17 Jun 2026 20:43:55 +0700 Subject: [PATCH 3/9] fix(workflows): surface gate block on aborted runs; stabilize message Address Copilot review: - `_gate_outcome` now also surfaces the gate block when a run is `aborted` by a gate rejection (`on_reject: abort`), not only when `paused`. Abort is the only path that sets ABORTED and it leaves current_step_id on the gate, so an orchestrator can read the recorded `choice` for the stop. - Coerce `message` to a string (it may be a non-string YAML literal that GateStep only coerces for interpolation) so the JSON schema stays stable. - Tests: add a CLI-level aborted-path test, a message-coercion test, and extend the suppression test to allow `aborted`; share the run helper via `_invoke_json` to avoid duplicating the invoke boilerplate. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/__init__.py | 27 +++++++++------ tests/test_workflows.py | 68 +++++++++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 21 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 2408f4169f..14679e5f3e 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -2105,26 +2105,31 @@ def _workflow_run_payload(state: Any) -> dict[str, Any]: def _gate_outcome(state: Any) -> dict[str, Any] | None: - """Gate detail for the structured outcome, if the run sits at a gate. + """Gate detail for the structured outcome, when the run rests at a gate. - A paused run is otherwise indistinguishable from any other pause in - the machine-readable payload; surfacing the gate's prompt, options, - and (after an interactive choice) the decision lets orchestrators - drive review gates without parsing the human-facing stream. + A paused or gate-aborted run is otherwise indistinguishable from any + other pause/abort in the machine-readable payload; surfacing the gate's + prompt, options, and (after an interactive choice) the decision lets + orchestrators drive review gates without parsing the human-facing stream. """ - # Only a run that is actually *paused* sits at a gate awaiting a - # decision. RunState.current_step_id is not cleared on completion, so - # without this guard a completed/failed run whose last executed step was - # a gate would surface stale gate details (in run/resume/status --json). - if getattr(state.status, "value", state.status) != "paused": + # Two run states rest *on* a gate: `paused` (awaiting a decision) and + # `aborted` (a gate rejected with `on_reject: abort` — the only path that + # sets ABORTED, leaving current_step_id on that gate). Any other status — + # notably `completed`/`failed` — must be suppressed: current_step_id is + # not cleared when a run whose last executed step was a gate moves on, so + # without this guard it would surface stale detail (run/resume/status). + if getattr(state.status, "value", state.status) not in ("paused", "aborted"): return None step = (getattr(state, "step_results", None) or {}).get(state.current_step_id) if not isinstance(step, dict) or step.get("type") != "gate": return None output = step.get("output") or {} + # `message` may be a non-string YAML literal (GateStep coerces it only for + # expression interpolation), so normalise it here for a stable JSON schema. + message = output.get("message") return { "step_id": state.current_step_id, - "message": output.get("message"), + "message": None if message is None else str(message), "options": output.get("options"), "choice": output.get("choice"), } diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 9c39aa4807..3caa1c1c0f 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5318,16 +5318,19 @@ class TestWorkflowRunGateOutcomeJson: run: "true" """ - def _run_json(self, tmp_path, monkeypatch, content): - import json as _json + def _invoke_json(self, tmp_path, monkeypatch, content): from typer.testing import CliRunner from specify_cli import app path = tmp_path / "wf.yml" path.write_text(content, encoding="utf-8") monkeypatch.chdir(tmp_path) - runner = CliRunner() - result = runner.invoke(app, ["workflow", "run", str(path), "--json"]) + return CliRunner().invoke(app, ["workflow", "run", str(path), "--json"]) + + def _run_json(self, tmp_path, monkeypatch, content): + import json as _json + + result = self._invoke_json(tmp_path, monkeypatch, content) # Assert the CLI succeeded before parsing so a real failure surfaces # the actual output instead of an opaque JSON decode error. assert result.exit_code == 0, result.stdout @@ -5349,17 +5352,43 @@ def test_completed_run_has_no_gate_block(self, tmp_path, monkeypatch): assert payload["status"] == "completed" assert "gate" not in payload - def test_gate_block_suppressed_when_run_not_paused(self): - # RunState.current_step_id is not cleared on completion, so a - # completed/failed run whose last executed step was a gate still - # points current_step_id at that gate. The gate block must only be - # emitted while the run is actually paused at it. + def test_gate_abort_carries_gate_block(self, tmp_path, monkeypatch): + # An interactive gate the operator rejects ends the run as `aborted` + # (on_reject defaults to abort), not `paused`. The JSON surface must + # still carry the gate block with the recorded choice so an + # orchestrator can see *why* the run stopped. + import json as _json + from specify_cli.workflows.steps.gate import GateStep + + _force_gate_stdin(monkeypatch, tty=True) + monkeypatch.setattr( + GateStep, "_prompt", staticmethod(lambda _msg, _opts: "reject") + ) + result = self._invoke_json(tmp_path, monkeypatch, self._WF_GATE) + payload = _json.loads(result.stdout) + assert payload["status"] == "aborted" + assert payload["gate"] == { + "step_id": "review", + "message": "Approve the thing?", + "options": ["approve", "reject"], + "choice": "reject", + } + + def test_gate_block_emitted_only_when_run_rests_at_gate(self): + # A run rests *on* a gate only while `paused` (awaiting a decision) or + # `aborted` (gate rejected with on_reject: abort). current_step_id is + # not cleared afterwards, so a `completed`/`failed` run whose last + # executed step was a gate must NOT surface a stale gate block. from types import SimpleNamespace from specify_cli import _gate_outcome gate_step = { "type": "gate", - "output": {"message": "m", "options": ["approve"], "choice": "approve"}, + "output": { + "message": "m", + "options": ["approve", "reject"], + "choice": "reject", + }, } def _state(status): @@ -5372,3 +5401,22 @@ def _state(status): assert _gate_outcome(_state("completed")) is None assert _gate_outcome(_state("failed")) is None assert _gate_outcome(_state("paused")) is not None + assert _gate_outcome(_state("aborted")) is not None + + def test_gate_block_message_coerced_to_string(self): + # message may be a non-string YAML literal (e.g. a number); the JSON + # surface normalises it so the emitted schema stays stable. + from types import SimpleNamespace + from specify_cli import _gate_outcome + + state = SimpleNamespace( + status=SimpleNamespace(value="paused"), + current_step_id="review", + step_results={ + "review": { + "type": "gate", + "output": {"message": 12.5, "options": ["ok"], "choice": None}, + } + }, + ) + assert _gate_outcome(state)["message"] == "12.5" From 4a45ec0df3b0143c52ded2c6547d0fac63a58175 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Wed, 17 Jun 2026 22:48:18 +0700 Subject: [PATCH 4/9] test(workflows): assert clean exit in gate-abort JSON test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot review: the gate-abort test parsed stdout without first asserting the CLI exited cleanly, so an invoke failure would surface as an opaque JSON decode error. Route it through `_run_json` (which asserts exit_code == 0 before parsing) and drop the now-redundant `_invoke_json` helper — a gate abort emits the payload and returns, so the run exits 0. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_workflows.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 3caa1c1c0f..b9632d0ad1 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5318,19 +5318,15 @@ class TestWorkflowRunGateOutcomeJson: run: "true" """ - def _invoke_json(self, tmp_path, monkeypatch, content): + def _run_json(self, tmp_path, monkeypatch, content): + import json as _json from typer.testing import CliRunner from specify_cli import app path = tmp_path / "wf.yml" path.write_text(content, encoding="utf-8") monkeypatch.chdir(tmp_path) - return CliRunner().invoke(app, ["workflow", "run", str(path), "--json"]) - - def _run_json(self, tmp_path, monkeypatch, content): - import json as _json - - result = self._invoke_json(tmp_path, monkeypatch, content) + result = CliRunner().invoke(app, ["workflow", "run", str(path), "--json"]) # Assert the CLI succeeded before parsing so a real failure surfaces # the actual output instead of an opaque JSON decode error. assert result.exit_code == 0, result.stdout @@ -5356,16 +5352,16 @@ def test_gate_abort_carries_gate_block(self, tmp_path, monkeypatch): # An interactive gate the operator rejects ends the run as `aborted` # (on_reject defaults to abort), not `paused`. The JSON surface must # still carry the gate block with the recorded choice so an - # orchestrator can see *why* the run stopped. - import json as _json + # orchestrator can see *why* the run stopped. The run helper asserts + # the CLI exited cleanly before parsing (a gate abort emits the + # payload and returns; it does not crash the command). from specify_cli.workflows.steps.gate import GateStep _force_gate_stdin(monkeypatch, tty=True) monkeypatch.setattr( GateStep, "_prompt", staticmethod(lambda _msg, _opts: "reject") ) - result = self._invoke_json(tmp_path, monkeypatch, self._WF_GATE) - payload = _json.loads(result.stdout) + payload = self._run_json(tmp_path, monkeypatch, self._WF_GATE) assert payload["status"] == "aborted" assert payload["gate"] == { "step_id": "review", From 63ea4d2049e95735420de05d5b632bc88dc5a0a5 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Thu, 18 Jun 2026 00:39:52 +0700 Subject: [PATCH 5/9] fix: use result.output in run-helper assert; document step_data shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot review: - `_run_json` asserted with `result.stdout` in the message, but under `--json` step output is redirected off stdout — the useful diagnostics live on `result.output`. Switch the assertion message to `result.output` (the JSON parse still reads stdout), matching the other CLI tests. - `StepContext.steps` documented a 5-key entry shape; the engine now also persists `type` and `status`. Update the docstring to the canonical 7-key shape so step authors/debuggers see the real record. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/workflows/base.py | 7 ++++--- tests/test_workflows.py | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/specify_cli/workflows/base.py b/src/specify_cli/workflows/base.py index b144ca903d..b61fdb1a08 100644 --- a/src/specify_cli/workflows/base.py +++ b/src/specify_cli/workflows/base.py @@ -47,9 +47,10 @@ class StepContext: #: Resolved workflow inputs (from user prompts / defaults). inputs: dict[str, Any] = field(default_factory=dict) - #: Accumulated step results keyed by step ID. - #: Each entry is ``{"integration": ..., "model": ..., "options": ..., - #: "input": ..., "output": ...}``. + #: Accumulated step results keyed by step ID. Each entry is the dict the + #: engine persists per step: + #: ``{"type": ..., "integration": ..., "model": ..., "options": ..., + #: "input": ..., "output": ..., "status": ...}``. steps: dict[str, dict[str, Any]] = field(default_factory=dict) #: Current fan-out item (set only inside fan-out iterations). diff --git a/tests/test_workflows.py b/tests/test_workflows.py index b9632d0ad1..975b9b103b 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5328,8 +5328,10 @@ def _run_json(self, tmp_path, monkeypatch, content): monkeypatch.chdir(tmp_path) result = CliRunner().invoke(app, ["workflow", "run", str(path), "--json"]) # Assert the CLI succeeded before parsing so a real failure surfaces - # the actual output instead of an opaque JSON decode error. - assert result.exit_code == 0, result.stdout + # the actual output instead of an opaque JSON decode error. Use + # ``result.output`` for the message: under ``--json`` step output is + # redirected off stdout, so the useful diagnostics live there. + assert result.exit_code == 0, result.output return _json.loads(result.stdout) def test_gate_pause_carries_gate_block(self, tmp_path, monkeypatch): From 24d6e8599d485a18961e0b5a793bdc353adf7fdd Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Thu, 18 Jun 2026 00:56:19 +0700 Subject: [PATCH 6/9] =?UTF-8?q?test(workflows):=20align=20gate-abort=20JSO?= =?UTF-8?q?N=20test=20with=20aborted=E2=86=92exit-1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After rebasing onto main, a gate abort now emits the --json payload and then exits non-zero (`_run_outcome_exit_code` maps aborted → 1, from the merged exit-code work). Give `_run_json` an `expected_exit` parameter (default 0) so the abort case asserts exit 1 while the paused/completed cases stay at 0 — keeping a single shared helper rather than duplicating the invoke boilerplate. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_workflows.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 975b9b103b..1075a5fee4 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5318,7 +5318,7 @@ class TestWorkflowRunGateOutcomeJson: run: "true" """ - def _run_json(self, tmp_path, monkeypatch, content): + def _run_json(self, tmp_path, monkeypatch, content, *, expected_exit=0): import json as _json from typer.testing import CliRunner from specify_cli import app @@ -5327,11 +5327,14 @@ def _run_json(self, tmp_path, monkeypatch, content): path.write_text(content, encoding="utf-8") monkeypatch.chdir(tmp_path) result = CliRunner().invoke(app, ["workflow", "run", str(path), "--json"]) - # Assert the CLI succeeded before parsing so a real failure surfaces - # the actual output instead of an opaque JSON decode error. Use - # ``result.output`` for the message: under ``--json`` step output is - # redirected off stdout, so the useful diagnostics live there. - assert result.exit_code == 0, result.output + # Assert the expected exit code before parsing so a real failure + # surfaces the actual output instead of an opaque JSON decode error. + # A terminal run still emits its JSON payload, then exits non-zero on + # ``failed``/``aborted`` (see ``_run_outcome_exit_code``), so callers + # pass the expected code. Use ``result.output`` for the message: + # under ``--json`` step output is redirected off stdout, so the useful + # diagnostics live there. + assert result.exit_code == expected_exit, result.output return _json.loads(result.stdout) def test_gate_pause_carries_gate_block(self, tmp_path, monkeypatch): @@ -5354,16 +5357,18 @@ def test_gate_abort_carries_gate_block(self, tmp_path, monkeypatch): # An interactive gate the operator rejects ends the run as `aborted` # (on_reject defaults to abort), not `paused`. The JSON surface must # still carry the gate block with the recorded choice so an - # orchestrator can see *why* the run stopped. The run helper asserts - # the CLI exited cleanly before parsing (a gate abort emits the - # payload and returns; it does not crash the command). + # orchestrator can see *why* the run stopped. A gate abort emits the + # payload and then exits non-zero (aborted → exit 1), so the helper + # is told to expect exit code 1. from specify_cli.workflows.steps.gate import GateStep _force_gate_stdin(monkeypatch, tty=True) monkeypatch.setattr( GateStep, "_prompt", staticmethod(lambda _msg, _opts: "reject") ) - payload = self._run_json(tmp_path, monkeypatch, self._WF_GATE) + payload = self._run_json( + tmp_path, monkeypatch, self._WF_GATE, expected_exit=1 + ) assert payload["status"] == "aborted" assert payload["gate"] == { "step_id": "review", From d322cf5b00ed4476660748fbf1471259c0e5c368 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Thu, 18 Jun 2026 01:59:40 +0700 Subject: [PATCH 7/9] fix(workflows): backward-compat gate detection + normalize gate options Address Copilot review: - A run paused by an older version has no persisted step `type`, so `_gate_outcome` would never surface its gate block on resume. Add `_is_gate_step`: prefer the `type` field, but when it is absent fall back to the gate's unique output signature (`on_reject`, written only by GateStep). A record with a different known `type` is still not a gate. - Normalize `options` to a list of strings (mirroring the `message` coercion) so an unvalidated workflow with non-string options can't destabilize the JSON schema. - Tests: options coercion, type-less gate detection, and a type-less non-gate negative case. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/__init__.py | 28 ++++++++++++++--- tests/test_workflows.py | 60 +++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 14679e5f3e..7015247c33 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -2104,6 +2104,24 @@ def _workflow_run_payload(state: Any) -> dict[str, Any]: return payload +def _is_gate_step(step: dict[str, Any]) -> bool: + """Whether a recorded step result is a gate. + + Prefers the persisted ``type`` field, but when it is absent — a run paused + by an older version, whose step record predates ``type`` being stored — + falls back to the gate's unique output signature: only ``GateStep`` writes + an ``on_reject`` key. A record carrying a *different* known ``type`` is not + a gate, so the fallback applies only when ``type`` is missing entirely. + """ + step_type = step.get("type") + if step_type == "gate": + return True + if step_type: + return False + output = step.get("output") + return isinstance(output, dict) and "on_reject" in output + + def _gate_outcome(state: Any) -> dict[str, Any] | None: """Gate detail for the structured outcome, when the run rests at a gate. @@ -2121,16 +2139,18 @@ def _gate_outcome(state: Any) -> dict[str, Any] | None: if getattr(state.status, "value", state.status) not in ("paused", "aborted"): return None step = (getattr(state, "step_results", None) or {}).get(state.current_step_id) - if not isinstance(step, dict) or step.get("type") != "gate": + if not isinstance(step, dict) or not _is_gate_step(step): return None output = step.get("output") or {} - # `message` may be a non-string YAML literal (GateStep coerces it only for - # expression interpolation), so normalise it here for a stable JSON schema. + # `message` and `options` may be non-string YAML literals in an unvalidated + # workflow (GateStep coerces neither for the payload), so normalise both + # here for a stable JSON schema: message → str, options → list[str]. message = output.get("message") + options = output.get("options") return { "step_id": state.current_step_id, "message": None if message is None else str(message), - "options": output.get("options"), + "options": [str(o) for o in options] if isinstance(options, list) else options, "choice": output.get("choice"), } diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 1075a5fee4..5f0f763abb 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5423,3 +5423,63 @@ def test_gate_block_message_coerced_to_string(self): }, ) assert _gate_outcome(state)["message"] == "12.5" + + def test_gate_block_options_coerced_to_strings(self): + # options may be non-string YAML literals in an unvalidated workflow; + # the JSON surface normalises them to a list of strings. + from types import SimpleNamespace + from specify_cli import _gate_outcome + + state = SimpleNamespace( + status=SimpleNamespace(value="paused"), + current_step_id="review", + step_results={ + "review": { + "type": "gate", + "output": {"message": "m", "options": [1, 2.5], "choice": None}, + } + }, + ) + assert _gate_outcome(state)["options"] == ["1", "2.5"] + + def test_gate_block_detected_without_type_field(self): + # A run paused by an older version has no persisted step `type`. The + # gate is still detected by its unique output signature (`on_reject`), + # so resume surfaces the gate block instead of silently dropping it. + from types import SimpleNamespace + from specify_cli import _gate_outcome + + state = SimpleNamespace( + status=SimpleNamespace(value="paused"), + current_step_id="review", + step_results={ + "review": { + # no "type" key — pre-dates the field being persisted + "output": { + "message": "Approve?", + "options": ["approve", "reject"], + "on_reject": "abort", + "choice": None, + }, + } + }, + ) + gate = _gate_outcome(state) + assert gate is not None + assert gate["step_id"] == "review" + assert gate["options"] == ["approve", "reject"] + + def test_non_gate_step_without_type_is_not_a_gate(self): + # A typeless record lacking the gate signature must NOT be mistaken for + # a gate (the fallback keys off `on_reject`, which only GateStep writes). + from types import SimpleNamespace + from specify_cli import _gate_outcome + + state = SimpleNamespace( + status=SimpleNamespace(value="paused"), + current_step_id="run-tests", + step_results={ + "run-tests": {"output": {"exit_code": 0, "stdout": "ok"}}, + }, + ) + assert _gate_outcome(state) is None From a97cd1849b766064b1dbe0aa7ba3eded0314fe69 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Thu, 18 Jun 2026 02:24:53 +0700 Subject: [PATCH 8/9] fix(workflows): normalize non-list gate options to a stable list[str] Address Copilot review: the prior options normalization only mapped a `list`, returning the raw value for any other shape (scalar/tuple), which contradicted the "stable list[str]" intent. Extract `_normalize_gate_options`: None stays None; list/tuple maps each element through str; any other scalar becomes a single-element list (a bare string is one option, never iterated character-by-character). The emitted schema is now always list[str] | None. Extend the options test to cover list, tuple, bare string, numeric scalar, and None. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/__init__.py | 21 +++++++++++++++++--- tests/test_workflows.py | 38 ++++++++++++++++++++++++------------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 7015247c33..5a68a770e1 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -2144,17 +2144,32 @@ def _gate_outcome(state: Any) -> dict[str, Any] | None: output = step.get("output") or {} # `message` and `options` may be non-string YAML literals in an unvalidated # workflow (GateStep coerces neither for the payload), so normalise both - # here for a stable JSON schema: message → str, options → list[str]. + # here for a stable JSON schema: message → str, options → list[str] | None. message = output.get("message") - options = output.get("options") return { "step_id": state.current_step_id, "message": None if message is None else str(message), - "options": [str(o) for o in options] if isinstance(options, list) else options, + "options": _normalize_gate_options(output.get("options")), "choice": output.get("choice"), } +def _normalize_gate_options(options: Any) -> list[str] | None: + """Normalise a gate's ``options`` to a stable ``list[str]`` (or ``None``). + + A valid gate stores a list, but an unvalidated workflow could leave a + scalar or tuple. ``None`` stays ``None`` (no options); a list/tuple maps + each element through ``str``; any other scalar becomes a single-element + list — so the emitted JSON schema is always ``list[str] | None``. A bare + string is treated as one option, never iterated character-by-character. + """ + if options is None: + return None + if isinstance(options, (list, tuple)): + return [str(o) for o in options] + return [str(options)] + + def _run_outcome_exit_code(status_value: str) -> int: """Exit code for a finished run/resume: non-zero on terminal failure. diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 5f0f763abb..143938a2f5 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5425,22 +5425,34 @@ def test_gate_block_message_coerced_to_string(self): assert _gate_outcome(state)["message"] == "12.5" def test_gate_block_options_coerced_to_strings(self): - # options may be non-string YAML literals in an unvalidated workflow; - # the JSON surface normalises them to a list of strings. + # options may be non-string / non-list literals in an unvalidated + # workflow; the JSON surface always normalises them to list[str] | None + # so the emitted schema is stable regardless of the input shape. from types import SimpleNamespace from specify_cli import _gate_outcome - state = SimpleNamespace( - status=SimpleNamespace(value="paused"), - current_step_id="review", - step_results={ - "review": { - "type": "gate", - "output": {"message": "m", "options": [1, 2.5], "choice": None}, - } - }, - ) - assert _gate_outcome(state)["options"] == ["1", "2.5"] + def _options_payload(options): + state = SimpleNamespace( + status=SimpleNamespace(value="paused"), + current_step_id="review", + step_results={ + "review": { + "type": "gate", + "output": { + "message": "m", + "options": options, + "choice": None, + }, + } + }, + ) + return _gate_outcome(state)["options"] + + assert _options_payload([1, 2.5]) == ["1", "2.5"] # list + assert _options_payload(("approve", "reject")) == ["approve", "reject"] # tuple + assert _options_payload("approve") == ["approve"] # bare scalar, not iterated + assert _options_payload(7) == ["7"] # numeric scalar + assert _options_payload(None) is None # absent stays absent def test_gate_block_detected_without_type_field(self): # A run paused by an older version has no persisted step `type`. The From c4297a0fe161857b47c217ab766e320ef7302b1a Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Thu, 18 Jun 2026 02:50:23 +0700 Subject: [PATCH 9/9] fix(workflows): normalize gate choice to str; portable plain-gate test Address Copilot review: - `_gate_outcome` normalized `message` and `options` but passed `choice` through as-is; an unvalidated gate can record a non-string `choice`, which contradicts the stable-schema rationale. Coerce `choice` to `str | None` (None still means "no decision yet"), consistent with the other two fields. Adds a focused choice-coercion test. - The plain (no-gate) test workflow used `run: "true"`, which fails under cmd.exe on Windows (ShellStep uses shell=True). Use the cross-platform `run: "exit 0"` (matching the exit-code suite's workflows). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/__init__.py | 10 ++++++---- tests/test_workflows.py | 26 +++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 5a68a770e1..b415b9197b 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -2142,15 +2142,17 @@ def _gate_outcome(state: Any) -> dict[str, Any] | None: if not isinstance(step, dict) or not _is_gate_step(step): return None output = step.get("output") or {} - # `message` and `options` may be non-string YAML literals in an unvalidated - # workflow (GateStep coerces neither for the payload), so normalise both - # here for a stable JSON schema: message → str, options → list[str] | None. + # `message`, `options`, and `choice` may be non-string YAML literals in an + # unvalidated workflow (GateStep coerces none of them for the payload), so + # normalise all three for a stable JSON schema: message → str, options → + # list[str] | None, choice → str | None (None means no decision yet). message = output.get("message") + choice = output.get("choice") return { "step_id": state.current_step_id, "message": None if message is None else str(message), "options": _normalize_gate_options(output.get("options")), - "choice": output.get("choice"), + "choice": None if choice is None else str(choice), } diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 143938a2f5..6df0f069be 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5315,7 +5315,7 @@ class TestWorkflowRunGateOutcomeJson: steps: - id: fine type: shell - run: "true" + run: "exit 0" """ def _run_json(self, tmp_path, monkeypatch, content, *, expected_exit=0): @@ -5454,6 +5454,30 @@ def _options_payload(options): assert _options_payload(7) == ["7"] # numeric scalar assert _options_payload(None) is None # absent stays absent + def test_gate_block_choice_coerced_to_string(self): + # An unvalidated gate can record a non-string choice; the JSON + # surface normalises it to str (and keeps None = no decision yet), + # consistent with the message/options normalization. + from types import SimpleNamespace + from specify_cli import _gate_outcome + + def _choice_payload(choice): + state = SimpleNamespace( + status=SimpleNamespace(value="paused"), + current_step_id="review", + step_results={ + "review": { + "type": "gate", + "output": {"message": "m", "options": ["ok"], "choice": choice}, + } + }, + ) + return _gate_outcome(state)["choice"] + + assert _choice_payload(None) is None # no decision yet + assert _choice_payload("reject") == "reject" # normal string passes through + assert _choice_payload(2) == "2" # non-string coerced + def test_gate_block_detected_without_type_field(self): # A run paused by an older version has no persisted step `type`. The # gate is still detected by its unique output signature (`on_reject`),