From 00ecbf19580646e09f29325f6e0c9159a24e1445 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Sat, 13 Jun 2026 00:36:23 +0700
Subject: [PATCH 1/2] feat(workflows): opt-in output_format: json exposes
 parsed shell stdout as output.data

No step that runs external code could hand a typed value to a later
step, so e.g. a fan-out could never consume a runtime-computed
collection. With output_format: json declared, stdout is parsed and
exposed under output.data (raw keys unchanged); a parse failure fails
the step with a clear error. Without the key, behavior is unchanged.

Reference implementation for the proposal in #2962.

Addresses #2962
---
 .../workflows/steps/shell/__init__.py         | 24 +++++++++
 tests/test_workflows.py                       | 49 +++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/src/specify_cli/workflows/steps/shell/__init__.py b/src/specify_cli/workflows/steps/shell/__init__.py
index 73ac99530a..8c62e4cfa8 100644
--- a/src/specify_cli/workflows/steps/shell/__init__.py
+++ b/src/specify_cli/workflows/steps/shell/__init__.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import json
 import subprocess
 from typing import Any
 
@@ -49,6 +50,23 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
                     error=f"Shell command exited with code {proc.returncode}.",
                     output=output,
                 )
+            if config.get("output_format") == "json":
+                # Opt-in structured output: expose the parsed stdout under
+                # ``output.data`` so later steps can consume typed values
+                # (e.g. a fan-out's ``items:``). A parse failure fails the
+                # step — declaring ``output_format: json`` is a contract.
+                try:
+                    output["data"] = json.loads(proc.stdout)
+                except json.JSONDecodeError as exc:
+                    return StepResult(
+                        status=StepStatus.FAILED,
+                        error=(
+                            f"Shell step {config.get('id', '?')!r} declared "
+                            f"output_format: json but stdout is not valid "
+                            f"JSON: {exc}"
+                        ),
+                        output=output,
+                    )
             return StepResult(
                 status=StepStatus.COMPLETED,
                 output=output,
@@ -72,4 +90,10 @@ def validate(self, config: dict[str, Any]) -> list[str]:
             errors.append(
                 f"Shell step {config.get('id', '?')!r} is missing 'run' field."
             )
+        output_format = config.get("output_format")
+        if output_format is not None and output_format != "json":
+            errors.append(
+                f"Shell step {config.get('id', '?')!r}: 'output_format' must "
+                f"be 'json' when present, got {output_format!r}."
+            )
         return errors
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 51da5cc86b..752d06f509 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -941,6 +941,55 @@ def test_validate_missing_run(self):
         assert any("missing 'run'" in e for e in errors)
 
 
+    def test_output_format_json_exposes_data(self, tmp_path):
+        from specify_cli.workflows.steps.shell import ShellStep
+        from specify_cli.workflows.base import StepContext, StepStatus
+
+        step = ShellStep()
+        ctx = StepContext(project_root=str(tmp_path))
+        config = {
+            "id": "emit",
+            "run": "echo '{\"items\": [1, 2]}'",
+            "output_format": "json",
+        }
+        result = step.execute(config, ctx)
+        assert result.status == StepStatus.COMPLETED
+        assert result.output["data"] == {"items": [1, 2]}
+        assert result.output["exit_code"] == 0  # raw keys still present
+
+    def test_output_format_json_invalid_stdout_fails(self, tmp_path):
+        from specify_cli.workflows.steps.shell import ShellStep
+        from specify_cli.workflows.base import StepContext, StepStatus
+
+        step = ShellStep()
+        ctx = StepContext(project_root=str(tmp_path))
+        config = {
+            "id": "emit",
+            "run": "echo not-json",
+            "output_format": "json",
+        }
+        result = step.execute(config, ctx)
+        assert result.status == StepStatus.FAILED
+        assert "output_format: json" in (result.error or "")
+
+    def test_no_output_format_keeps_raw_output_only(self, tmp_path):
+        from specify_cli.workflows.steps.shell import ShellStep
+        from specify_cli.workflows.base import StepContext, StepStatus
+
+        step = ShellStep()
+        ctx = StepContext(project_root=str(tmp_path))
+        config = {"id": "emit", "run": "echo '{\"items\": []}'"}
+        result = step.execute(config, ctx)
+        assert result.status == StepStatus.COMPLETED
+        assert "data" not in result.output
+
+    def test_validate_rejects_unknown_output_format(self):
+        from specify_cli.workflows.steps.shell import ShellStep
+
+        step = ShellStep()
+        errors = step.validate({"id": "emit", "run": "true", "output_format": "yaml"})
+        assert any("'output_format' must be 'json'" in e for e in errors)
+
 class _StubStdin:
     """Stdin stub exposing only a fixed ``isatty`` result.
 

From 7416ae553209d4a78ed8afa2ae7f254864ac85d6 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Wed, 17 Jun 2026 09:07:47 +0700
Subject: [PATCH 2/2] test(shell): emit JSON via sys.executable for
 cross-platform output_format tests

Address review (#2963): replace non-portable echo '{...}' (Windows cmd.exe keeps the single quotes, breaking JSON) with the established '"{py}" "{script}"' pattern using sys.executable + a temp script, so the output_format tests pass on the Windows CI matrix. Also make the validate test's run inert (exit 0).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 tests/test_workflows.py | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 752d06f509..bbfa265de7 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -909,6 +909,17 @@ def test_validate_valid(self):
 class TestShellStep:
     """Test the shell step type."""
 
+    @staticmethod
+    def _python_run(tmp_path, body):
+        """A portable shell ``run`` that executes ``body`` with the current
+        interpreter, avoiding non-portable shell quoting (e.g. Windows
+        ``cmd.exe`` keeping single quotes) in the output_format tests."""
+        import sys
+
+        script = tmp_path / "emit.py"
+        script.write_text(body, encoding="utf-8")
+        return f'"{sys.executable}" "{script}"'
+
     def test_execute_echo(self):
         from specify_cli.workflows.steps.shell import ShellStep
         from specify_cli.workflows.base import StepContext, StepStatus
@@ -949,7 +960,9 @@ def test_output_format_json_exposes_data(self, tmp_path):
         ctx = StepContext(project_root=str(tmp_path))
         config = {
             "id": "emit",
-            "run": "echo '{\"items\": [1, 2]}'",
+            "run": self._python_run(
+                tmp_path, 'import json; print(json.dumps({"items": [1, 2]}))\n'
+            ),
             "output_format": "json",
         }
         result = step.execute(config, ctx)
@@ -965,7 +978,7 @@ def test_output_format_json_invalid_stdout_fails(self, tmp_path):
         ctx = StepContext(project_root=str(tmp_path))
         config = {
             "id": "emit",
-            "run": "echo not-json",
+            "run": self._python_run(tmp_path, "print('not-json')\n"),
             "output_format": "json",
         }
         result = step.execute(config, ctx)
@@ -978,7 +991,12 @@ def test_no_output_format_keeps_raw_output_only(self, tmp_path):
 
         step = ShellStep()
         ctx = StepContext(project_root=str(tmp_path))
-        config = {"id": "emit", "run": "echo '{\"items\": []}'"}
+        config = {
+            "id": "emit",
+            "run": self._python_run(
+                tmp_path, 'import json; print(json.dumps({"items": []}))\n'
+            ),
+        }
         result = step.execute(config, ctx)
         assert result.status == StepStatus.COMPLETED
         assert "data" not in result.output
@@ -987,7 +1005,7 @@ def test_validate_rejects_unknown_output_format(self):
         from specify_cli.workflows.steps.shell import ShellStep
 
         step = ShellStep()
-        errors = step.validate({"id": "emit", "run": "true", "output_format": "yaml"})
+        errors = step.validate({"id": "emit", "run": "exit 0", "output_format": "yaml"})
         assert any("'output_format' must be 'json'" in e for e in errors)
 
 class _StubStdin: