From 39316e5962f3d1c2d5262d11362b9b90d94264ad Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Sat, 13 Jun 2026 00:29:42 +0700
Subject: [PATCH 1/4] feat(workflows): add from_json expression filter

Step outputs captured as strings could never become typed values in
templates - the filter set was default/join/map/contains only, so e.g.
a fan-out items: could never consume a step's JSON stdout. Add an
arg-less from_json pipe filter with parse-or-raise semantics: invalid
JSON or non-string input raises a clear ValueError rather than passing
through silently.

Fixes #2960
---
 src/specify_cli/workflows/expressions.py | 22 ++++++++++++++++++-
 tests/test_workflows.py                  | 28 ++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/src/specify_cli/workflows/expressions.py b/src/specify_cli/workflows/expressions.py
index 3cc74c7646..b7532ca06c 100644
--- a/src/specify_cli/workflows/expressions.py
+++ b/src/specify_cli/workflows/expressions.py
@@ -6,6 +6,7 @@
 
 from __future__ import annotations
 
+import json
 import re
 from typing import Any
 
@@ -57,6 +58,23 @@ def _filter_contains(value: Any, substring: str) -> bool:
     return False
 
 
+def _filter_from_json(value: Any) -> Any:
+    """Parse a JSON string into a typed value (list/dict/scalar).
+
+    Raises ``ValueError`` on non-string input or invalid JSON — a parse
+    failure here means the pipeline wiring is wrong, and silently
+    passing the unparsed value through would hide it.
+    """
+    if not isinstance(value, str):
+        raise ValueError(
+            f"from_json: expected a JSON string, got {type(value).__name__}"
+        )
+    try:
+        return json.loads(value)
+    except json.JSONDecodeError as exc:
+        raise ValueError(f"from_json: invalid JSON: {exc}") from exc
+
+
 # -- Expression resolution ------------------------------------------------
 
 _EXPR_PATTERN = re.compile(r"\{\{(.+?)\}\}")
@@ -122,7 +140,7 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
     - Comparisons: ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=``
     - Boolean operators: ``and``, ``or``, ``not``
     - ``in``, ``not in``
-    - Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| map('...')``
+    - Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| from_json``, ``| map('...')``
     - String and numeric literals
     """
     expr = expr.strip()
@@ -157,6 +175,8 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
         filter_name = filter_expr.strip()
         if filter_name == "default":
             return _filter_default(value)
+        if filter_name == "from_json":
+            return _filter_from_json(value)
         return value
 
     # Boolean operators — parse 'or' first (lower precedence) so that
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 51da5cc86b..af1ebd5c46 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -286,6 +286,34 @@ def test_filter_contains(self):
         ctx = StepContext(inputs={"text": "hello world"})
         assert evaluate_expression("{{ inputs.text | contains('world') }}", ctx) is True
 
+    def test_filter_from_json_parses_list(self):
+        from specify_cli.workflows.expressions import evaluate_expression
+        from specify_cli.workflows.base import StepContext
+
+        ctx = StepContext(
+            steps={"emit": {"output": {"stdout": '{"items": [1, 2, 3]}'}}}
+        )
+        result = evaluate_expression("{{ steps.emit.output.stdout | from_json }}", ctx)
+        assert result == {"items": [1, 2, 3]}
+
+    def test_filter_from_json_invalid_json_raises(self):
+        import pytest
+        from specify_cli.workflows.expressions import evaluate_expression
+        from specify_cli.workflows.base import StepContext
+
+        ctx = StepContext(steps={"emit": {"output": {"stdout": "not json"}}})
+        with pytest.raises(ValueError, match="from_json: invalid JSON"):
+            evaluate_expression("{{ steps.emit.output.stdout | from_json }}", ctx)
+
+    def test_filter_from_json_non_string_raises(self):
+        import pytest
+        from specify_cli.workflows.expressions import evaluate_expression
+        from specify_cli.workflows.base import StepContext
+
+        ctx = StepContext(steps={"emit": {"output": {"exit_code": 0}}})
+        with pytest.raises(ValueError, match="expected a JSON string"):
+            evaluate_expression("{{ steps.emit.output.exit_code | from_json }}", ctx)
+
     def test_condition_evaluation(self):
         from specify_cli.workflows.expressions import evaluate_condition
         from specify_cli.workflows.base import StepContext

From 3751f5fefdd3f7bf46f0085885237b7b7e680d73 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Wed, 17 Jun 2026 09:05:12 +0700
Subject: [PATCH 2/4] =?UTF-8?q?fix(expressions):=20make=20from=5Fjson=20st?=
 =?UTF-8?q?rict=20=E2=80=94=20reject=20any=20arguments?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address review (#2961): from_json('x') and from_json() previously fell through to a silent passthrough of the unparsed value. Reject any parenthesized form with a clear error so mis-wired templates fail loudly. Rename test to ...parses_object (JSON under test is an object) and add coverage for the strict no-arguments behavior.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 src/specify_cli/workflows/expressions.py | 14 ++++++++++++--
 tests/test_workflows.py                  | 17 ++++++++++++++++-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/specify_cli/workflows/expressions.py b/src/specify_cli/workflows/expressions.py
index b7532ca06c..85164daae8 100644
--- a/src/specify_cli/workflows/expressions.py
+++ b/src/specify_cli/workflows/expressions.py
@@ -158,6 +158,18 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
         value = _evaluate_simple_expression(parts[0].strip(), namespace)
         filter_expr = parts[1].strip()
 
+        # `from_json` is strict and takes no arguments. Match the filter name
+        # tolerant of whitespace and reject any parenthesized form
+        # (`from_json()`, `from_json('x')`, `from_json ()`) so a mis-wired
+        # template fails loudly instead of silently returning the unparsed
+        # value.
+        if filter_expr.split("(", 1)[0].strip() == "from_json":
+            if "(" in filter_expr:
+                raise ValueError(
+                    "from_json: filter takes no arguments (use '| from_json')"
+                )
+            return _filter_from_json(value)
+
         # Parse filter name and argument
         filter_match = re.match(r"(\w+)\((.+)\)", filter_expr)
         if filter_match:
@@ -175,8 +187,6 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
         filter_name = filter_expr.strip()
         if filter_name == "default":
             return _filter_default(value)
-        if filter_name == "from_json":
-            return _filter_from_json(value)
         return value
 
     # Boolean operators — parse 'or' first (lower precedence) so that
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index af1ebd5c46..ece3e44eba 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -286,7 +286,7 @@ def test_filter_contains(self):
         ctx = StepContext(inputs={"text": "hello world"})
         assert evaluate_expression("{{ inputs.text | contains('world') }}", ctx) is True
 
-    def test_filter_from_json_parses_list(self):
+    def test_filter_from_json_parses_object(self):
         from specify_cli.workflows.expressions import evaluate_expression
         from specify_cli.workflows.base import StepContext
 
@@ -314,6 +314,21 @@ def test_filter_from_json_non_string_raises(self):
         with pytest.raises(ValueError, match="expected a JSON string"):
             evaluate_expression("{{ steps.emit.output.exit_code | from_json }}", ctx)
 
+    def test_filter_from_json_rejects_arguments(self):
+        # `from_json` is strict and takes no arguments. Both the empty-parens
+        # and accidental-arg forms must raise rather than silently return the
+        # unparsed value, so a mis-wired template is caught immediately.
+        import pytest
+        from specify_cli.workflows.expressions import evaluate_expression
+        from specify_cli.workflows.base import StepContext
+
+        ctx = StepContext(steps={"emit": {"output": {"stdout": '{"a": 1}'}}})
+        for bad in ("from_json()", "from_json('x')", "from_json ()", "from_json ('x')"):
+            with pytest.raises(ValueError, match="from_json: filter takes no arguments"):
+                evaluate_expression(
+                    "{{ steps.emit.output.stdout | " + bad + " }}", ctx
+                )
+
     def test_condition_evaluation(self):
         from specify_cli.workflows.expressions import evaluate_condition
         from specify_cli.workflows.base import StepContext

From 0eba1467ed358d599f7bd59a74aca7b243152f4a Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Wed, 17 Jun 2026 20:45:41 +0700
Subject: [PATCH 3/4] docs(workflows): document the from_json expression filter

Address Copilot review: the user-facing filter references omitted the
newly added `from_json` filter. Add it to the ARCHITECTURE.md filter table
(with the `{{ steps.emit.output.stdout | from_json }}` example) and to the
filter enumerations in workflows/README.md and docs/reference/workflows.md
so the docs match the evaluator's capabilities.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/reference/workflows.md | 2 +-
 workflows/ARCHITECTURE.md   | 1 +
 workflows/README.md         | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/reference/workflows.md b/docs/reference/workflows.md
index 9e0c6f2e77..5f6e90d924 100644
--- a/docs/reference/workflows.md
+++ b/docs/reference/workflows.md
@@ -280,7 +280,7 @@ Steps can reference inputs and previous step outputs using `{{ expression }}` sy
 | `steps.specify.output.file`    | Output from a previous step          |
 | `item`                         | Current item in a fan-out iteration  |
 
-Available filters: `default`, `join`, `contains`, `map`.
+Available filters: `default`, `join`, `contains`, `map`, `from_json`.
 
 Example:
 
diff --git a/workflows/ARCHITECTURE.md b/workflows/ARCHITECTURE.md
index 892333473c..664261b1ce 100644
--- a/workflows/ARCHITECTURE.md
+++ b/workflows/ARCHITECTURE.md
@@ -118,6 +118,7 @@ Workflow definitions use Jinja2-like `{{ expression }}` syntax for dynamic value
 | Filter: `join` | `{{ list \| join(', ') }}` | Join list elements |
 | Filter: `contains` | `{{ text \| contains('sub') }}` | Substring/membership check |
 | Filter: `map` | `{{ list \| map('attr') }}` | Extract attribute from each item |
+| Filter: `from_json` | `{{ steps.emit.output.stdout \| from_json }}` | Parse a JSON string into a typed value (raises on invalid JSON) |
 
 **Single expressions** (`{{ expr }}` only) return typed values. **Mixed templates** (`"text {{ expr }} more"`) return interpolated strings.
 
diff --git a/workflows/README.md b/workflows/README.md
index 0e3e74a924..19e580eff9 100644
--- a/workflows/README.md
+++ b/workflows/README.md
@@ -314,7 +314,7 @@ condition: "{{ steps.run-tests.output.exit_code != 0 }}"
 message: "{{ status | default('pending') }}"
 ```
 
-Supported filters: `default`, `join`, `contains`, `map`.
+Supported filters: `default`, `join`, `contains`, `map`, `from_json`.
 
 ### Runtime Context
 

From fd896a3a4a9a19112b94a185f57524e69ef27472 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Wed, 17 Jun 2026 22:49:39 +0700
Subject: [PATCH 4/4] fix(workflows): make from_json strictness reject trailing
 tokens; fix docstring

Address Copilot review:
- Strictness only rejected parenthesized forms, so typos like
  `| from_json)` or `| from_json extra` still fell through to the
  unknown-filter path and silently returned the unparsed value. Match on
  the leading filter token and require the whole filter to be exactly
  `from_json`, so every mis-wired form raises. Extend the rejection test to
  cover the trailing-token cases.
- The module docstring claimed "no imports", which is misleading now that
  the module imports `json`. Reword to state the actual sandbox guarantee:
  templates cannot do file I/O, import modules, or run arbitrary code.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/specify_cli/workflows/expressions.py | 23 ++++++++++++++---------
 tests/test_workflows.py                  | 22 ++++++++++++++++------
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/src/specify_cli/workflows/expressions.py b/src/specify_cli/workflows/expressions.py
index 85164daae8..6259b59de0 100644
--- a/src/specify_cli/workflows/expressions.py
+++ b/src/specify_cli/workflows/expressions.py
@@ -1,7 +1,8 @@
 """Sandboxed expression evaluator for workflow templates.
 
 Provides a safe Jinja2 subset for evaluating expressions in workflow YAML.
-No file I/O, no imports, no arbitrary code execution.
+Templates cannot perform file I/O, import modules, or run arbitrary code —
+the evaluator only walks the namespace and applies a fixed set of filters.
 """
 
 from __future__ import annotations
@@ -158,15 +159,19 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
         value = _evaluate_simple_expression(parts[0].strip(), namespace)
         filter_expr = parts[1].strip()
 
-        # `from_json` is strict and takes no arguments. Match the filter name
-        # tolerant of whitespace and reject any parenthesized form
-        # (`from_json()`, `from_json('x')`, `from_json ()`) so a mis-wired
-        # template fails loudly instead of silently returning the unparsed
-        # value.
-        if filter_expr.split("(", 1)[0].strip() == "from_json":
-            if "(" in filter_expr:
+        # `from_json` is strict: it takes no arguments and tolerates no
+        # trailing tokens. Match on the leading filter name and require the
+        # whole filter to be exactly `from_json`, so every mis-wired form
+        # (`from_json()`, `from_json('x')`, `from_json)`, `from_json extra`)
+        # fails loudly instead of silently falling through to the
+        # unknown-filter path and returning the unparsed value. (filter_expr
+        # is already stripped above.)
+        leading = re.match(r"\w+", filter_expr)
+        if leading and leading.group(0) == "from_json":
+            if filter_expr != "from_json":
                 raise ValueError(
-                    "from_json: filter takes no arguments (use '| from_json')"
+                    "from_json: expected '| from_json' with no arguments or "
+                    f"trailing tokens, got '| {filter_expr}'"
                 )
             return _filter_from_json(value)
 
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index ece3e44eba..f3ab4e9012 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -314,17 +314,27 @@ def test_filter_from_json_non_string_raises(self):
         with pytest.raises(ValueError, match="expected a JSON string"):
             evaluate_expression("{{ steps.emit.output.exit_code | from_json }}", ctx)
 
-    def test_filter_from_json_rejects_arguments(self):
-        # `from_json` is strict and takes no arguments. Both the empty-parens
-        # and accidental-arg forms must raise rather than silently return the
-        # unparsed value, so a mis-wired template is caught immediately.
+    def test_filter_from_json_rejects_malformed_forms(self):
+        # `from_json` is strict: no arguments and no trailing tokens. Every
+        # mis-wired form — parenthesized, accidental arg, or trailing
+        # garbage — must raise rather than silently fall through to the
+        # unknown-filter path and return the unparsed value.
         import pytest
         from specify_cli.workflows.expressions import evaluate_expression
         from specify_cli.workflows.base import StepContext
 
         ctx = StepContext(steps={"emit": {"output": {"stdout": '{"a": 1}'}}})
-        for bad in ("from_json()", "from_json('x')", "from_json ()", "from_json ('x')"):
-            with pytest.raises(ValueError, match="from_json: filter takes no arguments"):
+        bad_forms = (
+            "from_json()",
+            "from_json('x')",
+            "from_json ()",
+            "from_json ('x')",
+            "from_json)",
+            "from_json extra",
+            "from_json 'x'",
+        )
+        for bad in bad_forms:
+            with pytest.raises(ValueError, match="from_json: expected"):
                 evaluate_expression(
                     "{{ steps.emit.output.stdout | " + bad + " }}", ctx
                 )