From 39316e5962f3d1c2d5262d11362b9b90d94264ad Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Sat, 13 Jun 2026 00:29:42 +0700 Subject: [PATCH 1/4] feat(workflows): add from_json expression filter Step outputs captured as strings could never become typed values in templates - the filter set was default/join/map/contains only, so e.g. a fan-out items: could never consume a step's JSON stdout. Add an arg-less from_json pipe filter with parse-or-raise semantics: invalid JSON or non-string input raises a clear ValueError rather than passing through silently. Fixes #2960 --- src/specify_cli/workflows/expressions.py | 22 ++++++++++++++++++- tests/test_workflows.py | 28 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/specify_cli/workflows/expressions.py b/src/specify_cli/workflows/expressions.py index 3cc74c7646..b7532ca06c 100644 --- a/src/specify_cli/workflows/expressions.py +++ b/src/specify_cli/workflows/expressions.py @@ -6,6 +6,7 @@ from __future__ import annotations +import json import re from typing import Any @@ -57,6 +58,23 @@ def _filter_contains(value: Any, substring: str) -> bool: return False +def _filter_from_json(value: Any) -> Any: + """Parse a JSON string into a typed value (list/dict/scalar). + + Raises ``ValueError`` on non-string input or invalid JSON — a parse + failure here means the pipeline wiring is wrong, and silently + passing the unparsed value through would hide it. + """ + if not isinstance(value, str): + raise ValueError( + f"from_json: expected a JSON string, got {type(value).__name__}" + ) + try: + return json.loads(value) + except json.JSONDecodeError as exc: + raise ValueError(f"from_json: invalid JSON: {exc}") from exc + + # -- Expression resolution ------------------------------------------------ _EXPR_PATTERN = re.compile(r"\{\{(.+?)\}\}") @@ -122,7 +140,7 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: - Comparisons: ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=`` - Boolean operators: ``and``, ``or``, ``not`` - ``in``, ``not in`` - - Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| map('...')`` + - Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| from_json``, ``| map('...')`` - String and numeric literals """ expr = expr.strip() @@ -157,6 +175,8 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: filter_name = filter_expr.strip() if filter_name == "default": return _filter_default(value) + if filter_name == "from_json": + return _filter_from_json(value) return value # Boolean operators — parse 'or' first (lower precedence) so that diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 51da5cc86b..af1ebd5c46 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -286,6 +286,34 @@ def test_filter_contains(self): ctx = StepContext(inputs={"text": "hello world"}) assert evaluate_expression("{{ inputs.text | contains('world') }}", ctx) is True + def test_filter_from_json_parses_list(self): + from specify_cli.workflows.expressions import evaluate_expression + from specify_cli.workflows.base import StepContext + + ctx = StepContext( + steps={"emit": {"output": {"stdout": '{"items": [1, 2, 3]}'}}} + ) + result = evaluate_expression("{{ steps.emit.output.stdout | from_json }}", ctx) + assert result == {"items": [1, 2, 3]} + + def test_filter_from_json_invalid_json_raises(self): + import pytest + from specify_cli.workflows.expressions import evaluate_expression + from specify_cli.workflows.base import StepContext + + ctx = StepContext(steps={"emit": {"output": {"stdout": "not json"}}}) + with pytest.raises(ValueError, match="from_json: invalid JSON"): + evaluate_expression("{{ steps.emit.output.stdout | from_json }}", ctx) + + def test_filter_from_json_non_string_raises(self): + import pytest + from specify_cli.workflows.expressions import evaluate_expression + from specify_cli.workflows.base import StepContext + + ctx = StepContext(steps={"emit": {"output": {"exit_code": 0}}}) + with pytest.raises(ValueError, match="expected a JSON string"): + evaluate_expression("{{ steps.emit.output.exit_code | from_json }}", ctx) + def test_condition_evaluation(self): from specify_cli.workflows.expressions import evaluate_condition from specify_cli.workflows.base import StepContext From 3751f5fefdd3f7bf46f0085885237b7b7e680d73 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Wed, 17 Jun 2026 09:05:12 +0700 Subject: [PATCH 2/4] =?UTF-8?q?fix(expressions):=20make=20from=5Fjson=20st?= =?UTF-8?q?rict=20=E2=80=94=20reject=20any=20arguments?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review (#2961): from_json('x') and from_json() previously fell through to a silent passthrough of the unparsed value. Reject any parenthesized form with a clear error so mis-wired templates fail loudly. Rename test to ...parses_object (JSON under test is an object) and add coverage for the strict no-arguments behavior. Co-Authored-By: Claude Fable 5 --- src/specify_cli/workflows/expressions.py | 14 ++++++++++++-- tests/test_workflows.py | 17 ++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/specify_cli/workflows/expressions.py b/src/specify_cli/workflows/expressions.py index b7532ca06c..85164daae8 100644 --- a/src/specify_cli/workflows/expressions.py +++ b/src/specify_cli/workflows/expressions.py @@ -158,6 +158,18 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: value = _evaluate_simple_expression(parts[0].strip(), namespace) filter_expr = parts[1].strip() + # `from_json` is strict and takes no arguments. Match the filter name + # tolerant of whitespace and reject any parenthesized form + # (`from_json()`, `from_json('x')`, `from_json ()`) so a mis-wired + # template fails loudly instead of silently returning the unparsed + # value. + if filter_expr.split("(", 1)[0].strip() == "from_json": + if "(" in filter_expr: + raise ValueError( + "from_json: filter takes no arguments (use '| from_json')" + ) + return _filter_from_json(value) + # Parse filter name and argument filter_match = re.match(r"(\w+)\((.+)\)", filter_expr) if filter_match: @@ -175,8 +187,6 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: filter_name = filter_expr.strip() if filter_name == "default": return _filter_default(value) - if filter_name == "from_json": - return _filter_from_json(value) return value # Boolean operators — parse 'or' first (lower precedence) so that diff --git a/tests/test_workflows.py b/tests/test_workflows.py index af1ebd5c46..ece3e44eba 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -286,7 +286,7 @@ def test_filter_contains(self): ctx = StepContext(inputs={"text": "hello world"}) assert evaluate_expression("{{ inputs.text | contains('world') }}", ctx) is True - def test_filter_from_json_parses_list(self): + def test_filter_from_json_parses_object(self): from specify_cli.workflows.expressions import evaluate_expression from specify_cli.workflows.base import StepContext @@ -314,6 +314,21 @@ def test_filter_from_json_non_string_raises(self): with pytest.raises(ValueError, match="expected a JSON string"): evaluate_expression("{{ steps.emit.output.exit_code | from_json }}", ctx) + def test_filter_from_json_rejects_arguments(self): + # `from_json` is strict and takes no arguments. Both the empty-parens + # and accidental-arg forms must raise rather than silently return the + # unparsed value, so a mis-wired template is caught immediately. + import pytest + from specify_cli.workflows.expressions import evaluate_expression + from specify_cli.workflows.base import StepContext + + ctx = StepContext(steps={"emit": {"output": {"stdout": '{"a": 1}'}}}) + for bad in ("from_json()", "from_json('x')", "from_json ()", "from_json ('x')"): + with pytest.raises(ValueError, match="from_json: filter takes no arguments"): + evaluate_expression( + "{{ steps.emit.output.stdout | " + bad + " }}", ctx + ) + def test_condition_evaluation(self): from specify_cli.workflows.expressions import evaluate_condition from specify_cli.workflows.base import StepContext From 0eba1467ed358d599f7bd59a74aca7b243152f4a Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Wed, 17 Jun 2026 20:45:41 +0700 Subject: [PATCH 3/4] docs(workflows): document the from_json expression filter Address Copilot review: the user-facing filter references omitted the newly added `from_json` filter. Add it to the ARCHITECTURE.md filter table (with the `{{ steps.emit.output.stdout | from_json }}` example) and to the filter enumerations in workflows/README.md and docs/reference/workflows.md so the docs match the evaluator's capabilities. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/reference/workflows.md | 2 +- workflows/ARCHITECTURE.md | 1 + workflows/README.md | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/reference/workflows.md b/docs/reference/workflows.md index 9e0c6f2e77..5f6e90d924 100644 --- a/docs/reference/workflows.md +++ b/docs/reference/workflows.md @@ -280,7 +280,7 @@ Steps can reference inputs and previous step outputs using `{{ expression }}` sy | `steps.specify.output.file` | Output from a previous step | | `item` | Current item in a fan-out iteration | -Available filters: `default`, `join`, `contains`, `map`. +Available filters: `default`, `join`, `contains`, `map`, `from_json`. Example: diff --git a/workflows/ARCHITECTURE.md b/workflows/ARCHITECTURE.md index 892333473c..664261b1ce 100644 --- a/workflows/ARCHITECTURE.md +++ b/workflows/ARCHITECTURE.md @@ -118,6 +118,7 @@ Workflow definitions use Jinja2-like `{{ expression }}` syntax for dynamic value | Filter: `join` | `{{ list \| join(', ') }}` | Join list elements | | Filter: `contains` | `{{ text \| contains('sub') }}` | Substring/membership check | | Filter: `map` | `{{ list \| map('attr') }}` | Extract attribute from each item | +| Filter: `from_json` | `{{ steps.emit.output.stdout \| from_json }}` | Parse a JSON string into a typed value (raises on invalid JSON) | **Single expressions** (`{{ expr }}` only) return typed values. **Mixed templates** (`"text {{ expr }} more"`) return interpolated strings. diff --git a/workflows/README.md b/workflows/README.md index 0e3e74a924..19e580eff9 100644 --- a/workflows/README.md +++ b/workflows/README.md @@ -314,7 +314,7 @@ condition: "{{ steps.run-tests.output.exit_code != 0 }}" message: "{{ status | default('pending') }}" ``` -Supported filters: `default`, `join`, `contains`, `map`. +Supported filters: `default`, `join`, `contains`, `map`, `from_json`. ### Runtime Context From fd896a3a4a9a19112b94a185f57524e69ef27472 Mon Sep 17 00:00:00 2001 From: doquanghuy Date: Wed, 17 Jun 2026 22:49:39 +0700 Subject: [PATCH 4/4] fix(workflows): make from_json strictness reject trailing tokens; fix docstring Address Copilot review: - Strictness only rejected parenthesized forms, so typos like `| from_json)` or `| from_json extra` still fell through to the unknown-filter path and silently returned the unparsed value. Match on the leading filter token and require the whole filter to be exactly `from_json`, so every mis-wired form raises. Extend the rejection test to cover the trailing-token cases. - The module docstring claimed "no imports", which is misleading now that the module imports `json`. Reword to state the actual sandbox guarantee: templates cannot do file I/O, import modules, or run arbitrary code. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/workflows/expressions.py | 23 ++++++++++++++--------- tests/test_workflows.py | 22 ++++++++++++++++------ 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/specify_cli/workflows/expressions.py b/src/specify_cli/workflows/expressions.py index 85164daae8..6259b59de0 100644 --- a/src/specify_cli/workflows/expressions.py +++ b/src/specify_cli/workflows/expressions.py @@ -1,7 +1,8 @@ """Sandboxed expression evaluator for workflow templates. Provides a safe Jinja2 subset for evaluating expressions in workflow YAML. -No file I/O, no imports, no arbitrary code execution. +Templates cannot perform file I/O, import modules, or run arbitrary code — +the evaluator only walks the namespace and applies a fixed set of filters. """ from __future__ import annotations @@ -158,15 +159,19 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: value = _evaluate_simple_expression(parts[0].strip(), namespace) filter_expr = parts[1].strip() - # `from_json` is strict and takes no arguments. Match the filter name - # tolerant of whitespace and reject any parenthesized form - # (`from_json()`, `from_json('x')`, `from_json ()`) so a mis-wired - # template fails loudly instead of silently returning the unparsed - # value. - if filter_expr.split("(", 1)[0].strip() == "from_json": - if "(" in filter_expr: + # `from_json` is strict: it takes no arguments and tolerates no + # trailing tokens. Match on the leading filter name and require the + # whole filter to be exactly `from_json`, so every mis-wired form + # (`from_json()`, `from_json('x')`, `from_json)`, `from_json extra`) + # fails loudly instead of silently falling through to the + # unknown-filter path and returning the unparsed value. (filter_expr + # is already stripped above.) + leading = re.match(r"\w+", filter_expr) + if leading and leading.group(0) == "from_json": + if filter_expr != "from_json": raise ValueError( - "from_json: filter takes no arguments (use '| from_json')" + "from_json: expected '| from_json' with no arguments or " + f"trailing tokens, got '| {filter_expr}'" ) return _filter_from_json(value) diff --git a/tests/test_workflows.py b/tests/test_workflows.py index ece3e44eba..f3ab4e9012 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -314,17 +314,27 @@ def test_filter_from_json_non_string_raises(self): with pytest.raises(ValueError, match="expected a JSON string"): evaluate_expression("{{ steps.emit.output.exit_code | from_json }}", ctx) - def test_filter_from_json_rejects_arguments(self): - # `from_json` is strict and takes no arguments. Both the empty-parens - # and accidental-arg forms must raise rather than silently return the - # unparsed value, so a mis-wired template is caught immediately. + def test_filter_from_json_rejects_malformed_forms(self): + # `from_json` is strict: no arguments and no trailing tokens. Every + # mis-wired form — parenthesized, accidental arg, or trailing + # garbage — must raise rather than silently fall through to the + # unknown-filter path and return the unparsed value. import pytest from specify_cli.workflows.expressions import evaluate_expression from specify_cli.workflows.base import StepContext ctx = StepContext(steps={"emit": {"output": {"stdout": '{"a": 1}'}}}) - for bad in ("from_json()", "from_json('x')", "from_json ()", "from_json ('x')"): - with pytest.raises(ValueError, match="from_json: filter takes no arguments"): + bad_forms = ( + "from_json()", + "from_json('x')", + "from_json ()", + "from_json ('x')", + "from_json)", + "from_json extra", + "from_json 'x'", + ) + for bad in bad_forms: + with pytest.raises(ValueError, match="from_json: expected"): evaluate_expression( "{{ steps.emit.output.stdout | " + bad + " }}", ctx )