diff --git a/docs/reference/workflows.md b/docs/reference/workflows.md index 9e0c6f2e77..5f6e90d924 100644 --- a/docs/reference/workflows.md +++ b/docs/reference/workflows.md @@ -280,7 +280,7 @@ Steps can reference inputs and previous step outputs using `{{ expression }}` sy | `steps.specify.output.file` | Output from a previous step | | `item` | Current item in a fan-out iteration | -Available filters: `default`, `join`, `contains`, `map`. +Available filters: `default`, `join`, `contains`, `map`, `from_json`. Example: diff --git a/src/specify_cli/workflows/expressions.py b/src/specify_cli/workflows/expressions.py index 3cc74c7646..6259b59de0 100644 --- a/src/specify_cli/workflows/expressions.py +++ b/src/specify_cli/workflows/expressions.py @@ -1,11 +1,13 @@ """Sandboxed expression evaluator for workflow templates. Provides a safe Jinja2 subset for evaluating expressions in workflow YAML. -No file I/O, no imports, no arbitrary code execution. +Templates cannot perform file I/O, import modules, or run arbitrary code — +the evaluator only walks the namespace and applies a fixed set of filters. """ from __future__ import annotations +import json import re from typing import Any @@ -57,6 +59,23 @@ def _filter_contains(value: Any, substring: str) -> bool: return False +def _filter_from_json(value: Any) -> Any: + """Parse a JSON string into a typed value (list/dict/scalar). + + Raises ``ValueError`` on non-string input or invalid JSON — a parse + failure here means the pipeline wiring is wrong, and silently + passing the unparsed value through would hide it. 
+ """ + if not isinstance(value, str): + raise ValueError( + f"from_json: expected a JSON string, got {type(value).__name__}" + ) + try: + return json.loads(value) + except json.JSONDecodeError as exc: + raise ValueError(f"from_json: invalid JSON: {exc}") from exc + + # -- Expression resolution ------------------------------------------------ _EXPR_PATTERN = re.compile(r"\{\{(.+?)\}\}") @@ -122,7 +141,7 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: - Comparisons: ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=`` - Boolean operators: ``and``, ``or``, ``not`` - ``in``, ``not in`` - - Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| map('...')`` + - Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| from_json``, ``| map('...')`` - String and numeric literals """ expr = expr.strip() @@ -140,6 +159,22 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: value = _evaluate_simple_expression(parts[0].strip(), namespace) filter_expr = parts[1].strip() + # `from_json` is strict: it takes no arguments and tolerates no + # trailing tokens. Match on the leading filter name and require the + # whole filter to be exactly `from_json`, so every mis-wired form + # (`from_json()`, `from_json('x')`, `from_json)`, `from_json extra`) + # fails loudly instead of silently falling through to the + # unknown-filter path and returning the unparsed value. (filter_expr + # is already stripped above.) 
def test_filter_from_json_parses_object(self):
    """A JSON object on stdout is parsed into the matching dict."""
    from specify_cli.workflows.base import StepContext
    from specify_cli.workflows.expressions import evaluate_expression

    step_outputs = {"emit": {"output": {"stdout": '{"items": [1, 2, 3]}'}}}
    context = StepContext(steps=step_outputs)
    parsed = evaluate_expression(
        "{{ steps.emit.output.stdout | from_json }}", context
    )
    assert parsed == {"items": [1, 2, 3]}


def test_filter_from_json_invalid_json_raises(self):
    """Text that is not JSON is reported as a ValueError."""
    import pytest
    from specify_cli.workflows.base import StepContext
    from specify_cli.workflows.expressions import evaluate_expression

    step_outputs = {"emit": {"output": {"stdout": "not json"}}}
    context = StepContext(steps=step_outputs)
    with pytest.raises(ValueError, match="from_json: invalid JSON"):
        evaluate_expression(
            "{{ steps.emit.output.stdout | from_json }}", context
        )


def test_filter_from_json_non_string_raises(self):
    """A non-string input (here an int exit code) is rejected."""
    import pytest
    from specify_cli.workflows.base import StepContext
    from specify_cli.workflows.expressions import evaluate_expression

    step_outputs = {"emit": {"output": {"exit_code": 0}}}
    context = StepContext(steps=step_outputs)
    with pytest.raises(ValueError, match="expected a JSON string"):
        evaluate_expression(
            "{{ steps.emit.output.exit_code | from_json }}", context
        )


def test_filter_from_json_rejects_malformed_forms(self):
    """Every spelling other than a bare ``from_json`` must raise.

    The filter takes no arguments and no trailing tokens. Parenthesized,
    argument-carrying and trailing-garbage forms must all fail rather
    than fall through to the unknown-filter path and return the
    unparsed value.
    """
    import pytest
    from specify_cli.workflows.base import StepContext
    from specify_cli.workflows.expressions import evaluate_expression

    context = StepContext(
        steps={"emit": {"output": {"stdout": '{"a": 1}'}}}
    )
    malformed_filters = [
        "from_json()",
        "from_json('x')",
        "from_json ()",
        "from_json ('x')",
        "from_json)",
        "from_json extra",
        "from_json 'x'",
    ]
    for bad in malformed_filters:
        expression = "{{ steps.emit.output.stdout | " + bad + " }}"
        with pytest.raises(ValueError, match="from_json: expected"):
            evaluate_expression(expression, context)
diff --git a/workflows/README.md b/workflows/README.md index 0e3e74a924..19e580eff9 100644 --- a/workflows/README.md +++ b/workflows/README.md @@ -314,7 +314,7 @@ condition: "{{ steps.run-tests.output.exit_code != 0 }}" message: "{{ status | default('pending') }}" ``` -Supported filters: `default`, `join`, `contains`, `map`. +Supported filters: `default`, `join`, `contains`, `map`, `from_json`. ### Runtime Context