From 5cba29b11b392548c594310f393098a3880957d2 Mon Sep 17 00:00:00 2001 From: Brandon Garcia Date: Tue, 21 Apr 2026 11:32:12 -0700 Subject: [PATCH 1/3] fix(workflows): auto-wrap bare workflow definitions in {"specification": ...} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `roboflow workflow create --definition ` and `Workspace.create_workflow(name, definition)` previously dumped the user's JSON verbatim into the backend's `config` field. That breaks execution via the Roboflow Inference server, which parses `config` and then does: workflow_config = json.loads(response["workflow"]["config"]) specification = workflow_config["specification"] # -> KeyError -> MalformedWorkflowResponseError -> HTTP 502 User-facing Workflows JSON — as published at https://inference.roboflow.com/workflows/create_and_run/, in `inference/development/workflows_examples/*`, and in the in-repo integration tests (e.g. LEGACY_DETECTION_PLUS_CLASSIFICATION_WORKFLOW) — is the flat shape {"version", "inputs", "steps", "outputs"}. The web app silently wraps it in {"specification": ...} before POST so that the inference server can unwrap it. Nothing in the SDK/CLI did the same, so every CLI user who pasted the documented shape and then invoked `/infer/workflows/...` hit the 502 on first execution. Reproduced in EU staging (roboflow-eu-staging) against a freshly trained yolov11n model: CLI-created workflow returned `{"message": "Internal error. Request to Roboflow API failed."}` at the inference endpoint; inference-server logs showed `MalformedWorkflowResponseError: Workflow specification not found in Roboflow API response` at roboflow_api.py:749. Re-saving the same definition wrapped in {"specification": ...} made execution work end-to- end (15/15 test-set images, p50 ~1.2s). Fix is limited to the SDK write path and is backward compatible: - New helper `_normalize_workflow_config(config)` on `rfapi` wraps a bare workflow spec (dict or JSON string) in {"specification": ...}. Dicts already containing a top-level `specification` key are passed through unchanged; dicts without any of version/inputs/steps/outputs at the top level are left alone so callers sending custom payloads aren't second-guessed; non-JSON strings pass through as-is; None continues to map to "{}" (preserving the "create empty workflow" default behaviour exercised by test_defaults_config_and_template). - `create_workflow` and `update_workflow` in `roboflow.adapters.rfapi` now call the helper on `config`. New tests cover bare-dict wrap, bare-JSON-string wrap, already-wrapped no-op, non-workflow-dict passthrough, and the empty/None/non-JSON edge cases in `_normalize_workflow_config` directly. Existing tests pass unchanged — including the byte-for-byte string passthrough for non-workflow JSON. --- roboflow/adapters/rfapi.py | 56 ++++++++++++++++--- tests/adapters/test_rfapi_phase2.py | 87 +++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+), 8 deletions(-) diff --git a/roboflow/adapters/rfapi.py b/roboflow/adapters/rfapi.py index f08043e2..82a04ea1 100644 --- a/roboflow/adapters/rfapi.py +++ b/roboflow/adapters/rfapi.py @@ -653,6 +653,46 @@ def delete_folder(api_key, workspace_url, group_id): # --------------------------------------------------------------------------- +_WORKFLOW_SPEC_KEYS = frozenset({"version", "inputs", "steps", "outputs"}) + + +def _normalize_workflow_config(config): + """Return a JSON string suitable for the backend's ``config`` field. + + The backend stores the ``config`` value verbatim, and the Roboflow inference + server expects to parse it to ``{"specification": {...}}`` (see + ``inference.core.roboflow_api.get_workflow_specification``). User-facing + Workflows JSON — as published in docs.roboflow.com/workflows, + ``inference/development/workflows_examples/*``, and the web UI's "View JSON" + export — is the flat shape ``{"version", "inputs", "steps", "outputs"}``. + The web app silently wraps it in ``{"specification": ...}`` before POSTing; + this helper does the same for SDK/CLI callers so that users don't need to + know the backend's storage convention. + + Behavior: + - ``None`` -> ``"{}"`` (preserves legacy "empty workflow" default). + - Anything already wrapped (``{"specification": ...}``) is passed through. + - Dicts or JSON strings that look like a bare workflow spec — i.e., contain + any of ``version``/``inputs``/``steps``/``outputs`` at the top level — + get wrapped. + - Any other input is preserved as-is (stringified if needed) so callers who + intentionally send custom payloads aren't second-guessed. + """ + if config is None: + return "{}" + if isinstance(config, str): + try: + parsed = json.loads(config) + except (ValueError, TypeError): + return config + if isinstance(parsed, dict) and "specification" not in parsed and _WORKFLOW_SPEC_KEYS & parsed.keys(): + return json.dumps({"specification": parsed}) + return config # preserve user-supplied string unchanged when no wrap needed + if isinstance(config, dict) and "specification" not in config and _WORKFLOW_SPEC_KEYS & config.keys(): + config = {"specification": config} + return json.dumps(config) + + def list_workflows(api_key, workspace_url): """GET /{ws}/workflows — list workflows.""" response = requests.get(f"{API_URL}/{workspace_url}/workflows", params={"api_key": api_key}) @@ -688,13 +728,11 @@ def create_workflow(api_key, workspace_url, *, name, url=None, config=None, temp import re url = re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-") - if config is None: - config = "{}" if template is None: template = "{}" - # config/template must be strings (the API validates with Joi.string) - if not isinstance(config, str): - config = json.dumps(config) + # config must be the backend's stored shape (`{"specification": ...}`); + # auto-wrap bare workflow definitions so docs-shaped JSON works unchanged. + config = _normalize_workflow_config(config) if not isinstance(template, str): template = json.dumps(template) params: Dict[str, str] = { @@ -723,10 +761,12 @@ def update_workflow(api_key, workspace_url, *, workflow_id, workflow_name, workf workflow_id: The workflow's internal ID. workflow_name: The workflow's display name. workflow_url: The workflow's URL slug. - config: JSON string (or dict) of the workflow config. + config: JSON string (or dict) of the workflow config. Bare workflow + definitions (``{"version", "inputs", "steps", "outputs"}``) are + auto-wrapped in ``{"specification": ...}`` to match the backend's + stored shape; see ``_normalize_workflow_config``. """ - if not isinstance(config, str): - config = json.dumps(config) + config = _normalize_workflow_config(config) payload: Dict[str, str] = { "id": workflow_id, "name": workflow_name, diff --git a/tests/adapters/test_rfapi_phase2.py b/tests/adapters/test_rfapi_phase2.py index 3537e7fe..38aa4f0e 100644 --- a/tests/adapters/test_rfapi_phase2.py +++ b/tests/adapters/test_rfapi_phase2.py @@ -332,6 +332,81 @@ def test_error(self, mock_post): with self.assertRaises(RoboflowError): create_workflow("key", "ws", name="Bad") + @patch("roboflow.adapters.rfapi.requests.post") + def test_bare_spec_dict_is_auto_wrapped(self, mock_post): + """Docs-shaped workflow definitions get wrapped in {"specification": ...} + so they match the backend's stored format and the inference server's + expectation. See `_normalize_workflow_config`.""" + import json as _json + + from roboflow.adapters.rfapi import create_workflow + + mock_post.return_value = MagicMock(status_code=201, json=lambda: {"workflow": {"url": "wf"}}) + bare = {"version": "1.0", "inputs": [], "steps": [], "outputs": []} + create_workflow("key", "ws", name="WF", config=bare) + sent_config = _json.loads(mock_post.call_args[1]["params"]["config"]) + self.assertEqual(sent_config, {"specification": bare}) + + @patch("roboflow.adapters.rfapi.requests.post") + def test_already_wrapped_config_is_not_double_wrapped(self, mock_post): + import json as _json + + from roboflow.adapters.rfapi import create_workflow + + mock_post.return_value = MagicMock(status_code=201, json=lambda: {"workflow": {"url": "wf"}}) + wrapped = {"specification": {"version": "1.0", "inputs": [], "steps": [], "outputs": []}} + create_workflow("key", "ws", name="WF", config=wrapped) + sent_config = _json.loads(mock_post.call_args[1]["params"]["config"]) + self.assertEqual(sent_config, wrapped) + + @patch("roboflow.adapters.rfapi.requests.post") + def test_bare_spec_json_string_is_auto_wrapped(self, mock_post): + """JSON strings are parsed, wrapped if bare, and re-serialized.""" + import json as _json + + from roboflow.adapters.rfapi import create_workflow + + mock_post.return_value = MagicMock(status_code=201, json=lambda: {"workflow": {"url": "wf"}}) + bare_str = '{"version": "1.0", "steps": []}' + create_workflow("key", "ws", name="WF", config=bare_str) + sent_config = _json.loads(mock_post.call_args[1]["params"]["config"]) + self.assertEqual(sent_config, {"specification": {"version": "1.0", "steps": []}}) + + @patch("roboflow.adapters.rfapi.requests.post") + def test_non_workflow_dict_is_not_wrapped(self, mock_post): + """Dicts that don't look like a workflow spec (no version/inputs/steps/outputs) + are passed through unchanged to avoid second-guessing custom payloads.""" + import json as _json + + from roboflow.adapters.rfapi import create_workflow + + mock_post.return_value = MagicMock(status_code=201, json=lambda: {"workflow": {"url": "wf"}}) + create_workflow("key", "ws", name="WF", config={"a": 1}) + sent_config = _json.loads(mock_post.call_args[1]["params"]["config"]) + self.assertEqual(sent_config, {"a": 1}) + + +class TestNormalizeWorkflowConfig(unittest.TestCase): + def test_none_returns_empty_object(self): + from roboflow.adapters.rfapi import _normalize_workflow_config + + self.assertEqual(_normalize_workflow_config(None), "{}") + + def test_empty_dict_preserved(self): + from roboflow.adapters.rfapi import _normalize_workflow_config + + self.assertEqual(_normalize_workflow_config({}), "{}") + + def test_string_without_workflow_keys_preserved_byte_for_byte(self): + from roboflow.adapters.rfapi import _normalize_workflow_config + + self.assertEqual(_normalize_workflow_config('{"a":1}'), '{"a":1}') + + def test_non_json_string_passthrough(self): + from roboflow.adapters.rfapi import _normalize_workflow_config + + self.assertEqual(_normalize_workflow_config("not json"), "not json") + class TestUpdateWorkflow(unittest.TestCase): @patch("roboflow.adapters.rfapi.requests.post") @@ -360,6 +435,18 @@ def test_config_string_passthrough(self, mock_post): payload = mock_post.call_args[1]["json"] self.assertEqual(payload["config"], '{"a":1}') + @patch("roboflow.adapters.rfapi.requests.post") + def test_bare_spec_dict_is_auto_wrapped_on_update(self, mock_post): + import json as _json + + from roboflow.adapters.rfapi import update_workflow + + mock_post.return_value = MagicMock(status_code=200, json=lambda: {"status": "ok"}) + bare = {"version": "1.0", "inputs": [], "steps": [], "outputs": []} + update_workflow("key", "ws", workflow_id="id-1", workflow_name="WF1", workflow_url="wf1", config=bare) + sent_config = _json.loads(mock_post.call_args[1]["json"]["config"]) + self.assertEqual(sent_config, {"specification": bare}) + @patch("roboflow.adapters.rfapi.requests.post") def test_error(self, mock_post): from roboflow.adapters.rfapi import RoboflowError, update_workflow From 5fb6655ecfbfa1865be9207963432f34e3f969c5 Mon Sep 17 00:00:00 2001 From: Brandon Garcia Date: Tue, 21 Apr 2026 12:15:25 -0700 Subject: [PATCH 2/3] review: BOM strip, compact JSON, extra edge-case tests Addresses reviewer feedback on the workflow-spec auto-wrap: 1. Strip a leading UTF-8 BOM (``\ufeff``) before ``json.loads`` so configs saved from Windows editors are recognized as bare workflow specs and get wrapped. Previously they fell through the JSON-parse ``except`` branch and shipped to the backend unchanged, reproducing the original 502. 2. Serialize wrapped output with compact separators (``(",", ":")``) to match the shape the web app writes via ``JSON.stringify``. Downstream audit/diff tooling sees SDK- and UI-written rows as byte-identical when the logical content matches. 3. Additional unit tests cover the boundaries reviewers flagged: already-wrapped JSON string passthrough, partial workflow dicts (single workflow-shaped key), JSON array / scalar input (guards the ``isinstance(parsed, dict)`` check), UTF-8 BOM auto-wrap, and the compact-separators invariant. 4. Rename ``test_empty_dict_preserved`` to ``test_empty_dict_serialized_to_empty_json`` with a comment noting the behavior is ``json.dumps({}) == "{}"`` rather than an explicit preservation branch. 5. Hoist ``_normalize_workflow_config`` and ``json`` imports to module level in the test file; individual tests no longer repeat the import. 475 passed, 1 skipped. --- roboflow/adapters/rfapi.py | 12 ++++-- tests/adapters/test_rfapi_phase2.py | 57 ++++++++++++++++++++++++----- 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/roboflow/adapters/rfapi.py b/roboflow/adapters/rfapi.py index 82a04ea1..89d44599 100644 --- a/roboflow/adapters/rfapi.py +++ b/roboflow/adapters/rfapi.py @@ -675,21 +675,27 @@ def _normalize_workflow_config(config): - Dicts or JSON strings that look like a bare workflow spec — i.e., contain any of ``version``/``inputs``/``steps``/``outputs`` at the top level — get wrapped. + - A leading UTF-8 BOM on string input is stripped before parsing so files + saved from Windows editors still get wrapped correctly. + - When a wrap happens, the result is serialized with compact separators + (``","``, ``":"``) to match the shape the web app writes, so audit / + diff tools don't see SDK-written and UI-written rows as different. - Any other input is preserved as-is (stringified if needed) so callers who intentionally send custom payloads aren't second-guessed. """ if config is None: return "{}" if isinstance(config, str): + stripped = config.lstrip("\ufeff") try: - parsed = json.loads(config) + parsed = json.loads(stripped) except (ValueError, TypeError): return config if isinstance(parsed, dict) and "specification" not in parsed and _WORKFLOW_SPEC_KEYS & parsed.keys(): - return json.dumps({"specification": parsed}) + return json.dumps({"specification": parsed}, separators=(",", ":")) return config # preserve user-supplied string unchanged when no wrap needed if isinstance(config, dict) and "specification" not in config and _WORKFLOW_SPEC_KEYS & config.keys(): - config = {"specification": config} + return json.dumps({"specification": config}, separators=(",", ":")) return json.dumps(config) diff --git a/tests/adapters/test_rfapi_phase2.py b/tests/adapters/test_rfapi_phase2.py index 38aa4f0e..3d561ac6 100644 --- a/tests/adapters/test_rfapi_phase2.py +++ b/tests/adapters/test_rfapi_phase2.py @@ -1,8 +1,11 @@ """Unit tests for Phase 2 rfapi functions.""" +import json import unittest from unittest.mock import MagicMock, patch +from roboflow.adapters.rfapi import _normalize_workflow_config + class TestListBatches(unittest.TestCase): @patch("roboflow.adapters.rfapi.requests.get") @@ -387,26 +390,62 @@ def test_non_workflow_dict_is_not_wrapped(self, mock_post): class TestNormalizeWorkflowConfig(unittest.TestCase): - def test_none_returns_empty_object(self): - from roboflow.adapters.rfapi import _normalize_workflow_config + """Direct unit tests for the private ``_normalize_workflow_config`` helper. - self.assertEqual(_normalize_workflow_config(None), "{}") + Imported from the private API intentionally — whitebox tests lock the + behavior contract that ``create_workflow``/``update_workflow`` rely on. + """ - def test_empty_dict_preserved(self): - from roboflow.adapters.rfapi import _normalize_workflow_config + def test_none_returns_empty_object(self): + self.assertEqual(_normalize_workflow_config(None), "{}") + def test_empty_dict_serialized_to_empty_json(self): + # Empty dict has no workflow keys, so it falls through the wrap check + # and serializes to ``"{}"`` — coincidentally matching the legacy + # ``None -> "{}"`` default. self.assertEqual(_normalize_workflow_config({}), "{}") def test_string_without_workflow_keys_preserved_byte_for_byte(self): - from roboflow.adapters.rfapi import _normalize_workflow_config - self.assertEqual(_normalize_workflow_config('{"a":1}'), '{"a":1}') def test_non_json_string_passthrough(self): - from roboflow.adapters.rfapi import _normalize_workflow_config - self.assertEqual(_normalize_workflow_config("not json"), "not json") + def test_already_wrapped_json_string_preserved_byte_for_byte(self): + wrapped = '{"specification": {"version": "1.0"}}' + self.assertEqual(_normalize_workflow_config(wrapped), wrapped) + + def test_partial_workflow_dict_is_wrapped(self): + # Single workflow-shaped key at top level is enough to classify as a + # bare spec; users often build definitions incrementally. + result = _normalize_workflow_config({"steps": [{"id": "s1"}]}) + self.assertEqual(json.loads(result), {"specification": {"steps": [{"id": "s1"}]}}) + + def test_json_array_input_preserved(self): + # ``isinstance(parsed, dict)`` guards against calling ``.keys()`` on + # non-dict JSON; pinning the no-wrap behavior here protects that. + self.assertEqual(_normalize_workflow_config("[1,2,3]"), "[1,2,3]") + + def test_json_scalar_inputs_preserved(self): + self.assertEqual(_normalize_workflow_config("42"), "42") + self.assertEqual(_normalize_workflow_config("true"), "true") + self.assertEqual(_normalize_workflow_config("null"), "null") + + def test_utf8_bom_stripped_before_parse(self): + # Windows editors frequently prepend a UTF-8 BOM. Without the strip, + # ``json.loads`` raises and the raw (unwrapped) string would ship — + # reproducing the exact 502 this PR is meant to fix. + bom_str = '\ufeff{"version":"1.0","steps":[]}' + result = _normalize_workflow_config(bom_str) + self.assertEqual(json.loads(result), {"specification": {"version": "1.0", "steps": []}}) + + def test_wrapped_output_uses_compact_separators(self): + # Matches the shape the web UI writes via ``JSON.stringify``, so + # Firestore audit/diff tooling sees SDK- and UI-written rows as + # byte-identical when the logical content matches. + result = _normalize_workflow_config({"version": "1.0", "steps": []}) + self.assertEqual(result, '{"specification":{"version":"1.0","steps":[]}}') + class TestUpdateWorkflow(unittest.TestCase): @patch("roboflow.adapters.rfapi.requests.post") From 88487d66521e66fe5c5c741d4d2a100a3a62ae92 Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Tue, 28 Apr 2026 22:50:04 -0500 Subject: [PATCH 3/3] review: also strip UTF-8 BOM from passthrough strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the BOM strip only fed `json.loads`; the returned string still carried the BOM in the two passthrough branches (already-wrapped JSON string and non-spec JSON string). The inference server's `json.loads` rejects a leading BOM ("Unexpected UTF-8 BOM"), so a Windows-edited already-wrapped config still produced the same 502 this PR set out to fix. Return `stripped` instead of `config` from both no-wrap branches. `lstrip("")` is a no-op on non-BOM input, so the byte-for-byte preservation guarantee for the documented test cases is unchanged. Add three tests covering the previously-leaky cases: BOM + already wrapped, BOM + non-workflow dict string, BOM + non-JSON string. 512 passed, 1 skipped. Co-Authored-By: Claude Opus 4.7 (1M context) --- roboflow/adapters/rfapi.py | 9 +++++---- tests/adapters/test_rfapi_phase2.py | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/roboflow/adapters/rfapi.py b/roboflow/adapters/rfapi.py index 0385600c..c0b00f39 100644 --- a/roboflow/adapters/rfapi.py +++ b/roboflow/adapters/rfapi.py @@ -676,8 +676,9 @@ def _normalize_workflow_config(config): - Dicts or JSON strings that look like a bare workflow spec — i.e., contain any of ``version``/``inputs``/``steps``/``outputs`` at the top level — get wrapped. - - A leading UTF-8 BOM on string input is stripped before parsing so files - saved from Windows editors still get wrapped correctly. + - A leading UTF-8 BOM on string input is stripped before parsing AND on + the returned value, so files saved from Windows editors don't ship a + BOM to the backend (the inference server's ``json.loads`` rejects it). - When a wrap happens, the result is serialized with compact separators (``","``, ``":"``) to match the shape the web app writes, so audit / diff tools don't see SDK-written and UI-written rows as different. @@ -691,10 +692,10 @@ def _normalize_workflow_config(config): try: parsed = json.loads(stripped) except (ValueError, TypeError): - return config + return stripped if isinstance(parsed, dict) and "specification" not in parsed and _WORKFLOW_SPEC_KEYS & parsed.keys(): return json.dumps({"specification": parsed}, separators=(",", ":")) - return config # preserve user-supplied string unchanged when no wrap needed + return stripped # preserve user-supplied string when no wrap needed (BOM stripped) if isinstance(config, dict) and "specification" not in config and _WORKFLOW_SPEC_KEYS & config.keys(): return json.dumps({"specification": config}, separators=(",", ":")) return json.dumps(config) diff --git a/tests/adapters/test_rfapi_phase2.py b/tests/adapters/test_rfapi_phase2.py index 3d561ac6..cc205df5 100644 --- a/tests/adapters/test_rfapi_phase2.py +++ b/tests/adapters/test_rfapi_phase2.py @@ -439,6 +439,29 @@ def test_utf8_bom_stripped_before_parse(self): result = _normalize_workflow_config(bom_str) self.assertEqual(json.loads(result), {"specification": {"version": "1.0", "steps": []}}) + def test_utf8_bom_stripped_when_already_wrapped(self): + # Already-wrapped JSON saved from a Windows editor would otherwise + # ship the BOM through to the backend, where the inference server's + # ``json.loads`` rejects it ("Unexpected UTF-8 BOM") \u2014 same 502 in + # a different shape. + bom_wrapped = '\ufeff{"specification": {"version": "1.0"}}' + self.assertEqual( + _normalize_workflow_config(bom_wrapped), + '{"specification": {"version": "1.0"}}', + ) + + def test_utf8_bom_stripped_for_non_workflow_dict_string(self): + # A custom JSON payload (not a workflow spec) with a leading BOM + # also gets the BOM removed so the backend stores parseable JSON. + bom_custom = '\ufeff{"a":1}' + self.assertEqual(_normalize_workflow_config(bom_custom), '{"a":1}') + + def test_utf8_bom_stripped_for_non_json_string(self): + # Non-JSON string with a BOM: still strip the BOM, since shipping + # it verbatim has no upside and would only produce a downstream + # decode error if anything ever tries to parse it. + self.assertEqual(_normalize_workflow_config("\ufeffnot json"), "not json") + def test_wrapped_output_uses_compact_separators(self): # Matches the shape the web UI writes via ``JSON.stringify``, so # Firestore audit/diff tooling sees SDK- and UI-written rows as