From 0f95c42b587f05e8c1c339ef687d3ed2d0cb71e9 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Tue, 17 Feb 2026 16:31:50 +0000 Subject: [PATCH 1/6] direct metadata validation from json --- app/tasks/validation_tasks.py | 57 ++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index e27c46c..8e1c115 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -7,6 +7,7 @@ import logging import os import shutil +import json from typing import Optional from rocrate_validator import services @@ -22,7 +23,6 @@ find_validation_object_on_minio ) from app.utils.webhook_utils import send_webhook_notification -from app.utils.file_utils import build_metadata_only_rocrate logger = logging.getLogger(__name__) @@ -98,7 +98,7 @@ def process_validation_task_by_id( @celery.task def process_validation_task_by_metadata( - crate_json: str, profile_name: str | None, webhook_url: str | None + crate_json: str, profile_name: str | None, webhook_url: str | None, profiles_path: Optional[str] = None ) -> ValidationResult | str: """ Background task to process the RO-Crate validation for a given json metadata string. @@ -111,19 +111,13 @@ def process_validation_task_by_metadata( :todo: Replace the Crate ID with a more comprehensive system, and replace profile name with URI. 
""" - skip_checks_list = ['ro-crate-1.1_12.1'] - file_path = None - try: - # Fetch the RO-Crate from MinIO using the provided ID: - file_path = build_metadata_only_rocrate(crate_json) - - logging.info(f"Processing validation task for {file_path}") + logging.info(f"Processing validation task for provided metadata string") # Perform validation: - validation_result = perform_ro_crate_validation(file_path, + validation_result = perform_metadata_validation(crate_json, profile_name, - skip_checks_list + profiles_path=profiles_path ) if isinstance(validation_result, str): @@ -132,9 +126,9 @@ def process_validation_task_by_metadata( raise Exception(f"Validation failed: {validation_result}") if not validation_result.has_issues(): - logging.info(f"RO Crate {file_path} is valid.") + logging.info("RO Crate metadata is valid.") else: - logging.info(f"RO Crate {file_path} is invalid.") + logging.info("RO Crate metadata is invalid.") if webhook_url: send_webhook_notification(webhook_url, validation_result.to_json()) @@ -148,10 +142,6 @@ def process_validation_task_by_metadata( send_webhook_notification(webhook_url, error_data) finally: - # Clean up the temporary file if it was created: - if file_path and os.path.exists(file_path): - shutil.rmtree(file_path) - if isinstance(validation_result, str): return validation_result else: @@ -196,6 +186,39 @@ def perform_ro_crate_validation( return str(e) +def perform_metadata_validation( + crate_json: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None +) -> ValidationResult | str: + """ + Validates only RO-Crate metadata provided as a json string. + + :param crate_json: The JSON string containing the metadata + :param profile_name: The name of the validation profile to use. Defaults to None. If None, the CRS4 validator will + attempt to determine the profile. 
+ :param profiles_path: The path to the profiles definition directory + :param skip_checks_list: A list of checks to skip, if needed + :return: The validation result. + :raises Exception: If an error occurs during the validation process. + """ + + try: + logging.info(f"Validating ro-crate metadata with profile {profile_name}") + + settings = services.ValidationSettings( + **({"metadata_only": True}), + **({"metadata_dict": json.loads(crate_json)}), + **({"profile_identifier": profile_name} if profile_name else {}), + **({"skip_checks": skip_checks_list} if skip_checks_list else {}), + **({"profiles_path": profiles_path} if profiles_path else {}) + ) + + return services.validate(settings) + + except Exception as e: + logging.error(f"Unexpected error during validation: {e}") + return str(e) + + def check_ro_crate_exists( minio_client: object, bucket_name: str, From a8494adb63534f1411683088d75c06ddc65a4978 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Tue, 17 Feb 2026 16:32:30 +0000 Subject: [PATCH 2/6] tests updated and extended for json metadata validation --- tests/test_services.py | 3 +- tests/test_validation_tasks.py | 114 ++++++++++++++++++++++++--------- 2 files changed, 84 insertions(+), 33 deletions(-) diff --git a/tests/test_services.py b/tests/test_services.py index ccebeba..ca0d9b9 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -197,7 +197,8 @@ def test_queue_metadata(flask_app, crate_json: dict, profile: str, webhook: str, "{}", 422, "Required parameter crate_json is empty" ), - ] + ], + ids=["missing_crate_json","invalid_json","empty_json"] ) def test_queue_metadata_json_errors(flask_app, crate_json: str, status_code: int, response_error: str): response, status = queue_ro_crate_metadata_validation_task(crate_json) diff --git a/tests/test_validation_tasks.py b/tests/test_validation_tasks.py index 49c3fed..ade15b9 100644 --- a/tests/test_validation_tasks.py +++ 
b/tests/test_validation_tasks.py @@ -1,9 +1,11 @@ from unittest import mock import pytest +import json from app.tasks.validation_tasks import ( process_validation_task_by_id, perform_ro_crate_validation, + perform_metadata_validation, return_ro_crate_validation, process_validation_task_by_metadata, check_ro_crate_exists, @@ -227,34 +229,28 @@ def test_process_validation_failure( # Test function: process_validation_task_by_metadata @pytest.mark.parametrize( - "crate_json, profile_name, webhook_url, mock_path, validation_json, validation_value, os_path_exists", + "crate_json, profile_name, webhook_url, validation_json, validation_value", [ ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', - "test-profile", "https://example.com/webhook", "/tmp/crate", - '{"status": "valid"}', False, True + "test-profile", "https://example.com/webhook", + '{"status": "valid"}', False ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', - "test-profile", "https://example.com/webhook", "/tmp/crate", - '{"status": "invalid"}', True, True + "test-profile", "https://example.com/webhook", + '{"status": "invalid"}', True ) ], ids=["success_no_issues", "success_with_issues"] ) -@mock.patch("app.tasks.validation_tasks.shutil.rmtree") -@mock.patch("app.tasks.validation_tasks.os.path.exists") @mock.patch("app.tasks.validation_tasks.send_webhook_notification") -@mock.patch("app.tasks.validation_tasks.perform_ro_crate_validation") -@mock.patch("app.tasks.validation_tasks.build_metadata_only_rocrate") +@mock.patch("app.tasks.validation_tasks.perform_metadata_validation") def test_metadata_validation( - mock_build, mock_validate, mock_webhook, mock_exists, mock_rmtree, - crate_json: str, profile_name: str, webhook_url: str, mock_path: str, - validation_json: str, validation_value: bool, os_path_exists: bool + mock_validate, mock_webhook, + crate_json: str, profile_name: str, webhook_url: str, + validation_json: str, validation_value: bool, ): - 
mock_exists.return_value = os_path_exists - mock_build.return_value = mock_path - mock_result = mock.Mock() mock_result.has_issues.return_value = validation_value mock_result.to_json.return_value = validation_json @@ -263,39 +259,33 @@ def test_metadata_validation( result = process_validation_task_by_metadata(crate_json, profile_name, webhook_url) assert result == validation_json - mock_build.assert_called_once_with(crate_json) mock_validate.assert_called_once() mock_webhook.assert_called_once_with(webhook_url, validation_json) - mock_rmtree.assert_called_once_with(mock_path) @pytest.mark.parametrize( - "crate_json, profile_name, webhook_url, mock_path, validation_message, os_path_exists", + "crate_json, profile_name, webhook_url, validation_message", [ ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', - "test-profile", "https://example.com/webhook", "/tmp/crate", - "Validation error", True + "test-profile", "https://example.com/webhook", + "Validation error" ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', - "test-profile", None, "/tmp/crate", - "Validation error", True + "test-profile", None, + "Validation error" ) ], ids=["validation_fails", "validation_fails_no_webhook"] ) -@mock.patch("app.tasks.validation_tasks.shutil.rmtree") -@mock.patch("app.tasks.validation_tasks.os.path.exists", return_value=True) @mock.patch("app.tasks.validation_tasks.send_webhook_notification") -@mock.patch("app.tasks.validation_tasks.perform_ro_crate_validation") -@mock.patch("app.tasks.validation_tasks.build_metadata_only_rocrate") +@mock.patch("app.tasks.validation_tasks.perform_metadata_validation") def test_validation_fails_and_sends_error_notification_to_webhook( - mock_build, mock_validate, mock_webhook, mock_exists, mock_rmtree, - crate_json: str, profile_name: str, webhook_url: str, mock_path: str, - validation_message: str, os_path_exists: bool + mock_validate, mock_webhook, + crate_json: str, profile_name: str, webhook_url: 
str, + validation_message: str ): - mock_build.return_value = mock_path mock_validate.return_value = validation_message @@ -313,8 +303,6 @@ def test_validation_fails_and_sends_error_notification_to_webhook( # Make sure webhook not sent mock_webhook.assert_not_called() - mock_rmtree.assert_called_once_with(mock_path) - # Test function: perform_ro_crate_validation @@ -378,6 +366,68 @@ def test_validation_settings_error(mock_validation_settings, mock_validate): mock_validate.assert_not_called() +# Test function: perform_metadata_validation + +@pytest.mark.parametrize( + "crate_json, profile_name, skip_checks", + [ + ('{"id":"dummy json"}', "ro_profile", ["check1", "check2"]), + ('{"id":"dummy json"}', None, None) + ], + ids=["success_with_all_args", "success_with_only_crate"] +) +@mock.patch("app.tasks.validation_tasks.services.validate") +@mock.patch("app.tasks.validation_tasks.services.ValidationSettings") +def test_metadata_validation_success_with_all_args( + mock_validation_settings, mock_validate, + crate_json: str, profile_name: str, skip_checks: list +): + mock_result = mock.Mock() + mock_validate.return_value = mock_result + + result = perform_metadata_validation(crate_json, profile_name, skip_checks) + + # Assert that result was returned + assert result == mock_result + + # Validate proper construction of ValidationSettings + mock_validation_settings.assert_called_once() + args, kwargs = mock_validation_settings.call_args + assert kwargs["metadata_dict"] == json.loads(crate_json) + if profile_name is not None: + assert kwargs["profile_identifier"] == profile_name + else: + assert "profile_identifier" not in kwargs + if skip_checks is not None: + assert kwargs["skip_checks"] == skip_checks + else: + assert "skip_checks" not in kwargs + + mock_validate.assert_called_once_with(mock_validation_settings.return_value) + + +@mock.patch("app.tasks.validation_tasks.services.validate", side_effect=RuntimeError("Validation error")) 
+@mock.patch("app.tasks.validation_tasks.services.ValidationSettings") +def test_metadata_validation_raises_exception_and_returns_string(mock_validation_settings, mock_validate): + crate_json = '{"id":"test metadata"}' + result = perform_metadata_validation(crate_json, "profile", skip_checks_list=None) + + assert isinstance(result, str) + assert "Validation error" in result + mock_validate.assert_called_once() + + +@mock.patch("app.tasks.validation_tasks.services.validate") +@mock.patch("app.tasks.validation_tasks.services.ValidationSettings", side_effect=ValueError("Bad config")) +def test_metadata_validation_settings_error(mock_validation_settings, mock_validate): + crate_json = '{"id":"test metadata"}' + result = perform_metadata_validation(crate_json, None) + + assert isinstance(result, str) + assert "Bad config" in result + mock_validate.assert_not_called() + + # Test function: return_ro_crate_validation @mock.patch("app.tasks.validation_tasks.get_validation_status_from_minio") From 3838f116b8c96737321e3346ff7e3f072d7dea2e Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Tue, 17 Feb 2026 16:35:08 +0000 Subject: [PATCH 3/6] removed metadata only rocrate build function --- app/utils/file_utils.py | 53 ----------------------------------------- 1 file changed, 53 deletions(-) delete mode 100644 app/utils/file_utils.py diff --git a/app/utils/file_utils.py b/app/utils/file_utils.py deleted file mode 100644 index 15c16e4..0000000 --- a/app/utils/file_utils.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Utility methods for interacting with the File System.""" - -# Author: Douglas Lowe, Alexander Hambley -# License: MIT -# Copyright (c) 2025 eScience Lab, The University of Manchester - -import json -import logging -import os -import tempfile - -from dotenv import load_dotenv - - -logger = logging.getLogger(__name__) - - -def build_metadata_only_rocrate(crate_json: str) -> str: - """ - Creates a temporary directory for an empty 
RO-Crate, - and saves the JSON string as a metadata file. - - :param crate_json: The metadata string. - :return: The local file path where the RO-Crate is saved. - :raises ValueError: If the required environment variables are not set. - :raises Exception: If an unexpected error occurs during the operation. - """ - - load_dotenv() - - try: - # Prepare temporary file path to store RO Crate for validation: - temp_dir = tempfile.mkdtemp() - file_path = os.path.join(temp_dir, 'ro-crate-metadata.json') - - logging.info( - f"Creating RO-Crate Metadata file. File path: {file_path}" - ) - with open(file_path, 'w') as f: - f.write(crate_json) - logging.info( - f"RO-Crate metadata successfully saved to {file_path}." - ) - - return temp_dir - - except ValueError as value_error: - logging.error(f"Configuration Error: {value_error}") - raise - - except Exception as e: - logging.error(f"Unexpected error creating RO-Crate metadata: {e}") - raise From ef545b7fb6c0d9bd8adbc38687c6f84362fbefa4 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:30:50 +0000 Subject: [PATCH 4/6] pass profiles_path env variable to metadata testing function --- app/ro_crates/routes/post_routes.py | 4 +++- app/services/validation_service.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/ro_crates/routes/post_routes.py b/app/ro_crates/routes/post_routes.py index 2c517f4..4fcf4ab 100644 --- a/app/ro_crates/routes/post_routes.py +++ b/app/ro_crates/routes/post_routes.py @@ -111,4 +111,6 @@ def validate_ro_crate_metadata(json_data) -> tuple[Response, int]: else: profile_name = None - return queue_ro_crate_metadata_validation_task(crate_json, profile_name) + profiles_path = current_app.config["PROFILES_PATH"] + + return queue_ro_crate_metadata_validation_task(crate_json, profile_name, profiles_path=profiles_path) diff --git a/app/services/validation_service.py b/app/services/validation_service.py index b51a088..37c5c05 
100644 --- a/app/services/validation_service.py +++ b/app/services/validation_service.py @@ -61,7 +61,7 @@ def queue_ro_crate_validation_task( def queue_ro_crate_metadata_validation_task( - crate_json: str, profile_name=None, webhook_url=None + crate_json: str, profile_name=None, webhook_url=None, profiles_path=None ) -> tuple[Response, int]: """ Queues an RO-Crate for validation with Celery. @@ -69,6 +69,7 @@ def queue_ro_crate_metadata_validation_task( :param crate_id: The ID of the RO-Crate to validate. :param profile_name: The profile to validate against. :param webhook_url: The URL to POST the validation results to. + :param profiles_path: A path to the profile definition directory. :return: A tuple containing a JSON response and an HTTP status code. :raises: Exception: If an error occurs whilst queueing the task. """ @@ -90,7 +91,8 @@ def queue_ro_crate_metadata_validation_task( result = process_validation_task_by_metadata.delay( crate_json, profile_name, - webhook_url + webhook_url, + profiles_path ) if webhook_url: return jsonify({"message": "Validation in progress"}), 202 From 028523536412f5a03a9802e3fd1af4153f97614b Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:31:21 +0000 Subject: [PATCH 5/6] docstring and logging update --- app/tasks/validation_tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index 8e1c115..e6096d6 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -106,13 +106,14 @@ def process_validation_task_by_metadata( :param crate_json: A string containing the RO-Crate JSON metadata to validate. :param profile_name: The name of the validation profile to use. Defaults to None. :param webhook_url: The webhook URL to send notifications to. Defaults to None. + :param profiles_path: The path to the profiles definition directory. Defaults to None. 
:raises Exception: If an error occurs during the validation process. :todo: Replace the Crate ID with a more comprehensive system, and replace profile name with URI. """ try: - logging.info(f"Processing validation task for provided metadata string") + logging.info("Processing validation task for provided metadata string") # Perform validation: validation_result = perform_metadata_validation(crate_json, From 94cbefdba63897916fe5d5792f2c60d1642105af Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:32:12 +0000 Subject: [PATCH 6/6] add profiles_path variable to metadata api and service tests --- tests/test_api_routes.py | 12 +++++++----- tests/test_services.py | 14 +++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/test_api_routes.py b/tests/test_api_routes.py index e50b511..486378b 100644 --- a/tests/test_api_routes.py +++ b/tests/test_api_routes.py @@ -141,24 +141,26 @@ def test_validate_fails_missing_elements(client: FlaskClient, crate_id: str, pay # Test POST API: /v1/ro_crates/validate_metadata +# TODO: Write tests for profiles_path environment variable. This will require a refactoring of the create_app function. 
@pytest.mark.parametrize( - "payload, status_code, response_json", + "payload, status_code, response_json, profiles_path", [ ( { "crate_json": '{"@context": "https://w3id.org/ro/crate/1.1/context"}', "profile_name": "default" - }, 200, {"status": "success"} + }, 200, {"status": "success"}, None ), ( { "crate_json": '{"@context": "https://w3id.org/ro/crate/1.1/context"}', - }, 200, {"status": "success"} + }, 200, {"status": "success"}, None ), ], ids=["success_with_all_fields", "success_without_profile_name"] ) -def test_validate_metadata_success(client: FlaskClient, payload: dict, status_code: int, response_json: dict): +def test_validate_metadata_success(client: FlaskClient, payload: dict, status_code: int, + response_json: dict, profiles_path: str): with patch("app.ro_crates.routes.post_routes.queue_ro_crate_metadata_validation_task") as mock_queue: mock_queue.return_value = (response_json, status_code) @@ -167,7 +169,7 @@ def test_validate_metadata_success(client: FlaskClient, payload: dict, status_co crate_json = payload["crate_json"] if "crate_json" in payload else None profile_name = payload["profile_name"] if "profile_name" in payload else None - mock_queue.assert_called_once_with(crate_json, profile_name) + mock_queue.assert_called_once_with(crate_json, profile_name, profiles_path=profiles_path) assert response.status_code == status_code assert response.json == response_json diff --git a/tests/test_services.py b/tests/test_services.py index ca0d9b9..0413e17 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -141,32 +141,32 @@ def test_queue_ro_crate_validation_task_failure( # Test function: queue_ro_crate_metadata_validation_task @pytest.mark.parametrize( - "crate_json, profile, webhook, status_code, return_value, response_json, delay_side_effect", + "crate_json, profile, webhook, status_code, return_value, response_json, delay_side_effect, profiles_path", [ ( '{"@context": "https://w3id.org/ro/crate/1.1/context"}', "default", 
"http://webhook", 202, None, {"message": "Validation in progress"}, - None + None, None ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context"}', "default", None, 200, {"status": "ok"}, {"result": {"status": "ok"}}, - None + None, None ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context"}', "default", "http://webhook", 500, None, {"error": "Celery error"}, - Exception("Celery error") + Exception("Celery error"), None ), ], ids=["success_with_webhook", "success_without_webhook", "failure_celery_error"] ) def test_queue_metadata(flask_app, crate_json: dict, profile: str, webhook: str, status_code: int, return_value: dict, response_json: dict, - delay_side_effect: Exception): + delay_side_effect: Exception, profiles_path: str): with patch("app.services.validation_service.process_validation_task_by_metadata.delay", side_effect=delay_side_effect) as mock_delay: mock_result = MagicMock() @@ -175,9 +175,9 @@ def test_queue_metadata(flask_app, crate_json: dict, profile: str, webhook: str, if delay_side_effect is None: mock_delay.return_value = mock_result - response, status = queue_ro_crate_metadata_validation_task(crate_json, profile, webhook) + response, status = queue_ro_crate_metadata_validation_task(crate_json, profile, webhook, profiles_path) - mock_delay.assert_called_once_with(crate_json, profile, webhook) + mock_delay.assert_called_once_with(crate_json, profile, webhook, profiles_path) assert status == status_code assert response.json == response_json