From 4ca0f19fbce4a775ae7b2e22b1cc77ef33279dd5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Oct 2025 12:09:53 +0000 Subject: [PATCH 01/40] Bump minio from 7.2.16 to 7.2.18 Bumps [minio](https://github.com/minio/minio-py) from 7.2.16 to 7.2.18. - [Release notes](https://github.com/minio/minio-py/releases) - [Commits](https://github.com/minio/minio-py/compare/7.2.16...7.2.18) --- updated-dependencies: - dependency-name: minio dependency-version: 7.2.18 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index a2e7a54..2efc080 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,5 @@ celery==5.5.3 -minio==7.2.16 +minio==7.2.18 requests==2.32.5 Flask==3.1.2 Werkzeug==3.1.3 diff --git a/requirements.txt b/requirements.txt index 13fd7ef..4c88540 100644 --- a/requirements.txt +++ b/requirements.txt @@ -93,7 +93,7 @@ marshmallow==4.0.0 # webargs mdurl==0.1.2 # via markdown-it-py -minio==7.2.16 +minio==7.2.18 # via -r requirements.in owlrl==7.1.4 # via pyshacl From 0b7e4c3e29a851b06f0a07176b04b896ba844156 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Oct 2025 12:45:55 +0000 Subject: [PATCH 02/40] Bump python-dotenv from 1.1.1 to 1.2.1 Bumps [python-dotenv](https://github.com/theskumar/python-dotenv) from 1.1.1 to 1.2.1. - [Release notes](https://github.com/theskumar/python-dotenv/releases) - [Changelog](https://github.com/theskumar/python-dotenv/blob/main/CHANGELOG.md) - [Commits](https://github.com/theskumar/python-dotenv/compare/v1.1.1...v1.2.1) --- updated-dependencies: - dependency-name: python-dotenv dependency-version: 1.2.1 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 2efc080..7e00cc0 100644 --- a/requirements.in +++ b/requirements.in @@ -4,6 +4,6 @@ requests==2.32.5 Flask==3.1.2 Werkzeug==3.1.3 redis==6.4.0 -python-dotenv==1.1.1 +python-dotenv==1.2.1 apiflask==2.4.0 roc-validator==0.7.3 diff --git a/requirements.txt b/requirements.txt index 4c88540..f7e98ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -127,7 +127,7 @@ pyshacl==0.30.1 # via roc-validator python-dateutil==2.9.0.post0 # via celery -python-dotenv==1.1.1 +python-dotenv==1.2.1 # via -r requirements.in rdflib[html]==7.1.4 # via From e1ea2a2191721590c51b0eeb0285b1f62ae0fcb2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:50:00 +0000 Subject: [PATCH 03/40] Bump apiflask from 2.4.0 to 3.0.2 Bumps [apiflask](https://github.com/apiflask/apiflask) from 2.4.0 to 3.0.2. - [Release notes](https://github.com/apiflask/apiflask/releases) - [Changelog](https://github.com/apiflask/apiflask/blob/main/docs/changelog.md) - [Commits](https://github.com/apiflask/apiflask/compare/2.4.0...3.0.2) --- updated-dependencies: - dependency-name: apiflask dependency-version: 3.0.2 dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 2efc080..b46818b 100644 --- a/requirements.in +++ b/requirements.in @@ -5,5 +5,5 @@ Flask==3.1.2 Werkzeug==3.1.3 redis==6.4.0 python-dotenv==1.1.1 -apiflask==2.4.0 +apiflask==3.0.2 roc-validator==0.7.3 diff --git a/requirements.txt b/requirements.txt index 4c88540..aa1f4fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,9 @@ # amqp==5.3.1 # via kombu -apiflask==2.4.0 +annotated-types==0.7.0 + # via pydantic +apiflask==3.0.2 # via -r requirements.in apispec==6.8.2 # via apiflask @@ -51,6 +53,10 @@ click-repl==0.3.0 # via celery colorlog==6.9.0 # via roc-validator +dnspython==2.8.0 + # via email-validator +email-validator==2.3.0 + # via pydantic enum-tools==0.12.0 # via roc-validator flask==3.1.2 @@ -67,6 +73,7 @@ html5rdf==1.2.1 # via rdflib idna==3.10 # via + # email-validator # requests # url-normalize importlib-metadata==8.7.0 @@ -117,6 +124,10 @@ pycparser==2.22 # via cffi pycryptodome==3.23.0 # via minio +pydantic[email]==2.12.4 + # via apiflask +pydantic-core==2.41.5 + # via pydantic pygments==2.19.2 # via # enum-tools @@ -160,7 +171,12 @@ typing-extensions==4.14.1 # cattrs # enum-tools # minio + # pydantic + # pydantic-core # rich-click + # typing-inspection +typing-inspection==0.4.2 + # via pydantic tzdata==2025.2 # via kombu url-normalize==2.2.1 From 10546a49d2fd064c8a100fd20d768fe152de2a40 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 11:58:08 +0000 Subject: [PATCH 04/40] Bump minio from 7.2.18 to 7.2.19 Bumps [minio](https://github.com/minio/minio-py) from 7.2.18 to 7.2.19. 
- [Release notes](https://github.com/minio/minio-py/releases) - [Commits](https://github.com/minio/minio-py/compare/7.2.18...7.2.19) --- updated-dependencies: - dependency-name: minio dependency-version: 7.2.19 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 0ef0156..87e03c6 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,5 @@ celery==5.5.3 -minio==7.2.18 +minio==7.2.20 requests==2.32.5 Flask==3.1.2 Werkzeug==3.1.3 diff --git a/requirements.txt b/requirements.txt index 7540085..5c64cb3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -100,7 +100,7 @@ marshmallow==4.0.0 # webargs mdurl==0.1.2 # via markdown-it-py -minio==7.2.18 +minio==7.2.20 # via -r requirements.in owlrl==7.1.4 # via pyshacl From c87a35e9b5d818c9a82e576121551cf87cc82419 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 12:21:40 +0000 Subject: [PATCH 05/40] Bump redis from 6.4.0 to 7.1.0 Bumps [redis](https://github.com/redis/redis-py) from 6.4.0 to 7.1.0. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v6.4.0...v7.1.0) --- updated-dependencies: - dependency-name: redis dependency-version: 7.1.0 dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 87e03c6..182817e 100644 --- a/requirements.in +++ b/requirements.in @@ -3,7 +3,7 @@ minio==7.2.20 requests==2.32.5 Flask==3.1.2 Werkzeug==3.1.3 -redis==6.4.0 +redis==7.1.0 python-dotenv==1.2.1 apiflask==3.0.2 roc-validator==0.7.3 diff --git a/requirements.txt b/requirements.txt index 5c64cb3..1c664ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -145,7 +145,7 @@ rdflib[html]==7.1.4 # owlrl # pyshacl # roc-validator -redis==6.4.0 +redis==7.1.0 # via -r requirements.in requests==2.32.5 # via From 1198f8cb9b413f519d85c3a555a4fa792b2445a7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 14:53:46 +0000 Subject: [PATCH 06/40] Bump werkzeug from 3.1.3 to 3.1.4 Bumps [werkzeug](https://github.com/pallets/werkzeug) from 3.1.3 to 3.1.4. - [Release notes](https://github.com/pallets/werkzeug/releases) - [Changelog](https://github.com/pallets/werkzeug/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/werkzeug/compare/3.1.3...3.1.4) --- updated-dependencies: - dependency-name: werkzeug dependency-version: 3.1.4 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 182817e..f271f19 100644 --- a/requirements.in +++ b/requirements.in @@ -2,7 +2,7 @@ celery==5.5.3 minio==7.2.20 requests==2.32.5 Flask==3.1.2 -Werkzeug==3.1.3 +Werkzeug==3.1.4 redis==7.1.0 python-dotenv==1.2.1 apiflask==3.0.2 diff --git a/requirements.txt b/requirements.txt index 1c664ec..dd4c6ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -197,7 +197,7 @@ wcwidth==0.2.13 # prompt-toolkit webargs==8.7.0 # via apiflask -werkzeug==3.1.3 +werkzeug==3.1.4 # via # -r requirements.in # flask From 66f75b8d449ba2655d9ddde9278343e528fa162b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 6 Dec 2025 05:00:30 +0000 Subject: [PATCH 07/40] Bump urllib3 from 2.5.0 to 2.6.0 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.5.0 to 2.6.0. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.5.0...2.6.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.6.0 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1c664ec..ff5751e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -181,7 +181,7 @@ tzdata==2025.2 # via kombu url-normalize==2.2.1 # via requests-cache -urllib3==2.5.0 +urllib3==2.6.0 # via # minio # requests From 10d5d84459c73cda1dc5e3d988cf1671a6e28abc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Dec 2025 20:22:16 +0000 Subject: [PATCH 08/40] Bump marshmallow from 4.0.0 to 4.1.2 Bumps [marshmallow](https://github.com/marshmallow-code/marshmallow) from 4.0.0 to 4.1.2. - [Changelog](https://github.com/marshmallow-code/marshmallow/blob/dev/CHANGELOG.rst) - [Commits](https://github.com/marshmallow-code/marshmallow/compare/4.0.0...4.1.2) --- updated-dependencies: - dependency-name: marshmallow dependency-version: 4.1.2 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1c664ec..060a6fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -93,7 +93,7 @@ markupsafe==3.0.2 # flask # jinja2 # werkzeug -marshmallow==4.0.0 +marshmallow==4.1.2 # via # apiflask # flask-marshmallow From bdf04ca87d2a64327d9db4cd4d5adc66a54ba87a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jan 2026 11:21:50 +0000 Subject: [PATCH 09/40] Bump celery from 5.5.3 to 5.6.2 Bumps [celery](https://github.com/celery/celery) from 5.5.3 to 5.6.2. 
- [Release notes](https://github.com/celery/celery/releases) - [Changelog](https://github.com/celery/celery/blob/main/Changelog.rst) - [Commits](https://github.com/celery/celery/compare/v5.5.3...v5.6.2) --- updated-dependencies: - dependency-name: celery dependency-version: 5.6.2 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/requirements.in b/requirements.in index 182817e..5b48c7e 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,4 @@ -celery==5.5.3 +celery==5.6.2 minio==7.2.20 requests==2.32.5 Flask==3.1.2 diff --git a/requirements.txt b/requirements.txt index 1c664ec..b639bf7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ blinker==1.9.0 # via flask cattrs==25.1.1 # via requests-cache -celery==5.5.3 +celery==5.6.2 # via -r requirements.in certifi==2025.8.3 # via @@ -84,7 +84,7 @@ itsdangerous==2.2.0 # via flask jinja2==3.1.6 # via flask -kombu==5.5.4 +kombu==5.6.2 # via celery markdown-it-py==3.0.0 # via rich @@ -179,6 +179,8 @@ typing-inspection==0.4.2 # via pydantic tzdata==2025.2 # via kombu +tzlocal==5.3.1 + # via celery url-normalize==2.2.1 # via requests-cache urllib3==2.5.0 From bbade120b514fbe2209b335ab3f892690b0b2159 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 8 Jan 2026 06:39:07 +0000 Subject: [PATCH 10/40] Bump urllib3 from 2.6.0 to 2.6.3 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.6.0 to 2.6.3. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.6.0...2.6.3) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.6.3 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 131869f..6ce8834 100644 --- a/requirements.txt +++ b/requirements.txt @@ -183,7 +183,7 @@ tzlocal==5.3.1 # via celery url-normalize==2.2.1 # via requests-cache -urllib3==2.6.0 +urllib3==2.6.3 # via # minio # requests From cb2bf1cb4f59267704b37938ad11b4207aeda4ff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 9 Jan 2026 00:48:31 +0000 Subject: [PATCH 11/40] Bump werkzeug from 3.1.4 to 3.1.5 Bumps [werkzeug](https://github.com/pallets/werkzeug) from 3.1.4 to 3.1.5. - [Release notes](https://github.com/pallets/werkzeug/releases) - [Changelog](https://github.com/pallets/werkzeug/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/werkzeug/compare/3.1.4...3.1.5) --- updated-dependencies: - dependency-name: werkzeug dependency-version: 3.1.5 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 6ff7a01..adaa688 100644 --- a/requirements.in +++ b/requirements.in @@ -2,7 +2,7 @@ celery==5.6.2 minio==7.2.20 requests==2.32.5 Flask==3.1.2 -Werkzeug==3.1.4 +Werkzeug==3.1.5 redis==7.1.0 python-dotenv==1.2.1 apiflask==3.0.2 diff --git a/requirements.txt b/requirements.txt index 131869f..5bc8385 100644 --- a/requirements.txt +++ b/requirements.txt @@ -199,7 +199,7 @@ wcwidth==0.2.13 # prompt-toolkit webargs==8.7.0 # via apiflask -werkzeug==3.1.4 +werkzeug==3.1.5 # via # -r requirements.in # flask From c61931f2825f68e88ac19abc57ff9279168cca24 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 12 Jan 2026 13:31:09 +0000 Subject: [PATCH 12/40] rocrate-validator v0.8.0 --- requirements.in | 2 +- requirements.txt | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 6ff7a01..af155ed 100644 --- a/requirements.in +++ b/requirements.in @@ -6,4 +6,4 @@ Werkzeug==3.1.4 redis==7.1.0 python-dotenv==1.2.1 apiflask==3.0.2 -roc-validator==0.7.3 +roc-validator==0.8 diff --git a/requirements.txt b/requirements.txt index 131869f..81b0fc8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,8 @@ argon2-cffi==25.1.0 # via minio argon2-cffi-bindings==25.1.0 # via argon2-cffi +async-timeout==5.0.1 + # via redis attrs==25.3.0 # via # cattrs @@ -160,7 +162,7 @@ rich==13.9.4 # roc-validator rich-click==1.8.9 # via roc-validator -roc-validator==0.7.3 +roc-validator==0.8.0 # via -r requirements.in six==1.17.0 # via python-dateutil @@ -177,6 +179,8 @@ typing-extensions==4.14.1 # typing-inspection typing-inspection==0.4.2 # via pydantic +typos==1.42.0 + # via roc-validator tzdata==2025.2 # via kombu tzlocal==5.3.1 From 6df8f9a52bb1d1caac7a8d557160a8884863b4a9 Mon Sep 17 00:00:00 2001 From: 
Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 12 Jan 2026 16:41:28 +0000 Subject: [PATCH 13/40] docker project specified for local integration testing --- tests/test_integration.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index 2b4b9a7..c83cc6b 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -6,7 +6,7 @@ import os import docker from minio import Minio - +import uuid @pytest.fixture(scope="session") def docker_client(): @@ -17,8 +17,11 @@ def docker_client(): def docker_compose(docker_client): """Start Docker Compose before tests, shut down after.""" print("Starting Docker Compose...") + + PROJECT = f"test_{uuid.uuid4().hex}" + subprocess.run( - ["docker", "compose", "-f", "docker-compose-develop.yml", "up", "-d"], + ["docker", "compose", "-f", "docker-compose-develop.yml", "-p", PROJECT, "up", "-d"], check=True ) time.sleep(10) # Wait for services to start — adjust as needed @@ -35,7 +38,7 @@ def docker_compose(docker_client): print(logs) print("Stopping Docker Compose...") - subprocess.run(["docker", "compose", "down"], check=True) + subprocess.run(["docker", "compose", "-p", PROJECT, "down", "-v"], check=True) def load_test_data_into_minio(): @@ -50,9 +53,7 @@ def load_test_data_into_minio(): bucket_name = "ro-crates" test_data_dir = "tests/data/ro_crates" - # Ensure bucket exists - if not minio_client.bucket_exists(bucket_name): - minio_client.make_bucket(bucket_name) + minio_client.make_bucket(bucket_name) # Walk and upload files for root, _, files in os.walk(test_data_dir): From 8a7c2eacad013b82d956407f2631ba5ccb15212f Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 12 Jan 2026 16:46:18 +0000 Subject: [PATCH 14/40] white space cleanup --- tests/test_integration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_integration.py b/tests/test_integration.py 
index c83cc6b..1e90a1a 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -8,6 +8,7 @@ from minio import Minio import uuid + @pytest.fixture(scope="session") def docker_client(): return docker.from_env() From c216de73a508da0b69a33a0955965c9710f6c549 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 12 Jan 2026 14:50:52 +0000 Subject: [PATCH 15/40] profile path optional input for rocrate validation task --- app/tasks/validation_tasks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index 0a62b55..74cb265 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -158,7 +158,7 @@ def process_validation_task_by_metadata( def perform_ro_crate_validation( - file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None + file_path: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None ) -> ValidationResult | str: """ Validates an RO-Crate using the provided file path and profile name. @@ -166,6 +166,7 @@ def perform_ro_crate_validation( :param file_path: The path to the RO-Crate file to validate :param profile_name: The name of the validation profile to use. Defaults to None. If None, the CRS4 validator will attempt to determine the profile. + :param profiles_path: The path to the profiles definition directory :param skip_checks_list: A list of checks to skip, if needed :return: The validation result. :raises Exception: If an error occurs during the validation process. 
@@ -183,7 +184,8 @@ def perform_ro_crate_validation( settings = services.ValidationSettings( rocrate_uri=full_file_path, **({"profile_identifier": profile_name} if profile_name else {}), - **({"skip_checks": skip_checks_list} if skip_checks_list else {}) + **({"skip_checks": skip_checks_list} if skip_checks_list else {}), + **({"profiles_path": profiles_path} if profiles_path else {}) ) return services.validate(settings) From 2eaafd43b422a02a72b402f3a2b8cbd9ecea8611 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 19 Jan 2026 14:02:11 +0000 Subject: [PATCH 16/40] clean config class, use for celery app, add profiles_path --- app/utils/config.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index a57b63f..28e71ae 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -10,34 +10,32 @@ from flask import Flask +def get_env(name: str, default=None, required=False): + value = os.environ.get(name, default) + if required and value is None: + raise RuntimeError(f"Missing required environment variable: {name}") + return value + + class Config: """Base configuration class for the Flask application.""" - SECRET_KEY = os.getenv("SECRET_KEY", "my_precious") - # Celery configuration: - CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL") - CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND") + CELERY_BROKER_URL = get_env("CELERY_BROKER_URL", required=False) + CELERY_RESULT_BACKEND = get_env("CELERY_RESULT_BACKEND", required=False) - # MinIO configuration: - MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT") - MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY") - MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY") - MINIO_BUCKET_NAME = os.getenv("MINIO_BUCKET_NAME", "bucket-name") + # rocrate validator configuration: + PROFILES_PATH = get_env("PROFILES_PATH", required=False) class DevelopmentConfig(Config): """Development configuration 
class.""" - DEBUG = True - ENV = "development" class ProductionConfig(Config): """Production configuration class.""" - DEBUG = False - ENV = "production" class InvalidAPIUsage(Exception): @@ -63,10 +61,13 @@ def make_celery(app: Flask = None) -> Celery: :param app: The Flask application to use. :return: The Celery instance. """ + env = os.environ.get("FLASK_ENV", "development") + config_cls = ProductionConfig if env == "production" else DevelopmentConfig + celery = Celery( app.import_name if app else __name__, - broker=os.getenv("CELERY_BROKER_URL"), - backend=os.getenv("CELERY_RESULT_BACKEND"), + broker=config_cls.CELERY_BROKER_URL, + backend=config_cls.CELERY_RESULT_BACKEND, ) if app: From 5f3f1f4c1071765591778c192e6c270f7c7816da Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 19 Jan 2026 14:02:57 +0000 Subject: [PATCH 17/40] provide route for passing profiles_path to rocrate validator call --- app/ro_crates/routes/post_routes.py | 7 +++++-- app/services/validation_service.py | 6 ++++-- app/tasks/validation_tasks.py | 5 +++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/app/ro_crates/routes/post_routes.py b/app/ro_crates/routes/post_routes.py index c1ebcdb..2c517f4 100644 --- a/app/ro_crates/routes/post_routes.py +++ b/app/ro_crates/routes/post_routes.py @@ -7,7 +7,7 @@ from apiflask import APIBlueprint, Schema from apiflask.fields import String, Boolean from marshmallow.fields import Nested -from flask import Response +from flask import Response, current_app from app.services.validation_service import ( queue_ro_crate_validation_task, @@ -81,7 +81,10 @@ def validate_ro_crate_via_id(json_data, crate_id) -> tuple[Response, int]: else: profile_name = None - return queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name, webhook_url) + profiles_path = current_app.config["PROFILES_PATH"] + + return queue_ro_crate_validation_task(minio_config, crate_id, root_path, 
profile_name, + webhook_url, profiles_path) @post_routes_bp.post("/validate_metadata") diff --git a/app/services/validation_service.py b/app/services/validation_service.py index 67dde94..b51a088 100644 --- a/app/services/validation_service.py +++ b/app/services/validation_service.py @@ -25,7 +25,8 @@ def queue_ro_crate_validation_task( - minio_config, crate_id, root_path=None, profile_name=None, webhook_url=None + minio_config, crate_id, root_path=None, profile_name=None, webhook_url=None, + profiles_path=None ) -> tuple[Response, int]: """ Queues an RO-Crate for validation with Celery. @@ -51,7 +52,8 @@ def queue_ro_crate_validation_task( raise InvalidAPIUsage(f"No RO-Crate with prefix: {crate_id}", 400) try: - process_validation_task_by_id.delay(minio_config, crate_id, root_path, profile_name, webhook_url) + process_validation_task_by_id.delay(minio_config, crate_id, root_path, + profile_name, webhook_url, profiles_path) return jsonify({"message": "Validation in progress"}), 202 except Exception as e: diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index 74cb265..e27c46c 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -29,7 +29,8 @@ @celery.task def process_validation_task_by_id( - minio_config: dict, crate_id: str, root_path: str, profile_name: str | None, webhook_url: str | None + minio_config: dict, crate_id: str, root_path: str, profile_name: str | None, + webhook_url: str | None, profiles_path: str | None ) -> None: """ Background task to process the RO-Crate validation by ID. 
@@ -56,7 +57,7 @@ def process_validation_task_by_id( logging.info(f"Processing validation task for {file_path}") # Perform validation: - validation_result = perform_ro_crate_validation(file_path, profile_name) + validation_result = perform_ro_crate_validation(file_path, profile_name, profiles_path=profiles_path) if isinstance(validation_result, str): logging.error(f"Validation failed: {validation_result}") From 973cd038d8c86de2054c06fbbef7b5933d53f39e Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 19 Jan 2026 14:03:40 +0000 Subject: [PATCH 18/40] update tests for profile_paths variable --- tests/test_api_routes.py | 31 +++++++++++++++++++++---------- tests/test_services.py | 17 +++++++++++------ tests/test_validation_tasks.py | 28 ++++++++++++++-------------- 3 files changed, 46 insertions(+), 30 deletions(-) diff --git a/tests/test_api_routes.py b/tests/test_api_routes.py index f527501..e50b511 100644 --- a/tests/test_api_routes.py +++ b/tests/test_api_routes.py @@ -13,7 +13,7 @@ def client(): # Test POST API: /v1/ro_crates/{crate_id}/validation @pytest.mark.parametrize( - "crate_id, payload, status_code, response_json", + "crate_id, payload, profiles_path, status_code, response_json", [ ( "crate-123", { @@ -27,7 +27,9 @@ def client(): "root_path": "base_path", "webhook_url": "https://webhook.example.com", "profile_name": "default" - }, 202, {"message": "Validation in progress"} + }, + None, + 202, {"message": "Validation in progress"} ), ( "crate-123", { @@ -38,9 +40,11 @@ def client(): "ssl": False, "bucket": "test_bucket" }, - "root_path": "base_path", + "root_path": "base_path", "webhook_url": "https://webhook.example.com", - }, 202, {"message": "Validation in progress"} + }, + None, + 202, {"message": "Validation in progress"} ), ( "crate-123", { @@ -51,9 +55,11 @@ def client(): "ssl": False, "bucket": "test_bucket" }, - "root_path": "base_path", + "root_path": "base_path", "profile_name": "default" - }, 
202, {"message": "Validation in progress"} + }, + None, + 202, {"message": "Validation in progress"} ), ( "crate-123", { @@ -66,7 +72,9 @@ def client(): }, "webhook_url": "https://webhook.example.com", "profile_name": "default" - }, 202, {"message": "Validation in progress"} + }, + None, + 202, {"message": "Validation in progress"} ), ( "crate-123", { @@ -77,14 +85,17 @@ def client(): "ssl": False, "bucket": "test_bucket" }, - }, 202, {"message": "Validation in progress"} + }, + None, + 202, {"message": "Validation in progress"} ), ], ids=["validate_by_id", "validate_with_missing_profile_name", "validate_with_missing_webhook_url", "validate_with_missing_root_path", "validate_with_missing_root_path_and_profile_name_and_webhook_url"] ) -def test_validate_by_id_success(client: FlaskClient, crate_id: str, payload: dict, status_code: int, response_json: dict): +def test_validate_by_id_success(client: FlaskClient, crate_id: str, payload: dict, + profiles_path: str, status_code: int, response_json: dict): with patch("app.ro_crates.routes.post_routes.queue_ro_crate_validation_task") as mock_queue: mock_queue.return_value = (response_json, status_code) @@ -96,7 +107,7 @@ def test_validate_by_id_success(client: FlaskClient, crate_id: str, payload: dic webhook_url = payload["webhook_url"] if "webhook_url" in payload else None assert response.status_code == status_code assert response.json == response_json - mock_queue.assert_called_once_with(minio_config, crate_id, root_path, profile_name, webhook_url) + mock_queue.assert_called_once_with(minio_config, crate_id, root_path, profile_name, webhook_url, profiles_path) @pytest.mark.parametrize( diff --git a/tests/test_services.py b/tests/test_services.py index c7d50c3..ccebeba 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -22,7 +22,7 @@ def flask_app(): # Test function: queue_ro_crate_validation_task @pytest.mark.parametrize( - "crate_id, rocrate_exists, minio_client, delay_side_effects, payload, status_code, 
response_dict", + "crate_id, rocrate_exists, minio_client, delay_side_effects, payload, profiles_path, status_code, response_dict", [ ( "crate123", True, "minio_client", None, @@ -37,7 +37,9 @@ def flask_app(): "root_path": "base_path", "webhook_url": "https://webhook.example.com", "profile_name": "default" - }, 202, {"message": "Validation in progress"} + }, + None, + 202, {"message": "Validation in progress"} ), ( "crate123", True, "minio_client", Exception("Celery down"), @@ -52,7 +54,9 @@ def flask_app(): "root_path": "base_path", "webhook_url": "https://webhook.example.com", "profile_name": "default" - }, 500, {"error": "Celery down"} + }, + None, + 500, {"error": "Celery down"} ), ], ids=["successful_queue", "celery_server_down"] @@ -65,7 +69,7 @@ def test_queue_ro_crate_validation_task( mock_exists, mock_delay, flask_app: FlaskClient, crate_id: str, rocrate_exists: bool, minio_client: str, - delay_side_effects: Exception, payload: dict, status_code: int, response_dict: dict + delay_side_effects: Exception, payload: dict, profiles_path: str, status_code: int, response_dict: dict ): mock_delay.side_effect = delay_side_effects mock_exists.return_value = rocrate_exists @@ -76,11 +80,12 @@ def test_queue_ro_crate_validation_task( profile_name = payload["profile_name"] if "profile_name" in payload else None webhook_url = payload["webhook_url"] if "webhook_url" in payload else None - response, status_code = queue_ro_crate_validation_task(minio_config, crate_id, root_path, profile_name, webhook_url) + response, status_code = queue_ro_crate_validation_task(minio_config, crate_id, root_path, + profile_name, webhook_url, profiles_path) mock_client.assert_called_once_with(minio_config) mock_exists.assert_called_once_with(minio_client, minio_config["bucket"], crate_id, root_path) - mock_delay.assert_called_once_with(minio_config, crate_id, root_path, profile_name, webhook_url) + mock_delay.assert_called_once_with(minio_config, crate_id, root_path, profile_name, 
webhook_url, profiles_path) assert status_code == status_code assert response.json == response_dict diff --git a/tests/test_validation_tasks.py b/tests/test_validation_tasks.py index afa11c2..49c3fed 100644 --- a/tests/test_validation_tasks.py +++ b/tests/test_validation_tasks.py @@ -17,7 +17,7 @@ @pytest.mark.parametrize( "minio_config, crate_id, os_path_exists, os_path_isfile, os_path_isdir, " + - "return_value, webhook, profile, val_success, val_result, minio_client", + "return_value, webhook, profile, profiles_path, val_success, val_result, minio_client", [ ( { @@ -28,7 +28,7 @@ "bucket": "test_bucket" }, "crate123", True, True, False, "/tmp/crate.zip", - "https://example.com/hook", "profileA", True, '{"status": "valid"}', + "https://example.com/hook", "profileA", None, True, '{"status": "valid"}', "minio_client" ), ( @@ -40,7 +40,7 @@ "bucket": "test_bucket" }, "crate123", True, False, True, "/tmp/crate123", - "https://example.com/hook", "profileA", True, '{"status": "valid"}', + "https://example.com/hook", "profileA", None, True, '{"status": "valid"}', "minio_client" ), ( @@ -52,7 +52,7 @@ "bucket": "test_bucket" }, "crate123", True, False, True, "/tmp/crate123", - None, "profileA", True, '{"status": "valid"}', + None, "profileA", None, True, '{"status": "valid"}', "minio_client" ), ], @@ -80,7 +80,7 @@ def test_process_validation( mock_rmtree, mock_client, minio_config: dict, crate_id: str, os_path_exists: bool, os_path_isfile: bool, os_path_isdir: bool, - return_value: str, webhook: str, profile: str, val_success: bool, val_result: str, minio_client: str + return_value: str, webhook: str, profile: str, profiles_path: str, val_success: bool, val_result: str, minio_client: str ): mock_exists.return_value = os_path_exists mock_isfile.return_value = os_path_isfile @@ -93,11 +93,11 @@ def test_process_validation( mock_validation_result.to_json.return_value = val_result mock_validate.return_value = mock_validation_result - 
process_validation_task_by_id(minio_config, crate_id, "", profile, webhook) + process_validation_task_by_id(minio_config, crate_id, "", profile, webhook, profiles_path) mock_client.assert_called_once_with(minio_config) mock_fetch.assert_called_once_with(minio_client, minio_config["bucket"], crate_id, "") - mock_validate.assert_called_once_with(return_value, profile) + mock_validate.assert_called_once_with(return_value, profile, profiles_path=profiles_path) mock_update.assert_called_once_with(minio_client, minio_config["bucket"], crate_id, "", val_result) if webhook is not None: mock_webhook.assert_called_once_with(webhook, val_result) @@ -113,7 +113,7 @@ def test_process_validation( @pytest.mark.parametrize( "minio_config, crate_id, os_path_exists, os_path_isfile, os_path_isdir, return_fetch, " - + "webhook, profile, return_validate, validate_side_effect, fetch_side_effect, minio_client", + + "webhook, profile, profiles_path, return_validate, validate_side_effect, fetch_side_effect, minio_client", [ ( { @@ -124,7 +124,7 @@ def test_process_validation( "bucket": "test_bucket" }, "crate123", True, True, False, "/tmp/crate.zip", - "https://example.com/hook", "profileA", "Validation failed", None, None, + "https://example.com/hook", "profileA", None, "Validation failed", None, None, "minio_client" ), ( @@ -136,7 +136,7 @@ def test_process_validation( "bucket": "test_bucket" }, "crate123", True, True, False, "/tmp/crate.zip", - "https://example.com/hook", "profileA", None, Exception("Unexpected error"), None, + "https://example.com/hook", "profileA", None, None, Exception("Unexpected error"), None, "minio_client" ), ( @@ -148,7 +148,7 @@ def test_process_validation( "bucket": "test_bucket" }, "crate123", False, False, False, None, - "https://example.com/hook", "profileA", None, None, Exception("MinIO fetch failed"), + "https://example.com/hook", "profileA", None, None, None, Exception("MinIO fetch failed"), "minio_client" ), ], @@ -177,7 +177,7 @@ def 
test_process_validation_failure( mock_rmtree, mock_client, minio_config: dict, crate_id: str, os_path_exists: bool, os_path_isfile: bool, os_path_isdir: bool, - return_fetch: str, webhook: str, profile: str, return_validate: str, + return_fetch: str, webhook: str, profile: str, profiles_path: str, return_validate: str, validate_side_effect: Exception, fetch_side_effect: Exception, minio_client: str ): mock_exists.return_value = os_path_exists @@ -195,10 +195,10 @@ def test_process_validation_failure( else: mock_validate.side_effect = validate_side_effect - process_validation_task_by_id(minio_config, crate_id, "", profile, webhook) + process_validation_task_by_id(minio_config, crate_id, "", profile, webhook, profiles_path) if fetch_side_effect is None: - mock_validate.assert_called_once_with(return_fetch, profile) + mock_validate.assert_called_once_with(return_fetch, profile, profiles_path=profiles_path) else: mock_validate.assert_not_called() From cf02db7d6a395c5791c7e26a9049cea708d99379 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:00:53 +0000 Subject: [PATCH 19/40] switch to extra_profiles_path option for validator additional profiles --- app/tasks/validation_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index e27c46c..3b178e5 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -186,7 +186,7 @@ def perform_ro_crate_validation( rocrate_uri=full_file_path, **({"profile_identifier": profile_name} if profile_name else {}), **({"skip_checks": skip_checks_list} if skip_checks_list else {}), - **({"profiles_path": profiles_path} if profiles_path else {}) + **({"extra_profiles_path": profiles_path} if profiles_path else {}) ) return services.validate(settings) From 0db76edfbb325dfb962b7a8bcbc73cb912bbc1e8 Mon Sep 17 00:00:00 2001 From: Douglas Lowe 
<10961945+douglowe@users.noreply.github.com> Date: Tue, 20 Jan 2026 17:34:28 +0000 Subject: [PATCH 20/40] docker compose profile loading example --- docker-compose-develop.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker-compose-develop.yml b/docker-compose-develop.yml index fa7661d..e6b6a0e 100644 --- a/docker-compose-develop.yml +++ b/docker-compose-develop.yml @@ -32,9 +32,12 @@ services: - MINIO_ROOT_USER=${MINIO_ROOT_USER} - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} + - PROFILES_PATH=/app/profiles depends_on: - redis - minio + volumes: + - ./tests/data/rocrate_validator_profiles:/app/profiles:ro redis: image: "redis:alpine" From 1067ff5e648cc4ea6f9689fde38b0a4ead9b034e Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Tue, 20 Jan 2026 17:35:08 +0000 Subject: [PATCH 21/40] integration test for providing extra profile for validation --- tests/test_integration.py | 74 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tests/test_integration.py b/tests/test_integration.py index 1e90a1a..63941c4 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -350,6 +350,80 @@ def test_directory_rocrate_validation(): assert response_result["passed"] is False +def test_extra_profile_rocrate_validation(): + ro_crate = "ro_crate_2" + profile_name = "alpha-crate-0.1" + url_post = f"http://localhost:5001/v1/ro_crates/{ro_crate}/validation" + url_get = f"http://localhost:5001/v1/ro_crates/{ro_crate}/validation" + headers = { + "accept": "application/json", + "Content-Type": "application/json" + } + + # The API expects the JSON to be passed as a string + post_payload = { + "minio_config": { + "endpoint": "minio:9000", + "accesskey": "minioadmin", + "secret": "minioadmin", + "ssl": False, + "bucket": "ro-crates" + }, + "profile_name": profile_name + } + get_payload = { + "minio_config": { + "endpoint": "minio:9000", + 
"accesskey": "minioadmin", + "secret": "minioadmin", + "ssl": False, + "bucket": "ro-crates" + } + } + + # POST action and tests + response = requests.post(url_post, json=post_payload, headers=headers) + response_result = response.json()['message'] + + # Print response for debugging + print("Status Code:", response.status_code) + print("Response JSON:", response_result) + + # Assertions + assert response.status_code == 202 + assert response_result == "Validation in progress" + + # wait for ro-crate to be validated + time.sleep(10) + + # GET action and tests + response = requests.get(url_get, json=get_payload, headers=headers) + response_result = response.json() + + # Print response for debugging + print("Status Code:", response.status_code) + print("Response JSON:", response_result) + + start_time = time.time() + while response.status_code == 400: + time.sleep(10) + # GET action and tests + response = requests.get(url_get, json=get_payload, headers=headers) + response_result = response.json() + # Print response for debugging + print("Status Code:", response.status_code) + print("Response JSON:", response_result) + + elapsed = time.time() - start_time + if elapsed > 60: + print("60 seconds passed. 
Exiting loop") + break + + # Assertions + assert response.status_code == 200 + assert response_result["passed"] is False + + def test_ignore_rocrates_not_on_basepath(): ro_crate = "ro_crate_4" url_post = f"http://localhost:5001/v1/ro_crates/{ro_crate}/validation" From 1cd7608832de11866fe407b75f692459b3e636c6 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 9 Feb 2026 13:22:17 +0000 Subject: [PATCH 22/40] full profile directory for crate validator, not extra profiles path --- app/tasks/validation_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index 3b178e5..e27c46c 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -186,7 +186,7 @@ def perform_ro_crate_validation( rocrate_uri=full_file_path, **({"profile_identifier": profile_name} if profile_name else {}), **({"skip_checks": skip_checks_list} if skip_checks_list else {}), - **({"extra_profiles_path": profiles_path} if profiles_path else {}) + **({"profiles_path": profiles_path} if profiles_path else {}) ) return services.validate(settings) From f1181e5c6dbfaf8aaf9954c9b7c889afe42f5d9f Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 9 Feb 2026 13:23:17 +0000 Subject: [PATCH 23/40] profiles path (in develop) set for flask not celery worker --- docker-compose-develop.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-develop.yml b/docker-compose-develop.yml index e6b6a0e..bffae0f 100644 --- a/docker-compose-develop.yml +++ b/docker-compose-develop.yml @@ -16,6 +16,7 @@ services: - MINIO_ROOT_USER=${MINIO_ROOT_USER} - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} + - PROFILES_PATH=/app/profiles depends_on: - redis - minio @@ -32,7 +33,6 @@ services: - MINIO_ROOT_USER=${MINIO_ROOT_USER} - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} 
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} - - PROFILES_PATH=/app/profiles depends_on: - redis - minio From 4d8ee0aa733d5e99256b52dfc38a7bd62f802847 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 9 Feb 2026 13:30:40 +0000 Subject: [PATCH 24/40] remove extraneous environment variables from dev celery worker --- docker-compose-develop.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker-compose-develop.yml b/docker-compose-develop.yml index bffae0f..a8d27c2 100644 --- a/docker-compose-develop.yml +++ b/docker-compose-develop.yml @@ -29,10 +29,6 @@ services: environment: - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/0 - - MINIO_ENDPOINT=${MINIO_ENDPOINT} - - MINIO_ROOT_USER=${MINIO_ROOT_USER} - - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} - - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} depends_on: - redis - minio From 00708dfbf84abf0ae778a395dc960edd3bd5556b Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 9 Feb 2026 13:31:19 +0000 Subject: [PATCH 25/40] remove extraneous environment variables from main celery worker --- docker-compose.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8605c0c..bd47218 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,10 +26,6 @@ services: environment: - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/0 - - MINIO_ENDPOINT=${MINIO_ENDPOINT} - - MINIO_ROOT_USER=${MINIO_ROOT_USER} - - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} - - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} depends_on: - redis - minio From 6ac555e785b38a7975c88fc5bf7f84e519e7fbfb Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 9 Feb 2026 13:38:40 +0000 Subject: [PATCH 26/40] add test profiles --- .../alpha-crate/1_root_data_entity.ttl | 46 ++ .../alpha-crate/profile.ttl | 55 +++ 
.../may/11_workflow_execution_phase.ttl | 64 +++ .../five-safes-crate/may/12_check_phase.ttl | 56 +++ .../may/13_validation_phase.ttl | 57 +++ .../may/14_workflow_retrieval_phase.ttl | 102 +++++ .../may/1_responsible_project.ttl | 56 +++ .../five-safes-crate/may/4_sign_off.ttl | 58 +++ .../may/8_disclosure_phase.ttl | 58 +++ .../must/11_workflow_execution_phase.ttl | 64 +++ .../five-safes-crate/must/12_check_phase.ttl | 93 ++++ .../must/13_validation_phase.ttl | 126 ++++++ .../must/14_workflow_retrieval_phase.ttl | 157 +++++++ .../five-safes-crate/must/15_metadata_file.py | 64 +++ .../must/15_metadata_file.ttl | 41 ++ .../must/16_publishing_phase.ttl | 41 ++ .../must/1_requesting_agent.ttl | 82 ++++ .../must/1_responsible_project.ttl | 59 +++ .../must/1_root_data_entity.ttl | 46 ++ .../must/2_requesting_agent.ttl | 63 +++ .../must/3_timestamp_format.ttl | 54 +++ .../five-safes-crate/must/4_sign_off.ttl | 89 ++++ .../must/6_workflow_reference.ttl | 80 ++++ .../must/7_requested_workflow_run.ttl | 86 ++++ .../must/8_disclosure_phase.ttl | 100 +++++ .../five-safes-crate/profile.ttl | 83 ++++ .../five-safes-crate/should/10_outputs.ttl | 89 ++++ .../should/11_workflow_execution_phase.ttl | 85 ++++ .../should/12_check_phase.ttl | 190 +++++++++ .../should/13_validation_phase.ttl | 165 +++++++ .../should/14_workflow_retrieval_phase.ttl | 125 ++++++ .../should/1_requesting_agent.ttl | 46 ++ .../should/1_responsible_project.ttl | 60 +++ .../should/2_requesting_agent.ttl | 49 +++ .../five-safes-crate/should/4_sign_off.ttl | 177 ++++++++ .../should/6_workflow_reference.ttl | 49 +++ .../should/7_requested_workflow_run.ttl | 36 ++ .../should/8_disclosure_phase.ttl | 114 +++++ .../five-safes-crate/should/9_inputs.ttl | 58 +++ .../ro-crate/may/4_data_entity_metadata.ttl | 89 ++++ .../ro-crate/may/61_license_entity.ttl | 66 +++ .../ro-crate/must/0_file_descriptor_format.py | 401 ++++++++++++++++++ .../must/1_file-descriptor_metadata.ttl | 100 +++++ 
.../must/2_root_data_entity_metadata.ttl | 175 ++++++++ .../ro-crate/must/4_data_entity_metadata.py | 75 ++++ .../ro-crate/must/4_data_entity_metadata.ttl | 216 ++++++++++ .../must/5_web_data_entity_metadata.ttl | 50 +++ .../ro-crate/must/6_contextual_entity.ttl | 81 ++++ .../ro-crate/ontology.ttl | 67 +++ .../ro-crate/prefixes.ttl | 49 +++ .../ro-crate/profile.ttl | 74 ++++ .../should/2_root_data_entity_metadata.ttl | 74 ++++ .../should/2_root_data_entity_relative_uri.py | 42 ++ .../should/4_data_entity_existence.py | 58 +++ .../should/4_data_entity_metadata.ttl | 69 +++ .../should/5_web_data_entity_metadata.py | 73 ++++ .../should/5_web_data_entity_metadata.ttl | 63 +++ .../should/6_contextual_entity_metadata.ttl | 75 ++++ 58 files changed, 5020 insertions(+) create mode 100644 tests/data/rocrate_validator_profiles/alpha-crate/1_root_data_entity.ttl create mode 100644 tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/11_workflow_execution_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/12_check_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/13_validation_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/14_workflow_retrieval_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/1_responsible_project.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/4_sign_off.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/may/8_disclosure_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/11_workflow_execution_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/12_check_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/13_validation_phase.ttl create mode 100644 
tests/data/rocrate_validator_profiles/five-safes-crate/must/14_workflow_retrieval_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.py create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/16_publishing_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/1_requesting_agent.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/1_responsible_project.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/1_root_data_entity.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/2_requesting_agent.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/3_timestamp_format.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/4_sign_off.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/6_workflow_reference.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/7_requested_workflow_run.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/must/8_disclosure_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/profile.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/10_outputs.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/11_workflow_execution_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/12_check_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/13_validation_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/14_workflow_retrieval_phase.ttl create mode 100644 
tests/data/rocrate_validator_profiles/five-safes-crate/should/1_requesting_agent.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/1_responsible_project.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/2_requesting_agent.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/4_sign_off.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/6_workflow_reference.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/7_requested_workflow_run.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/8_disclosure_phase.ttl create mode 100644 tests/data/rocrate_validator_profiles/five-safes-crate/should/9_inputs.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/may/4_data_entity_metadata.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/may/61_license_entity.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/0_file_descriptor_format.py create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/1_file-descriptor_metadata.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/2_root_data_entity_metadata.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.py create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/5_web_data_entity_metadata.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/must/6_contextual_entity.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/ontology.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/prefixes.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/profile.ttl create mode 100644 
tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_metadata.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_relative_uri.py create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_existence.py create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_metadata.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.py create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.ttl create mode 100644 tests/data/rocrate_validator_profiles/ro-crate/should/6_contextual_entity_metadata.ttl diff --git a/tests/data/rocrate_validator_profiles/alpha-crate/1_root_data_entity.ttl b/tests/data/rocrate_validator_profiles/alpha-crate/1_root_data_entity.ttl new file mode 100644 index 0000000..af82b5d --- /dev/null +++ b/tests/data/rocrate_validator_profiles/alpha-crate/1_root_data_entity.ttl @@ -0,0 +1,46 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix alpha-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . 
+ + +alpha-crate:RootDataEntityRequiredProperties + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + + sh:property [ + a sh:PropertyShape ; + sh:name "sourceOrganization" ; + sh:path schema:sourceOrganization; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message """The Root Data Entity MUST have a `sourceOrganization` property.""" ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "sourceOrganization" ; + sh:path schema:sourceOrganization ; + sh:class schema:Project ; + sh:severity sh:Violation ; + sh:message """The `sourceOrganization` property of the RootDataEntity MUST point to a Project entity.""" ; + ] . diff --git a/tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl b/tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl new file mode 100644 index 0000000..d33dfef --- /dev/null +++ b/tests/data/rocrate_validator_profiles/alpha-crate/profile.ttl @@ -0,0 +1,55 @@ +# Copyright (c) 2024-2025 CRS4, University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . 
+ + + a prof:Profile ; + + # the Profile's label + rdfs:label "Alpha RO-Crate 0.1" ; + + # regular metadata, a basic description of the Profile + rdfs:comment """Alpha RO-Crate Metadata Specification 0.1"""@en ; + + # URI of the publisher of the Metadata Specification + dct:publisher ; + + # This profile is a transitive profile of the RO-Crate Metadata Specification + prof:isTransitiveProfileOf ; + + # this profile has a JSON-LD context resource + prof:hasResource [ + a prof:ResourceDescriptor ; + + # it's in JSON-LD format + dct:format ; + + # it conforms to JSON-LD, here referred to by its namespace URI as a Profile + dct:conformsTo ; + + # this profile resource plays the role of "Vocabulary" + # described in this ontology's accompanying Roles vocabulary + prof:hasRole role:Vocabulary ; + + # this profile resource's actual file + prof:hasArtifact ; + ] ; + + # a short code to refer to the Profile with when a URI can't be used + prof:hasToken "alpha-crate" ; +. diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/11_workflow_execution_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/11_workflow_execution_phase.ttl new file mode 100644 index 0000000..6c33191 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/11_workflow_execution_phase.ttl @@ -0,0 +1,64 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . 
+@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:WorkflowexecutionObjectHasStartTimeIfBegun + a sh:NodeShape ; + sh:name "WorkflowExecution" ; + sh:description ( + "The workflow execution object MAY have a startTime if actionStatus is " + "either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." + ) ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX rdf: + + SELECT ?this + WHERE { + ?this rdf:type schema:CreateAction ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus", + "http://schema.org/ActiveActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "StartTime" ; + sh:path schema:startTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Info ; + sh:description ( + "The workflow execution object MAY have a startTime if actionStatus is " + "either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." + ) ; + sh:message "The workflow execution object MAY have a startTime if actionStatus is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/12_check_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/12_check_phase.ttl new file mode 100644 index 0000000..0e741f6 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/12_check_phase.ttl @@ -0,0 +1,56 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:CheckValueMayHaveStartTime + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus", + "http://schema.org/ActiveActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "StartTime" ; + sh:path schema:startTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Info ; + sh:message "`CheckValue` MAY have the `startTime` property." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/13_validation_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/13_validation_phase.ttl new file mode 100644 index 0000000..b7adcb3 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/13_validation_phase.ttl @@ -0,0 +1,57 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:DownloadActionMayHaveStartTimeIfBegun + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus", + "http://schema.org/ActiveActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "StartTime" ; + sh:path schema:startTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Info ; + sh:description "ValidationCheck MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ; + sh:message "ValidationCheck MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ; + ] . 
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/14_workflow_retrieval_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/14_workflow_retrieval_phase.ttl new file mode 100644 index 0000000..4386682 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/14_workflow_retrieval_phase.ttl @@ -0,0 +1,102 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:DownloadedWorkflowSHOULDExistAndBeReferencedByDownloadActionResult + a sh:NodeShape ; + sh:name "DownloadAction" ; + sh:description "Validates that DownloadAction result references an existing entity" ; + sh:targetClass schema:DownloadAction ; + + sh:property [ + a sh:PropertyShape ; + sh:name "Result" ; + sh:description "The result property must reference an existing entity in the RO-Crate" ; + sh:path schema:result ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:select """ + PREFIX rdf: + PREFIX schema: + + SELECT $this $value + WHERE { + $this schema:result $value . + + # Entity must have BOTH type AND name (proper definition) + FILTER NOT EXISTS { + $value rdf:type schema:Dataset . 
+ } + } + """ ; + sh:severity sh:Info ; + sh:message "The entity representing the downloaded workflow is not defined, OR is not referenced by `DownloadAction` --> `result`, OR is not of type `schema:Dataset`." ; + ] ; + ] . + + +five-safes-crate:DownloadActionMayHaveStartTimeIfBegun + a sh:NodeShape ; + sh:name "DownloadAction" ; + sh:description ( + "`DownloadAction` MAY have the `startTime` property if `actionStatus` " + "is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." + ); + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX rdf: + + SELECT ?this + WHERE { + ?this rdf:type schema:DownloadAction ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus", + "http://schema.org/ActiveActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "StartTime" ; + sh:path schema:startTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Info ; + sh:description ( + "`DownloadAction` MAY have the `startTime` property if `actionStatus` " + "is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." + ); + sh:message "`DownloadAction` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/1_responsible_project.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/1_responsible_project.ttl new file mode 100644 index 0000000..5dd46fd --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/1_responsible_project.ttl @@ -0,0 +1,56 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:ResponsibleProject
+    a sh:NodeShape ;
+    sh:name "Responsible Project" ;
+    sh:target [  # focus nodes: whatever a CreateAction's agent is a `memberOf`
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a schema:CreateAction ;
+                    schema:agent ?agent .
+                ?agent schema:memberOf ?this .
+            }
+        """
+    ] ;
+
+    sh:property [  # informational hint when `funding` is absent
+        a sh:PropertyShape ;
+        sh:path schema:funding ;
+        sh:name "funding" ;
+        sh:minCount 1 ;
+        sh:message """The Responsible Project does not have the property `funding`.""" ;
+        sh:severity sh:Info ;
+    ] ;
+
+    sh:property [  # informational hint when `member` is absent
+        a sh:PropertyShape ;
+        sh:path schema:member ;
+        sh:name "member" ;
+        sh:minCount 1 ;
+        sh:message """The Responsible Project does not have the property `member`.""" ;
+        sh:severity sh:Info ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/4_sign_off.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/4_sign_off.ttl
new file mode 100644
index 0000000..3890e2b
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/4_sign_off.ttl
@@ -0,0 +1,58 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+@prefix shp: .
+
+
+five-safes-crate:SignOffPhaseStartTime
+    a sh:NodeShape ;
+    sh:name "SignOffPhaseStartTime" ;
+
+    sh:target [  # focus nodes: shp:SignOff entities whose actionStatus is one of the three listed strings
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX shp:
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:SignOff ;
+                    schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/ActiveActionStatus",
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;
+        sh:name "StartTime" ;
+        sh:path schema:startTime ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ;  # fraction separator is "." or ","; "|" inside a character class would be a literal
+        sh:severity sh:Info ;
+        sh:description "Sign Off object MAY have a startTime property if action is active, completed or failed." ;
+        sh:message "Sign Off object MAY have a startTime property if action is active, completed or failed." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/may/8_disclosure_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/may/8_disclosure_phase.ttl
new file mode 100644
index 0000000..4694319
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/may/8_disclosure_phase.ttl
@@ -0,0 +1,58 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:DisclosureObjectHasStartTimeIfBegun
+    a sh:NodeShape ;
+    sh:name "DisclosureCheck" ;
+    sh:description "DisclosureCheck" ;
+
+    sh:target [  # focus nodes: shp:DisclosureCheck entities whose actionStatus is one of the three listed strings
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX shp:
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:DisclosureCheck ;
+                    schema:actionStatus ?status .
+                FILTER(?status IN (
+                    "http://schema.org/CompletedActionStatus",
+                    "http://schema.org/FailedActionStatus",
+                    "http://schema.org/ActiveActionStatus"
+                ))
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # exactly one startTime, reported at Info level
+        a sh:PropertyShape ;
+        sh:path schema:startTime ;
+        sh:name "StartTime" ;
+        sh:description "`DisclosureCheck` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:message "`DisclosureCheck` MAY have the `startTime` property if `actionStatus` is either ActiveActionStatus, CompletedActionStatus or FailedActionStatus." ;
+        sh:severity sh:Info ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/11_workflow_execution_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/11_workflow_execution_phase.ttl
new file mode 100644
index 0000000..2f723c9
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/11_workflow_execution_phase.ttl
@@ -0,0 +1,64 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:WorkflowMustHaveDescriptiveName
+    a sh:NodeShape ;
+    sh:name "WorkflowExecution" ;
+    sh:targetClass schema:CreateAction ;  # focus nodes: every schema:CreateAction
+
+    sh:property [  # name: required, xsd:string, at least 10 characters
+        a sh:PropertyShape ;
+        sh:path schema:name ;
+        sh:name "name" ;
+        sh:description "Workflow (CreateAction) MUST have a name string of at least 10 characters." ;
+        sh:minCount 1 ;
+        sh:datatype xsd:string ;
+        sh:minLength 10 ;
+        sh:message "Workflow (CreateAction) MUST have a name string of at least 10 characters." ;
+        sh:severity sh:Violation ;
+    ] .
+
+
+
+
+five-safes-crate:WorkflowMustHaveActionStatusWithAllowedValues
+    a sh:NodeShape ;
+    sh:name "WorkflowExecution" ;
+    sh:targetClass schema:CreateAction ;  # focus nodes: every schema:CreateAction
+    sh:property [  # actionStatus: required, and must be one of the four strings below
+        a sh:PropertyShape ;
+        sh:path schema:actionStatus ;
+        sh:name "actionStatus" ;
+        sh:description "WorkflowExecution MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ;
+        sh:minCount 1 ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:message "WorkflowExecution MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ;
+        sh:severity sh:Violation ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/12_check_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/12_check_phase.ttl
new file mode 100644
index 0000000..35b5dd1
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/12_check_phase.ttl
@@ -0,0 +1,93 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:CheckValueObjectHasDescriptiveNameAndIsAssessAction
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "" ;
+
+    sh:target [  # focus nodes: entities whose additionalType is shp:CheckValue
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX shp:
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:CheckValue .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # one of the rdf:type values must be schema:AssessAction
+        sh:path rdf:type ;
+        sh:minCount 1 ;
+        sh:hasValue schema:AssessAction ;
+        sh:severity sh:Violation ;
+        sh:message "CheckValue MUST be a `schema:AssessAction`." ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;  # was `sh:a`, which is not a SHACL term and did not type the node
+        sh:name "name" ;
+        sh:description "CheckValue MUST have a human readable name string." ;
+        sh:path schema:name ;
+        sh:datatype xsd:string ;
+        sh:severity sh:Violation ;
+        sh:message "CheckValue MUST have a human readable name string." ;
+    ] .
+
+five-safes-crate:CheckValueActionStatusMustHaveAllowedValues
+    a sh:NodeShape ;
+    sh:name "CheckValue" ;
+    sh:description "" ;
+
+    sh:target [  # focus nodes: shp:CheckValue entities that carry an actionStatus
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX shp:
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:CheckValue ;
+                    schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # every actionStatus value must be one of the four strings below
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "`CheckValue` --> `actionStatus` MUST have one of the allowed values." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/13_validation_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/13_validation_phase.ttl
new file mode 100644
index 0000000..8ce6be4
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/13_validation_phase.ttl
@@ -0,0 +1,126 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:ValidationCheckObjectHasDescriptiveNameAndIsAssessAction
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:description "" ;
+
+    sh:target [  # focus nodes: entities whose additionalType is shp:ValidationCheck
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX shp:
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:ValidationCheck .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # one of the rdf:type values must be schema:AssessAction
+        sh:path rdf:type ;
+        sh:minCount 1 ;
+        sh:hasValue schema:AssessAction ;
+        sh:severity sh:Violation ;
+        sh:message "ValidationCheck MUST be a `schema:AssessAction`." ;
+    ] ;
+
+    sh:property [
+        a sh:PropertyShape ;  # was `sh:a`, which is not a SHACL term and did not type the node
+        sh:name "name" ;
+        sh:description "ValidationCheck MUST have a human readable name string." ;
+        sh:path schema:name ;
+        sh:datatype xsd:string ;
+        sh:severity sh:Violation ;
+        sh:message "ValidationCheck MUST have a human readable name string." ;
+    ] .
+
+
+five-safes-crate:ValidationCheckActionStatusMustHaveAllowedValue
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:target [  # focus nodes: shp:ValidationCheck entities that carry an actionStatus
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX shp:
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:ValidationCheck ;
+                    schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # every actionStatus value must be one of the four strings below
+        a sh:PropertyShape ;
+        sh:name "actionStatus" ;
+        sh:description "The `actionStatus` of ValidationCheck MUST have an allowed value (see https://schema.org/ActionStatusType)." ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "The `actionStatus` of ValidationCheck MUST have an allowed value (see https://schema.org/ActionStatusType)." ;
+    ] .
+
+
+five-safes-crate:ValidationCheckMustHaveActionStatus  # distinct IRI: reusing the IRI above merged both definitions into one node
+    a sh:NodeShape ;
+    sh:name "ValidationCheck" ;
+    sh:target [  # focus nodes: every shp:ValidationCheck entity, with or without an actionStatus
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX shp:
+
+            SELECT ?this
+            WHERE {
+                ?this schema:additionalType shp:ValidationCheck .
+            }
+        """ ;
+    ];
+
+    sh:property [  # actionStatus is required here (minCount 1) and must be an allowed string
+        a sh:PropertyShape ;
+        sh:minCount 1 ;
+        sh:name "actionStatus" ;
+        sh:description "actionStatus MUST be either PotentialActionStatus, ActiveActionStatus, CompletedActionStatus, or FailedActionStatus." ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "actionStatus MUST be either PotentialActionStatus, ActiveActionStatus, CompletedActionStatus, or FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/14_workflow_retrieval_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/14_workflow_retrieval_phase.ttl
new file mode 100644
index 0000000..a1108ad
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/14_workflow_retrieval_phase.ttl
@@ -0,0 +1,157 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:DownloadActionObjectMUSTHavesDescriptiveName
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:targetClass schema:DownloadAction ;  # focus nodes: every schema:DownloadAction
+    sh:description "" ;
+
+    sh:property [  # name: exactly one xsd:string value
+        a sh:PropertyShape ;
+        sh:name "name" ;
+        sh:description "DownloadAction MUST have a human readable name string." ;
+        sh:path schema:name ;
+        sh:minCount 1 ;
+        sh:maxCount 1 ;
+        sh:datatype xsd:string ;
+        sh:severity sh:Violation ;
+        sh:message "DownloadAction MUST have a human readable name string." ;
+    ] .
+
+
+
+five-safes-crate:WorkflowSameAsAndRootDataEntityMainEntityMUSTBeTheSame
+    a sh:NodeShape ;
+    sh:name "Downloaded Workflow" ;
+    sh:description "" ;
+    sh:target [  # focus nodes: schema:Dataset entities that are the `result` of a DownloadAction
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX rdf:
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:Dataset .
+                ?s rdf:type schema:DownloadAction ;
+                    schema:result ?this .
+            }
+        """ ;
+    ];
+
+    sh:sparql [  # violation when no `sameAs` value is also some entity's `mainEntity`
+        a sh:SPARQLConstraint ;
+        sh:select """
+            PREFIX schema:
+            PREFIX rdf:
+
+            SELECT $this
+            WHERE {
+                FILTER NOT EXISTS {
+                    $this schema:sameAs ?o .
+                    ?s schema:mainEntity ?o .
+                    # ?o rdf:type schema:Dataset .
+                }
+            }
+        """ ;
+        sh:severity sh:Violation ;
+        sh:description "The property `sameAs` of the entity representing the downloaded workflow MUST point to the same entity as `RootDataEntity` --> `mainEntity`." ;
+        sh:message "The property `sameAs` of the entity representing the downloaded workflow MUST point to the same entity as `RootDataEntity` --> `mainEntity`." ;
+    ] .
+
+
+five-safes-crate:DownloadedWorkflowDistributionAndDownloadActionObjectMUSTBeTheSame
+    a sh:NodeShape ;
+    sh:name "Downloaded Workflow" ;
+    sh:description "" ;
+    sh:target [  # focus nodes: schema:Dataset entities that are the `result` of a DownloadAction
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX rdf:
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:Dataset .
+                ?s rdf:type schema:DownloadAction ;
+                    schema:result ?this .
+            }
+        """ ;
+    ];
+
+    sh:sparql [  # only the DownloadAction(s) whose `result` is $this are compared, not every one in the crate
+        a sh:SPARQLConstraint ;
+        sh:name "" ;
+        sh:select """
+            PREFIX schema:
+            PREFIX rdf:
+
+            SELECT $this
+            WHERE {
+                ?action rdf:type schema:DownloadAction ; schema:result $this .
+                FILTER NOT EXISTS {
+                    $this schema:distribution ?url .
+                    ?action schema:object ?url .
+                }
+            }
+        """ ;
+        sh:severity sh:Violation ;
+        sh:message "DownloadedWorkflow --> `distribution` MUST reference the same entity as `DownloadAction` --> `object`." ;
+    ] .
+
+
+five-safes-crate:DownloadActionActionStatusMUSTHaveAllowedValues
+    a sh:NodeShape ;
+    sh:name "DownloadAction" ;
+    sh:description "" ;
+
+    sh:target [  # focus nodes: DownloadActions that carry an actionStatus
+        a sh:SPARQLTarget ;
+        sh:select """
+            PREFIX schema:
+            PREFIX rdf:
+
+            SELECT ?this
+            WHERE {
+                ?this rdf:type schema:DownloadAction ;
+                    schema:actionStatus ?status .
+            }
+        """ ;
+    ] ;
+
+    sh:property [  # every actionStatus value must be one of the four strings below
+        a sh:PropertyShape ;
+        sh:name "ActionStatus" ;
+        sh:path schema:actionStatus ;
+        sh:in (
+            "http://schema.org/PotentialActionStatus"
+            "http://schema.org/ActiveActionStatus"
+            "http://schema.org/CompletedActionStatus"
+            "http://schema.org/FailedActionStatus"
+        ) ;
+        sh:severity sh:Violation ;
+        sh:message "The value of actionStatus MUST be one of the allowed values: PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.py b/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.py
new file mode 100644
index 0000000..b589d33
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2024-2025 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+import rocrate_validator.utils.log as logging
+from rocrate_validator.models import Severity, ValidationContext
+from rocrate_validator.requirements.python import PyFunctionCheck, check, requirement
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="RO-Crate context version")
+class FileDescriptorContextVersion(PyFunctionCheck):
+    """The RO-Crate metadata file MUST include the RO-Crate context version 1.2
+    (or later minor version) in `@context`"""
+
+    @check(name="RO-Crate context version", severity=Severity.REQUIRED)
+    def test_existence(self, context: ValidationContext) -> bool:
+        """
+        The RO-Crate metadata file MUST include the RO-Crate context version 1.2
+        (or later minor version) in `@context`
+        """
+        try:
+            json_dict = context.ro_crate.metadata.as_dict()
+            context_value = json_dict["@context"]
+            pattern = re.compile(
+                r"https://w3id\.org/ro/crate/1\.[2-9](-DRAFT)?/context"
+            )
+            # `@context` may be a single value or a list; normalise to a list.
+            if isinstance(context_value, list):
+                candidates = context_value
+            else:
+                candidates = [context_value]
+            # Only string entries are matched; a non-string `@context` (e.g. an
+            # inline dict) now fails the check instead of raising and passing.
+            passed = any(
+                pattern.match(item) for item in candidates if isinstance(item, str)
+            )
+            if not passed:
+                context.result.add_issue(
+                    "The RO-Crate metadata file MUST include the RO-Crate context "
+                    "version 1.2 (or later minor version) in `@context`",
+                    self,
+                )
+            return passed
+
+        except Exception as e:
+            # Best effort: if the metadata cannot be inspected, do not fail here.
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(e)
+            return True
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.ttl
new file mode 100644
index 0000000..14d939f
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/15_metadata_file.ttl
@@ -0,0 +1,41 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in
compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+@prefix dct: .
+
+five-safes-crate:MetadataFileDescriptorProperties a sh:NodeShape ;
+    sh:targetClass ro-crate:ROCrateMetadataFileDescriptor ;
+    sh:name "RO-Crate conforms to 1.2 or later minor version" ;
+    sh:description """The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version""";
+    sh:property [  # conformsTo: required, an IRI, matching the 1.2-1.9 (optionally -DRAFT) pattern
+        a sh:PropertyShape ;
+        sh:path dct:conformsTo ;
+        sh:name "RO-Crate conforms to 1.2 or later minor version" ;
+        sh:description "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:IRI ;
+        sh:pattern "https://w3id\\.org/ro/crate/(1\\.[2-9](-DRAFT)?)" ;
+        sh:message "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with RO-Crate specification version 1.2 or later minor version" ;
+        sh:severity sh:Violation;
+    ] .
+
+ro-crate:conformsToROCrateSpec sh:deactivated true .  # switches off the shape with this IRI; presumably the base-profile conformsTo check - TODO confirm
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/16_publishing_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/16_publishing_phase.ttl
new file mode 100644
index 0000000..5a591a4
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/16_publishing_phase.ttl
@@ -0,0 +1,41 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:AllAssessActionsMentioned
+    a sh:NodeShape ;
+    sh:targetClass schema:AssessAction;  # focus nodes: every schema:AssessAction
+    sh:name "All AssessActions are mentioned from Root Data Entity" ;
+    sh:description "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+
+    sh:property [  # walks `mentions` backwards: at least one entity must mention the focus node
+        a sh:PropertyShape ;
+        sh:path [ sh:inversePath schema:mentions ] ;
+        sh:name "AssessAction mentions from RDE" ;
+        sh:description "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+        sh:minCount 1 ;
+        sh:node ro-crate:RootDataEntity ;  # NOTE(review): sh:node applies to every mentioning entity - confirm this is intended
+        sh:message "All AssessAction entities in the crate MUST be referenced from the Root Dataset via `mentions`." ;
+        sh:severity sh:Violation ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_requesting_agent.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_requesting_agent.ttl
new file mode 100644
index 0000000..2cc0d43
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_requesting_agent.ttl
@@ -0,0 +1,82 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:AgentIsMemberOf
+    a sh:NodeShape ;
+    sh:name "Requesting Agent" ;
+    sh:target [  # focus nodes: every `agent` of a schema:CreateAction
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a schema:CreateAction ;
+                    schema:agent ?this .
+            }
+        """
+    ] ;
+
+    sh:property [  # no minCount: only memberOf values that exist are checked
+        a sh:PropertyShape ;
+        sh:path schema:memberOf ;
+        sh:name "memberOf" ;
+        sh:class schema:Project ;
+        sh:message """The 'memberOf' property of an agent MUST be of type Project.""" ;
+        sh:severity sh:Violation ;
+    ] .
+
+
+five-safes-crate:AgentProjectIntersection
+    a sh:NodeShape ;
+    sh:name "Agent Project Intersection" ;
+    sh:description """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" ;
+    sh:target [  # focus nodes: every `agent` of a schema:CreateAction
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a schema:CreateAction ;
+                    schema:agent ?this .
+            }
+        """
+    ] ;
+    sh:sparql [  # fires only for agents that have a memberOf, none of which is a sourceOrganization of an `about` target
+        a sh:SPARQLConstraint ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:name "Agent Project Intersection" ;
+        sh:description """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" ;
+        sh:select """
+            SELECT $this WHERE {
+                FILTER EXISTS {
+                    $this schema:memberOf ?anyProject .
+                }
+                FILTER NOT EXISTS {
+                    $this schema:memberOf ?commonProject .
+                    ?metadata schema:about ?root .
+                    ?root schema:sourceOrganization ?commonProject .
+                }
+            }
+        """ ;
+        sh:message """At least one Project referenced by Agent -> memberOf MUST be included in the set of Projects referenced by RootDataEntity -> sourceOrganization.""" ;
+        sh:severity sh:Violation ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_responsible_project.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_responsible_project.ttl
new file mode 100644
index 0000000..c06c873
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_responsible_project.ttl
@@ -0,0 +1,59 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:ResponsibleProject
+    a sh:NodeShape ;
+    sh:name "Responsible Project" ;
+    sh:target [  # focus nodes: whatever a CreateAction's agent is a `memberOf`
+        a sh:SPARQLTarget ;
+        sh:prefixes ro-crate:sparqlPrefixes ;
+        sh:select """
+            SELECT DISTINCT ?this WHERE {
+                ?action a schema:CreateAction ;
+                    schema:agent ?agent .
+                ?agent schema:memberOf ?this .
+            }
+        """
+    ] ;
+
+    sh:property [  # no minCount: only funding values that exist are type-checked
+        a sh:PropertyShape ;
+        sh:path schema:funding ;
+        sh:name "funding" ;
+        sh:class schema:Grant ;
+        sh:message """The property 'funding' of the Responsible Project MUST be of type schema:Grant.""" ;
+        sh:severity sh:Violation ;
+    ] ;
+
+    sh:property [  # no minCount: each existing member must be an Organization or a Person
+        a sh:PropertyShape ;
+        sh:path schema:member ;
+        sh:name "member" ;
+        sh:or (
+            [ sh:class schema:Organization ]
+            [ sh:class schema:Person ]
+        ) ;
+        sh:message """The property 'member' of the Responsible Project MUST be of type schema:Organization or schema:Person.""" ;
+        sh:severity sh:Violation ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_root_data_entity.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_root_data_entity.ttl
new file mode 100644
index 0000000..bb9a514
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/1_root_data_entity.ttl
@@ -0,0 +1,46 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+five-safes-crate:RootDataEntityRequiredProperties
+    a sh:NodeShape ;
+    sh:targetClass ro-crate:RootDataEntity ;  # focus nodes: instances of ro-crate:RootDataEntity
+    sh:name "RootDataEntity" ;
+
+    sh:property [  # presence check, reported separately from the type check below
+        a sh:PropertyShape ;
+        sh:path schema:sourceOrganization ;
+        sh:name "sourceOrganization" ;
+        sh:minCount 1 ;
+        sh:message """The Root Data Entity MUST have a `sourceOrganization` property.""" ;
+        sh:severity sh:Violation ;
+    ] ;
+
+    sh:property [  # type check: every sourceOrganization value must be a schema:Project
+        a sh:PropertyShape ;
+        sh:path schema:sourceOrganization ;
+        sh:name "sourceOrganization" ;
+        sh:class schema:Project ;
+        sh:message """The `sourceOrganization` property of the RootDataEntity MUST point to a Project entity.""" ;
+        sh:severity sh:Violation ;
+    ] .
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/2_requesting_agent.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/2_requesting_agent.ttl
new file mode 100644
index 0000000..890bf3a
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/2_requesting_agent.ttl
@@ -0,0 +1,63 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+
+five-safes-crate:CreateActionHasAgent
+    a sh:NodeShape ;
+    sh:targetClass schema:CreateAction ;
+    sh:name "CreateAction" ;
+    sh:description "Checks that a CreateAction has an agent and that each agent is a schema:Person." ;
+
+    # Presence: at least one agent, and it must be an IRI reference
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema:agent ;
+        sh:name "Has Agent" ;
+        sh:minCount 1 ;
+        sh:nodeKind sh:IRI ;
+        sh:message "CreateAction MUST have at least one schema:agent that is a contextual entity." ;
+        sh:severity sh:Violation ;
+    ] ;
+
+    # Type: every agent must be an instance of schema:Person
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path schema:agent ;
+        sh:name "Agent is a Person" ;
+        sh:class schema:Person ;
+        sh:nodeKind sh:IRI ;
+        sh:message "Each CreateAction agent MUST be typed as schema:Person." ;
+        sh:severity sh:Violation ;
+    ] ;
+
+    # Affiliation is optional; when present (agent -> affiliation) it must be an Organization IRI
+    sh:property [
+        a sh:PropertyShape ;
+        sh:path ( schema:agent schema:affiliation ) ;
+        sh:name "Affiliation is an Organization" ;
+        sh:nodeKind sh:IRI ;
+        sh:class schema:Organization ;
+        sh:message "The affiliation of a CreateAction's agent MUST be a contextual entity with type schema:Organization." ;
+        sh:severity sh:Violation ;
+    ] .
\ No newline at end of file
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/3_timestamp_format.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/3_timestamp_format.ttl
new file mode 100644
index 0000000..c4f2ddb
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/3_timestamp_format.ttl
@@ -0,0 +1,54 @@
+# Copyright (c) 2025 eScience Lab, The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix ro: <./> .
+@prefix ro-crate: .
+@prefix five-safes-crate: .
+@prefix rdf: .
+@prefix schema: .
+@prefix purl: .
+@prefix sh: .
+@prefix validator: .
+@prefix xsd: .
+
+
+# to ensure the entity id will be included in any error message,
+# target all entities which have startTime and/or endTime properties using sh:targetSubjectsOf,
+# then we use sh:property to validate the values of those properties.
+# the properties are listed individually so that the property id appears in any error message too +five-safes-crate:TimeStampFormat + a sh:NodeShape ; + sh:name "Timestamp Format" ; + sh:description "Timestamps MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ; + sh:targetSubjectsOf schema:startTime, schema:endTime; + sh:property [ + a sh:PropertyShape ; + sh:name "End TimeStamp" ; + sh:path schema:endTime ; + sh:minCount 0 ; + sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.|,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ; + sh:severity sh:Violation ; + sh:message "All `startTime` and `endTime` values MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ; + sh:description "End timestamps MUST follow the RFC 3339 standard." ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Start TimeStamp" ; + sh:path schema:startTime ; + sh:minCount 0 ; + sh:pattern "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}([.|,][0-9]+)?(Z|z|[+-][0-9]{2}:[0-9]{2})$" ; + sh:severity sh:Violation ; + sh:message "All `startTime` and `endTime` values MUST follow the RFC 3339 standard (YYYY-MM-DD'T'hh:mm:ss[.fraction](Z | ±hh:mm))." ; + sh:description "Start timestamps MUST follow the RFC 3339 standard." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/4_sign_off.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/4_sign_off.ttl new file mode 100644 index 0000000..2b61a25 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/4_sign_off.ttl @@ -0,0 +1,89 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + +five-safes-crate:SignOffObjectActionAndName + a sh:NodeShape ; + sh:name "SignOff" ; + sh:description "Sign Off phase" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff . + } + """ ; + ] ; + + sh:property [ + sh:path schema:name ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "Sign Off phase MUST have a human-readable name string." ; + ] ; + + sh:property [ + sh:path rdf:type ; + sh:minCount 1 ; + sh:hasValue schema:AssessAction; + sh:severity sh:Violation ; + sh:message "Sign Off phase MUST be a `schema:AssessAction`." ; + ] . + +five-safes-crate:SignOffObjectHasActionStatus + a sh:NodeShape ; + sh:name "SignOffStatus" ; + sh:description "Sign Off Phase Action Status" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff ; + schema:actionStatus ?status . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "actionStatus" ; + sh:description "The value of actionStatus MUST be one of the allowed values." 
; + sh:path schema:actionStatus ; + sh:in ( + "http://schema.org/PotentialActionStatus" + "http://schema.org/ActiveActionStatus" + "http://schema.org/CompletedActionStatus" + "http://schema.org/FailedActionStatus" + ) ; + sh:severity sh:Violation ; + sh:message "The value of actionStatus MUST be one of the allowed values: PotentialActionStatus; ActiveActionStatus; CompletedActionStatus; FailedActionStatus." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/6_workflow_reference.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/6_workflow_reference.ttl new file mode 100644 index 0000000..6477742 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/6_workflow_reference.ttl @@ -0,0 +1,80 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . 
+ + + +five-safes-crate:ReferenceToWorkflowCrate + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + + # RootDataEntity MUST have exactly one mainEntity property (an IRI) + sh:property [ + a sh:PropertyShape ; + sh:name "mainEntity" ; + sh:path schema:mainEntity ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:IRI ; + sh:severity sh:Violation ; + sh:message "The RootDataEntity MUST have exactly one schema:mainEntity property that is an IRI." ; + ] ; + + # The mainEntity of a RootDataEntity MUST be a Dataset + sh:property [ + a sh:PropertyShape ; + sh:name "mainEntity" ; + sh:path schema:mainEntity ; + sh:class schema:Dataset ; + sh:severity sh:Violation ; + sh:message "The mainEntity pointed to by the RootDataEntity MUST be of type schema:Dataset" ; + ] . + +five-safes-crate:mainEntityHasProperConformsTo + a sh:NodeShape ; + sh:name "mainEntity" ; + sh:description "The mainEntity of the RootDataEntity MUST have a conformsTo property with an IRI starting with https://w3id.org/workflowhub/workflow-ro-crate" ; + sh:targetObjectsOf schema:mainEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "conformsTo" ; + sh:path purl:conformsTo ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Violation ; + sh:message "mainEntity MUST have one and only one `purl:conformsTo` property." ; + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "conformsTo" ; + sh:message "conformsTo IRI must start with https://w3id.org/workflowhub/workflow-ro-crate" ; + sh:select """ + PREFIX schema: + PREFIX purl: + SELECT $this WHERE { + $this purl:conformsTo ?iri . + FILTER(!STRSTARTS(STR(?iri), "https://w3id.org/workflowhub/workflow-ro-crate")) + } + """ ; + ] . 
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/7_requested_workflow_run.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/7_requested_workflow_run.ttl new file mode 100644 index 0000000..5679437 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/7_requested_workflow_run.ttl @@ -0,0 +1,86 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:RootDataEntityMentionsCreateAction + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "" ; + + sh:property [ + a sh:PropertyShape ; + sh:name "mentions" ; + sh:path schema:mentions; + sh:qualifiedValueShape [ + sh:class schema:CreateAction ; + ] ; + sh:qualifiedMinCount 1 ; + sh:severity sh:Violation ; + sh:message "`RootDataEntity` MUST reference at least one `CreateAction` through `mentions`" ; + ] . 
+ + +five-safes-crate:CreateActionInstrumentAndStatus + a sh:NodeShape ; + sh:name "CreateAction" ; + sh:targetClass schema:CreateAction ; + sh:description "" ; + + sh:property [ + a sh:PropertyShape ; + sh:name "instrument" ; + sh:path schema:instrument; + sh:minCount 1 ; + sh:severity sh:Violation ; + sh:message "`CreateAction` MUST have the `schema:instrument` property" ; + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "instrument" ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this ?main ?instrument + WHERE { + ?root schema:mainEntity ?main . + $this schema:instrument ?instrument . + FILTER (?instrument != ?main) + } + """ ; + sh:severity sh:Violation ; + sh:message "`CreateAction` --> `instrument` MUST reference the same entity as `Root Data Entity` --> `mainEntity`" ; + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:name "object" ; + sh:select """ + SELECT $this ?object + WHERE { + $this schema:object ?object . + FILTER NOT EXISTS { ?object a ?type . } + } + """ ; + sh:severity sh:Violation ; + sh:message "Each `object` in `CreateAction` MUST reference an existing entity." ; + ] . \ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/must/8_disclosure_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/must/8_disclosure_phase.ttl new file mode 100644 index 0000000..3c8ceca --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/must/8_disclosure_phase.ttl @@ -0,0 +1,100 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:DisclosureObjectHasDescriptiveNameAndIsAssessAction + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "DisclosureCheck" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "AssessAction" ; + sh:description "`DisclosureCheck` MUST be a `schema:AssessAction`." ; + sh:path rdf:type ; + sh:minCount 1 ; + sh:hasValue schema:AssessAction; + sh:severity sh:Violation ; + sh:message "`DisclosureCheck` MUST be a `schema:AssessAction`." ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "name" ; + sh:description "`DisclosureCheck` MUST have a name string of at least 10 characters." ; + sh:minCount 1 ; + sh:path schema:name ; + sh:datatype xsd:string ; + sh:minLength 10 ; + sh:severity sh:Violation ; + sh:message "`DisclosureCheck` MUST have a name string of at least 10 characters." ; + ] . + + +five-safes-crate:DisclosureObjectHasActionStatusWithAcceptedValue + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." 
; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck ; + schema:actionStatus ?status . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "actionStatus" ; + sh:description "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ; + sh:path schema:actionStatus ; + sh:in ( + "http://schema.org/PotentialActionStatus" + "http://schema.org/ActiveActionStatus" + "http://schema.org/CompletedActionStatus" + "http://schema.org/FailedActionStatus" + ) ; + sh:severity sh:Violation ; + sh:message "`DisclosureCheck` MUST have an actionStatus with an allowed value (see https://schema.org/ActionStatusType)." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/profile.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/profile.ttl new file mode 100644 index 0000000..f144e01 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/profile.ttl @@ -0,0 +1,83 @@ +# Copyright (c) 2024-2025 CRS4, University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . 
+ + + a prof:Profile ; + + # the Profile's label + rdfs:label "Five Safes RO-Crate 0.4" ; + + # regular metadata, a basic description of the Profile + rdfs:comment """Five Safes RO-Crate Metadata Specification 0.4"""@en ; + + # URI of the publisher of the Metadata Specification + dct:publisher ; + + # TODO: resolve failures when these profiles are applied + # This profile is an extension of Workflow Run Crate for use in Trusted Research Environments (TRE) + # prof:isProfileOf ; + + # This profile is a transitive profile of the RO-Crate Metadata Specification + prof:isTransitiveProfileOf ; + # TODO: resolve failures when these profiles are applied + # , + # ; + + # this profile has a JSON-LD context resource + prof:hasResource [ + a prof:ResourceDescriptor ; + + # it's in JSON-LD format + dct:format ; + + # it conforms to JSON-LD, here referred to by its namespace URI as a Profile + dct:conformsTo ; + + # this profile resource plays the role of "Vocabulary" + # described in this ontology's accompanying Roles vocabulary + prof:hasRole role:Vocabulary ; + + # this profile resource's actual file + prof:hasArtifact ; + ] ; + + # this profile has a human-readable documentation resource + prof:hasResource [ + a prof:ResourceDescriptor ; + + # it's in HTML format + dct:format ; + + # it conforms to HTML, here referred to by its namespace URI as a Profile + dct:conformsTo ; + + # this profile resource plays the role of "Specification" + # described in this ontology's accompanying Roles vocabulary + prof:hasRole role:Specification ; + + # this profile resource's actual file + prof:hasArtifact ; + + # this profile is inherited from Workflow Run profile + prof:isInheritedFrom ; + ] ; + + # a short code to refer to the Profile with when a URI can't be used + prof:hasToken "five-safes-crate" ; +. 
diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/10_outputs.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/10_outputs.ttl new file mode 100644 index 0000000..25f623a --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/10_outputs.ttl @@ -0,0 +1,89 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + + +five-safes-crate:CreateActionHasResultIfActionCompleted + a sh:NodeShape ; + sh:name "CreateAction" ; + sh:description "`CreateAction` with CompletedActionStatus SHOULD have the `schema:result` property." ; + + sh:target [ + a sh:SPARQLTarget ; + sh:name "Result" ; + sh:description "`CreateAction` with CompletedActionStatus SHOULD have the `schema:result` property." ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this WHERE { + ?this a schema:CreateAction ; + schema:actionStatus "http://schema.org/CompletedActionStatus" . + } + """ + ] ; + + + sh:property [ + a sh:PropertyShape ; + sh:name "Result" ; + sh:path schema:result ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "`CreateAction` with CompletedActionStatus SHOULD have the `schema:result` property." ; + ] . 
+ + +five-safes-crate:CreateActionResultOutputsHaveAllowedTypes + a sh:NodeShape ; + sh:name "Output" ; + sh:description "Result SHOULD have a `@type` among an allowed set of values." ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + SELECT ?this + WHERE { + ?createAction a schema:CreateAction . + ?createAction schema:result ?this . + } + """ ; + ] ; + sh:message "Result SHOULD have a `@type` among an allowed set of values." ; + sh:severity sh:Warning ; + sh:or ( + [ + sh:class schema:MediaObject; + ] + [ + sh:class schema:Dataset; + ] + [ + sh:class schema:Collection; + ] + [ + sh:class schema:DigitalDocument; + ] + [ + sh:class schema:PropertyValue; + ] + ) . + diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/11_workflow_execution_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/11_workflow_execution_phase.ttl new file mode 100644 index 0000000..0c15231 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/11_workflow_execution_phase.ttl @@ -0,0 +1,85 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . 
+ + + +five-safes-crate:RootDataEntityShouldMentionWorkflow + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:description "RootDataEntity SHOULD mention workflow execution object (typed CreateAction)." ; + sh:targetClass ro-crate:RootDataEntity ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "mentions" ; + sh:select """ + PREFIX schema: + PREFIX rdf: + SELECT $this + WHERE { + + FILTER NOT EXISTS { + $this schema:mentions ?workflowExecution . + ?workflowExecution rdf:type schema:CreateAction . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "RootDataEntity SHOULD mention workflow execution object (typed CreateAction)." ; + ] . + + + +five-safes-crate:WorkflowexecutionObjectHasEndTimeIfEnded + a sh:NodeShape ; + sh:name "WorkflowExecution" ; + sh:description "The workflow execution object SHOULD have an endTime property if it has ended." ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX rdf: + + SELECT ?this + WHERE { + ?this rdf:type schema:CreateAction ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:description "The workflow execution object SHOULD have an endTime property if it has ended." ; + sh:message "The workflow execution object SHOULD have an endTime property if it has ended." ; + ] . 
\ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/12_check_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/12_check_phase.ttl new file mode 100644 index 0000000..9944d8d --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/12_check_phase.ttl @@ -0,0 +1,190 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:RootDataEntityShouldMentionCheckValueObject + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "" ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "mentions" ; + sh:description "RootDataEntity SHOULD mention a check value object." ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + FILTER NOT EXISTS{ + $this schema:mentions ?action . + ?action schema:additionalType shp:CheckValue . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "RootDataEntity SHOULD mention a check value object." ; + ] . 
+ + +five-safes-crate:CheckValueObjectShouldPointToRootDataEntity + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "object" ; + sh:path schema:object ; + sh:minCount 1 ; + sh:class ro-crate:RootDataEntity ; + sh:severity sh:Warning ; + sh:message "`CheckValue` --> `object` SHOULD point to the root of the RO-Crate" ; + ] . + + +five-safes-crate:CheckValueInstrumentShouldPointToEntityTypedDefinedTerm + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "instrument" ; + sh:path schema:instrument ; + sh:minCount 1 ; + sh:class schema:DefinedTerm ; + sh:severity sh:Warning ; + sh:message "`CheckValue` --> `instrument` SHOULD point to an entity typed `schema:DefinedTerm`" ; + ] . + + +five-safes-crate:CheckValueAgentShouldIdentifyTheAgentWhoPerformnedTheCheck + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "agent" ; + sh:path schema:agent ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:severity sh:Warning ; + sh:message "`CheckValue` --> `agent` SHOULD reference the agent who initiated the check" ; + ] . 
+ + +five-safes-crate:CheckValueShouldHaveEndTime + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:message "`CheckValue` SHOULD have the `endTime` property." ; + ] . + + +five-safes-crate:CheckValueShouldHaveActionStatus + a sh:NodeShape ; + sh:name "CheckValue" ; + sh:description "" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:CheckValue . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "ActionStatus" ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "CheckValue SHOULD have actionStatus property." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/13_validation_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/13_validation_phase.ttl new file mode 100644 index 0000000..cb2b181 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/13_validation_phase.ttl @@ -0,0 +1,165 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:RootDataEntityShouldMentionValidationCheckObject + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "" ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "mentions" ; + sh:description "RootDataEntity SHOULD mention a ValidationCheck object." ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + FILTER NOT EXISTS{ + $this schema:mentions ?action . + ?action schema:additionalType shp:ValidationCheck . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "RootDataEntity SHOULD mention a ValidationCheck object." ; + ] . + + +five-safes-crate:ValidationCheckObjectShouldPointToRootDataEntity + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "object" ; + sh:path schema:object ; + sh:minCount 1 ; + sh:class ro-crate:RootDataEntity ; + sh:severity sh:Warning ; + sh:message "`ValidationCheck` --> `object` SHOULD point to the root of the RO-Crate" ; + ] . 
+ +five-safes-crate:ValidationCheckInstrumentShouldPointToEntityWithSpecificId + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "instrument" ; + sh:description "" ; + sh:path schema:instrument ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:hasValue ; + sh:severity sh:Warning ; + sh:message "`ValidationCheck` --> `instrument` SHOULD point to an entity with @id https://w3id.org/5s-crate/0.4" ; + ] . + + +five-safes-crate:ValidationCheckShouldHaveActionStatus + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "ActionStatus" ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "ValidationCheck SHOULD have actionStatus property." ; + ] . + + +five-safes-crate:DownloadActionShouldHaveEndTimeIfBegun + a sh:NodeShape ; + sh:name "ValidationCheck" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:ValidationCheck ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:description "ValidationCheck SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." 
; + sh:message "ValidationCheck SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/14_workflow_retrieval_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/14_workflow_retrieval_phase.ttl new file mode 100644 index 0000000..f2bd9dd --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/14_workflow_retrieval_phase.ttl @@ -0,0 +1,125 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:DownloadActionEntitySHOULDExist + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:description "" ; + sh:targetClass ro-crate:RootDataEntity ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "" ; + sh:select """ + PREFIX schema: + PREFIX rdf: + + SELECT $this + WHERE { + FILTER NOT EXISTS { + ?s rdf:type schema:DownloadAction . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "An entity typed DownloadAction SHOULD exist." ; + ] . 
+ + + +five-safes-crate:RootDataEntitySHOULDMentionDownloadActionIfPresent + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:description "" ; + sh:targetClass ro-crate:RootDataEntity ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "" ; + sh:select """ + PREFIX schema: + PREFIX rdf: + + SELECT $this ?da + WHERE { + ?da rdf:type schema:DownloadAction . + FILTER NOT EXISTS { + $this schema:mentions ?da . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "RootDataEntity SHOULD mention DownloadAction if this exists." ; + ] . + + +five-safes-crate:DownloadActionShouldHaveEndTimeIfEnded + a sh:NodeShape ; + sh:name "DownloadAction" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX rdf: + + SELECT ?this + WHERE { + ?this rdf:type schema:DownloadAction ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:message "`DownloadAction` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ; + ] . + + +five-safes-crate:DownloadActionShouldHaveActionStatus + a sh:NodeShape ; + sh:name "DownloadAction" ; + sh:targetClass schema:DownloadAction ; + sh:description "" ; + + sh:property [ + a sh:PropertyShape ; + sh:name "ActionStatus" ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "`DownloadAction` SHOULD have `actionStatus` property." ; + ] . 
+ diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_requesting_agent.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_requesting_agent.ttl new file mode 100644 index 0000000..332c67d --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_requesting_agent.ttl @@ -0,0 +1,46 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:AgentIsMemberOf + a sh:NodeShape ; + sh:name "Requesting Agent" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT DISTINCT ?this WHERE { + ?action a schema:CreateAction ; + schema:agent ?this . + } + """ + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "memberOf" ; + sh:path schema:memberOf; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message """The Requesting Agent SHOULD have a `memberOf` property.""" ; + ] . 
\ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_responsible_project.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_responsible_project.ttl new file mode 100644 index 0000000..28d1c1e --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/1_responsible_project.ttl @@ -0,0 +1,60 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:ResponsibleProjectMemberAndSourceOrganizationIntersection + a sh:NodeShape ; + sh:name "Organizations (members of Responsible Project)" ; + sh:description """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT DISTINCT ?this WHERE { + ?action a schema:CreateAction ; + schema:agent ?this . + ?this a schema:Person ; + schema:memberOf ?project ; + schema:affiliation ?someAffiliation . + ?project schema:member ?org2 . 
+ } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "Intersection with agent affiliations" ; + sh:description """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ; + + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this WHERE { + FILTER NOT EXISTS { + $this schema:affiliation ?org . + $this schema:memberOf ?project . + ?project schema:member ?org . + } + } + """ ; + sh:severity sh:Warning ; + sh:message """At least one of the organisations that are members of the responsible project SHOULD be included in the Requesting Agent's affiliations, if such properties exist.""" ; + ] . \ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/2_requesting_agent.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/2_requesting_agent.ttl new file mode 100644 index 0000000..c21b3f9 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/2_requesting_agent.ttl @@ -0,0 +1,49 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . 
+ + +# Person who is the agent of a CreateAction SHOULD have an affiliation +five-safes-crate:PersonAgentHasAffiliation + a sh:NodeShape ; + sh:name "Agent of CreateAction" ; + sh:description "The agent of a CreateAction entity" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT DISTINCT ?this WHERE { + ?action a schema:CreateAction ; + schema:agent ?this . + } + """ + ] ; + + # The agent of a CreateAction entity SHOULD have an affiliation + sh:property [ + a sh:PropertyShape ; + sh:name "Presence of affiliations" ; + sh:path schema:affiliation ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The agent of a CreateAction entity SHOULD have an affiliation" ; + ] . \ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/4_sign_off.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/4_sign_off.ttl new file mode 100644 index 0000000..129c02d --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/4_sign_off.ttl @@ -0,0 +1,177 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . +@prefix shp: . 
+ + +# There SHOULD be a Sign-Off Phase +five-safes-crate:SignOffPhase + a sh:NodeShape ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "Check the Sign-Off Phase" ; + sh:sparql [ + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + FILTER NOT EXISTS { + ?action schema:additionalType shp:SignOff . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "There SHOULD be a Sign-Off Phase in the Final RO-Crate" ; + ] ; + sh:sparql [ + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + ?action schema:additionalType shp:SignOff . + FILTER NOT EXISTS { + $this schema:mentions ?action . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "The Root Data Entity SHOULD mention a Sign-Off Phase Object" ; + ] . + + +five-safes-crate:SignOffPhaseProperties + a sh:NodeShape ; + sh:description "Check Sign-Off Phase Properties" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff . 
+ } + """ + ] ; + sh:property [ + sh:description "Check if the Sign Off phase has an actionStatus" ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD have an actionStatus" ; + ] ; + sh:property [ + sh:description "Check if the Sign Off phase has an agent" ; + sh:path schema:agent ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD have an agent" ; + ] ; + sh:property [ + sh:description "Check if the Sign Off phase has an instrument (TRE Policy)" ; + sh:path schema:instrument ; + sh:class schema:CreativeWork ; + sh:nodeKind sh:IRI; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD have an TRE policy (instrument) with type CreativeWork" ; + ] ; + sh:property [ + sh:description "Check if the Sign Off phase has an instrument (TRE Policy)" ; + sh:path ( schema:instrument schema:name ) ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD have an TRE policy (instrument) with a human-readable name" ; + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:description "Check if the Sign Off phase lists the workflow as an object" ; + sh:select """ + PREFIX schema: + PREFIX rocrate: + SELECT $this + WHERE { + ?root a schema:Dataset ; + schema:mainEntity ?mainEntity ; + rdf:type rocrate:RootDataEntity . + FILTER NOT EXISTS { + $this schema:object ?mainEntity . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD list the workflow (mainEntity) as an object" ; + ]; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:description "Check if the Sign Off phase lists the Responsible Project as an object" ; + sh:select """ + PREFIX schema: + PREFIX rocrate: + SELECT $this + WHERE { + ?root a schema:Dataset ; + rdf:type rocrate:RootDataEntity ; + schema:sourceOrganization ?sourceOrg . + FILTER NOT EXISTS { + $this schema:object ?sourceOrg . 
+ } + } + """ ; + sh:severity sh:Warning ; + sh:message "The Sign-Off Phase SHOULD list the Responsible Project (sourceOrganization) as an object" ; + ]. + + +five-safes-crate:SignOffPhaseEndTime + a sh:NodeShape ; + sh:description "Sign Off end time check" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT ?this + WHERE { + ?this schema:additionalType shp:SignOff ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:description "Sign Off object SHOULD have endTime property if action completed or failed." ; + sh:message "Sign Off object SHOULD have endTime property if action completed or failed." ; + ] . diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/6_workflow_reference.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/6_workflow_reference.ttl new file mode 100644 index 0000000..98c2856 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/6_workflow_reference.ttl @@ -0,0 +1,49 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . 
+@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:DatasetMustHaveDistributionIfURI + a sh:NodeShape ; + sh:name "mainEntity" ; + sh:targetObjectsOf schema:mainEntity ; + sh:description "If mainEntity has an HTTP(S) @id, it SHOULD have a distribution that is an HTTP(S) URL." ; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "distribution" ; + sh:description "If mainEntity has an HTTP(S) @id, it SHOULD have a distribution that is an HTTP(S) URL." ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this + WHERE { + FILTER (STRSTARTS(STR($this), "http://") || STRSTARTS(STR($this), "https://")) . + FILTER NOT EXISTS { + $this schema:distribution ?dist . + FILTER (STRSTARTS(STR(?dist), "http://") || STRSTARTS(STR(?dist), "https://")) . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "If mainEntity has an HTTP(S) @id SHOULD have at least one distribution with an HTTP(S) URL." ; + ] . \ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/7_requested_workflow_run.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/7_requested_workflow_run.ttl new file mode 100644 index 0000000..41af739 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/7_requested_workflow_run.ttl @@ -0,0 +1,36 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +# CreateAction SHOULD have object property with minimum cardinality 1 +five-safes-crate:CreateActionShouldHaveObjectProperty + a sh:NodeShape ; + sh:targetClass schema:CreateAction ; + sh:name "CreateAction" ; + sh:property [ + sh:path schema:object ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:severity sh:Warning ; + sh:message "`CreateAction` SHOULD have the property `object` with IRI values." ; + ] . \ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/8_disclosure_phase.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/8_disclosure_phase.ttl new file mode 100644 index 0000000..1d46625 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/8_disclosure_phase.ttl @@ -0,0 +1,114 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + +five-safes-crate:RootDataEntityShouldMentionDisclosureObject + a sh:NodeShape ; + sh:name "RootDataEntity" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:description "RootDataEntity SHOULD mention a disclosure object." 
; + + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "mentions" ; + sh:description "`RootDataEntity` SHOULD mention a disclosure object." ; + sh:select """ + PREFIX schema: + PREFIX shp: + SELECT $this + WHERE { + FILTER NOT EXISTS{ + $this schema:mentions ?action . + ?action a schema:AssessAction ; + schema:additionalType shp:DisclosureCheck . + } + } + """ ; + sh:severity sh:Warning ; + sh:message "`RootDataEntity` SHOULD mention a disclosure object." ; + ] . + + +five-safes-crate:DisclosureObjectHasActionStatus + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "The `DisclosureCheck` SHOULD have `actionStatus` property." ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck . + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "ActionStatus" ; + sh:description "The `DisclosureCheck` SHOULD have `actionStatus` property." ; + sh:path schema:actionStatus ; + sh:minCount 1 ; + sh:severity sh:Warning ; + sh:message "The `DisclosureCheck` SHOULD have `actionStatus` property." ; + ] . + + +five-safes-crate:DisclosureObjectHasEndTimeIfcompletedOrFailed + a sh:NodeShape ; + sh:name "DisclosureCheck" ; + sh:description "DisclosureCheck" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + PREFIX schema: + PREFIX shp: + + SELECT ?this + WHERE { + ?this schema:additionalType shp:DisclosureCheck ; + schema:actionStatus ?status . + FILTER(?status IN ( + "http://schema.org/CompletedActionStatus", + "http://schema.org/FailedActionStatus" + )) + } + """ ; + ] ; + + sh:property [ + a sh:PropertyShape ; + sh:name "EndTime" ; + sh:path schema:endTime ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:severity sh:Warning ; + sh:description "`DisclosureCheck` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." 
; + sh:message "`DisclosureCheck` SHOULD have the `endTime` property if `actionStatus` is either CompletedActionStatus or FailedActionStatus." ; + ] . \ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/five-safes-crate/should/9_inputs.ttl b/tests/data/rocrate_validator_profiles/five-safes-crate/should/9_inputs.ttl new file mode 100644 index 0000000..5726109 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/five-safes-crate/should/9_inputs.ttl @@ -0,0 +1,58 @@ +# Copyright (c) 2025 eScience Lab, The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix five-safes-crate: . +@prefix rdf: . +@prefix schema: . +@prefix bioschemas: . +@prefix purl: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . + + + +five-safes-crate:InputEntityReferencesFormalParameterViaExampleOfWork + a sh:NodeShape ; + sh:name "Input" ; + sh:description "" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this WHERE { + ?action a schema:CreateAction ; + schema:object ?this . + } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:name "exampleOfWork" ; + sh:description "Input SHOULD reference a FormalParameter using exampleOfWork" ; + + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT $this WHERE { + FILTER NOT EXISTS { + $this schema:exampleOfWork ?par . + ?par a bioschemas:FormalParameter . 
+ } + } + """ ; + sh:severity sh:Warning ; + sh:message "Input SHOULD reference a FormalParameter using exampleOfWork" ; + ] . \ No newline at end of file diff --git a/tests/data/rocrate_validator_profiles/ro-crate/may/4_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/may/4_data_entity_metadata.ttl new file mode 100644 index 0000000..4dfd242 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/may/4_data_entity_metadata.ttl @@ -0,0 +1,89 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix sh: . +@prefix xsd: . +@prefix owl: . +@prefix schema: . +@prefix validator: . + +ro-crate:FileDataEntityWebOptionalProperties a sh:NodeShape ; + sh:name "File Data Entity with web presence: OPTIONAL properties" ; + sh:description """A File Data Entity which have a corresponding web presence, + for instance a landing page that describes the file, including persistence identifiers (e.g. DOI), + resolving to an intermediate HTML page instead of the downloadable file directly. + These can included for File Data Entities as additional metadata by using the properties: + `identifier`, `url`, `subjectOf`and `mainEntityOfPage`""" ; + sh:targetClass ro-crate:File ; + # Check if the Web-based Data Entity has a contentSize property + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: optional formal `identifier` (e.g. 
DOI)" ; + sh:description """Check if the File Data Entity has a formal identifier string such as a DOI""" ; + sh:path schema:identifier ; + sh:datatype xsd:anyURI ; + sh:severity sh:Info ; + sh:message """The File Data Entity MAY have a formal identifier specified through an `identifier` property""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: optional `url` property" ; + sh:description """Check if the File Data Entity has an optional `download` link""" ; + sh:path schema:url ; + sh:datatype xsd:anyURI ; + sh:severity sh:Info ; + sh:message """The File Data Entity MAY use a `url` property to denote a `download` link""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: optional `subjectOf` property" ; + sh:description """Check if the File Data Entity includes a `subjectOf` property to link `CreativeWork` instances that mention it.""" ; + sh:path schema:subjectOf ; + sh:class schema:WebPage, schema:CreativeWork ; + sh:severity sh:Info ; + sh:message """The File Data Entity MAY include a `subjectOf` property to link `CreativeWork` instances that mention it.""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "File Data Entity: optional `mainEntityOfPage` property" ; + sh:description """Check if the File Data Entity has a `mainEntityOfPage` property""" ; + sh:path schema:mainEntityOfPage ; + sh:class schema:WebPage, schema:CreativeWork ; + sh:severity sh:Info ; + sh:message """The File Data Entity MAY have a `mainEntityOfPage` property""" ; + ] . 
+ + +ro-crate:DirectoryDataEntityWebOptionalDistribution a sh:NodeShape ; + sh:name "Directory Data Entity: OPTIONAL `distribution` property" ; + sh:description """A Directory Data Entity MAY have a `distribution` property to denote the distribution of the files within the directory""" ; + sh:targetClass ro-crate:File ; + # Check if the Web-based Data Entity has a contentSize property + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "Directory Data Entity: optional `distribution` property" ; + sh:description """Check if the Directory Data Entity has a `distribution` property""" ; + sh:path schema:distribution ; + sh:datatype xsd:anyURI ; + sh:severity sh:Info ; + sh:message """The Directory Data Entity MAY have a `distribution` property to denote the distribution of the files within the directory""" ; + ] . diff --git a/tests/data/rocrate_validator_profiles/ro-crate/may/61_license_entity.ttl b/tests/data/rocrate_validator_profiles/ro-crate/may/61_license_entity.ttl new file mode 100644 index 0000000..202af89 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/may/61_license_entity.ttl @@ -0,0 +1,66 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix xsd: . 
+ + +ro-crate:LicenseOptionalAllowedValues a sh:NodeShape ; + sh:name "Root Data Entity: optional properties" ; + sh:description """Define the optional properties for the Root Data Entity (e.g., license)""" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "License" ; + sh:description """Check if the RO-Crate has a license property with a URI or a textual description""" ; + sh:message """MAY have a URI (eg for Creative Commons or Open Source licenses). + MAY, if necessary be a textual description of how the RO-Crate may be used.""" ; + sh:minCount 1 ; + sh:nodeKind sh:IRIOrLiteral ; + sh:path schema_org:license ; + sh:or ( + [ sh:dataType xsd:string ] + [ sh:dataType xsd:anyURI ] + ) ; + ]. + +ro-crate:LicenseDefinition a sh:NodeShape ; + sh:name "License definition" ; + sh:description """Contextual entity representing a license with a name and description."""; + sh:targetClass schema_org:license ; + sh:property [ + a sh:PropertyShape ; + sh:name "License name" ; + sh:description "The license MAY have a name" ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:name ; + sh:message "Missing license name" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "License description" ; + sh:description """The license MAY have a description""" ; + sh:maxCount 1; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:description ; + sh:message "Missing license description" ; + ] . + diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/0_file_descriptor_format.py b/tests/data/rocrate_validator_profiles/ro-crate/must/0_file_descriptor_format.py new file mode 100644 index 0000000..fa02f64 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/must/0_file_descriptor_format.py @@ -0,0 +1,401 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +from rocrate_validator.utils import log as logging +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) +from rocrate_validator.utils.http import HttpRequester + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="File Descriptor existence") +class FileDescriptorExistence(PyFunctionCheck): + """The file descriptor MUST be present in the RO-Crate and MUST not be empty.""" + + @check(name="File Descriptor Existence") + def test_existence(self, context: ValidationContext) -> bool: + """ + Check if the file descriptor is present in the RO-Crate + """ + if context.settings.metadata_only: + logger.debug("Skipping file descriptor existence check in metadata-only mode") + return True + if not context.ro_crate.has_descriptor(): + message = f'file descriptor "{context.rel_fd_path}" is not present' + context.result.add_issue(message, self) + return False + return True + + @check(name="File Descriptor size check") + def test_size(self, context: ValidationContext) -> bool: + """ + Check if the file descriptor is not empty + """ + if context.settings.metadata_only: + logger.debug("Skipping file descriptor existence check in metadata-only mode") + return True + if not context.ro_crate.has_descriptor(): + message = f'file descriptor {context.rel_fd_path} is empty' + context.result.add_issue(message, self) + return False + if context.ro_crate.metadata.size == 0: + context.result.add_issue(f'RO-Crate "{context.rel_fd_path}" 
file descriptor is empty', self) + return False + return True + + +@requirement(name="File Descriptor JSON format") +class FileDescriptorJsonFormat(PyFunctionCheck): + """ + The file descriptor MUST be a valid JSON file + """ + @check(name="File Descriptor JSON format") + def check(self, context: ValidationContext) -> bool: + """ Check if the file descriptor is in the correct format""" + try: + logger.debug("Checking validity of JSON file at %s", context.ro_crate.metadata) + context.ro_crate.metadata.as_dict() + return True + except Exception as e: + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" is not in the correct format', self) + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + +@requirement(name="File Descriptor JSON-LD format") +class FileDescriptorJsonLdFormat(PyFunctionCheck): + """ + The file descriptor MUST be a valid JSON-LD file + """ + + def __check_remote_context__(self, context_uri: str) -> bool: + # Try to retrieve the context + try: + raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json"}) + if raw_data.status_code != 200: + raise RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'", self) + logger.debug(f"Retrieved context from {context_uri}") + + # Try to parse the JSON-LD and access the context + jsonLD = raw_data.json()["@context"] + assert isinstance(jsonLD, dict) + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + def __check_contexts__(self, context: ValidationContext, jsonld_context: object) -> bool: + """ Get the keys of the context URI """ + is_valid = True + # if the context is a string, check if it is a valid URI + if isinstance(jsonld_context, str): + if not self.__check_remote_context__(jsonld_context): + context.result.add_issue( + f'Unable to retrieve the JSON-LD context "{jsonld_context}"', self) + is_valid = False + + # if the context is a dictionary, 
get the keys of the dictionary + if isinstance(jsonld_context, dict): + logger.debug(f"Detected dictionary context: {jsonld_context}") + + # if the context is a list of contexts, get the keys of each context + if isinstance(jsonld_context, list): + for ctx in jsonld_context: + if not self.__check_contexts__(context, ctx): + is_valid = False + # return if the context is valid + return is_valid + + @check(name="File Descriptor @context property validation") + def check_context(self, context: ValidationContext) -> bool: + """ Check if the file descriptor contains + the @context property and it is a valid JSON-LD context + """ + try: + json_dict = context.ro_crate.metadata.as_dict() + if "@context" not in json_dict: + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" ' + "does not contain a context", self) + return False + + # Check if the context is valid + return self.__check_contexts__(context, json_dict["@context"]) + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="File Descriptor JSON-LD must be flattened") + def check_flattened(self, context: ValidationContext) -> bool: + """ Check if the file descriptor is flattened """ + + def is_entity_flat_recursive(entity: Any, is_first: bool = True, fail_fast: bool = False) -> bool: + """ Recursively check if the given data corresponds to a flattened JSON-LD object + and returns False if it does not and is not a root element + """ + result = True + if isinstance(entity, dict): + if is_first: + for _, elem in entity.items(): + if not is_entity_flat_recursive(elem, is_first=False, fail_fast=fail_fast): + result = False + if fail_fast: + return False + # if this is not the root element, it must not contain more properties than @id + else: + if "@id" in entity and "@value" in entity: + # add issue if both @id and @value are present + context.result.add_issue( + ( + f'entity "{entity.get("@id", entity)}" contains both @id and 
@value: ' + 'an object with an @value represents a value object, which is a literal value such as ' + 'a string, number, date, or language-tagged string. This object is not an identifiable ' + 'resource, but a simple literal value.' + ), + self + ) + result = False + if fail_fast: + return False + + # Handle value objects + if "@value" in entity: + # Inline the checks from is_value_object and add issues for each violation + if not isinstance(entity, dict): + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + 'it MUST be a dictionary.', + self + ) + result = False + if fail_fast: + return False + + has_language = "@language" in entity + has_type = "@type" in entity + + if has_language and has_type: + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + '@language and @type cannot coexist.', + self + ) + result = False + if fail_fast: + return False + + if has_language and not isinstance(entity["@value"], str): + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid value object: ' + 'if @language is present, @value must be a string.', + self + ) + result = False + if fail_fast: + return False + # Handle node objects: + # every remaining entity with len(entity) > 1 must be a node object + elif "@id" not in entity or len(entity) > 1: + context.result.add_issue( + f'entity "{entity.get("@id", entity)}" is not a valid node object reference: ' + 'it MUST have only @id, but no other properties.', + self + ) + result = False + if fail_fast: + return False + if isinstance(entity, list): + for element in entity: + if not is_entity_flat_recursive(element, is_first=False, fail_fast=fail_fast): + result = False + if fail_fast: + return False + return result + + try: + fail_fast = context.settings.abort_on_first + json_dict = context.ro_crate.metadata.as_dict() + result = True + for entity in json_dict["@graph"]: + if not is_entity_flat_recursive(entity, 
fail_fast=fail_fast): + context.result.add_issue( + f'RO-Crate file descriptor "{context.rel_fd_path}" ' + f'is not fully flattened at entity "{entity.get("@id", entity)}"', self) + result = False + if fail_fast: + return False + return result + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="Validation of the @id property of the file descriptor entities") + def check_identifiers(self, context: ValidationContext) -> bool: + """ Check if the file descriptor entities have the @id property """ + try: + json_dict = context.ro_crate.metadata.as_dict() + for entity in json_dict["@graph"]: + if "@id" not in entity: + context.result.add_issue( + f"Entity \"{entity.get('name', None) or entity}\" " + f"of RO-Crate \"{context.rel_fd_path}\" " + "file descriptor does not contain the @id attribute", self) + return False + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + @check(name="Validation of the @type property of the file descriptor entities") + def check_types(self, context: ValidationContext) -> bool: + """ Check if the file descriptor entities have the @type property """ + try: + json_dict = context.ro_crate.metadata.as_dict() + for entity in json_dict["@graph"]: + if "@type" not in entity: + context.result.add_issue( + f"Entity \"{entity.get('name', None) or entity}\" " + f"of RO-Crate \"{context.rel_fd_path}\" " + "file descriptor does not contain the @type attribute", self) + return False + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + return False + + def __get_context_keys__(self, context: object) -> set: + """ Get the keys of the context URI """ + if isinstance(context, str): + return self.__get_remote_context_keys__(context) + + # if the context is a dictionary, get the keys of the dictionary + if isinstance(context, dict): + return set(context.keys()) + + # if the 
context is a list of contexts, get the keys of each context + if isinstance(context, list): + keys = set() + for ctx in context: + keys.update(self.__get_context_keys__(ctx)) + return keys + + def __get_remote_context_keys__(self, context_uri: str) -> set: + """ Get the keys of the context URI """ + + logger.debug(f"Retrieving context from {context_uri}...") + # Try to retrieve the context + raw_data = HttpRequester().get(context_uri, headers={"Accept": "application/ld+json"}) + if raw_data.status_code != 200: + raise RuntimeError(f"Unable to retrieve the JSON-LD context '{context_uri}'") + + logger.debug(f"Retrieved context from {context_uri}") + + # Get the keys of the context + jsonLD = raw_data.json() + jsonLD_ctx = jsonLD["@context"] + if not isinstance(jsonLD_ctx, dict): + raise RuntimeError("The context is not a dictionary", self) + return set(jsonLD_ctx.keys()) + + def __check_entity_keys__(self, entity: dict, + context_keys: set, + unexpected_keys: dict[str, int] = None) -> dict[str, int]: + """ Check if the entity is in the correct format """ + + def add_unexpected_key(k: str, u_keys: dict) -> None: + """ Add a key to the unexpected keys dictionary """ + u_keys[k] = u_keys.get(k, 0) + 1 + + # Keys that should be skipped + SKIP_KEYS = {"@id", "@type", "@context", "@value", "@language"} + + # Ensure unexpected_keys is initialized + if unexpected_keys is None: + unexpected_keys = {} + + # If the entity is a dictionary, check each key + if isinstance(entity, dict): + for k, v in entity.items(): + if k not in context_keys and k not in SKIP_KEYS: + logger.debug(f"Key {k} not in context keys") + add_unexpected_key(k, unexpected_keys) + if isinstance(v, (dict, list)): + self.__check_entity_keys__(v, context_keys, unexpected_keys) + + # If the entity is a list, check each element + elif isinstance(entity, list): + for elem in entity: + self.__check_entity_keys__(elem, context_keys, unexpected_keys) + + return unexpected_keys + + @check(name="Validation of the 
compaction format of the file descriptor") + def check_compaction(self, context: ValidationContext) -> bool: + """ Check if the file descriptor is in the **compacted** JSON-LD format """ + try: + logger.debug("Checking compaction format of JSON-LD file at %s", context.ro_crate.metadata) + json_dict = context.ro_crate.metadata.as_dict() + logger.debug(f"JSONLD keys:{json_dict.keys()}") + + jsonld_context = json_dict.get("@context", None) + logger.debug(f"Context: {jsonld_context}") + + try: + context_keys = self.__get_context_keys__(jsonld_context) + logger.debug(f"{context_keys}") + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + context.result.add_issue(str(e), self) + return False + + unexpected_keys = self.__check_entity_keys__(json_dict.get("@graph"), context_keys) + logger.debug(f"Unexpected keys: {unexpected_keys}") + if len(unexpected_keys) > 0: + for k, v in unexpected_keys.items(): + logger.debug(f"Key {k} appears {v} times") + # Add the correct suffix to the message + suffix = "s" if v > 1 else "" + # Check if k is a full URI (not allowed as a key) or an undeclared term + if k.startswith("http"): + context.result.add_issue( + f'The {v} occurrence{suffix} of the "{k}" URI cannot be used as a key{suffix} ' + 'because the compacted format requires simple terms as keys ' + '(see https://www.w3.org/TR/json-ld-api/#compaction for more details).', self) + else: + context.result.add_issue( + f'The {v} occurrence{suffix} of the JSON-LD key "{k}" ' + f'{"is" if v == 1 else "are"} not allowed in the compacted format ' + 'because it is not present in the @context of the document', self) + return False + + return True + except Exception as e: + if logger.isEnabledFor(logging.DEBUG): + logger.exception(e) + context.result.add_issue( + f'Unexpected error: {e}', self) + return False diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/1_file-descriptor_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/1_file-descriptor_metadata.ttl new 
file mode 100644 index 0000000..32d2a63 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/must/1_file-descriptor_metadata.ttl @@ -0,0 +1,100 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix dct: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix validator: . + + +ro-crate:FindROCrateMetadataFileDescriptorEntity a sh:NodeShape, validator:HiddenShape; + sh:name "Identify the RO-Crate Metadata File Descriptor" ; + sh:description """The RO-Crate Metadata File Descriptor entity describes the RO-Crate itself, and it is named as `ro-crate-metadata.json`. + It can be identified by name according to the RO-Crate specification + available at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.1/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores).""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:CreativeWork ; + FILTER(contains(str(?this), "ro-crate-metadata.json")) + } + """ + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:ROCrateMetadataFileDescriptor ; + ] . 
+ +ro-crate:ROCrateMetadataFileDescriptorExistence + a sh:NodeShape ; + sh:name "RO-Crate Metadata File Descriptor entity existence" ; + sh:description "The RO-Crate JSON-LD MUST contain a Metadata File Descriptor entity named `ro-crate-metadata.json` and typed as `schema:CreativeWork`" ; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "RO-Crate Metadata File Descriptor entity existence" ; + sh:description """Check if the RO-Crate Metadata File Descriptor entity exists, + i.e., if there exists an entity with @id `ro-crate-metadata.json` and type `schema:CreativeWork`""" ; + sh:path rdf:type ; + sh:hasValue ro-crate:ROCrateMetadataFileDescriptor ; + sh:minCount 1 ; + sh:message "The root of the document MUST have an entity with @id `ro-crate-metadata.json`" ; + ] . + +ro-crate:ROCrateMetadataFileDescriptorRecommendedProperties a sh:NodeShape ; + sh:name "RO-Crate Metadata File Descriptor REQUIRED properties" ; + sh:description """RO-Crate Metadata Descriptor MUST be defined + according with the requirements details defined in + [RO-Crate Metadata File Descriptor](https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#ro-crate-metadata-file-descriptor)"""; + sh:targetNode ro:ro-crate-metadata.json ; + sh:property [ + a sh:PropertyShape ; + sh:name "Metadata File Descriptor entity type" ; + sh:description "Check if the RO-Crate Metadata File Descriptor has `@type` CreativeWork, as per schema.org" ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:path rdf:type ; + sh:hasValue schema_org:CreativeWork ; + sh:message "The RO-Crate metadata file MUST be a CreativeWork, as per schema.org" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Metadata File Descriptor entity: `about` property" ; + sh:description """Check if the RO-Crate Metadata File Descriptor has an `about` property referencing the Root Data Entity""" ; + sh:maxCount 1; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:path schema_org:about ; + sh:class 
schema_org:Dataset ; + sh:message "The RO-Crate metadata file descriptor MUST have an `about` property referencing the Root Data Entity" ; + ] ; + sh:property ro-crate:conformsToROCrateSpec . + +ro-crate:conformsToROCrateSpec sh:name "Metadata File Descriptor entity: `conformsTo` property" ; + sh:description """Check if the RO-Crate Metadata File Descriptor has a `conformsTo` property which points to the RO-Crate specification version""" ; + sh:minCount 1 ; + sh:nodeKind sh:IRI ; + sh:path dct:conformsTo ; + sh:hasValue ; + sh:message "The RO-Crate metadata file descriptor MUST have a `conformsTo` property with the RO-Crate specification version" . diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/2_root_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/2_root_data_entity_metadata.ttl new file mode 100644 index 0000000..2c2efcf --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/must/2_root_data_entity_metadata.ttl @@ -0,0 +1,175 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix validator: . +@prefix xsd: . 
+ + +ro-crate:RootDataEntityType + a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity type" ; + sh:description "The Root Data Entity MUST be a `Dataset` (as per `schema.org`)" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?metadatafile schema:about ?this . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity type" ; + sh:description "Check if the Root Data Entity is a `Dataset` (as per `schema.org`)" ; + sh:path rdf:type ; + sh:hasValue schema_org:Dataset ; + sh:minCount 1 ; + sh:message """The Root Data Entity MUST be a `Dataset` (as per `schema.org`)""" ; + ] ; + # Validate that if the publisher is specified, it is an Organization or a Person + sh:property [ + sh:path schema_org:publisher ; + sh:severity sh:Violation ; + sh:name "Root Data Entity: `publisher` property" ; + sh:description """Check if the Root Data Entity has a `publisher` property of type `Organization` or `Person`.""" ; + sh:or ( + [ sh:class schema_org:Organization ] + [ sh:class schema_org:Person ] + ) ; + sh:message """The Root Data Entity MUST have a `publisher` property of type `Organization` or `Person`.""" ; + ] . + + +ro-crate:FindRootDataEntity a sh:NodeShape, validator:HiddenShape; + sh:name "Identify the Root Data Entity of the RO-Crate" ; + sh:description """The Root Data Entity is the top-level Data Entity in the RO-Crate and serves as the starting point for the description of the RO-Crate. + It is a schema:Dataset and is indirectly identified by the about property of the resource ro-crate-metadata.json in the RO-Crate + (see the definition at [Finding RO-Crate Root in RDF triple stores](https://www.researchobject.org/ro-crate/1.1/appendix/relative-uris.html#finding-ro-crate-root-in-rdf-triple-stores)). 
+ """ ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:Dataset . + ?metadatafile schema:about ?this . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + } + """ + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:RootDataEntity ; + ] . + + +ro-crate:RootDataEntityValueRestriction + a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity value restriction" ; + sh:description "The Root Data Entity MUST end with `/`" ; + sh:targetNode ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity URI value" ; + sh:description "Check if the Root Data Entity URI ends with `/`" ; + sh:path [ sh:inversePath rdf:type ] ; + sh:minCount 1 ; + sh:message """The Root Data Entity URI MUST end with `/`""" ; + sh:pattern "/$" ; + ] . + +ro-crate:RootDataEntityRequiredProperties + a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity REQUIRED properties" ; + sh:description "The Root Data Entity MUST have a `name`, `description`, `license` and `datePublished`" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `name` property" ; + sh:description """Check if the Root Data Entity includes a `name` (as specified by schema.org) + to clearly identify the dataset and distinguish it from other datasets.""" ; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:name; + sh:message "The Root Data Entity MUST have a `name` property (as specified by schema.org)" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `description` property" ; + sh:description """Check if the Root Data Entity includes a `description` (as specified by schema.org) + to provide a human-readable description of the dataset.""" ; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + 
sh:path schema_org:description; + sh:message "The Root Data Entity MUST have a `description` property (as specified by schema.org)" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `license` property" ; + sh:description """Check if the Root Data Entity includes a `license` property (as specified by schema.org) + to provide information about the license of the dataset.""" ; + sh:path schema_org:license; + sh:minCount 1 ; + sh:or ( + [ sh:class ro-crate:ContextualEntity ] + [ sh:datatype xsd:anyURI ] + [ sh:datatype xsd:string ] + ) ; + sh:message """The Root Data Entity MUST have a `license` property (as specified by schema.org). + SHOULD link to a Contextual Entity in the RO-Crate Metadata File with a name and description. + MAY have a URI (eg for Creative Commons or Open Source licenses). + MAY, if necessary be a textual description of how the RO-Crate may be used.""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `datePublished` property" ; + sh:description """Check if the Root Data Entity includes a `datePublished` (as specified by schema.org) + to provide the date when the dataset was published. The datePublished MUST be a valid ISO 8601 date.""" ; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:datePublished ; + sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$" ; + sh:message "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) with a valid ISO 8601 date" ; + ] . 
+ +ro-crate:RootDataEntityHasPartValueRestriction + a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity: `hasPart` value restriction" ; + sh:description "The Root Data Entity MUST be linked to the declared `File`, `Directory` and other types of instances through the `hasPart` property" ; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "RO-Crate Root Data Entity: `hasPart` value restriction" ; + sh:description "Check if the Root Data Entity is linked to the declared `File`, `Directory` and other types of instances through the `hasPart` property" ; + sh:path schema_org:hasPart ; + sh:or ( + [ sh:class ro-crate:File ] + [ sh:class ro-crate:Directory ] + [ sh:class ro-crate:GenericDataEntity ] + ) ; + sh:message """The Root Data Entity MUST be linked to either File or Directory instances, nothing else""" ; + ] . diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.py b/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.py new file mode 100644 index 0000000..1abcbcf --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.py @@ -0,0 +1,75 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from rocrate_validator.utils import log as logging +from rocrate_validator.models import ValidationContext +from rocrate_validator.requirements.python import (PyFunctionCheck, check, + requirement) + +# set up logging +logger = logging.getLogger(__name__) + + +@requirement(name="Data Entity: REQUIRED resource availability") +class DataEntityRequiredChecker(PyFunctionCheck): + """ + Resources corresponding to local Data Entities MUST be present in the RO-Crate payload + """ + + @check(name="Data Entity: REQUIRED resource availability") + def check_availability(self, context: ValidationContext) -> bool: + """ + Check the presence of the Data Entity in the RO-Crate + """ + # Skip the check in metadata-only mode + if context.settings.metadata_only: + logger.debug("Skipping Data Entity availability check in metadata-only mode") + return True + # Perform the check + result = True + for entity in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True): + assert entity.id is not None, "Entity has no @id" + logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id) + try: + logger.debug("Ensure the presence of the Data Entity '%s' within the RO-Crate", entity.id) + if entity.has_local_identifier(): + logger.debug( + "Ignoring the Data Entity '%s' as it is a local entity with a local identifier. " + "According to the RO-Crate specification, local entities with local identifiers " + "are not required to be included in the RO-Crate payload " + "(see https://github.com/ResearchObject/ro-crate/issues/400#issuecomment-2779152885 and " + "https://github.com/ResearchObject/ro-crate/pull/426 for more details)", + entity.id) + continue + if not entity.has_relative_path(): + logger.debug( + "Ignoring the Data Entity '%s' as it is a local entity with an absolute path. " + "According to the RO-Crate specification, local entities with absolute paths " + "are not required to be included in the RO-Crate payload. 
" + "It is only recommended that they exist at the time of RO-Crate creation.", + entity.id) + continue + if not entity.is_available(): + context.result.add_issue( + f"The RO-Crate does not include the Data Entity '{entity.id}' as part of its payload", self) + result = False + except Exception as e: + context.result.add_issue( + f"Unable to check the presence of the Data Entity '{entity.id}' within the RO-Crate", self) + if logger.isEnabledFor(logging.DEBUG): + logger.debug(e, exc_info=True) + result = False + if not result and context.fail_fast: + return result + return result diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.ttl new file mode 100644 index 0000000..a172e1f --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/must/4_data_entity_metadata.ttl @@ -0,0 +1,216 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix owl: . +@prefix validator: . 
+ +ro-crate:DataEntityRequiredProperties a sh:NodeShape ; + sh:name "Data Entity: REQUIRED properties" ; + sh:description """A Data Entity MUST be a `URI Path` relative to the ROCrate root, + or an absolute URI""" ; + sh:targetClass ro-crate:DataEntity ; + + sh:property [ + sh:name "Data Entity: @id value restriction" ; + sh:description """Check if the Data Entity has an absolute or relative URI as `@id`""" ; + sh:path [sh:inversePath rdf:type ] ; + sh:nodeKind sh:IRI ; + sh:severity sh:Violation ; + sh:message """Data Entities MUST have an absolute or relative URI as @id.""" ; + ] . + +ro-crate:FileDataEntity a sh:NodeShape ; + sh:name "File Data Entity: REQUIRED properties" ; + sh:description """A File Data Entity MUST be a `File`. + `File` is an RO-Crate alias for the schema.org `MediaObject`. + The term `File` here is liberal, and includes "downloadable" resources where `@id` is an absolute URI. + """ ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:MediaObject . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(!STRSTARTS(STR(?this), CONCAT(STR(?root), "#"))) + } + """ + ] ; + + sh:property [ + sh:name "File Data Entity: REQUIRED type" ; + sh:description """Check if the File Data Entity has `File` as `@type`. + `File` is an RO-Crate alias for the schema.org `MediaObject`. + """ ; + sh:path rdf:type ; + sh:hasValue ro-crate:File ; + sh:severity sh:Violation ; + sh:message """File Data Entities MUST have "File" as a value for @type.""" ; + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:DataEntity ; + ] . + + +ro-crate:DirectoryDataEntity a sh:NodeShape ; + sh:name "Directory Data Entity: REQUIRED properties" ; + sh:description """A Directory Data Entity MUST be of @type `Dataset`. 
+ The term `directory` here includes HTTP file listings where `@id` is an absolute URI. + """ ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:Dataset . + ?metadatafile schema:about ?root . + # Exclude all dataset entities that ends with `./#` + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?this != ?root) + FILTER(!STRSTARTS(STR(?this), CONCAT(STR(?root), "#"))) + } + """ + ] ; + + # Decomment for debugging + # sh:property [ + # sh:name "Test Directory" ; + # sh:description """Data Entities representing directories MUST have "Directory" as a value for @type.""" ; + # sh:path rdf:type ; + # sh:hasValue ro-crate:File ; + # sh:severity sh:Violation ; + # ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:Directory ; + ] ; + + # Expand data graph with triples from the directory data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:DataEntity ; + ] ; + + # Ensure that the directory data entity is a dataset + sh:property [ + sh:name "Directory Data Entity: REQUIRED type" ; + sh:description """Check if the Directory Data Entity has `Dataset` as `@type`.""" ; + sh:path rdf:type ; + sh:hasValue schema_org:Dataset ; + sh:severity sh:Violation ; + ] . 
+ +ro-crate:DataEntityRequiredPropertiesShape a sh:NodeShape ; + sh:name "Data Entity: REQUIRED properties" ; + sh:description """A `DataEntity` MUST be linked, either directly or indirectly, from the Root Data Entity""" ; + sh:targetClass ro-crate:DataEntity ; + sh:property + [ + a sh:PropertyShape ; + sh:path [ sh:inversePath schema_org:hasPart ] ; + sh:node schema_org:Dataset ; + sh:minCount 1 ; + sh:name "Data Entity MUST be directly referenced" ; + sh:description """Check if the Data Entity is linked, either directly or indirectly, to the `Root Data Entity` using the `hasPart` (as defined in `schema.org`) property" """ ; + # sh:message "A Data Entity MUST be directly or indirectly linked to the `Root Data Entity` through the `hasPart` property" ; + ] . + +ro-crate:GenericDataEntityRequiredProperties a sh:NodeShape ; + sh:name "Generic Data Entity: REQUIRED properties" ; + sh:description """A Data Entity other than a File or a Directory MUST be a `DataEntity`""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?root schema:hasPart ?this . + ?metadatafile schema:about ?root . + FILTER(contains(str(?metadatafile), "ro-crate-metadata.json")) + FILTER(?this != ?root) + FILTER(?this != ?metadatafile) + FILTER NOT EXISTS { + ?this a schema:MediaObject . + ?this a schema:Dataset . + } + } + """ + ] ; + + # Expand data graph with triples to mark the matching entities as GenericDataEntity instances + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:GenericDataEntity ; + ] ; + + # Expand data graph with triples to mark the matching entities as DataEntity instances + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:DataEntity ; + ] . 
+ + +# Uncomment for debugging +# ro-crate:TestGenericDataEntity a sh:NodeShape ; +# sh:disabled true ; +# sh:targetClass ro-crate:GenericDataEntity ; +# sh:name "Generic Data Entity: test invalid property"; +# sh:description """Check if the GenericDataEntity has the invalidProperty property""" ; +# sh:property [ +# sh:minCount 1 ; +# sh:maxCount 1 ; +# sh:path ro-crate:invalidProperty ; +# sh:severity sh:Violation ; +# sh:message "Testing the generic data entity"; +# sh:datatype xsd:string ; +# sh:message "Testing for the invalidProperty of the generic data entity"; +# ] . + + +# Uncomment for debugging +# ro:testDirectory a sh:NodeShape ; +# sh:name "Definition of Test Directory" ; +# sh:description """A Test Directory is a digital object that is stored in a file format""" ; +# sh:targetClass ro-crate:Directory ; + +# sh:property [ +# sh:name "Test Directory instance" ; +# sh:description """Check if the Directory DataEntity instance has the fake property ro-crate:foo""" ; +# sh:path rdf:type ; +# sh:hasValue ro-crate:foo ; +# sh:severity sh:Violation ; +# ] . diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/5_web_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/5_web_data_entity_metadata.ttl new file mode 100644 index 0000000..c547d60 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/must/5_web_data_entity_metadata.ttl @@ -0,0 +1,50 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix dct: . +@prefix schema_org: . +@prefix sh: . +@prefix owl: . +@prefix xsd: . +@prefix validator: . + + +ro-crate:WebBasedDataEntity a sh:NodeShape, validator:HiddenShape ; + sh:name "Web-based Data Entity: REQUIRED properties" ; + sh:description """A Web-based Data Entity is a `File` identified by an absolute URL""" ; + + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?this a schema:MediaObject . + FILTER(?this != ro:ro-crate-metadata.json) + FILTER regex(str(?this), "^(https?|ftps?)://", "i") + } + """ + ] ; + + # Expand data graph with triples which identify the web-based data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:WebDataEntity ; + ] . + diff --git a/tests/data/rocrate_validator_profiles/ro-crate/must/6_contextual_entity.ttl b/tests/data/rocrate_validator_profiles/ro-crate/must/6_contextual_entity.ttl new file mode 100644 index 0000000..9ecf87a --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/must/6_contextual_entity.ttl @@ -0,0 +1,81 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix xsd: . +@prefix owl: . 
+@prefix validator: . + + +ro-crate:FindLicenseEntity a sh:NodeShape, validator:HiddenShape ; + sh:name "Identify License Entity" ; + sh:description """Mark a license entity any Data Entity referenced by the `schema:license` property.""" ; + sh:target [ + a sh:SPARQLTarget ; + sh:prefixes ro-crate:sparqlPrefixes ; + sh:select """ + SELECT ?this + WHERE { + ?subject schema:license ?this . + } + """ + ] ; + + # Expand data graph with triples from the file data entity + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:ContextualEntity ; + ] . + + +ro-crate:WebSiteRecommendedProperties a sh:NodeShape ; + sh:name "WebSite RECOMMENDED Properties" ; + sh:description """A `WebSite` MUST be identified by a valid IRI and MUST have a `name` property.""" ; + sh:targetClass schema:WebSite ; + sh:property [ + sh:path [sh:inversePath rdf:type] ; + sh:datType sh:IRI ; + sh:name "WebSite: value restriction of its identifier" ; + sh:description "Check if the WebSite has a valid IRI" ; + sh:message "A WebSite MUST have a valid IRI" ; + ] ; + sh:property [ + sh:path schema:name ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:name "WebSite: REQUIRED `name` property" ; + sh:description "Check if the WebSite has a `name` property" ; + sh:message "A WebSite MUST have a `name` property" ; + ] . + + +ro-crate:CreativeWorkAuthorDefinition a sh:NodeShape, validator:HiddenShape ; + sh:name "CreativeWork Author Definition" ; + sh:description """Define the `CreativeWorkAuthor` as the `Person` object of the `schema:author` predicate.""" ; + sh:targetObjectsOf schema:author ; + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object ro-crate:CreativeWorkAuthor ; + sh:condition [ + sh:property [ sh:path rdf:type ; sh:hasValue schema:Person ; sh:minCount 1 ] ; + ] ; + ] . 
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/ontology.ttl b/tests/data/rocrate_validator_profiles/ro-crate/ontology.ttl new file mode 100644 index 0000000..9af3a09 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/ontology.ttl @@ -0,0 +1,67 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix owl: . +@prefix rdf: . +@prefix xml: . +@prefix xsd: . +@prefix rdfs: . +@prefix schema: . +@prefix rocrate: . +@prefix bioschemas: . +@prefix ro-crate: . +# @base <./.> . + + rdf:type owl:Ontology ; + owl:versionIRI . + +# # ################################################################# +# # # Classes +# # ################################################################# + +# Declare the RootDataEntity class +ro-crate:RootDataEntity rdf:type owl:Class ; + rdfs:subClassOf schema:Dataset ; + rdfs:label "RootDataEntity"@en . + +### http://schema.org/CreativeWork +schema:CreativeWork rdf:type owl:Class ; + rdfs:label "CreativeWork"@en . + +### http://schema.org/MediaObject +schema:MediaObject rdf:type owl:Class ; + owl:equivalentClass ro-crate:File ; + rdfs:label "MediaObject"@en . + + +### http://schema.org/SoftwareSourceCode +schema:SoftwareSourceCode rdf:type owl:Class ; + rdfs:subClassOf schema:CreativeWork . + + +### https://bioschemas.org/ComputationalWorkflow +bioschemas:ComputationalWorkflow rdf:type owl:Class . 
+ + +### https://w3id.org/ro/crate/1.1/DataEntity +ro-crate:DataEntity rdf:type owl:Class ; + rdfs:subClassOf schema:CreativeWork ; + rdfs:label "DataEntity"@en . + + +# # ### https://w3id.org/ro/crate/1.1/Directory +ro-crate:Directory rdf:type owl:Class ; + rdfs:subClassOf schema:Dataset ; + rdfs:label "Directory"@en . diff --git a/tests/data/rocrate_validator_profiles/ro-crate/prefixes.ttl b/tests/data/rocrate_validator_profiles/ro-crate/prefixes.ttl new file mode 100644 index 0000000..79006d5 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/prefixes.ttl @@ -0,0 +1,49 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix sh: . +@prefix xsd: . +@prefix ro-crate: . 
+ +# Define the prefixes used in the SPARQL queries +ro-crate:sparqlPrefixes + sh:declare [ + sh:prefix "schema" ; + sh:namespace "http://schema.org/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "bioschemas" ; + sh:namespace "https://bioschemas.org/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "bioschemas-cw" ; + sh:namespace "https://bioschemas.org/ComputationalWorkflow#"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "rocrate" ; + sh:namespace "https://w3id.org/ro/crate/1.1/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "wfrun" ; + sh:namespace "https://w3id.org/ro/terms/workflow-run#"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "codemeta" ; + sh:namespace "https://codemeta.github.io/terms/"^^xsd:anyURI ; + ] ; + sh:declare [ + sh:prefix "ro" ; + sh:namespace "./"^^xsd:anyURI ; + ] . diff --git a/tests/data/rocrate_validator_profiles/ro-crate/profile.ttl b/tests/data/rocrate_validator_profiles/ro-crate/profile.ttl new file mode 100644 index 0000000..47872a9 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/profile.ttl @@ -0,0 +1,74 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dct: . +@prefix prof: . +@prefix role: . +@prefix rdfs: . 
+ + + + # a Profile; it's identifying URI + a prof:Profile ; + + # common metadata for the Profile + + # the Profile's label + rdfs:label "RO-Crate Metadata Specification 1.1" ; + + # regular metadata, a basic description of the Profile + rdfs:comment """RO-Crate Metadata Specification."""@en ; + + # regular metadata, URI of publisher + dct:publisher ; + + # this profile has a JSON-LD context resource + prof:hasResource [ + a prof:ResourceDescriptor ; + + # it's in JSON-LD format + dct:format ; + + # it conforms to JSON-LD, here referred to by its namespace URI as a Profile + dct:conformsTo ; + + # this profile resource plays the role of "Vocabulary" + # described in this ontology's accompanying Roles vocabulary + prof:hasRole role:Vocabulary ; + + # this profile resource's actual file + prof:hasArtifact ; + ] ; + + # this profile has a human-readable documentation resource + prof:hasResource [ + a prof:ResourceDescriptor ; + + # it's in HTML format + dct:format ; + + # it conforms to HTML, here referred to by its namespace URI as a Profile + dct:conformsTo ; + + # this profile resource plays the role of "Specification" + # described in this ontology's accompanying Roles vocabulary + prof:hasRole role:Specification ; + + # this profile resource's actual file + prof:hasArtifact ; + ] ; + + # a short code to refer to the Profile with when a URI can't be used + prof:hasToken "ro-crate" ; +. diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_metadata.ttl new file mode 100644 index 0000000..7cb53f0 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_metadata.ttl @@ -0,0 +1,74 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix validator: . + +ro-crate:RootDataEntityDirectRecommendedProperties a sh:NodeShape ; + sh:name "RO-Crate Root Data Entity RECOMMENDED properties" ; + sh:description """The Root Data Entity SHOULD have + the properties `name`, `description` and `license` defined as described + in the RO-Crate specification """; + sh:targetClass ro-crate:RootDataEntity ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `license` SHOULD link to a Contextual Entity" ; + sh:description """Check if the Root Data Entity includes a `license` property + that links to a Contextual Entity with type `schema_org:CreativeWork` to describe the license.""" ; + sh:nodeKind sh:BlankNodeOrIRI ; + sh:class schema_org:CreativeWork ; + sh:path schema_org:license; + sh:minCount 1 ; + sh:message """The Root Data Entity SHOULD have a link to a Contextual Entity representing the schema_org:license type""" ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: `author` property" ; + sh:description """Check if the Root Data Entity includes a `author` property (as specified by schema.org) + to provide information about its author.""" ; + sh:or ( + [ sh:class schema_org:Person ;] + [ sh:class schema_org:Organization ;] + ) ; + sh:path schema_org:author; + sh:minCount 1 ; + sh:message """The Root Data Entity SHOULD have a link to a Contextual Entity representing the `author` of the RO-Crate""" ; + ] ; + sh:property [ + sh:minCount 1 ; + sh:maxCount 1 ; + sh:path 
schema_org:publisher ; + sh:severity sh:Warning ; + sh:name "Root Data Entity: `publisher` property" ; + sh:description """Check if the Root Data Entity has a `publisher` property of type `Organization`.""" ; + sh:message "The `publisher` property of a `Root Data Entity` SHOULD be an `Organization`"; + sh:nodeKind sh:IRI ; + sh:class schema_org:Organization ; + ] ; + sh:property [ + a sh:PropertyShape ; + sh:name "Root Data Entity: RECOMMENDED `datePublished` property" ; + sh:description """Check if the Root Data Entity includes a `datePublished` (as specified by schema.org) + to provide the date when the dataset was published. The datePublished MUST be a valid ISO 8601 date. + It SHOULD be specified to at least the day level, but MAY include a time component.""" ; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:path schema_org:datePublished ; + sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))|W([0-4]\\d|5[0-2])(-?[1-7])|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)$" ; + sh:message "The Root Data Entity MUST have a `datePublished` property (as specified by schema.org) with a valid ISO 8601 date and the precision of at least the day level" ; + ] . diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_relative_uri.py b/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_relative_uri.py new file mode 100644 index 0000000..dd7d8fc --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/should/2_root_data_entity_relative_uri.py @@ -0,0 +1,42 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+                                                   requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="RO-Crate Root Data Entity RECOMMENDED value")
+class RootDataEntityRelativeURI(PyFunctionCheck):
+    """
+    The Root Data Entity SHOULD be denoted by the string `./`
+    """
+
+    @check(name="Root Data Entity: RECOMMENDED value")
+    def check_relative_uris(self, context: ValidationContext) -> bool:
+        """Check if the Root Data Entity is denoted by the string `./` in the file descriptor JSON-LD"""
+        try:
+            if context.ro_crate.metadata.get_root_data_entity().id != './':  # only the exact id `./` passes
+                context.result.add_issue(
+                    'Root Data Entity URI is not denoted by the string `./`', self)
+                return False
+            return True
+        except Exception as e:  # a failed root-entity lookup is recorded as an issue, not re-raised
+            context.result.add_issue(
+                f'Error checking Root Data Entity URI: {str(e)}', self)
+            return False
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_existence.py b/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_existence.py
new file mode 100644
index 0000000..f5742ac
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_existence.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+                                                   requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Data Entity: RECOMMENDED resource availability")
+class DataEntityRecommendedChecker(PyFunctionCheck):
+    """
+    Data Entities with absolute URI paths SHOULD be available
+    at the time of RO-Crate creation
+    """
+
+    @check(name="Data Entity: RECOMMENDED resource availability")
+    def check_availability(self, context: ValidationContext) -> bool:
+        """
+        Check that the Data Entities with absolute URI paths
+        are available at the time of RO-Crate creation
+        """
+        # Skip the check in metadata-only mode
+        if context.settings.metadata_only:
+            logger.debug("Skipping data entity availability check in metadata-only mode")
+            return True
+        # Perform the check
+        result = True
+        for entity in [
+                _ for _ in context.ro_crate.metadata.get_data_entities(exclude_web_data_entities=True)
+                if _.has_absolute_path()]:
+            assert entity.id is not None, "Entity has no @id"
+            try:
+                if not entity.is_available():
+                    context.result.add_issue(
+                        f'Data Entity {entity.id} is not available', self)
+                    result = False
+            except Exception as e:  # an error while probing the entity counts as "not available"
+                context.result.add_issue(
+                    f'Data Entity {entity.id} is not available: {e}', self)
+                result = False
+            if not result and context.fail_fast:  # stop at the first failure when fail-fast is enabled
+                return result
+        return result
diff --git
a/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_metadata.ttl new file mode 100644 index 0000000..188e3a8 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/should/4_data_entity_metadata.ttl @@ -0,0 +1,69 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix schema_org: . +@prefix sh: . +@prefix xsd: . + +ro-crate:FileRecommendedProperties a sh:NodeShape ; + sh:targetClass ro-crate:File ; + sh:name "File Data Entity: RECOMMENDED properties"; + sh:description """A `File` Data Entity SHOULD have detailed descriptions encodings through the `encodingFormat` property""" ; + sh:property [ + sh:minCount 1 ; + sh:maxCount 2 ; + sh:path schema_org:encodingFormat ; + sh:severity sh:Warning ; + sh:name "File Data Entity: RECOMMENDED `encodingFormat` property" ; + sh:description """Check if the File Data Entity has a detailed description of encodings through the `encodingFormat` property. + The `encodingFormat` property SHOULD be a PRONOM identifier (e.g., application/pdf) or, + to add more detail, SHOULD be linked using a `PRONOM` to a `Contextual Entity` of type `WebSite` + (see [Adding detailed descriptions of encodings](https://www.researchobject.org/ro-crate/1.1/data-entities.html#adding-detailed-descriptions-of-encodings)). 
+ """ ; + sh:message "Missing or invalid `encodingFormat` linked to the `File Data Entity`"; + sh:or ( + [ + sh:datatype xsd:string ; + sh:pattern "^(\\w*)\\/(\\w[\\w\\.-]*)(?:\\+(\\w[\\w\\.-]*))?(?:;(\\w+=[^;]+))*$" ; + sh:name "File Data Entity: RECOMMENDED `PRONOM` for the `encodingFormat` property" ; + sh:description """Check if the File Data Entity is linked to its `encodingFormat` through a PRONOM identifier + (e.g., application/pdf, application/text, image/svg+xml, image/svg;q=0.9,/;q=0.8,image/svg+xml;q=0.9,/;q=0.8, application/vnd.uplanet.listcmd-wbxml;charset=utf-8). + """ ; + sh:message "The `encodingFormat` SHOULD be linked using a PRONOM identifier (e.g., application/pdf)."; + ] + [ + sh:nodeKind sh:IRI ; + sh:class schema_org:WebSite ; + sh:name "File Data Entity: RECOMMENDED `Contextual Entity` linked to the `encodingFormat` property"; + sh:description "Check if the File Data Entity `encodingFormat` is linked to a `Contextual Entity of type `WebSite`." ; + sh:message "The `encodingFormat` SHOULD be linked to a `Contextual Entity` of type `Web Site`." ; + ] + ) + ] . + +ro-crate:DirectoryDataEntityRequiredValueRestriction a sh:NodeShape ; + sh:name "Directory Data Entity: RECOMMENDED value restriction" ; + sh:description """A Directory Data Entity SHOULD end with `/`""" ; + sh:targetNode ro-crate:Directory ; + sh:property [ + a sh:PropertyShape ; + sh:name "Directory Data Entity: RECOMMENDED value restriction" ; + sh:description """Check if the Directory Data Entity ends with `/`""" ; + sh:path [ sh:inversePath rdf:type ] ; + sh:message """Every Data Entity Directory URI SHOULD end with `/`""" ; + sh:pattern "/$" ; + ] . 
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.py b/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.py
new file mode 100644
index 0000000..13ef914
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rocrate_validator.utils import log as logging
+from rocrate_validator.models import ValidationContext
+from rocrate_validator.requirements.python import (PyFunctionCheck, check,
+                                                   requirement)
+
+# set up logging
+logger = logging.getLogger(__name__)
+
+
+@requirement(name="Web-based Data Entity: RECOMMENDED resource availability")
+class WebDataEntityRecommendedChecker(PyFunctionCheck):
+    """
+    Web-based Data Entity instances SHOULD be available
+    at the URIs specified in the `@id` property of the Web-based Data Entity.
+    """
+
+    @check(name="Web-based Data Entity: resource availability")
+    def check_availability(self, context: ValidationContext) -> bool:
+        """
+        Check if the Web-based Data Entity is directly downloadable
+        by a simple retrieval (e.g. HTTP GET) permitting redirection and HTTP/HTTPS URIs
+        """
+        result = True
+        for entity in context.ro_crate.metadata.get_web_data_entities():
+            assert entity.id is not None, "Entity has no @id"
+            try:
+                if not entity.is_available():
+                    context.result.add_issue(
+                        f'Web-based Data Entity {entity.id} is not available', self)
+                    result = False
+            except Exception as e:  # an error while probing the entity counts as "not available"
+                context.result.add_issue(
+                    f'Web-based Data Entity {entity.id} is not available: {e}', self)
+                result = False
+            if not result and context.fail_fast:  # stop at the first failure when fail-fast is enabled
+                return result
+        return result
+
+    @check(name="Web-based Data Entity: `contentSize` property")
+    def check_content_size(self, context: ValidationContext) -> bool:
+        """
+        Check if the Web-based Data Entity has a `contentSize` property
+        and if it is set to actual size of the downloadable content
+        """
+        result = True
+        for entity in context.ro_crate.metadata.get_web_data_entities():
+            assert entity.id is not None, "Entity has no @id"
+            if entity.is_available():
+                content_size = entity.get_property("contentSize")
+                # Fetch the remote size once (only when a contentSize is declared) and report that same value
+                actual_size = context.ro_crate.get_external_file_size(entity.id) if content_size else None
+                if content_size and int(content_size) != actual_size:
+                    context.result.add_issue(
+                        f'The property contentSize={content_size} of the Web-based Data Entity '
+                        f'{entity.id} does not match the actual size of the downloadable content, i.e., {actual_size} (bytes)', self,
+                        violatingEntity=entity.id, violatingProperty='contentSize', violatingPropertyValue=content_size)
+                    result = False
+            if not result and context.fail_fast:
+                return result
+        return result
diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.ttl
new file mode 100644
index 0000000..e4f3cc5
--- /dev/null
+++ b/tests/data/rocrate_validator_profiles/ro-crate/should/5_web_data_entity_metadata.ttl
@@ -0,0 +1,63 @@
+# Copyright (c) 2024-2026 CRS4
+#
+# Licensed under the Apache License, Version 2.0 (the
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix rdf: . +@prefix dct: . +@prefix schema_org: . +@prefix sh: . +@prefix owl: . +@prefix xsd: . +@prefix validator: . + + +ro-crate:WebBasedDataEntityRequiredValueRestriction a sh:NodeShape ; + sh:name "Web-based Data Entity: RECOMMENDED properties" ; + sh:description """A Web-based Data Entity MUST be identified by an absolute URL and + SHOULD have a `contentSize` and `sdDatePublished` property""" ; + sh:targetClass ro-crate:WebDataEntity ; + # Check if the Web-based Data Entity has a contentSize property + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "Web-based Data Entity: `contentSize` property" ; + sh:description """Check if the Web-based Data Entity has a `contentSize` property""" ; + sh:path schema_org:contentSize ; + sh:datatype xsd:string ; + sh:severity sh:Warning ; + sh:message """Web-based Data Entities SHOULD have a `contentSize` property""" ; + sh:sparql [ + sh:message "If the value is a string it must be a string representing an integer." ; + sh:select """ + SELECT ?this ?value + WHERE { + ?this schema:contentSize ?value . 
+ FILTER NOT EXISTS { + FILTER (xsd:integer(?value) = ?value) + } + } + """ ; + ] ; + ] ; + # Check if the Web-based Data Entity has a sdDatePublished property + sh:property [ + a sh:PropertyShape ; + sh:minCount 1 ; + sh:name "Web-based Data Entity: `sdDatePublished` property" ; + sh:description """Check if the Web-based Data Entity has a `sdDatePublished` property""" ; + sh:path schema_org:sdDatePublished ; + sh:pattern "^([\\+-]?\\d{4})((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))|W([0-4]\\d|5[0-2])(-?[1-7])|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)?[0-5]\\d)?|24:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)$" ; + sh:message """Web-based Data Entities SHOULD have a `sdDatePublished` property to indicate when the absolute URL was accessed""" ; + ] . diff --git a/tests/data/rocrate_validator_profiles/ro-crate/should/6_contextual_entity_metadata.ttl b/tests/data/rocrate_validator_profiles/ro-crate/should/6_contextual_entity_metadata.ttl new file mode 100644 index 0000000..a90dbc2 --- /dev/null +++ b/tests/data/rocrate_validator_profiles/ro-crate/should/6_contextual_entity_metadata.ttl @@ -0,0 +1,75 @@ +# Copyright (c) 2024-2026 CRS4 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix ro: <./> . +@prefix ro-crate: . +@prefix schema: . +@prefix sh: . +@prefix xsd: . 
+ +ro-crate:CreativeWorkAuthorMinimumRecommendedProperties a sh:NodeShape ; + sh:name "CreativeWork Author: minimum RECOMMENDED properties" ; + sh:description """The minimum recommended properties for a `CreativeWork Author` are `name` and `affiliation`.""" ; + sh:targetClass ro-crate:CreativeWorkAuthor ; + sh:property [ + sh:path schema:name ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:name "CreativeWork Author: RECOMMENDED name property" ; + sh:description "Check if the author has a name." ; + sh:message "The author SHOULD have a name." ; + ] ; + sh:property [ + sh:path schema:affiliation ; + sh:minCount 1 ; + sh:or ( + [ sh:dataType xsd:string ; ] + [ sh:class schema:Organization ;] + ) ; + sh:severity sh:Warning ; + sh:name "CreativeWork Author: RECOMMENDED affiliation property" ; + sh:description "Check if the author has an organizational affiliation." ; + sh:message "The author SHOULD have an organizational affiliation." ; + ] ; + sh:property [ + sh:path schema:affiliation ; + sh:minCount 1 ; + sh:class schema:Organization ; + sh:severity sh:Warning ; + sh:name "CreativeWork Author: RECOMMENDED Contextual Entity linked for the organizational `affiliation` property" ; + sh:description "Check if the author has a Contextual Entity for the organizational `affiliation` property." ; + sh:message "The author SHOULD have a Contextual Entity which specifies the organizational `affiliation`." ; + ] . + + +ro-crate:OrganizationRecommendedProperties a sh:NodeShape ; + sh:name "Organization: RECOMMENDED properties" ; + sh:description """The recommended properties for an `Organization` are `name` and `url`.""" ; + sh:targetClass schema:Organization ; + sh:property [ + sh:path schema:name ; + sh:minCount 1 ; + sh:dataType xsd:string ; + sh:name "Organization: RECOMMENDED name property" ; + sh:description "Check if the `organization` has a name." ; + sh:message "The organization SHOULD have a name." 
; + ] ; + sh:property [ + sh:path schema:url ; + sh:minCount 1 ; + sh:dataType xsd:anyURI ; + sh:name "Organization: RECOMMENDED url property" ; + sh:description "Check if the `organization` has a URL." ; + sh:message "The organization SHOULD have a URL." ; + ] . From 9844596418e5cda0c8dd27a8ba3ad6c2f9ac1727 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 9 Feb 2026 13:59:48 +0000 Subject: [PATCH 27/40] add profile_name to API description in readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 98d4322..d1d8ae8 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ This project presents a Flask-based API for validating RO-Crates. |------------|-----------|-------------------------|-----------------------------------------------------------------------| | root_path | optional | string | Root path which contains the RO-Crate | | webhook_url | optional | string | Webhook to send validation result to | +| profile_name | optional | string | RO-Crate profile to validate against | | minio_config | required | dictionary | MinIO Configuration Details | `minio_config` From b28d8c9e992d8d78158ae84996e25291b537bab9 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:08:17 +0000 Subject: [PATCH 28/40] API and docker updates in readme --- README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d1d8ae8..a82204e 100644 --- a/README.md +++ b/README.md @@ -168,12 +168,24 @@ curl -X 'POST' \ 2. Create the `.env` file for shared environment information. An example environment file is included (`example.env`), which can be copied for this purpose. But make sure to change any security settings (username and passwords). -3. Build and start the services using Docker Compose: +3. 
A directory containing RO-Crate profiles to replace the default RO-Crate profiles for validation may be provided. Note that this will need to contain all profile files, as the default profile data will not be used. An example of this is given in the `docker-compose-develop.yml` file, and described here: + 1. Store the profiles in a convenient directory, e.g.: `./local/rocrate_validator_profiles` + 2. Add a volume to the celery worker container for these, e.g.: +``` + volumes: + - ./local/rocrate_validator_profiles:/app/profiles:ro +``` + 3. Provide the `PROFILES_PATH` environment to the flask container (not the celery worker container) to match the internal path, e.g.: +``` + - PROFILES_PATH=/app/profiles +``` + +4. Build and start the services using Docker Compose: ```bash docker compose up --build ``` -4. Set up the MinIO bucket +5. Set up the MinIO bucket 1. Open the MinIO web interface at `http://localhost:9000`. 2. Log in with your MinIO credentials. 3. Create a new bucket named `ro-crates`. From 472352928f3c88df8beb0e44f09970b3d1a4f73e Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:09:32 +0000 Subject: [PATCH 29/40] readme cleanup --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a82204e..8eaad38 100644 --- a/README.md +++ b/README.md @@ -171,14 +171,14 @@ curl -X 'POST' \ 3. A directory containing RO-Crate profiles to replace the default RO-Crate profiles for validation may be provided. Note that this will need to contain all profile files, as the default profile data will not be used. An example of this is given in the `docker-compose-develop.yml` file, and described here: 1. Store the profiles in a convenient directory, e.g.: `./local/rocrate_validator_profiles` 2. Add a volume to the celery worker container for these, e.g.: -``` + ``` volumes: - ./local/rocrate_validator_profiles:/app/profiles:ro -``` + ``` 3. 
Provide the `PROFILES_PATH` environment to the flask container (not the celery worker container) to match the internal path, e.g.: -``` + ``` - PROFILES_PATH=/app/profiles -``` + ``` 4. Build and start the services using Docker Compose: ```bash From fe43ffd50b0ab9a9bb7b78adcd07101c48277db7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Feb 2026 12:40:30 +0000 Subject: [PATCH 30/40] Bump redis from 7.1.0 to 7.1.1 Bumps [redis](https://github.com/redis/redis-py) from 7.1.0 to 7.1.1. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v7.1.0...v7.1.1) --- updated-dependencies: - dependency-name: redis dependency-version: 7.1.1 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/requirements.in b/requirements.in index 1bc9ad6..dabea2e 100644 --- a/requirements.in +++ b/requirements.in @@ -3,7 +3,7 @@ minio==7.2.20 requests==2.32.5 Flask==3.1.2 Werkzeug==3.1.5 -redis==7.1.0 +redis==7.1.1 python-dotenv==1.2.1 apiflask==3.0.2 roc-validator==0.8 diff --git a/requirements.txt b/requirements.txt index 9637873..4e556b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,8 +16,6 @@ argon2-cffi==25.1.0 # via minio argon2-cffi-bindings==25.1.0 # via argon2-cffi -async-timeout==5.0.1 - # via redis attrs==25.3.0 # via # cattrs @@ -147,7 +145,7 @@ rdflib[html]==7.1.4 # owlrl # pyshacl # roc-validator -redis==7.1.0 +redis==7.1.1 # via -r requirements.in requests==2.32.5 # via From 742968c3f7d50f4d97dbce0672d2022a370ef310 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Tue, 17 Feb 2026 16:31:50 +0000 Subject: [PATCH 31/40] direct metadata validation from json 
--- app/tasks/validation_tasks.py | 57 ++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index e27c46c..8e1c115 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -7,6 +7,7 @@ import logging import os import shutil +import json from typing import Optional from rocrate_validator import services @@ -22,7 +23,6 @@ find_validation_object_on_minio ) from app.utils.webhook_utils import send_webhook_notification -from app.utils.file_utils import build_metadata_only_rocrate logger = logging.getLogger(__name__) @@ -98,7 +98,7 @@ def process_validation_task_by_id( @celery.task def process_validation_task_by_metadata( - crate_json: str, profile_name: str | None, webhook_url: str | None + crate_json: str, profile_name: str | None, webhook_url: str | None, profiles_path: Optional[str] = None ) -> ValidationResult | str: """ Background task to process the RO-Crate validation for a given json metadata string. @@ -111,19 +111,13 @@ def process_validation_task_by_metadata( :todo: Replace the Crate ID with a more comprehensive system, and replace profile name with URI. 
""" - skip_checks_list = ['ro-crate-1.1_12.1'] - file_path = None - try: - # Fetch the RO-Crate from MinIO using the provided ID: - file_path = build_metadata_only_rocrate(crate_json) - - logging.info(f"Processing validation task for {file_path}") + logging.info(f"Processing validation task for provided metadata string") # Perform validation: - validation_result = perform_ro_crate_validation(file_path, + validation_result = perform_metadata_validation(crate_json, profile_name, - skip_checks_list + profiles_path=profiles_path ) if isinstance(validation_result, str): @@ -132,9 +126,9 @@ def process_validation_task_by_metadata( raise Exception(f"Validation failed: {validation_result}") if not validation_result.has_issues(): - logging.info(f"RO Crate {file_path} is valid.") + logging.info("RO Crate metadata is valid.") else: - logging.info(f"RO Crate {file_path} is invalid.") + logging.info("RO Crate metadata is invalid.") if webhook_url: send_webhook_notification(webhook_url, validation_result.to_json()) @@ -148,10 +142,6 @@ def process_validation_task_by_metadata( send_webhook_notification(webhook_url, error_data) finally: - # Clean up the temporary file if it was created: - if file_path and os.path.exists(file_path): - shutil.rmtree(file_path) - if isinstance(validation_result, str): return validation_result else: @@ -196,6 +186,39 @@ def perform_ro_crate_validation( return str(e) +def perform_metadata_validation( + crate_json: str, profile_name: str | None, skip_checks_list: Optional[list] = None, profiles_path: Optional[str] = None +) -> ValidationResult | str: + """ + Validates only RO-Crate metadata provided as a json string. + + :param crate_json: The JSON string containing the metadata + :param profile_name: The name of the validation profile to use. Defaults to None. If None, the CRS4 validator will + attempt to determine the profile. 
+ :param profiles_path: The path to the profiles definition directory + :param skip_checks_list: A list of checks to skip, if needed + :return: The validation result. + :raises Exception: If an error occurs during the validation process. + """ + + try: + logging.info(f"Validating ro-crate metadata with profile {profile_name}") + + settings = services.ValidationSettings( + **({"metadata_only": True}), + **({"metadata_dict": json.loads(crate_json)}), + **({"profile_identifier": profile_name} if profile_name else {}), + **({"skip_checks": skip_checks_list} if skip_checks_list else {}), + **({"profiles_path": profiles_path} if profiles_path else {}) + ) + + return services.validate(settings) + + except Exception as e: + logging.error(f"Unexpected error during validation: {e}") + return str(e) + + def check_ro_crate_exists( minio_client: object, bucket_name: str, From 0f62ee231df0be2f4defdfe8c9a368037c06a6dd Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Tue, 17 Feb 2026 16:32:30 +0000 Subject: [PATCH 32/40] tests updated and extended for json metadata validation --- tests/test_services.py | 3 +- tests/test_validation_tasks.py | 114 ++++++++++++++++++++++++--------- 2 files changed, 84 insertions(+), 33 deletions(-) diff --git a/tests/test_services.py b/tests/test_services.py index ccebeba..ca0d9b9 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -197,7 +197,8 @@ def test_queue_metadata(flask_app, crate_json: dict, profile: str, webhook: str, "{}", 422, "Required parameter crate_json is empty" ), - ] + ], + ids=["missing_crate_json","invalid_json","empty_json"] ) def test_queue_metadata_json_errors(flask_app, crate_json: str, status_code: int, response_error: str): response, status = queue_ro_crate_metadata_validation_task(crate_json) diff --git a/tests/test_validation_tasks.py b/tests/test_validation_tasks.py index 49c3fed..ade15b9 100644 --- a/tests/test_validation_tasks.py +++ 
b/tests/test_validation_tasks.py @@ -1,9 +1,11 @@ from unittest import mock import pytest +import json from app.tasks.validation_tasks import ( process_validation_task_by_id, perform_ro_crate_validation, + perform_metadata_validation, return_ro_crate_validation, process_validation_task_by_metadata, check_ro_crate_exists, @@ -227,34 +229,28 @@ def test_process_validation_failure( # Test function: process_validation_task_by_metadata @pytest.mark.parametrize( - "crate_json, profile_name, webhook_url, mock_path, validation_json, validation_value, os_path_exists", + "crate_json, profile_name, webhook_url, validation_json, validation_value", [ ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', - "test-profile", "https://example.com/webhook", "/tmp/crate", - '{"status": "valid"}', False, True + "test-profile", "https://example.com/webhook", + '{"status": "valid"}', False ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', - "test-profile", "https://example.com/webhook", "/tmp/crate", - '{"status": "invalid"}', True, True + "test-profile", "https://example.com/webhook", + '{"status": "invalid"}', True ) ], ids=["success_no_issues", "success_with_issues"] ) -@mock.patch("app.tasks.validation_tasks.shutil.rmtree") -@mock.patch("app.tasks.validation_tasks.os.path.exists") @mock.patch("app.tasks.validation_tasks.send_webhook_notification") -@mock.patch("app.tasks.validation_tasks.perform_ro_crate_validation") -@mock.patch("app.tasks.validation_tasks.build_metadata_only_rocrate") +@mock.patch("app.tasks.validation_tasks.perform_metadata_validation") def test_metadata_validation( - mock_build, mock_validate, mock_webhook, mock_exists, mock_rmtree, - crate_json: str, profile_name: str, webhook_url: str, mock_path: str, - validation_json: str, validation_value: bool, os_path_exists: bool + mock_validate, mock_webhook, + crate_json: str, profile_name: str, webhook_url: str, + validation_json: str, validation_value: bool, ): - 
mock_exists.return_value = os_path_exists - mock_build.return_value = mock_path - mock_result = mock.Mock() mock_result.has_issues.return_value = validation_value mock_result.to_json.return_value = validation_json @@ -263,39 +259,33 @@ def test_metadata_validation( result = process_validation_task_by_metadata(crate_json, profile_name, webhook_url) assert result == validation_json - mock_build.assert_called_once_with(crate_json) mock_validate.assert_called_once() mock_webhook.assert_called_once_with(webhook_url, validation_json) - mock_rmtree.assert_called_once_with(mock_path) @pytest.mark.parametrize( - "crate_json, profile_name, webhook_url, mock_path, validation_message, os_path_exists", + "crate_json, profile_name, webhook_url, validation_message", [ ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', - "test-profile", "https://example.com/webhook", "/tmp/crate", - "Validation error", True + "test-profile", "https://example.com/webhook", + "Validation error" ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context", "@graph": []}', - "test-profile", None, "/tmp/crate", - "Validation error", True + "test-profile", None, + "Validation error" ) ], ids=["validation_fails", "validation_fails_no_webhook"] ) -@mock.patch("app.tasks.validation_tasks.shutil.rmtree") -@mock.patch("app.tasks.validation_tasks.os.path.exists", return_value=True) @mock.patch("app.tasks.validation_tasks.send_webhook_notification") -@mock.patch("app.tasks.validation_tasks.perform_ro_crate_validation") -@mock.patch("app.tasks.validation_tasks.build_metadata_only_rocrate") +@mock.patch("app.tasks.validation_tasks.perform_metadata_validation") def test_validation_fails_and_sends_error_notification_to_webhook( - mock_build, mock_validate, mock_webhook, mock_exists, mock_rmtree, - crate_json: str, profile_name: str, webhook_url: str, mock_path: str, - validation_message: str, os_path_exists: bool + mock_validate, mock_webhook, + crate_json: str, profile_name: str, webhook_url: 
str, + validation_message: str ): - mock_build.return_value = mock_path mock_validate.return_value = validation_message @@ -313,8 +303,6 @@ def test_validation_fails_and_sends_error_notification_to_webhook( # Make sure webhook not sent mock_webhook.assert_not_called() - mock_rmtree.assert_called_once_with(mock_path) - # Test function: perform_ro_crate_validation @@ -378,6 +366,68 @@ def test_validation_settings_error(mock_validation_settings, mock_validate): mock_validate.assert_not_called() +# Test function: perform_metadata_validation + +@pytest.mark.parametrize( + "crate_json, profile_name, skip_checks", + [ + ('{"id":"dummy json"}', "ro_profile", ["check1", "check2"]), + ('{"id":"dummy json"}', None, None) + ], + ids=["success_with_all_args", "success_with_only_crate"] +) +@mock.patch("app.tasks.validation_tasks.services.validate") +@mock.patch("app.tasks.validation_tasks.services.ValidationSettings") +def test_metadata_validation_success_with_all_args( + mock_validation_settings, mock_validate, + crate_json: str, profile_name: str, skip_checks: list +): + mock_result = mock.Mock() + mock_validate.return_value = mock_result + + result = perform_metadata_validation(crate_json, profile_name, skip_checks) + + # Assert that result was returned + assert result == mock_result + + # Validate proper construction of ValidationSettings + mock_validation_settings.assert_called_once() + args, kwargs = mock_validation_settings.call_args + assert kwargs["metadata_dict"] == json.loads(crate_json) + if profile_name is not None: + assert kwargs["profile_identifier"] == profile_name + else: + assert "profile_identifier" not in kwargs + if skip_checks is not None: + assert kwargs["skip_checks"] == skip_checks + else: + assert "skip_checks" not in kwargs + + mock_validate.assert_called_once_with(mock_validation_settings.return_value) + + +@mock.patch("app.tasks.validation_tasks.services.validate", side_effect=RuntimeError("Validation error")) 
+@mock.patch("app.tasks.validation_tasks.services.ValidationSettings") +def test_metadata_validation_raises_exception_and_returns_string(mock_validation_settings, mock_validate): + crate_json = '{"id":"test metadata"}' + result = perform_metadata_validation(crate_json, "profile", skip_checks_list=None) + + assert isinstance(result, str) + assert "Validation error" in result + mock_validate.assert_called_once() + + +@mock.patch("app.tasks.validation_tasks.services.validate") +@mock.patch("app.tasks.validation_tasks.services.ValidationSettings", side_effect=ValueError("Bad config")) +def test_metadata_validation_settings_error(mock_validation_settings, mock_validate): + crate_json = '{"id":"test metadata"}' + result = perform_metadata_validation(crate_json, None) + + assert isinstance(result, str) + assert "Bad config" in result + mock_validate.assert_not_called() + + # Test function: return_ro_crate_validation @mock.patch("app.tasks.validation_tasks.get_validation_status_from_minio") From f84bf2968179cd3ca44fab967781172744672e32 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Tue, 17 Feb 2026 16:35:08 +0000 Subject: [PATCH 33/40] removed metadata only rocrate build function --- app/utils/file_utils.py | 53 ----------------------------------------- 1 file changed, 53 deletions(-) delete mode 100644 app/utils/file_utils.py diff --git a/app/utils/file_utils.py b/app/utils/file_utils.py deleted file mode 100644 index 15c16e4..0000000 --- a/app/utils/file_utils.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Utility methods for interacting with the File System.""" - -# Author: Douglas Lowe, Alexander Hambley -# License: MIT -# Copyright (c) 2025 eScience Lab, The University of Manchester - -import json -import logging -import os -import tempfile - -from dotenv import load_dotenv - - -logger = logging.getLogger(__name__) - - -def build_metadata_only_rocrate(crate_json: str) -> str: - """ - Creates a temporary directory for an empty 
RO-Crate, - and saves the JSON string as a metadata file. - - :param crate_json: The metadata string. - :return: The local file path where the RO-Crate is saved. - :raises ValueError: If the required environment variables are not set. - :raises Exception: If an unexpected error occurs during the operation. - """ - - load_dotenv() - - try: - # Prepare temporary file path to store RO Crate for validation: - temp_dir = tempfile.mkdtemp() - file_path = os.path.join(temp_dir, 'ro-crate-metadata.json') - - logging.info( - f"Creating RO-Crate Metadata file. File path: {file_path}" - ) - with open(file_path, 'w') as f: - f.write(crate_json) - logging.info( - f"RO-Crate metadata successfully saved to {file_path}." - ) - - return temp_dir - - except ValueError as value_error: - logging.error(f"Configuration Error: {value_error}") - raise - - except Exception as e: - logging.error(f"Unexpected error creating RO-Crate metadata: {e}") - raise From 4d0f544ac5345a16e0986db69c7444d9b3f0036b Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:30:50 +0000 Subject: [PATCH 34/40] pass profiles_path env variable to metadata testing function --- app/ro_crates/routes/post_routes.py | 4 +++- app/services/validation_service.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/ro_crates/routes/post_routes.py b/app/ro_crates/routes/post_routes.py index 2c517f4..4fcf4ab 100644 --- a/app/ro_crates/routes/post_routes.py +++ b/app/ro_crates/routes/post_routes.py @@ -111,4 +111,6 @@ def validate_ro_crate_metadata(json_data) -> tuple[Response, int]: else: profile_name = None - return queue_ro_crate_metadata_validation_task(crate_json, profile_name) + profiles_path = current_app.config["PROFILES_PATH"] + + return queue_ro_crate_metadata_validation_task(crate_json, profile_name, profiles_path=profiles_path) diff --git a/app/services/validation_service.py b/app/services/validation_service.py index 
b51a088..37c5c05 100644 --- a/app/services/validation_service.py +++ b/app/services/validation_service.py @@ -61,7 +61,7 @@ def queue_ro_crate_validation_task( def queue_ro_crate_metadata_validation_task( - crate_json: str, profile_name=None, webhook_url=None + crate_json: str, profile_name=None, webhook_url=None, profiles_path=None ) -> tuple[Response, int]: """ Queues an RO-Crate for validation with Celery. @@ -69,6 +69,7 @@ def queue_ro_crate_metadata_validation_task( :param crate_id: The ID of the RO-Crate to validate. :param profile_name: The profile to validate against. :param webhook_url: The URL to POST the validation results to. + :param profiles_path: A path to the profile definition directory. :return: A tuple containing a JSON response and an HTTP status code. :raises: Exception: If an error occurs whilst queueing the task. """ @@ -90,7 +91,8 @@ def queue_ro_crate_metadata_validation_task( result = process_validation_task_by_metadata.delay( crate_json, profile_name, - webhook_url + webhook_url, + profiles_path ) if webhook_url: return jsonify({"message": "Validation in progress"}), 202 From e3d4d78155f9dc251f4fce96355b6710ed2675a4 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:31:21 +0000 Subject: [PATCH 35/40] docstring and logging update --- app/tasks/validation_tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/tasks/validation_tasks.py b/app/tasks/validation_tasks.py index 8e1c115..e6096d6 100644 --- a/app/tasks/validation_tasks.py +++ b/app/tasks/validation_tasks.py @@ -106,13 +106,14 @@ def process_validation_task_by_metadata( :param crate_json: A string containing the RO-Crate JSON metadata to validate. :param profile_name: The name of the validation profile to use. Defaults to None. :param webhook_url: The webhook URL to send notifications to. Defaults to None. + :param profiles_path: The path to the profiles definition directory. Defaults to None. 
:raises Exception: If an error occurs during the validation process. :todo: Replace the Crate ID with a more comprehensive system, and replace profile name with URI. """ try: - logging.info(f"Processing validation task for provided metadata string") + logging.info("Processing validation task for provided metadata string") # Perform validation: validation_result = perform_metadata_validation(crate_json, From aef6f64c7cbac6363921f3245d7b1c8c0f2d1999 Mon Sep 17 00:00:00 2001 From: Douglas Lowe <10961945+douglowe@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:32:12 +0000 Subject: [PATCH 36/40] add profiles_path variable to metadata api and service tests --- tests/test_api_routes.py | 12 +++++++----- tests/test_services.py | 14 +++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/test_api_routes.py b/tests/test_api_routes.py index e50b511..486378b 100644 --- a/tests/test_api_routes.py +++ b/tests/test_api_routes.py @@ -141,24 +141,26 @@ def test_validate_fails_missing_elements(client: FlaskClient, crate_id: str, pay # Test POST API: /v1/ro_crates/validate_metadata +# TODO: Write tests for profiles_path environment variable. This will require a refactoring of the create_app function. 
@pytest.mark.parametrize( - "payload, status_code, response_json", + "payload, status_code, response_json, profiles_path", [ ( { "crate_json": '{"@context": "https://w3id.org/ro/crate/1.1/context"}', "profile_name": "default" - }, 200, {"status": "success"} + }, 200, {"status": "success"}, None ), ( { "crate_json": '{"@context": "https://w3id.org/ro/crate/1.1/context"}', - }, 200, {"status": "success"} + }, 200, {"status": "success"}, None ), ], ids=["success_with_all_fields", "success_without_profile_name"] ) -def test_validate_metadata_success(client: FlaskClient, payload: dict, status_code: int, response_json: dict): +def test_validate_metadata_success(client: FlaskClient, payload: dict, status_code: int, + response_json: dict, profiles_path: str): with patch("app.ro_crates.routes.post_routes.queue_ro_crate_metadata_validation_task") as mock_queue: mock_queue.return_value = (response_json, status_code) @@ -167,7 +169,7 @@ def test_validate_metadata_success(client: FlaskClient, payload: dict, status_co crate_json = payload["crate_json"] if "crate_json" in payload else None profile_name = payload["profile_name"] if "profile_name" in payload else None - mock_queue.assert_called_once_with(crate_json, profile_name) + mock_queue.assert_called_once_with(crate_json, profile_name, profiles_path=profiles_path) assert response.status_code == status_code assert response.json == response_json diff --git a/tests/test_services.py b/tests/test_services.py index ca0d9b9..0413e17 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -141,32 +141,32 @@ def test_queue_ro_crate_validation_task_failure( # Test function: queue_ro_crate_metadata_validation_task @pytest.mark.parametrize( - "crate_json, profile, webhook, status_code, return_value, response_json, delay_side_effect", + "crate_json, profile, webhook, status_code, return_value, response_json, delay_side_effect, profiles_path", [ ( '{"@context": "https://w3id.org/ro/crate/1.1/context"}', "default", 
"http://webhook", 202, None, {"message": "Validation in progress"}, - None + None, None ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context"}', "default", None, 200, {"status": "ok"}, {"result": {"status": "ok"}}, - None + None, None ), ( '{"@context": "https://w3id.org/ro/crate/1.1/context"}', "default", "http://webhook", 500, None, {"error": "Celery error"}, - Exception("Celery error") + Exception("Celery error"), None ), ], ids=["success_with_webhook", "success_without_webhook", "failure_celery_error"] ) def test_queue_metadata(flask_app, crate_json: dict, profile: str, webhook: str, status_code: int, return_value: dict, response_json: dict, - delay_side_effect: Exception): + delay_side_effect: Exception, profiles_path: str): with patch("app.services.validation_service.process_validation_task_by_metadata.delay", side_effect=delay_side_effect) as mock_delay: mock_result = MagicMock() @@ -175,9 +175,9 @@ def test_queue_metadata(flask_app, crate_json: dict, profile: str, webhook: str, if delay_side_effect is None: mock_delay.return_value = mock_result - response, status = queue_ro_crate_metadata_validation_task(crate_json, profile, webhook) + response, status = queue_ro_crate_metadata_validation_task(crate_json, profile, webhook, profiles_path) - mock_delay.assert_called_once_with(crate_json, profile, webhook) + mock_delay.assert_called_once_with(crate_json, profile, webhook, profiles_path) assert status == status_code assert response.json == response_json From 15f5b3ccf3e39838cf25aab3586c8e74081b199b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Feb 2026 12:23:56 +0000 Subject: [PATCH 37/40] Bump redis from 7.1.1 to 7.2.0 Bumps [redis](https://github.com/redis/redis-py) from 7.1.1 to 7.2.0. 
- [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v7.1.1...v7.2.0) --- updated-dependencies: - dependency-name: redis dependency-version: 7.2.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index dabea2e..a50d5e4 100644 --- a/requirements.in +++ b/requirements.in @@ -3,7 +3,7 @@ minio==7.2.20 requests==2.32.5 Flask==3.1.2 Werkzeug==3.1.5 -redis==7.1.1 +redis==7.2.0 python-dotenv==1.2.1 apiflask==3.0.2 roc-validator==0.8 diff --git a/requirements.txt b/requirements.txt index 4e556b7..7e5b524 100644 --- a/requirements.txt +++ b/requirements.txt @@ -145,7 +145,7 @@ rdflib[html]==7.1.4 # owlrl # pyshacl # roc-validator -redis==7.1.1 +redis==7.2.0 # via -r requirements.in requests==2.32.5 # via From dab78e3978f7823682e1749618a956a7a99fd22b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Feb 2026 12:23:31 +0000 Subject: [PATCH 38/40] Bump flask from 3.1.2 to 3.1.3 Bumps [flask](https://github.com/pallets/flask) from 3.1.2 to 3.1.3. - [Release notes](https://github.com/pallets/flask/releases) - [Changelog](https://github.com/pallets/flask/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/flask/compare/3.1.2...3.1.3) --- updated-dependencies: - dependency-name: flask dependency-version: 3.1.3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index a50d5e4..4f65a9b 100644 --- a/requirements.in +++ b/requirements.in @@ -1,7 +1,7 @@ celery==5.6.2 minio==7.2.20 requests==2.32.5 -Flask==3.1.2 +Flask==3.1.3 Werkzeug==3.1.5 redis==7.2.0 python-dotenv==1.2.1 diff --git a/requirements.txt b/requirements.txt index 7e5b524..75f7401 100644 --- a/requirements.txt +++ b/requirements.txt @@ -59,7 +59,7 @@ email-validator==2.3.0 # via pydantic enum-tools==0.12.0 # via roc-validator -flask==3.1.2 +flask==3.1.3 # via # -r requirements.in # apiflask From 78639c2f43df3395466381ae193119c17c4dc895 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 10:16:21 +0000 Subject: [PATCH 39/40] Bump werkzeug from 3.1.5 to 3.1.6 Bumps [werkzeug](https://github.com/pallets/werkzeug) from 3.1.5 to 3.1.6. - [Release notes](https://github.com/pallets/werkzeug/releases) - [Changelog](https://github.com/pallets/werkzeug/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/werkzeug/compare/3.1.5...3.1.6) --- updated-dependencies: - dependency-name: werkzeug dependency-version: 3.1.6 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 4f65a9b..7156e77 100644 --- a/requirements.in +++ b/requirements.in @@ -2,7 +2,7 @@ celery==5.6.2 minio==7.2.20 requests==2.32.5 Flask==3.1.3 -Werkzeug==3.1.5 +Werkzeug==3.1.6 redis==7.2.0 python-dotenv==1.2.1 apiflask==3.0.2 diff --git a/requirements.txt b/requirements.txt index 75f7401..f8b3962 100644 --- a/requirements.txt +++ b/requirements.txt @@ -201,7 +201,7 @@ wcwidth==0.2.13 # prompt-toolkit webargs==8.7.0 # via apiflask -werkzeug==3.1.5 +werkzeug==3.1.6 # via # -r requirements.in # flask From b0bd555d7721afbbdee94d439bd2a162fa2166c7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 10:16:26 +0000 Subject: [PATCH 40/40] Bump roc-validator from 0.8 to 0.8.1 Bumps [roc-validator](https://github.com/crs4/rocrate-validator) from 0.8 to 0.8.1. - [Release notes](https://github.com/crs4/rocrate-validator/releases) - [Changelog](https://github.com/crs4/rocrate-validator/blob/develop/CHANGELOG.md) - [Commits](https://github.com/crs4/rocrate-validator/compare/0.8.0...0.8.1) --- updated-dependencies: - dependency-name: roc-validator dependency-version: 0.8.1 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 7156e77..1dcad46 100644 --- a/requirements.in +++ b/requirements.in @@ -6,4 +6,4 @@ Werkzeug==3.1.6 redis==7.2.0 python-dotenv==1.2.1 apiflask==3.0.2 -roc-validator==0.8 +roc-validator==0.8.1 diff --git a/requirements.txt b/requirements.txt index f8b3962..2dd5129 100644 --- a/requirements.txt +++ b/requirements.txt @@ -160,7 +160,7 @@ rich==13.9.4 # roc-validator rich-click==1.8.9 # via roc-validator -roc-validator==0.8.0 +roc-validator==0.8.1 # via -r requirements.in six==1.17.0 # via python-dateutil