Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ audit: deps
bandit: deps
$(BIN)/bandit -r app || true

test: deps
$(UV) pip install --python $(BIN)/python -e ".[dev]"
$(BIN)/python -m pytest test/ -v

# Full validation bundle
lint: clean format ruff pylint audit bandit

Expand Down
7 changes: 3 additions & 4 deletions app/demo_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import uuid

from fastapi import HTTPException
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel

from .routers.account import facility_adapter as account_adapter
Expand Down Expand Up @@ -520,8 +519,8 @@ async def list_sites(
sites = [s for s in sites if s.last_modified > ms]

o = offset or 0
l = limit or len(sites)
return sites[o : o + l]
page_limit = limit or len(sites)
return sites[o : o + page_limit]

async def get_site(self: "DemoAdapter", site_id: str, modified_since: str | None = None) -> facility_models.Site:
site = next((s for s in self.sites if s.id == site_id), None)
Expand All @@ -547,7 +546,7 @@ async def get_resources(
description: str | None = None,
group: str | None = None,
modified_since: datetime.datetime | None = None,
resource_type: status_models.ResourceType | None = None,
resource_type: status_models.ResourceTypeValue | None = None,
current_status: status_models.Status | None = None,
capability: Capability | None = None,
site_id: str | None = None,
Expand Down
4 changes: 2 additions & 2 deletions app/routers/account/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from ...request_context import get_url_prefix
from ...types.base import IRIBaseModel
from ...types.scalars import AllocationUnit
from ...types.scalars import AllocationUnit, AllocationUnitValue


class Project(IRIBaseModel):
Expand Down Expand Up @@ -34,7 +34,7 @@ class AllocationEntry(IRIBaseModel):

allocation: float = Field(..., description="Total allocation amount granted.", example=100000.0) # how much this allocation can spend
usage: float = Field(..., description="Amount of allocation consumed.", example=52342.5) # how much this allocation has spent
unit: AllocationUnit = Field(..., description="Unit of the allocation (e.g., node_hours, bytes).", example="node_hours")
unit: AllocationUnitValue = Field(..., description="DOE IRI URN for the allocation unit.", example=AllocationUnit.node_hours)


class ProjectAllocation(IRIBaseModel):
Expand Down
16 changes: 5 additions & 11 deletions app/routers/filesystem/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,7 @@
from enum import Enum
from pydantic import Field, AliasChoices, BaseModel


class CompressionType(str, Enum):
"""Defines the type of compression to be used for compressing or extracting files."""
none = "none"
bzip2 = "bzip2"
gzip = "gzip"
xz = "xz"
from ...types.scalars import CompressionType, CompressionTypeValue


class ContentUnit(str, Enum):
Expand Down Expand Up @@ -202,7 +196,7 @@ class PostCompressRequest(FilesystemRequestBase):
target_path: str = Field(..., description="Path to the compressed file", example="/home/user/file.tar.gz")
match_pattern: str|None = Field(default=None, description="Regex pattern to filter files to compress", example=".*\\.txt$")
dereference: bool = Field(default=False, description="If set to `true`, it follows symbolic links and archive the files they point to instead of the links themselves.", example=True)
compression: CompressionType = Field(default="gzip", description="Defines the type of compression to be used. By default gzip is used.", example="gzip")
compression: CompressionTypeValue = Field(default=CompressionType.gzip, description="DOE IRI URN for the compression type. Legacy short tokens are accepted only as input compatibility aliases and are normalized.", example=CompressionType.gzip)
model_config = {
"json_schema_extra": {
"examples": [
Expand All @@ -211,7 +205,7 @@ class PostCompressRequest(FilesystemRequestBase):
"target_path": "/home/user/file.tar.gz",
"match_pattern": "*./[ab].*\\.txt",
"dereference": "true",
"compression": "none",
"compression": CompressionType.none,
}
]
}
Expand All @@ -226,14 +220,14 @@ class PostExtractResponse(BaseModel):
class PostExtractRequest(FilesystemRequestBase):
"""Represents a request to extract a compressed file."""
target_path: str = Field(..., description="Path to the directory where to extract the compressed file", example="/home/user/dir")
compression: CompressionType = Field(default="gzip", description="Defines the type of compression to be used. By default gzip is used.", example="gzip")
compression: CompressionTypeValue = Field(default=CompressionType.gzip, description="DOE IRI URN for the compression type. Legacy short tokens are accepted only as input compatibility aliases and are normalized.", example=CompressionType.gzip)
model_config = {
"json_schema_extra": {
"examples": [
{
"source_path": "/home/user/dir/file.tar.gz",
"target_path": "/home/user/dir",
"compression": "none",
"compression": CompressionType.none,
}
]
}
Expand Down
2 changes: 1 addition & 1 deletion app/routers/status/facility_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ async def get_resources(
description: str | None = None,
group: str | None = None,
modified_since: datetime.datetime | None = None,
resource_type: status_models.ResourceType | None = None,
resource_type: status_models.ResourceTypeValue | None = None,
current_status: status_models.Status | None = None,
capability: Capability | None = None,
site_id: str | None = None,
Expand Down
24 changes: 8 additions & 16 deletions app/routers/status/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ...request_context import get_url_prefix
from ...types.base import NamedObject
from ...types.scalars import ResourceType, ResourceTypeValue, urn_has_complete_prefix, validate_doe_iri_urn


class Status(enum.Enum):
Expand All @@ -16,18 +17,8 @@ class Status(enum.Enum):
unknown = "unknown"


class ResourceType(enum.Enum):
"""Represents the type of a resource."""
website = "website"
service = "service"
compute = "compute"
system = "system"
storage = "storage"
network = "network"
unknown = "unknown"


class Endpoint(enum.Enum):
class Endpoint(str, enum.Enum):
"""Router endpoint a resource supports (used internally to route compute/filesystem requests)."""
compute = "compute"
filesystem = "filesystem"

Expand All @@ -42,7 +33,7 @@ def _self_path(self) -> str:
capability_ids: list[str] = Field(default_factory=list, exclude=True)
group: str|None = Field(default=None, description="Logical grouping of the resource", example="frontend")
current_status: Status|None = Field(default=None, description="The current status comes from the status of the last event for this resource", example="up")
resource_type: ResourceType = Field(..., description="Type of the resource", example="service")
resource_type: ResourceTypeValue = Field(..., description="DOE IRI URN for the resource type", example=ResourceType.service)
supported_endpoints: list[Endpoint] = Field(default_factory=list, description="a list of endpoints where this resource can be used")

@computed_field(description="URI of the site where this resource is located")
Expand All @@ -63,9 +54,10 @@ def find(cls, items, name=None, description=None, modified_since=None, group=Non
if group:
items = [item for item in items if item.group == group]
if resource_type:
if isinstance(resource_type, str):
resource_type = ResourceType(resource_type)
items = [item for item in items if item.resource_type == resource_type]
# resource_type may be a ResourceType member (ResourceType is a str subclass) or a raw URN string.
# Read .value instead of calling str(): str() on a str-mixin Enum member yields "ResourceType.compute", not the URN value.
rt_urn = validate_doe_iri_urn(resource_type.value if hasattr(resource_type, "value") else resource_type)
items = [item for item in items if urn_has_complete_prefix(rt_urn, item.resource_type)]
if current_status:
items = [item for item in items if item.current_status == current_status]
if capability:
Expand Down
12 changes: 9 additions & 3 deletions app/routers/status/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from fastapi import Depends, HTTPException, Query, Request

from ...types.http import forbidExtraQueryParams
from ...types.scalars import AllocationUnit, StrictDateTime
from ...types.scalars import AllocationUnitValue, StrictDateTime, DOE_IRI_URN_MIN_LENGTH, DOE_IRI_URN_SCHEMA_PATTERN
from .. import iri_router
from ..error_handlers import DEFAULT_RESPONSES
from ..iri_meta import iri_meta_dict
Expand Down Expand Up @@ -33,9 +33,15 @@ async def get_resources(
offset: int = Query(default=0, ge=0),
limit: int = Query(default=100, ge=0, le=1000),
modified_since: StrictDateTime = Query(default=None),
resource_type: models.ResourceType = Query(default=None),
resource_type: models.ResourceTypeValue = Query(
default=None,
min_length=DOE_IRI_URN_MIN_LENGTH,
pattern=DOE_IRI_URN_SCHEMA_PATTERN,
description="DOE IRI resource type URN (urn:doe-iri:<domain>:<nss>). Facility-local extensions accepted.",
examples=[models.ResourceType.compute, models.ResourceType.storage, models.ResourceType.service],
),
current_status: models.Status = Query(default=None),
capability: List[AllocationUnit] = Query(default=None, min_length=1),
capability: List[AllocationUnitValue] = Query(default=None, min_length=1),
_forbid=Depends(forbidExtraQueryParams("name", "description", "group", "offset", "limit", "modified_since", "resource_type", "current_status", "capability", multiParams={"capability"})),
) -> list[models.Resource]:
return await router.adapter.get_resources(
Expand Down
4 changes: 2 additions & 2 deletions app/types/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pydantic import Field

from .base import NamedObject
from .scalars import AllocationUnit, StrictDateTime
from .scalars import AllocationUnit, AllocationUnitValue, StrictDateTime


class Capability(NamedObject):
Expand All @@ -20,4 +20,4 @@ def _self_path(self) -> str:

last_modified: StrictDateTime|None = Field(default=None, description="ISO 8601 timestamp when this object was last modified.", example="2026-02-21T12:00:00Z")

units: list[AllocationUnit] = Field(..., description="Allocation units supported by this capability", example=["node_hours"])
units: list[AllocationUnitValue] = Field(..., description="Allocation units supported by this capability", example=[AllocationUnit.node_hours])
144 changes: 137 additions & 7 deletions app/types/scalars.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

# pylint: disable=unused-argument
import datetime
import enum
import re
from enum import Enum
from typing import Annotated

from pydantic import BeforeValidator, WithJsonSchema
from pydantic_core import core_schema


Expand Down Expand Up @@ -83,10 +86,137 @@ def __get_pydantic_json_schema__(cls, schema, handler):


# -----------------------------------------------------------------------
# AllocationUnit: an enum for allocation units
class AllocationUnit(enum.Enum):
"""Units for allocation"""
# DOE IRI URN validation

node_hours = "node_hours"
bytes = "bytes"
inodes = "inodes"
# Fixed leading text of every DOE IRI URN; the regex patterns and minimum-length helpers in this module are built from it.
DOE_IRI_URN_PREFIX = "urn:doe-iri:"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why all this complexity? How about just new enum(str)-s of the urls?

# Domain label: 1 to 32 characters drawn from ASCII letters, digits and "-", where the first character must be a letter or digit.
_DOMAIN = r"[A-Za-z0-9][A-Za-z0-9-]{0,31}"
# One character of a domain-specific segment: a letter, digit, ".", "_", "~" or "-";
# a percent-encoded octet (%XX); one of !$&'()*+,;=@; or "/".
# ":" is not part of this set — it only appears as the separator between segments.
_SEGMENT_CHAR = r"(?:[A-Za-z0-9._~-]|%[0-9A-Fa-f]{2}|[!$&'()*+,;=@]|/)"
# A segment is one or more segment characters, so it can never be empty.
_DOMAIN_SPECIFIC_SEGMENT = rf"{_SEGMENT_CHAR}+"
# The domain-specific string is one or more segments joined by ":".
_DOMAIN_SPECIFIC_STRING = rf"{_DOMAIN_SPECIFIC_SEGMENT}(?::{_DOMAIN_SPECIFIC_SEGMENT})*"
# Compiled, anchored pattern for a full URN; exposes the named groups "domain" and "nss".
DOE_IRI_URN_PATTERN = re.compile(rf"^{DOE_IRI_URN_PREFIX}(?P<domain>{_DOMAIN}):(?P<nss>{_DOMAIN_SPECIFIC_STRING})$")
# General URN pattern and minimum length — use these for query parameters that accept any domain.
# The schema pattern is the same expression as DOE_IRI_URN_PATTERN, kept as a plain string and without the named groups.
DOE_IRI_URN_SCHEMA_PATTERN = rf"^{DOE_IRI_URN_PREFIX}{_DOMAIN}:{_DOMAIN_SPECIFIC_STRING}$"
DOE_IRI_URN_MIN_LENGTH = len(DOE_IRI_URN_PREFIX) + 1 + 1 + 1  # prefix + 1 domain char + colon + 1 nss char


def validate_doe_iri_urn(value: str) -> str:
    """Check that *value* is a well-formed DOE IRI URN and return it stripped.

    Args:
        value: Candidate URN. Leading and trailing whitespace is ignored.

    Returns:
        The URN with surrounding whitespace removed.

    Raises:
        ValueError: If *value* is not a string, is blank, or does not match
            ``DOE_IRI_URN_PATTERN``.
    """
    # Non-string input is reported with the same message as a blank string.
    stripped = value.strip() if isinstance(value, str) else ""
    if not stripped:
        raise ValueError("Invalid DOE IRI URN. Expected a non-empty string.")
    if DOE_IRI_URN_PATTERN.fullmatch(stripped) is None:
        raise ValueError("Invalid DOE IRI URN. Expected format urn:doe-iri:<domain>:<domain-specific-string>.")
    return stripped


def _validate_urn_domain(value: str, domain: str, label: str) -> str:
    """Validate *value* as a DOE IRI URN that belongs to *domain*.

    Args:
        value: Candidate URN.
        domain: Domain the URN must carry (third colon-separated field).
        label: Human-readable name of the value, used in the error message.

    Returns:
        The validated (whitespace-stripped) URN.

    Raises:
        ValueError: If the URN is malformed or its domain differs from *domain*.
    """
    urn = validate_doe_iri_urn(value)
    # A validated URN has the shape urn:doe-iri:<domain>:<nss>, so splitting on the
    # first three colons always produces exactly four fields.
    _scheme, _namespace, found_domain, _nss = urn.split(":", 3)
    if found_domain == domain:
        return urn
    raise ValueError(f"Invalid {label}. Expected domain '{domain}', got '{found_domain}'.")


def doe_iri_domain_urn_schema_pattern(domain: str) -> str:
    """Build the anchored JSON-schema regex for DOE IRI URNs restricted to *domain*.

    *domain* is inserted into the pattern verbatim (it is not regex-escaped).
    """
    domain_prefix = f"{DOE_IRI_URN_PREFIX}{domain}:"
    return rf"^{domain_prefix}{_DOMAIN_SPECIFIC_STRING}$"


def doe_iri_domain_urn_min_length(domain: str) -> int:
    """Compute the shortest possible length of a DOE IRI URN in *domain*."""
    domain_prefix = f"{DOE_IRI_URN_PREFIX}{domain}:"
    # The extra 1 is the single mandatory character of the domain-specific string.
    return len(domain_prefix) + 1


def _domain_urn_schema(domain: str, description: str, examples: list[str]) -> dict[str, object]:
    """Assemble the JSON-schema fragment for a string URN constrained to *domain*.

    The fragment carries the type, the minimum length and regex derived from
    *domain*, and the caller-supplied *description* and *examples*.
    """
    shortest = doe_iri_domain_urn_min_length(domain)
    regex = doe_iri_domain_urn_schema_pattern(domain)
    schema: dict[str, object] = {
        "type": "string",
        "minLength": shortest,
        "pattern": regex,
        "description": description,
        "examples": examples,
    }
    return schema


def urn_has_complete_prefix(parent_urn: str, candidate_urn: str) -> bool:
    """Tell whether *candidate_urn* equals *parent_urn* or sits beneath it.

    Both URNs are validated first, then compared field by field on ":" so that
    only whole segments count: a parent matches when all of its fields are the
    leading fields of the candidate.

    Raises:
        ValueError: If either argument is not a valid DOE IRI URN.
    """
    parent_fields = validate_doe_iri_urn(parent_urn).split(":")
    candidate_fields = validate_doe_iri_urn(candidate_urn).split(":")
    depth = len(parent_fields)
    # A parent with more fields than the candidate can never be its prefix.
    return depth <= len(candidate_fields) and candidate_fields[:depth] == parent_fields


# -----------------------------------------------------------------------
# Canonical enum types


class ResourceType(str, Enum):
    """Well-known DOE IRI resource-type URNs (spec §3.1).

    Every member lives in the `resource` domain except `service`, which the
    spec places in the `service` domain. Only the canonical values are listed
    here; ResourceTypeValue accepts any valid DOE IRI URN so that facilities
    can add their own extensions.
    """

    # `resource` domain
    website = "urn:doe-iri:resource:website"
    # `service` domain (not `resource`)
    service = "urn:doe-iri:service:generic"
    # `resource` domain
    compute = "urn:doe-iri:resource:compute"
    system = "urn:doe-iri:resource:system"
    storage = "urn:doe-iri:resource:storage"
    network = "urn:doe-iri:resource:network"
    unknown = "urn:doe-iri:resource:unknown"


class AllocationUnit(str, Enum):
    """Well-known DOE IRI allocation-unit URNs (spec §3.2), all in the `allocation` domain."""

    # compute allocations
    node_hours = "urn:doe-iri:allocation:compute:node-hours"
    # storage allocations
    bytes = "urn:doe-iri:allocation:storage:bytes"
    inodes = "urn:doe-iri:allocation:storage:inodes"


class CompressionType(str, Enum):
    """Well-known DOE IRI compression URNs (spec §3.3), all in the `compression` domain."""

    # no compression
    none = "urn:doe-iri:compression:none"
    # supported compression formats
    bzip2 = "urn:doe-iri:compression:bzip2"
    gzip = "urn:doe-iri:compression:gzip"
    xz = "urn:doe-iri:compression:xz"


# -----------------------------------------------------------------------
# Pydantic annotated field types

# ResourceTypeValue accepts any valid DOE IRI URN.
# No domain constraint: `service` lives in the `service` domain (spec §3.1),
# and facilities may use their own domains for local extensions (spec §5).
# validate_doe_iri_urn is attached as a "before" validator, so the incoming value is
# stripped and pattern-checked ahead of pydantic's own str validation.
# NOTE(review): the JSON schema published here carries no "pattern"/"minLength", unlike
# the schemas produced by _domain_urn_schema for the other *Value types — confirm this is intentional.
ResourceTypeValue = Annotated[
    str,
    BeforeValidator(validate_doe_iri_urn),
    WithJsonSchema({
        "type": "string",
        "description": "DOE IRI resource type URN (urn:doe-iri:<domain>:<nss>). Facility-local extensions accepted.",
        "examples": [ResourceType.compute, ResourceType.storage, ResourceType.service],
    }),
]

# AllocationUnitValue: a string that must be a DOE IRI URN in the `allocation` domain.
# The lambda fixes the expected domain and the label used in _validate_urn_domain's error message.
# The JSON schema (type, minLength, pattern, description, examples) is built by _domain_urn_schema.
AllocationUnitValue = Annotated[
    str,
    BeforeValidator(lambda v: _validate_urn_domain(v, "allocation", "allocation unit")),
    WithJsonSchema(
        _domain_urn_schema(
            "allocation",
            "DOE IRI allocation-unit URN.",
            [AllocationUnit.node_hours, AllocationUnit.bytes],
        )
    ),
]

# CompressionTypeValue: a string that must be a DOE IRI URN in the `compression` domain.
# The lambda fixes the expected domain and the label used in _validate_urn_domain's error message.
# The JSON schema (type, minLength, pattern, description, examples) is built by _domain_urn_schema.
# NOTE(review): this validator only accepts full urn:doe-iri:compression:... values; a short
# token such as "gzip" fails validate_doe_iri_urn. The filesystem request field descriptions say
# legacy short tokens are accepted and normalized — confirm which behavior is intended.
CompressionTypeValue = Annotated[
    str,
    BeforeValidator(lambda v: _validate_urn_domain(v, "compression", "compression type")),
    WithJsonSchema(
        _domain_urn_schema(
            "compression",
            "DOE IRI compression URN.",
            [CompressionType.gzip, CompressionType.none],
        )
    ),
]
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ dependencies = [
"globus-sdk>=4.3.1",
"typer>=0.24.1",
]
[project.optional-dependencies]
dev = ["pytest>=9"]

[tool.ruff]
line-length = 200
exclude = [".venv", "__pycache__", "build", "dist"]
Loading
Loading