From e8643cf642f24a5ec4a2c48b6a0ed7ee5cc60d69 Mon Sep 17 00:00:00 2001 From: Johannes Terblanche Date: Fri, 17 Apr 2026 15:57:54 +0200 Subject: [PATCH] Improve DFG.Packed packing docs --- src/Serialization/DFGStructStyles.jl | 38 ++++++ .../DistributionSerialization.jl | 17 ++- src/Serialization/PackedSerialization.jl | 118 +++++++++++++++++- src/entities/Blobprovider.jl | 13 +- 4 files changed, 179 insertions(+), 7 deletions(-) diff --git a/src/Serialization/DFGStructStyles.jl b/src/Serialization/DFGStructStyles.jl index d1532fda..00da8c3a 100644 --- a/src/Serialization/DFGStructStyles.jl +++ b/src/Serialization/DFGStructStyles.jl @@ -1,3 +1,41 @@ +""" + DFGJSONStyle <: JSON.JSONStyle + +Custom JSON serialization style used throughout DFG for `JSON.json` / `JSON.parse`. + +This style adds handling for types that don't round-trip through plain JSON: +`Complex`, `SArray`, `ArrayPartition`, `RefValue{Int}`, `TimeDateZone`, etc. + +# Polymorphic abstract types — `pack`/`unpack` + +Abstract types like `AbstractBlobprovider` and `AbstractObservation` are serialized through the +[`Packed`](@ref) envelope. Each concrete subtype can define a lightweight +"packed" companion struct and overload [`pack`](@ref) / [`unpack`](@ref). +The default `pack(x) = x` works for structs whose fields are all plain data. + +When a type contains **non-serializable fields** (clients, connections, caches), +define a packed companion: + +```julia +struct PackedMytype + label::Symbol +end +DFG.pack(s::Mytype) = PackedMytype(s.label) +DFG.unpack(p::PackedMytype) = Mytype(reconnect_client(), p.label) +``` + +JSON emitted with `style = DFGJSONStyle()` embeds a `"type"` header +so the deserializer can locate the packed type and call `unpack`. 
+ +# Usage + +```julia +json_str = JSON.json(value; style = DFGJSONStyle()) +value = JSON.parse(json_str, T; style = DFGJSONStyle()) +``` + +See also: [`pack`](@ref), [`unpack`](@ref), [`Packed`](@ref), [`@packed`](@ref) +""" struct DFGJSONStyle <: JSON.JSONStyle end # Base.RefValue{Int} serialization diff --git a/src/Serialization/DistributionSerialization.jl b/src/Serialization/DistributionSerialization.jl index c385bf7b..a866b0be 100644 --- a/src/Serialization/DistributionSerialization.jl +++ b/src/Serialization/DistributionSerialization.jl @@ -1,7 +1,18 @@ ## ================================================================================ -## there are 2 ways of dealing with types that don't pack out of the box -## 1) define pack and unpack methods for them. -## 2) use StructUtils.jl with a custom StructStyle. +## Types that don't serialize cleanly through StructUtils can use pack/unpack. +## +## Pattern: +## 1. Define a "Packed" companion struct with only serializable fields. +## 2. Overload `DFG.pack(x::OriginalType) -> PackedType`. +## 3. Overload `DFG.unpack(p::PackedType) -> OriginalType`. +## +## The Packed envelope and DFGJSONStyle handle the rest automatically: +## serialization calls pack() and embeds TypeMetadata; deserialization +## resolves the packed type, builds it via StructUtils.make, then calls unpack(). +## +## Alternatively, for scalar-like types, use StructUtils.lower/lift directly +## with a custom StructStyle (see DFGStructStyles.jl for examples). 
+## ================================================================================ ## 1) Overloads of Distributions.jl types not packing out of the box # TODO make Distributions extension or move to IncrementalInferenceTypes as not to have Distributions.jl as a dependency of DFG diff --git a/src/Serialization/PackedSerialization.jl b/src/Serialization/PackedSerialization.jl index f51478c8..f835710b 100644 --- a/src/Serialization/PackedSerialization.jl +++ b/src/Serialization/PackedSerialization.jl @@ -1,10 +1,89 @@ +""" + pack(x) -> packed_x + +Convert `x` into a serialization-friendly "packed" form. + +The default method is the identity (`pack(x) = x`), meaning types whose fields +are all plain data (numbers, strings, arrays, nested structs of the same) need +no special treatment — they serialize as-is through `StructUtils.jl`. + +Overload `pack` when a type contains **non-serializable fields** such as network +connections, open file handles, live caches, or opaque foreign objects. +The pattern is: + +1. Define a "packed" companion struct holding only the serializable subset. +2. Overload `pack` to project the live type onto the packed struct. +3. Overload [`unpack`](@ref) to reconstruct the live type from the packed struct. + +The `Packed` wrapper calls `pack` automatically during serialization, embedding a +`TypeMetadata` header so the deserializer can locate the packed type. 
+ +# Example + +```julia +# Live type — `client` is an HTTP connection, not serializable +struct MyRemoteStore <: DFG.AbstractBlobprovider + client::HTTPClient + label::Symbol +end + +# Packed form — only the serializable fields +struct PackedMyRemoteStore + label::Symbol +end + +DFG.pack(s::MyRemoteStore) = PackedMyRemoteStore(s.label) +DFG.unpack(p::PackedMyRemoteStore) = MyRemoteStore(find_active_client(), p.label) +``` + +The JSON output will contain `"type": {"pkg": "MyPkg", "name": "PackedMyRemoteStore", ...}` +so the deserializer resolves the packed type, calls `StructUtils.make(PackedMyRemoteStore, json)`, +and then calls `unpack(::PackedMyRemoteStore)` to produce the live `MyRemoteStore`. + +See also: [`unpack`](@ref), [`Packed`](@ref), [`TypeMetadata`](@ref), [`DFGJSONStyle`](@ref) +""" function pack end + +""" + unpack(packed_x) -> x + +Reconstruct a live object from its packed (serialization-safe) form. + +The default method is the identity (`unpack(x) = x`). Overload it for each +packed companion type created alongside a [`pack`](@ref) overload. + +During deserialization the pipeline is: + + JSON bytes → StructUtils.make(PackedT, data) → unpack(::PackedT) → live T + +`unpack` is the place to reconnect non-serializable resources (clients, +file handles, caches) that were stripped by `pack`. + +# Example + +```julia +DFG.unpack(p::PackedMyRemoteStore) = MyRemoteStore(find_active_client(), p.label) +``` + +See also: [`pack`](@ref), [`Packed`](@ref) +""" function unpack end version(::Type{T}) where {T} = pkgversion(parentmodule(T)) # version(node) = node.version -# Type for storing packed type information +""" + TypeMetadata(pkg, name, version) + TypeMetadata(::Type{T}) + +Metadata header embedded in every [`Packed`](@ref) envelope. + +Stores the top-level package name, the struct name, and the package version so +the deserializer can resolve the correct concrete type at load time via +[`resolvePackedType`](@ref). 
+ +Generally you never construct this manually — `Packed(x)` does it for you. +""" struct TypeMetadata pkg::Symbol #TODO use PkgId, maybe best to use flat structure with optional uuid, something like pkg[_name], pkg_uuid::Union{Nothing, UUID} name::Symbol @@ -15,6 +94,27 @@ function TypeMetadata(::Type{T}) where {T} return TypeMetadata(fullname(parentmodule(T))[1], nameof(T), version(T)) end +""" + Packed{T}(type::TypeMetadata, packed::T) + Packed(x) + +Serialization envelope that pairs a value with its [`TypeMetadata`](@ref). + +Calling `Packed(x)` runs the full pipeline: + +1. `packed_x = pack(x)` — convert `x` to its serialization-safe form. +2. `TypeMetadata(typeof(packed_x))` — snapshot the packed type's identity. +3. Bundle both into `Packed{typeof(packed_x)}(metadata, packed_x)`. + +During **serialization** (`lower`): the packed struct's fields are flattened +into an `OrderedDict` alongside a `:type` key holding the metadata. + +During **deserialization** (`lift`): `resolvePackedType` reads the `:type` key +to identify the packed type, `StructUtils.make` builds it, and [`unpack`](@ref) +reconstructs the original live type. + +See also: [`pack`](@ref), [`unpack`](@ref), [`TypeMetadata`](@ref), [`resolvePackedType`](@ref) +""" StructUtils.@nonstruct struct Packed{T} type::TypeMetadata packed::T @@ -36,6 +136,11 @@ function StructUtils.lift(::Type{<:Packed{T}}, x) where {T} return r end +""" + pack_lower(x) + +Convenience: `pack` then `lower` in one call. Equivalent to `StructUtils.lower(Packed(x))`. +""" function pack_lower(x) px = Packed(x) d = StructUtils.make(OrderedDict{Symbol, Any}, px.packed) @@ -47,7 +152,16 @@ unpack(x) = x unpack(p::Packed) = unpack(p.packed) pack(x) = x -#TODO add overwriteable layer +#TODO add overridable layer to resolvePackedType +""" + resolvePackedType(json_obj) -> Type{Packed{T}} + +Read the `type` metadata from a JSON object and resolve the corresponding +`Packed{T}` type. 
Called automatically by the `@choosetype` machinery during +deserialization of polymorphic types (e.g. `AbstractBlobprovider`, `Packed`). + +The resolver requires the owning module to be loaded in `Main`. +""" function resolvePackedType(lazyobj::JSON.LazyValue) type = JSON.parse(lazyobj.type) # TODO we can use Base.PkgId to not require modules to be available in Main diff --git a/src/entities/Blobprovider.jl b/src/entities/Blobprovider.jl index f3081c48..a3df800f 100644 --- a/src/entities/Blobprovider.jl +++ b/src/entities/Blobprovider.jl @@ -55,7 +55,16 @@ listBlobproviders(dfg) # lists all mounted labels abstract type AbstractBlobprovider end const Blobprovider = AbstractBlobprovider -function StructUtils.lower(::StructUtils.StructStyle, store::AbstractBlobprovider) - return StructUtils.lower(Packed(store)) +# --- Polymorphic serialization via Packed envelope --- +# Any AbstractBlobprovider is serialized via lower(Packed(provider)), which: +# 1. Calls pack(provider) — returns a serialization-safe packed struct +# (identity by default; override for types with non-serializable fields). +# 2. Wraps in Packed{PackedT}(TypeMetadata, packed_provider). +# 3. Flattens to JSON with a "type" header for deserialization dispatch. +# On deserialization, resolvePackedType reads the header, make() builds the +# packed struct, and unpack() reconstructs the live type. +# See: pack, unpack, Packed, DFGJSONStyle +function StructUtils.lower(::StructUtils.StructStyle, provider::AbstractBlobprovider) + return StructUtils.lower(Packed(provider)) end @choosetype AbstractBlobprovider resolvePackedType