diff --git a/Cargo.lock b/Cargo.lock index c0117cfb..84f97339 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,18 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + [[package]] name = "anyhow" version = "1.0.100" @@ -119,6 +131,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "bitstream-io" +version = "4.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d4bd9d1db2c6bdf285e223a7fa369d5ce98ec767dec949c6ca62863ce61757" +dependencies = [ + "core2", +] + [[package]] name = "block-buffer" version = "0.9.0" @@ -176,6 +197,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.48" @@ -229,6 +256,33 @@ dependencies = [ "windows-link", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -240,6 +294,31 @@ dependencies = [ "zeroize", ] +[[package]] +name = "clap" +version = "4.5.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5caf74d17c3aec5495110c34cc3f78644bfa89af6c8993ed4de2790e49b6499" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "370daa45065b80218950227371916a1633217ae42b2715b2287b606dcd618e24" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + [[package]] name = "config" version = "0.15.19" @@ -311,6 +390,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -329,6 +417,41 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crunchy" version = "0.2.4" @@ -702,7 +825,7 @@ dependencies = [ "fastly-shared", "fastly-sys", "http", - "itertools", + "itertools 0.13.0", "lazy_static", "mime", "serde", @@ -956,6 +1079,17 @@ dependencies = [ "subtle", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "handlebars" version = "6.4.0" @@ -1007,6 +1141,18 @@ dependencies = [ "hashbrown 0.15.5", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1032,6 +1178,38 @@ dependencies = [ "itoa", ] +[[package]] +name = "iab_gpp" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3be2d0191a3376e0176bb3df53b2754c644ead6edd50d9494ee8fa376a70e02" +dependencies = [ + "bitstream-io", + "fnv", + "iab_gpp_derive", + "num-derive", + "num-iter", + "num-traits", + "prettyplease", + "proc-macro2", + "quote", + "strum_macros", + "syn 2.0.111", + "thiserror 2.0.17", + "walkdir", +] + +[[package]] +name = "iab_gpp_derive" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5acda598b043c6386d20fffe86c600b63c7ca4980ee9a28f7e9aaa15d749747" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name 
= "iana-time-zone" version = "0.1.64" @@ -1183,6 +1361,26 @@ dependencies = [ "generic-array", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -1389,6 +1587,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -1440,6 +1649,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "opaque-debug" version = "0.3.1" @@ -1634,6 +1849,34 @@ dependencies = [ "spki", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "poly1305" version = "0.8.0" @@ -1675,6 +1918,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.111", +] + [[package]] name = "primeorder" version = "0.13.6" @@ -1882,6 +2135,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2114,6 +2376,18 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "subtle" version = "2.6.1" @@ -2252,6 +2526,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = 
"tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tokio" version = "1.49.0" @@ -2347,6 +2631,7 @@ dependencies = [ "chrono", "config", "cookie", + "criterion", "derive_more", "ed25519-dalek", "error-stack", @@ -2357,6 +2642,7 @@ dependencies = [ "hex", "hmac", "http", + "iab_gpp", "jose-jwk", "log", "log-fastly", @@ -2524,6 +2810,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2584,6 +2880,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "which" version = "8.0.0" @@ -2595,6 +2901,15 @@ dependencies = [ "winsafe", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + [[package]] name = "windows-core" version = "0.62.2" diff --git a/Cargo.toml b/Cargo.toml index ba3faccd..a88e287e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,7 @@ handlebars = "6.4.0" hex = "0.4.3" hmac = "0.12.1" http = "1.4.0" +iab_gpp = "0.1" jose-jwk = "0.1.2" log = "0.4.28" log-fastly = "0.11.12" @@ -79,3 +80,4 @@ 
urlencoding = "2.1" uuid = { version = "1.18", features = ["v4"] } validator = { version = "0.20", features = ["derive"] } which = "8" +criterion = { version = "0.5", default-features = false, features = ["plotters", "cargo_bench_support"] } diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index c360474e..b15f16bb 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -29,6 +29,7 @@ handlebars = { workspace = true } hex = { workspace = true } hmac = { workspace = true } http = { workspace = true } +iab_gpp = { workspace = true } jose-jwk = { workspace = true } log = { workspace = true } rand = { workspace = true } @@ -67,5 +68,10 @@ validator = { workspace = true } default = [] [dev-dependencies] +criterion = { workspace = true } temp-env = { workspace = true } tokio-test = { workspace = true } + +[[bench]] +name = "consent_decode" +harness = false diff --git a/crates/common/benches/consent_decode.rs b/crates/common/benches/consent_decode.rs new file mode 100644 index 00000000..32550a3c --- /dev/null +++ b/crates/common/benches/consent_decode.rs @@ -0,0 +1,236 @@ +//! Benchmarks for the consent decoding pipeline. +//! +//! Measures the computational cost of decoding consent signals (TCF v2, GPP, +//! US Privacy) to determine whether wiring decoding into the auction hot path +//! introduces unacceptable latency. +//! +//! 
Run with: `cargo bench -p trusted-server-common`
+
+use base64::engine::general_purpose::URL_SAFE_NO_PAD;
+use base64::Engine as _;
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+
+use trusted_server_common::consent::tcf::decode_tc_string;
+use trusted_server_common::consent::types::RawConsentSignals;
+use trusted_server_common::consent::us_privacy::decode_us_privacy;
+use trusted_server_common::consent::{build_context_from_signals, gpp};
+
+// ---------------------------------------------------------------------------
+// Test data
+// ---------------------------------------------------------------------------
+
+/// Known-good GPP string with US Privacy section only (section ID 6).
+const GPP_USP_ONLY: &str = "DBABTA~1YNN";
+
+/// GPP string with both TCF EU v2 and US Privacy sections.
+const GPP_TCF_AND_USP: &str = "DBACNY~CPXxRfAPXxRfAAfKABENB-CgAAAAAAAAAAYgAAAAAAAA~1YNN";
+
+/// Builds a minimal TC String v2 byte buffer for benchmarking.
+///
+/// This duplicates the test helper from `tcf.rs` since `#[cfg(test)]` helpers
+/// are not available in bench targets.
+fn build_tc_bytes(vendor_count: u16, use_range_encoding: bool) -> Vec<u8> {
+    let total_bits = if use_range_encoding {
+        // Core fields (213) + maxVendorId (16) + isRange (1) + numEntries (12)
+        // + one range entry per vendor group: isRange(1) + start(16) + end(16)
+        // We'll encode as one big range: vendors 1..=vendor_count
+        213 + 17 + 12 + 1 + 32
+    } else {
+        // Bitfield: core fields + maxVendorId + isRange + one bit per vendor
+        213 + 17 + usize::from(vendor_count)
+    };
+    let total_bytes = total_bits.div_ceil(8);
+    let mut buf = vec![0u8; total_bytes];
+
+    // Version (6 bits) = 2
+    write_bits(&mut buf, 0, 6, 2);
+    // Created (36 bits) = 100000 (arbitrary)
+    write_bits(&mut buf, 6, 36, 100_000);
+    // LastUpdated (36 bits) = 200000
+    write_bits(&mut buf, 42, 36, 200_000);
+    // CmpId (12 bits) = 7
+    write_bits(&mut buf, 78, 12, 7);
+    // CmpVersion (12 bits) = 1
+    write_bits(&mut buf, 90, 12, 1);
+    // ConsentScreen (6 bits) = 1
+    write_bits(&mut buf, 102, 6, 1);
+    // ConsentLanguage (12 bits) = EN
+    write_bits(&mut buf, 108, 6, u64::from(b'E' - b'A'));
+    write_bits(&mut buf, 114, 6, u64::from(b'N' - b'A'));
+    // VendorListVersion (12 bits) = 42
+    write_bits(&mut buf, 120, 12, 42);
+    // TcfPolicyVersion (6 bits) = 2
+    write_bits(&mut buf, 132, 6, 2);
+    // IsServiceSpecific (1) = 0, UseNonStandardTexts (1) = 0
+    // SpecialFeatureOptIns (12) = 0b000000000011 (features 11, 12)
+    write_bits(&mut buf, 140, 12, 0b0000_0000_0011);
+    // PurposesConsent (24) = purposes 1-4 consented
+    write_bits(&mut buf, 152, 24, 0b1111_0000_0000_0000_0000_0000);
+    // PurposesLITransparency (24) = purposes 1-2
+    write_bits(&mut buf, 176, 24, 0b1100_0000_0000_0000_0000_0000);
+    // PurposeOneTreatment (1) = 0
+    // PublisherCC (12) = EN
+    write_bits(&mut buf, 201, 6, u64::from(b'E' - b'A'));
+    write_bits(&mut buf, 207, 6, u64::from(b'N' - b'A'));
+
+    // MaxVendorConsentId (16)
+    write_bits(&mut buf, 213, 16, u64::from(vendor_count));
+
+    if use_range_encoding {
+        // IsRangeEncoding (1) = 1
+        write_bit(&mut buf, 229, true);
+        // NumEntries (12) = 1 (one range covering all vendors)
+        write_bits(&mut buf, 230, 12, 1);
+        // Entry: IsRangeEntry (1) = 1
+        write_bit(&mut buf, 242, true);
+        // StartVendorId (16) = 1
+        write_bits(&mut buf, 243, 16, 1);
+        // EndVendorId (16) = vendor_count
+        write_bits(&mut buf, 259, 16, u64::from(vendor_count));
+    } else {
+        // IsRangeEncoding (1) = 0 (bitfield)
+        write_bit(&mut buf, 229, false);
+        // Set every other vendor as consented (realistic pattern)
+        for i in 0..usize::from(vendor_count) {
+            if i % 2 == 0 {
+                write_bit(&mut buf, 230 + i, true);
+            }
+        }
+    }
+
+    buf
+}
+
+fn write_bit(buf: &mut [u8], bit_offset: usize, value: bool) {
+    if value {
+        let byte_idx = bit_offset / 8;
+        let bit_idx = 7 - (bit_offset % 8);
+        if byte_idx < buf.len() {
+            buf[byte_idx] |= 1 << bit_idx;
+        }
+    }
+}
+
+fn write_bits(buf: &mut [u8], bit_offset: usize, num_bits: usize, value: u64) {
+    for i in 0..num_bits {
+        let bit = (value >> (num_bits - 1 - i)) & 1 == 1;
+        write_bit(buf, bit_offset + i, bit);
+    }
+}
+
+fn encode_tc_string(vendor_count: u16, use_range: bool) -> String {
+    let bytes = build_tc_bytes(vendor_count, use_range);
+    URL_SAFE_NO_PAD.encode(&bytes)
+}
+
+// ---------------------------------------------------------------------------
+// Benchmarks
+// ---------------------------------------------------------------------------
+
+fn bench_us_privacy(c: &mut Criterion) {
+    c.bench_function("us_privacy_decode", |b| {
+        b.iter(|| decode_us_privacy(black_box("1YNN")));
+    });
+}
+
+fn bench_tcf_decode(c: &mut Criterion) {
+    let small_tc = encode_tc_string(10, false);
+    let medium_tc = encode_tc_string(100, false);
+    let large_tc_bitfield = encode_tc_string(500, false);
+    let large_tc_range = encode_tc_string(500, true);
+
+    let mut group = c.benchmark_group("tcf_decode");
+
+    group.bench_with_input(
+        BenchmarkId::new("bitfield", "10_vendors"),
+        &small_tc,
+        |b, tc| {
+            b.iter(|| decode_tc_string(black_box(tc)));
+        },
+    );
+
+    group.bench_with_input(
+        BenchmarkId::new("bitfield", "100_vendors"),
+        &medium_tc,
+        |b, tc| {
+            b.iter(|| decode_tc_string(black_box(tc)));
+        },
+    );
+
+    group.bench_with_input(
+        BenchmarkId::new("bitfield", "500_vendors"),
+        &large_tc_bitfield,
+        |b, tc| {
+            b.iter(|| decode_tc_string(black_box(tc)));
+        },
+    );
+
+    group.bench_with_input(
+        BenchmarkId::new("range", "500_vendors"),
+        &large_tc_range,
+        |b, tc| {
+            b.iter(|| decode_tc_string(black_box(tc)));
+        },
+    );
+
+    group.finish();
+}
+
+fn bench_gpp_decode(c: &mut Criterion) {
+    let mut group = c.benchmark_group("gpp_decode");
+
+    group.bench_function("usp_only", |b| {
+        b.iter(|| gpp::decode_gpp_string(black_box(GPP_USP_ONLY)));
+    });
+
+    group.bench_function("with_tcf", |b| {
+        b.iter(|| gpp::decode_gpp_string(black_box(GPP_TCF_AND_USP)));
+    });
+
+    group.finish();
+}
+
+fn bench_full_pipeline(c: &mut Criterion) {
+    // Build a realistic TC string (500 vendors, range encoding)
+    let tc_string = encode_tc_string(500, true);
+
+    let all_signals = RawConsentSignals {
+        raw_tc_string: Some(tc_string),
+        raw_gpp_string: Some(GPP_USP_ONLY.to_owned()),
+        raw_gpp_sid: Some("6".to_owned()),
+        raw_us_privacy: Some("1YNN".to_owned()),
+        gpc: true,
+    };
+
+    let empty_signals = RawConsentSignals::default();
+
+    let tc_only = RawConsentSignals {
+        raw_tc_string: Some(encode_tc_string(500, true)),
+        ..Default::default()
+    };
+
+    let mut group = c.benchmark_group("full_pipeline");
+
+    group.bench_function("all_signals", |b| {
+        b.iter(|| build_context_from_signals(black_box(&all_signals)));
+    });
+
+    group.bench_function("empty_signals", |b| {
+        b.iter(|| build_context_from_signals(black_box(&empty_signals)));
+    });
+
+    group.bench_function("tcf_only", |b| {
+        b.iter(|| build_context_from_signals(black_box(&tc_only)));
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_us_privacy,
+    bench_tcf_decode,
+    bench_gpp_decode,
+    bench_full_pipeline,
+);
+criterion_main!(benches);
diff --git
a/crates/common/build.rs b/crates/common/build.rs index cb1e60ae..1c478a53 100644 --- a/crates/common/build.rs +++ b/crates/common/build.rs @@ -1,4 +1,4 @@ -#![allow(clippy::unwrap_used, clippy::panic)] +#![allow(clippy::unwrap_used, clippy::panic, dead_code)] #[path = "src/error.rs"] mod error; @@ -6,6 +6,9 @@ mod error; #[path = "src/auction_config_types.rs"] mod auction_config_types; +#[path = "src/consent_config.rs"] +mod consent_config; + #[path = "src/settings.rs"] mod settings; diff --git a/crates/common/src/auction/endpoints.rs b/crates/common/src/auction/endpoints.rs index c2175caf..c155181e 100644 --- a/crates/common/src/auction/endpoints.rs +++ b/crates/common/src/auction/endpoints.rs @@ -4,7 +4,10 @@ use error_stack::{Report, ResultExt}; use fastly::{Request, Response}; use crate::auction::formats::AdRequest; +use crate::consent; +use crate::cookies::handle_request_cookies; use crate::error::TrustedServerError; +use crate::geo::GeoInfo; use crate::settings::Settings; use super::formats::{convert_to_openrtb_response, convert_tsjs_to_auction_request}; @@ -41,8 +44,19 @@ pub async fn handle_auction( body.ad_units.len() ); + // Extract consent from request cookies, headers, and geo. + let cookie_jar = handle_request_cookies(&req)?; + let geo = GeoInfo::from_request(&req); + let consent_context = consent::build_consent_context(&consent::ConsentPipelineInput { + jar: cookie_jar.as_ref(), + req: &req, + config: &settings.consent, + geo: geo.as_ref(), + synthetic_id: None, // Auction requests don't carry a Synthetic ID yet. 
+ }); + // Convert tsjs request format to auction request - let auction_request = convert_tsjs_to_auction_request(&body, settings, &req)?; + let auction_request = convert_tsjs_to_auction_request(&body, settings, &req, consent_context)?; // Create auction context let context = AuctionContext { diff --git a/crates/common/src/auction/formats.rs b/crates/common/src/auction/formats.rs index 71804e26..40e48746 100644 --- a/crates/common/src/auction/formats.rs +++ b/crates/common/src/auction/formats.rs @@ -13,6 +13,7 @@ use std::collections::HashMap; use uuid::Uuid; use crate::auction::types::OrchestratorExt; +use crate::consent::ConsentContext; use crate::creative; use crate::error::TrustedServerError; use crate::geo::GeoInfo; @@ -63,7 +64,12 @@ pub struct BannerUnit { pub sizes: Vec>, } -/// Convert tsjs/Prebid.js request format to internal `AuctionRequest`. +/// Convert tsjs/Prebid.js request format to internal [`AuctionRequest`]. +/// +/// The `consent` parameter carries decoded consent signals extracted from the +/// incoming request's cookies and headers. It is populated by the caller +/// (the `/auction` endpoint handler) and forwarded through to the +/// [`OpenRTB`][`crate::openrtb::OpenRtbRequest`] bid request. 
/// /// # Errors /// @@ -74,6 +80,7 @@ pub fn convert_tsjs_to_auction_request( body: &AdRequest, settings: &Settings, req: &Request, + consent: ConsentContext, ) -> Result> { // Generate synthetic ID let synthetic_id = get_or_generate_synthetic_id(settings, req).change_context( @@ -145,7 +152,7 @@ pub fn convert_tsjs_to_auction_request( user: UserInfo { id: synthetic_id, fresh_id, - consent: None, + consent: Some(consent), }, device, site: Some(SiteInfo { diff --git a/crates/common/src/auction/types.rs b/crates/common/src/auction/types.rs index 6c6c4d63..a6abab2d 100644 --- a/crates/common/src/auction/types.rs +++ b/crates/common/src/auction/types.rs @@ -72,8 +72,14 @@ pub struct UserInfo { pub id: String, /// Fresh ID for this session pub fresh_id: String, - /// GDPR consent string if applicable - pub consent: Option, + /// Decoded consent context for this request. + /// + /// Carries both raw consent strings (for `OpenRTB` forwarding) and decoded + /// structured data (for TS-level enforcement and observability). + /// Skipped during serde since it is populated at runtime from request + /// cookies/headers, not from stored data. + #[serde(skip)] + pub consent: Option, } /// Device information from request. diff --git a/crates/common/src/consent/extraction.rs b/crates/common/src/consent/extraction.rs new file mode 100644 index 00000000..d5b420bf --- /dev/null +++ b/crates/common/src/consent/extraction.rs @@ -0,0 +1,180 @@ +//! Consent signal extraction from cookies and headers. +//! +//! Reads raw consent strings from the [`CookieJar`] and HTTP headers without +//! performing any decoding or validation. This is the first step in the consent +//! pipeline described in the [Consent Forwarding Architecture Design]. 
+
+use cookie::CookieJar;
+use fastly::Request;
+
+use crate::constants::{
+    COOKIE_EUCONSENT_V2, COOKIE_GPP, COOKIE_GPP_SID, COOKIE_US_PRIVACY, HEADER_SEC_GPC,
+};
+
+use super::types::RawConsentSignals;
+
+/// Extracts raw consent signals from a [`CookieJar`] and a [`Request`].
+///
+/// Reads the following consent cookies (if present):
+/// - `euconsent-v2` — IAB TCF v2 consent string
+/// - `__gpp` — IAB Global Privacy Platform string
+/// - `__gpp_sid` — GPP section IDs (comma-separated)
+/// - `us_privacy` — IAB US Privacy / CCPA string
+///
+/// Also reads the `Sec-GPC` header; `gpc` is `true` only if its trimmed value is `1`.
+///
+/// Cookie values are not decoded or validated — they are captured as-is.
+pub fn extract_consent_signals(jar: Option<&CookieJar>, req: &Request) -> RawConsentSignals {
+    let raw_tc_string = jar
+        .and_then(|j| j.get(COOKIE_EUCONSENT_V2))
+        .map(|c| c.value().to_owned());
+
+    let raw_gpp_string = jar
+        .and_then(|j| j.get(COOKIE_GPP))
+        .map(|c| c.value().to_owned());
+
+    let raw_gpp_sid = jar
+        .and_then(|j| j.get(COOKIE_GPP_SID))
+        .map(|c| c.value().to_owned());
+
+    let raw_us_privacy = jar
+        .and_then(|j| j.get(COOKIE_US_PRIVACY))
+        .map(|c| c.value().to_owned());
+
+    let gpc = req
+        .get_header(HEADER_SEC_GPC)
+        .and_then(|v| v.to_str().ok())
+        .map(|v| v.trim() == "1")
+        .unwrap_or(false);
+
+    RawConsentSignals {
+        raw_tc_string,
+        raw_gpp_string,
+        raw_gpp_sid,
+        raw_us_privacy,
+        gpc,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cookies::parse_cookies_to_jar;
+
+    #[test]
+    fn no_cookies_no_headers() {
+        let req = Request::get("https://example.com");
+        let signals = extract_consent_signals(None, &req);
+        assert!(signals.is_empty(), "should produce empty signals");
+    }
+
+    #[test]
+    fn extracts_euconsent_v2() {
+        let jar = parse_cookies_to_jar("euconsent-v2=CPXxGfAPXxGfAAHABBENBCCsAP_AAH_AAAAAHftf");
+        let req = Request::get("https://example.com");
+        let signals = extract_consent_signals(Some(&jar), &req);
+
+        assert_eq!(
+            signals.raw_tc_string.as_deref(),
+            Some("CPXxGfAPXxGfAAHABBENBCCsAP_AAH_AAAAAHftf"),
+            "should extract euconsent-v2 cookie value"
+        );
+    }
+
+    #[test]
+    fn extracts_gpp_cookies() {
+        let jar = parse_cookies_to_jar("__gpp=DBACNYA~CPXxGfA; __gpp_sid=2,6");
+        let req = Request::get("https://example.com");
+        let signals = extract_consent_signals(Some(&jar), &req);
+
+        assert_eq!(
+            signals.raw_gpp_string.as_deref(),
+            Some("DBACNYA~CPXxGfA"),
+            "should extract __gpp cookie value"
+        );
+        assert_eq!(
+            signals.raw_gpp_sid.as_deref(),
+            Some("2,6"),
+            "should extract __gpp_sid cookie value"
+        );
+    }
+
+    #[test]
+    fn extracts_us_privacy() {
+        let jar = parse_cookies_to_jar("us_privacy=1YNN");
+        let req = Request::get("https://example.com");
+        let signals = extract_consent_signals(Some(&jar), &req);
+
+        assert_eq!(
+            signals.raw_us_privacy.as_deref(),
+            Some("1YNN"),
+            "should extract us_privacy cookie value"
+        );
+    }
+
+    #[test]
+    fn extracts_sec_gpc_header() {
+        let req = Request::get("https://example.com").with_header("sec-gpc", "1");
+        let signals = extract_consent_signals(None, &req);
+
+        assert!(signals.gpc, "should detect Sec-GPC: 1 header");
+    }
+
+    #[test]
+    fn sec_gpc_absent_when_not_set() {
+        let req = Request::get("https://example.com");
+        let signals = extract_consent_signals(None, &req);
+
+        assert!(!signals.gpc, "should default gpc to false");
+    }
+
+    #[test]
+    fn sec_gpc_absent_when_not_one() {
+        let req = Request::get("https://example.com").with_header("sec-gpc", "0");
+        let signals = extract_consent_signals(None, &req);
+
+        assert!(!signals.gpc, "should not treat Sec-GPC: 0 as opt-out");
+    }
+
+    #[test]
+    fn extracts_all_signals() {
+        let jar =
+            parse_cookies_to_jar("euconsent-v2=CPXxGf; __gpp=DBAC; __gpp_sid=2,6; us_privacy=1YNN");
+        let req = Request::get("https://example.com").with_header("sec-gpc", "1");
+        let signals = extract_consent_signals(Some(&jar), &req);
+
+        assert!(signals.raw_tc_string.is_some(), "should have tc_string");
+        assert!(signals.raw_gpp_string.is_some(), "should have gpp_string");
+        assert!(signals.raw_gpp_sid.is_some(), "should have gpp_sid");
+        assert!(signals.raw_us_privacy.is_some(), "should have us_privacy");
+        assert!(signals.gpc, "should have gpc");
+    }
+
+    #[test]
+    fn empty_jar_produces_no_cookie_signals() {
+        let jar = parse_cookies_to_jar("");
+        let req = Request::get("https://example.com");
+        let signals = extract_consent_signals(Some(&jar), &req);
+
+        assert!(
+            signals.raw_tc_string.is_none(),
+            "should have no tc_string from empty jar"
+        );
+        assert!(
+            signals.raw_gpp_string.is_none(),
+            "should have no gpp_string from empty jar"
+        );
+    }
+
+    #[test]
+    fn unrelated_cookies_ignored() {
+        let jar = parse_cookies_to_jar("session_id=abc123; theme=dark");
+        let req = Request::get("https://example.com");
+        let signals = extract_consent_signals(Some(&jar), &req);
+
+        assert!(
+            signals.is_empty(),
+            "should produce empty signals for unrelated cookies"
+        );
+    }
+}
diff --git a/crates/common/src/consent/gpp.rs b/crates/common/src/consent/gpp.rs
new file mode 100644
index 00000000..5032a104
--- /dev/null
+++ b/crates/common/src/consent/gpp.rs
@@ -0,0 +1,212 @@
+//! GPP (Global Privacy Platform) string decoder.
+//!
+//! Thin wrapper around the [`iab_gpp`] crate that maps decoded GPP data into
+//! our [`GppConsent`] domain type. The `iab_gpp` crate handles the heavy
+//! lifting of GPP v1 string parsing and section decoding; this module:
+//!
+//! 1. Parses the raw `__gpp` cookie value via [`iab_gpp::v1::GPPString`].
+//! 2. Extracts the header-level section IDs.
+//! 3. If the EU TCF v2.2 section is present, decodes it via our own
+//!    [`super::tcf::decode_tc_string`] (for consistency with standalone
+//!    `euconsent-v2` decoding).
+//!
+//! # Why wrap `iab_gpp`?
+//!
+//! - Isolates external dependency behind our own types.
+//! - Allows fallback/replacement without touching callers.
+//! - Maps `iab_gpp` errors into our [`ConsentDecodeError`] hierarchy.
+//!
+//! 
# References
+//!
+//! - [IAB GPP specification](https://github.com/InteractiveAdvertisingBureau/Global-Privacy-Platform)
+//! - [`iab_gpp` crate docs](https://docs.rs/iab_gpp)
+
+use error_stack::Report;
+
+use super::types::{ConsentDecodeError, GppConsent, TcfConsent};
+
+/// Decodes a GPP string into a [`GppConsent`] struct.
+///
+/// Parses the raw `__gpp` cookie value, extracts section IDs, and optionally
+/// decodes the EU TCF v2.2 section if present.
+///
+/// # Arguments
+///
+/// * `gpp_string` — the raw GPP string from the `__gpp` cookie.
+///
+/// # Errors
+///
+/// - [`ConsentDecodeError::InvalidGppString`] if the `iab_gpp` parser fails.
+pub fn decode_gpp_string(gpp_string: &str) -> Result<GppConsent, Report<ConsentDecodeError>> {
+    let parsed = iab_gpp::v1::GPPString::parse_str(gpp_string).map_err(|e| {
+        Report::new(ConsentDecodeError::InvalidGppString {
+            reason: format!("{e}"),
+        })
+    })?;
+
+    // Extract section IDs as u16 values.
+    let section_ids: Vec<u16> = parsed.section_ids().map(|id| *id as u16).collect();
+
+    // Attempt to extract and decode the EU TCF v2.2 section.
+    // Section ID 2 = TcfEuV2 in the GPP spec.
+    let eu_tcf = decode_tcf_from_gpp(&parsed);
+
+    // The GPP header version is always 1 for current spec.
+    Ok(GppConsent {
+        version: 1,
+        section_ids,
+        eu_tcf,
+    })
+}
+
+/// Attempts to decode the EU TCF v2.2 section from a parsed GPP string.
+///
+/// Uses our own TCF decoder on the raw section string (rather than
+/// `iab_gpp`'s TCF decoder) to ensure consistency with standalone
+/// `euconsent-v2` decoding.
+///
+/// Returns `None` if the TCF section is not present or cannot be decoded.
+fn decode_tcf_from_gpp(parsed: &iab_gpp::v1::GPPString) -> Option<TcfConsent> {
+    // iab_gpp::sections::SectionId::TcfEuV2 corresponds to section ID 2.
+    let tcf_section_str = parsed.section(iab_gpp::sections::SectionId::TcfEuV2)?;
+
+    // Delegate to our own TCF decoder for consistency.
+    match super::tcf::decode_tc_string(tcf_section_str) {
+        Ok(tcf) => Some(tcf),
+        Err(e) => {
+            log::warn!("GPP contains TCF EU v2 section but decoding failed: {e}");
+            None
+        }
+    }
+}
+
+/// Parses a `__gpp_sid` cookie value into a vector of section IDs.
+///
+/// The cookie is a comma-separated list of integer section IDs, e.g. `"2,6"`.
+/// Invalid entries are skipped without failing (each is logged at debug
+/// level) since the cookie is treated as a transport hint.
+///
+/// Returns `None` if the input is empty or contains no valid IDs.
+#[must_use]
+pub fn parse_gpp_sid_cookie(raw: &str) -> Option<Vec<u16>> {
+    let trimmed = raw.trim();
+    if trimmed.is_empty() {
+        return None;
+    }
+
+    let ids: Vec<u16> = trimmed
+        .split(',')
+        .filter_map(|s| {
+            let s = s.trim();
+            match s.parse::<u16>() {
+                Ok(id) => Some(id),
+                Err(_) => {
+                    log::debug!("Ignoring invalid __gpp_sid entry: {s:?}");
+                    None
+                }
+            }
+        })
+        .collect();
+
+    if ids.is_empty() {
+        None
+    } else {
+        Some(ids)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // A known-good GPP string with US Privacy section (section ID 6).
+    // Header "DBABTA" encodes: version=1, section IDs=[6] (UspV1).
+    // Section string: "1YNN" (US Privacy).
+    const GPP_USP_ONLY: &str = "DBABTA~1YNN";
+
+    // A GPP string with both TCF EU v2 and US Privacy sections.
+    // Header "DBACNY" encodes: version=1, section IDs=[2, 6].
+    // First section: TCF EU v2 consent string.
+    // Second section: US Privacy string.
+ const GPP_TCF_AND_USP: &str = "DBACNY~CPXxRfAPXxRfAAfKABENB-CgAAAAAAAAAAYgAAAAAAAA~1YNN"; + + #[test] + fn decodes_usp_only_gpp_string() { + let result = decode_gpp_string(GPP_USP_ONLY).expect("should decode USP-only GPP"); + assert_eq!(result.version, 1); + assert!(!result.section_ids.is_empty(), "should have section IDs"); + assert!( + result.eu_tcf.is_none(), + "should not have TCF section in USP-only string" + ); + } + + #[test] + fn decodes_gpp_with_tcf_section() { + let result = decode_gpp_string(GPP_TCF_AND_USP).expect("should decode GPP with TCF"); + assert_eq!(result.version, 1); + // Section IDs should include 2 (TCF EU v2) and 6 (USP v1). + assert!( + result.section_ids.contains(&2), + "should contain TCF EU v2 section ID (2)" + ); + // TCF section should be decoded (may or may not succeed depending + // on whether the section string is a valid base64-encoded TC String). + // The GPP TCF section format differs from standalone euconsent-v2, + // so eu_tcf might be None if our decoder can't parse the GPP-encoded + // TCF format. That's acceptable — we log and continue. 
+ } + + #[test] + fn rejects_invalid_gpp_string() { + let result = decode_gpp_string("totally-invalid"); + assert!(result.is_err(), "should reject invalid GPP string"); + } + + #[test] + fn rejects_empty_string() { + let result = decode_gpp_string(""); + assert!(result.is_err(), "should reject empty GPP string"); + } + + #[test] + fn parse_gpp_sid_simple() { + let ids = parse_gpp_sid_cookie("2,6").expect("should parse 2,6"); + assert_eq!(ids, vec![2, 6]); + } + + #[test] + fn parse_gpp_sid_single() { + let ids = parse_gpp_sid_cookie("2").expect("should parse single ID"); + assert_eq!(ids, vec![2]); + } + + #[test] + fn parse_gpp_sid_with_whitespace() { + let ids = parse_gpp_sid_cookie(" 2 , 6 , 8 ").expect("should handle whitespace"); + assert_eq!(ids, vec![2, 6, 8]); + } + + #[test] + fn parse_gpp_sid_empty_returns_none() { + assert!(parse_gpp_sid_cookie("").is_none(), "empty should be None"); + assert!( + parse_gpp_sid_cookie(" ").is_none(), + "whitespace should be None" + ); + } + + #[test] + fn parse_gpp_sid_skips_invalid_entries() { + let ids = parse_gpp_sid_cookie("2,abc,6").expect("should skip invalid"); + assert_eq!(ids, vec![2, 6]); + } + + #[test] + fn parse_gpp_sid_all_invalid_returns_none() { + assert!( + parse_gpp_sid_cookie("abc,def").is_none(), + "all-invalid should be None" + ); + } +} diff --git a/crates/common/src/consent/jurisdiction.rs b/crates/common/src/consent/jurisdiction.rs new file mode 100644 index 00000000..b91313c7 --- /dev/null +++ b/crates/common/src/consent/jurisdiction.rs @@ -0,0 +1,208 @@ +//! Jurisdiction detection for consent observability. +//! +//! Determines the applicable privacy regime based on geolocation data and +//! publisher configuration. Used for **logging and monitoring only** — the +//! detected jurisdiction never causes consent to be synthesized (see proposal +//! Key Decision #3). 
+ +use core::fmt; + +use crate::consent_config::ConsentConfig; +use crate::geo::GeoInfo; + +/// The privacy jurisdiction applicable to a request. +/// +/// Derived from the user's geolocation and the publisher's configured +/// country/state lists. Used for observability — not for consent synthesis. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub enum Jurisdiction { + /// GDPR applies (EU/EEA/UK per `consent.gdpr.applies_in`). + Gdpr, + /// A US state with an active comprehensive privacy law. + UsState(String), + /// Geolocation is known but no matching regulation was found. + NonRegulated, + /// No geolocation data available — jurisdiction cannot be determined. + #[default] + Unknown, +} + +impl fmt::Display for Jurisdiction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Gdpr => write!(f, "GDPR"), + Self::UsState(state) => write!(f, "US-{state}"), + Self::NonRegulated => write!(f, "non-regulated"), + Self::Unknown => write!(f, "unknown"), + } + } +} + +/// Detects the privacy jurisdiction for a request based on geolocation. +/// +/// Checks the user's country against `config.gdpr.applies_in`, and for US +/// users checks the region against `config.us_states.privacy_states`. +/// +/// Returns [`Jurisdiction::Unknown`] when no geo data is available. +#[must_use] +pub fn detect_jurisdiction(geo: Option<&GeoInfo>, config: &ConsentConfig) -> Jurisdiction { + let geo = match geo { + Some(g) => g, + None => return Jurisdiction::Unknown, + }; + + // Check GDPR countries first (EU/EEA/UK). + if config + .gdpr + .applies_in + .iter() + .any(|code| code.eq_ignore_ascii_case(&geo.country)) + { + return Jurisdiction::Gdpr; + } + + // For US users, check if the region is a state with a privacy law. 
+ if geo.country.eq_ignore_ascii_case("US") { + if let Some(region) = &geo.region { + if config + .us_states + .privacy_states + .iter() + .any(|state| state.eq_ignore_ascii_case(region)) + { + return Jurisdiction::UsState(region.to_uppercase()); + } + } + } + + Jurisdiction::NonRegulated +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::{detect_jurisdiction, Jurisdiction}; + use crate::consent_config::ConsentConfig; + use crate::geo::GeoInfo; + + fn make_geo(country: &str, region: Option<&str>) -> GeoInfo { + GeoInfo { + city: "Test".to_owned(), + country: country.to_owned(), + continent: "Test".to_owned(), + latitude: 0.0, + longitude: 0.0, + metro_code: 0, + region: region.map(str::to_owned), + } + } + + #[test] + fn gdpr_detected_for_eu_country() { + let config = ConsentConfig::default(); + let geo = make_geo("DE", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::Gdpr, + "Germany should trigger GDPR" + ); + } + + #[test] + fn gdpr_detected_for_eea_country() { + let config = ConsentConfig::default(); + let geo = make_geo("NO", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::Gdpr, + "Norway (EEA) should trigger GDPR" + ); + } + + #[test] + fn gdpr_detected_for_uk() { + let config = ConsentConfig::default(); + let geo = make_geo("GB", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::Gdpr, + "UK should trigger GDPR" + ); + } + + #[test] + fn us_state_detected_for_california() { + let config = ConsentConfig::default(); + let geo = make_geo("US", Some("CA")); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::UsState("CA".to_owned()), + "California should trigger US state privacy" + ); + } + + #[test] + fn us_non_privacy_state_is_non_regulated() { + let config = 
ConsentConfig::default(); + let geo = make_geo("US", Some("WY")); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::NonRegulated, + "Wyoming should be non-regulated" + ); + } + + #[test] + fn us_no_region_is_non_regulated() { + let config = ConsentConfig::default(); + let geo = make_geo("US", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::NonRegulated, + "US without region should be non-regulated" + ); + } + + #[test] + fn non_gdpr_non_us_is_non_regulated() { + let config = ConsentConfig::default(); + let geo = make_geo("JP", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::NonRegulated, + "Japan should be non-regulated" + ); + } + + #[test] + fn no_geo_returns_unknown() { + let config = ConsentConfig::default(); + assert_eq!( + detect_jurisdiction(None, &config), + Jurisdiction::Unknown, + "missing geo should return unknown" + ); + } + + #[test] + fn case_insensitive_country_matching() { + let config = ConsentConfig::default(); + let geo = make_geo("de", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::Gdpr, + "lowercase country code should still match" + ); + } + + #[test] + fn display_formatting() { + assert_eq!(Jurisdiction::Gdpr.to_string(), "GDPR"); + assert_eq!(Jurisdiction::UsState("CA".to_owned()).to_string(), "US-CA"); + assert_eq!(Jurisdiction::NonRegulated.to_string(), "non-regulated"); + assert_eq!(Jurisdiction::Unknown.to_string(), "unknown"); + } +} diff --git a/crates/common/src/consent/kv.rs b/crates/common/src/consent/kv.rs new file mode 100644 index 00000000..2cef966d --- /dev/null +++ b/crates/common/src/consent/kv.rs @@ -0,0 +1,549 @@ +//! KV Store consent persistence. +//! +//! Stores and retrieves consent data from a Fastly KV Store, keyed by +//! Synthetic ID. This provides consent continuity for returning users +//! whose browsers may not have consent cookies on every request. +//! +//! # Storage layout +//! +//! 
Each entry uses: +//! - **Body** ([`KvConsentEntry`]) — JSON with raw consent strings and context. +//! - **Metadata** ([`ConsentKvMetadata`]) — compact JSON summary for fast +//! consent status checks and change detection (max 2000 bytes). +//! +//! # Change detection +//! +//! Writes only occur when consent signals have actually changed. +//! [`consent_fingerprint`] hashes the raw strings into a compact fingerprint +//! stored in metadata. On the next request, the existing fingerprint is +//! compared before writing. + +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; + +use super::jurisdiction::Jurisdiction; +use super::types::{ConsentContext, ConsentSource}; + +// --------------------------------------------------------------------------- +// KV body (JSON, stored as value) +// --------------------------------------------------------------------------- + +/// Consent data stored in the KV Store body. +/// +/// Contains the raw consent strings needed to reconstruct a [`ConsentContext`]. +/// Decoded data (TCF, GPP, US Privacy) is not stored — it is re-decoded on +/// read to avoid stale decoded state. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KvConsentEntry { + /// Raw TC String from `euconsent-v2` cookie. + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_tc_string: Option, + /// Raw GPP string from `__gpp` cookie. + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_gpp_string: Option, + /// GPP section IDs (decoded or from `__gpp_sid` cookie). + #[serde(skip_serializing_if = "Option::is_none")] + pub gpp_section_ids: Option>, + /// Raw US Privacy string from `us_privacy` cookie. + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_us_privacy: Option, + /// Raw Google Additional Consent (AC) string. + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_ac_string: Option, + + /// Whether GDPR applies to this request. + pub gdpr_applies: bool, + /// Global Privacy Control signal. 
+ pub gpc: bool, + /// Serialized jurisdiction (e.g. `"GDPR"`, `"US-CA"`, `"unknown"`). + pub jurisdiction: String, + + /// When this entry was stored (deciseconds since Unix epoch). + pub stored_at_ds: u64, +} + +// --------------------------------------------------------------------------- +// KV metadata (compact JSON, max 2000 bytes) +// --------------------------------------------------------------------------- + +/// Compact consent summary stored in KV Store metadata. +/// +/// Used for fast consent status checks without reading the full body, +/// and for change detection via the `fingerprint` field. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ConsentKvMetadata { + /// SHA-256 fingerprint (first 16 hex chars) of all raw consent strings. + /// + /// Used for write-on-change detection. If the fingerprint matches the + /// current request's consent signals, no write is needed. + pub fp: String, + /// Whether GDPR applies. + pub gdpr: bool, + /// Whether GPC is set. + pub gpc: bool, + /// Whether a US Privacy string is present. + pub usp: bool, + /// Whether a TCF string is present. + pub tcf: bool, +} + +// --------------------------------------------------------------------------- +// Conversions +// --------------------------------------------------------------------------- + +/// Builds a [`KvConsentEntry`] from a [`ConsentContext`]. +/// +/// Captures only the raw strings and contextual flags. Decoded data is +/// intentionally omitted — it will be re-decoded on read. 
+#[must_use] +pub fn entry_from_context(ctx: &ConsentContext, now_ds: u64) -> KvConsentEntry { + KvConsentEntry { + raw_tc_string: ctx.raw_tc_string.clone(), + raw_gpp_string: ctx.raw_gpp_string.clone(), + gpp_section_ids: ctx.gpp_section_ids.clone(), + raw_us_privacy: ctx.raw_us_privacy.clone(), + raw_ac_string: ctx.raw_ac_string.clone(), + gdpr_applies: ctx.gdpr_applies, + gpc: ctx.gpc, + jurisdiction: ctx.jurisdiction.to_string(), + stored_at_ds: now_ds, + } +} + +/// Builds a [`ConsentKvMetadata`] from a [`ConsentContext`]. +#[must_use] +pub fn metadata_from_context(ctx: &ConsentContext) -> ConsentKvMetadata { + ConsentKvMetadata { + fp: consent_fingerprint(ctx), + gdpr: ctx.gdpr_applies, + gpc: ctx.gpc, + usp: ctx.raw_us_privacy.is_some(), + tcf: ctx.raw_tc_string.is_some(), + } +} + +/// Converts a [`KvConsentEntry`] into [`super::types::RawConsentSignals`] +/// suitable for re-decoding via [`super::build_context_from_signals`]. +#[must_use] +pub fn signals_from_entry(entry: &KvConsentEntry) -> super::types::RawConsentSignals { + super::types::RawConsentSignals { + raw_tc_string: entry.raw_tc_string.clone(), + raw_gpp_string: entry.raw_gpp_string.clone(), + raw_gpp_sid: entry.gpp_section_ids.as_ref().map(|ids| { + ids.iter() + .map(ToString::to_string) + .collect::>() + .join(",") + }), + raw_us_privacy: entry.raw_us_privacy.clone(), + gpc: entry.gpc, + } +} + +/// Reconstructs a [`ConsentContext`] from a KV Store entry. +/// +/// Re-decodes the raw strings to populate structured fields (TCF, GPP, US +/// Privacy). The `source` is set to [`ConsentSource::KvStore`] and the +/// `jurisdiction` is parsed from the stored string representation. +#[must_use] +pub fn context_from_entry(entry: &KvConsentEntry) -> ConsentContext { + let signals = signals_from_entry(entry); + let mut ctx = super::build_context_from_signals(&signals); + + // Restore context fields that aren't derived from raw signals. 
+ ctx.gdpr_applies = entry.gdpr_applies; + ctx.gpc = entry.gpc; + ctx.raw_ac_string = entry.raw_ac_string.clone(); + ctx.jurisdiction = parse_jurisdiction(&entry.jurisdiction); + ctx.source = ConsentSource::KvStore; + + ctx +} + +// --------------------------------------------------------------------------- +// Fingerprinting +// --------------------------------------------------------------------------- + +/// Computes a compact fingerprint of the consent signals for change detection. +/// +/// Returns the first 16 hex characters of a SHA-256 hash computed over all +/// raw consent strings and the GPC flag. This is sufficient for detecting +/// changes without storing full hashes. +#[must_use] +pub fn consent_fingerprint(ctx: &ConsentContext) -> String { + let mut hasher = Sha256::new(); + + // Feed each signal into the hash, separated by a sentinel byte to + // prevent ambiguity (e.g., None+Some("x") vs Some("x")+None). + hash_optional(&mut hasher, ctx.raw_tc_string.as_deref()); + hash_optional(&mut hasher, ctx.raw_gpp_string.as_deref()); + hash_optional(&mut hasher, ctx.raw_us_privacy.as_deref()); + hash_optional(&mut hasher, ctx.raw_ac_string.as_deref()); + hasher.update(if ctx.gpc { b"1" } else { b"0" }); + + let result = hasher.finalize(); + hex::encode(&result[..8]) // 16 hex chars = 8 bytes = 64 bits +} + +/// Feeds an optional string into the hasher with sentinel bytes. +fn hash_optional(hasher: &mut Sha256, value: Option<&str>) { + match value { + Some(s) => { + hasher.update(b"\x01"); + hasher.update(s.as_bytes()); + } + None => hasher.update(b"\x00"), + } +} + +/// Parses a jurisdiction string back into a [`Jurisdiction`] enum. 
+fn parse_jurisdiction(s: &str) -> Jurisdiction { + match s { + "GDPR" => Jurisdiction::Gdpr, + "non-regulated" => Jurisdiction::NonRegulated, + "unknown" => Jurisdiction::Unknown, + s if s.starts_with("US-") => Jurisdiction::UsState(s[3..].to_owned()), + _ => Jurisdiction::Unknown, + } +} + +// --------------------------------------------------------------------------- +// KV Store operations +// --------------------------------------------------------------------------- + +/// Opens a Fastly KV Store by name, logging a warning on failure. +/// +/// Returns [`None`] if the store does not exist or cannot be opened. +fn open_store(store_name: &str) -> Option { + match fastly::kv_store::KVStore::open(store_name) { + Ok(Some(store)) => Some(store), + Ok(None) => { + log::warn!("Consent KV store '{store_name}' not found"); + None + } + Err(e) => { + log::warn!("Failed to open consent KV store '{store_name}': {e}"); + None + } + } +} + +/// Checks whether the stored consent fingerprint matches the current one. +/// +/// Returns `true` when the stored metadata fingerprint equals `new_fp`, +/// meaning no write is needed. +fn fingerprint_unchanged( + store: &fastly::kv_store::KVStore, + synthetic_id: &str, + new_fp: &str, +) -> bool { + let stored_fp = store + .lookup(synthetic_id) + .ok() + .and_then(|resp| resp.metadata()) + .and_then(|bytes| serde_json::from_slice::(&bytes).ok()) + .map(|meta| meta.fp); + + stored_fp.as_deref() == Some(new_fp) +} + +/// Loads consent data from the KV Store for a given Synthetic ID. +/// +/// Returns `Some(ConsentContext)` if a valid entry is found, [`None`] if the +/// key does not exist or deserialization fails. Errors are logged but never +/// propagated — KV Store failures must not break the request pipeline. +/// +/// # Arguments +/// +/// * `store_name` — The KV Store name (from `consent.consent_store` config). +/// * `synthetic_id` — The Synthetic ID used as the KV Store key. 
+#[must_use] +pub fn load_consent_from_kv(store_name: &str, synthetic_id: &str) -> Option { + let store = open_store(store_name)?; + + let mut response = match store.lookup(synthetic_id) { + Ok(resp) => resp, + Err(e) => { + log::debug!("Consent KV lookup miss for '{synthetic_id}': {e}"); + return None; + } + }; + + let body_bytes = response.take_body_bytes(); + match serde_json::from_slice::(&body_bytes) { + Ok(entry) => { + log::info!( + "Loaded consent from KV store for '{synthetic_id}' (stored_at_ds={})", + entry.stored_at_ds + ); + Some(context_from_entry(&entry)) + } + Err(e) => { + log::warn!("Failed to deserialize consent KV entry for '{synthetic_id}': {e}"); + None + } + } +} + +/// Saves consent data to the KV Store, writing only when signals have changed. +/// +/// Compares the fingerprint of the current consent signals against the +/// stored metadata. If they match, the write is skipped. Otherwise, the +/// entry is written with the configured TTL. +/// +/// # Arguments +/// +/// * `store_name` — The KV Store name (from `consent.consent_store` config). +/// * `synthetic_id` — The Synthetic ID used as the KV Store key. +/// * `ctx` — The current request's consent context. +/// * `max_age_days` — TTL for the entry, matching `max_consent_age_days`. 
+pub fn save_consent_to_kv( + store_name: &str, + synthetic_id: &str, + ctx: &ConsentContext, + max_age_days: u32, +) { + if ctx.is_empty() { + log::debug!("Skipping consent KV write: consent is empty"); + return; + } + + let Some(store) = open_store(store_name) else { + return; + }; + + let metadata = metadata_from_context(ctx); + + if fingerprint_unchanged(&store, synthetic_id, &metadata.fp) { + log::debug!( + "Consent unchanged for '{synthetic_id}' (fp={}), skipping write", + metadata.fp + ); + return; + } + + let entry = entry_from_context(ctx, super::now_deciseconds()); + + let Ok(body) = serde_json::to_string(&entry) else { + log::warn!("Failed to serialize consent entry for '{synthetic_id}'"); + return; + }; + let Ok(meta_str) = serde_json::to_string(&metadata) else { + log::warn!("Failed to serialize consent metadata for '{synthetic_id}'"); + return; + }; + + let ttl = std::time::Duration::from_secs(u64::from(max_age_days) * 86_400); + + match store + .build_insert() + .metadata(&meta_str) + .time_to_live(ttl) + .execute(synthetic_id, body) + { + Ok(()) => { + log::info!( + "Saved consent to KV store for '{synthetic_id}' (fp={}, ttl={max_age_days}d)", + metadata.fp + ); + } + Err(e) => { + log::warn!("Failed to write consent to KV store for '{synthetic_id}': {e}"); + } + } +} +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::consent::jurisdiction::Jurisdiction; + use crate::consent::types::{ConsentContext, ConsentSource}; + + fn make_test_context() -> ConsentContext { + ConsentContext { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + raw_gpp_string: Some("DBACNYA~CPXxGfA".to_owned()), + gpp_section_ids: Some(vec![2, 6]), + raw_us_privacy: Some("1YNN".to_owned()), + raw_ac_string: None, + gdpr_applies: true, + tcf: None, + gpp: None, + us_privacy: None, + expired: false, + gpc: 
false, + jurisdiction: Jurisdiction::Gdpr, + source: ConsentSource::Cookie, + } + } + + #[test] + fn entry_roundtrip() { + let ctx = make_test_context(); + let entry = entry_from_context(&ctx, 1_000_000); + let json = serde_json::to_string(&entry).expect("should serialize"); + let restored: KvConsentEntry = serde_json::from_str(&json).expect("should deserialize"); + + assert_eq!(restored.raw_tc_string, ctx.raw_tc_string); + assert_eq!(restored.raw_gpp_string, ctx.raw_gpp_string); + assert_eq!(restored.gpp_section_ids, ctx.gpp_section_ids); + assert_eq!(restored.raw_us_privacy, ctx.raw_us_privacy); + assert_eq!(restored.gdpr_applies, ctx.gdpr_applies); + assert_eq!(restored.gpc, ctx.gpc); + assert_eq!(restored.jurisdiction, "GDPR"); + assert_eq!(restored.stored_at_ds, 1_000_000); + } + + #[test] + fn metadata_roundtrip() { + let ctx = make_test_context(); + let meta = metadata_from_context(&ctx); + let json = serde_json::to_string(&meta).expect("should serialize"); + let restored: ConsentKvMetadata = serde_json::from_str(&json).expect("should deserialize"); + + assert_eq!(restored.fp, meta.fp); + assert!(restored.gdpr); + assert!(!restored.gpc); + assert!(restored.usp); + assert!(restored.tcf); + } + + #[test] + fn metadata_fits_in_2000_bytes() { + let ctx = make_test_context(); + let meta = metadata_from_context(&ctx); + let json = serde_json::to_string(&meta).expect("should serialize"); + assert!( + json.len() <= 2000, + "metadata JSON must fit in 2000 bytes, was {} bytes", + json.len() + ); + } + + #[test] + fn context_roundtrip_via_entry() { + let original = make_test_context(); + let entry = entry_from_context(&original, 1_000_000); + let restored = context_from_entry(&entry); + + assert_eq!(restored.raw_tc_string, original.raw_tc_string); + assert_eq!(restored.raw_gpp_string, original.raw_gpp_string); + assert_eq!(restored.raw_us_privacy, original.raw_us_privacy); + assert_eq!(restored.gdpr_applies, original.gdpr_applies); + assert_eq!(restored.gpc, 
original.gpc); + assert_eq!(restored.jurisdiction, original.jurisdiction); + assert_eq!(restored.source, ConsentSource::KvStore); + } + + #[test] + fn fingerprint_deterministic() { + let ctx = make_test_context(); + let fp1 = consent_fingerprint(&ctx); + let fp2 = consent_fingerprint(&ctx); + assert_eq!(fp1, fp2, "fingerprint should be deterministic"); + assert_eq!(fp1.len(), 16, "fingerprint should be 16 hex chars"); + } + + #[test] + fn fingerprint_changes_with_different_signals() { + let ctx1 = make_test_context(); + let mut ctx2 = make_test_context(); + ctx2.raw_tc_string = Some("DIFFERENT_TC_STRING".to_owned()); + + assert_ne!( + consent_fingerprint(&ctx1), + consent_fingerprint(&ctx2), + "different TC strings should produce different fingerprints" + ); + } + + #[test] + fn fingerprint_changes_with_gpc() { + let mut ctx1 = make_test_context(); + ctx1.gpc = false; + let mut ctx2 = make_test_context(); + ctx2.gpc = true; + + assert_ne!( + consent_fingerprint(&ctx1), + consent_fingerprint(&ctx2), + "different GPC values should produce different fingerprints" + ); + } + + #[test] + fn fingerprint_distinguishes_none_from_empty() { + let mut ctx_none = make_test_context(); + ctx_none.raw_tc_string = None; + let mut ctx_empty = make_test_context(); + ctx_empty.raw_tc_string = Some(String::new()); + + assert_ne!( + consent_fingerprint(&ctx_none), + consent_fingerprint(&ctx_empty), + "None vs empty string should produce different fingerprints" + ); + } + + #[test] + fn signals_from_entry_roundtrip() { + let ctx = make_test_context(); + let entry = entry_from_context(&ctx, 1_000_000); + let signals = signals_from_entry(&entry); + + assert_eq!(signals.raw_tc_string, ctx.raw_tc_string); + assert_eq!(signals.raw_gpp_string, ctx.raw_gpp_string); + assert_eq!(signals.raw_us_privacy, ctx.raw_us_privacy); + assert_eq!(signals.gpc, ctx.gpc); + // gpp_sid is serialized as "2,6" from the section IDs + assert_eq!(signals.raw_gpp_sid, Some("2,6".to_owned())); + } + + #[test] + fn 
parse_jurisdiction_roundtrip() { + assert_eq!(parse_jurisdiction("GDPR"), Jurisdiction::Gdpr); + assert_eq!( + parse_jurisdiction("US-CA"), + Jurisdiction::UsState("CA".to_owned()) + ); + assert_eq!( + parse_jurisdiction("non-regulated"), + Jurisdiction::NonRegulated + ); + assert_eq!(parse_jurisdiction("unknown"), Jurisdiction::Unknown); + assert_eq!( + parse_jurisdiction("something-else"), + Jurisdiction::Unknown, + "unrecognized jurisdiction should default to Unknown" + ); + } + + #[test] + fn empty_entry_serializes_compact() { + let ctx = ConsentContext::default(); + let entry = entry_from_context(&ctx, 0); + let json = serde_json::to_string(&entry).expect("should serialize"); + // With skip_serializing_if = "Option::is_none", omitted fields keep it small. + assert!( + !json.contains("raw_tc_string"), + "None fields should be omitted from JSON" + ); + } + + #[test] + fn entry_preserves_ac_string() { + let mut ctx = make_test_context(); + ctx.raw_ac_string = Some("2~1234.5678~dv.".to_owned()); + let entry = entry_from_context(&ctx, 0); + let restored = context_from_entry(&entry); + + assert_eq!( + restored.raw_ac_string, + Some("2~1234.5678~dv.".to_owned()), + "AC string should survive roundtrip" + ); + } +} diff --git a/crates/common/src/consent/mod.rs b/crates/common/src/consent/mod.rs new file mode 100644 index 00000000..3636bbb5 --- /dev/null +++ b/crates/common/src/consent/mod.rs @@ -0,0 +1,539 @@ +//! Consent signal extraction, decoding, and normalization. +//! +//! This module implements the consent forwarding pipeline: +//! +//! 1. **Extract** raw consent strings from cookies and HTTP headers. +//! 2. **Decode** each signal into structured data (TCF v2, GPP, US Privacy). +//! 3. **Build** a normalized [`ConsentContext`] that flows through the auction +//! pipeline and populates `OpenRTB` bid requests. +//! +//! # Supported signals +//! +//! - **TCF v2** — `euconsent-v2` cookie (IAB Transparency & Consent Framework) +//! 
- **GPP** — `__gpp` and `__gpp_sid` cookies (IAB Global Privacy Platform) +//! - **US Privacy** — `us_privacy` cookie (IAB US Privacy / CCPA) +//! - **GPC** — `Sec-GPC` header (Global Privacy Control) +//! +//! # Usage +//! +//! ```ignore +//! let consent = consent::build_consent_context(&consent::ConsentPipelineInput { +//! jar: cookie_jar.as_ref(), +//! req: &req, +//! config: &settings.consent, +//! geo: geo.as_ref(), +//! synthetic_id: Some("sid_abc123"), +//! }); +//! ``` + +mod extraction; +pub mod gpp; +pub mod jurisdiction; +pub mod kv; +pub mod tcf; +pub mod types; +pub mod us_privacy; + +pub use extraction::extract_consent_signals; +pub use types::{ConsentContext, ConsentSource, PrivacyFlag, RawConsentSignals, TcfConsent}; + +use std::time::{SystemTime, UNIX_EPOCH}; + +use cookie::CookieJar; +use fastly::Request; + +use crate::consent_config::{ConsentConfig, ConsentMode}; +use crate::geo::GeoInfo; + +/// Number of deciseconds in one day (86 400 seconds × 10). +const DECISECONDS_PER_DAY: u64 = 86_400 * 10; + +/// Inputs to the consent processing pipeline. +/// +/// Bundles all data needed to extract, decode, classify, and validate +/// consent signals from a single request. +pub struct ConsentPipelineInput<'a> { + /// Parsed cookie jar from the incoming request. + pub jar: Option<&'a CookieJar>, + /// The incoming HTTP request (for header access). + pub req: &'a Request, + /// Publisher consent configuration. + pub config: &'a ConsentConfig, + /// Geolocation data from the request (for jurisdiction detection). + pub geo: Option<&'a GeoInfo>, + /// Synthetic ID for KV Store consent persistence. + /// + /// When set along with `config.consent_store`, enables: + /// - **Read fallback**: loads consent from KV when cookies are absent. + /// - **Write-on-change**: persists cookie-sourced consent to KV. + pub synthetic_id: Option<&'a str>, +} + +/// Extracts, decodes, and normalizes consent signals from a request. 
+///
+/// This is the primary entry point for the consent pipeline. It:
+///
+/// 1. Reads raw consent strings from cookies and headers.
+/// 2. Decodes each signal (TCF v2, GPP, US Privacy).
+/// 3. Detects the privacy jurisdiction from geolocation.
+/// 4. Checks consent expiration (if enabled).
+/// 5. Constructs a US Privacy string from GPC when appropriate.
+/// 6. Builds a [`ConsentContext`] with both raw and decoded data.
+/// 7. Logs a summary for observability.
+///
+/// Decoding failures are logged and the corresponding decoded field is set to
+/// `None` — the raw string is still preserved for proxy-mode forwarding.
+///
+/// # Early exits
+///
+/// - In [`ConsentMode::Proxy`] nothing is decoded: the raw strings are passed
+///   through, `gdpr_applies` mirrors the presence of a raw TC string, and the
+///   KV Store is neither read nor written.
+/// - When the request carries no consent cookies and `try_kv_fallback` yields a
+///   context, that context is logged and returned; no KV write is attempted.
+pub fn build_consent_context(input: &ConsentPipelineInput<'_>) -> ConsentContext {
+    let signals = extract_consent_signals(input.jar, input.req);
+    log_consent_signals(&signals);
+
+    // In proxy mode, skip decoding entirely.
+    if input.config.mode == ConsentMode::Proxy {
+        let jur = jurisdiction::detect_jurisdiction(input.geo, input.config);
+        // Without decoding, the mere presence of a TC string is the only GDPR hint.
+        let gdpr_applies = signals.raw_tc_string.is_some();
+        log::debug!("Consent proxy mode: jurisdiction={jur}, skipping decode");
+        return ConsentContext {
+            raw_tc_string: signals.raw_tc_string,
+            raw_gpp_string: signals.raw_gpp_string,
+            // Section IDs come straight from the `__gpp_sid` cookie here,
+            // because the GPP string itself is not decoded in this mode.
+            gpp_section_ids: signals
+                .raw_gpp_sid
+                .as_deref()
+                .and_then(gpp::parse_gpp_sid_cookie),
+            raw_us_privacy: signals.raw_us_privacy,
+            raw_ac_string: None,
+            gdpr_applies,
+            tcf: None,
+            gpp: None,
+            us_privacy: None,
+            expired: false,
+            gpc: signals.gpc,
+            jurisdiction: jur,
+            source: ConsentSource::Cookie,
+        };
+    }
+
+    // KV Store fallback: if no cookie-based signals exist, try loading
+    // persisted consent from the KV Store.
+    if should_try_kv_fallback(&signals) {
+        if let Some(ctx) = try_kv_fallback(input) {
+            log_consent_context(&ctx);
+            return ctx;
+        }
+    }
+
+    // Decode first, then layer request-specific state on top: jurisdiction,
+    // expiration, and the GPC-derived US Privacy default (in that order, since
+    // the GPC step reads the jurisdiction set here).
+    let mut ctx = build_context_from_signals(&signals);
+    ctx.jurisdiction = jurisdiction::detect_jurisdiction(input.geo, input.config);
+    apply_expiration_check(&mut ctx, input.config);
+    apply_gpc_us_privacy(&mut ctx, input.config);
+
+    // KV Store write: persist cookie-sourced consent for future requests.
+    try_kv_write(input, &ctx);
+
+    log_consent_context(&ctx);
+    ctx
+}
+
+/// Marks TCF consent as expired when it exceeds the configured maximum age.
+///
+/// Clears the decoded `tcf` field (treated as no consent) but preserves the
+/// raw string for proxy-mode forwarding. Re-evaluates `gdpr_applies` based
+/// on whether a GPP EU TCF section is still available.
+///
+/// Does nothing when `config.check_expiration` is off, when no decoded TCF
+/// data is present, or when the consent is still within
+/// `config.max_consent_age_days`.
+fn apply_expiration_check(ctx: &mut ConsentContext, config: &ConsentConfig) {
+    if !config.check_expiration {
+        return;
+    }
+
+    let tcf = match &ctx.tcf {
+        Some(tcf) => tcf,
+        None => return,
+    };
+
+    if !is_consent_expired(tcf, config.max_consent_age_days) {
+        return;
+    }
+
+    // The age is computed only for the log line below.
+    let age_days = consent_age_days(tcf);
+    log::warn!(
+        "TCF consent expired (age: {age_days}d, max: {}d)",
+        config.max_consent_age_days
+    );
+    ctx.expired = true;
+    ctx.tcf = None;
+    // Re-evaluate: GDPR may still apply if GPP has a TCF section.
+    // Without such a section the flag becomes `false`, so consumers that look
+    // at `gdpr_applies` alone cannot tell an expired string from "no GDPR";
+    // `ctx.expired` carries that distinction.
+    ctx.gdpr_applies = ctx.gpp.as_ref().is_some_and(|g| g.eu_tcf.is_some());
+}
+
+/// Constructs a US Privacy string from GPC when no explicit cookie exists
+/// and the user is in a US state with a privacy law.
+fn apply_gpc_us_privacy(ctx: &mut ConsentContext, config: &ConsentConfig) {
+    // Nothing to do without a GPC signal, or when a US Privacy value was
+    // already decoded. The guard inspects the decoded value only, so a raw
+    // cookie that failed to decode is replaced below.
+    if !ctx.gpc || ctx.us_privacy.is_some() {
+        return;
+    }
+    // Only US-state jurisdictions get a synthesized string.
+    if !matches!(&ctx.jurisdiction, jurisdiction::Jurisdiction::UsState(_)) {
+        return;
+    }
+
+    if let Some(usp) = build_us_privacy_from_gpc(config) {
+        log::info!("Constructed US Privacy string from GPC: {usp}");
+        // Keep the raw and decoded representations in sync, and record that
+        // the value stems from publisher policy rather than a cookie.
+        ctx.raw_us_privacy = Some(usp.to_string());
+        ctx.us_privacy = Some(usp);
+        ctx.source = ConsentSource::PolicyDefault;
+    }
+}
+
+/// Extracts raw consent signals and logs them (without decoding).
+///
+/// Use this when you need the raw signals but don't need decoded data.
+/// Prefer [`build_consent_context`] for the full pipeline.
+///
+/// Returns the [`RawConsentSignals`] produced by [`extract_consent_signals`]
+/// unchanged.
+pub fn extract_and_log_consent(jar: Option<&CookieJar>, req: &Request) -> RawConsentSignals {
+    let signals = extract_consent_signals(jar, req);
+    log_consent_signals(&signals);
+    signals
+}
+
+/// Decodes a raw consent string, logging a warning on failure.
+///
+/// Returns [`None`] and logs at `warn` level if decoding fails, preserving
+/// the raw string for proxy-mode forwarding.
+///
+/// - `raw`: the raw string to decode; `None` short-circuits to `None` without
+///   calling `decode`.
+/// - `label`: human-readable signal name, used only in the warning message.
+/// - `decode`: the decoder to apply to the raw string.
+fn decode_or_warn(
+    raw: Option<&str>,
+    label: &str,
+    decode: fn(&str) -> Result,
+) -> Option {
+    raw.and_then(|s| match decode(s) {
+        Ok(value) => Some(value),
+        Err(e) => {
+            // Deliberately best-effort: one undecodable signal must not
+            // prevent the other signals from being processed.
+            log::warn!("Failed to decode {label}: {e}");
+            None
+        }
+    })
+}
+
+/// Builds a [`ConsentContext`] from previously extracted raw signals.
+///
+/// This is the decode + normalize stage of the pipeline. Each signal is
+/// decoded independently; failures are logged at `warn` level and the
+/// corresponding decoded field is left as `None`.
+#[must_use] +pub fn build_context_from_signals(signals: &RawConsentSignals) -> ConsentContext { + let decoded_us_privacy = decode_or_warn( + signals.raw_us_privacy.as_deref(), + "US Privacy string", + us_privacy::decode_us_privacy, + ); + let decoded_tcf = decode_or_warn( + signals.raw_tc_string.as_deref(), + "TC String", + tcf::decode_tc_string, + ); + let decoded_gpp = decode_or_warn( + signals.raw_gpp_string.as_deref(), + "GPP string", + gpp::decode_gpp_string, + ); + + // Resolve GPP section IDs: + // - Prefer decoded GPP section IDs (authoritative). + // - Fall back to __gpp_sid cookie (transport hint). + let gpp_section_ids = decoded_gpp + .as_ref() + .map(|g| g.section_ids.clone()) + .or_else(|| { + signals + .raw_gpp_sid + .as_deref() + .and_then(gpp::parse_gpp_sid_cookie) + }); + + // GDPR applies if we have a TCF string (standalone or from GPP). + let gdpr_applies = + decoded_tcf.is_some() || decoded_gpp.as_ref().is_some_and(|g| g.eu_tcf.is_some()); + + ConsentContext { + raw_tc_string: signals.raw_tc_string.clone(), + raw_gpp_string: signals.raw_gpp_string.clone(), + gpp_section_ids, + raw_us_privacy: signals.raw_us_privacy.clone(), + // AC string extraction not yet implemented — will be added when + // the CMP-specific cookie source is determined (Phase 1a). + raw_ac_string: None, + + gdpr_applies, + tcf: decoded_tcf, + gpp: decoded_gpp, + us_privacy: decoded_us_privacy, + + expired: false, + + gpc: signals.gpc, + jurisdiction: jurisdiction::Jurisdiction::default(), + source: ConsentSource::Cookie, + } +} + +/// Returns the current time in deciseconds since the Unix epoch. +pub(crate) fn now_deciseconds() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 + / 100 +} + +/// Returns the age of a TCF consent string in days. 
+fn consent_age_days(tcf: &types::TcfConsent) -> u64 {
+    // A `last_updated_ds` in the future saturates to an age of zero.
+    let age_ds = now_deciseconds().saturating_sub(tcf.last_updated_ds);
+    age_ds / DECISECONDS_PER_DAY
+}
+
+/// Reports whether a TCF consent record is older than the allowed maximum.
+///
+/// The age is the difference between the current time and
+/// `tcf.last_updated_ds`, both in deciseconds since the Unix epoch. A
+/// timestamp in the future counts as age zero.
+///
+/// Returns `true` only when the age strictly exceeds `max_age_days`.
+#[must_use]
+pub fn is_consent_expired(tcf: &types::TcfConsent, max_age_days: u32) -> bool {
+    let age_ds = now_deciseconds().saturating_sub(tcf.last_updated_ds);
+    let limit_ds = DECISECONDS_PER_DAY * u64::from(max_age_days);
+    age_ds > limit_ds
+}
+
+/// Derives a US Privacy value from the publisher's GPC policy defaults.
+///
+/// The result always carries `version: 1` and an affirmative opt-out of sale;
+/// the notice and LSPA flags are taken from `config.us_privacy_defaults`.
+///
+/// Returns [`None`] when `gpc_implies_optout` is disabled in the config.
+#[must_use]
+pub fn build_us_privacy_from_gpc(config: &ConsentConfig) -> Option {
+    let policy = &config.us_privacy_defaults;
+    policy.gpc_implies_optout.then(|| types::UsPrivacy {
+        version: 1,
+        notice_given: PrivacyFlag::from(policy.notice_given),
+        opt_out_sale: PrivacyFlag::Yes,
+        lspa_covered: PrivacyFlag::from(policy.lspa_covered),
+    })
+}
+
+/// Filters Extended User IDs based on TCF consent.
+///
+/// Per Prebid's tcfControl enforcement:
+/// - **Purpose 1** (Store/access information on a device) must be consented
+///   for any EID to exist (identifiers require cookie/localStorage access).
+/// - **Purpose 4** (Personalized ads) must be consented for EIDs to be
+///   transmitted in the bid request.
+///
+/// Returns [`None`] if consent is missing or insufficient, stripping all EIDs
+/// from the outgoing bid request.
+#[must_use] +pub fn gate_eids_by_consent( + eids: Option>, + consent_ctx: Option<&ConsentContext>, +) -> Option> { + let eids = eids?; + if eids.is_empty() { + return None; + } + + // Resolve the effective TCF consent — standalone or from GPP. + let tcf = consent_ctx.and_then(|ctx| { + ctx.tcf + .as_ref() + .or_else(|| ctx.gpp.as_ref().and_then(|g| g.eu_tcf.as_ref())) + }); + + match tcf { + Some(tcf) if tcf.has_storage_consent() && tcf.has_personalized_ads_consent() => Some(eids), + Some(_) => { + log::info!("EIDs stripped: TCF Purpose 1 or 4 consent missing"); + None + } + None => { + // No TCF data — if GDPR applies, block EIDs as a precaution. + if consent_ctx.is_some_and(|c| c.gdpr_applies) { + log::info!("EIDs stripped: GDPR applies but no TCF consent available"); + None + } else { + Some(eids) + } + } + } +} + +// --------------------------------------------------------------------------- +// KV Store integration helpers +// --------------------------------------------------------------------------- + +/// Returns whether KV fallback should be attempted for this request. +/// +/// KV fallback is used only when cookie-based consent signals are absent. +/// A standalone `Sec-GPC` header should not suppress fallback reads. +#[must_use] +fn should_try_kv_fallback(signals: &RawConsentSignals) -> bool { + !signals.has_cookie_signals() +} + +/// Attempts to load consent from the KV Store when cookie signals are empty. +/// +/// Returns `Some(ConsentContext)` if a valid entry was found and decoded, +/// `None` otherwise. Requires both `consent_store` and `synthetic_id` to +/// be configured. 
+fn try_kv_fallback(input: &ConsentPipelineInput<'_>) -> Option { + let store_name = input.config.consent_store.as_deref()?; + let synthetic_id = input.synthetic_id?; + + log::debug!("No cookie consent signals, trying KV fallback for '{synthetic_id}'"); + let mut ctx = kv::load_consent_from_kv(store_name, synthetic_id)?; + + // Re-detect jurisdiction from current geo (may differ from stored value). + ctx.jurisdiction = jurisdiction::detect_jurisdiction(input.geo, input.config); + apply_expiration_check(&mut ctx, input.config); + apply_gpc_us_privacy(&mut ctx, input.config); + + Some(ctx) +} + +/// Persists cookie-sourced consent to the KV Store when configured. +/// +/// Only writes when consent signals are non-empty and have changed since +/// the last write (fingerprint comparison). +fn try_kv_write(input: &ConsentPipelineInput<'_>, ctx: &ConsentContext) { + let Some(store_name) = input.config.consent_store.as_deref() else { + return; + }; + let Some(synthetic_id) = input.synthetic_id else { + return; + }; + + kv::save_consent_to_kv( + store_name, + synthetic_id, + ctx, + input.config.max_consent_age_days, + ); +} + +// --------------------------------------------------------------------------- +// Logging helpers +// --------------------------------------------------------------------------- + +/// Logs a summary of the extracted consent signals. +/// +/// Emits an `info`-level log line when at least one consent signal is present, +/// or a `debug`-level line when no signals were found. +fn log_consent_signals(signals: &RawConsentSignals) { + if signals.is_empty() { + log::debug!("No consent signals found on request"); + } else { + log::info!("Consent signals: {}", signals); + } +} + +/// Derives a human-readable status label for a decoded signal. +/// +/// Returns `"present"` when decoded data exists, `"decode-failed"` when only +/// the raw string exists, or `"absent"` when neither is available. 
+fn signal_status(decoded: bool, raw: bool) -> &'static str { + if decoded { + "present" + } else if raw { + "decode-failed" + } else { + "absent" + } +} + +/// Logs a structured summary of the fully-processed consent context. +fn log_consent_context(ctx: &ConsentContext) { + if ctx.is_empty() { + return; + } + + let tcf_status = match (&ctx.tcf, ctx.expired) { + (Some(_), _) => "present", + (None, true) => "expired", + (None, false) if ctx.raw_tc_string.is_some() => "decode-failed", + _ => "absent", + }; + + let gpp_status = signal_status(ctx.gpp.is_some(), ctx.raw_gpp_string.is_some()); + let usp_status = signal_status(ctx.us_privacy.is_some(), false); + + log::info!( + "Consent context: jurisdiction={}, tcf={tcf_status}, gpp={gpp_status}, \ + us_privacy={usp_status}, gpc={}, gdpr_applies={}, source={:?}", + ctx.jurisdiction, + ctx.gpc, + ctx.gdpr_applies, + ctx.source, + ); +} + +#[cfg(test)] +mod tests { + use fastly::Request; + + use super::{build_consent_context, should_try_kv_fallback, ConsentPipelineInput}; + use crate::consent::types::RawConsentSignals; + use crate::consent_config::{ConsentConfig, ConsentMode}; + use crate::cookies::parse_cookies_to_jar; + + #[test] + fn kv_fallback_allowed_when_only_gpc_present() { + let signals = RawConsentSignals { + gpc: true, + ..RawConsentSignals::default() + }; + + assert!( + should_try_kv_fallback(&signals), + "should allow KV fallback when only Sec-GPC is present" + ); + } + + #[test] + fn kv_fallback_skipped_when_cookie_signal_present() { + let signals = RawConsentSignals { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + gpc: true, + ..RawConsentSignals::default() + }; + + assert!( + !should_try_kv_fallback(&signals), + "should skip KV fallback when cookie signals are present" + ); + } + + #[test] + fn proxy_mode_marks_gdpr_when_raw_tc_exists() { + let jar = parse_cookies_to_jar("euconsent-v2=CPXxGfAPXxGfA"); + let req = Request::get("https://example.com"); + let config = ConsentConfig { + mode: 
ConsentMode::Proxy, + ..ConsentConfig::default() + }; + + let ctx = build_consent_context(&ConsentPipelineInput { + jar: Some(&jar), + req: &req, + config: &config, + geo: None, + synthetic_id: None, + }); + + assert!( + ctx.gdpr_applies, + "should set gdpr_applies when raw TC string is present in proxy mode" + ); + assert_eq!( + ctx.raw_tc_string.as_deref(), + Some("CPXxGfAPXxGfA"), + "should preserve raw TC string in proxy mode" + ); + assert!(ctx.tcf.is_none(), "should skip TCF decoding in proxy mode"); + } +} diff --git a/crates/common/src/consent/tcf.rs b/crates/common/src/consent/tcf.rs new file mode 100644 index 00000000..cba7ec8a --- /dev/null +++ b/crates/common/src/consent/tcf.rs @@ -0,0 +1,531 @@ +//! TCF v2 consent string decoder (core segment only). +//! +//! Decodes the IAB Transparency & Consent Framework v2 consent string from the +//! `euconsent-v2` cookie. Only the core segment (segment type 0) is decoded; +//! publisher restrictions, disclosed vendors, and allowed vendors segments are +//! not yet supported. +//! +//! # Binary format +//! +//! The TC String is a web-safe base64-encoded binary bitfield. The core segment +//! layout (after base64 decoding) is: +//! +//! | Field | Bits | Offset | +//! |-------|------|--------| +//! | Version | 6 | 0 | +//! | Created | 36 | 6 | +//! | `LastUpdated` | 36 | 42 | +//! | `CmpId` | 12 | 78 | +//! | `CmpVersion` | 12 | 90 | +//! | `ConsentScreen` | 6 | 102 | +//! | `ConsentLanguage` | 12 | 108 | +//! | `VendorListVersion` | 12 | 120 | +//! | `TcfPolicyVersion` | 6 | 132 | +//! | `IsServiceSpecific` | 1 | 138 | +//! | `UseNonStandardTexts` | 1 | 139 | +//! | `SpecialFeatureOptIns` | 12 | 140 | +//! | `PurposesConsent` | 24 | 152 | +//! | `PurposesLITransparency` | 24 | 176 | +//! | `PurposeOneTreatment` | 1 | 200 | +//! | `PublisherCC` | 12 | 201 | +//! | `MaxVendorConsentId` | 16 | 213 | +//! | `IsRangeEncoding` | 1 | 229 | +//! | ...vendor consents... | variable | 230 | +//! +//! 
Segments in a TC String are separated by `.` characters. The first segment +//! is always the core segment; additional segments carry supplementary data. +//! +//! # References +//! +//! - [IAB TCF v2.0 specification](https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20Consent%20string%20and%20vendor%20list%20formats%20v2.md) + +use base64::engine::general_purpose::URL_SAFE_NO_PAD; +use base64::Engine as _; +use error_stack::Report; + +use super::types::{ConsentDecodeError, TcfConsent}; + +/// Decodes a TC String v2 into a [`TcfConsent`] struct. +/// +/// Only the core segment is decoded. Additional segments (separated by `.`) +/// are ignored. +/// +/// # Errors +/// +/// - [`ConsentDecodeError::InvalidTcString`] if base64 decoding fails, the +/// version is not 2, or the bitfield is too short. +pub fn decode_tc_string(tc_string: &str) -> Result> { + // TC String may have multiple segments separated by '.' + // The first segment is always the core segment. 
+ let core_segment = tc_string.split('.').next().unwrap_or(tc_string); + + let bytes = URL_SAFE_NO_PAD + .decode(core_segment) + .or_else(|_| { + // Some CMPs use standard base64 with padding + use base64::engine::general_purpose::STANDARD; + STANDARD.decode(core_segment) + }) + .map_err(|e| { + Report::new(ConsentDecodeError::InvalidTcString { + reason: format!("base64 decode failed: {e}"), + }) + })?; + + let reader = BitReader::new(&bytes); + + // Minimum size: 230 bits for core fields up to IsRangeEncoding + if reader.bit_len() < 230 { + return Err(Report::new(ConsentDecodeError::InvalidTcString { + reason: format!( + "bitfield too short: {} bits, need at least 230", + reader.bit_len() + ), + })); + } + + let version = reader.read_u8(0, 6); + if version != 2 { + return Err(Report::new(ConsentDecodeError::InvalidTcString { + reason: format!("unsupported version {version}, expected 2"), + })); + } + + let created_ds = reader.read_u64(6, 36); + let last_updated_ds = reader.read_u64(42, 36); + let cmp_id = reader.read_u16(78, 12); + let cmp_version = reader.read_u16(90, 12); + let consent_screen = reader.read_u8(102, 6); + + // Consent language: two 6-bit values, each offset by 'A' (65) + let lang_a = reader.read_u8(108, 6); + let lang_b = reader.read_u8(114, 6); + let consent_language = format!("{}{}", char::from(b'A' + lang_a), char::from(b'A' + lang_b),); + + let vendor_list_version = reader.read_u16(120, 12); + let tcf_policy_version = reader.read_u8(132, 6); + // Skip: IsServiceSpecific (138, 1), UseNonStandardTexts (139, 1) + + let special_feature_opt_ins = reader.read_bool_vec(140, 12); + let purpose_consents = reader.read_bool_vec(152, 24); + let purpose_legitimate_interests = reader.read_bool_vec(176, 24); + // Skip: PurposeOneTreatment (200, 1), PublisherCC (201, 12) + + // Vendor consents + let vendor_consents = decode_vendor_section(&reader, 213)?; + + // Vendor legitimate interests follow after vendor consents + let vendor_li_offset = 
vendor_section_end_offset(&reader, 213)?;
+    let vendor_legitimate_interests = if vendor_li_offset + 17 <= reader.bit_len() {
+        decode_vendor_section(&reader, vendor_li_offset).unwrap_or_default()
+    } else {
+        Vec::new()
+    };
+
+    Ok(TcfConsent {
+        version,
+        cmp_id,
+        cmp_version,
+        consent_screen,
+        consent_language,
+        vendor_list_version,
+        tcf_policy_version,
+        created_ds,
+        last_updated_ds,
+        purpose_consents,
+        purpose_legitimate_interests,
+        vendor_consents,
+        vendor_legitimate_interests,
+        special_feature_opt_ins,
+    })
+}
+
+/// Decodes a vendor section (consents or legitimate interests).
+///
+/// The section starts with:
+/// - `MaxVendorId` (16 bits)
+/// - `IsRangeEncoding` (1 bit)
+///
+/// If bitfield encoding: one bit per vendor up to `MaxVendorId`.
+/// If range encoding: `NumEntries` (12 bits), then entries.
+///
+/// The input comes from a client-controlled cookie, so the decoder is
+/// defensive: a truncated section yields the vendors read so far, range
+/// entries are clamped to `1..=MaxVendorId`, and at most `MaxVendorId` IDs are
+/// collected in total (a section cannot legitimately list more).
+///
+/// Returns the consented vendor IDs in encounter order; an empty list when
+/// the section header does not fit in the buffer.
+fn decode_vendor_section(
+    reader: &BitReader<'_>,
+    offset: usize,
+) -> Result, Report> {
+    if offset + 17 > reader.bit_len() {
+        return Ok(Vec::new());
+    }
+
+    let max_vendor_id = reader.read_u16(offset, 16);
+    let is_range = reader.read_bool(offset + 16);
+
+    if !is_range {
+        // Bitfield: one bit per vendor, 1..=max_vendor_id
+        let mut vendors = Vec::new();
+        let bitfield_start = offset + 17;
+        for i in 0..usize::from(max_vendor_id) {
+            let bit_pos = bitfield_start + i;
+            if bit_pos >= reader.bit_len() {
+                break;
+            }
+            if reader.read_bool(bit_pos) {
+                // Vendor IDs are 1-indexed
+                vendors.push((i + 1) as u16);
+            }
+        }
+        Ok(vendors)
+    } else {
+        // Range encoding
+        let num_entries_offset = offset + 17;
+        if num_entries_offset + 12 > reader.bit_len() {
+            return Ok(Vec::new());
+        }
+        let num_entries = reader.read_u16(num_entries_offset, 12);
+        // Upper bound on how many IDs may be collected. Without it a hostile
+        // string could declare up to 4095 ranges of 65 536 IDs each.
+        let budget = usize::from(max_vendor_id);
+        let mut vendors = Vec::new();
+        let mut pos = num_entries_offset + 12;
+
+        for _ in 0..num_entries {
+            if pos >= reader.bit_len() {
+                break;
+            }
+            let is_range_entry = reader.read_bool(pos);
+            pos += 1;
+
+            let (start, end) = if is_range_entry {
+                // Range: StartVendorId (16) + EndVendorId (16)
+                if pos + 32 > reader.bit_len() {
+                    break;
+                }
+                let bounds = (reader.read_u16(pos, 16), reader.read_u16(pos + 16, 16));
+                pos += 32;
+                bounds
+            } else {
+                // Single vendor: VendorId (16)
+                if pos + 16 > reader.bit_len() {
+                    break;
+                }
+                let id = reader.read_u16(pos, 16);
+                pos += 16;
+                (id, id)
+            };
+
+            // Vendor IDs are 1-indexed and cannot exceed MaxVendorId; entries
+            // that fall entirely outside that window (or are inverted)
+            // contribute nothing.
+            let first = start.max(1);
+            let last = end.min(max_vendor_id);
+            if first > last {
+                continue;
+            }
+
+            let remaining = budget - vendors.len();
+            let span = usize::from(last - first) + 1;
+            vendors.extend((first..=last).take(remaining));
+            if span > remaining {
+                // Only overlapping or duplicated entries can get here.
+                log::warn!(
+                    "TC String vendor ranges list more than MaxVendorId ({max_vendor_id}) IDs; \
+                     ignoring the remaining entries"
+                );
+                break;
+            }
+        }
+        Ok(vendors)
+    }
+}
+
+/// Calculates the bit offset after a vendor section ends.
+///
+/// Walks the same layout as [`decode_vendor_section`] without collecting IDs.
+/// The returned offset may lie beyond the end of the buffer when the section
+/// is truncated; callers must bounds-check it before reading.
+fn vendor_section_end_offset(
+    reader: &BitReader<'_>,
+    offset: usize,
+) -> Result> {
+    if offset + 17 > reader.bit_len() {
+        return Ok(offset);
+    }
+
+    let max_vendor_id = reader.read_u16(offset, 16);
+    let is_range = reader.read_bool(offset + 16);
+
+    if !is_range {
+        // Header (17 bits) plus one bit per vendor.
+        Ok(offset + 17 + usize::from(max_vendor_id))
+    } else {
+        let num_entries_offset = offset + 17;
+        if num_entries_offset + 12 > reader.bit_len() {
+            return Ok(num_entries_offset);
+        }
+        let num_entries = reader.read_u16(num_entries_offset, 12);
+        let mut pos = num_entries_offset + 12;
+
+        for _ in 0..num_entries {
+            if pos >= reader.bit_len() {
+                break;
+            }
+            let is_range_entry = reader.read_bool(pos);
+            pos += 1;
+
+            if is_range_entry {
+                pos += 32; // StartVendorId (16) + EndVendorId (16)
+            } else {
+                pos += 16; // Single VendorId
+            }
+        }
+        Ok(pos)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Bit reader utility
+// ---------------------------------------------------------------------------
+
+/// A simple bit-level reader over a byte slice.
+///
+/// All reads are specified as (`bit_offset`, `num_bits`) from the start of the
+/// buffer. No internal cursor is maintained — callers manage offsets explicitly.
+struct BitReader<'a> {
+    bytes: &'a [u8],
+}
+
+impl<'a> BitReader<'a> {
+    const fn new(bytes: &'a [u8]) -> Self {
+        Self { bytes }
+    }
+
+    const fn bit_len(&self) -> usize {
+        self.bytes.len() * 8
+    }
+
+    /// Reads a single bit as a boolean.
+ fn read_bool(&self, bit_offset: usize) -> bool { + let byte_idx = bit_offset / 8; + let bit_idx = 7 - (bit_offset % 8); + if byte_idx >= self.bytes.len() { + return false; + } + (self.bytes[byte_idx] >> bit_idx) & 1 == 1 + } + + /// Reads up to 8 bits as a [`u8`]. + fn read_u8(&self, bit_offset: usize, num_bits: usize) -> u8 { + debug_assert!(num_bits <= 8); + self.read_u64(bit_offset, num_bits) as u8 + } + + /// Reads up to 16 bits as a [`u16`]. + fn read_u16(&self, bit_offset: usize, num_bits: usize) -> u16 { + debug_assert!(num_bits <= 16); + self.read_u64(bit_offset, num_bits) as u16 + } + + /// Reads up to 64 bits as a [`u64`]. + fn read_u64(&self, bit_offset: usize, num_bits: usize) -> u64 { + debug_assert!(num_bits <= 64); + let mut value: u64 = 0; + for i in 0..num_bits { + if self.read_bool(bit_offset + i) { + value |= 1 << (num_bits - 1 - i); + } + } + value + } + + /// Reads a sequence of bits as a [`Vec`]. + fn read_bool_vec(&self, bit_offset: usize, num_bits: usize) -> Vec { + (0..num_bits) + .map(|i| self.read_bool(bit_offset + i)) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // A known-good TC String v2 generated by the IAB reference implementation. + // This encodes: version=2, cmpId=7, cmpVersion=1, consent for purposes 1-4, + // vendor consents for vendors 1-10 (bitfield encoding). + // + // To generate test strings: https://iabeurope.github.io/TCF-v2-consent-string-editor/ + // Or use the IAB reference implementation in JavaScript. + + #[test] + fn decodes_minimal_tc_string() { + // This is a minimal valid TC String v2 core segment. 
+ // Generated with: version=2, created=1970-01-01, lastUpdated=1970-01-01, + // cmpId=1, cmpVersion=1, consentScreen=0, language=EN, + // vendorListVersion=1, policyVersion=2, purposes=none, vendors=none (max=0) + // + // Bitfield construction: + // version(6)=2, created(36)=0, lastUpdated(36)=0, cmpId(12)=1, + // cmpVersion(12)=1, consentScreen(6)=0, language(12)=EN, + // vendorListVersion(12)=1, policyVersion(6)=2, + // isServiceSpecific(1)=0, useNonStandard(1)=0, + // specialFeatures(12)=0, purposeConsents(24)=0, + // purposeLI(24)=0, purposeOneTreatment(1)=0, publisherCC(12)=EN, + // maxVendorId(16)=0, isRange(1)=0 + // + // We'll build this manually. + let bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &[], &[]); + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded).expect("should decode minimal TC string"); + assert_eq!(result.version, 2); + assert_eq!(result.cmp_id, 1); + assert_eq!(result.cmp_version, 1); + assert_eq!(result.consent_language, "EN"); + assert_eq!(result.vendor_list_version, 1); + assert!(result.vendor_consents.is_empty(), "should have no vendors"); + } + + #[test] + fn decodes_purpose_consents() { + let purposes = vec![true, true, false, true]; // purposes 1,2,4 + let bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &purposes, &[]); + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded).expect("should decode purposes"); + assert!(result.purpose_consents[0], "purpose 1 should be consented"); + assert!(result.purpose_consents[1], "purpose 2 should be consented"); + assert!( + !result.purpose_consents[2], + "purpose 3 should not be consented" + ); + assert!(result.purpose_consents[3], "purpose 4 should be consented"); + } + + #[test] + fn decodes_vendor_consents_bitfield() { + // Vendors 1, 3, 5 consented (bitfield encoding, max=5) + let vendor_bits = vec![true, false, true, false, true]; + let bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &[], &vendor_bits); + let 
encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded).expect("should decode vendor bitfield"); + assert_eq!( + result.vendor_consents, + vec![1, 3, 5], + "should have vendors 1, 3, 5" + ); + } + + #[test] + fn rejects_version_1() { + // Build bytes with version=1 + let mut bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &[], &[]); + // Clear version bits (first 6 bits) and set to 1 + bytes[0] = (bytes[0] & 0x03) | (1 << 2); // version=1 in first 6 bits + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded); + assert!(result.is_err(), "should reject version 1"); + } + + #[test] + fn rejects_too_short() { + let encoded = URL_SAFE_NO_PAD.encode([0u8; 10]); // only 80 bits + let result = decode_tc_string(&encoded); + assert!(result.is_err(), "should reject short bitfield"); + } + + #[test] + fn rejects_invalid_base64() { + let result = decode_tc_string("!!!invalid!!!"); + assert!(result.is_err(), "should reject invalid base64"); + } + + #[test] + fn handles_segmented_tc_string() { + // TC Strings can have multiple segments separated by '.' 
+ let bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &[], &[]); + let encoded = format!("{}.extra-segment", URL_SAFE_NO_PAD.encode(&bytes)); + + let result = decode_tc_string(&encoded).expect("should decode first segment"); + assert_eq!(result.version, 2); + } + + #[test] + fn decodes_consent_language() { + let bytes = build_minimal_tc_bytes(1, 1, b"FR", 1, &[], &[]); + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded).expect("should decode language"); + assert_eq!(result.consent_language, "FR"); + } + + // ----------------------------------------------------------------------- + // Test helper: builds a minimal TC String v2 byte buffer + // ----------------------------------------------------------------------- + + fn build_minimal_tc_bytes( + cmp_id: u16, + cmp_version: u16, + language: &[u8; 2], + vendor_list_version: u16, + purpose_consents: &[bool], + vendor_consent_bits: &[bool], + ) -> Vec { + let max_vendor_id = vendor_consent_bits.len() as u16; + // Calculate total bits needed + // Core fields: 213 bits + 16 (maxVendorId) + 1 (isRange) + max_vendor_id (bitfield) + let total_bits = 213 + 17 + usize::from(max_vendor_id); + let total_bytes = total_bits.div_ceil(8); + let mut buf = vec![0u8; total_bytes]; + + let mut writer = BitWriter::new(&mut buf); + + // Version (6 bits) = 2 + writer.write(0, 6, 2); + // Created (36 bits) = 0 + writer.write(6, 36, 0); + // LastUpdated (36 bits) = 0 + writer.write(42, 36, 0); + // CmpId (12 bits) + writer.write(78, 12, u64::from(cmp_id)); + // CmpVersion (12 bits) + writer.write(90, 12, u64::from(cmp_version)); + // ConsentScreen (6 bits) = 0 + writer.write(102, 6, 0); + // ConsentLanguage (12 bits) - two 6-bit chars offset by 'A' + writer.write(108, 6, u64::from(language[0] - b'A')); + writer.write(114, 6, u64::from(language[1] - b'A')); + // VendorListVersion (12 bits) + writer.write(120, 12, u64::from(vendor_list_version)); + // TcfPolicyVersion (6 bits) = 2 + writer.write(132, 6, 
2); + // IsServiceSpecific (1 bit) = 0 + // UseNonStandardTexts (1 bit) = 0 + // SpecialFeatureOptIns (12 bits) = 0 + // PurposesConsent (24 bits) + for (i, &consented) in purpose_consents.iter().enumerate() { + if consented && i < 24 { + writer.write_bool(152 + i, true); + } + } + // PurposesLITransparency (24 bits) = 0 + // PurposeOneTreatment (1 bit) = 0 + // PublisherCC (12 bits) - same as language + writer.write(201, 6, u64::from(language[0] - b'A')); + writer.write(207, 6, u64::from(language[1] - b'A')); + // MaxVendorConsentId (16 bits) + writer.write(213, 16, u64::from(max_vendor_id)); + // IsRangeEncoding (1 bit) = 0 (bitfield) + writer.write_bool(229, false); + // Vendor consent bits + for (i, &consented) in vendor_consent_bits.iter().enumerate() { + if consented { + writer.write_bool(230 + i, true); + } + } + + buf + } + + /// Simple bit writer for test data construction. + struct BitWriter<'a> { + bytes: &'a mut [u8], + } + + impl<'a> BitWriter<'a> { + fn new(bytes: &'a mut [u8]) -> Self { + Self { bytes } + } + + fn write_bool(&mut self, bit_offset: usize, value: bool) { + if value { + let byte_idx = bit_offset / 8; + let bit_idx = 7 - (bit_offset % 8); + if byte_idx < self.bytes.len() { + self.bytes[byte_idx] |= 1 << bit_idx; + } + } + } + + fn write(&mut self, bit_offset: usize, num_bits: usize, value: u64) { + for i in 0..num_bits { + let bit = (value >> (num_bits - 1 - i)) & 1 == 1; + self.write_bool(bit_offset + i, bit); + } + } + } +} diff --git a/crates/common/src/consent/types.rs b/crates/common/src/consent/types.rs new file mode 100644 index 00000000..9098d78b --- /dev/null +++ b/crates/common/src/consent/types.rs @@ -0,0 +1,652 @@ +//! Consent signal types. +//! +//! This module defines the full consent type hierarchy: +//! +//! - [`RawConsentSignals`] — raw (undecoded) strings extracted from cookies/headers +//! - [`ConsentContext`] — the normalized output carrying both raw and decoded data +//! 
- [`UsPrivacy`] / [`PrivacyFlag`] — decoded US Privacy (CCPA) 4-char string +//! - [`TcfConsent`] — decoded TCF v2 core consent data +//! - [`GppConsent`] — decoded GPP consent data +//! - [`Jurisdiction`] — the privacy regime applicable to the request +//! - [`ConsentSource`] — how consent was sourced (cookie, KV store, etc.) + +use core::fmt; + +// --------------------------------------------------------------------------- +// Raw extraction layer +// --------------------------------------------------------------------------- + +/// Raw consent signals extracted from cookies and HTTP headers. +/// +/// All fields are optional because any combination of consent mechanisms may be +/// present (or absent) on a given request. No decoding or validation is +/// performed at this stage — the values are preserved exactly as received. +/// +/// # Consent sources +/// +/// | Field | Source | Standard | +/// |---|---|---| +/// | [`raw_tc_string`](Self::raw_tc_string) | `euconsent-v2` cookie | IAB TCF v2 | +/// | [`raw_gpp_string`](Self::raw_gpp_string) | `__gpp` cookie | IAB GPP | +/// | [`raw_gpp_sid`](Self::raw_gpp_sid) | `__gpp_sid` cookie | IAB GPP | +/// | [`raw_us_privacy`](Self::raw_us_privacy) | `us_privacy` cookie | IAB US Privacy (CCPA) | +/// | [`gpc`](Self::gpc) | `Sec-GPC` header | Global Privacy Control | +#[derive(Debug, Clone, Default)] +pub struct RawConsentSignals { + /// TCF v2 consent string from the `euconsent-v2` cookie. + pub raw_tc_string: Option, + /// GPP consent string from the `__gpp` cookie. + pub raw_gpp_string: Option, + /// GPP section IDs from the `__gpp_sid` cookie (raw comma-separated string). + pub raw_gpp_sid: Option, + /// US Privacy string from the `us_privacy` cookie (4-character format). + pub raw_us_privacy: Option, + /// Global Privacy Control signal from the `Sec-GPC` header. + /// + /// When `true`, the browser has signaled the user's opt-out preference. 
+ pub gpc: bool, +} + +impl RawConsentSignals { + /// Returns `true` when at least one consent cookie signal is present. + #[must_use] + pub fn has_cookie_signals(&self) -> bool { + self.raw_tc_string.is_some() + || self.raw_gpp_string.is_some() + || self.raw_gpp_sid.is_some() + || self.raw_us_privacy.is_some() + } + + /// Returns `true` when no consent signals were found on the request. + #[must_use] + pub fn is_empty(&self) -> bool { + !self.has_cookie_signals() && !self.gpc + } +} + +impl fmt::Display for RawConsentSignals { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "euconsent-v2=")?; + match &self.raw_tc_string { + Some(s) => write!(f, "present ({} chars)", s.len())?, + None => write!(f, "absent")?, + } + + write!(f, ", __gpp=")?; + match &self.raw_gpp_string { + Some(s) => write!(f, "present ({} chars)", s.len())?, + None => write!(f, "absent")?, + } + + write!(f, ", __gpp_sid=")?; + match &self.raw_gpp_sid { + Some(s) => write!(f, "\"{}\"", s)?, + None => write!(f, "absent")?, + } + + write!(f, ", us_privacy=")?; + match &self.raw_us_privacy { + Some(s) => write!(f, "\"{}\"", s)?, + None => write!(f, "absent")?, + } + + write!(f, ", Sec-GPC={}", if self.gpc { "1" } else { "absent" }) + } +} + +// --------------------------------------------------------------------------- +// Decoded consent types +// --------------------------------------------------------------------------- + +/// Normalized consent context extracted from cookies and headers. +/// +/// Carries both raw consent strings (for `OpenRTB` forwarding) and decoded +/// structured data (for TS-level enforcement and observability). This is the +/// central type that flows through the entire request lifecycle. +/// +/// Built from [`RawConsentSignals`] by the decoding pipeline in +/// [`super::build_consent_context`]. +#[derive(Debug, Clone, Default)] +pub struct ConsentContext { + /// Raw TC String from `euconsent-v2` cookie, passed as-is in `user.consent`. 
+ pub raw_tc_string: Option, + /// Raw GPP string from `__gpp` cookie, passed as-is in `regs.gpp`. + pub raw_gpp_string: Option, + /// GPP section IDs derived from decoded `__gpp` data. + /// + /// The `__gpp_sid` cookie is treated as a transport hint and validated + /// against decoded section IDs when both are present. + pub gpp_section_ids: Option>, + /// Raw US Privacy string from `us_privacy` cookie. + pub raw_us_privacy: Option, + /// Raw Google Additional Consent (AC) string. + /// + /// Covers ad tech providers not in the IAB Global Vendor List but + /// participating in the Google ecosystem. Format: `{version}~{ids}~dv.` + pub raw_ac_string: Option, + + /// Whether GDPR applies to this request (derived from TCF presence). + pub gdpr_applies: bool, + /// Decoded TCF v2 consent data. + pub tcf: Option, + /// Decoded GPP consent data. + pub gpp: Option, + /// Decoded US Privacy signal. + pub us_privacy: Option, + + /// Whether the TCF consent string has expired (age exceeds configured max). + /// + /// When `true` and `check_expiration` is enabled, the decoded `tcf` field + /// is cleared (treated as no consent) but the raw string is preserved for + /// proxy-mode forwarding. + pub expired: bool, + + /// Global Privacy Control signal from `Sec-GPC` header. + pub gpc: bool, + /// Detected privacy jurisdiction for this request. + pub jurisdiction: super::jurisdiction::Jurisdiction, + /// Source of the consent data (for debugging). + pub source: ConsentSource, +} + +impl ConsentContext { + /// Returns `true` when no consent signals are present. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + self.raw_tc_string.is_none() + && self.raw_gpp_string.is_none() + && self.raw_us_privacy.is_none() + && self.raw_ac_string.is_none() + && self.tcf.is_none() + && self.gpp.is_none() + && self.us_privacy.is_none() + && !self.gpc + } +} + +// --------------------------------------------------------------------------- +// TCF v2 +// --------------------------------------------------------------------------- + +/// Decoded TCF v2.x consent data. +/// +/// Extracted from either a standalone TC String (`euconsent-v2` cookie) +/// or from the EU TCF v2.2 section within a GPP string. +/// +/// Only the core segment (segment type 0) is decoded. Publisher restrictions, +/// disclosed vendors, and allowed vendors segments are not yet supported. +#[derive(Debug, Clone)] +pub struct TcfConsent { + /// TCF version (2). + pub version: u8, + /// CMP ID that collected this consent. + pub cmp_id: u16, + /// CMP version. + pub cmp_version: u16, + /// Consent screen number. + pub consent_screen: u8, + /// CMP language (ISO 639-1, two uppercase letters). + pub consent_language: String, + /// Vendor list version used. + pub vendor_list_version: u16, + /// TCF policy version. + pub tcf_policy_version: u8, + /// Timestamp when consent was created (deciseconds since epoch). + pub created_ds: u64, + /// Timestamp when consent was last updated (deciseconds since epoch). + pub last_updated_ds: u64, + + /// Purpose consents (24 bits, 1-indexed). + /// + /// `true` at index 0 means purpose 1 is consented, etc. + pub purpose_consents: Vec, + /// Purpose legitimate interests (24 bits, 1-indexed). + pub purpose_legitimate_interests: Vec, + + /// Vendor IDs with consent granted. + pub vendor_consents: Vec, + /// Vendor IDs with legitimate interest established. + pub vendor_legitimate_interests: Vec, + + /// Special feature opt-ins (12 bits). 
+ pub special_feature_opt_ins: Vec, +} + +impl TcfConsent { + /// Looks up a 1-indexed purpose in a TCF bitfield. + /// + /// Returns `false` for purpose 0 (invalid) and out-of-range indices. + fn purpose_bit(bits: &[bool], purpose: usize) -> bool { + purpose + .checked_sub(1) + .and_then(|idx| bits.get(idx).copied()) + .unwrap_or(false) + } + + /// Checks whether consent was granted for a specific TCF purpose. + /// + /// Purposes are 1-indexed per the TCF specification (Purpose 1 = index 0). + /// Returns `false` if the purpose is out of range. + #[must_use] + pub fn has_purpose_consent(&self, purpose: usize) -> bool { + Self::purpose_bit(&self.purpose_consents, purpose) + } + + /// Checks whether legitimate interest was established for a specific TCF purpose. + /// + /// Purposes are 1-indexed per the TCF specification. + /// Returns `false` if the purpose is out of range. + #[must_use] + pub fn has_purpose_li(&self, purpose: usize) -> bool { + Self::purpose_bit(&self.purpose_legitimate_interests, purpose) + } + + /// Checks whether a specific vendor has been granted consent. + #[must_use] + pub fn has_vendor_consent(&self, vendor_id: u16) -> bool { + self.vendor_consents.contains(&vendor_id) + } + + /// Checks whether a specific vendor has established legitimate interest. + #[must_use] + pub fn has_vendor_li(&self, vendor_id: u16) -> bool { + self.vendor_legitimate_interests.contains(&vendor_id) + } + + /// Whether Purpose 1 (Store/access information on a device) is consented. + /// + /// Required for any EID or cookie-based identifier to be set. + #[must_use] + pub fn has_storage_consent(&self) -> bool { + self.has_purpose_consent(1) + } + + /// Whether Purpose 2 (Basic ads) is consented. + /// + /// Required for bid adapters to participate in the auction. + #[must_use] + pub fn has_basic_ads_consent(&self) -> bool { + self.has_purpose_consent(2) + } + + /// Whether Purpose 4 (Personalized ads) is consented. 
+ /// + /// Controls whether user first-party data and EIDs are transmitted. + #[must_use] + pub fn has_personalized_ads_consent(&self) -> bool { + self.has_purpose_consent(4) + } +} + +// --------------------------------------------------------------------------- +// GPP +// --------------------------------------------------------------------------- + +/// Decoded GPP (Global Privacy Platform) consent data. +/// +/// Wraps the `iab_gpp` crate's decoded output with our domain types. +#[derive(Debug, Clone)] +pub struct GppConsent { + /// GPP header version. + pub version: u8, + /// Active section IDs present in the GPP string. + pub section_ids: Vec, + /// Decoded EU TCF v2.2 section (if present in GPP, section ID 2). + pub eu_tcf: Option, +} + +// --------------------------------------------------------------------------- +// US Privacy (CCPA) +// --------------------------------------------------------------------------- + +/// Decoded US Privacy string (legacy 4-character format). +/// +/// Format: `1YNN` where: +/// - Char 1: Version (always `1`) +/// - Char 2: Notice given (`Y`/`N`/`-`) +/// - Char 3: Opt-out of sale (`Y`/`N`/`-`) +/// - Char 4: LSPA covered (`Y`/`N`/`-`) +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct UsPrivacy { + /// Specification version (currently always 1). + pub version: u8, + /// Whether explicit notice has been given to the consumer. + pub notice_given: PrivacyFlag, + /// Whether the consumer has opted out of the sale of personal information. + pub opt_out_sale: PrivacyFlag, + /// Whether the transaction is covered by the Limited Service Provider Agreement. + pub lspa_covered: PrivacyFlag, +} + +/// A tri-state flag used in the US Privacy string. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PrivacyFlag { + /// `Y` — yes / affirmative. + Yes, + /// `N` — no / negative. + No, + /// `-` — not applicable or unknown. 
+ NotApplicable, +} + +impl fmt::Display for PrivacyFlag { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Yes => write!(f, "Y"), + Self::No => write!(f, "N"), + Self::NotApplicable => write!(f, "-"), + } + } +} + +impl From for PrivacyFlag { + fn from(value: bool) -> Self { + if value { + Self::Yes + } else { + Self::No + } + } +} + +impl fmt::Display for UsPrivacy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}{}{}{}", + self.version, self.notice_given, self.opt_out_sale, self.lspa_covered, + ) + } +} + +// --------------------------------------------------------------------------- +// Metadata types +// --------------------------------------------------------------------------- + +/// How consent was sourced for this request. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub enum ConsentSource { + /// Read from cookies on the incoming request. + Cookie, + /// Loaded from KV store via `SyntheticID` lookup. + KvStore, + /// Applied from explicit publisher policy defaults. + PolicyDefault, + /// No consent data available. + #[default] + None, +} + +// --------------------------------------------------------------------------- +// Consent error +// --------------------------------------------------------------------------- + +/// Errors that can occur during consent string decoding. +#[derive(Debug, derive_more::Display)] +pub enum ConsentDecodeError { + /// The US Privacy string has an invalid format. + #[display("invalid US Privacy string: {reason}")] + InvalidUsPrivacy { reason: String }, + /// The TC String could not be decoded. + #[display("invalid TC String: {reason}")] + InvalidTcString { reason: String }, + /// The GPP string could not be decoded. 
+ #[display("invalid GPP string: {reason}")] + InvalidGppString { reason: String }, +} + +impl core::error::Error for ConsentDecodeError {} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_signals() { + let signals = RawConsentSignals::default(); + assert!(signals.is_empty(), "default signals should be empty"); + } + + #[test] + fn not_empty_with_tc_string() { + let signals = RawConsentSignals { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + ..Default::default() + }; + assert!(!signals.is_empty(), "should not be empty with tc_string"); + } + + #[test] + fn not_empty_with_gpc() { + let signals = RawConsentSignals { + gpc: true, + ..Default::default() + }; + assert!(!signals.is_empty(), "should not be empty with gpc=true"); + } + + #[test] + fn has_no_cookie_signals_with_only_gpc() { + let signals = RawConsentSignals { + gpc: true, + ..Default::default() + }; + + assert!( + !signals.has_cookie_signals(), + "should not report cookie signals when only gpc is present" + ); + } + + #[test] + fn has_cookie_signals_with_tc_string() { + let signals = RawConsentSignals { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + ..Default::default() + }; + + assert!( + signals.has_cookie_signals(), + "should report cookie signals when tc string is present" + ); + } + + #[test] + fn display_all_absent() { + let signals = RawConsentSignals::default(); + let output = signals.to_string(); + assert!( + output.contains("euconsent-v2=absent"), + "should show euconsent-v2 absent" + ); + assert!(output.contains("__gpp=absent"), "should show __gpp absent"); + assert!( + output.contains("us_privacy=absent"), + "should show us_privacy absent" + ); + assert!( + output.contains("Sec-GPC=absent"), + "should show Sec-GPC absent" + ); + } + + #[test] + fn display_with_values() { + let signals = 
RawConsentSignals { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + raw_gpp_string: Some("DBACNYA~CPXxGfA".to_owned()), + raw_gpp_sid: Some("2,6".to_owned()), + raw_us_privacy: Some("1YNN".to_owned()), + gpc: true, + }; + let output = signals.to_string(); + assert!( + output.contains("euconsent-v2=present (13 chars)"), + "should show tc_string length" + ); + assert!( + output.contains("__gpp=present (15 chars)"), + "should show gpp length" + ); + assert!( + output.contains("__gpp_sid=\"2,6\""), + "should show gpp_sid value" + ); + assert!( + output.contains("us_privacy=\"1YNN\""), + "should show us_privacy value" + ); + assert!(output.contains("Sec-GPC=1"), "should show Sec-GPC as 1"); + } + + #[test] + fn consent_context_empty_by_default() { + let ctx = ConsentContext::default(); + assert!(ctx.is_empty(), "default ConsentContext should be empty"); + } + + #[test] + fn consent_context_not_empty_with_tc_string() { + let ctx = ConsentContext { + raw_tc_string: Some("CPXx".to_owned()), + ..Default::default() + }; + assert!( + !ctx.is_empty(), + "should not be empty with raw_tc_string present" + ); + } + + #[test] + fn consent_context_not_empty_with_gpc() { + let ctx = ConsentContext { + gpc: true, + ..Default::default() + }; + assert!(!ctx.is_empty(), "should not be empty with gpc=true"); + } + + #[test] + fn us_privacy_display() { + let usp = UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::No, + lspa_covered: PrivacyFlag::NotApplicable, + }; + assert_eq!(usp.to_string(), "1YN-", "should format as 1YN-"); + } + + #[test] + fn privacy_flag_display() { + assert_eq!(PrivacyFlag::Yes.to_string(), "Y"); + assert_eq!(PrivacyFlag::No.to_string(), "N"); + assert_eq!(PrivacyFlag::NotApplicable.to_string(), "-"); + } + + #[test] + fn consent_source_default_is_none() { + assert_eq!( + ConsentSource::default(), + ConsentSource::None, + "default source should be None" + ); + } + + fn make_tcf_consent() -> TcfConsent { + TcfConsent { + 
version: 2, + cmp_id: 1, + cmp_version: 1, + consent_screen: 1, + consent_language: "EN".to_owned(), + vendor_list_version: 42, + tcf_policy_version: 4, + created_ds: 0, + last_updated_ds: 0, + // Purposes 1, 2, 4 consented (indices 0, 1, 3) + purpose_consents: vec![ + true, true, false, true, false, false, false, false, false, false, false, false, + ], + // Purpose 7 LI (index 6) + purpose_legitimate_interests: vec![ + false, false, false, false, false, false, true, false, false, false, false, false, + ], + vendor_consents: vec![10, 32, 755], + vendor_legitimate_interests: vec![32], + special_feature_opt_ins: vec![false; 12], + } + } + + #[test] + fn tcf_has_purpose_consent() { + let tcf = make_tcf_consent(); + assert!(tcf.has_purpose_consent(1), "should have Purpose 1 consent"); + assert!(tcf.has_purpose_consent(2), "should have Purpose 2 consent"); + assert!( + !tcf.has_purpose_consent(3), + "should not have Purpose 3 consent" + ); + assert!(tcf.has_purpose_consent(4), "should have Purpose 4 consent"); + } + + #[test] + fn tcf_purpose_consent_out_of_range() { + let tcf = make_tcf_consent(); + assert!( + !tcf.has_purpose_consent(0), + "purpose 0 should return false (1-indexed)" + ); + assert!( + !tcf.has_purpose_consent(99), + "out-of-range purpose should return false" + ); + } + + #[test] + fn tcf_has_purpose_li() { + let tcf = make_tcf_consent(); + assert!( + tcf.has_purpose_li(7), + "should have Purpose 7 legitimate interest" + ); + assert!( + !tcf.has_purpose_li(1), + "should not have Purpose 1 legitimate interest" + ); + } + + #[test] + fn tcf_has_vendor_consent() { + let tcf = make_tcf_consent(); + assert!( + tcf.has_vendor_consent(755), + "should have consent for vendor 755" + ); + assert!( + !tcf.has_vendor_consent(999), + "should not have consent for vendor 999" + ); + } + + #[test] + fn tcf_convenience_methods() { + let tcf = make_tcf_consent(); + assert!( + tcf.has_storage_consent(), + "should have storage consent (P1)" + ); + assert!( + 
tcf.has_basic_ads_consent(), + "should have basic ads consent (P2)" + ); + assert!( + tcf.has_personalized_ads_consent(), + "should have personalized ads consent (P4)" + ); + } +} diff --git a/crates/common/src/consent/us_privacy.rs b/crates/common/src/consent/us_privacy.rs new file mode 100644 index 00000000..eea27e40 --- /dev/null +++ b/crates/common/src/consent/us_privacy.rs @@ -0,0 +1,164 @@ +//! US Privacy string decoder. +//! +//! Parses the legacy 4-character IAB US Privacy string (CCPA format). +//! +//! # Format +//! +//! The string is exactly 4 characters: `VNOL` where: +//! - **V** (version): always `1` +//! - **N** (notice): `Y` = given, `N` = not given, `-` = N/A +//! - **O** (opt-out of sale): `Y` = opted out, `N` = not opted out, `-` = N/A +//! - **L** (LSPA covered): `Y` = yes, `N` = no, `-` = N/A +//! +//! # References +//! +//! - [IAB US Privacy String specification](https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md) + +use error_stack::{Report, ResultExt}; + +use super::types::{ConsentDecodeError, PrivacyFlag, UsPrivacy}; + +/// Decodes a US Privacy string into a [`UsPrivacy`] struct. +/// +/// # Errors +/// +/// - [`ConsentDecodeError::InvalidUsPrivacy`] if the string is not exactly +/// 4 characters, has an unsupported version, or contains invalid flag values. 
+pub fn decode_us_privacy(s: &str) -> Result<UsPrivacy, Report<ConsentDecodeError>> {
+    // Pull exactly four chars straight off the iterator; a fifth item (or a
+    // missing one) is a length error. This avoids allocating a `Vec<char>`.
+    let mut chars = s.chars();
+    let (Some(version_char), Some(notice), Some(opt_out), Some(lspa), None) = (
+        chars.next(),
+        chars.next(),
+        chars.next(),
+        chars.next(),
+        chars.next(),
+    ) else {
+        return Err(Report::new(ConsentDecodeError::InvalidUsPrivacy {
+            reason: format!("expected 4 characters, got {}", s.chars().count()),
+        }));
+    };
+
+    // Only version 1 of the string is supported.
+    if version_char != '1' {
+        return Err(Report::new(ConsentDecodeError::InvalidUsPrivacy {
+            reason: format!("unsupported version '{}', expected '1'", version_char),
+        }));
+    }
+
+    // Build the field-specific error lazily so the `format!` allocation only
+    // happens when a flag is actually invalid, not on every successful decode.
+    let flag = |c: char, field: &str| {
+        parse_flag(c).change_context_lazy(|| ConsentDecodeError::InvalidUsPrivacy {
+            reason: format!("invalid {field} flag '{c}'"),
+        })
+    };
+
+    // Struct fields are evaluated in source order, so the first invalid flag
+    // (notice, then opt-out, then LSPA) is the one that gets reported.
+    Ok(UsPrivacy {
+        version: 1,
+        notice_given: flag(notice, "notice")?,
+        opt_out_sale: flag(opt_out, "opt-out")?,
+        lspa_covered: flag(lspa, "LSPA")?,
+    })
+}
+
+/// Parses a single US Privacy flag character.
+fn parse_flag(c: char) -> Result> { + match c { + 'Y' | 'y' => Ok(PrivacyFlag::Yes), + 'N' | 'n' => Ok(PrivacyFlag::No), + '-' => Ok(PrivacyFlag::NotApplicable), + other => Err(Report::new(ConsentDecodeError::InvalidUsPrivacy { + reason: format!("invalid flag character '{other}'"), + })), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn decodes_standard_string() { + let result = decode_us_privacy("1YNN").expect("should decode 1YNN"); + assert_eq!(result.version, 1); + assert_eq!(result.notice_given, PrivacyFlag::Yes); + assert_eq!(result.opt_out_sale, PrivacyFlag::No); + assert_eq!(result.lspa_covered, PrivacyFlag::No); + } + + #[test] + fn decodes_all_yes() { + let result = decode_us_privacy("1YYY").expect("should decode 1YYY"); + assert_eq!(result.notice_given, PrivacyFlag::Yes); + assert_eq!(result.opt_out_sale, PrivacyFlag::Yes); + assert_eq!(result.lspa_covered, PrivacyFlag::Yes); + } + + #[test] + fn decodes_all_not_applicable() { + let result = decode_us_privacy("1---").expect("should decode 1---"); + assert_eq!(result.notice_given, PrivacyFlag::NotApplicable); + assert_eq!(result.opt_out_sale, PrivacyFlag::NotApplicable); + assert_eq!(result.lspa_covered, PrivacyFlag::NotApplicable); + } + + #[test] + fn decodes_mixed_flags() { + let result = decode_us_privacy("1NYN").expect("should decode 1NYN"); + assert_eq!(result.notice_given, PrivacyFlag::No); + assert_eq!(result.opt_out_sale, PrivacyFlag::Yes); + assert_eq!(result.lspa_covered, PrivacyFlag::No); + } + + #[test] + fn roundtrips_through_display() { + let result = decode_us_privacy("1YNN").expect("should decode"); + assert_eq!( + result.to_string(), + "1YNN", + "should roundtrip through Display" + ); + } + + #[test] + fn rejects_too_short() { + let result = decode_us_privacy("1YN"); + assert!(result.is_err(), "should reject 3-char string"); + } + + #[test] + fn rejects_too_long() { + let result = decode_us_privacy("1YNNN"); + assert!(result.is_err(), "should reject 5-char string"); + 
} + + #[test] + fn rejects_empty() { + let result = decode_us_privacy(""); + assert!(result.is_err(), "should reject empty string"); + } + + #[test] + fn rejects_bad_version() { + let result = decode_us_privacy("2YNN"); + assert!(result.is_err(), "should reject version 2"); + } + + #[test] + fn rejects_invalid_flag() { + let result = decode_us_privacy("1XNN"); + assert!(result.is_err(), "should reject invalid flag 'X'"); + } + + #[test] + fn accepts_lowercase_flags() { + let result = decode_us_privacy("1ynn").expect("should accept lowercase"); + assert_eq!(result.notice_given, PrivacyFlag::Yes); + assert_eq!(result.opt_out_sale, PrivacyFlag::No); + assert_eq!(result.lspa_covered, PrivacyFlag::No); + } +} diff --git a/crates/common/src/consent_config.rs b/crates/common/src/consent_config.rs new file mode 100644 index 00000000..8a6d6036 --- /dev/null +++ b/crates/common/src/consent_config.rs @@ -0,0 +1,472 @@ +//! Consent forwarding configuration types. +//! +//! Defines the `[consent]` TOML section and its nested sub-sections for +//! controlling how Trusted Server interprets, validates, and forwards +//! privacy consent signals to advertising partners. + +use serde::{Deserialize, Serialize}; + +/// TCF spec recommends 13 months (≈395 days). +const MAX_CONSENT_AGE_DAYS: u32 = 395; + +/// How many days newer one string must be to win under the `newest` strategy. +const FRESHNESS_THRESHOLD_DAYS: u32 = 30; + +/// EU member states (27) + EEA non-EU (3) + UK GDPR (1). +const GDPR_COUNTRIES: &[&str] = &[ + "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR", "DE", "GR", "HU", "IE", "IT", "LV", + "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE", "IS", "LI", "NO", "GB", +]; + +/// US states with active comprehensive privacy laws (as of 2026). 
+const US_PRIVACY_STATES: &[&str] = &[ + "CA", "VA", "CO", "CT", "UT", "MT", "OR", "TX", "FL", "DE", "IA", "NE", "NH", "NJ", "TN", "MN", + "MD", "IN", "KY", "RI", +]; + +/// Converts a static `&[&str]` slice to an owned `Vec`. +fn str_vec(codes: &[&str]) -> Vec { + codes.iter().copied().map(String::from).collect() +} + +/// Top-level consent configuration (`[consent]` in TOML). +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ConsentConfig { + /// Operating mode for consent handling. + /// + /// - `"interpreter"` — decode consent strings and forward structured data + /// (recommended; enables observability and enforcement). + /// - `"proxy"` — forward raw strings without decoding. + #[serde(default = "default_consent_mode")] + pub mode: ConsentMode, + + /// Whether to check consent expiration based on TCF timestamps. + #[serde(default = "default_true")] + pub check_expiration: bool, + + /// Maximum consent age in days before it is considered expired. + /// + /// TCF spec recommends 13 months (≈395 days). + #[serde(default = "default_max_consent_age_days")] + pub max_consent_age_days: u32, + + /// GDPR jurisdiction configuration. + #[serde(default)] + pub gdpr: GdprConfig, + + /// US state privacy law configuration. + #[serde(default)] + pub us_states: UsStatesConfig, + + /// Defaults for constructing a US Privacy string when only `Sec-GPC` + /// is present and no explicit `us_privacy` cookie exists. + #[serde(default)] + pub us_privacy_defaults: UsPrivacyDefaultsConfig, + + /// How to resolve conflicts when both TCF and GPP strings are present + /// but disagree on consent status. + #[serde(default)] + pub conflict_resolution: ConflictResolutionConfig, + + /// Name of the KV Store used for consent persistence. + /// + /// When set, consent data is persisted per Synthetic ID so that + /// returning users without consent cookies can still have their + /// consent preferences applied. Set to `None` to disable. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub consent_store: Option, +} + +impl Default for ConsentConfig { + fn default() -> Self { + Self { + mode: ConsentMode::Interpreter, + check_expiration: true, + max_consent_age_days: MAX_CONSENT_AGE_DAYS, + gdpr: GdprConfig::default(), + us_states: UsStatesConfig::default(), + us_privacy_defaults: UsPrivacyDefaultsConfig::default(), + conflict_resolution: ConflictResolutionConfig::default(), + consent_store: None, + } + } +} + +// --------------------------------------------------------------------------- +// Consent mode +// --------------------------------------------------------------------------- + +/// Operating mode for the consent pipeline. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum ConsentMode { + /// Decode consent strings and forward structured data. + Interpreter, + /// Forward raw strings without decoding. + Proxy, +} + +// --------------------------------------------------------------------------- +// Consent forwarding mode (per-partner) +// --------------------------------------------------------------------------- + +/// How consent signals are forwarded to a specific partner integration. +/// +/// Controls whether consent travels through the `OpenRTB` body, raw `Cookie` +/// headers, or both. The default (`Both`) preserves backward compatibility. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, Default)] +#[serde(rename_all = "snake_case")] +pub enum ConsentForwardingMode { + /// Forward consent in the `OpenRTB` body only; strip consent cookies. + OpenrtbOnly, + /// Forward consent cookies only; omit consent fields from the body. + CookiesOnly, + /// Forward consent in both cookies and body (default). + #[default] + Both, +} + +impl ConsentForwardingMode { + /// Whether consent cookies should be stripped from forwarded requests. 
+ /// + /// Returns `true` for [`OpenrtbOnly`](Self::OpenrtbOnly) since consent + /// travels exclusively through the request body in that mode. + #[must_use] + pub const fn strips_consent_cookies(self) -> bool { + matches!(self, Self::OpenrtbOnly) + } + + /// Whether consent fields should be included in the request body. + /// + /// Returns `true` for [`OpenrtbOnly`](Self::OpenrtbOnly) and + /// [`Both`](Self::Both); `false` for [`CookiesOnly`](Self::CookiesOnly). + #[must_use] + pub const fn includes_body_consent(self) -> bool { + !matches!(self, Self::CookiesOnly) + } +} + +// --------------------------------------------------------------------------- +// GDPR +// --------------------------------------------------------------------------- + +/// GDPR jurisdiction configuration (`[consent.gdpr]`). +/// +/// The `applies_in` list is used for **observability and logging only** — it +/// does NOT cause consent to be synthesized. When a user's country appears in +/// this list, the system logs that GDPR applies, enabling publishers to +/// monitor compliance coverage. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct GdprConfig { + /// ISO 3166-1 alpha-2 country codes where GDPR applies. + #[serde(default = "default_gdpr_countries")] + pub applies_in: Vec, +} + +impl Default for GdprConfig { + fn default() -> Self { + Self { + applies_in: str_vec(GDPR_COUNTRIES), + } + } +} + +// --------------------------------------------------------------------------- +// US States +// --------------------------------------------------------------------------- + +/// US state privacy law configuration (`[consent.us_states]`). +/// +/// Config-driven to avoid recompilation when new state laws take effect. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct UsStatesConfig { + /// US state codes with active comprehensive privacy laws. 
+ #[serde(default = "default_us_privacy_states")] + pub privacy_states: Vec, +} + +impl Default for UsStatesConfig { + fn default() -> Self { + Self { + privacy_states: str_vec(US_PRIVACY_STATES), + } + } +} + +// --------------------------------------------------------------------------- +// US Privacy defaults (GPC handling) +// --------------------------------------------------------------------------- + +/// Publisher-configurable defaults for constructing a US Privacy string +/// when only the `Sec-GPC` header is present (`[consent.us_privacy_defaults]`). +/// +/// These reflect the publisher's actual compliance posture — they are +/// **publisher policy**, not protocol requirements. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct UsPrivacyDefaultsConfig { + /// Whether the publisher has actually shown a CCPA notice to the user. + #[serde(default = "default_true")] + pub notice_given: bool, + + /// Whether the publisher is subject to the Limited Service Provider + /// Agreement. + #[serde(default)] + pub lspa_covered: bool, + + /// Whether a `Sec-GPC: 1` header should be interpreted as an opt-out + /// of sale. + #[serde(default = "default_true")] + pub gpc_implies_optout: bool, +} + +impl Default for UsPrivacyDefaultsConfig { + fn default() -> Self { + Self { + notice_given: true, + lspa_covered: false, + gpc_implies_optout: true, + } + } +} + +// --------------------------------------------------------------------------- +// Conflict resolution +// --------------------------------------------------------------------------- + +/// How to resolve disagreements between GPP and TC String when both are +/// present (`[consent.conflict_resolution]`). +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ConflictResolutionConfig { + /// Resolution strategy. + #[serde(default = "default_conflict_mode")] + pub mode: ConflictMode, + + /// How many days newer one string must be to win under the `newest` + /// strategy. 
+ #[serde(default = "default_freshness_threshold_days")] + pub freshness_threshold_days: u32, +} + +impl Default for ConflictResolutionConfig { + fn default() -> Self { + Self { + mode: ConflictMode::Restrictive, + freshness_threshold_days: FRESHNESS_THRESHOLD_DAYS, + } + } +} + +/// Conflict resolution strategy. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum ConflictMode { + /// Deny consent when signals disagree (most privacy-safe). + Restrictive, + /// Use the newer signal based on timestamps. + Newest, + /// Grant consent when signals disagree (requires legal review). + Permissive, +} + +// --------------------------------------------------------------------------- +// Serde default value functions +// --------------------------------------------------------------------------- + +const fn default_consent_mode() -> ConsentMode { + ConsentMode::Interpreter +} + +const fn default_true() -> bool { + true +} + +const fn default_max_consent_age_days() -> u32 { + MAX_CONSENT_AGE_DAYS +} + +const fn default_conflict_mode() -> ConflictMode { + ConflictMode::Restrictive +} + +const fn default_freshness_threshold_days() -> u32 { + FRESHNESS_THRESHOLD_DAYS +} + +fn default_gdpr_countries() -> Vec { + str_vec(GDPR_COUNTRIES) +} + +fn default_us_privacy_states() -> Vec { + str_vec(US_PRIVACY_STATES) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::{ConflictMode, ConsentConfig, ConsentMode}; + + #[test] + fn default_config_uses_interpreter_mode() { + let config = ConsentConfig::default(); + assert_eq!( + config.mode, + ConsentMode::Interpreter, + "default mode should be interpreter" + ); + } + + #[test] + fn default_config_enables_expiration_checking() { + let config = ConsentConfig::default(); + assert!( + config.check_expiration, + 
"expiration checking should be enabled by default" + ); + assert_eq!( + config.max_consent_age_days, 395, + "default max age should be 395 days" + ); + } + + #[test] + fn default_gdpr_countries_includes_eu_eea_uk() { + let config = ConsentConfig::default(); + let countries = &config.gdpr.applies_in; + assert!( + countries.contains(&"DE".to_owned()), + "should include Germany" + ); + assert!( + countries.contains(&"NO".to_owned()), + "should include Norway (EEA)" + ); + assert!(countries.contains(&"GB".to_owned()), "should include UK"); + assert_eq!( + countries.len(), + 31, + "should have 31 countries (27 EU + 3 EEA + 1 UK)" + ); + } + + #[test] + fn default_us_privacy_states_includes_california() { + let config = ConsentConfig::default(); + assert!( + config.us_states.privacy_states.contains(&"CA".to_owned()), + "should include California" + ); + } + + #[test] + fn default_us_privacy_defaults_reflect_common_posture() { + let config = ConsentConfig::default(); + let defaults = &config.us_privacy_defaults; + assert!(defaults.notice_given, "notice_given should default to true"); + assert!( + !defaults.lspa_covered, + "lspa_covered should default to false" + ); + assert!( + defaults.gpc_implies_optout, + "gpc_implies_optout should default to true" + ); + } + + #[test] + fn default_conflict_resolution_is_restrictive() { + let config = ConsentConfig::default(); + assert_eq!( + config.conflict_resolution.mode, + ConflictMode::Restrictive, + "default conflict mode should be restrictive" + ); + assert_eq!( + config.conflict_resolution.freshness_threshold_days, 30, + "default freshness threshold should be 30 days" + ); + } + + #[test] + fn deserializes_from_empty_json() { + let config: ConsentConfig = + serde_json::from_str("{}").expect("should deserialize empty JSON with defaults"); + assert_eq!(config.mode, ConsentMode::Interpreter); + assert!(config.check_expiration); + } + + #[test] + fn deserializes_proxy_mode() { + let config: ConsentConfig = + 
serde_json::from_str(r#"{"mode": "proxy"}"#).expect("should deserialize proxy mode"); + assert_eq!(config.mode, ConsentMode::Proxy, "should parse proxy mode"); + } + + #[test] + fn consent_forwarding_mode_strips_cookies_only_for_openrtb() { + use super::ConsentForwardingMode; + + assert!( + ConsentForwardingMode::OpenrtbOnly.strips_consent_cookies(), + "openrtb_only should strip consent cookies" + ); + assert!( + !ConsentForwardingMode::CookiesOnly.strips_consent_cookies(), + "cookies_only should not strip consent cookies" + ); + assert!( + !ConsentForwardingMode::Both.strips_consent_cookies(), + "both should not strip consent cookies" + ); + } + + #[test] + fn consent_forwarding_mode_includes_body_consent_except_cookies_only() { + use super::ConsentForwardingMode; + + assert!( + ConsentForwardingMode::OpenrtbOnly.includes_body_consent(), + "openrtb_only should include body consent" + ); + assert!( + !ConsentForwardingMode::CookiesOnly.includes_body_consent(), + "cookies_only should not include body consent" + ); + assert!( + ConsentForwardingMode::Both.includes_body_consent(), + "both should include body consent" + ); + } + + #[test] + fn deserializes_full_config() { + let json = serde_json::json!({ + "mode": "interpreter", + "check_expiration": false, + "max_consent_age_days": 180, + "gdpr": { "applies_in": ["DE", "FR"] }, + "us_states": { "privacy_states": ["CA"] }, + "us_privacy_defaults": { + "notice_given": false, + "lspa_covered": true, + "gpc_implies_optout": false + }, + "conflict_resolution": { + "mode": "newest", + "freshness_threshold_days": 15 + } + }); + let config: ConsentConfig = + serde_json::from_value(json).expect("should deserialize full config"); + assert!(!config.check_expiration); + assert_eq!(config.max_consent_age_days, 180); + assert_eq!(config.gdpr.applies_in, vec!["DE", "FR"]); + assert_eq!(config.us_states.privacy_states, vec!["CA"]); + assert!(!config.us_privacy_defaults.notice_given); + 
assert!(config.us_privacy_defaults.lspa_covered); + assert_eq!(config.conflict_resolution.mode, ConflictMode::Newest); + assert_eq!(config.conflict_resolution.freshness_threshold_days, 15); + } +} diff --git a/crates/common/src/constants.rs b/crates/common/src/constants.rs index 5e57e6aa..d5b8c7f6 100644 --- a/crates/common/src/constants.rs +++ b/crates/common/src/constants.rs @@ -59,3 +59,12 @@ pub const INTERNAL_HEADERS: &[&str] = &[ "x-compress-hint", "x-debug-fastly-pop", ]; + +// Consent-related cookie names +pub const COOKIE_EUCONSENT_V2: &str = "euconsent-v2"; +pub const COOKIE_GPP: &str = "__gpp"; +pub const COOKIE_GPP_SID: &str = "__gpp_sid"; +pub const COOKIE_US_PRIVACY: &str = "us_privacy"; + +// Consent-related header names +pub const HEADER_SEC_GPC: HeaderName = HeaderName::from_static("sec-gpc"); diff --git a/crates/common/src/cookies.rs b/crates/common/src/cookies.rs index 1b33d99b..0ab90422 100644 --- a/crates/common/src/cookies.rs +++ b/crates/common/src/cookies.rs @@ -8,10 +8,24 @@ use error_stack::{Report, ResultExt}; use fastly::http::header; use fastly::Request; -use crate::constants::COOKIE_SYNTHETIC_ID; +use crate::constants::{ + COOKIE_EUCONSENT_V2, COOKIE_GPP, COOKIE_GPP_SID, COOKIE_SYNTHETIC_ID, COOKIE_US_PRIVACY, +}; use crate::error::TrustedServerError; use crate::settings::Settings; +/// Cookie names carrying privacy consent signals. +/// +/// Used by [`strip_cookies`] to remove consent signals from a `Cookie` header +/// before forwarding requests to partners that receive consent through the +/// `OpenRTB` body instead. +pub const CONSENT_COOKIE_NAMES: &[&str] = &[ + COOKIE_EUCONSENT_V2, + COOKIE_GPP, + COOKIE_GPP_SID, + COOKIE_US_PRIVACY, +]; + const COOKIE_MAX_AGE: i32 = 365 * 24 * 60 * 60; // 1 year /// Parses a cookie string into a [`CookieJar`]. @@ -59,6 +73,59 @@ pub fn handle_request_cookies( } } +/// Strips named cookies from a `Cookie` header value string. 
+/// +/// Parses the semicolon-separated cookie pairs, filters out any whose name +/// matches one of `cookie_names`, and reconstructs the header string. +/// +/// Returns an empty string if all cookies were stripped or the input was empty. +#[must_use] +pub fn strip_cookies(cookie_header: &str, cookie_names: &[&str]) -> String { + cookie_header + .split(';') + .map(str::trim) + .filter(|pair| { + if let Some(name) = pair.split('=').next() { + !cookie_names.contains(&name.trim()) + } else { + true + } + }) + .filter(|s| !s.is_empty()) + .collect::>() + .join("; ") +} + +/// Copies the `Cookie` header from one request to another, optionally +/// stripping consent cookies. +/// +/// When `strip_consent` is `true`, cookies listed in [`CONSENT_COOKIE_NAMES`] +/// are removed before forwarding. If stripping leaves no cookies, the header +/// is omitted entirely. Non-UTF-8 cookie headers are forwarded unchanged. +pub fn forward_cookie_header(from: &Request, to: &mut Request, strip_consent: bool) { + let Some(cookie_value) = from.get_header(header::COOKIE) else { + return; + }; + + if !strip_consent { + to.set_header(header::COOKIE, cookie_value); + return; + } + + match cookie_value.to_str() { + Ok(s) => { + let stripped = strip_cookies(s, CONSENT_COOKIE_NAMES); + if !stripped.is_empty() { + to.set_header(header::COOKIE, &stripped); + } + } + Err(_) => { + // Non-UTF-8 Cookie header — forward as-is + to.set_header(header::COOKIE, cookie_value); + } + } +} + /// Creates a synthetic ID cookie string. 
/// /// Generates a properly formatted cookie with security attributes @@ -199,4 +266,43 @@ mod tests { "Set-Cookie header should match create_synthetic_cookie output" ); } + + // --------------------------------------------------------------- + // strip_cookies tests + // --------------------------------------------------------------- + + #[test] + fn test_strip_cookies_removes_consent() { + let header = "euconsent-v2=BOE; __gpp=DBAC; session=abc123; us_privacy=1YNN"; + let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); + assert_eq!(stripped, "session=abc123"); + } + + #[test] + fn test_strip_cookies_preserves_non_consent() { + let header = "session=abc123; theme=dark"; + let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); + assert_eq!(stripped, "session=abc123; theme=dark"); + } + + #[test] + fn test_strip_cookies_empty_input() { + let stripped = strip_cookies("", CONSENT_COOKIE_NAMES); + assert_eq!(stripped, ""); + } + + #[test] + fn test_strip_cookies_all_stripped() { + let header = "euconsent-v2=BOE; __gpp=DBAC; __gpp_sid=2,6; us_privacy=1YNN"; + let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); + assert_eq!(stripped, ""); + } + + #[test] + fn test_strip_cookies_with_complex_values() { + // Cookie values can contain '=' characters + let header = "euconsent-v2=BOE=xyz; session=abc=123=def"; + let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); + assert_eq!(stripped, "session=abc=123=def"); + } } diff --git a/crates/common/src/integrations/adserver_mock.rs b/crates/common/src/integrations/adserver_mock.rs index b6d1e656..494f61ec 100644 --- a/crates/common/src/integrations/adserver_mock.rs +++ b/crates/common/src/integrations/adserver_mock.rs @@ -173,6 +173,17 @@ impl AdServerMockProvider { json!(null) }; + // Build consent summary from ConsentContext + let consent_json = request.user.consent.as_ref().map(|ctx| { + json!({ + "gdpr": if ctx.gdpr_applies { 1 } else { 0 }, + "consent": ctx.raw_tc_string, + "us_privacy": 
ctx.raw_us_privacy, + "gpp": ctx.raw_gpp_string, + "gpp_sid": ctx.gpp_section_ids, + }) + }); + // Build full mediation request Ok(json!({ "id": request.id, @@ -180,6 +191,7 @@ impl AdServerMockProvider { "ext": { "bidder_responses": bidder_responses_json, "config": config_json, + "consent": consent_json, }, })) } @@ -660,6 +672,57 @@ mod tests { assert!(!provider.supports_media_type(&MediaType::Native)); } + #[test] + fn test_mediation_request_includes_consent() { + use crate::consent::ConsentContext; + + let config = AdServerMockConfig { + enabled: true, + endpoint: "http://localhost:6767/adserver/mediate".to_string(), + timeout_ms: 500, + price_floor: None, + }; + + let provider = AdServerMockProvider::new(config); + + let mut request = create_test_auction_request(); + request.user.consent = Some(ConsentContext { + raw_tc_string: Some("BOEFEAyO".to_string()), + gdpr_applies: true, + raw_us_privacy: Some("1YNN".to_string()), + raw_gpp_string: Some("DBACNYA~CPXxRfAPXxRfA".to_string()), + gpp_section_ids: Some(vec![2, 6]), + ..Default::default() + }); + + let mediation_req = provider + .build_mediation_request(&request, &[]) + .expect("should build request"); + + let consent = &mediation_req["ext"]["consent"]; + assert_eq!(consent["gdpr"], 1); + assert_eq!(consent["consent"], "BOEFEAyO"); + assert_eq!(consent["us_privacy"], "1YNN"); + assert_eq!(consent["gpp"], "DBACNYA~CPXxRfAPXxRfA"); + assert_eq!(consent["gpp_sid"], json!([2, 6])); + } + + #[test] + fn test_mediation_request_no_consent() { + let config = AdServerMockConfig::default(); + let provider = AdServerMockProvider::new(config); + let request = create_test_auction_request(); // consent is None + + let mediation_req = provider + .build_mediation_request(&request, &[]) + .expect("should build request"); + + assert!( + mediation_req["ext"]["consent"].is_null(), + "consent should be null when no consent context" + ); + } + #[test] fn test_parse_mediation_response_with_missing_prices() { // Test that mediator 
response with missing price fields returns None prices diff --git a/crates/common/src/integrations/aps.rs b/crates/common/src/integrations/aps.rs index bdd9c25b..f2c13371 100644 --- a/crates/common/src/integrations/aps.rs +++ b/crates/common/src/integrations/aps.rs @@ -41,6 +41,32 @@ struct ApsBidRequest { /// Timeout in milliseconds #[serde(skip_serializing_if = "Option::is_none")] timeout: Option, + + /// GDPR consent information. + #[serde(skip_serializing_if = "Option::is_none")] + gdpr: Option, + + /// US Privacy (CCPA) string. + #[serde(rename = "usPrivacy", skip_serializing_if = "Option::is_none")] + us_privacy: Option, + + /// GPP consent string. + #[serde(skip_serializing_if = "Option::is_none")] + gpp: Option, + + /// GPP section IDs as comma-separated string. + #[serde(rename = "gppSid", skip_serializing_if = "Option::is_none")] + gpp_sid: Option, +} + +/// GDPR consent information for APS requests. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ApsGdprConsent { + /// Whether GDPR applies to this request. + enabled: bool, + /// TCF v2 consent string. + #[serde(skip_serializing_if = "Option::is_none")] + consent: Option, } /// APS slot configuration. @@ -268,6 +294,9 @@ impl ApsAuctionProvider { } /// Convert unified `AuctionRequest` to APS TAM bid request format. + /// + /// Populates consent fields (GDPR, US Privacy, GPP) from the + /// [`ConsentContext`](crate::consent::ConsentContext) attached to the request. 
fn to_aps_request(&self, request: &AuctionRequest) -> ApsBidRequest { let slots: Vec = request .slots @@ -289,12 +318,33 @@ impl ApsAuctionProvider { }) .collect(); + // Build consent fields from ConsentContext + let consent_ctx = request.user.consent.as_ref(); + let gdpr = consent_ctx.map(|ctx| ApsGdprConsent { + enabled: ctx.gdpr_applies, + consent: ctx.raw_tc_string.clone(), + }); + let us_privacy = consent_ctx.and_then(|ctx| ctx.raw_us_privacy.clone()); + let gpp = consent_ctx.and_then(|ctx| ctx.raw_gpp_string.clone()); + let gpp_sid = consent_ctx.and_then(|ctx| { + ctx.gpp_section_ids.as_ref().map(|ids| { + ids.iter() + .map(std::string::ToString::to_string) + .collect::>() + .join(",") + }) + }); + ApsBidRequest { pub_id: self.config.pub_id.clone(), slots, page_url: request.publisher.page_url.clone(), user_agent: request.device.as_ref().and_then(|d| d.user_agent.clone()), timeout: Some(self.config.timeout_ms), + gdpr, + us_privacy, + gpp, + gpp_sid, } } @@ -862,6 +912,97 @@ mod tests { assert!(!provider.supports_media_type(&MediaType::Native)); } + #[test] + fn test_aps_request_includes_consent_fields() { + use crate::consent::ConsentContext; + + let config = ApsConfig { + enabled: true, + pub_id: "5128".to_string(), + endpoint: default_endpoint(), + timeout_ms: 800, + }; + let provider = ApsAuctionProvider::new(config); + + let mut request = create_test_auction_request(); + request.user.consent = Some(ConsentContext { + raw_tc_string: Some("BOEFEAyOEFEAyAHABDENAI4AAAB9vABAASA".to_string()), + gdpr_applies: true, + raw_us_privacy: Some("1YNN".to_string()), + raw_gpp_string: Some("DBACNYA~CPXxRfAPXxRfA".to_string()), + gpp_section_ids: Some(vec![2, 6]), + ..Default::default() + }); + + let aps_request = provider.to_aps_request(&request); + + // Verify GDPR consent + let gdpr = aps_request.gdpr.expect("should have gdpr"); + assert!(gdpr.enabled); + assert_eq!( + gdpr.consent.as_deref(), + Some("BOEFEAyOEFEAyAHABDENAI4AAAB9vABAASA") + ); + + // Verify US Privacy 
+ assert_eq!(aps_request.us_privacy.as_deref(), Some("1YNN")); + + // Verify GPP + assert_eq!(aps_request.gpp.as_deref(), Some("DBACNYA~CPXxRfAPXxRfA")); + assert_eq!(aps_request.gpp_sid.as_deref(), Some("2,6")); + } + + #[test] + fn test_aps_request_no_consent() { + let config = ApsConfig { + enabled: true, + pub_id: "5128".to_string(), + endpoint: default_endpoint(), + timeout_ms: 800, + }; + let provider = ApsAuctionProvider::new(config); + let request = create_test_auction_request(); // consent is None + + let aps_request = provider.to_aps_request(&request); + + assert!(aps_request.gdpr.is_none()); + assert!(aps_request.us_privacy.is_none()); + assert!(aps_request.gpp.is_none()); + assert!(aps_request.gpp_sid.is_none()); + } + + #[test] + fn test_aps_request_consent_serialization() { + use crate::consent::ConsentContext; + + let config = ApsConfig { + enabled: true, + pub_id: "5128".to_string(), + endpoint: default_endpoint(), + timeout_ms: 800, + }; + let provider = ApsAuctionProvider::new(config); + + let mut request = create_test_auction_request(); + request.user.consent = Some(ConsentContext { + raw_tc_string: Some("BOE".to_string()), + gdpr_applies: true, + ..Default::default() + }); + + let aps_request = provider.to_aps_request(&request); + let json = serde_json::to_value(&aps_request).expect("should serialize"); + + // GDPR fields present + assert_eq!(json["gdpr"]["enabled"], true); + assert_eq!(json["gdpr"]["consent"], "BOE"); + + // Absent fields should not appear (skip_serializing_if) + assert!(json.get("usPrivacy").is_none()); + assert!(json.get("gpp").is_none()); + assert!(json.get("gppSid").is_none()); + } + #[test] fn test_aps_bids_have_no_creative_and_no_decoded_price() { // APS doesn't provide creative HTML - it only provides targeting keys diff --git a/crates/common/src/integrations/lockr.rs b/crates/common/src/integrations/lockr.rs index 29576e01..a9fc8007 100644 --- a/crates/common/src/integrations/lockr.rs +++ 
b/crates/common/src/integrations/lockr.rs @@ -24,6 +24,7 @@ use serde::Deserialize; use validator::Validate; use crate::backend::BackendConfig; +use crate::cookies::forward_cookie_header; use crate::error::TrustedServerError; use crate::http_util::copy_custom_headers; use crate::integrations::{ @@ -261,6 +262,10 @@ impl LockrIntegration { } /// Copy relevant request headers for proxying. + /// + /// Consent cookies are always stripped — consent signals are forwarded + /// through the `OpenRTB` body by the Prebid integration, not through + /// Lockr's cookie-based API calls. fn copy_request_headers(&self, from: &Request, to: &mut Request) { let headers_to_copy = [ header::CONTENT_TYPE, @@ -269,7 +274,6 @@ impl LockrIntegration { header::AUTHORIZATION, header::ACCEPT_LANGUAGE, header::ACCEPT_ENCODING, - header::COOKIE, ]; for header_name in &headers_to_copy { @@ -278,7 +282,10 @@ impl LockrIntegration { } } - // Handle Origin header - use override if configured, otherwise forward original + // Always strip consent cookies — consent travels through the OpenRTB body + forward_cookie_header(from, to, true); + + // Handle Origin header — use override if configured, otherwise forward original let origin = self .config .origin_override diff --git a/crates/common/src/integrations/prebid.rs b/crates/common/src/integrations/prebid.rs index 1ad84f9e..fe62f0d6 100644 --- a/crates/common/src/integrations/prebid.rs +++ b/crates/common/src/integrations/prebid.rs @@ -15,6 +15,8 @@ use crate::auction::types::{ AuctionContext, AuctionRequest, AuctionResponse, Bid as AuctionBid, MediaType, }; use crate::backend::BackendConfig; +use crate::consent_config::ConsentForwardingMode; +use crate::cookies::forward_cookie_header; use crate::error::TrustedServerError; use crate::http_util::RequestInfo; use crate::integrations::{ @@ -23,8 +25,8 @@ use crate::integrations::{ IntegrationRegistration, }; use crate::openrtb::{ - Banner, Device, Format, Geo, Imp, ImpExt, OpenRtbRequest, PrebidExt, 
PrebidImpExt, Regs, - RegsExt, RequestExt, Site, TrustedServerExt, User, UserExt, + Banner, ConsentedProvidersSettings, Device, Format, Geo, Imp, ImpExt, OpenRtbRequest, + PrebidExt, PrebidImpExt, Regs, RegsExt, RequestExt, Site, TrustedServerExt, User, UserExt, }; use crate::request_signing::RequestSigner; use crate::settings::{IntegrationConfig, Settings}; @@ -62,6 +64,13 @@ pub struct PrebidIntegrationConfig { deserialize_with = "crate::settings::vec_from_seq_or_map" )] pub script_patterns: Vec, + /// How consent signals are forwarded to Prebid Server. + /// + /// - `openrtb_only` — consent in `OpenRTB` body only, consent cookies stripped + /// - `cookies_only` — consent cookies forwarded, body consent fields omitted + /// - `both` — consent in both cookies and body (default) + #[serde(default)] + pub consent_forwarding: ConsentForwardingMode, } impl IntegrationConfig for PrebidIntegrationConfig { @@ -416,9 +425,17 @@ fn make_first_party_proxy_url( ) } -fn copy_request_headers(from: &Request, to: &mut Request) { +/// Copies browser headers to the outgoing Prebid Server request. +/// +/// In [`ConsentForwardingMode::OpenrtbOnly`] mode, consent cookies are +/// stripped from the `Cookie` header since consent travels exclusively +/// through the `OpenRTB` body. +fn copy_request_headers( + from: &Request, + to: &mut Request, + consent_forwarding: ConsentForwardingMode, +) { let headers_to_copy = [ - header::COOKIE, header::USER_AGENT, header::HeaderName::from_static("x-forwarded-for"), header::REFERER, @@ -430,6 +447,8 @@ fn copy_request_headers(from: &Request, to: &mut Request) { to.set_header(header_name, value); } } + + forward_cookie_header(from, to, consent_forwarding.strips_consent_cookies()); } /// Appends query parameters to a URL, handling both URLs with and without existing query strings. 
@@ -521,10 +540,31 @@ impl PrebidAuctionProvider { } }); - // Build user object + // Build user object — populate consent at both OpenRTB 2.6 top-level + // and Prebid ext-based locations (dual placement). + // In cookies_only mode, body consent fields are omitted — consent + // travels exclusively through the forwarded Cookie header. + let consent_ctx = if self.config.consent_forwarding.includes_body_consent() { + request.user.consent.as_ref() + } else { + None + }; + let raw_tc = consent_ctx.and_then(|c| c.raw_tc_string.clone()); let user = Some(User { id: Some(request.user.id.clone()), + // OpenRTB 2.6 top-level consent field + consent: raw_tc.clone(), ext: Some(UserExt { + // Prebid ext-based consent field + consent: raw_tc, + consented_providers_settings: consent_ctx + .and_then(|c| c.raw_ac_string.as_ref()) + .map(|ac| ConsentedProvidersSettings { + consented_providers: Some(ac.clone()), + }), + // EIDs will be populated by identity providers; consent gating + // is applied via `gate_eids_by_consent` before they are set here. + eids: None, synthetic_fresh: Some(request.user.fresh_id.clone()), }), }); @@ -540,16 +580,14 @@ impl PrebidAuctionProvider { }), }); - // Build regs object if Sec-GPC header is present - let regs = if context.request.get_header("Sec-GPC").is_some() { - Some(Regs { - ext: Some(RegsExt { - us_privacy: Some("1YYN".to_string()), - }), - }) - } else { - None - }; + // Build regs object from ConsentContext. + // + // Populates OpenRTB 2.6 canonical fields: + // - regs.gdpr: 1 if GDPR applies (TCF string present) + // - regs.us_privacy: raw US Privacy string + // - regs.gpp: raw GPP string + // - regs.gpp_sid: active GPP section IDs + let regs = Self::build_regs(consent_ctx); // Build ext object let request_info = RequestInfo::from_request(context.request); @@ -583,6 +621,45 @@ impl PrebidAuctionProvider { } } + /// Builds the `regs` object from a [`ConsentContext`]. 
+ /// + /// Populates consent fields at **both** `OpenRTB` 2.6 top-level locations + /// and the `regs.ext.*` locations that Prebid Server reads today. + /// + /// Returns [`None`] if no consent-relevant data is present (avoids sending + /// an empty `regs` object to Prebid Server). + fn build_regs(consent_ctx: Option<&crate::consent::ConsentContext>) -> Option { + let ctx = consent_ctx?; + + let has_data = ctx.gdpr_applies + || ctx.raw_us_privacy.is_some() + || ctx.raw_gpp_string.is_some() + || ctx.gpc; + + if !has_data { + return None; + } + + let gdpr = if ctx.gdpr_applies { Some(1) } else { Some(0) }; + + // Build ext first so the dual-placement fields are cloned once from + // ConsentContext (into ext), then once more into Regs top-level. + let ext = RegsExt { + gdpr, + us_privacy: ctx.raw_us_privacy.clone(), + gpp: ctx.raw_gpp_string.clone(), + gpp_sid: ctx.gpp_section_ids.clone(), + }; + + Some(Regs { + gdpr: ext.gdpr, + us_privacy: ext.us_privacy.clone(), + gpp: ext.gpp.clone(), + gpp_sid: ext.gpp_sid.clone(), + ext: Some(ext), + }) + } + /// Parse `OpenRTB` response into auction response. fn parse_openrtb_response(&self, json: &Json, response_time_ms: u64) -> AuctionResponse { let mut bids = Vec::new(); @@ -723,7 +800,11 @@ impl AuctionProvider for PrebidAuctionProvider { Method::POST, format!("{}/openrtb2/auction", self.config.server_url), ); - copy_request_headers(context.request, &mut pbs_req); + copy_request_headers( + context.request, + &mut pbs_req, + self.config.consent_forwarding, + ); pbs_req .set_body_json(&openrtb) @@ -874,6 +955,7 @@ mod tests { debug: false, debug_query_params: None, script_patterns: default_script_patterns(), + consent_forwarding: ConsentForwardingMode::Both, } } diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index a01865f6..67527f6a 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -9,7 +9,7 @@ //! - [`constants`]: Application-wide constants and configuration values //! 
- [`cookies`]: Cookie parsing and generation utilities //! - [`error`]: Error types and error handling utilities -//! - [`gdpr`]: GDPR consent management and TCF string parsing +//! - [`consent`]: Consent signal extraction and logging //! - [`geo`]: Geographic location utilities and DMA code extraction //! - [`models`]: Data models for ad serving and callbacks //! - [`prebid`]: Prebid integration and real-time bidding support @@ -36,6 +36,8 @@ pub mod auction; pub mod auction_config_types; pub mod auth; pub mod backend; +pub mod consent; +pub mod consent_config; pub mod constants; pub mod cookies; pub mod creative; diff --git a/crates/common/src/openrtb.rs b/crates/common/src/openrtb.rs index 3b405209..92540202 100644 --- a/crates/common/src/openrtb.rs +++ b/crates/common/src/openrtb.rs @@ -54,16 +54,72 @@ pub struct Site { pub struct User { #[serde(skip_serializing_if = "Option::is_none")] pub id: Option, + /// TCF v2 consent string (raw TC String from `euconsent-v2` cookie). + /// + /// `OpenRTB` 2.6 canonical field for GDPR consent. + #[serde(skip_serializing_if = "Option::is_none")] + pub consent: Option, #[serde(skip_serializing_if = "Option::is_none")] pub ext: Option, } #[derive(Debug, Serialize, Default)] pub struct UserExt { + /// TCF v2 consent string (Prebid reads `user.ext.consent`). + #[serde(skip_serializing_if = "Option::is_none")] + pub consent: Option, + /// Google Additional Consent settings for Ad Manager / `AdX` demand. + #[serde( + rename = "ConsentedProvidersSettings", + skip_serializing_if = "Option::is_none" + )] + pub consented_providers_settings: Option, + /// Extended User IDs from identity providers. + /// + /// Gated by TCF Purpose 1 (storage) and Purpose 4 (personalized ads). + #[serde(skip_serializing_if = "Option::is_none")] + pub eids: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub synthetic_fresh: Option, } +/// Google Additional Consent (AC) string container. 
+/// +/// Covers ad tech providers not in the IAB Global Vendor List but +/// participating in the Google ecosystem. Required for Google Ad Manager +/// and `AdX` demand. +/// +/// Format: `{version}~{provider_ids}~dv.` where provider IDs are +/// dot-separated Google ATP IDs. +#[derive(Debug, Serialize, Default)] +pub struct ConsentedProvidersSettings { + /// The AC string value (e.g. `"2~2628.2316.3119~dv."`). + #[serde(skip_serializing_if = "Option::is_none")] + pub consented_providers: Option, +} + +/// An Extended User ID entry from an identity provider. +#[derive(Debug, Serialize)] +pub struct Eid { + /// Identity provider domain (e.g. `"id5-sync.com"`). + pub source: String, + /// One or more user IDs from this provider. + pub uids: Vec, +} + +/// A single user identifier within an [`Eid`] entry. +#[derive(Debug, Serialize)] +pub struct Uid { + /// The identifier value. + pub id: String, + /// Agent type: 1 = cookie/device, 2 = person, 3 = user-provided. + #[serde(skip_serializing_if = "Option::is_none")] + pub atype: Option, + /// Provider-specific extension data. + #[serde(skip_serializing_if = "Option::is_none")] + pub ext: Option, +} + #[derive(Debug, Serialize, Default)] pub struct Device { #[serde(skip_serializing_if = "Option::is_none")] @@ -87,14 +143,49 @@ pub struct Geo { #[derive(Debug, Serialize, Default)] pub struct Regs { + /// GDPR applicability flag (1 = GDPR applies, 0 = does not apply). + /// + /// `OpenRTB` 2.6 canonical field. Set based on TCF consent presence. + #[serde(skip_serializing_if = "Option::is_none")] + pub gdpr: Option, + /// US Privacy string (4-character IAB CCPA format). + /// + /// `OpenRTB` 2.6 top-level field (migrated from `regs.ext.us_privacy`). + #[serde(skip_serializing_if = "Option::is_none")] + pub us_privacy: Option, + /// GPP consent string (raw `__gpp` cookie value). + /// + /// `OpenRTB` 2.6 canonical field for IAB Global Privacy Platform. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub gpp: Option, + /// GPP section ID list (active sections in the GPP string). + /// + /// `OpenRTB` 2.6 canonical field, derived from decoded GPP data. + #[serde(skip_serializing_if = "Option::is_none")] + pub gpp_sid: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub ext: Option, } -#[derive(Debug, Serialize, Default)] +/// Prebid-compatible `regs.ext` consent fields. +/// +/// Prebid Server reads consent signals from `regs.ext.*` rather than the +/// `OpenRTB` 2.6 top-level locations. We populate both to maximise +/// compatibility (see proposal Key Decision #2 — Dual-Placement). +#[derive(Debug, Clone, Serialize, Default)] pub struct RegsExt { + /// GDPR applicability flag (mirrors `regs.gdpr`). + #[serde(skip_serializing_if = "Option::is_none")] + pub gdpr: Option, + /// US Privacy string (mirrors `regs.us_privacy`). #[serde(skip_serializing_if = "Option::is_none")] pub us_privacy: Option, + /// GPP consent string (mirrors `regs.gpp`). + #[serde(skip_serializing_if = "Option::is_none")] + pub gpp: Option, + /// GPP section ID list (mirrors `regs.gpp_sid`). + #[serde(skip_serializing_if = "Option::is_none")] + pub gpp_sid: Option>, } #[derive(Debug, Serialize, Default)] @@ -173,7 +264,7 @@ pub struct ResponseExt { #[cfg(test)] mod tests { - use super::{OpenRtbBid, OpenRtbResponse, ResponseExt, SeatBid}; + use super::*; use crate::auction::types::OrchestratorExt; #[test] @@ -231,4 +322,147 @@ mod tests { assert_eq!(serialized, expected); } + + #[test] + fn regs_serializes_dual_placement_consent_fields() { + // Mirror the production pattern: build ext, then duplicate into top-level. 
+ let ext = RegsExt { + gdpr: Some(1), + us_privacy: Some("1YNN".to_string()), + gpp: Some("DBACNY~CPXxRfA".to_string()), + gpp_sid: Some(vec![2, 6]), + }; + let regs = Regs { + gdpr: ext.gdpr, + us_privacy: ext.us_privacy.clone(), + gpp: ext.gpp.clone(), + gpp_sid: ext.gpp_sid.clone(), + ext: Some(ext), + }; + + let serialized = serde_json::to_value(®s).expect("should serialize"); + // Top-level fields + assert_eq!(serialized["gdpr"], 1, "top-level gdpr should be 1"); + assert_eq!( + serialized["us_privacy"], "1YNN", + "top-level us_privacy should match" + ); + assert_eq!( + serialized["gpp"], "DBACNY~CPXxRfA", + "top-level gpp should match" + ); + assert_eq!( + serialized["gpp_sid"], + serde_json::json!([2, 6]), + "top-level gpp_sid should match" + ); + // ext-based fields (Prebid reads these) + let ext = &serialized["ext"]; + assert_eq!(ext["gdpr"], 1, "ext gdpr should mirror top-level"); + assert_eq!( + ext["us_privacy"], "1YNN", + "ext us_privacy should mirror top-level" + ); + assert_eq!( + ext["gpp"], "DBACNY~CPXxRfA", + "ext gpp should mirror top-level" + ); + assert_eq!( + ext["gpp_sid"], + serde_json::json!([2, 6]), + "ext gpp_sid should mirror top-level" + ); + } + + #[test] + fn regs_omits_none_fields() { + let regs = Regs::default(); + let serialized = serde_json::to_value(®s).expect("should serialize"); + let obj = serialized.as_object().expect("should be object"); + assert!( + obj.is_empty(), + "all-None regs should serialize as empty object" + ); + } + + #[test] + fn regs_ext_omits_none_fields() { + let ext = RegsExt::default(); + let serialized = serde_json::to_value(&ext).expect("should serialize"); + let obj = serialized.as_object().expect("should be object"); + assert!( + obj.is_empty(), + "all-None RegsExt should serialize as empty object" + ); + } + + #[test] + fn user_serializes_dual_placement_consent() { + let user = User { + id: Some("user-1".to_string()), + consent: Some("CPXxGfAPXxGfA".to_string()), + ext: Some(UserExt { + consent: 
Some("CPXxGfAPXxGfA".to_string()), + consented_providers_settings: Some(ConsentedProvidersSettings { + consented_providers: Some("2~2628.2316~dv.".to_string()), + }), + eids: None, + synthetic_fresh: None, + }), + }; + + let serialized = serde_json::to_value(&user).expect("should serialize"); + assert_eq!( + serialized["consent"], "CPXxGfAPXxGfA", + "top-level user.consent should be set" + ); + assert_eq!( + serialized["ext"]["consent"], "CPXxGfAPXxGfA", + "user.ext.consent should mirror top-level" + ); + assert_eq!( + serialized["ext"]["ConsentedProvidersSettings"]["consented_providers"], + "2~2628.2316~dv.", + "AC string should be present" + ); + } + + #[test] + fn user_omits_consent_when_none() { + let user = User { + id: Some("user-1".to_string()), + consent: None, + ext: None, + }; + + let serialized = serde_json::to_value(&user).expect("should serialize"); + assert!( + serialized.get("consent").is_none(), + "consent should be omitted when None" + ); + } + + #[test] + fn eid_serializes_correctly() { + let eid = Eid { + source: "id5-sync.com".to_string(), + uids: vec![Uid { + id: "ID5-abc123".to_string(), + atype: Some(1), + ext: None, + }], + }; + + let serialized = serde_json::to_value(&eid).expect("should serialize"); + assert_eq!(serialized["source"], "id5-sync.com", "source should match"); + assert_eq!( + serialized["uids"][0]["id"], "ID5-abc123", + "uid id should match" + ); + assert_eq!(serialized["uids"][0]["atype"], 1, "atype should be 1"); + assert!( + serialized["uids"][0].get("ext").is_none(), + "ext should be omitted when None" + ); + } } diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 78489d2e..5c2d9e49 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -3,6 +3,8 @@ use fastly::http::{header, StatusCode}; use fastly::{Body, Request, Response}; use crate::backend::BackendConfig; +use crate::consent::{build_consent_context, ConsentPipelineInput}; +use 
crate::cookies::handle_request_cookies; use crate::http_util::{serve_static_with_etag, RequestInfo}; use crate::constants::{HEADER_X_COMPRESS_HINT, HEADER_X_SYNTHETIC_ID}; @@ -202,9 +204,25 @@ pub fn handle_publisher_request( req.get_header("x-forwarded-proto"), ); + // Parse cookies once for reuse by both consent extraction and synthetic ID logic. + let cookie_jar = handle_request_cookies(&req)?; + // Generate synthetic identifiers before the request body is consumed. let synthetic_id = get_or_generate_synthetic_id(settings, &req)?; + // Extract, decode, and log consent signals (TCF, GPP, US Privacy, GPC) + // from the incoming request. The ConsentContext carries both raw strings + // (for OpenRTB forwarding) and decoded data (for observability). + // When a consent_store is configured, this also persists consent to KV + // and falls back to stored consent when cookies are absent. + let geo = crate::geo::GeoInfo::from_request(&req); + let _consent_context = build_consent_context(&ConsentPipelineInput { + jar: cookie_jar.as_ref(), + req: &req, + config: &settings.consent, + geo: geo.as_ref(), + synthetic_id: Some(synthetic_id.as_str()), + }); log::debug!("Proxy synthetic IDs - trusted: {}", synthetic_id); let backend_name = BackendConfig::from_url( diff --git a/crates/common/src/settings.rs b/crates/common/src/settings.rs index 57cb3951..a7e98a69 100644 --- a/crates/common/src/settings.rs +++ b/crates/common/src/settings.rs @@ -10,6 +10,7 @@ use url::Url; use validator::{Validate, ValidationError}; use crate::auction_config_types::AuctionConfig; +use crate::consent_config::ConsentConfig; use crate::error::TrustedServerError; pub const ENVIRONMENT_VARIABLE_PREFIX: &str = "TRUSTED_SERVER"; @@ -315,6 +316,8 @@ pub struct Settings { #[serde(default)] pub auction: AuctionConfig, #[serde(default)] + pub consent: ConsentConfig, + #[serde(default)] pub proxy: Proxy, } diff --git a/fastly.toml b/fastly.toml index d63815d9..4897169e 100644 --- a/fastly.toml +++ b/fastly.toml 
@@ -34,6 +34,10 @@ build = """ [[local_server.kv_stores.creative_store]] key = "placeholder" data = "placeholder" + + [[local_server.kv_stores.consent_store]] + key = "placeholder" + data = "placeholder" [local_server.secret_stores] [[local_server.secret_stores.signing_keys]] key = "ts-2025-10-A" diff --git a/trusted-server.toml b/trusted-server.toml index 5ad18888..92f0eb54 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -89,6 +89,32 @@ api_origin = "https://api-js.datadome.co" cache_ttl_seconds = 3600 rewrite_sdk = true +# Consent forwarding configuration +# Controls how Trusted Server interprets and forwards privacy consent signals. +# All values shown below are the defaults — uncomment to override. +# [consent] +# mode = "interpreter" # "interpreter" (decode + forward) or "proxy" (raw passthrough) +# check_expiration = true # Check TCF consent freshness +# max_consent_age_days = 395 # Max age before consent is treated as expired (~13 months) + +# [consent.gdpr] +# applies_in = ["AT","BE","BG","HR","CY","CZ","DK","EE","FI","FR","DE","GR","HU","IE","IT","LV","LT","LU","MT","NL","PL","PT","RO","SK","SI","ES","SE","IS","LI","NO","GB"] + +# [consent.us_states] +# privacy_states = ["CA","VA","CO","CT","UT","MT","OR","TX","FL","DE","IA","NE","NH","NJ","TN","MN","MD","IN","KY","RI"] + +# [consent.us_privacy_defaults] +# notice_given = true # Has publisher actually shown CCPA notice? +# lspa_covered = false # Is publisher subject to LSPA? +# gpc_implies_optout = true # Should Sec-GPC: 1 trigger opt-out? + +# [consent.conflict_resolution] +# mode = "restrictive" # "restrictive" | "newest" | "permissive" +# freshness_threshold_days = 30 + +# KV Store consent persistence (requires a KV store named "consent_store" in fastly.toml). To enable, move this key up under [consent], above the [consent.*] sub-tables: TOML assigns a key to the nearest preceding table header, so uncommenting it here would place it in [consent.conflict_resolution] (or an earlier table) instead. +# consent_store = "consent_store" + # Rewrite configuration for creative HTML/CSS processing # [rewrite] # Domains to exclude from first-party rewriting (supports wildcards like "*.example.com")