Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 17 additions & 21 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -384,28 +384,24 @@ configs:
return (pass);
}

if (req.http.Client-Cert) {
# Authenticated HTML is user-specific → never cache
if (req.http.Accept ~ "text/html" ||
req.http.Accept ~ "application/xhtml+xml") {
return (pass);
}

# Conditional requests must reach backend for validation
if (req.http.If-Match || req.http.If-None-Match ||
req.http.If-Modified-Since || req.http.If-Unmodified-Since) {
return (pass);
}

# /access endpoint returns agent-specific group memberships
if (req.url ~ "^/access") {
return (pass);
}
# Delegated requests carry user identity in the On-Behalf-Of header.
# The backend response echoes that identity in the Link header
# (acl#agent). The cache key does not include the asserted identity,
# so caching would let a later anonymous request to the same
# URL+Accept read back the previous agent's WebID and ACL grant.
if (req.http.On-Behalf-Of) {
return (pass);
}

# SPARQL referencing /acl/agents/ depends on agent identity → don't cache
if (req.url ~ "%2Facl%2Fagents%2F") {
return (pass);
}
# Authenticated responses get acl#agent stamped into the Link header by
# the backend, regardless of representation (HTML, RDF/XML, Turtle,
# JSON-LD, SPARQL results, …). The URL-keyed cache slot ignores the
# asserting identity, so any such response would leak to anonymous
# readers of the same URL. /static/* is the only safely-shared path —
# it's served by Tomcat's default servlet (web.xml:365-371), bypasses
# Jersey, and carries no identity-bearing headers.
if (req.http.Client-Cert && req.url !~ "^/static/") {
return (pass);
}

if (req.http.Cookie) {
Expand Down
22 changes: 22 additions & 0 deletions http-tests/document-hierarchy/GET-namespace-forClass-rdfs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -euo pipefail

# HTTP test: GET <end-user base>ns?forClass=sp:Describe as the owner and check
# that the RDF/XML response mentions the requested class.
#
# Required environment (read, never written): END_USER_BASE_URL,
# TMP_END_USER_DATASET, END_USER_ENDPOINT_URL, ADMIN_BASE_URL,
# TMP_ADMIN_DATASET, ADMIN_ENDPOINT_URL, END_USER_VARNISH_SERVICE,
# ADMIN_VARNISH_SERVICE, FRONTEND_VARNISH_SERVICE, OWNER_CERT_FILE,
# OWNER_CERT_PWD.
# initialize_dataset and purge_cache are not defined in this script; they are
# presumably supplied by the test harness that runs it.

initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
purge_cache "$END_USER_VARNISH_SERVICE"
purge_cache "$ADMIN_VARNISH_SERVICE"
purge_cache "$FRONTEND_VARNISH_SERVICE"

# sp:Describe is declared only as rdfs:Class (not owl:Class) in sp.ttl.
# OntologyFilter must promote rdfs:Class to owl:Class during materialization so
# that OWL2 profiles recognise third-party vocab terms and return their SPIN constructors.

# -f makes curl exit non-zero on an HTTP error status, which aborts the test
# through `set -e` before the content check below is reached.
response=$(curl -k -f -s \
  -G \
  -E "$OWNER_CERT_FILE":"$OWNER_CERT_PWD" \
  -H "Accept: application/rdf+xml" \
  --data-urlencode "forClass=http://spinrdf.org/sp#Describe" \
  "${END_USER_BASE_URL}ns")

# response must be non-empty: sp:Describe must be recognised as an OntClass.
# The here-string replaces `echo … | grep -q`: under pipefail, grep -q exiting
# on its first match can kill the writer with SIGPIPE and fail a passing test.
# -F matches the IRI literally (its dots are not regex wildcards).
grep -qF -- "http://spinrdf.org/sp#Describe" <<<"$response"
78 changes: 78 additions & 0 deletions http-tests/proxy/GET-proxied-no-cache-poisoning.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env bash
set -euo pipefail

# Regression: ProxyRequestFilter's server-side fetch attaches On-Behalf-Of
# (via WebIDDelegationFilter), and the backend response carries the asserted
# agent's WebID in the Link header (acl#agent). varnish-frontend must not
# cache that response under a URL-keyed entry — otherwise a subsequent
# anonymous request to the same URL+Accept replays the cached 200 and reads
# back the previous agent's identity (and inherits whatever ACL grant they
# had).
#
# purge_cache and STATUS_FORBIDDEN are not defined in this script; they are
# presumably supplied by the test harness that runs it.

#######################################
# Fetch the admin base URL anonymously (no client certificate) with
# Accept: application/rdf+xml and fail the test unless the response is
# STATUS_FORBIDDEN and no Link header line contains acl#agent.
# Globals:   ADMIN_BASE_URL (read), STATUS_FORBIDDEN (read)
# Arguments: $1 - step label used as the prefix of failure messages
# Outputs:   a one-line failure message on stdout when a check fails
# Returns:   0 when both checks pass; exits the script with 1 otherwise
#######################################
assert_anonymous_forbidden() {
  local step=$1
  local response status link_leak

  # -i keeps the response headers in the output; no -f, because the 403 we
  # expect must not make curl itself fail.
  response=$(curl -k -s -i -H 'Accept: application/rdf+xml' "${ADMIN_BASE_URL}")

  # Second field of the first line is the status code; strip the CR that
  # terminates HTTP header lines.
  status=$(printf '%s\n' "$response" | awk 'NR==1{print $2}' | tr -d '\r')
  # Count Link header lines mentioning acl#agent. The greps exit non-zero
  # when nothing matches, so `|| true` keeps pipefail/set -e from aborting.
  link_leak=$(printf '%s\n' "$response" | tr -d '\r' | grep -i '^link:' | grep -c 'acl#agent' || true)

  if [ "$status" != "$STATUS_FORBIDDEN" ]; then
    echo "$step: expected $STATUS_FORBIDDEN, got $status"
    exit 1
  fi

  if [ "$link_leak" != "0" ]; then
    echo "$step: anonymous response leaks acl#agent (cache poisoning)"
    exit 1
  fi
}

purge_cache "$END_USER_VARNISH_SERVICE"
purge_cache "$ADMIN_VARNISH_SERVICE"
purge_cache "$FRONTEND_VARNISH_SERVICE"

# Step A: authenticated owner fires a proxy request from the end-user
# dataspace to the admin dataspace. This triggers WebIDDelegationFilter →
# On-Behalf-Of on the server-side hop into varnish-frontend.

curl -k -f -s -o /dev/null \
  -E "$OWNER_CERT_FILE":"$OWNER_CERT_PWD" \
  -G \
  -H 'Accept: application/rdf+xml' \
  --data-urlencode "uri=${ADMIN_BASE_URL}" \
  "${END_USER_BASE_URL}"

# Step B: anonymous direct request to the admin URL with the same Accept.
# If the cache was poisoned in Step A, this returns 200 with the owner's
# WebID in the Link header. Expected after the fix: varnish-frontend should
# pass on On-Behalf-Of and store nothing, so this goes to the backend
# anonymously and gets 403.

assert_anonymous_forbidden "Step B"

# Step C: authenticated owner fetches the admin URL directly with cert at TLS,
# Accept: application/rdf+xml. The Client-Cert header reaches varnish-frontend
# (nginx-forwarded). The backend stamps acl#agent into the Link header for the
# authenticated 200. varnish-frontend must NOT cache this response — its hash
# key ignores identity, so a subsequent anonymous request would replay the 200.

purge_cache "$FRONTEND_VARNISH_SERVICE"

curl -k -f -s -o /dev/null \
  -E "$OWNER_CERT_FILE":"$OWNER_CERT_PWD" \
  -H 'Accept: application/rdf+xml' \
  "${ADMIN_BASE_URL}"

# Step D: anonymous direct fetch of the same URL. With the fix in place
# (Client-Cert + non-/static/ path → pass in vcl_recv), Step C didn't store
# anything, so this reaches the backend anonymously and gets 403.

assert_anonymous_forbidden "Step D"
10 changes: 5 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

<groupId>com.atomgraph</groupId>
<artifactId>linkeddatahub</artifactId>
<version>5.5.3</version>
<version>5.6.0-SNAPSHOT</version>
<packaging>${packaging.type}</packaging>

<name>AtomGraph LinkedDataHub</name>
Expand Down Expand Up @@ -46,7 +46,7 @@
<url>https://github.com/AtomGraph/LinkedDataHub</url>
<connection>scm:git:git://github.com/AtomGraph/LinkedDataHub.git</connection>
<developerConnection>scm:git:git@github.com:AtomGraph/LinkedDataHub.git</developerConnection>
<tag>linkeddatahub-5.5.3</tag>
<tag>linkeddatahub-2.1.1</tag>
</scm>

<repositories>
Expand Down Expand Up @@ -146,7 +146,7 @@
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>twirl</artifactId>
<version>1.1.0</version>
<version>1.2.0-SNAPSHOT</version>
<exclusions>
<!-- exclude slf4j-reload4j 1.7.x binding; replaced below with 2.0.x matching slf4j-api from Jena -->
<exclusion>
Expand All @@ -163,13 +163,13 @@
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>client</artifactId>
<version>4.3.0</version>
<version>4.4.0-SNAPSHOT</version>
<classifier>classes</classifier>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>client</artifactId>
<version>4.3.0</version>
<version>4.4.0-SNAPSHOT</version>
<type>war</type>
</dependency>
<dependency>
Expand Down
Loading
Loading