diff --git a/CLAUDE.md b/CLAUDE.md index 97b10069a..31109ff4e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -91,8 +91,28 @@ The application runs as a multi-container setup: 1. Requests come through nginx proxy 2. Varnish provides caching layer 3. LinkedDataHub application handles business logic -4. Data persisted to appropriate Fuseki triplestore -5. XSLT transforms data for client presentation +4. RDF data is read/written via the **Graph Store Protocol** — each document in the hierarchy corresponds to a named graph in the triplestore; the document URI is the graph name +5. Data persisted to appropriate Fuseki triplestore +6. XSLT transforms data for client presentation + +### Linked Data Proxy and Client-Side Rendering + +LDH includes a Linked Data proxy that dereferences external URIs on behalf of the browser. The original design rendered proxied resources identically to local ones — server-side RDF fetch + XSLT. This created a DDoS/resource-exhaustion vector: scraper bots routing arbitrary external URIs through the proxy would trigger a full server-side pipeline (HTTP fetch → XSLT rendering) per request, exhausting HTTP connection pools and CPU. + +The current design splits rendering by request origin: + +- **Browser requests** (`Accept` explicitly lists `text/html` or `application/xhtml+xml`): `ProxyRequestFilter` does not proxy the request; it falls through to the downstream handler, which returns the local application shell. Saxon-JS then issues a second, RDF-typed request (`Accept: application/rdf+xml`) from the browser. +- **RDF requests** (API clients, Saxon-JS second pass): `ProxyRequestFilter` fetches the external RDF, parses it, and returns it to the caller. No XSLT happens server-side. +- **Client-side rendering**: Saxon-JS receives the raw RDF and applies the same XSLT 3 templates used server-side (shared stylesheet), so proxied resources look almost identical to local ones. 
+ +Key implementation files: +- `ProxyRequestFilter.java` — intercepts `?uri=` and `lapp:Dataset` proxy requests; lets requests that explicitly accept (X)HTML fall through to the app shell instead of proxying them; forwards external `Link` headers +- `ApplicationFilter.java` — registers external proxy target URI in request context (`AC.uri` property) as authoritative proxy marker +- `ResponseHeadersFilter.java` — skips local-only hypermedia links (`sd:endpoint`, `ldt:ontology`, `ac:stylesheet`) for proxy requests; external ones are forwarded by `ProxyRequestFilter` +- `client.xsl` (`ldh:rdf-document-response`) — receives the RDF proxy response client-side; extracts `sd:endpoint` from `Link` header; stores it in `LinkedDataHub.endpoint` +- `functions.xsl` (`sd:endpoint()`) — returns `LinkedDataHub.endpoint` when set (external proxy), otherwise falls back to the local SPARQL endpoint + +The SPARQL endpoint forwarding chain ensures ContentMode blocks (charts, maps) query the **remote** app's SPARQL endpoint, not the local one. `LinkedDataHub.endpoint` is reset to the local endpoint by `ldh:HTMLDocumentLoaded` on every HTML page navigation, so there is no stale state when navigating back to local documents. 
### Key Extension Points - **Vocabulary definitions** in `com.atomgraph.linkeddatahub.vocabulary` diff --git a/Dockerfile b/Dockerfile index 5b9569d80..3d8066e6e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -109,6 +109,8 @@ ENV MAX_TOTAL_CONN=40 ENV MAX_REQUEST_RETRIES=3 +ENV CONNECTION_REQUEST_TIMEOUT=30000 + ENV IMPORT_KEEPALIVE= ENV MAX_IMPORT_THREADS=10 diff --git a/docker-compose.yml b/docker-compose.yml index 2218a0652..1c344b1b5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -65,6 +65,7 @@ services: - SIGN_UP_CERT_VALIDITY=180 - MAX_CONTENT_LENGTH=${MAX_CONTENT_LENGTH:-2097152} - ALLOW_INTERNAL_URLS=${ALLOW_INTERNAL_URLS:-} + - CONNECTION_REQUEST_TIMEOUT=${CONNECTION_REQUEST_TIMEOUT:-} - NOTIFICATION_ADDRESS=LinkedDataHub - MAIL_SMTP_HOST=email-server - MAIL_SMTP_PORT=25 diff --git a/platform/entrypoint.sh b/platform/entrypoint.sh index e61404a84..ed8ef8d50 100755 --- a/platform/entrypoint.sh +++ b/platform/entrypoint.sh @@ -1037,6 +1037,10 @@ if [ -n "$ALLOW_INTERNAL_URLS" ]; then export CATALINA_OPTS="$CATALINA_OPTS -Dcom.atomgraph.linkeddatahub.allowInternalUrls=$ALLOW_INTERNAL_URLS" fi +if [ -n "$CONNECTION_REQUEST_TIMEOUT" ]; then + export CATALINA_OPTS="$CATALINA_OPTS -Dcom.atomgraph.linkeddatahub.connectionRequestTimeout=$CONNECTION_REQUEST_TIMEOUT" +fi + if [ -n "$MAX_CONTENT_LENGTH" ]; then MAX_CONTENT_LENGTH_PARAM="--stringparam ldhc:maxContentLength '$MAX_CONTENT_LENGTH' " fi diff --git a/src/main/java/com/atomgraph/linkeddatahub/Application.java b/src/main/java/com/atomgraph/linkeddatahub/Application.java index d535c4242..c7fd1832a 100644 --- a/src/main/java/com/atomgraph/linkeddatahub/Application.java +++ b/src/main/java/com/atomgraph/linkeddatahub/Application.java @@ -214,6 +214,7 @@ import org.apache.http.HttpClientConnection; import org.apache.http.HttpHost; import org.apache.http.client.HttpRequestRetryHandler; +import org.apache.http.client.config.RequestConfig; import org.apache.http.config.Registry; import 
org.apache.http.config.RegistryBuilder; import org.apache.http.conn.socket.ConnectionSocketFactory; @@ -358,6 +359,8 @@ public Application(@Context ServletConfig servletConfig) throws URISyntaxExcepti servletConfig.getServletContext().getInitParameter(LDHC.maxConnPerRoute.getURI()) != null ? Integer.valueOf(servletConfig.getServletContext().getInitParameter(LDHC.maxConnPerRoute.getURI())) : null, servletConfig.getServletContext().getInitParameter(LDHC.maxTotalConn.getURI()) != null ? Integer.valueOf(servletConfig.getServletContext().getInitParameter(LDHC.maxTotalConn.getURI())) : null, servletConfig.getServletContext().getInitParameter(LDHC.maxRequestRetries.getURI()) != null ? Integer.valueOf(servletConfig.getServletContext().getInitParameter(LDHC.maxRequestRetries.getURI())) : null, + System.getProperty("com.atomgraph.linkeddatahub.connectionRequestTimeout") != null ? Integer.valueOf(System.getProperty("com.atomgraph.linkeddatahub.connectionRequestTimeout")) : + servletConfig.getServletContext().getInitParameter(LDHC.connectionRequestTimeout.getURI()) != null ? Integer.valueOf(servletConfig.getServletContext().getInitParameter(LDHC.connectionRequestTimeout.getURI())) : null, servletConfig.getServletContext().getInitParameter(LDHC.maxImportThreads.getURI()) != null ? Integer.valueOf(servletConfig.getServletContext().getInitParameter(LDHC.maxImportThreads.getURI())) : null, servletConfig.getServletContext().getInitParameter(LDHC.notificationAddress.getURI()) != null ? servletConfig.getServletContext().getInitParameter(LDHC.notificationAddress.getURI()) : null, servletConfig.getServletContext().getInitParameter(LDHC.supportedLanguages.getURI()) != null ? 
servletConfig.getServletContext().getInitParameter(LDHC.supportedLanguages.getURI()) : null, @@ -445,7 +448,7 @@ public Application(final ServletConfig servletConfig, final MediaTypes mediaType final String baseURIString, final String proxyScheme, final String proxyHostname, final Integer proxyPort, final String uploadRootString, final boolean invalidateCache, final Integer cookieMaxAge, final boolean enableLinkedDataProxy, final boolean allowInternalUrls, final Integer maxContentLength, - final Integer maxConnPerRoute, final Integer maxTotalConn, final Integer maxRequestRetries, final Integer maxImportThreads, + final Integer maxConnPerRoute, final Integer maxTotalConn, final Integer maxRequestRetries, final Integer connectionRequestTimeout, final Integer maxImportThreads, final String notificationAddressString, final String supportedLanguageCodes, final boolean enableWebIDSignUp, final String oidcRefreshTokensPropertiesPath, final String frontendProxyString, final String backendProxyAdminString, final String backendProxyEndUserString, final String mailUser, final String mailPassword, final String smtpHost, final String smtpPort, @@ -709,10 +712,10 @@ public Application(final ServletConfig servletConfig, final MediaTypes mediaType trustStore.load(trustStoreInputStream, clientTrustStorePassword.toCharArray()); } - client = getClient(keyStore, clientKeyStorePassword, trustStore, maxConnPerRoute, maxTotalConn, null, false); - externalClient = getClient(keyStore, clientKeyStorePassword, trustStore, maxConnPerRoute, maxTotalConn, null, false); - importClient = getClient(keyStore, clientKeyStorePassword, trustStore, maxConnPerRoute, maxTotalConn, maxRequestRetries, true); - noCertClient = getNoCertClient(trustStore, maxConnPerRoute, maxTotalConn, maxRequestRetries); + client = getClient(keyStore, clientKeyStorePassword, trustStore, maxConnPerRoute, maxTotalConn, null, false, connectionRequestTimeout); + externalClient = getClient(keyStore, clientKeyStorePassword, 
trustStore, maxConnPerRoute, maxTotalConn, null, false, connectionRequestTimeout); + importClient = getClient(keyStore, clientKeyStorePassword, trustStore, maxConnPerRoute, maxTotalConn, maxRequestRetries, true, connectionRequestTimeout); + noCertClient = getNoCertClient(trustStore, maxConnPerRoute, maxTotalConn, maxRequestRetries, connectionRequestTimeout); if (maxContentLength != null) { @@ -1527,7 +1530,7 @@ public void submitImport(RDFImport rdfImport, com.atomgraph.linkeddatahub.apps.m * @throws UnrecoverableKeyException key loading error * @throws KeyManagementException key loading error */ - public static Client getClient(KeyStore keyStore, String keyStorePassword, KeyStore trustStore, Integer maxConnPerRoute, Integer maxTotalConn, Integer maxRequestRetries, boolean buffered) throws NoSuchAlgorithmException, KeyStoreException, UnrecoverableKeyException, KeyManagementException + public static Client getClient(KeyStore keyStore, String keyStorePassword, KeyStore trustStore, Integer maxConnPerRoute, Integer maxTotalConn, Integer maxRequestRetries, boolean buffered, Integer connectionRequestTimeout) throws NoSuchAlgorithmException, KeyStoreException, UnrecoverableKeyException, KeyManagementException { if (keyStore == null) throw new IllegalArgumentException("KeyStore cannot be null"); if (keyStorePassword == null) throw new IllegalArgumentException("KeyStore password string cannot be null"); @@ -1592,7 +1595,11 @@ public void releaseConnection(final HttpClientConnection managedConn, final Obje config.property(ClientProperties.FOLLOW_REDIRECTS, true); config.property(ClientProperties.REQUEST_ENTITY_PROCESSING, RequestEntityProcessing.BUFFERED); // https://stackoverflow.com/questions/42139436/jersey-client-throws-cannot-retry-request-with-a-non-repeatable-request-entity config.property(ApacheClientProperties.CONNECTION_MANAGER, conman); - + if (connectionRequestTimeout != null) + config.property(ApacheClientProperties.REQUEST_CONFIG, RequestConfig.custom(). 
+ setConnectionRequestTimeout(connectionRequestTimeout). + build()); + if (maxRequestRetries != null) config.property(ApacheClientProperties.RETRY_HANDLER, (HttpRequestRetryHandler) (IOException ex, int executionCount, HttpContext context) -> { @@ -1629,7 +1636,7 @@ public void releaseConnection(final HttpClientConnection managedConn, final Obje * @param maxRequestRetries maximum number of times that the HTTP client will retry a request * @return client instance */ - public static Client getNoCertClient(KeyStore trustStore, Integer maxConnPerRoute, Integer maxTotalConn, Integer maxRequestRetries) + public static Client getNoCertClient(KeyStore trustStore, Integer maxConnPerRoute, Integer maxTotalConn, Integer maxRequestRetries, Integer connectionRequestTimeout) { try { @@ -1688,7 +1695,11 @@ public void releaseConnection(final HttpClientConnection managedConn, final Obje config.property(ClientProperties.FOLLOW_REDIRECTS, true); config.property(ClientProperties.REQUEST_ENTITY_PROCESSING, RequestEntityProcessing.BUFFERED); // https://stackoverflow.com/questions/42139436/jersey-client-throws-cannot-retry-request-with-a-non-repeatable-request-entity config.property(ApacheClientProperties.CONNECTION_MANAGER, conman); - + if (connectionRequestTimeout != null) + config.property(ApacheClientProperties.REQUEST_CONFIG, RequestConfig.custom(). + setConnectionRequestTimeout(connectionRequestTimeout). + build()); + if (maxRequestRetries != null) config.property(ApacheClientProperties.RETRY_HANDLER, (HttpRequestRetryHandler) (IOException ex, int executionCount, HttpContext context) -> { @@ -1708,7 +1719,7 @@ public void releaseConnection(final HttpClientConnection managedConn, final Obje } return false; }); - + return ClientBuilder.newBuilder(). withConfig(config). sslContext(ctx). 
diff --git a/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ApplicationFilter.java b/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ApplicationFilter.java index 358e1491d..5b5e79418 100644 --- a/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ApplicationFilter.java +++ b/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ApplicationFilter.java @@ -107,7 +107,23 @@ public void filter(ContainerRequestContext request) throws IOException requestURI = builder.build(); } - else requestURI = request.getUriInfo().getRequestUri(); + else + { + request.setProperty(AC.uri.getURI(), graphURI); // authoritative external proxy marker + + // strip ?uri= from the effective request URI — server-side sees only the path; + // the ContainerRequestContext property is the sole indicator of proxy mode + MultivaluedMap externalQueryParams = new MultivaluedHashMap(); + externalQueryParams.putAll(request.getUriInfo().getQueryParameters()); + externalQueryParams.remove(AC.uri.getLocalName()); + + UriBuilder externalBuilder = UriBuilder.fromUri(request.getUriInfo().getAbsolutePath()); + for (Entry> params : externalQueryParams.entrySet()) + for (String value : params.getValue()) + externalBuilder.queryParam(params.getKey(), value); + + requestURI = externalBuilder.build(); + } } catch (URISyntaxException ex) { diff --git a/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilter.java b/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilter.java index 467e00e7f..23f635a00 100644 --- a/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilter.java +++ b/src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilter.java @@ -16,7 +16,7 @@ */ package com.atomgraph.linkeddatahub.server.filter.request; -import com.atomgraph.client.MediaTypes; +import com.atomgraph.core.MediaTypes; import com.atomgraph.client.util.HTMLMediaTypePredicate; 
import com.atomgraph.client.vocabulary.AC; import com.atomgraph.core.exception.BadGatewayException; @@ -65,6 +65,7 @@ import org.apache.jena.riot.RDFLanguages; import org.apache.jena.riot.resultset.ResultSetReaderRegistry; import org.glassfish.jersey.message.internal.MessageBodyProviderNotFoundException; +import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -82,6 +83,16 @@ * * ACL is not checked for proxy requests: the proxy is a global transport function, not a document * operation. Access control is enforced by the target endpoint. + *

+ * This filter intentionally does not proxy requests from clients that explicitly accept + * (X)HTML. Rendering arbitrary external URIs as (X)HTML through the full server-side pipeline + * (SPARQL DESCRIBE + XSLT) for every browser-originated proxy request would cause unbounded resource + * exhaustion — a connection-pool and CPU amplification attack vector. Instead, requests whose + * {@code Accept} header contains a non-wildcard {@code text/html} or {@code application/xhtml+xml} + * type fall through to the downstream handler, which serves the LDH application shell; the + * client-side Saxon-JS layer then issues a second, RDF-typed request that does hit this + * filter and is handled cheaply. Pure API clients that send only {@code *}{@code /*} (e.g. curl) + * reach the proxy because they do not list an explicit HTML type. * * @author Martynas Jusevičius {@literal } */ @@ -91,9 +102,10 @@ public class ProxyRequestFilter implements ContainerRequestFilter { private static final Logger log = LoggerFactory.getLogger(ProxyRequestFilter.class); + private static final MediaTypes MEDIA_TYPES = new MediaTypes(); + private static final Pattern LINK_SPLITTER = Pattern.compile(",(?=\\s*<)"); @Inject com.atomgraph.linkeddatahub.Application system; - @Inject MediaTypes mediaTypes; @Inject jakarta.inject.Provider> ontology; @Context Request request; @@ -105,6 +117,27 @@ public void filter(ContainerRequestContext requestContext) throws IOException URI targetURI = targetOpt.get(); + // do not proxy requests from clients that explicitly accept (X)HTML — they expect the app shell, + // which the downstream handler serves. Browsers list text/html as a non-wildcard type; pure API + // clients (curl etc.) send only */* and must reach the proxy. + // Defending against resource exhaustion: proxying + full server-side XSLT rendering for arbitrary + // external URIs on every browser request would amplify CPU and connection-pool load unboundedly. 
+ boolean clientAcceptsHtml = requestContext.getAcceptableMediaTypes().stream() + .anyMatch(mt -> !mt.isWildcardType() && !mt.isWildcardSubtype() && + (mt.isCompatible(MediaType.TEXT_HTML_TYPE) || + mt.isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE))); + if (clientAcceptsHtml) return; + + // negotiate the response format from RDF/SPARQL writable types + List writableTypes = new ArrayList<>(getMediaTypes().getWritable(Model.class)); + writableTypes.addAll(getMediaTypes().getWritable(ResultSet.class)); + List variants = com.atomgraph.core.model.impl.Response.getVariants( + writableTypes, + getSystem().getSupportedLanguages(), + new ArrayList<>()); + Variant selectedVariant = getRequest().selectVariant(variants); + if (selectedVariant == null) return; // client accepts no RDF/SPARQL type + // strip #fragment (servers do not receive fragment identifiers) if (targetURI.getFragment() != null) { @@ -123,7 +156,7 @@ public void filter(ContainerRequestContext requestContext) throws IOException { if (log.isDebugEnabled()) log.debug("Serving mapped URI from DataManager cache: {}", targetURI); Model model = getSystem().getDataManager().loadModel(targetURI.toString()); - requestContext.abortWith(getResponse(model, Response.Status.OK)); + requestContext.abortWith(getResponse(model, Response.Status.OK, selectedVariant)); return; } @@ -141,7 +174,7 @@ public void filter(ContainerRequestContext requestContext) throws IOException if (!description.isEmpty()) { if (log.isDebugEnabled()) log.debug("Serving URI from namespace ontology: {}", targetURI); - requestContext.abortWith(getResponse(description, Response.Status.OK)); + requestContext.abortWith(getResponse(description, Response.Status.OK, selectedVariant)); return; } } @@ -188,7 +221,7 @@ else if (agentContext instanceof IDTokenSecurityContext idTokenSecurityContext) { // provide the target URI as a base URI hint so ModelProvider / HtmlJsonLDReader can resolve relative references 
clientResponse.getHeaders().putSingle(com.atomgraph.core.io.ModelProvider.REQUEST_URI_HEADER, targetURI.toString()); - requestContext.abortWith(getResponse(clientResponse)); + requestContext.abortWith(getResponse(clientResponse, selectedVariant)); } } catch (MessageBodyProviderNotFoundException ex) @@ -212,19 +245,9 @@ else if (agentContext instanceof IDTokenSecurityContext idTokenSecurityContext) */ protected Optional resolveTargetURI(ContainerRequestContext requestContext) { - // Case 1: explicit ?uri= query parameter - String uriParam = requestContext.getUriInfo().getQueryParameters().getFirst(AC.uri.getLocalName()); - if (uriParam != null) - { - URI targetURI = URI.create(uriParam); - @SuppressWarnings("unchecked") - Optional appOpt = - (Optional) requestContext.getProperty(LAPP.Application.getURI()); - // ApplicationFilter rewrites ?uri= values that are relative to the app base URI; skip those - if (appOpt != null && appOpt.isPresent() && !appOpt.get().getBaseURI().relativize(targetURI).isAbsolute()) - return Optional.empty(); - return Optional.of(targetURI); - } + // Case 1: external ?uri= — ApplicationFilter strips it from UriInfo and stores it here + URI proxyTarget = (URI) requestContext.getProperty(AC.uri.getURI()); + if (proxyTarget != null) return Optional.of(proxyTarget); // Case 2: lapp:Dataset proxy @SuppressWarnings("unchecked") @@ -243,12 +266,13 @@ protected Optional resolveTargetURI(ContainerRequestContext requestContext) * Converts a client response from the proxy target into a JAX-RS response. 
* * @param clientResponse response from the proxy target + * @param selectedVariant pre-computed variant from content negotiation * @return JAX-RS response to return to the original caller */ - protected Response getResponse(Response clientResponse) + protected Response getResponse(Response clientResponse, Variant selectedVariant) { if (clientResponse.getMediaType() == null) return Response.status(clientResponse.getStatus()).build(); - return getResponse(clientResponse, clientResponse.getStatusInfo()); + return getResponse(clientResponse, clientResponse.getStatusInfo(), selectedVariant); } /** @@ -256,42 +280,56 @@ protected Response getResponse(Response clientResponse) * * @param clientResponse response from the proxy target * @param statusType status to use in the returned response + * @param selectedVariant pre-computed variant from content negotiation * @return JAX-RS response */ - protected Response getResponse(Response clientResponse, Response.StatusType statusType) + protected Response getResponse(Response clientResponse, Response.StatusType statusType, Variant selectedVariant) { MediaType formatType = new MediaType(clientResponse.getMediaType().getType(), clientResponse.getMediaType().getSubtype()); // discard charset param Lang lang = RDFLanguages.contentTypeToLang(formatType.toString()); + Response response; if (lang != null && ResultSetReaderRegistry.isRegistered(lang)) { ResultSetRewindable results = clientResponse.readEntity(ResultSetRewindable.class); - return getResponse(results, statusType); + response = getResponse(results, statusType, selectedVariant); + } + else + { + Model model = clientResponse.readEntity(Model.class); + response = getResponse(model, statusType, selectedVariant); + } + + // forward all Link headers from the external response so the client receives remote hypermedia + // (e.g. 
sd:endpoint pointing to the remote SPARQL endpoint); + // ResponseHeadersFilter will see sd:endpoint already present and skip injecting the local one + String linkHeader = clientResponse.getHeaderString(HttpHeaders.LINK); + if (linkHeader != null) + { + Response.ResponseBuilder builder = Response.fromResponse(response); + for (String part : LINK_SPLITTER.split(linkHeader)) + builder.header(HttpHeaders.LINK, part.trim()); + response = builder.build(); } - Model model = clientResponse.readEntity(Model.class); - return getResponse(model, statusType); + return response; } /** - * Builds a content-negotiated response for the given RDF model. + * Builds a response for the given RDF model using a pre-computed variant. * * @param model RDF model * @param statusType response status + * @param selectedVariant pre-computed variant from content negotiation * @return JAX-RS response */ - protected Response getResponse(Model model, Response.StatusType statusType) + protected Response getResponse(Model model, Response.StatusType statusType, Variant selectedVariant) { - List variants = com.atomgraph.core.model.impl.Response.getVariants( - getMediaTypes().getWritable(Model.class), - getSystem().getSupportedLanguages(), - new ArrayList<>()); - return new com.atomgraph.core.model.impl.Response(getRequest(), model, null, new EntityTag(Long.toHexString(ModelUtils.hashModel(model))), - variants, + selectedVariant, new HTMLMediaTypePredicate()). getResponseBuilder(). status(statusType). @@ -299,27 +337,23 @@ protected Response getResponse(Model model, Response.StatusType statusType) } /** - * Builds a content-negotiated response for the given SPARQL result set. + * Builds a response for the given SPARQL result set using a pre-computed variant. 
* * @param resultSet SPARQL result set * @param statusType response status + * @param selectedVariant pre-computed variant from content negotiation * @return JAX-RS response */ - protected Response getResponse(ResultSetRewindable resultSet, Response.StatusType statusType) + protected Response getResponse(ResultSetRewindable resultSet, Response.StatusType statusType, Variant selectedVariant) { long hash = ResultSetUtils.hashResultSet(resultSet); resultSet.reset(); - List variants = com.atomgraph.core.model.impl.Response.getVariants( - getMediaTypes().getWritable(ResultSet.class), - getSystem().getSupportedLanguages(), - new ArrayList<>()); - return new com.atomgraph.core.model.impl.Response(getRequest(), resultSet, null, new EntityTag(Long.toHexString(hash)), - variants, + selectedVariant, new HTMLMediaTypePredicate()). getResponseBuilder(). status(statusType). @@ -348,12 +382,13 @@ public Optional getOntology() /** * Returns the media types registry. + * Core MediaTypes do not include (X)HTML types, which is what we want here. 
* * @return media types */ public MediaTypes getMediaTypes() { - return mediaTypes; + return MEDIA_TYPES; } /** diff --git a/src/main/java/com/atomgraph/linkeddatahub/server/filter/response/ResponseHeadersFilter.java b/src/main/java/com/atomgraph/linkeddatahub/server/filter/response/ResponseHeadersFilter.java index f444ae697..5c9bc4785 100644 --- a/src/main/java/com/atomgraph/linkeddatahub/server/filter/response/ResponseHeadersFilter.java +++ b/src/main/java/com/atomgraph/linkeddatahub/server/filter/response/ResponseHeadersFilter.java @@ -28,9 +28,6 @@ import com.atomgraph.linkeddatahub.vocabulary.ACL; import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; import java.util.Optional; import jakarta.annotation.Priority; import jakarta.inject.Inject; @@ -40,7 +37,6 @@ import jakarta.ws.rs.container.ContainerResponseFilter; import jakarta.ws.rs.core.HttpHeaders; import jakarta.ws.rs.core.Response; -import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,7 +50,6 @@ public class ResponseHeadersFilter implements ContainerResponseFilter { private static final Logger log = LoggerFactory.getLogger(ResponseHeadersFilter.class); - private static final Pattern LINK_SPLITTER = Pattern.compile(",(?=\\s*<)"); // split on commas before next '<' @Inject jakarta.inject.Provider> app; @Inject jakarta.inject.Provider> dataset; @@ -75,16 +70,14 @@ public void filter(ContainerRequestContext request, ContainerResponseContext res if (getAuthorizationContext().isPresent()) getAuthorizationContext().get().getModeURIs().forEach(mode -> response.getHeaders().add(HttpHeaders.LINK, new Link(mode, ACL.mode.getURI(), null))); - List linkValues = response.getHeaders().get(HttpHeaders.LINK); - List links = parseLinkHeaderValues(linkValues); + // for proxy requests the external Link headers are forwarded by ProxyRequestFilter; suppress local-only hypermedia + boolean 
isProxyRequest = request.getProperty(AC.uri.getURI()) != null; - if (getLinksByRel(links, SD.endpoint.getURI()).isEmpty()) - // add Link rel=sd:endpoint. - // TO-DO: The external SPARQL endpoint URL is different from the internal one currently specified as sd:endpoint in the context dataset + if (!isProxyRequest) response.getHeaders().add(HttpHeaders.LINK, new Link(request.getUriInfo().getBaseUriBuilder().path(Dispatcher.class, "getSPARQLEndpoint").build(), SD.endpoint.getURI(), null)); - // Only add application-specific links if application is present - if (getApplication().isPresent()) + // Only add application-specific links if application is present and this is not a proxy request + if (!isProxyRequest && getApplication().isPresent()) { Application application = getApplication().get(); // add Link rel=ldt:ontology, if the ontology URI is specified @@ -103,55 +96,6 @@ public void filter(ContainerRequestContext request, ContainerResponseContext res } } - /** - * Parses HTTP Link headers into individual {@link Link} objects. - * - * Handles both multiple header fields and comma-separated values - * within a single header field. - * - * @param linkValues raw Link header values (may contain multiple entries) - * @return flat list of parsed {@link Link} objects - */ - protected List parseLinkHeaderValues(List linkValues) - { - List out = new ArrayList<>(); - if (linkValues == null) return out; - - for (Object hv : linkValues) - { - String[] parts = LINK_SPLITTER.split(hv.toString()); - for (String part : parts) - { - try - { - out.add(Link.valueOf(part.trim())); - } - catch (URISyntaxException e) - { - // ignore invalid entries - } - } - } - - return out; - } - - /** - * Returns all Link headers that match the given rel attribute. - * - * @param links link list - * @param rel rel value - * @return filtered header list - */ - protected List getLinksByRel(List links, String rel) - { - return links == null - ? 
List.of() - : links.stream() - .filter(link -> rel.equals(link.getRel())) - .toList(); - } - /** * Returns the current application. * diff --git a/src/main/java/com/atomgraph/linkeddatahub/server/filter/response/XsltExecutableFilter.java b/src/main/java/com/atomgraph/linkeddatahub/server/filter/response/XsltExecutableFilter.java index 12948e0c3..427cd2382 100644 --- a/src/main/java/com/atomgraph/linkeddatahub/server/filter/response/XsltExecutableFilter.java +++ b/src/main/java/com/atomgraph/linkeddatahub/server/filter/response/XsltExecutableFilter.java @@ -18,8 +18,6 @@ import com.atomgraph.client.vocabulary.AC; import com.atomgraph.linkeddatahub.MediaType; -import static com.atomgraph.linkeddatahub.writer.XSLTWriterBase.SYSTEM_ID_PROPERTY; -import static com.atomgraph.server.status.UnprocessableEntityStatus.UNPROCESSABLE_ENTITY; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; @@ -38,7 +36,6 @@ import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.UriInfo; -import java.net.URISyntaxException; import java.util.Optional; import javax.xml.transform.Source; import javax.xml.transform.stream.StreamSource; @@ -79,24 +76,6 @@ public void filter(ContainerRequestContext req, ContainerResponseContext resp) t if (stylesheet != null) req.setProperty(AC.stylesheet.getURI(), getXsltExecutable(stylesheet)); else req.setProperty(AC.stylesheet.getURI(), getSystem().getXsltExecutable()); - // systemId (base URI) is only set on successful documents or '422 Unprocessable Entity' (ConstraintViolation) error responses - if (resp.getStatusInfo().getFamily().equals(Response.Status.Family.SUCCESSFUL) || - resp.getStatusInfo().getStatusCode() == UNPROCESSABLE_ENTITY.getStatusCode()) - { - final URI systemId; - - try - { - if (getURI() != null) systemId = getURI(); - else systemId = req.getUriInfo().getRequestUri(); - - req.setProperty(SYSTEM_ID_PROPERTY, systemId); - } - catch (URISyntaxException ex) - { 
- throw new InternalServerErrorException(ex); - } - } } } @@ -203,36 +182,6 @@ public Source getSource(String url) throws IOException return null; } - /** - * Gets the URI parameter from the request. - * - * @return the URI parameter - * @throws URISyntaxException if the URI is malformed - */ - public URI getURI() throws URISyntaxException - { - return getURIParam(getUriInfo(), AC.uri.getLocalName()); - } - - /** - * Gets a URI parameter from the provided UriInfo. - * - * @param uriInfo the URI information - * @param name the parameter name - * @return the URI parameter value - * @throws URISyntaxException if the URI is malformed - */ - public URI getURIParam(UriInfo uriInfo, String name) throws URISyntaxException - { - if (uriInfo == null) throw new IllegalArgumentException("UriInfo cannot be null"); - if (name == null) throw new IllegalArgumentException("String cannot be null"); - - if (uriInfo.getQueryParameters().containsKey(name)) - return new URI(uriInfo.getQueryParameters().getFirst(name)); - - return null; - } - /** * Returns HTTP client. 
* diff --git a/src/main/java/com/atomgraph/linkeddatahub/vocabulary/LDHC.java b/src/main/java/com/atomgraph/linkeddatahub/vocabulary/LDHC.java index c9080d5b8..5daf5c389 100644 --- a/src/main/java/com/atomgraph/linkeddatahub/vocabulary/LDHC.java +++ b/src/main/java/com/atomgraph/linkeddatahub/vocabulary/LDHC.java @@ -152,6 +152,9 @@ public static String getURI() /** HTTP client request retry count */ public static final DatatypeProperty maxRequestRetries = m_model.createDatatypeProperty( NS + "maxRequestRetries" ); + /** Timeout in milliseconds waiting for a connection from the HTTP client pool */ + public static final DatatypeProperty connectionRequestTimeout = m_model.createDatatypeProperty( NS + "connectionRequestTimeout" ); + /** Max content length property */ public static final DatatypeProperty maxContentLength = m_model.createDatatypeProperty( NS + "maxContentLength" ); diff --git a/src/main/java/com/atomgraph/linkeddatahub/writer/XSLTWriterBase.java b/src/main/java/com/atomgraph/linkeddatahub/writer/XSLTWriterBase.java index bc16b70e3..00818c9de 100644 --- a/src/main/java/com/atomgraph/linkeddatahub/writer/XSLTWriterBase.java +++ b/src/main/java/com/atomgraph/linkeddatahub/writer/XSLTWriterBase.java @@ -76,8 +76,6 @@ public abstract class XSLTWriterBase extends com.atomgraph.client.writer.XSLTWri private static final Set NAMESPACES; /** The relative URL of the RDF file with localized labels */ public static final String TRANSLATIONS_PATH = "static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/translations.rdf"; - /** System property name for the XSLT system ID. 
*/ - public static final String SYSTEM_ID_PROPERTY = "com.atomgraph.linkeddatahub.writer.XSLTWriterBase.systemId"; static { @@ -135,7 +133,10 @@ public Map getParameters(MultivaluedMap + + + + - - + + @@ -93,7 +97,9 @@ exclude-result-prefixes="#all" - + diff --git a/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/document.xsl b/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/document.xsl index ebd5c61ec..81941e5ea 100644 --- a/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/document.xsl +++ b/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/document.xsl @@ -132,13 +132,124 @@ extension-element-prefixes="ixsl" + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + + +
+ + + + + + + +
+ + + + + +
+ +
+
+
+
+ + + + + + +
+ + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+ + + + + + + + + + + + +
+
+ + + + + - - + - + @@ -148,8 +259,7 @@ extension-element-prefixes="ixsl" - - + @@ -157,7 +267,7 @@ extension-element-prefixes="ixsl" + + + + + + + +
+ + +
+ + - + + + + +
+ + + + + + + + + + + +
+
+ + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+ + + + diff --git a/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/imports/default.xsl b/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/imports/default.xsl index 1721f6456..42b0bf1d8 100644 --- a/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/imports/default.xsl +++ b/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/imports/default.xsl @@ -73,7 +73,11 @@ exclude-result-prefixes="#all" - + + + + + diff --git a/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/layout.xsl b/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/layout.xsl index 6cbfd0576..55993ef4f 100644 --- a/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/layout.xsl +++ b/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/bootstrap/2.3.2/layout.xsl @@ -102,6 +102,7 @@ exclude-result-prefixes="#all"> + @@ -128,6 +129,7 @@ exclude-result-prefixes="#all"> + @@ -563,7 +565,9 @@ exclude-result-prefixes="#all">
- + + +
@@ -596,7 +600,7 @@ exclude-result-prefixes="#all"> - + @@ -614,114 +618,6 @@ exclude-result-prefixes="#all"> - - - - -
- - - - - - - - - - - - - - - - - - - - - - - -
-
- - - - - -
- - - - - - - -
- - - - - -
- -
-
-
-
- - - - - -
- - - - - - - - - - - - - - - - - - -
-
- - - - - - -
- - - - - - - - - - - - -
-
- - - @@ -882,7 +778,24 @@ exclude-result-prefixes="#all">
- + + + + +
+
+
+
+
+
+
+
+ + + + + +
@@ -891,62 +804,6 @@ exclude-result-prefixes="#all">
- - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - -
- - @@ -1106,43 +963,7 @@ exclude-result-prefixes="#all"> - - - - - - - - - - -
- - -
-
- + @@ -1197,6 +1018,35 @@ exclude-result-prefixes="#all"> + + + + + + + +
+ + + + + + + +
+
+ + + + + + + +
+
+
+
+ diff --git a/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/client.xsl b/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/client.xsl index fb846e823..eddd638c5 100644 --- a/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/client.xsl +++ b/src/main/webapp/static/com/atomgraph/linkeddatahub/xsl/client.xsl @@ -113,6 +113,7 @@ extension-element-prefixes="ixsl" + @@ -276,7 +277,7 @@ WHERE count($ldh:apps//*[rdf:type/@rdf:resource = '&sd;Service']): $ac:lang: $sd:endpoint: - ixsl:query-params()?uri: + ac:uri(): UTC offset: @@ -550,97 +551,142 @@ WHERE ETag: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
- + @@ -803,8 +849,6 @@ WHERE - - @@ -854,11 +898,11 @@ WHERE - + - + @@ -962,19 +1006,39 @@ WHERE - - - + + + + + + + + + + + + + + + + + + + + + + @@ -1219,7 +1283,7 @@ WHERE - + diff --git a/src/test/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilterTest.java b/src/test/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilterTest.java index cdca073f5..a55e8bf29 100644 --- a/src/test/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilterTest.java +++ b/src/test/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilterTest.java @@ -18,6 +18,7 @@ import com.atomgraph.client.MediaTypes; import com.atomgraph.client.util.DataManager; +import com.atomgraph.client.vocabulary.AC; import com.atomgraph.linkeddatahub.server.security.AgentContext; import org.apache.jena.ontology.Ontology; import com.atomgraph.linkeddatahub.server.util.URLValidator; @@ -32,6 +33,7 @@ import jakarta.ws.rs.core.Request; import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.UriInfo; +import jakarta.ws.rs.core.Variant; import java.io.IOException; import java.net.URI; import java.util.List; @@ -48,6 +50,8 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.when; /** @@ -62,6 +66,7 @@ public class ProxyRequestFilterTest @Mock com.atomgraph.linkeddatahub.Application system; @Mock MediaTypes mediaTypes; @Mock Request request; + @Mock Variant selectedVariant; @Mock Ontology ontology; @InjectMocks ProxyRequestFilter filter; @@ -88,18 +93,32 @@ public void setUp() when(system.getDataManager()).thenReturn(dataManager); when(dataManager.isMapped(anyString())).thenReturn(false); when(system.isEnableLinkedDataProxy()).thenReturn(false); + when(request.selectVariant(any())).thenReturn(selectedVariant); filter.ontology = () -> Optional.empty(); } + /** + * 
When the client explicitly accepts (X)HTML, the filter must bypass proxying entirely and let + * the downstream handler serve the app shell — regardless of the target URI. + */ + @Test + public void testHtmlAcceptBypassesProxy() throws IOException + { + when(requestContext.getProperty(AC.uri.getURI())).thenReturn(EXTERNAL_URI); + when(requestContext.getAcceptableMediaTypes()).thenReturn(List.of(MediaType.TEXT_HTML_TYPE)); + + filter.filter(requestContext); + + verify(requestContext, never()).abortWith(any(Response.class)); + } + /** * When the proxy is disabled, a {@code ?uri=} pointing to an unregistered external URL must be blocked. */ @Test(expected = NotAllowedException.class) public void testUnregisteredUriBlockedWhenProxyDisabled() throws IOException { - MultivaluedHashMap params = new MultivaluedHashMap<>(); - params.putSingle("uri", EXTERNAL_URI.toString()); - when(uriInfo.getQueryParameters()).thenReturn(params); + when(requestContext.getProperty(AC.uri.getURI())).thenReturn(EXTERNAL_URI); filter.filter(requestContext); } @@ -111,9 +130,7 @@ public void testUnregisteredUriBlockedWhenProxyDisabled() throws IOException @Test public void testRegisteredAppAllowedWhenProxyDisabled() throws IOException { - MultivaluedHashMap params = new MultivaluedHashMap<>(); - params.putSingle("uri", ADMIN_URI.toString()); - when(uriInfo.getQueryParameters()).thenReturn(params); + when(requestContext.getProperty(AC.uri.getURI())).thenReturn(ADMIN_URI); // matchApp() returns a non-null Resource for the admin app (registered lapp:Application) when(system.matchApp(ADMIN_URI)).thenReturn(registeredApp);