Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions src/wp-includes/canonical.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,52 @@
* @since 2.3.0
*/

/**
 * Normalizes how spaces are encoded in the query string of a URL.
 *
 * Every '+' in the query component is rewritten as '%20', so two URLs that
 * differ only in query string space encoding ('+' vs '%20') produce the same
 * result and can be compared for equality without triggering a redirect.
 *
 * Only the query component is touched. The part of the URL before the '?'
 * and the fragment (from the first '#' to the end) are returned verbatim, so
 * URLs that differ in how other characters are encoded are never collapsed
 * into one another.
 *
 * A '?' that only appears after the first '#' belongs to the fragment and is
 * not treated as the start of a query string.
 *
 * @access private
 *
 * @param string $url The URL to normalize.
 * @return string The URL with normalized query string space encoding.
 */
function _wp_normalize_query_space_encoding( $url ) {
	/*
	 * Peel off the fragment first: it begins at the first '#', and anything
	 * inside it (including '?' and '+') must be left exactly as it is.
	 */
	$hash_pos = strpos( $url, '#' );
	if ( false === $hash_pos ) {
		$before_fragment = $url;
		$fragment        = '';
	} else {
		$before_fragment = substr( $url, 0, $hash_pos );
		$fragment        = substr( $url, $hash_pos );
	}

	// If there is no query string ahead of the fragment, return the URL as-is.
	$query_pos = strpos( $before_fragment, '?' );
	if ( false === $query_pos ) {
		return $url;
	}

	// The base keeps the '?' so the pieces can simply be concatenated again.
	$base  = substr( $before_fragment, 0, $query_pos + 1 );
	$query = substr( $before_fragment, $query_pos + 1 );

	return $base . str_replace( '+', '%20', $query ) . $fragment;
}

/**
* Redirects incoming links to the proper URL based on the site url.
*
Expand Down Expand Up @@ -774,6 +820,13 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
return;
}

$normalized_redirect_url = _wp_normalize_query_space_encoding( $redirect_url );
$normalized_requested_url = _wp_normalize_query_space_encoding( $requested_url );

if ( $normalized_redirect_url === $normalized_requested_url ) {
return;
}

// Hex-encoded octets are case-insensitive.
if ( str_contains( $requested_url, '%' ) ) {
if ( ! function_exists( 'lowercase_octets' ) ) {
Expand Down
56 changes: 56 additions & 0 deletions tests/phpunit/tests/canonical.php
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,11 @@ public function data_canonical() {
array( '/2008%20', '/2008' ),
array( '//2008////', '/2008/' ),

// Query string space encoding variants should not redirect (Ticket #64376).
array( '/?test=one+two', '/?test=one+two' ), // Plus sign should stay as plus.
array( '/?test=one%20two', '/?test=one%20two' ), // %20 should stay as %20.
array( '/?utm_content=Hello+World&utm_source=test', '/?utm_content=Hello+World&utm_source=test' ), // Multiple params with plus.

// @todo Endpoints (feeds, trackbacks, etc). More fuzzed mixed query variables, comment paging, Home page (static).
);
}
Expand Down Expand Up @@ -465,6 +470,57 @@ public function test_feed_canonical_with_not_exists_query() {
$this->assertNull( $redirect );
}

/**
 * Verifies that space encoding variants in the query string are left alone.
 *
 * With a static front page configured, front page requests whose query
 * strings encode spaces as '+', as '%20', or as a mix of both are passed to
 * redirect_canonical() with redirects disabled, and each call is expected
 * to return null (no redirect).
 *
 * @ticket 64376
 */
public function test_query_string_encoding_variants_no_redirect() {
	// The original bug report involved a static front page, so set one up.
	$front_page_id = self::factory()->post->create( array( 'post_type' => 'page' ) );
	update_option( 'show_on_front', 'page' );
	update_option( 'page_on_front', $front_page_id );

	// Maps each front page request path to the message shown if it redirects.
	$messages_by_path = array(
		// A single UTM parameter using '+' for the space.
		'/?utm_content=Hello+World'        => 'URL with + should not redirect to %20',
		// The same parameter using '%20' for the space.
		'/?utm_content=Hello%20World'      => 'URL with %20 should not redirect to +',
		// Several parameters, all using '+'.
		'/?name=John+Doe&city=New+York&zip=12345' => 'URL with multiple plus-encoded parameters should not redirect',
		// '+' in one parameter and '%20' in another.
		'/?name=John+Doe&city=New%20York'  => 'URL with mixed + and %20 encoding should not redirect',
	);

	foreach ( $messages_by_path as $path => $message ) {
		$url = home_url( $path );

		$this->go_to( $url );
		$this->assertNull( redirect_canonical( $url, false ), $message );
	}

	// Clean up.
	delete_option( 'page_on_front' );
	delete_option( 'show_on_front' );
}

/**
* Test canonical redirects for attachment pages when the option is disabled.
*
Expand Down
Loading