/**
 * Normalizes how spaces are encoded in the query string of a URL.
 *
 * Rewrites every '+' in the query component as '%20' so that two URLs which
 * differ only in query string space encoding ('+' vs '%20') compare as equal.
 * The path and the fragment are returned untouched, so URLs that differ in how
 * characters are encoded outside of the query are never collapsed together.
 *
 * @param string $url The URL to normalize.
 * @return string The URL with '+' replaced by '%20' in its query string only.
 *                Non-string input is returned unchanged.
 */
function _wp_normalize_query_space_encoding( $url ) {
	// Nothing to normalize; also avoids passing non-strings to the string functions below.
	if ( ! is_string( $url ) ) {
		return $url;
	}

	/*
	 * Detach the fragment first. The fragment starts at the first '#', and any
	 * '?' that appears after it is part of the fragment rather than a query
	 * delimiter, so it must not be considered when locating the query string.
	 */
	$fragment = '';
	$hash_pos = strpos( $url, '#' );
	if ( false !== $hash_pos ) {
		$fragment = substr( $url, $hash_pos );
		$url      = substr( $url, 0, $hash_pos );
	}

	// If there is no query string, return the URL as it was passed in.
	$query_pos = strpos( $url, '?' );
	if ( false === $query_pos ) {
		return $url . $fragment;
	}

	// Everything up to and including '?' is kept verbatim; only what follows is rewritten.
	$base  = substr( $url, 0, $query_pos + 1 );
	$query = substr( $url, $query_pos + 1 );

	return $base . str_replace( '+', '%20', $query ) . $fragment;
}
* @@ -774,6 +820,13 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { return; } + $normalized_redirect_url = _wp_normalize_query_space_encoding( $redirect_url ); + $normalized_requested_url = _wp_normalize_query_space_encoding( $requested_url ); + + if ( $normalized_redirect_url === $normalized_requested_url ) { + return; + } + // Hex-encoded octets are case-insensitive. if ( str_contains( $requested_url, '%' ) ) { if ( ! function_exists( 'lowercase_octets' ) ) { diff --git a/tests/phpunit/tests/canonical.php b/tests/phpunit/tests/canonical.php index 886b09312910e..e6c78e3545e87 100644 --- a/tests/phpunit/tests/canonical.php +++ b/tests/phpunit/tests/canonical.php @@ -263,6 +263,11 @@ public function data_canonical() { array( '/2008%20', '/2008' ), array( '//2008////', '/2008/' ), + // Query string space encoding variants should not redirect (Ticket #64376). + array( '/?test=one+two', '/?test=one+two' ), // Plus sign should stay as plus. + array( '/?test=one%20two', '/?test=one%20two' ), // %20 should stay as %20. + array( '/?utm_content=Hello+World&utm_source=test', '/?utm_content=Hello+World&utm_source=test' ), // Multiple params with plus. + // @todo Endpoints (feeds, trackbacks, etc). More fuzzed mixed query variables, comment paging, Home page (static). ); } @@ -465,6 +470,57 @@ public function test_feed_canonical_with_not_exists_query() { $this->assertNull( $redirect ); } + /** + * Test that query string space encoding variants do not trigger redirects. + * + * Ensures that URLs differing only in space encoding ('+' vs '%20') + * do not cause unnecessary 301 redirects between the two forms. + * + * @ticket 64376 + */ + public function test_query_string_encoding_variants_no_redirect() { + // Create a static front page to match the original bug report scenario. 
+ $page_id = self::factory()->post->create( + array( + 'post_type' => 'page', + ) + ); + update_option( 'show_on_front', 'page' ); + update_option( 'page_on_front', $page_id ); + + // Plus signs in UTM parameters should not redirect to %20. + $url_with_plus = home_url( '/?utm_content=Hello+World' ); + + $this->go_to( $url_with_plus ); + $redirect_from_plus = redirect_canonical( $url_with_plus, false ); + $this->assertNull( $redirect_from_plus, 'URL with + should not redirect to %20' ); + + // %20 encoding should not redirect to plus. + $url_with_percent = home_url( '/?utm_content=Hello%20World' ); + + $this->go_to( $url_with_percent ); + $redirect_from_percent = redirect_canonical( $url_with_percent, false ); + $this->assertNull( $redirect_from_percent, 'URL with %20 should not redirect to +' ); + + // Multiple query parameters with mixed space encoding. + $url_mixed = home_url( '/?name=John+Doe&city=New+York&zip=12345' ); + + $this->go_to( $url_mixed ); + $redirect = redirect_canonical( $url_mixed, false ); + $this->assertNull( $redirect, 'URL with multiple plus-encoded parameters should not redirect' ); + + // Mixed encoding with both + and %20 in different parameters. + $url_mixed_encoding = home_url( '/?name=John+Doe&city=New%20York' ); + + $this->go_to( $url_mixed_encoding ); + $redirect = redirect_canonical( $url_mixed_encoding, false ); + $this->assertNull( $redirect, 'URL with mixed + and %20 encoding should not redirect' ); + + // Clean up. + delete_option( 'page_on_front' ); + delete_option( 'show_on_front' ); + } + /** * Test canonical redirects for attachment pages when the option is disabled. *