From 1929ab8fcbf71a6e68c791fafb4743a521a6ec74 Mon Sep 17 00:00:00 2001 From: sanketio Date: Tue, 13 Jan 2026 15:27:47 +0530 Subject: [PATCH 1/6] Fix unnecessary 301 canonical redirect for query string encoding --- src/wp-includes/canonical.php | 19 +++++++++ tests/phpunit/tests/canonical.php | 64 +++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php index 9315ba7fb7ff9..9e8b76a172a07 100644 --- a/src/wp-includes/canonical.php +++ b/src/wp-includes/canonical.php @@ -774,6 +774,25 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { return; } + /* + * Avoid redirects when URLs differ only in query string encoding. + * Per RFC 3986, certain characters can be represented in multiple equivalent ways: + * - Spaces: '+' vs '%20' (e.g., ?name=John+Doe vs ?name=John%20Doe) + * - Unreserved chars unnecessarily encoded: '~' vs '%7E', '-' vs '%2D', '_' vs '%5F', '.' vs '%2E' + * - Reserved chars in values: '/' vs '%2F', ':' vs '%3A', '@' vs '%40' + * + * Example problematic scenarios: + * - UTM params: ?utm_content=Hello+World vs ?utm_content=Hello%20World + * - Encoded paths: ?redirect=/path/to/page vs ?redirect=%2Fpath%2Fto%2Fpage + * - Email params: ?email=user@example.com vs ?email=user%40example.com + * + * Redirecting between these variants provides no SEO or functional benefit + * while potentially causing caching issues and breaking analytics. + */ + if ( $redirect_url && urldecode( $redirect_url ) === urldecode( $requested_url ) ) { + return; + } + // Hex-encoded octets are case-insensitive. if ( str_contains( $requested_url, '%' ) ) { if ( ! 
function_exists( 'lowercase_octets' ) ) { diff --git a/tests/phpunit/tests/canonical.php b/tests/phpunit/tests/canonical.php index 886b09312910e..9d60ab10d4374 100644 --- a/tests/phpunit/tests/canonical.php +++ b/tests/phpunit/tests/canonical.php @@ -263,6 +263,12 @@ public function data_canonical() { array( '/2008%20', '/2008' ), array( '//2008////', '/2008/' ), + // Query string encoding variants should not redirect (Ticket #64376). + array( '/?test=one+two', '/?test=one+two' ), // Plus sign should not redirect to %20. + array( '/?test=one%20two', '/?test=one%20two' ), // %20 should not redirect to plus. + array( '/?email=user%40example.com', '/?email=user%40example.com' ), // Encoded @ should not redirect. + array( '/?redirect=%2Fpath%2Fto%2Fpage', '/?redirect=%2Fpath%2Fto%2Fpage' ), // Encoded slashes should not redirect. + // @todo Endpoints (feeds, trackbacks, etc). More fuzzed mixed query variables, comment paging, Home page (static). ); } @@ -465,6 +471,64 @@ public function test_feed_canonical_with_not_exists_query() { $this->assertNull( $redirect ); } + /** + * Test that query string encoding variants do not trigger redirects. + * + * Ensures that URLs differing only in encoding (e.g., '+' vs '%20' for spaces) + * do not cause unnecessary 301 redirects. + * + * @ticket 64376 + */ + public function test_query_string_encoding_variants_no_redirect() { + // Create a static front page to match the original bug report scenario. + $page_id = self::factory()->post->create( + array( + 'post_type' => 'page', + ) + ); + update_option( 'show_on_front', 'page' ); + update_option( 'page_on_front', $page_id ); + + // Test 1: Plus signs in UTM parameters should not redirect to %20. 
+ $url_with_plus = home_url( '/?utm_content=Hello+World' ); + $url_with_percent = home_url( '/?utm_content=Hello%20World' ); + + $this->go_to( $url_with_plus ); + $redirect_from_plus = redirect_canonical( $url_with_plus, false ); + + $this->go_to( $url_with_percent ); + $redirect_from_percent = redirect_canonical( $url_with_percent, false ); + + // Both should return false (no redirect). + $this->assertFalse( $redirect_from_plus, 'URL with + should not redirect' ); + $this->assertFalse( $redirect_from_percent, 'URL with %20 should not redirect' ); + + // Test 2: Encoded @ symbol in email parameters. + $url_encoded_at = home_url( '/?email=user%40example.com' ); + + $this->go_to( $url_encoded_at ); + $redirect = redirect_canonical( $url_encoded_at, false ); + $this->assertFalse( $redirect, 'URL with encoded @ should not redirect' ); + + // Test 3: Encoded forward slashes in redirect parameters. + $url_encoded_slash = home_url( '/?redirect=%2Fpath%2Fto%2Fpage' ); + + $this->go_to( $url_encoded_slash ); + $redirect = redirect_canonical( $url_encoded_slash, false ); + $this->assertFalse( $redirect, 'URL with encoded slashes should not redirect' ); + + // Test 4: Multiple query parameters with mixed encoding. + $url_mixed = home_url( '/?name=John+Doe&city=New+York&zip=12345' ); + + $this->go_to( $url_mixed ); + $redirect = redirect_canonical( $url_mixed, false ); + $this->assertFalse( $redirect, 'URL with multiple plus-encoded parameters should not redirect' ); + + // Clean up. + delete_option( 'page_on_front' ); + delete_option( 'show_on_front' ); + } + /** * Test canonical redirects for attachment pages when the option is disabled. 
* From b111c0234c98f61d3cdc179b36d0bb72273245e1 Mon Sep 17 00:00:00 2001 From: sanketio Date: Tue, 13 Jan 2026 18:50:15 +0530 Subject: [PATCH 2/6] Fix assertions --- tests/phpunit/tests/canonical.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/phpunit/tests/canonical.php b/tests/phpunit/tests/canonical.php index 9d60ab10d4374..e8f5aa29c95e8 100644 --- a/tests/phpunit/tests/canonical.php +++ b/tests/phpunit/tests/canonical.php @@ -499,30 +499,30 @@ public function test_query_string_encoding_variants_no_redirect() { $this->go_to( $url_with_percent ); $redirect_from_percent = redirect_canonical( $url_with_percent, false ); - // Both should return false (no redirect). - $this->assertFalse( $redirect_from_plus, 'URL with + should not redirect' ); - $this->assertFalse( $redirect_from_percent, 'URL with %20 should not redirect' ); + // Both should return null (no redirect). + $this->assertNull( $redirect_from_plus, 'URL with + should not redirect' ); + $this->assertNull( $redirect_from_percent, 'URL with %20 should not redirect' ); // Test 2: Encoded @ symbol in email parameters. $url_encoded_at = home_url( '/?email=user%40example.com' ); $this->go_to( $url_encoded_at ); $redirect = redirect_canonical( $url_encoded_at, false ); - $this->assertFalse( $redirect, 'URL with encoded @ should not redirect' ); + $this->assertNull( $redirect, 'URL with encoded @ should not redirect' ); // Test 3: Encoded forward slashes in redirect parameters. $url_encoded_slash = home_url( '/?redirect=%2Fpath%2Fto%2Fpage' ); $this->go_to( $url_encoded_slash ); $redirect = redirect_canonical( $url_encoded_slash, false ); - $this->assertFalse( $redirect, 'URL with encoded slashes should not redirect' ); + $this->assertNull( $redirect, 'URL with encoded slashes should not redirect' ); // Test 4: Multiple query parameters with mixed encoding. 
$url_mixed = home_url( '/?name=John+Doe&city=New+York&zip=12345' ); $this->go_to( $url_mixed ); $redirect = redirect_canonical( $url_mixed, false ); - $this->assertFalse( $redirect, 'URL with multiple plus-encoded parameters should not redirect' ); + $this->assertNull( $redirect, 'URL with multiple plus-encoded parameters should not redirect' ); // Clean up. delete_option( 'page_on_front' ); From 5e3b2be4b4b843d5d4979a83706d630e39b6c4bf Mon Sep 17 00:00:00 2001 From: sanketio Date: Fri, 16 Jan 2026 10:03:40 +0530 Subject: [PATCH 3/6] Fix encoded URL condition --- src/wp-includes/canonical.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php index 9e8b76a172a07..d68e02b81c676 100644 --- a/src/wp-includes/canonical.php +++ b/src/wp-includes/canonical.php @@ -789,7 +789,7 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { * Redirecting between these variants provides no SEO or functional benefit * while potentially causing caching issues and breaking analytics. */ - if ( $redirect_url && urldecode( $redirect_url ) === urldecode( $requested_url ) ) { + if ( urldecode( $redirect_url ) === urldecode( $requested_url ) ) { return; } From c7b80ffb72ce49ac441a6d9f03b55e84372be13c Mon Sep 17 00:00:00 2001 From: sanketio Date: Wed, 4 Mar 2026 10:04:27 +0530 Subject: [PATCH 4/6] Only restrict redirect for query space encoding --- src/wp-includes/canonical.php | 66 +++++++++++++++++++++++-------- tests/phpunit/tests/canonical.php | 63 +++++++++++++++-------------- 2 files changed, 84 insertions(+), 45 deletions(-) diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php index d68e02b81c676..5da9c5c171988 100644 --- a/src/wp-includes/canonical.php +++ b/src/wp-includes/canonical.php @@ -9,6 +9,52 @@ * @since 2.3.0 */ +/** + * Avoid redirects when URLs differ only in query string space encoding ('+' vs '%20').
+ * Normalizing only the query portion prevents us from collapsing semantically + * distinct URLs that differ in how reserved characters (like '/') are encoded + * in paths or parameter values. + * + * Converts '+' to '%20' only in the query string so that URLs that differ + * solely by space encoding in their query are treated as equivalent. + * + * @param string $url The URL to normalize. + * + * @return string The URL with normalized query string space encoding. + */ +function _wp_normalize_query_space_encoding( $url ) { + + // If there is no query string, return the URL as-is. + $qpos = strpos( $url, '?' ); + if ( false === $qpos ) { + return $url; + } + + /* + * Split the URL into three parts: + * - the base (up to and including '?'), + * - the query string (between '?' and '#'), + * - the fragment (from '#' to the end). + * + * This allows us to normalize the query string without affecting the path or fragment, + * which may have their own encoding that should be preserved. + */ + $hashpos = strpos( $url, '#', $qpos ); + if ( false === $hashpos ) { + $base = substr( $url, 0, $qpos + 1 ); + $query = substr( $url, $qpos + 1 ); + $fragment = ''; + } else { + $base = substr( $url, 0, $qpos + 1 ); + $query = substr( $url, $qpos + 1, $hashpos - ( $qpos + 1 ) ); + $fragment = substr( $url, $hashpos ); + } + + $normalized_query = str_replace( '+', '%20', $query ); + + return $base . $normalized_query . $fragment; +} + /** * Redirects incoming links to the proper URL based on the site url. * @@ -774,22 +820,10 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { return; } - /* - * Avoid redirects when URLs differ only in query string encoding. - * Per RFC 3986, certain characters can be represented in multiple equivalent ways: - * - Spaces: '+' vs '%20' (e.g., ?name=John+Doe vs ?name=John%20Doe) - * - Unreserved chars unnecessarily encoded: '~' vs '%7E', '-' vs '%2D', '_' vs '%5F', '.'
vs '%2E' - * - Reserved chars in values: '/' vs '%2F', ':' vs '%3A', '@' vs '%40' - * - * Example problematic scenarios: - * - UTM params: ?utm_content=Hello+World vs ?utm_content=Hello%20World - * - Encoded paths: ?redirect=/path/to/page vs ?redirect=%2Fpath%2Fto%2Fpage - * - Email params: ?email=user@example.com vs ?email=user%40example.com - * - * Redirecting between these variants provides no SEO or functional benefit - * while potentially causing caching issues and breaking analytics. - */ - if ( urldecode( $redirect_url ) === urldecode( $requested_url ) ) { + $normalized_redirect_url = _wp_normalize_query_space_encoding( $redirect_url ); + $normalized_requested_url = _wp_normalize_query_space_encoding( $requested_url ); + + if ( $normalized_redirect_url === $normalized_requested_url ) { return; } diff --git a/tests/phpunit/tests/canonical.php b/tests/phpunit/tests/canonical.php index e8f5aa29c95e8..603c2d2ab2b9b 100644 --- a/tests/phpunit/tests/canonical.php +++ b/tests/phpunit/tests/canonical.php @@ -263,11 +263,10 @@ public function data_canonical() { array( '/2008%20', '/2008' ), array( '//2008////', '/2008/' ), - // Query string encoding variants should not redirect (Ticket #64376). - array( '/?test=one+two', '/?test=one+two' ), // Plus sign should not redirect to %20. - array( '/?test=one%20two', '/?test=one%20two' ), // %20 should not redirect to plus. - array( '/?email=user%40example.com', '/?email=user%40example.com' ), // Encoded @ should not redirect. - array( '/?redirect=%2Fpath%2Fto%2Fpage', '/?redirect=%2Fpath%2Fto%2Fpage' ), // Encoded slashes should not redirect. + // Query string space encoding variants should not redirect (Ticket #64376). + array( '/?test=one+two', '/?test=one+two' ), // Plus sign should stay as plus. + array( '/?test=one%20two', '/?test=one%20two' ), // %20 should stay as %20. + array( '/?utm_content=Hello+World&utm_source=test', '/?utm_content=Hello+World&utm_source=test' ), // Multiple params with plus. 
// @todo Endpoints (feeds, trackbacks, etc). More fuzzed mixed query variables, comment paging, Home page (static). ); @@ -472,10 +471,10 @@ public function test_feed_canonical_with_not_exists_query() { } /** - * Test that query string encoding variants do not trigger redirects. + * Test that query string space encoding variants do not trigger redirects. * - * Ensures that URLs differing only in encoding (e.g., '+' vs '%20' for spaces) - * do not cause unnecessary 301 redirects. + * Ensures that URLs differing only in space encoding ('+' vs '%20') + * do not cause unnecessary 301 redirects between the two forms. * * @ticket 64376 */ @@ -489,40 +488,46 @@ public function test_query_string_encoding_variants_no_redirect() { update_option( 'show_on_front', 'page' ); update_option( 'page_on_front', $page_id ); - // Test 1: Plus signs in UTM parameters should not redirect to %20. - $url_with_plus = home_url( '/?utm_content=Hello+World' ); - $url_with_percent = home_url( '/?utm_content=Hello%20World' ); + // Plus signs in UTM parameters should not redirect to %20. + $url_with_plus = home_url( '/?utm_content=Hello+World' ); $this->go_to( $url_with_plus ); $redirect_from_plus = redirect_canonical( $url_with_plus, false ); + $this->assertNull( $redirect_from_plus, 'URL with + should not redirect to %20' ); + + // %20 encoding should not redirect to plus. + $url_with_percent = home_url( '/?utm_content=Hello%20World' ); $this->go_to( $url_with_percent ); $redirect_from_percent = redirect_canonical( $url_with_percent, false ); + $this->assertNull( $redirect_from_percent, 'URL with %20 should not redirect to +' ); - // Both should return null (no redirect). - $this->assertNull( $redirect_from_plus, 'URL with + should not redirect' ); - $this->assertNull( $redirect_from_percent, 'URL with %20 should not redirect' ); + // Multiple query parameters with plus-encoded spaces.
+ $url_mixed = home_url( '/?name=John+Doe&city=New+York&zip=12345' ); - // Test 2: Encoded @ symbol in email parameters. - $url_encoded_at = home_url( '/?email=user%40example.com' ); + $this->go_to( $url_mixed ); + $redirect = redirect_canonical( $url_mixed, false ); + $this->assertNull( $redirect, 'URL with multiple plus-encoded parameters should not redirect' ); - $this->go_to( $url_encoded_at ); - $redirect = redirect_canonical( $url_encoded_at, false ); - $this->assertNull( $redirect, 'URL with encoded @ should not redirect' ); + // Mixed encoding with both + and %20 in different parameters. + $url_mixed_encoding = home_url( '/?name=John+Doe&city=New%20York' ); - // Test 3: Encoded forward slashes in redirect parameters. - $url_encoded_slash = home_url( '/?redirect=%2Fpath%2Fto%2Fpage' ); + $this->go_to( $url_mixed_encoding ); + $redirect = redirect_canonical( $url_mixed_encoding, false ); + $this->assertNull( $redirect, 'URL with mixed + and %20 encoding should not redirect' ); - $this->go_to( $url_encoded_slash ); - $redirect = redirect_canonical( $url_encoded_slash, false ); - $this->assertNull( $redirect, 'URL with encoded slashes should not redirect' ); + // Verify that other encoded characters are handled properly. + // URLs with encoded reserved characters should maintain their encoding. + $url_encoded_at = home_url( '/?email=user%40example.com' ); + $url_unencoded_at = home_url( '/?email=user@example.com' ); - // Test 4: Multiple query parameters with mixed encoding. 
- $url_mixed = home_url( '/?name=John+Doe&city=New+York&zip=12345' ); + $this->go_to( $url_encoded_at ); + $redirect_encoded = redirect_canonical( $url_encoded_at, false ); + $this->assertNull( $redirect_encoded, 'URL with encoded @ should not redirect to itself' ); - $this->go_to( $url_mixed ); - $redirect = redirect_canonical( $url_mixed, false ); - $this->assertNull( $redirect, 'URL with multiple plus-encoded parameters should not redirect' ); + $this->go_to( $url_unencoded_at ); + $redirect_unencoded = redirect_canonical( $url_unencoded_at, false ); + $this->assertNull( $redirect_unencoded, 'URL with unencoded @ should not redirect to itself' ); // Clean up. delete_option( 'page_on_front' ); From f2194715bea98d57be8a47fbd3fccc01bcbbe5f1 Mon Sep 17 00:00:00 2001 From: sanketio Date: Wed, 4 Mar 2026 10:07:56 +0530 Subject: [PATCH 5/6] Fix equal sign warning --- tests/phpunit/tests/canonical.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/canonical.php b/tests/phpunit/tests/canonical.php index 603c2d2ab2b9b..5b6fb2056905d 100644 --- a/tests/phpunit/tests/canonical.php +++ b/tests/phpunit/tests/canonical.php @@ -518,8 +518,8 @@ public function test_query_string_encoding_variants_no_redirect() { // Verify that other encoded characters are handled properly. // URLs with encoded reserved characters should maintain their encoding. 
- $url_encoded_at = home_url( '/?email=user%40example.com' ); - $url_unencoded_at = home_url( '/?email=user@example.com' ); + $url_encoded_at = home_url( '/?email=user%40example.com' ); + $url_unencoded_at = home_url( '/?email=user@example.com' ); $this->go_to( $url_encoded_at ); $redirect_encoded = redirect_canonical( $url_encoded_at, false ); From 4c88802c18e826fd127bb0c40863879da1094a30 Mon Sep 17 00:00:00 2001 From: sanketio Date: Wed, 4 Mar 2026 11:20:17 +0530 Subject: [PATCH 6/6] Fix test case --- tests/phpunit/tests/canonical.php | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/phpunit/tests/canonical.php b/tests/phpunit/tests/canonical.php index 5b6fb2056905d..e6c78e3545e87 100644 --- a/tests/phpunit/tests/canonical.php +++ b/tests/phpunit/tests/canonical.php @@ -516,19 +516,6 @@ public function test_query_string_encoding_variants_no_redirect() { $redirect = redirect_canonical( $url_mixed_encoding, false ); $this->assertNull( $redirect, 'URL with mixed + and %20 encoding should not redirect' ); - // Verify that other encoded characters are handled properly. - // URLs with encoded reserved characters should maintain their encoding. - $url_encoded_at = home_url( '/?email=user%40example.com' ); - $url_unencoded_at = home_url( '/?email=user@example.com' ); - - $this->go_to( $url_encoded_at ); - $redirect_encoded = redirect_canonical( $url_encoded_at, false ); - $this->assertNull( $redirect_encoded, 'URL with encoded @ should not redirect to itself' ); - - $this->go_to( $url_unencoded_at ); - $redirect_unencoded = redirect_canonical( $url_unencoded_at, false ); - $this->assertNull( $redirect_unencoded, 'URL with unencoded @ should not redirect to itself' ); - // Clean up. delete_option( 'page_on_front' ); delete_option( 'show_on_front' );