From 1e9b4f638e81d30b047d3cc84150fdac8af1dd0b Mon Sep 17 00:00:00 2001 From: preciz Date: Mon, 15 Jun 2026 12:22:07 +0200 Subject: [PATCH 1/5] Optimize and simplify Plug.Conn.Utils.validate_utf8!/3 --- lib/plug/conn/utils.ex | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/lib/plug/conn/utils.ex b/lib/plug/conn/utils.ex index a98d09d2..5ef4ba3c 100644 --- a/lib/plug/conn/utils.ex +++ b/lib/plug/conn/utils.ex @@ -293,19 +293,13 @@ defmodule Plug.Conn.Utils do def validate_utf8!(binary, exception, context) def validate_utf8!(<>, exception, context) do - do_validate_utf8!(binary, exception, context) - end - - defp do_validate_utf8!(<<_::utf8, rest::bits>>, exception, context) do - do_validate_utf8!(rest, exception, context) - end + case :unicode.characters_to_binary(binary) do + ^binary -> + :ok - defp do_validate_utf8!(<>, exception, context) do - raise exception, "invalid UTF-8 on #{context}, got byte #{byte}" - end - - defp do_validate_utf8!(<<>>, _exception, _context) do - :ok + {_, _, <>} -> + raise exception, "invalid UTF-8 on #{context}, got byte #{byte}" + end end ## Helpers From a6dacd8303dc478b9e2aebfd8850ae59637b8a98 Mon Sep 17 00:00:00 2001 From: preciz Date: Mon, 15 Jun 2026 13:09:42 +0200 Subject: [PATCH 2/5] Optimize Plug.Conn.Utils.validate_utf8!/3 using 56-bit SWAR --- lib/plug/conn/utils.ex | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/lib/plug/conn/utils.ex b/lib/plug/conn/utils.ex index 5ef4ba3c..f3c1dae2 100644 --- a/lib/plug/conn/utils.ex +++ b/lib/plug/conn/utils.ex @@ -286,22 +286,40 @@ defmodule Plug.Conn.Utils do do: stripped end + # 56-bit SWAR guard: all 7 bytes are ASCII (< 128) + defguardp ascii_swar?(w) + when Bitwise.band(w, 0x80808080808080) == 0 + @doc """ Validates the given binary is valid UTF-8. """ @spec validate_utf8!(binary, module, binary) :: :ok | no_return - def validate_utf8!(binary, exception, context) - def validate_utf8!(<>, exception, context) do - case :unicode.characters_to_binary(binary) do - ^binary -> - :ok + do_validate_utf8!(binary, exception, context) + end - {_, _, <>} -> - raise exception, "invalid UTF-8 on #{context}, got byte #{byte}" - end + defp do_validate_utf8!(<>, exception, context) + when b <= 127 and ascii_swar?(w) do + do_validate_utf8!(rest, exception, context) end + defp do_validate_utf8!(<>, exception, context) when b <= 127 do + do_validate_utf8!(rest, exception, context) + end + + defp do_validate_utf8!(<<_::utf8, rest::binary>>, exception, context) do + do_validate_utf8!(rest, exception, context) + end + + defp do_validate_utf8!(<>, exception, context) do + raise exception, "invalid UTF-8 on #{context}, got byte #{byte}" + end + + defp do_validate_utf8!(<<>>, _exception, _context) do + :ok + end + + ## Helpers defp strip_spaces("\r\n" <> t), do: strip_spaces(t) From bbfcf91b79abcbed47ede4877039cb3f2d9b744b Mon Sep 17 00:00:00 2001 From: preciz Date: Mon, 15 Jun 2026 13:25:27 +0200 Subject: [PATCH 3/5] Fixes --- lib/plug/conn/utils.ex | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/plug/conn/utils.ex b/lib/plug/conn/utils.ex index f3c1dae2..7ceb773c 100644 --- a/lib/plug/conn/utils.ex +++ b/lib/plug/conn/utils.ex @@ -294,24 +294,26 @@ defmodule Plug.Conn.Utils do Validates the given binary is valid UTF-8. """ @spec validate_utf8!(binary, module, binary) :: :ok | no_return + def validate_utf8!(binary, exception, context) + def validate_utf8!(<>, exception, context) do do_validate_utf8!(binary, exception, context) end - defp do_validate_utf8!(<>, exception, context) + defp do_validate_utf8!(<>, exception, context) when b <= 127 and ascii_swar?(w) do do_validate_utf8!(rest, exception, context) end - defp do_validate_utf8!(<>, exception, context) when b <= 127 do + defp do_validate_utf8!(<>, exception, context) when b <= 127 do do_validate_utf8!(rest, exception, context) end - defp do_validate_utf8!(<<_::utf8, rest::binary>>, exception, context) do + defp do_validate_utf8!(<<_::utf8, rest::bits>>, exception, context) do do_validate_utf8!(rest, exception, context) end - defp do_validate_utf8!(<>, exception, context) do + defp do_validate_utf8!(<>, exception, context) do raise exception, "invalid UTF-8 on #{context}, got byte #{byte}" end From 0b5ce2fa8c4a64cd28d77876ef6728e7cd3f8658 Mon Sep 17 00:00:00 2001 From: preciz Date: Mon, 15 Jun 2026 13:26:52 +0200 Subject: [PATCH 4/5] Run formatter --- lib/plug/conn/utils.ex | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/plug/conn/utils.ex b/lib/plug/conn/utils.ex index 7ceb773c..7a9433ad 100644 --- a/lib/plug/conn/utils.ex +++ b/lib/plug/conn/utils.ex @@ -321,7 +321,6 @@ defmodule Plug.Conn.Utils do :ok end - ## Helpers defp strip_spaces("\r\n" <> t), do: strip_spaces(t) From 42486414ff2550afd1855cc64a16250ccac264bc Mon Sep 17 00:00:00 2001 From: preciz Date: Mon, 15 Jun 2026 13:31:16 +0200 Subject: [PATCH 5/5] Further optimize Plug.Conn.Utils.validate_utf8!/3 with dual-path SWAR --- lib/plug/conn/utils.ex | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/plug/conn/utils.ex b/lib/plug/conn/utils.ex index 7a9433ad..bf58e145 100644 --- a/lib/plug/conn/utils.ex +++ b/lib/plug/conn/utils.ex @@ -297,27 +297,33 @@ defmodule Plug.Conn.Utils do def validate_utf8!(binary, exception, context) def validate_utf8!(<>, exception, context) do - do_validate_utf8!(binary, exception, context) + if byte_size(binary) < 12 do + do_validate_utf8_small!(binary, exception, context) + else + do_validate_utf8_swar!(binary, exception, context) + end end - defp do_validate_utf8!(<>, exception, context) + # SWAR loop + defp do_validate_utf8_swar!(<>, exception, context) when b <= 127 and ascii_swar?(w) do - do_validate_utf8!(rest, exception, context) + do_validate_utf8_swar!(rest, exception, context) end - defp do_validate_utf8!(<>, exception, context) when b <= 127 do - do_validate_utf8!(rest, exception, context) + defp do_validate_utf8_swar!(rest, exception, context) do + do_validate_utf8_small!(rest, exception, context) end - defp do_validate_utf8!(<<_::utf8, rest::bits>>, exception, context) do - do_validate_utf8!(rest, exception, context) + # Small loop (identical to original character loop) + defp do_validate_utf8_small!(<<_::utf8, rest::bits>>, exception, context) do + do_validate_utf8_small!(rest, exception, context) end - defp do_validate_utf8!(<>, exception, context) do + defp do_validate_utf8_small!(<>, exception, context) do raise exception, "invalid UTF-8 on #{context}, got byte #{byte}" end - defp do_validate_utf8!(<<>>, _exception, _context) do + defp do_validate_utf8_small!(<<>>, _exception, _context) do :ok end