From ef28e760986d096d2f9c9c9a1b24e9aebaeb2087 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Wed, 4 Mar 2026 08:45:21 +0300 Subject: [PATCH 1/2] src: optimize utf-8 byte length calculation using simdutf --- src/node_buffer.cc | 83 ++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index e40a21288ee79d..1f52cc5a2ef9f2 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -761,9 +761,33 @@ void StringWrite(const FunctionCallbackInfo& args) { void SlowByteLengthUtf8(const FunctionCallbackInfo& args) { CHECK(args[0]->IsString()); - // Fast case: avoid StringBytes on UTF8 string. Jump to v8. - size_t result = args[0].As()->Utf8LengthV2(args.GetIsolate()); - args.GetReturnValue().Set(static_cast(result)); + Isolate* isolate = args.GetIsolate(); + Local source = args[0].As(); + + int length = source->Length(); + + if (source->IsOneByte()) { + args.GetReturnValue().Set( + static_cast(source->Utf8LengthV2(isolate))); + return; + } + + static constexpr int kSmallStringThreshold = 128; + if (length <= kSmallStringThreshold) { + args.GetReturnValue().Set( + static_cast(source->Utf8LengthV2(isolate))); + return; + } + + String::ValueView view(isolate, source); + auto data = reinterpret_cast(view.data16()); + if (simdutf::validate_utf16(data, length)) { + args.GetReturnValue().Set( + static_cast(simdutf::utf8_length_from_utf16(data, length))); + return; + } + args.GetReturnValue().Set( + static_cast(source->Utf8LengthV2(isolate))); } uint32_t FastByteLengthUtf8( @@ -776,49 +800,23 @@ uint32_t FastByteLengthUtf8( CHECK(sourceValue->IsString()); Local sourceStr = sourceValue.As(); - if (!sourceStr->IsExternalOneByte()) { + int length = sourceStr->Length(); + + if (sourceStr->IsOneByte()) { return sourceStr->Utf8LengthV2(isolate); } - auto source = sourceStr->GetExternalOneByteStringResource(); - // For short inputs, the function call overhead to simdutf is maybe - // not worth it, reserve simdutf for long strings. - if (source->length() > 128) { - return simdutf::utf8_length_from_latin1(source->data(), source->length()); - } - - uint32_t length = source->length(); - const auto input = reinterpret_cast(source->data()); - - uint32_t answer = length; - uint32_t i = 0; - - auto pop = [](uint64_t v) { - return static_cast(((v >> 7) & UINT64_C(0x0101010101010101)) * - UINT64_C(0x0101010101010101) >> - 56); - }; - for (; i + 32 <= length; i += 32) { - uint64_t v; - memcpy(&v, input + i, 8); - answer += pop(v); - memcpy(&v, input + i + 8, 8); - answer += pop(v); - memcpy(&v, input + i + 16, 8); - answer += pop(v); - memcpy(&v, input + i + 24, 8); - answer += pop(v); - } - for (; i + 8 <= length; i += 8) { - uint64_t v; - memcpy(&v, input + i, 8); - answer += pop(v); - } - for (; i + 1 <= length; i += 1) { - answer += input[i] >> 7; + static constexpr int kSmallStringThreshold = 128; + if (length <= kSmallStringThreshold) { + return sourceStr->Utf8LengthV2(isolate); } - return answer; + String::ValueView view(isolate, sourceStr); + auto data = reinterpret_cast(view.data16()); + if (simdutf::validate_utf16(data, length)) { + return simdutf::utf8_length_from_utf16(data, length); + } + return sourceStr->Utf8LengthV2(isolate); } static CFunction fast_byte_length_utf8(CFunction::Make(FastByteLengthUtf8)); @@ -1252,8 +1250,7 @@ static void IsAscii(const FunctionCallbackInfo& args) { env, "Cannot validate on a detached buffer"); } - args.GetReturnValue().Set( - !simdutf::validate_ascii_with_errors(abv.data(), abv.length()).error); + args.GetReturnValue().Set(simdutf::validate_ascii(abv.data(), abv.length())); } void SetBufferPrototype(const FunctionCallbackInfo& args) { From 753dbd0ae3f2aba53e392c1d38cd6cfcfaea52b9 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Thu, 5 Mar 2026 21:28:43 +0300 Subject: [PATCH 2/2] src: optimize utf-8 byte length calculation using simdutf --- src/node_buffer.cc | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 1f52cc5a2ef9f2..c681ecb675c982 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -764,16 +764,10 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo& args) { Isolate* isolate = args.GetIsolate(); Local source = args[0].As(); + static constexpr int kSmallStringThreshold = 128; int length = source->Length(); - if (source->IsOneByte()) { - args.GetReturnValue().Set( - static_cast(source->Utf8LengthV2(isolate))); - return; - } - - static constexpr int kSmallStringThreshold = 128; - if (length <= kSmallStringThreshold) { + if (length <= kSmallStringThreshold || source->IsOneByte()) { args.GetReturnValue().Set( static_cast(source->Utf8LengthV2(isolate))); return; @@ -800,14 +794,10 @@ uint32_t FastByteLengthUtf8( CHECK(sourceValue->IsString()); Local sourceStr = sourceValue.As(); + static constexpr int kSmallStringThreshold = 128; int length = sourceStr->Length(); - if (sourceStr->IsOneByte()) { - return sourceStr->Utf8LengthV2(isolate); - } - - static constexpr int kSmallStringThreshold = 128; - if (length <= kSmallStringThreshold) { + if (length <= kSmallStringThreshold || sourceStr->IsOneByte()) { return sourceStr->Utf8LengthV2(isolate); }