From ea1f127092ba82e88151872e185f5778c699f7e3 Mon Sep 17 00:00:00 2001 From: Dan Carney Date: Wed, 5 Nov 2025 15:19:09 +0000 Subject: [PATCH] deps: V8: cherry-pick 64b36b441179 Original commit message: optimize ascii fast path in WriteUtf8V2 Change-Id: If28168cb4395b953d0ec642ef4fc618ce963dbcd Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7124103 Reviewed-by: Toon Verwaest Commit-Queue: Erik Corry Reviewed-by: Erik Corry Cr-Commit-Position: refs/heads/main@{#103542} Refs: https://github.com/v8/v8/commit/64b36b44117949fe03df33d077117e7bd6257669 --- common.gypi | 2 +- deps/v8/src/strings/unicode-inl.h | 21 ++++++++++++++++++++- deps/v8/src/strings/unicode.h | 11 +++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/common.gypi b/common.gypi index 8a14bb85767b4f..a4825c5429d761 100644 --- a/common.gypi +++ b/common.gypi @@ -38,7 +38,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.11', + 'v8_embedder_string': '-node.12', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/strings/unicode-inl.h b/deps/v8/src/strings/unicode-inl.h index b210e18ebec6a6..782ff2ab500ee1 100644 --- a/deps/v8/src/strings/unicode-inl.h +++ b/deps/v8/src/strings/unicode-inl.h @@ -206,6 +206,16 @@ bool Utf8::IsValidCharacter(uchar c) { c != kBadChar); } +template <> +bool Utf8::IsAsciiOneByteString(const uint8_t* buffer, size_t size) { + return simdutf::validate_ascii(reinterpret_cast(buffer), size); +} + +template <> +bool Utf8::IsAsciiOneByteString(const uint16_t* buffer, size_t size) { + return false; +} + template Utf8::EncodingResult Utf8::Encode(v8::base::Vector string, char* buffer, size_t capacity, @@ -221,8 +231,17 @@ Utf8::EncodingResult Utf8::Encode(v8::base::Vector string, const Char* characters = string.begin(); size_t content_capacity = capacity - write_null; CHECK_LE(content_capacity, capacity); - uint16_t last = Utf16::kNoPreviousCharacter; size_t read_index = 0; + if (kSourceIsOneByte) { + size_t writeable = std::min(string.size(), content_capacity); + // Just memcpy when possible. + if (writeable > 0 && Utf8::IsAsciiOneByteString(characters, writeable)) { + memcpy(buffer, characters, writeable); + read_index = writeable; + write_index = writeable; + } + } + uint16_t last = Utf16::kNoPreviousCharacter; for (; read_index < string.size(); read_index++) { Char character = characters[read_index]; diff --git a/deps/v8/src/strings/unicode.h b/deps/v8/src/strings/unicode.h index ef1e717b1ea857..e8e9cedceeadc9 100644 --- a/deps/v8/src/strings/unicode.h +++ b/deps/v8/src/strings/unicode.h @@ -212,6 +212,9 @@ class V8_EXPORT_PRIVATE Utf8 { // - valid code point range. static bool ValidateEncoding(const uint8_t* str, size_t length); + template + static bool IsAsciiOneByteString(const Char* buffer, size_t size); + // Encode the given characters as Utf8 into the provided output buffer. struct EncodingResult { size_t bytes_written; @@ -223,6 +226,14 @@ class V8_EXPORT_PRIVATE Utf8 { bool replace_invalid_utf8); }; +template <> +inline bool Utf8::IsAsciiOneByteString(const uint8_t* buffer, + size_t size); + +template <> +inline bool Utf8::IsAsciiOneByteString(const uint16_t* buffer, + size_t size); + #if V8_ENABLE_WEBASSEMBLY class V8_EXPORT_PRIVATE Wtf8 { public: