From 24079f4c1597ca7a85f1905ee068afaf58f7a0b5 Mon Sep 17 00:00:00 2001
From: James M Snell
Date: Wed, 5 Nov 2025 13:15:35 -0800
Subject: [PATCH] deps: float v8 patch for WriteUtf8V2

Address performance regression in WriteUtf8V2

Refs: https://chromium-review.googlesource.com/c/v8/v8/+/7124103
---
 deps/v8/src/strings/unicode-inl.h | 22 +++++++++++++++++++++-
 deps/v8/src/strings/unicode.h     | 10 ++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/deps/v8/src/strings/unicode-inl.h b/deps/v8/src/strings/unicode-inl.h
index b210e18ebec6a6..3f317a32867e26 100644
--- a/deps/v8/src/strings/unicode-inl.h
+++ b/deps/v8/src/strings/unicode-inl.h
@@ -206,6 +206,17 @@ bool Utf8::IsValidCharacter(uchar c) {
           c != kBadChar);
 }
 
+template <>
+bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size) {
+  return simdutf::validate_ascii(reinterpret_cast<const char*>(buffer), size);
+}
+
+template <>
+bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer, size_t size) {
+  // Necessary for template instantiation.
+  UNREACHABLE();
+}
+
 template <typename Char>
 Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
                                   char* buffer, size_t capacity,
@@ -221,8 +232,17 @@ Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
   const Char* characters = string.begin();
   size_t content_capacity = capacity - write_null;
   CHECK_LE(content_capacity, capacity);
-  uint16_t last = Utf16::kNoPreviousCharacter;
   size_t read_index = 0;
+  if (kSourceIsOneByte && string.size() > 0 &&
+      Utf8::IsAsciiOneByteString<Char>(characters,
+        std::min(string.size(), content_capacity))) {
+    size_t to_write = std::min(string.size(), content_capacity);
+    // Just memcpy when possible.
+    memcpy(buffer, characters, to_write);
+    read_index = to_write;
+    write_index = to_write;
+  }
+  uint16_t last = Utf16::kNoPreviousCharacter;
   for (; read_index < string.size(); read_index++) {
     Char character = characters[read_index];
 
diff --git a/deps/v8/src/strings/unicode.h b/deps/v8/src/strings/unicode.h
index ef1e717b1ea857..32a0b84a8399b2 100644
--- a/deps/v8/src/strings/unicode.h
+++ b/deps/v8/src/strings/unicode.h
@@ -212,6 +212,16 @@ class V8_EXPORT_PRIVATE Utf8 {
   // - valid code point range.
   static bool ValidateEncoding(const uint8_t* str, size_t length);
 
+  template <typename Char>
+  static bool IsAsciiOneByteString(const Char* buffer, size_t size);
+
+  template <>
+  inline bool IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size);
+
+  template <>
+  inline bool IsAsciiOneByteString<uint16_t>(const uint16_t* buffer,
+                                             size_t size);
+
   // Encode the given characters as Utf8 into the provided output buffer.
   struct EncodingResult {
     size_t bytes_written;