From e4d609cf1b28df72afeb8828744085785eca9e76 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Mon, 22 Dec 2025 15:15:32 +0000 Subject: [PATCH 01/32] Add new encoding tests and implementation --- include/cpp/encoding/ASCII.hpp | 24 +++ include/cpp/encoding/UTF16.hpp | 21 +++ include/cpp/encoding/UTF8.hpp | 35 ++++ include/cpp/marshal/View.hpp | 2 +- src/cpp/encoding/ASCII.cpp | 66 +++++++ src/cpp/encoding/UTF16.cpp | 201 ++++++++++++++++++++++ src/cpp/encoding/UTF8.cpp | 219 ++++++++++++++++++++++++ test/native/Native.hx | 6 +- test/native/tests/encoding/TestAscii.hx | 86 ++++++++++ test/native/tests/encoding/TestUtf16.hx | 180 +++++++++++++++++++ test/native/tests/encoding/TestUtf8.hx | 157 +++++++++++++++++ toolchain/haxe-target.xml | 7 + 12 files changed, 1002 insertions(+), 2 deletions(-) create mode 100644 include/cpp/encoding/ASCII.hpp create mode 100644 include/cpp/encoding/UTF16.hpp create mode 100644 include/cpp/encoding/UTF8.hpp create mode 100644 src/cpp/encoding/ASCII.cpp create mode 100644 src/cpp/encoding/UTF16.cpp create mode 100644 src/cpp/encoding/UTF8.cpp create mode 100644 test/native/tests/encoding/TestAscii.hx create mode 100644 test/native/tests/encoding/TestUtf16.hx create mode 100644 test/native/tests/encoding/TestUtf8.hx diff --git a/include/cpp/encoding/ASCII.hpp b/include/cpp/encoding/ASCII.hpp new file mode 100644 index 000000000..0d89fa63c --- /dev/null +++ b/include/cpp/encoding/ASCII.hpp @@ -0,0 +1,24 @@ +#pragma once + +namespace cpp +{ + namespace encoding + { + struct Ascii final + { + static bool isEncoded(const String& string); + + /// + /// Encode the provided string to ASCII bytes and write them to the buffer. + /// If the provided string is UTF16 encoded an exception is raised and nothing is written to the buffer. + /// + /// Number of chars written to the buffer. + static int64_t encode(const String& string, cpp::marshal::View buffer); + + /// + /// Create a string from the provided ASCII bytes. 
+ /// + static String decode(cpp::marshal::View string); + }; + } +} \ No newline at end of file diff --git a/include/cpp/encoding/UTF16.hpp b/include/cpp/encoding/UTF16.hpp new file mode 100644 index 000000000..9bb3bcc36 --- /dev/null +++ b/include/cpp/encoding/UTF16.hpp @@ -0,0 +1,21 @@ +#pragma once + +namespace cpp +{ + namespace encoding + { + struct Utf16 final + { + static bool isEncoded(const String& string); + + static int32_t getByteCount(const char32_t& codepoint); + static int64_t getByteCount(const String& string); + + static int64_t encode(const String& string, cpp::marshal::View buffer); + static int64_t encode(const char32_t& codepoint, cpp::marshal::View buffer); + + static String decode(cpp::marshal::View buffer); + static int64_t decode(cpp::marshal::View buffer, char32_t& out); + }; + } +} \ No newline at end of file diff --git a/include/cpp/encoding/UTF8.hpp b/include/cpp/encoding/UTF8.hpp new file mode 100644 index 000000000..db06a9251 --- /dev/null +++ b/include/cpp/encoding/UTF8.hpp @@ -0,0 +1,35 @@ +#pragma once + +namespace cpp +{ + namespace encoding + { + struct Utf8 final + { + /// + /// Returns the number of bytes required to store the codepoint in its UTF8 form. + /// + static int64_t getByteCount(const char32_t& codepoint); + + /// + /// Returns the number of bytes required to store the string in its UTF8 form. + /// + static int64_t getByteCount(const String& string); + + /// + /// Writes the provided string in its UTF8 form to the buffer. + /// + /// Number of bytes written into the buffer + static int64_t encode(const String& string, cpp::marshal::View buffer); + + /// + /// Writes the provided code point in its UTF8 form to the buffer. 
+ /// + /// Number of bytes written into the buffer + static int64_t encode(const char32_t& codepoint, cpp::marshal::View buffer); + + static String decode(cpp::marshal::View buffer); + static int64_t decode(cpp::marshal::View buffer, char32_t& out); + }; + } +} \ No newline at end of file diff --git a/include/cpp/marshal/View.hpp b/include/cpp/marshal/View.hpp index 6b2ad2d1f..f62e6ad07 100644 --- a/include/cpp/marshal/View.hpp +++ b/include/cpp/marshal/View.hpp @@ -67,7 +67,7 @@ template template inline cpp::marshal::View cpp::marshal::View::reinterpret() { - auto newPtr = ::cpp::Pointer{ ptr.reinterpret() }; + auto newPtr = ::cpp::Pointer(reinterpret_cast(ptr.ptr)); auto fromSize = sizeof(T); auto toSize = sizeof(K); diff --git a/src/cpp/encoding/ASCII.cpp b/src/cpp/encoding/ASCII.cpp new file mode 100644 index 000000000..3115e19cd --- /dev/null +++ b/src/cpp/encoding/ASCII.cpp @@ -0,0 +1,66 @@ +#include + +using namespace cpp::marshal; + +bool cpp::encoding::Ascii::isEncoded(const String& string) +{ + if (null() == string) + { + hx::NullReference("String", false); + } + + return string.isAsciiEncoded(); +} + +int64_t cpp::encoding::Ascii::encode(const String& string, View buffer) +{ + if (hx::IsNull(string)) + { + hx::NullReference("String", false); + } + + if (string.isUTF16Encoded()) + { + hx::Throw(HX_CSTRING("String cannot be encoded to ASCII")); + } + + auto src = cpp::marshal::View(string.raw_ptr(), string.length).reinterpret(); + + if (src.tryCopyTo(buffer)) + { + return src.length; + } + else + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } +} + +String cpp::encoding::Ascii::decode(View view) +{ + if (view.isEmpty()) + { + return hx::Throw(HX_CSTRING("View is empty")); + } + + auto bytes = int64_t{ 0 }; + auto i = int64_t{ 0 }; + auto chars = view.reinterpret(); + + while (i < chars.length && 0 != chars.ptr[i]) + { + bytes += sizeof(char); + i++; + } + + if (0 == bytes) + { + return String::emptyString; + } + + auto backing = 
hx::NewGCPrivate(0, bytes + sizeof(char)); + + std::memcpy(backing, view.ptr.ptr, bytes); + + return String(static_cast(backing), bytes / sizeof(char)); +} diff --git a/src/cpp/encoding/UTF16.cpp b/src/cpp/encoding/UTF16.cpp new file mode 100644 index 000000000..743629301 --- /dev/null +++ b/src/cpp/encoding/UTF16.cpp @@ -0,0 +1,201 @@ +#include +#include + +using namespace cpp::marshal; + +namespace +{ + bool isSurrogate(char32_t codepoint) + { + return codepoint >= 0xd800 && codepoint < 0xe000; + } +} + +bool cpp::encoding::Utf16::isEncoded(const String& string) +{ + if (hx::IsNull(string)) + { + hx::NullReference("String", false); + } + + return string.isUTF16Encoded(); +} + +int32_t cpp::encoding::Utf16::getByteCount(const char32_t& codepoint) +{ + if (codepoint >= 0x10000) + { + if (codepoint < 0x110000) + { + return 4; + } + } + + return 2; +} + +int64_t cpp::encoding::Utf16::getByteCount(const String& string) +{ + if (hx::IsNull(string)) + { + hx::NullReference("String", false); + } + + if (string.isUTF16Encoded()) + { + return string.length * sizeof(char16_t); + } + else + { + auto bytes = int64_t{ 0 }; + for (auto i = 0; i < string.length; i++) + { + bytes += getByteCount(static_cast(string.raw_ptr()[i])); + } + + return bytes; + } +} + +int64_t cpp::encoding::Utf16::encode(const String& string, cpp::marshal::View buffer) +{ + if (hx::IsNull(string)) + { + hx::NullReference("String", false); + } + + if (0 == string.length) + { + return 0; + } + + if (buffer.isEmpty()) + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + + if (string.isUTF16Encoded()) + { + auto src = cpp::marshal::View(reinterpret_cast(const_cast(string.raw_wptr())), string.length * sizeof(char16_t)); + + if (src.tryCopyTo(buffer)) + { + return src.length; + } + else + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + } + else + { + auto bytes = int64_t{ 0 }; + for (auto i = 0; i < string.length; i++) + { + bytes += getByteCount(static_cast(string.raw_ptr()[i])); + } + + 
if (bytes > buffer.length) + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + + for (auto i = 0; i < string.length; i++) + { + buffer = buffer.slice(encode(static_cast(string.raw_ptr()[i]), buffer)); + } + + return bytes; + } +} + +int64_t cpp::encoding::Utf16::encode(const char32_t& codepoint, cpp::marshal::View buffer) +{ + if (codepoint >= 0x10000) + { + auto over = codepoint - 0x10000; + if (over >= 0x10000) + { + Marshal::writeUInt16(buffer, 0xFFFD); + + return 2; + } + else + { + auto staging = std::array(); + staging[0] = (over >> 10) + 0xD800; + staging[1] = (over & 0x3FF) + 0xDC00; + + Marshal::writeUInt32(buffer, *reinterpret_cast(staging.data())); + + return 4; + } + } + else if (isSurrogate(codepoint)) + { + Marshal::writeUInt16(buffer, 0xFFFD); + + return 2; + } + else + { + Marshal::writeUInt16(buffer, static_cast(codepoint)); + + return 2; + } +} + +String cpp::encoding::Utf16::decode(cpp::marshal::View buffer) +{ + if (buffer.isEmpty()) + { + return hx::Throw(HX_CSTRING("View empty")); + } + + auto bytes = int64_t{ 0 }; + auto codepoint = char32_t{ 0 }; + auto i = int64_t{ 0 }; + + while (i < buffer.length) + { + i += decode(buffer.slice(i), codepoint); + bytes += getByteCount(codepoint); + } + + auto backing = static_cast(hx::NewGCPrivate(0, bytes + sizeof(char16_t))); + auto output = View(backing, bytes); + + while (false == buffer.isEmpty()) + { + buffer = buffer.slice(decode(buffer, codepoint)); + output = output.slice(encode(codepoint, output)); + } + + reinterpret_cast(backing)[-1] |= HX_GC_STRING_CHAR16_T; + + return String(reinterpret_cast(backing), bytes / sizeof(char16_t)); +} + +int64_t cpp::encoding::Utf16::decode(cpp::marshal::View buffer, char32_t& codepoint) +{ + auto first = static_cast(Marshal::readUInt16(buffer)); + + if (0xD800 <= first && first < 0xDc00) + { + auto second = static_cast(Marshal::readUInt16(buffer.slice(2))); + if (0xDC00 <= second && second < 0xE000) + { + codepoint = ((((first - 0xD800) << 10) | 
(second - 0xDC00)) + 0x10000); + + return 4; + } + + return hx::Throw(HX_CSTRING("Invalid UTF16")); + } + else + { + codepoint = first; + + return 2; + } +} diff --git a/src/cpp/encoding/UTF8.cpp b/src/cpp/encoding/UTF8.cpp new file mode 100644 index 000000000..dc13c5c27 --- /dev/null +++ b/src/cpp/encoding/UTF8.cpp @@ -0,0 +1,219 @@ +#include + +using namespace cpp::marshal; + +int64_t cpp::encoding::Utf8::getByteCount(const char32_t& codepoint) +{ + if (codepoint <= 0x7F) + { + return 1; + } + else if (codepoint <= 0x7FF) + { + return 2; + } + else if (codepoint <= 0xFFFF) + { + return 3; + } + else + { + return 4; + } +} + +int64_t cpp::encoding::Utf8::getByteCount(const String& string) +{ + if (hx::IsNull(string)) + { + hx::NullReference("String", false); + } + + if (string.isAsciiEncoded()) + { + return string.length; + } + +#if defined(HX_SMART_STRINGS) + auto source = View(string.raw_wptr(), string.length).reinterpret(); + auto length = source.length; + auto codepoint = char32_t{ 0 }; + auto bytes = int64_t{ 0 }; + + while (false == source.isEmpty()) + { + source = source.slice(Utf16::decode(source, codepoint)); + bytes += getByteCount(codepoint); + } + + return bytes; +#else + return hx::Throw(HX_CSTRING("Unexpected encoding error")); +#endif +} + +int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View buffer) +{ + if (hx::IsNull(string)) + { + hx::NullReference("String", false); + } + + if (0 == string.length) + { + return 0; + } + + if (buffer.isEmpty()) + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + + if (string.isAsciiEncoded()) + { + auto src = cpp::marshal::View(reinterpret_cast(const_cast(string.raw_ptr())), string.length * sizeof(char)); + + if (src.tryCopyTo(buffer)) + { + return src.length; + } + else + { + return hx::Throw(HX_CSTRING("Buffer too small")); + } + } + +#if defined(HX_SMART_STRINGS) + if (getByteCount(string) > buffer.length) + { + hx::Throw(HX_CSTRING("Buffer too small")); + } + + auto initialPtr 
= buffer.ptr.ptr; + auto source = View(string.raw_wptr(), string.length).reinterpret(); + auto codepoint = char32_t{ 0 }; + + while (false == source.isEmpty()) + { + source = source.slice(Utf16::decode(source, codepoint)); + buffer = buffer.slice(encode(codepoint, buffer)); + } + + return buffer.ptr.ptr - initialPtr; +#else + return hx::Throw(HX_CSTRING("Unexpected encoding error")); +#endif +} + +int64_t cpp::encoding::Utf8::encode(const char32_t& codepoint, cpp::marshal::View buffer) +{ + if (getByteCount(codepoint) > buffer.length) + { + hx::Throw(HX_CSTRING("Buffer too small")); + } + + if (codepoint <= 0x7F) + { + buffer.ptr[0] = codepoint; + + return 1; + } + else if (codepoint <= 0x7FF) + { + buffer.ptr[0] = (0xC0 | (codepoint >> 6)); + buffer.ptr[1] = (0x80 | (codepoint & 63)); + + return 2; + } + else if (codepoint <= 0xFFFF) + { + buffer.ptr[0] = (0xE0 | (codepoint >> 12)); + buffer.ptr[1] = (0x80 | ((codepoint >> 6) & 63)); + buffer.ptr[2] = (0x80 | (codepoint & 63)); + + return 3; + } + else + { + buffer.ptr[0] = (0xF0 | (codepoint >> 18)); + buffer.ptr[1] = (0x80 | ((codepoint >> 12) & 63)); + buffer.ptr[2] = (0x80 | ((codepoint >> 6) & 63)); + buffer.ptr[3] = (0x80 | (codepoint & 63)); + + return 4; + } +} + +String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) +{ + if (buffer.isEmpty()) + { + return hx::Throw(HX_CSTRING("View empty")); + } + + auto bytes = int64_t{ 0 }; + auto codepoint = char32_t{ 0 }; + auto i = int64_t{ 0 }; + + while (i < buffer.length) + { + i += decode(buffer.slice(i), codepoint); + bytes += Utf16::getByteCount(codepoint); + } + + auto backing = static_cast(hx::NewGCPrivate(0, bytes + sizeof(char16_t))); + auto output = View(backing, bytes); + + while (false == buffer.isEmpty()) + { + buffer = buffer.slice(decode(buffer, codepoint)); + output = output.slice(Utf16::encode(codepoint, output)); + } + + reinterpret_cast(backing)[-1] |= HX_GC_STRING_CHAR16_T; + + return String(reinterpret_cast(backing), bytes / 
sizeof(char16_t)); +} + +int64_t cpp::encoding::Utf8::decode(cpp::marshal::View buffer, char32_t& codepoint) +{ + if (0 == buffer.length) + { + return hx::Throw(HX_CSTRING("Empty view")); + } + + auto b0 = buffer[0]; + + if ((b0 & 0x80) == 0) + { + codepoint = b0; + + return 1; + } + else if ((b0 & 0xE0) == 0xC0) + { + codepoint = (static_cast(b0 & 0x1F) << 6) | static_cast(buffer[1] & 0x3F); + + return 2; + } + else if ((b0 & 0xF0) == 0xE0) + { + codepoint = (static_cast(b0 & 0x0F) << 12) | (static_cast(buffer[1] & 0x3F) << 6) | static_cast(buffer[2] & 0x3F); + + return 3; + } + else if ((b0 & 0xF8) == 0xF0) + { + codepoint = + (static_cast(b0 & 0x07) << 18) | + (static_cast(buffer[1] & 0x3F) << 12) | + (static_cast(buffer[2] & 0x3F) << 6) | + static_cast(buffer[3] & 0x3F); + + return 4; + } + else + { + return hx::Throw(HX_CSTRING("Failed to read codepoint")); + } +} \ No newline at end of file diff --git a/test/native/Native.hx b/test/native/Native.hx index 0c8cb9506..d82f41a19 100644 --- a/test/native/Native.hx +++ b/test/native/Native.hx @@ -45,7 +45,11 @@ class Native new tests.marshalling.view.TestView(), new tests.marshalling.view.TestMarshal(), - new tests.marshalling.view.TestViewExtensions() + new tests.marshalling.view.TestViewExtensions(), + + new tests.encoding.TestAscii(), + new tests.encoding.TestUtf8(), + new tests.encoding.TestUtf16(), #end ]); } diff --git a/test/native/tests/encoding/TestAscii.hx b/test/native/tests/encoding/TestAscii.hx new file mode 100644 index 000000000..08d6b9df4 --- /dev/null +++ b/test/native/tests/encoding/TestAscii.hx @@ -0,0 +1,86 @@ +package tests.encoding; + +import haxe.io.Bytes; +import cpp.encoding.Ascii; +import utest.Assert; +import utest.Test; + +using cpp.marshal.ViewExtensions; + +class TestAscii extends Test +{ + function test_isEncoded_null() { + Assert.raises(() -> Ascii.isEncoded(null)); + } + + function test_isEncoded_ascii() { + Assert.isTrue(Ascii.isEncoded("test")); + } + + function 
test_isEncoded_utf16() { + Assert.isFalse(Ascii.isEncoded("😂")); + } + + function test_encode_null() { + final buffer = Bytes.alloc(4); + + Assert.raises(() -> Ascii.encode(null, buffer.asView())); + } + + function test_encode_small_buffer() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Ascii.encode("test", buffer.asView())); + } + + function test_encode_utf16() { + final buffer = Bytes.alloc(1024); + + Assert.raises(() -> Ascii.encode("😂", buffer.asView())); + } + + function test_encode() { + final buffer = Bytes.alloc(1024); + + Assert.equals(4i64, Ascii.encode("test", buffer.asView())); + Assert.equals('t'.code, buffer.get(0)); + Assert.equals('e'.code, buffer.get(1)); + Assert.equals('s'.code, buffer.get(2)); + Assert.equals('t'.code, buffer.get(3)); + } + + function test_decode_empty() { + Assert.raises(() -> Ascii.decode(ViewExtensions.empty())); + } + + function test_decode() { + final buffer = Bytes.alloc(4); + buffer.set(0, 't'.code); + buffer.set(1, 'e'.code); + buffer.set(2, 's'.code); + buffer.set(3, 't'.code); + + Assert.equals('test', Ascii.decode(buffer.asView())); + } + + function test_decode_null_termination() { + final buffer = Bytes.alloc(9); + buffer.set(0, 't'.code); + buffer.set(1, 'e'.code); + buffer.set(2, 's'.code); + buffer.set(3, 't'.code); + buffer.set(4, 0); + buffer.set(5, 't'.code); + buffer.set(6, 'e'.code); + buffer.set(7, 's'.code); + buffer.set(8, 't'.code); + + Assert.equals('test', Ascii.decode(buffer.asView())); + } + + function test_decode_no_string() { + final buffer = Bytes.alloc(1); + + Assert.equals('', Ascii.decode(buffer.asView())); + } +} \ No newline at end of file diff --git a/test/native/tests/encoding/TestUtf16.hx b/test/native/tests/encoding/TestUtf16.hx new file mode 100644 index 000000000..c0e27589a --- /dev/null +++ b/test/native/tests/encoding/TestUtf16.hx @@ -0,0 +1,180 @@ +package tests.encoding; + +import haxe.io.Bytes; +import cpp.encoding.Utf16; +import utest.Assert; +import utest.Test; 
+ +using cpp.marshal.ViewExtensions; + +class TestUtf16 extends Test { + function test_isEncoded_null() { + Assert.raises(() -> Utf16.isEncoded(null)); + } + + function test_isEncoded_ascii() { + Assert.isFalse(Utf16.isEncoded("test")); + } + + function test_isEncoded_utf16() { + Assert.isTrue(Utf16.isEncoded("😂")); + } + + public function test_getByteCount_codepoint() { + Assert.equals(2i64, Utf16.getByteCount('a'.code)); + Assert.equals(2i64, Utf16.getByteCount('ƅ'.code)); + Assert.equals(2i64, Utf16.getByteCount('バ'.code)); + Assert.equals(4i64, Utf16.getByteCount('𝄳'.code)); + Assert.equals(4i64, Utf16.getByteCount('😂'.code)); + } + + public function test_getByteCount_string_null() { + Assert.raises(() -> Utf16.getByteCount((null:String))); + } + + public function test_getByteCount_string_empty() { + Assert.equals(0i64, Utf16.getByteCount('')); + } + + public function test_getByteCount_string_ascii() { + Assert.equals(26i64, Utf16.getByteCount('Hello, World!')); + } + + public function test_getByteCount_string_utf16() { + Assert.equals(26i64, Utf16.getByteCount('Hello😂World!')); + } + + public function test_encode_codepoint() { + final buffer = Bytes.alloc(4); + + Assert.equals(2i64, Utf16.encode('a'.code, buffer.asView())); + Assert.equals(0x61, buffer.get(0)); + Assert.equals(0x00, buffer.get(1)); + buffer.asView().clear(); + + Assert.equals(2i64, Utf16.encode('ƅ'.code, buffer.asView())); + Assert.equals(0x85, buffer.get(0)); + Assert.equals(0x01, buffer.get(1)); + buffer.asView().clear(); + + Assert.equals(2i64, Utf16.encode('バ'.code, buffer.asView())); + Assert.equals(0xD0, buffer.get(0)); + Assert.equals(0x30, buffer.get(1)); + buffer.asView().clear(); + + Assert.equals(4i64, Utf16.encode('𝄳'.code, buffer.asView())); + Assert.equals(0x34, buffer.get(0)); + Assert.equals(0xD8, buffer.get(1)); + Assert.equals(0x33, buffer.get(2)); + Assert.equals(0xDD, buffer.get(3)); + buffer.asView().clear(); + } + + public function 
test_encode_codepoint_empty_view() { + Assert.raises(() -> Utf16.encode('a'.code, ViewExtensions.empty())); + } + + public function test_encode_codepoint_no_partial_writes() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Utf16.encode('𝄳'.code, buffer.asView())); + Assert.equals(0, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + } + + public function test_encode_string_null() { + final buffer = Bytes.alloc(8); + + Assert.raises(() -> Utf16.encode((null:String), buffer.asView())); + } + + public function test_encode_string_empty_view() { + Assert.raises(() -> Utf16.encode('test', ViewExtensions.empty())); + } + + public function test_encode_string_empty_string() { + final buffer = Bytes.alloc(8); + + Assert.equals(0i64, Utf16.encode('', buffer.asView())); + } + + public function test_encode_string_small_buffer() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Utf16.encode('test', buffer.asView())); + Assert.equals(0, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + } + + public function test_encode_string_ascii() { + final buffer = Bytes.alloc(8); + + Assert.equals(8i64, Utf16.encode('test', buffer.asView())); + Assert.equals('t'.code, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + Assert.equals('e'.code, buffer.get(2)); + Assert.equals(0, buffer.get(3)); + Assert.equals('s'.code, buffer.get(4)); + Assert.equals(0, buffer.get(5)); + Assert.equals('t'.code, buffer.get(6)); + Assert.equals(0, buffer.get(7)); + } + + public function test_encode_string_utf16() { + final buffer = Bytes.alloc(16); + + Assert.equals(12i64, Utf16.encode('te😂st', buffer.asView())); + Assert.equals('t'.code, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + Assert.equals('e'.code, buffer.get(2)); + Assert.equals(0, buffer.get(3)); + + Assert.equals(0x3D, buffer.get(4)); + Assert.equals(0xD8, buffer.get(5)); + Assert.equals(0x02, buffer.get(6)); + Assert.equals(0xDE, buffer.get(7)); + + Assert.equals('s'.code, buffer.get(8)); + Assert.equals(0, 
buffer.get(9)); + Assert.equals('t'.code, buffer.get(10)); + Assert.equals(0, buffer.get(11)); + } + + public function test_decode_codepoint() { + var codepoint : cpp.Char32 = 0; + + var bytes = Bytes.ofHex('6100'); + Assert.equals(2i64, Utf16.decode(bytes.asView(), codepoint)); + Assert.equals('a'.code, cast codepoint); + + var bytes = Bytes.ofHex('8501'); + Assert.equals(2i64, Utf16.decode(bytes.asView(), codepoint)); + Assert.equals('ƅ'.code, cast codepoint); + + var bytes = Bytes.ofHex('D030'); + Assert.equals(2i64, Utf16.decode(bytes.asView(), codepoint)); + Assert.equals('バ'.code, cast codepoint); + + var bytes = Bytes.ofHex('34D833DD'); + Assert.equals(4i64, Utf16.decode(bytes.asView(), codepoint)); + Assert.equals('𝄳'.code, cast codepoint); + } + + public function test_decode_string() { + var bytes = Bytes.ofHex('6100'); + Assert.equals('a', Utf16.decode(bytes.asView())); + + var bytes = Bytes.ofHex('8501'); + Assert.equals('ƅ', Utf16.decode(bytes.asView())); + + var bytes = Bytes.ofHex('D030'); + Assert.equals('バ', Utf16.decode(bytes.asView())); + + var bytes = Bytes.ofHex('34D833DD'); + Assert.equals('𝄳', Utf16.decode(bytes.asView())); + } + + public function test_decode_empty_view() { + Assert.raises(() -> Utf16.decode(ViewExtensions.empty())); + } +} \ No newline at end of file diff --git a/test/native/tests/encoding/TestUtf8.hx b/test/native/tests/encoding/TestUtf8.hx new file mode 100644 index 000000000..e03f312a9 --- /dev/null +++ b/test/native/tests/encoding/TestUtf8.hx @@ -0,0 +1,157 @@ +package tests.encoding; + +import haxe.io.Bytes; +import cpp.encoding.Utf8; +import utest.Assert; +import utest.Test; + +using cpp.marshal.ViewExtensions; + +class TestUtf8 extends Test { + public function test_getByteCount_codepoint() { + Assert.equals(1i64, Utf8.getByteCount('a'.code)); + Assert.equals(2i64, Utf8.getByteCount('ƅ'.code)); + Assert.equals(3i64, Utf8.getByteCount('バ'.code)); + Assert.equals(4i64, Utf8.getByteCount('𝄳'.code)); + } + + public 
function test_getByteCount_string_null() { + Assert.raises(() -> Utf8.getByteCount((null:String))); + } + + public function test_getByteCount_string_empty() { + Assert.equals(0i64, Utf8.getByteCount('')); + } + + public function test_getByteCount_string_ascii() { + Assert.equals(13i64, Utf8.getByteCount('Hello, World!')); + } + + public function test_getByteCount_string_utf16() { + Assert.equals(15i64, Utf8.getByteCount('Hello😂World!')); + } + + public function test_encode_codepoint() { + final buffer = Bytes.alloc(4); + + Assert.equals(1i64, Utf8.encode('a'.code, buffer.asView())); + Assert.equals(0x61, buffer.get(0)); + buffer.asView().clear(); + + Assert.equals(2i64, Utf8.encode('ƅ'.code, buffer.asView())); + Assert.equals(0xC6, buffer.get(0)); + Assert.equals(0x85, buffer.get(1)); + buffer.asView().clear(); + + Assert.equals(3i64, Utf8.encode('バ'.code, buffer.asView())); + Assert.equals(0xE3, buffer.get(0)); + Assert.equals(0x83, buffer.get(1)); + Assert.equals(0x90, buffer.get(2)); + buffer.asView().clear(); + + Assert.equals(4i64, Utf8.encode('𝄳'.code, buffer.asView())); + Assert.equals(0xF0, buffer.get(0)); + Assert.equals(0x9D, buffer.get(1)); + Assert.equals(0x84, buffer.get(2)); + Assert.equals(0xB3, buffer.get(3)); + buffer.asView().clear(); + } + + public function test_encode_codepoint_empty_view() { + Assert.raises(() -> Utf8.encode('a'.code, ViewExtensions.empty())); + } + + public function test_encode_codepoint_no_partial_writes() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Utf8.encode('𝄳'.code, buffer.asView())); + Assert.equals(0, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + } + + public function test_encode_string_null() { + final buffer = Bytes.alloc(8); + + Assert.raises(() -> Utf8.encode((null:String), buffer.asView())); + } + + public function test_encode_string_empty_view() { + Assert.raises(() -> Utf8.encode('test', ViewExtensions.empty())); + } + + public function test_encode_string_empty_string() { + final buffer 
= Bytes.alloc(8); + + Assert.equals(0i64, Utf8.encode('', buffer.asView())); + } + + public function test_encode_string_small_buffer() { + final buffer = Bytes.alloc(2); + + Assert.raises(() -> Utf8.encode('test', buffer.asView())); + Assert.equals(0, buffer.get(0)); + Assert.equals(0, buffer.get(1)); + } + + public function test_encode_string_ascii() { + final buffer = Bytes.alloc(4); + + Assert.equals(4i64, Utf8.encode('test', buffer.asView())); + Assert.equals('t'.code, buffer.get(0)); + Assert.equals('e'.code, buffer.get(1)); + Assert.equals('s'.code, buffer.get(2)); + Assert.equals('t'.code, buffer.get(3)); + } + + public function test_encode_string_utf16() { + final buffer = Bytes.alloc(8); + + Assert.equals(8i64, Utf8.encode('te😂st', buffer.asView())); + Assert.equals(0x74, buffer.get(0)); + Assert.equals(0x65, buffer.get(1)); + Assert.equals(0xF0, buffer.get(2)); + Assert.equals(0x9F, buffer.get(3)); + Assert.equals(0x98, buffer.get(4)); + Assert.equals(0x82, buffer.get(5)); + Assert.equals(0x73, buffer.get(6)); + Assert.equals(0x74, buffer.get(7)); + } + + public function test_decode_codepoint() { + var codepoint : cpp.Char32 = 0; + + var bytes = Bytes.ofHex('61'); + Assert.equals(1i64, Utf8.decode(bytes.asView(), codepoint)); + Assert.equals('a'.code, cast codepoint); + + var bytes = Bytes.ofHex('c685'); + Assert.equals(2i64, Utf8.decode(bytes.asView(), codepoint)); + Assert.equals('ƅ'.code, cast codepoint); + + var bytes = Bytes.ofHex('e38390'); + Assert.equals(3i64, Utf8.decode(bytes.asView(), codepoint)); + Assert.equals('バ'.code, cast codepoint); + + var bytes = Bytes.ofHex('f09d84b3'); + Assert.equals(4i64, Utf8.decode(bytes.asView(), codepoint)); + Assert.equals('𝄳'.code, cast codepoint); + } + + public function test_decode_string() { + var bytes = Bytes.ofHex('61'); + Assert.equals('a', Utf8.decode(bytes.asView())); + + var bytes = Bytes.ofHex('c685'); + Assert.equals('ƅ', Utf8.decode(bytes.asView())); + + var bytes = Bytes.ofHex('e38390'); + 
Assert.equals('バ', Utf8.decode(bytes.asView())); + + var bytes = Bytes.ofHex('f09d84b3'); + Assert.equals('𝄳', Utf8.decode(bytes.asView())); + } + + public function test_decode_empty_view() { + Assert.raises(() -> Utf8.decode(ViewExtensions.empty())); + } +} \ No newline at end of file diff --git a/toolchain/haxe-target.xml b/toolchain/haxe-target.xml index 8d7362e12..d7bc6cf42 100644 --- a/toolchain/haxe-target.xml +++ b/toolchain/haxe-target.xml @@ -68,6 +68,9 @@ + + + @@ -199,6 +202,10 @@ + + + + From 7bc2d4ee4bb0a8fbd15e346fcf4774565a4d2225 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Mon, 22 Dec 2025 22:09:39 +0000 Subject: [PATCH 02/32] Remove old marshal stuff and return an empty string instead of throwing --- include/cpp/marshal/Definitions.inc | 13 +- include/cpp/marshal/Marshal.hpp | 66 ++--- src/cpp/encoding/ASCII.cpp | 2 +- src/cpp/encoding/UTF16.cpp | 18 +- src/cpp/encoding/UTF8.cpp | 27 +- test/native/cpp/encoding/Ascii.hx | 15 ++ test/native/cpp/encoding/Utf16.hx | 22 ++ test/native/cpp/encoding/Utf8.hx | 20 ++ .../tests/marshalling/view/TestMarshal.hx | 234 ++---------------- 9 files changed, 128 insertions(+), 289 deletions(-) create mode 100644 test/native/cpp/encoding/Ascii.hx create mode 100644 test/native/cpp/encoding/Utf16.hx create mode 100644 test/native/cpp/encoding/Utf8.hx diff --git a/include/cpp/marshal/Definitions.inc b/include/cpp/marshal/Definitions.inc index d4bdc0d00..575e044a5 100644 --- a/include/cpp/marshal/Definitions.inc +++ b/include/cpp/marshal/Definitions.inc @@ -224,17 +224,8 @@ namespace cpp static const bool isBigEndian = false; #endif - static View asView(const char* cstring); - static View asView(const char16_t* cstring); - - static View toCharView(const ::String& string); - static int toCharView(const ::String&, View buffer); - - static View toWideCharView(const ::String& string); - static int toWideCharView(const ::String& string, View buffer); - - static ::String toString(View buffer); - static ::String 
toString(View buffer); + static View asCharView(const ::String& string); + static View asWideCharView(const ::String& string); template static T read(View view); template static ::cpp::Pointer readPointer(View view); diff --git a/include/cpp/marshal/Marshal.hpp b/include/cpp/marshal/Marshal.hpp index 997cf5091..585c72ba8 100644 --- a/include/cpp/marshal/Marshal.hpp +++ b/include/cpp/marshal/Marshal.hpp @@ -18,72 +18,34 @@ namespace } } -inline cpp::marshal::View cpp::marshal::Marshal::asView(const char* cstring) +inline cpp::marshal::View cpp::marshal::Marshal::asCharView(const ::String& string) { - return cpp::marshal::View(const_cast(cstring), static_cast(std::char_traits::length(cstring))); -} - -inline cpp::marshal::View cpp::marshal::Marshal::asView(const char16_t* cstring) -{ - return cpp::marshal::View(const_cast(cstring), static_cast(std::char_traits::length(cstring))); -} - -inline cpp::marshal::View cpp::marshal::Marshal::toCharView(const ::String& string) -{ - auto length = 0; - auto ptr = string.utf8_str(nullptr, true, &length); - - return View(const_cast(ptr), length + 1); -} - -inline int cpp::marshal::Marshal::toCharView(const ::String& string, View buffer) -{ - auto length = 0; - - if (string.utf8_str(buffer, &length)) + if (null() == string) { - return length; + hx::NullReference("string", false); } - else - { - hx::Throw(HX_CSTRING("Not enough space in the view to write the string")); - return 0; + if (false == string.isAsciiEncoded()) + { + hx::Throw(HX_CSTRING("String is not ASCII encoded")); } -} - -inline cpp::marshal::View cpp::marshal::Marshal::toWideCharView(const ::String& string) -{ - auto length = 0; - auto ptr = string.wc_str(nullptr, &length); - return View(const_cast(ptr), length + 1); + return View(const_cast(string.raw_ptr()), string.length); } -inline int cpp::marshal::Marshal::toWideCharView(const ::String& string, View buffer) +inline cpp::marshal::View cpp::marshal::Marshal::asWideCharView(const ::String& string) { - auto length 
= 0; - - if (string.wc_str(buffer, &length)) + if (null() == string) { - return length; + hx::NullReference("string", false); } - else - { - hx::Throw(HX_CSTRING("Not enough space in the view to write the string")); - return 0; + if (false == string.isUTF16Encoded()) + { + hx::Throw(HX_CSTRING("String is not UTF16 encoded")); } -} - -inline ::String cpp::marshal::Marshal::toString(View buffer) -{ - return ::String::create(buffer); -} -inline ::String cpp::marshal::Marshal::toString(View buffer) -{ - return ::String::create(buffer); + return View(const_cast(string.raw_wptr()), string.length); } template diff --git a/src/cpp/encoding/ASCII.cpp b/src/cpp/encoding/ASCII.cpp index 3115e19cd..7a0acd8bf 100644 --- a/src/cpp/encoding/ASCII.cpp +++ b/src/cpp/encoding/ASCII.cpp @@ -14,7 +14,7 @@ bool cpp::encoding::Ascii::isEncoded(const String& string) int64_t cpp::encoding::Ascii::encode(const String& string, View buffer) { - if (hx::IsNull(string)) + if (null() == string) { hx::NullReference("String", false); } diff --git a/src/cpp/encoding/UTF16.cpp b/src/cpp/encoding/UTF16.cpp index 743629301..9646e32d0 100644 --- a/src/cpp/encoding/UTF16.cpp +++ b/src/cpp/encoding/UTF16.cpp @@ -9,11 +9,21 @@ namespace { return codepoint >= 0xd800 && codepoint < 0xe000; } + + bool isLowSurrogate(char32_t codepoint) + { + return codepoint >= 0xdc00 && codepoint < 0xe000; + } + + bool isHighSurrogate(char32_t codepoint) + { + return codepoint >= 0xd800 && codepoint < 0xdc00; + } } bool cpp::encoding::Utf16::isEncoded(const String& string) { - if (hx::IsNull(string)) + if (null() == string) { hx::NullReference("String", false); } @@ -36,7 +46,7 @@ int32_t cpp::encoding::Utf16::getByteCount(const char32_t& codepoint) int64_t cpp::encoding::Utf16::getByteCount(const String& string) { - if (hx::IsNull(string)) + if (null() == string) { hx::NullReference("String", false); } @@ -59,7 +69,7 @@ int64_t cpp::encoding::Utf16::getByteCount(const String& string) int64_t 
cpp::encoding::Utf16::encode(const String& string, cpp::marshal::View buffer) { - if (hx::IsNull(string)) + if (null() == string) { hx::NullReference("String", false); } @@ -149,7 +159,7 @@ String cpp::encoding::Utf16::decode(cpp::marshal::View buffer) { if (buffer.isEmpty()) { - return hx::Throw(HX_CSTRING("View empty")); + return String::emptyString; } auto bytes = int64_t{ 0 }; diff --git a/src/cpp/encoding/UTF8.cpp b/src/cpp/encoding/UTF8.cpp index dc13c5c27..841a665e9 100644 --- a/src/cpp/encoding/UTF8.cpp +++ b/src/cpp/encoding/UTF8.cpp @@ -2,6 +2,22 @@ using namespace cpp::marshal; +namespace +{ + bool isAsciiBuffer(View& buffer) + { + for (auto i = int64_t{ 0 }; i < buffer.length; i++) + { + if (buffer.ptr[i] > 127) + { + return false; + } + } + + return true; + } +} + int64_t cpp::encoding::Utf8::getByteCount(const char32_t& codepoint) { if (codepoint <= 0x7F) @@ -24,7 +40,7 @@ int64_t cpp::encoding::Utf8::getByteCount(const char32_t& codepoint) int64_t cpp::encoding::Utf8::getByteCount(const String& string) { - if (hx::IsNull(string)) + if (null() == string) { hx::NullReference("String", false); } @@ -54,7 +70,7 @@ int64_t cpp::encoding::Utf8::getByteCount(const String& string) int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View buffer) { - if (hx::IsNull(string)) + if (null() == string) { hx::NullReference("String", false); } @@ -148,7 +164,12 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) { if (buffer.isEmpty()) { - return hx::Throw(HX_CSTRING("View empty")); + return String::emptyString; + } + + if (isAsciiBuffer(buffer)) + { + return Ascii::decode(buffer); } auto bytes = int64_t{ 0 }; diff --git a/test/native/cpp/encoding/Ascii.hx b/test/native/cpp/encoding/Ascii.hx new file mode 100644 index 000000000..a96eb54d9 --- /dev/null +++ b/test/native/cpp/encoding/Ascii.hx @@ -0,0 +1,15 @@ +package cpp.encoding; + +import cpp.UInt8; +import cpp.Int64; +import cpp.marshal.View; + +@:semantics(value) 
+@:cpp.PointerType({ namespace : [ "cpp", "encoding" ] }) +extern class Ascii { + static function isEncoded(string:String):Bool; + + static function encode(string:String, buffer:View):Int64; + + static function decode(buffer:View):String; +} diff --git a/test/native/cpp/encoding/Utf16.hx b/test/native/cpp/encoding/Utf16.hx new file mode 100644 index 000000000..161591202 --- /dev/null +++ b/test/native/cpp/encoding/Utf16.hx @@ -0,0 +1,22 @@ +package cpp.encoding; + +import cpp.UInt8; +import cpp.Int64; +import cpp.Char32; +import cpp.marshal.View; +import haxe.extern.AsVar; + +@:semantics(value) +@:cpp.PointerType({ namespace : [ "cpp", "encoding" ] }) +extern class Utf16 { + static function isEncoded(string:String):Bool; + + static overload function getByteCount(codepoint:Char32):Int64; + static overload function getByteCount(string:String):Int64; + + static overload function encode(string:String, buffer:View):Int64; + static overload function encode(codepoint:Char32, buffer:View):Int64; + + static overload function decode(buffer:View):String; + static overload function decode(buffer:View, codepoint:AsVar):Int64; +} \ No newline at end of file diff --git a/test/native/cpp/encoding/Utf8.hx b/test/native/cpp/encoding/Utf8.hx new file mode 100644 index 000000000..c1461c569 --- /dev/null +++ b/test/native/cpp/encoding/Utf8.hx @@ -0,0 +1,20 @@ +package cpp.encoding; + +import cpp.UInt8; +import cpp.Int64; +import cpp.Char32; +import cpp.marshal.View; +import haxe.extern.AsVar; + +@:semantics(value) +@:cpp.PointerType({ namespace : [ "cpp", "encoding" ] }) +extern class Utf8 { + static overload function getByteCount(codepoint:Char32):Int64; + static overload function getByteCount(string:String):Int64; + + static overload function encode(string:String, buffer:View):Int64; + static overload function encode(codepoint:Char32, buffer:View):Int64; + + static overload function decode(buffer:View):String; + static overload function decode(buffer:View, codepoint:AsVar):Int64; +} 
\ No newline at end of file diff --git a/test/native/tests/marshalling/view/TestMarshal.hx b/test/native/tests/marshalling/view/TestMarshal.hx index 60199b5be..ad236b094 100644 --- a/test/native/tests/marshalling/view/TestMarshal.hx +++ b/test/native/tests/marshalling/view/TestMarshal.hx @@ -115,233 +115,31 @@ class TestMarshal extends Test { Assert.isTrue(storage == value); } - function test_ascii_string_to_utf8() { - final source = "Hello, World!"; - final view = source.toCharView(); - - if (Assert.equals(source.length + 1, view.length)) { - Assert.equals(view[ 0], "H".code); - Assert.equals(view[ 1], "e".code); - Assert.equals(view[ 2], "l".code); - Assert.equals(view[ 3], "l".code); - Assert.equals(view[ 4], "o".code); - Assert.equals(view[ 5], ",".code); - Assert.equals(view[ 6], " ".code); - Assert.equals(view[ 7], "W".code); - Assert.equals(view[ 8], "o".code); - Assert.equals(view[ 9], "r".code); - Assert.equals(view[10], "l".code); - Assert.equals(view[11], "d".code); - Assert.equals(view[12], "!".code); - Assert.equals(view[13], 0); - } - } - - function test_ascii_string_to_utf8_buffer() { - final source = "Hello, World!"; - final buffer = Bytes.ofHex("FFFFFFFFFFFFFFFFFFFFFFFFFFFF"); - final view = buffer.asView().reinterpret(); - final count = Marshal.toCharView(source, view); - - if (Assert.equals(source.length + 1, count)) { - Assert.equals(view[ 0], "H".code); - Assert.equals(view[ 1], "e".code); - Assert.equals(view[ 2], "l".code); - Assert.equals(view[ 3], "l".code); - Assert.equals(view[ 4], "o".code); - Assert.equals(view[ 5], ",".code); - Assert.equals(view[ 6], " ".code); - Assert.equals(view[ 7], "W".code); - Assert.equals(view[ 8], "o".code); - Assert.equals(view[ 9], "r".code); - Assert.equals(view[10], "l".code); - Assert.equals(view[11], "d".code); - Assert.equals(view[12], "!".code); - Assert.equals(view[13], 0); - } - } - - function test_emoji_string_to_utf8() { - final source = "πŸ˜‚"; - final view = source.toCharView(); - - if 
(Assert.equals(5, view.length)) { - Assert.equals((0xf0:Char), view[0]); - Assert.equals((0x9f:Char), view[1]); - Assert.equals((0x98:Char), view[2]); - Assert.equals((0x82:Char), view[3]); - Assert.equals(0, view[4]); - } + function test_asCharView_null() { + Assert.raises(() -> Marshal.asCharView(null)); } - function test_emoji_string_to_utf8_buffer() { - final source = "πŸ˜‚"; - final buffer = Bytes.ofHex("FFFFFFFFFF"); - final view = buffer.asView().reinterpret(); - final count = Marshal.toCharView(source, view); - - if (Assert.equals(5, count)) { - Assert.equals((0xf0:Char), view[0]); - Assert.equals((0x9f:Char), view[1]); - Assert.equals((0x98:Char), view[2]); - Assert.equals((0x82:Char), view[3]); - Assert.equals(0, view[4]); - } + function test_asWideCharView_null() { + Assert.raises(() -> Marshal.asWideCharView(null)); } - function test_ascii_string_to_utf16() { - final source = "Hello, World!"; - final view = source.toWideCharView(); - - if (Assert.equals(source.length + 1, view.length)) { - Assert.equals(view[ 0], "H".code); - Assert.equals(view[ 1], "e".code); - Assert.equals(view[ 2], "l".code); - Assert.equals(view[ 3], "l".code); - Assert.equals(view[ 4], "o".code); - Assert.equals(view[ 5], ",".code); - Assert.equals(view[ 6], " ".code); - Assert.equals(view[ 7], "W".code); - Assert.equals(view[ 8], "o".code); - Assert.equals(view[ 9], "r".code); - Assert.equals(view[10], "l".code); - Assert.equals(view[11], "d".code); - Assert.equals(view[12], "!".code); - Assert.equals(view[13], 0); - } + function test_asCharView_wrong_encoding() { + Assert.raises(() -> Marshal.asCharView("πŸ˜‚")); } - function test_ascii_string_to_utf16_buffer() { - final source = "Hello, World!"; - final buffer = Bytes.ofHex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); - final view = buffer.asView().reinterpret(); - final count = Marshal.toWideCharView(source, view); - - if (Assert.equals(count, view.length)) { - Assert.equals(view[ 0], "H".code); - 
Assert.equals(view[ 1], "e".code); - Assert.equals(view[ 2], "l".code); - Assert.equals(view[ 3], "l".code); - Assert.equals(view[ 4], "o".code); - Assert.equals(view[ 5], ",".code); - Assert.equals(view[ 6], " ".code); - Assert.equals(view[ 7], "W".code); - Assert.equals(view[ 8], "o".code); - Assert.equals(view[ 9], "r".code); - Assert.equals(view[10], "l".code); - Assert.equals(view[11], "d".code); - Assert.equals(view[12], "!".code); - Assert.equals(view[13], 0); - } - } - - function test_emoji_string_to_utf16() { - final source = "πŸ˜‚"; - final view = source.toWideCharView(); - - if (Assert.equals(3, view.length)) { - Assert.equals((0xD83D:Char16), view[0]); - Assert.equals((0xDE02:Char16), view[1]); - Assert.equals(0, view[2]); - } - } - - function test_emoji_string_to_utf16_buffer() { - final source = "πŸ˜‚"; - final buffer = Bytes.ofHex("FFFFFFFFFFFFFFFF"); - final view = buffer.asView().slice(0, 3 * 2).reinterpret(); - final count = Marshal.toWideCharView(source, view); - - if (Assert.equals(count, view.length)) { - Assert.equals((0xD83D:Char16), view[0]); - Assert.equals((0xDE02:Char16), view[1]); - Assert.equals(0, view[2]); - } - } - - function test_ascii_chars_to_string() { - final buffer = new Vector(5); - buffer[0] = 'H'.code; - buffer[1] = 'e'.code; - buffer[2] = 'l'.code; - buffer[3] = 'l'.code; - buffer[4] = 'o'.code; - final view = buffer.asView(); - final string = view.toString(); - - Assert.equals('Hello', string); + function test_asWideCharView_wrong_encoding() { + Assert.raises(() -> Marshal.asWideCharView("hello")); } - function test_ascii_wide_chars_to_string() { - final buffer = new Vector(5); - buffer[0] = 'H'.code; - buffer[1] = 'e'.code; - buffer[2] = 'l'.code; - buffer[3] = 'l'.code; - buffer[4] = 'o'.code; - final view = buffer.asView(); - final string = view.toString(); - - Assert.equals('Hello', string); - } - - function test_null_terminated_ascii_chars_to_string() { - final buffer = new Vector(5); - buffer[0] = 'H'.code; - 
buffer[1] = 'e'.code; - buffer[2] = 'l'.code; - buffer[3] = 'l'.code; - buffer[4] = 'o'.code; - buffer[5] = 0; - final view = buffer.asView(); - final string = view.toString(); - - Assert.equals('Hello', string); - } - - function test_null_terminated_ascii_wide_chars_to_string() { - final buffer = new Vector(5); - buffer[0] = 'H'.code; - buffer[1] = 'e'.code; - buffer[2] = 'l'.code; - buffer[3] = 'l'.code; - buffer[4] = 'o'.code; - buffer[5] = 0; - final view = buffer.asView(); - final string = view.toString(); - - Assert.equals('Hello', string); - } - - function test_utf8_bytes_to_string() { - final buffer = Bytes.ofHex("f09f9882"); - final view = (buffer.asView().reinterpret() : View); - final string = view.toString(); - - Assert.equals('πŸ˜‚', string); - } - - function test_null_terminated_utf8_bytes_to_string() { - final buffer = Bytes.ofHex("f09f98820000"); - final view = (buffer.asView().reinterpret() : View); - final string = view.toString(); - - Assert.equals('πŸ˜‚', string); - } - - function test_utf16_bytes_to_string() { - final buffer = Bytes.ofHex("3DD802De"); - final view = (buffer.asView().reinterpret() : View); - final string = view.toString(); - - Assert.equals('πŸ˜‚', string); + function test_asCharView() { + final view = "hello".asCharView(); + + Assert.equals(5, view.length); } - function test_null_terminated_utf16_bytes_to_string() { - final buffer = Bytes.ofHex("3DD802De00000000"); - final view = (buffer.asView().reinterpret() : View); - final string = view.toString(); - - Assert.equals('πŸ˜‚', string); + function test_asWideCharView() { + final view = "πŸ˜‚".asWideCharView(); + + Assert.equals(2, view.length); } } \ No newline at end of file From 6ea2cc27f0377252ec4d894257d3da03940933bc Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Mon, 22 Dec 2025 22:09:59 +0000 Subject: [PATCH 03/32] update tests --- test/native/tests/encoding/TestUtf16.hx | 2 +- test/native/tests/encoding/TestUtf8.hx | 2 +- .../tests/marshalling/view/TestMarshal.hx | 4 
++-- .../native/tests/marshalling/view/TestView.hx | 14 ++++++------- .../marshalling/view/TestViewExtensions.hx | 21 ++++++++++--------- 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/test/native/tests/encoding/TestUtf16.hx b/test/native/tests/encoding/TestUtf16.hx index c0e27589a..c970645b8 100644 --- a/test/native/tests/encoding/TestUtf16.hx +++ b/test/native/tests/encoding/TestUtf16.hx @@ -175,6 +175,6 @@ class TestUtf16 extends Test { } public function test_decode_empty_view() { - Assert.raises(() -> Utf16.decode(ViewExtensions.empty())); + Assert.equals("", Utf16.decode(ViewExtensions.empty())); } } \ No newline at end of file diff --git a/test/native/tests/encoding/TestUtf8.hx b/test/native/tests/encoding/TestUtf8.hx index e03f312a9..739633f3c 100644 --- a/test/native/tests/encoding/TestUtf8.hx +++ b/test/native/tests/encoding/TestUtf8.hx @@ -152,6 +152,6 @@ class TestUtf8 extends Test { } public function test_decode_empty_view() { - Assert.raises(() -> Utf8.decode(ViewExtensions.empty())); + Assert.equals("",Utf8.decode(ViewExtensions.empty())); } } \ No newline at end of file diff --git a/test/native/tests/marshalling/view/TestMarshal.hx b/test/native/tests/marshalling/view/TestMarshal.hx index ad236b094..8766560e9 100644 --- a/test/native/tests/marshalling/view/TestMarshal.hx +++ b/test/native/tests/marshalling/view/TestMarshal.hx @@ -134,12 +134,12 @@ class TestMarshal extends Test { function test_asCharView() { final view = "hello".asCharView(); - Assert.equals(5, view.length); + Assert.equals(5i64, view.length); } function test_asWideCharView() { final view = "πŸ˜‚".asWideCharView(); - Assert.equals(2, view.length); + Assert.equals(2i64, view.length); } } \ No newline at end of file diff --git a/test/native/tests/marshalling/view/TestView.hx b/test/native/tests/marshalling/view/TestView.hx index 8eb6c5555..7406d17f3 100644 --- a/test/native/tests/marshalling/view/TestView.hx +++ b/test/native/tests/marshalling/view/TestView.hx @@ 
-139,8 +139,8 @@ class TestView extends Test { final index = 3; final slice = view.slice(index); - if (Assert.equals(7, slice.length)) { - for (i in 0...slice.length) { + if (Assert.equals(7i64, slice.length)) { + for (i in 0...(cast slice.length : Int)) { Assert.equals(i + index + 1, slice[i]); } } @@ -161,8 +161,8 @@ class TestView extends Test { final length = 4; final slice = view.slice(index, length); - if (Assert.equals(length, slice.length)) { - for (i in 0...slice.length) { + if (Assert.equals(haxe.Int64.ofInt(length), slice.length)) { + for (i in 0...(cast slice.length : Int)) { Assert.equals(i + index + 1, slice[i]); } } @@ -243,7 +243,7 @@ class TestView extends Test { final view = buffer.asView(); final second : View = view.reinterpret(); - Assert.equals(1, second.length); + Assert.equals(1i64, second.length); } function test_reinterpret_to_larger_type_not_enough_length() { @@ -251,7 +251,7 @@ class TestView extends Test { final view = buffer.asView(); final second : View = view.reinterpret(); - Assert.equals(0, second.length); + Assert.equals(0i64, second.length); } function test_reinterpret_to_value_type() { @@ -259,7 +259,7 @@ class TestView extends Test { final view = buffer.asView(); final points = (view.reinterpret() : View); - Assert.equals(2, points.length); + Assert.equals(2i64, points.length); Assert.equals(0f64, points[0].x); Assert.equals(0f64, points[0].y); diff --git a/test/native/tests/marshalling/view/TestViewExtensions.hx b/test/native/tests/marshalling/view/TestViewExtensions.hx index 271b1e103..c9d2e2609 100644 --- a/test/native/tests/marshalling/view/TestViewExtensions.hx +++ b/test/native/tests/marshalling/view/TestViewExtensions.hx @@ -1,5 +1,6 @@ package tests.marshalling.view; +import haxe.Int64; import haxe.io.UInt8Array; import haxe.io.UInt16Array; import haxe.io.UInt32Array; @@ -49,7 +50,7 @@ class TestViewExtensions extends Test { final array = [ 100, 200, 300, 400 ]; final view = array.asView(); - if 
(Assert.equals(array.length, view.length)) { + if (Assert.equals(Int64.ofInt(array.length), view.length)) { for (i in 0...array.length) { Assert.equals(array[i], view[i]); } @@ -60,7 +61,7 @@ class TestViewExtensions extends Test { final vector = Vector.fromData([ 100, 200, 300, 400 ]); final view = vector.asView(); - if (Assert.equals(vector.length, view.length)) { + if (Assert.equals(Int64.ofInt(vector.length), view.length)) { for (i in 0...vector.length) { Assert.equals(vector[i], view[i]); } @@ -71,7 +72,7 @@ class TestViewExtensions extends Test { final bytes = Bytes.ofData([ 10, 20, 30, 40 ]); final view = bytes.asView(); - if (Assert.equals(bytes.length, view.length)) { + if (Assert.equals(Int64.ofInt(bytes.length), view.length)) { for (i in 0...bytes.length) { Assert.equals(bytes.get(i), view[i]); } @@ -83,7 +84,7 @@ class TestViewExtensions extends Test { final buffer = ArrayBufferView.fromBytes(Bytes.ofData([ for (i in 0...100) i ])).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.byteLength, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.byteLength), view.length)) { for (i in 0...buffer.byteLength) { Assert.equals(buffer.buffer.get(index + i), view[i]); } @@ -95,7 +96,7 @@ class TestViewExtensions extends Test { final buffer = Float32Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -107,7 +108,7 @@ class TestViewExtensions extends Test { final buffer = Float64Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -119,7 +120,7 @@ class TestViewExtensions extends Test { final buffer = 
Int32Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -131,7 +132,7 @@ class TestViewExtensions extends Test { final buffer = UInt32Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -143,7 +144,7 @@ class TestViewExtensions extends Test { final buffer = UInt16Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } @@ -155,7 +156,7 @@ class TestViewExtensions extends Test { final buffer = UInt8Array.fromArray([ for (i in 0...100) i ]).sub(index, 10); final view = buffer.asView(); - if (Assert.equals(buffer.length, view.length)) { + if (Assert.equals(Int64.ofInt(buffer.length), view.length)) { for (i in 0...buffer.length) { Assert.equals(buffer[i], view[i]); } From c39a54cac5676c6c5a19be05917538ebe0178209 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 10:19:29 +0000 Subject: [PATCH 04/32] Case change --- include/cpp/encoding/{ASCII.hpp => Ascii.hpp} | 0 include/cpp/encoding/{UTF16.hpp => Utf16.hpp} | 0 include/cpp/encoding/{UTF8.hpp => Utf8.hpp} | 0 include/hxcpp.h | 3 +++ src/cpp/encoding/{ASCII.cpp => Ascii.cpp} | 0 src/cpp/encoding/{UTF16.cpp => Utf16.cpp} | 0 src/cpp/encoding/{UTF8.cpp => Utf8.cpp} | 0 toolchain/haxe-target.xml | 12 ++++++------ 8 files changed, 9 insertions(+), 6 deletions(-) rename include/cpp/encoding/{ASCII.hpp => Ascii.hpp} (100%) rename include/cpp/encoding/{UTF16.hpp => 
Utf16.hpp} (100%) rename include/cpp/encoding/{UTF8.hpp => Utf8.hpp} (100%) rename src/cpp/encoding/{ASCII.cpp => Ascii.cpp} (100%) rename src/cpp/encoding/{UTF16.cpp => Utf16.cpp} (100%) rename src/cpp/encoding/{UTF8.cpp => Utf8.cpp} (100%) diff --git a/include/cpp/encoding/ASCII.hpp b/include/cpp/encoding/Ascii.hpp similarity index 100% rename from include/cpp/encoding/ASCII.hpp rename to include/cpp/encoding/Ascii.hpp diff --git a/include/cpp/encoding/UTF16.hpp b/include/cpp/encoding/Utf16.hpp similarity index 100% rename from include/cpp/encoding/UTF16.hpp rename to include/cpp/encoding/Utf16.hpp diff --git a/include/cpp/encoding/UTF8.hpp b/include/cpp/encoding/Utf8.hpp similarity index 100% rename from include/cpp/encoding/UTF8.hpp rename to include/cpp/encoding/Utf8.hpp diff --git a/include/hxcpp.h b/include/hxcpp.h index 68824a682..71618c1b3 100755 --- a/include/hxcpp.h +++ b/include/hxcpp.h @@ -358,6 +358,9 @@ typedef PropertyAccessMode PropertyAccess; #include #include #include +#include +#include +#include #include #include #include diff --git a/src/cpp/encoding/ASCII.cpp b/src/cpp/encoding/Ascii.cpp similarity index 100% rename from src/cpp/encoding/ASCII.cpp rename to src/cpp/encoding/Ascii.cpp diff --git a/src/cpp/encoding/UTF16.cpp b/src/cpp/encoding/Utf16.cpp similarity index 100% rename from src/cpp/encoding/UTF16.cpp rename to src/cpp/encoding/Utf16.cpp diff --git a/src/cpp/encoding/UTF8.cpp b/src/cpp/encoding/Utf8.cpp similarity index 100% rename from src/cpp/encoding/UTF8.cpp rename to src/cpp/encoding/Utf8.cpp diff --git a/toolchain/haxe-target.xml b/toolchain/haxe-target.xml index d7bc6cf42..09d933727 100644 --- a/toolchain/haxe-target.xml +++ b/toolchain/haxe-target.xml @@ -68,9 +68,9 @@ - - - + + + @@ -202,9 +202,9 @@ - - - + + + From bbc622bd3cb60b2ac8b7af559fd49e4538fc3cf7 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 10:19:41 +0000 Subject: [PATCH 05/32] Update utf16 codepoint encoder --- src/cpp/encoding/Utf16.cpp | 40 
++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index 9646e32d0..c2596a9e6 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -121,38 +121,36 @@ int64_t cpp::encoding::Utf16::encode(const String& string, cpp::marshal::View buffer) { - if (codepoint >= 0x10000) + if (codepoint < 0xD800) { - auto over = codepoint - 0x10000; - if (over >= 0x10000) - { - Marshal::writeUInt16(buffer, 0xFFFD); - - return 2; - } - else - { - auto staging = std::array(); - staging[0] = (over >> 10) + 0xD800; - staging[1] = (over & 0x3FF) + 0xDC00; + Marshal::writeUInt16(buffer, static_cast(codepoint)); - Marshal::writeUInt32(buffer, *reinterpret_cast(staging.data())); + return 2; + } + else if (codepoint < 0xE000) + { + // D800 - DFFF is invalid - return 4; - } + return hx::Throw(HX_CSTRING("Invalid UTF16")); } - else if (isSurrogate(codepoint)) + else if (codepoint < 0x10000) { - Marshal::writeUInt16(buffer, 0xFFFD); + Marshal::writeUInt16(buffer, static_cast(codepoint)); return 2; } - else + else if (codepoint < 0x110000) { - Marshal::writeUInt16(buffer, static_cast(codepoint)); + auto staging = std::array(); + staging[0] = 0xD800 + (((codepoint - 0x10000) >> 10) & 0x3FF); + staging[1] = 0xDC00 + ((codepoint - 0x10000) & 0x3FF); - return 2; + Marshal::writeUInt32(buffer, *reinterpret_cast(staging.data())); + + return 4; } + + return 0; } String cpp::encoding::Utf16::decode(cpp::marshal::View buffer) From bfe921dcc32dc0ac23c4dc6b25f800401309f9a8 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 12:14:15 +0000 Subject: [PATCH 06/32] Add some extra functions --- include/cpp/encoding/Utf16.hpp | 3 +++ include/cpp/encoding/Utf8.hpp | 19 +++---------------- src/cpp/encoding/Utf16.cpp | 10 ++++++++++ src/cpp/encoding/Utf8.cpp | 10 ++++++++++ 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/include/cpp/encoding/Utf16.hpp 
b/include/cpp/encoding/Utf16.hpp index 9bb3bcc36..4a4750590 100644 --- a/include/cpp/encoding/Utf16.hpp +++ b/include/cpp/encoding/Utf16.hpp @@ -11,6 +11,9 @@ namespace cpp static int32_t getByteCount(const char32_t& codepoint); static int64_t getByteCount(const String& string); + static int64_t getCharCount(const char32_t& codepoint); + static int64_t getCharCount(const String& string); + static int64_t encode(const String& string, cpp::marshal::View buffer); static int64_t encode(const char32_t& codepoint, cpp::marshal::View buffer); diff --git a/include/cpp/encoding/Utf8.hpp b/include/cpp/encoding/Utf8.hpp index db06a9251..dbd2c4ba3 100644 --- a/include/cpp/encoding/Utf8.hpp +++ b/include/cpp/encoding/Utf8.hpp @@ -6,26 +6,13 @@ namespace cpp { struct Utf8 final { - /// - /// Returns the number of bytes required to store the codepoint in it's UTF8 form. - /// static int64_t getByteCount(const char32_t& codepoint); - - /// - /// Returns the number of bytes required to store the string in it's UTF8 form. - /// static int64_t getByteCount(const String& string); - /// - /// Writes the provided string in it's UTF8 form to the buffer. - /// - /// Number of byte written into the buffer - static int64_t encode(const String& string, cpp::marshal::View buffer); + static int64_t getCharCount(const char32_t& codepoint); + static int64_t getCharCount(const String& string); - /// - /// Writes the provided code point in it's UTF8 form to the buffer. 
- /// - /// Number of byte written into the buffer + static int64_t encode(const String& string, cpp::marshal::View buffer); static int64_t encode(const char32_t& codepoint, cpp::marshal::View buffer); static String decode(cpp::marshal::View buffer); diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index c2596a9e6..8280ce5a1 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -67,6 +67,16 @@ int64_t cpp::encoding::Utf16::getByteCount(const String& string) } } +int64_t cpp::encoding::Utf16::getCharCount(const char32_t& codepoint) +{ + return getByteCount(codepoint) / sizeof(char16_t); +} + +int64_t cpp::encoding::Utf16::getCharCount(const String& string) +{ + return getByteCount(string) / sizeof(char16_t); +} + int64_t cpp::encoding::Utf16::encode(const String& string, cpp::marshal::View buffer) { if (null() == string) diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 841a665e9..42bca5d4e 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -68,6 +68,16 @@ int64_t cpp::encoding::Utf8::getByteCount(const String& string) #endif } +int64_t cpp::encoding::Utf8::getCharCount(const char32_t& codepoint) +{ + return getByteCount(codepoint) / sizeof(char); +} + +int64_t cpp::encoding::Utf8::getCharCount(const String& string) +{ + return getByteCount(string) / sizeof(char); +} + int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View buffer) { if (null() == string) From f9fb825be534e9fd61f6a5b765a198aa6af03c8f Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 13:23:19 +0000 Subject: [PATCH 07/32] copyTo function for view --- include/cpp/marshal/Definitions.inc | 1 + include/cpp/marshal/View.hpp | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/include/cpp/marshal/Definitions.inc b/include/cpp/marshal/Definitions.inc index 575e044a5..af0da5928 100644 --- a/include/cpp/marshal/Definitions.inc +++ b/include/cpp/marshal/Definitions.inc @@ -202,6 
+202,7 @@ namespace cpp bool isEmpty(); View slice(int64_t index); View slice(int64_t index, int64_t length); + void copyTo(const View& destination); bool tryCopyTo(const View& destination); template View reinterpret(); int compare(const View& inRHS); diff --git a/include/cpp/marshal/View.hpp b/include/cpp/marshal/View.hpp index f62e6ad07..fddefd083 100644 --- a/include/cpp/marshal/View.hpp +++ b/include/cpp/marshal/View.hpp @@ -20,6 +20,15 @@ inline bool cpp::marshal::View::tryCopyTo(const View& destination) return true; } +template +inline void cpp::marshal::View::copyTo(const View& destination) +{ + if (tryCopyTo(destination) == false) + { + hx::Throw(HX_CSTRING("View OOB")); + } +} + template inline void cpp::marshal::View::clear() { From 12f7a7b8146dafe9935349274116fb0d681e2f7f Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 13:23:52 +0000 Subject: [PATCH 08/32] single bounds check for utf8 encode --- src/cpp/encoding/Utf8.cpp | 54 +++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 42bca5d4e..3471ffc70 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -1,4 +1,5 @@ #include +#include using namespace cpp::marshal; @@ -133,40 +134,55 @@ int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View buffer) { - if (getByteCount(codepoint) > buffer.length) - { - hx::Throw(HX_CSTRING("Buffer too small")); - } - if (codepoint <= 0x7F) { - buffer.ptr[0] = codepoint; + buffer[0] = codepoint; return 1; } else if (codepoint <= 0x7FF) { - buffer.ptr[0] = (0xC0 | (codepoint >> 6)); - buffer.ptr[1] = (0x80 | (codepoint & 63)); - - return 2; + auto data = std::array + { { + static_cast(0xC0 | (codepoint >> 6)), + static_cast(0x80 | (codepoint & 63)) + } }; + auto src = View(data.data(), data.size()); + + src.copyTo(buffer); + + return data.size(); } else if (codepoint <= 0xFFFF) { - buffer.ptr[0] = (0xE0 | 
(codepoint >> 12)); - buffer.ptr[1] = (0x80 | ((codepoint >> 6) & 63)); - buffer.ptr[2] = (0x80 | (codepoint & 63)); + auto data = std::array + { { + static_cast(0xE0 | (codepoint >> 12)), + static_cast(0x80 | ((codepoint >> 6) & 63)), + static_cast(0x80 | (codepoint & 63)) + } }; - return 3; + auto src = View(data.data(), data.size()); + + src.copyTo(buffer); + + return data.size(); } else { - buffer.ptr[0] = (0xF0 | (codepoint >> 18)); - buffer.ptr[1] = (0x80 | ((codepoint >> 12) & 63)); - buffer.ptr[2] = (0x80 | ((codepoint >> 6) & 63)); - buffer.ptr[3] = (0x80 | (codepoint & 63)); + auto data = std::array + { { + static_cast(0xF0 | (codepoint >> 18)), + static_cast(0x80 | ((codepoint >> 12) & 63)), + static_cast(0x80 | ((codepoint >> 6) & 63)), + static_cast(0x80 | (codepoint & 63)) + } }; - return 4; + auto src = View(data.data(), data.size()); + + src.copyTo(buffer); + + return data.size(); } } From f37b1b96bd6ceab714580ca22fff152c2db6344b Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 14:04:52 +0000 Subject: [PATCH 09/32] marshal writes --- src/cpp/encoding/Utf16.cpp | 12 ++++++++---- src/cpp/encoding/Utf8.cpp | 27 ++++++++++++++++----------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index 8280ce5a1..c61757219 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -151,11 +151,15 @@ int64_t cpp::encoding::Utf16::encode(const char32_t& codepoint, cpp::marshal::Vi } else if (codepoint < 0x110000) { - auto staging = std::array(); - staging[0] = 0xD800 + (((codepoint - 0x10000) >> 10) & 0x3FF); - staging[1] = 0xDC00 + ((codepoint - 0x10000) & 0x3FF); + auto staging = std::array(); + auto fst = View(staging.data(), 2); + auto snd = View(staging.data() + 2, 2); + auto all = View(staging.data(), staging.size()); - Marshal::writeUInt32(buffer, *reinterpret_cast(staging.data())); + Marshal::writeUInt16(fst, 0xD800 + (((codepoint - 0x10000) >> 10) & 
0x3FF)); + Marshal::writeUInt16(snd, 0xDC00 + ((codepoint - 0x10000) & 0x3FF)); + + all.copyTo(buffer); return 4; } diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 3471ffc70..7b768ba9c 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -224,12 +224,7 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) int64_t cpp::encoding::Utf8::decode(cpp::marshal::View buffer, char32_t& codepoint) { - if (0 == buffer.length) - { - return hx::Throw(HX_CSTRING("Empty view")); - } - - auto b0 = buffer[0]; + auto& b0 = buffer[0]; if ((b0 & 0x80) == 0) { @@ -239,23 +234,33 @@ int64_t cpp::encoding::Utf8::decode(cpp::marshal::View buffer, char32_t } else if ((b0 & 0xE0) == 0xC0) { - codepoint = (static_cast(b0 & 0x1F) << 6) | static_cast(buffer[1] & 0x3F); + codepoint = (static_cast(b0 & 0x1F) << 6) | static_cast(buffer.slice(1)[0] & 0x3F); return 2; } else if ((b0 & 0xF0) == 0xE0) { - codepoint = (static_cast(b0 & 0x0F) << 12) | (static_cast(buffer[1] & 0x3F) << 6) | static_cast(buffer[2] & 0x3F); + auto staging = std::array(); + auto dst = View(staging.data(), staging.size()); + + buffer.slice(1, staging.size()).copyTo(dst); + + codepoint = (static_cast(b0 & 0x0F) << 12) | (static_cast(staging[0] & 0x3F) << 6) | static_cast(staging[1] & 0x3F); return 3; } else if ((b0 & 0xF8) == 0xF0) { + auto staging = std::array(); + auto dst = View(staging.data(), staging.size()); + + buffer.slice(1, staging.size()).copyTo(dst); + codepoint = (static_cast(b0 & 0x07) << 18) | - (static_cast(buffer[1] & 0x3F) << 12) | - (static_cast(buffer[2] & 0x3F) << 6) | - static_cast(buffer[3] & 0x3F); + (static_cast(staging[0] & 0x3F) << 12) | + (static_cast(staging[1] & 0x3F) << 6) | + static_cast(staging[2] & 0x3F); return 4; } From 6384c4563474160b4d8b8868f79004f98c644401 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 14:51:00 +0000 Subject: [PATCH 10/32] remove some conversion issues --- src/cpp/encoding/Utf16.cpp | 4 ++-- 
src/cpp/encoding/Utf8.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index c61757219..aaff4bff6 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -207,7 +207,7 @@ int64_t cpp::encoding::Utf16::decode(cpp::marshal::View buffer, char32_ auto second = static_cast(Marshal::readUInt16(buffer.slice(2))); if (0xDC00 <= second && second < 0xE000) { - codepoint = ((((first - 0xD800) << 10) | (second - 0xDC00)) + 0x10000); + codepoint = static_cast((((first - 0xD800) << 10) | (second - 0xDC00)) + 0x10000); return 4; } @@ -216,7 +216,7 @@ int64_t cpp::encoding::Utf16::decode(cpp::marshal::View buffer, char32_ } else { - codepoint = first; + codepoint = static_cast(first); return 2; } diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 7b768ba9c..67331ad9b 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -136,7 +136,7 @@ int64_t cpp::encoding::Utf8::encode(const char32_t& codepoint, cpp::marshal::Vie { if (codepoint <= 0x7F) { - buffer[0] = codepoint; + buffer[0] = static_cast(codepoint); return 1; } @@ -224,7 +224,7 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) int64_t cpp::encoding::Utf8::decode(cpp::marshal::View buffer, char32_t& codepoint) { - auto& b0 = buffer[0]; + auto b0 = static_cast(buffer[0]); if ((b0 & 0x80) == 0) { From be9ff94012f173a9e259f095507e50ab7cd4b73d Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 15:13:33 +0000 Subject: [PATCH 11/32] Remove un-needed cast --- test/native/compile.hxml | 1 - test/native/tests/marshalling/view/TestView.hx | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/native/compile.hxml b/test/native/compile.hxml index a063c9266..825082805 100644 --- a/test/native/compile.hxml +++ b/test/native/compile.hxml @@ -1,4 +1,3 @@ -m Native -L utest --D HXCPP-DEBUGGER --cpp bin \ No newline at end of file diff --git 
a/test/native/tests/marshalling/view/TestView.hx b/test/native/tests/marshalling/view/TestView.hx index 7406d17f3..104a56e7a 100644 --- a/test/native/tests/marshalling/view/TestView.hx +++ b/test/native/tests/marshalling/view/TestView.hx @@ -140,7 +140,7 @@ class TestView extends Test { final slice = view.slice(index); if (Assert.equals(7i64, slice.length)) { - for (i in 0...(cast slice.length : Int)) { + for (i in 0...haxe.Int64.toInt(slice.length)) { Assert.equals(i + index + 1, slice[i]); } } @@ -162,7 +162,7 @@ class TestView extends Test { final slice = view.slice(index, length); if (Assert.equals(haxe.Int64.ofInt(length), slice.length)) { - for (i in 0...(cast slice.length : Int)) { + for (i in 0...haxe.Int64.toInt(slice.length)) { Assert.equals(i + index + 1, slice[i]); } } From 6ff0e28e903ea62a4d9eb58f513d2b1c24fdb11d Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 15:50:47 +0000 Subject: [PATCH 12/32] Remove questionable implicit view to pointer conversions --- include/cpp/marshal/Definitions.inc | 4 ---- include/cpp/marshal/View.hpp | 18 ------------------ 2 files changed, 22 deletions(-) diff --git a/include/cpp/marshal/Definitions.inc b/include/cpp/marshal/Definitions.inc index af0da5928..fed1d91bd 100644 --- a/include/cpp/marshal/Definitions.inc +++ b/include/cpp/marshal/Definitions.inc @@ -211,10 +211,6 @@ namespace cpp bool operator!=(const View& inRHS) const; T& operator[] (int64_t index); - - operator void* (); - operator T* (); - operator Pointer(); }; struct Marshal final diff --git a/include/cpp/marshal/View.hpp b/include/cpp/marshal/View.hpp index fddefd083..1e54d80b3 100644 --- a/include/cpp/marshal/View.hpp +++ b/include/cpp/marshal/View.hpp @@ -130,22 +130,4 @@ inline T& cpp::marshal::View::operator[](int64_t index) } return ptr[index]; -} - -template -inline cpp::marshal::View::operator void* () -{ - return ptr.ptr; -} - -template -inline cpp::marshal::View::operator T* () -{ - return ptr.ptr; -} - -template -inline 
cpp::marshal::View::operator cpp::Pointer () -{ - return ptr; } \ No newline at end of file From 03a98d2109b80886be75471b61f88db2565753f2 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 22:22:30 +0000 Subject: [PATCH 13/32] int returns --- include/cpp/encoding/Utf16.hpp | 8 ++++---- include/cpp/encoding/Utf8.hpp | 8 ++++---- src/cpp/encoding/Utf16.cpp | 8 ++++---- src/cpp/encoding/Utf8.cpp | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/cpp/encoding/Utf16.hpp b/include/cpp/encoding/Utf16.hpp index 4a4750590..c28ca3722 100644 --- a/include/cpp/encoding/Utf16.hpp +++ b/include/cpp/encoding/Utf16.hpp @@ -8,17 +8,17 @@ namespace cpp { static bool isEncoded(const String& string); - static int32_t getByteCount(const char32_t& codepoint); + static int getByteCount(const char32_t& codepoint); static int64_t getByteCount(const String& string); - static int64_t getCharCount(const char32_t& codepoint); + static int getCharCount(const char32_t& codepoint); static int64_t getCharCount(const String& string); + static int encode(const char32_t& codepoint, cpp::marshal::View buffer); static int64_t encode(const String& string, cpp::marshal::View buffer); - static int64_t encode(const char32_t& codepoint, cpp::marshal::View buffer); + static int decode(cpp::marshal::View buffer, char32_t& out); static String decode(cpp::marshal::View buffer); - static int64_t decode(cpp::marshal::View buffer, char32_t& out); }; } } \ No newline at end of file diff --git a/include/cpp/encoding/Utf8.hpp b/include/cpp/encoding/Utf8.hpp index dbd2c4ba3..12d83e48e 100644 --- a/include/cpp/encoding/Utf8.hpp +++ b/include/cpp/encoding/Utf8.hpp @@ -6,17 +6,17 @@ namespace cpp { struct Utf8 final { - static int64_t getByteCount(const char32_t& codepoint); + static int getByteCount(const char32_t& codepoint); static int64_t getByteCount(const String& string); - static int64_t getCharCount(const char32_t& codepoint); + static int getCharCount(const 
char32_t& codepoint); static int64_t getCharCount(const String& string); + static int encode(const char32_t& codepoint, cpp::marshal::View buffer); static int64_t encode(const String& string, cpp::marshal::View buffer); - static int64_t encode(const char32_t& codepoint, cpp::marshal::View buffer); static String decode(cpp::marshal::View buffer); - static int64_t decode(cpp::marshal::View buffer, char32_t& out); + static int decode(cpp::marshal::View buffer, char32_t& out); }; } } \ No newline at end of file diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index aaff4bff6..fd7796366 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -31,7 +31,7 @@ bool cpp::encoding::Utf16::isEncoded(const String& string) return string.isUTF16Encoded(); } -int32_t cpp::encoding::Utf16::getByteCount(const char32_t& codepoint) +int cpp::encoding::Utf16::getByteCount(const char32_t& codepoint) { if (codepoint >= 0x10000) { @@ -67,7 +67,7 @@ int64_t cpp::encoding::Utf16::getByteCount(const String& string) } } -int64_t cpp::encoding::Utf16::getCharCount(const char32_t& codepoint) +int cpp::encoding::Utf16::getCharCount(const char32_t& codepoint) { return getByteCount(codepoint) / sizeof(char16_t); } @@ -129,7 +129,7 @@ int64_t cpp::encoding::Utf16::encode(const String& string, cpp::marshal::View buffer) +int cpp::encoding::Utf16::encode(const char32_t& codepoint, cpp::marshal::View buffer) { if (codepoint < 0xD800) { @@ -198,7 +198,7 @@ String cpp::encoding::Utf16::decode(cpp::marshal::View buffer) return String(reinterpret_cast(backing), bytes / sizeof(char16_t)); } -int64_t cpp::encoding::Utf16::decode(cpp::marshal::View buffer, char32_t& codepoint) +int cpp::encoding::Utf16::decode(cpp::marshal::View buffer, char32_t& codepoint) { auto first = static_cast(Marshal::readUInt16(buffer)); diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 67331ad9b..3d500c36f 100644 --- a/src/cpp/encoding/Utf8.cpp +++ 
b/src/cpp/encoding/Utf8.cpp @@ -19,7 +19,7 @@ namespace } } -int64_t cpp::encoding::Utf8::getByteCount(const char32_t& codepoint) +int cpp::encoding::Utf8::getByteCount(const char32_t& codepoint) { if (codepoint <= 0x7F) { @@ -69,7 +69,7 @@ int64_t cpp::encoding::Utf8::getByteCount(const String& string) #endif } -int64_t cpp::encoding::Utf8::getCharCount(const char32_t& codepoint) +int cpp::encoding::Utf8::getCharCount(const char32_t& codepoint) { return getByteCount(codepoint) / sizeof(char); } @@ -132,7 +132,7 @@ int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View buffer) +int cpp::encoding::Utf8::encode(const char32_t& codepoint, cpp::marshal::View buffer) { if (codepoint <= 0x7F) { @@ -222,7 +222,7 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) return String(reinterpret_cast(backing), bytes / sizeof(char16_t)); } -int64_t cpp::encoding::Utf8::decode(cpp::marshal::View buffer, char32_t& codepoint) +int cpp::encoding::Utf8::decode(cpp::marshal::View buffer, char32_t& codepoint) { auto b0 = static_cast(buffer[0]); From c006fb469b43d2235f4e49d651872e50ec071840 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 23:22:09 +0000 Subject: [PATCH 14/32] switch to a dedicated codepoint function for utf16 --- include/cpp/encoding/Utf16.hpp | 2 +- src/cpp/encoding/Utf16.cpp | 38 +++++++++++++++------------------- src/cpp/encoding/Utf8.cpp | 14 +++++++------ 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/include/cpp/encoding/Utf16.hpp b/include/cpp/encoding/Utf16.hpp index c28ca3722..0e3d45c4b 100644 --- a/include/cpp/encoding/Utf16.hpp +++ b/include/cpp/encoding/Utf16.hpp @@ -17,7 +17,7 @@ namespace cpp static int encode(const char32_t& codepoint, cpp::marshal::View buffer); static int64_t encode(const String& string, cpp::marshal::View buffer); - static int decode(cpp::marshal::View buffer, char32_t& out); + static char32_t codepoint(cpp::marshal::View buffer); static String decode(cpp::marshal::View 
buffer); }; } diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index fd7796366..8682da9e7 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -174,31 +174,31 @@ String cpp::encoding::Utf16::decode(cpp::marshal::View buffer) return String::emptyString; } - auto bytes = int64_t{ 0 }; - auto codepoint = char32_t{ 0 }; - auto i = int64_t{ 0 }; - + auto chars = int64_t{ 0 }; + auto i = int64_t{ 0 }; while (i < buffer.length) { - i += decode(buffer.slice(i), codepoint); - bytes += getByteCount(codepoint); + auto p = codepoint(buffer.slice(i)); + + chars += getCharCount(p); + i += getByteCount(p); } - auto backing = static_cast(hx::NewGCPrivate(0, bytes + sizeof(char16_t))); - auto output = View(backing, bytes); + auto backing = View(::String::allocChar16Ptr(chars), chars); + auto output = backing.reinterpret(); while (false == buffer.isEmpty()) { - buffer = buffer.slice(decode(buffer, codepoint)); - output = output.slice(encode(codepoint, output)); - } + auto p = codepoint(buffer); - reinterpret_cast(backing)[-1] |= HX_GC_STRING_CHAR16_T; + buffer = buffer.slice(getByteCount(p)); + output = output.slice(encode(p, output)); + } - return String(reinterpret_cast(backing), bytes / sizeof(char16_t)); + return String(backing.ptr.ptr, chars); } -int cpp::encoding::Utf16::decode(cpp::marshal::View buffer, char32_t& codepoint) +char32_t cpp::encoding::Utf16::codepoint(cpp::marshal::View buffer) { auto first = static_cast(Marshal::readUInt16(buffer)); @@ -207,17 +207,13 @@ int cpp::encoding::Utf16::decode(cpp::marshal::View buffer, char32_t& c auto second = static_cast(Marshal::readUInt16(buffer.slice(2))); if (0xDC00 <= second && second < 0xE000) { - codepoint = static_cast((((first - 0xD800) << 10) | (second - 0xDC00)) + 0x10000); - - return 4; + return static_cast((((first - 0xD800) << 10) | (second - 0xDC00)) + 0x10000); } - return hx::Throw(HX_CSTRING("Invalid UTF16")); + return int{ hx::Throw(HX_CSTRING("Invalid UTF16")) }; } else 
{ - codepoint = static_cast(first); - - return 2; + return static_cast(first); } } diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 3d500c36f..3d1832e11 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -54,13 +54,14 @@ int64_t cpp::encoding::Utf8::getByteCount(const String& string) #if defined(HX_SMART_STRINGS) auto source = View(string.raw_wptr(), string.length).reinterpret(); auto length = source.length; - auto codepoint = char32_t{ 0 }; auto bytes = int64_t{ 0 }; while (false == source.isEmpty()) { - source = source.slice(Utf16::decode(source, codepoint)); - bytes += getByteCount(codepoint); + auto p = Utf16::codepoint(source); + + source = source.slice(Utf16::getByteCount(p)); + bytes += getByteCount(p); } return bytes; @@ -118,12 +119,13 @@ int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View(string.raw_wptr(), string.length).reinterpret(); - auto codepoint = char32_t{ 0 }; while (false == source.isEmpty()) { - source = source.slice(Utf16::decode(source, codepoint)); - buffer = buffer.slice(encode(codepoint, buffer)); + auto p = Utf16::codepoint(source); + + source = source.slice(Utf16::getByteCount(p)); + buffer = buffer.slice(encode(p, buffer)); } return buffer.ptr.ptr - initialPtr; From 4b5e40a0216086be8e70c586990f98d7a0287229 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Tue, 23 Dec 2025 23:31:00 +0000 Subject: [PATCH 15/32] move to a dedicated codepoint function for utf8 as well --- include/cpp/encoding/Utf8.hpp | 2 +- src/cpp/encoding/Utf8.cpp | 33 ++++++++++++++------------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/include/cpp/encoding/Utf8.hpp b/include/cpp/encoding/Utf8.hpp index 12d83e48e..809d90f68 100644 --- a/include/cpp/encoding/Utf8.hpp +++ b/include/cpp/encoding/Utf8.hpp @@ -15,8 +15,8 @@ namespace cpp static int encode(const char32_t& codepoint, cpp::marshal::View buffer); static int64_t encode(const String& string, cpp::marshal::View 
buffer); + static char32_t codepoint(cpp::marshal::View buffer); static String decode(cpp::marshal::View buffer); - static int decode(cpp::marshal::View buffer, char32_t& out); }; } } \ No newline at end of file diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 3d1832e11..8553a3f7a 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -201,13 +201,14 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) } auto bytes = int64_t{ 0 }; - auto codepoint = char32_t{ 0 }; auto i = int64_t{ 0 }; while (i < buffer.length) { - i += decode(buffer.slice(i), codepoint); - bytes += Utf16::getByteCount(codepoint); + auto p = codepoint(buffer.slice(i)); + + i += getByteCount(p); + bytes += Utf16::getByteCount(p); } auto backing = static_cast(hx::NewGCPrivate(0, bytes + sizeof(char16_t))); @@ -215,8 +216,10 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) while (false == buffer.isEmpty()) { - buffer = buffer.slice(decode(buffer, codepoint)); - output = output.slice(Utf16::encode(codepoint, output)); + auto p = codepoint(buffer.slice(i)); + + buffer = buffer.slice(getByteCount(p)); + output = output.slice(Utf16::encode(p, output)); } reinterpret_cast(backing)[-1] |= HX_GC_STRING_CHAR16_T; @@ -224,21 +227,17 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) return String(reinterpret_cast(backing), bytes / sizeof(char16_t)); } -int cpp::encoding::Utf8::decode(cpp::marshal::View buffer, char32_t& codepoint) +char32_t cpp::encoding::Utf8::codepoint(cpp::marshal::View buffer) { auto b0 = static_cast(buffer[0]); if ((b0 & 0x80) == 0) { - codepoint = b0; - - return 1; + return b0; } else if ((b0 & 0xE0) == 0xC0) { - codepoint = (static_cast(b0 & 0x1F) << 6) | static_cast(buffer.slice(1)[0] & 0x3F); - - return 2; + return (static_cast(b0 & 0x1F) << 6) | static_cast(buffer.slice(1)[0] & 0x3F); } else if ((b0 & 0xF0) == 0xE0) { @@ -247,9 +246,7 @@ int cpp::encoding::Utf8::decode(cpp::marshal::View buffer, 
char32_t& co buffer.slice(1, staging.size()).copyTo(dst); - codepoint = (static_cast(b0 & 0x0F) << 12) | (static_cast(staging[0] & 0x3F) << 6) | static_cast(staging[1] & 0x3F); - - return 3; + return (static_cast(b0 & 0x0F) << 12) | (static_cast(staging[0] & 0x3F) << 6) | static_cast(staging[1] & 0x3F); } else if ((b0 & 0xF8) == 0xF0) { @@ -258,16 +255,14 @@ int cpp::encoding::Utf8::decode(cpp::marshal::View buffer, char32_t& co buffer.slice(1, staging.size()).copyTo(dst); - codepoint = + return (static_cast(b0 & 0x07) << 18) | (static_cast(staging[0] & 0x3F) << 12) | (static_cast(staging[1] & 0x3F) << 6) | static_cast(staging[2] & 0x3F); - - return 4; } else { - return hx::Throw(HX_CSTRING("Failed to read codepoint")); + return int{ hx::Throw(HX_CSTRING("Failed to read codepoint")) }; } } \ No newline at end of file From 485e8b7c39ab005fc7c56066769105ac7ffcec8a Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Wed, 24 Dec 2025 10:20:59 +0000 Subject: [PATCH 16/32] Fix incorrect index reuse --- .gitignore | 406 ++++++++++++++++++++++++++++++++++++++ src/cpp/encoding/Utf8.cpp | 16 +- 2 files changed, 413 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index fcc9ac911..4ff620b27 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,409 @@ hxcpp.n *.ilk .vscode + +# Created by https://www.toptal.com/developers/gitignore/api/visualstudio +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudio + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool 
+coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml + +### VisualStudio Patch ### +# Additional files 
built by Visual Studio + +# End of https://www.toptal.com/developers/gitignore/api/visualstudio diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 8553a3f7a..27096e591 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -200,31 +200,29 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) return Ascii::decode(buffer); } - auto bytes = int64_t{ 0 }; - auto i = int64_t{ 0 }; + auto chars = int64_t{ 0 }; + auto i = int64_t{ 0 }; while (i < buffer.length) { auto p = codepoint(buffer.slice(i)); i += getByteCount(p); - bytes += Utf16::getByteCount(p); + chars += Utf16::getCharCount(p); } - auto backing = static_cast(hx::NewGCPrivate(0, bytes + sizeof(char16_t))); - auto output = View(backing, bytes); + auto backing = View(::String::allocChar16Ptr(chars), chars); + auto output = backing.reinterpret(); while (false == buffer.isEmpty()) { - auto p = codepoint(buffer.slice(i)); + auto p = codepoint(buffer); buffer = buffer.slice(getByteCount(p)); output = output.slice(Utf16::encode(p, output)); } - reinterpret_cast(backing)[-1] |= HX_GC_STRING_CHAR16_T; - - return String(reinterpret_cast(backing), bytes / sizeof(char16_t)); + return String(backing.ptr.ptr, chars); } char32_t cpp::encoding::Utf8::codepoint(cpp::marshal::View buffer) From 3dbceb9b2e922fcbe4c27b36d5878956521d8ce7 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Wed, 24 Dec 2025 11:33:52 +0000 Subject: [PATCH 17/32] Add new view extension cstring function tests --- src/cpp/encoding/Utf16.cpp | 41 +++++++++++++++ src/cpp/encoding/Utf8.cpp | 10 ++-- test/native/cpp/encoding/Ascii.hx | 15 ------ test/native/cpp/encoding/Utf16.hx | 22 -------- test/native/cpp/encoding/Utf8.hx | 20 -------- test/native/tests/encoding/TestUtf16.hx | 14 ++---- test/native/tests/encoding/TestUtf8.hx | 14 ++---- .../marshalling/view/TestViewExtensions.hx | 50 +++++++++++++++++++ 8 files changed, 106 insertions(+), 80 deletions(-) delete mode 100644 test/native/cpp/encoding/Ascii.hx 
delete mode 100644 test/native/cpp/encoding/Utf16.hx delete mode 100644 test/native/cpp/encoding/Utf8.hx diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index 8682da9e7..ca5d742ee 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -19,6 +19,42 @@ namespace { return codepoint >= 0xd800 && codepoint < 0xdc00; } + + bool isAsciiBuffer(View buffer) + { + while (buffer.isEmpty() == false) + { + auto p = cpp::encoding::Utf16::codepoint(buffer); + + if (p > 127) + { + return false; + } + + buffer = buffer.slice(cpp::encoding::Utf16::getByteCount(p)); + } + + return true; + } + + String toAsciiString(View buffer) + { + auto bytes = buffer.length / sizeof(char16_t); + auto chars = View(hx::InternalNew(bytes + 1, false), bytes * sizeof(char)); + auto output = chars.reinterpret(); + + while (buffer.isEmpty() == false) + { + auto p = cpp::encoding::Utf16::codepoint(buffer); + + output[0] = static_cast(p); + + buffer = buffer.slice(cpp::encoding::Utf16::getByteCount(p)); + output = output.slice(1); + } + + return String(chars.ptr.ptr, chars.length); + } } bool cpp::encoding::Utf16::isEncoded(const String& string) @@ -174,6 +210,11 @@ String cpp::encoding::Utf16::decode(cpp::marshal::View buffer) return String::emptyString; } + if (isAsciiBuffer(buffer)) + { + return toAsciiString(buffer); + } + auto chars = int64_t{ 0 }; auto i = int64_t{ 0 }; while (i < buffer.length) diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 27096e591..b6d0f1407 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -5,14 +5,18 @@ using namespace cpp::marshal; namespace { - bool isAsciiBuffer(View& buffer) + bool isAsciiBuffer(View buffer) { - for (auto i = int64_t{ 0 }; i < buffer.length; i++) + while (buffer.isEmpty() == false) { - if (buffer.ptr[i] > 127) + auto p = cpp::encoding::Utf8::codepoint(buffer); + + if (p > 127) { return false; } + + buffer = buffer.slice(cpp::encoding::Utf8::getByteCount(p)); } return 
true; diff --git a/test/native/cpp/encoding/Ascii.hx b/test/native/cpp/encoding/Ascii.hx deleted file mode 100644 index a96eb54d9..000000000 --- a/test/native/cpp/encoding/Ascii.hx +++ /dev/null @@ -1,15 +0,0 @@ -package cpp.encoding; - -import cpp.UInt8; -import cpp.Int64; -import cpp.marshal.View; - -@:semantics(value) -@:cpp.PointerType({ namespace : [ "cpp", "encoding" ] }) -extern class Ascii { - static function isEncoded(string:String):Bool; - - static function encode(string:String, buffer:View):Int64; - - static function decode(buffer:View):String; -} diff --git a/test/native/cpp/encoding/Utf16.hx b/test/native/cpp/encoding/Utf16.hx deleted file mode 100644 index 161591202..000000000 --- a/test/native/cpp/encoding/Utf16.hx +++ /dev/null @@ -1,22 +0,0 @@ -package cpp.encoding; - -import cpp.UInt8; -import cpp.Int64; -import cpp.Char32; -import cpp.marshal.View; -import haxe.extern.AsVar; - -@:semantics(value) -@:cpp.PointerType({ namespace : [ "cpp", "encoding" ] }) -extern class Utf16 { - static function isEncoded(string:String):Bool; - - static overload function getByteCount(codepoint:Char32):Int64; - static overload function getByteCount(string:String):Int64; - - static overload function encode(string:String, buffer:View):Int64; - static overload function encode(codepoint:Char32, buffer:View):Int64; - - static overload function decode(buffer:View):String; - static overload function decode(buffer:View, codepoint:AsVar):Int64; -} \ No newline at end of file diff --git a/test/native/cpp/encoding/Utf8.hx b/test/native/cpp/encoding/Utf8.hx deleted file mode 100644 index c1461c569..000000000 --- a/test/native/cpp/encoding/Utf8.hx +++ /dev/null @@ -1,20 +0,0 @@ -package cpp.encoding; - -import cpp.UInt8; -import cpp.Int64; -import cpp.Char32; -import cpp.marshal.View; -import haxe.extern.AsVar; - -@:semantics(value) -@:cpp.PointerType({ namespace : [ "cpp", "encoding" ] }) -extern class Utf8 { - static overload function getByteCount(codepoint:Char32):Int64; - 
static overload function getByteCount(string:String):Int64; - - static overload function encode(string:String, buffer:View):Int64; - static overload function encode(codepoint:Char32, buffer:View):Int64; - - static overload function decode(buffer:View):String; - static overload function decode(buffer:View, codepoint:AsVar):Int64; -} \ No newline at end of file diff --git a/test/native/tests/encoding/TestUtf16.hx b/test/native/tests/encoding/TestUtf16.hx index c970645b8..8198c0152 100644 --- a/test/native/tests/encoding/TestUtf16.hx +++ b/test/native/tests/encoding/TestUtf16.hx @@ -141,23 +141,17 @@ class TestUtf16 extends Test { } public function test_decode_codepoint() { - var codepoint : cpp.Char32 = 0; - var bytes = Bytes.ofHex('6100'); - Assert.equals(2i64, Utf16.decode(bytes.asView(), codepoint)); - Assert.equals('a'.code, cast codepoint); + Assert.equals('a'.code, Utf16.codepoint(bytes.asView())); var bytes = Bytes.ofHex('8501'); - Assert.equals(2i64, Utf16.decode(bytes.asView(), codepoint)); - Assert.equals('Ζ…'.code, cast codepoint); + Assert.equals('Ζ…'.code, Utf16.codepoint(bytes.asView())); var bytes = Bytes.ofHex('D030'); - Assert.equals(2i64, Utf16.decode(bytes.asView(), codepoint)); - Assert.equals('バ'.code, cast codepoint); + Assert.equals('バ'.code, Utf16.codepoint(bytes.asView())); var bytes = Bytes.ofHex('34D833DD'); - Assert.equals(4i64, Utf16.decode(bytes.asView(), codepoint)); - Assert.equals('𝄳'.code, cast codepoint); + Assert.equals('𝄳'.code, Utf16.codepoint(bytes.asView())); } public function test_decode_string() { diff --git a/test/native/tests/encoding/TestUtf8.hx b/test/native/tests/encoding/TestUtf8.hx index 739633f3c..716646709 100644 --- a/test/native/tests/encoding/TestUtf8.hx +++ b/test/native/tests/encoding/TestUtf8.hx @@ -118,23 +118,17 @@ class TestUtf8 extends Test { } public function test_decode_codepoint() { - var codepoint : cpp.Char32 = 0; - var bytes = Bytes.ofHex('61'); - Assert.equals(1i64, Utf8.decode(bytes.asView(), 
codepoint)); - Assert.equals('a'.code, cast codepoint); + Assert.equals('a'.code, Utf8.codepoint(bytes.asView())); var bytes = Bytes.ofHex('c685'); - Assert.equals(2i64, Utf8.decode(bytes.asView(), codepoint)); - Assert.equals('Ζ…'.code, cast codepoint); + Assert.equals('Ζ…'.code, Utf8.codepoint(bytes.asView())); var bytes = Bytes.ofHex('e38390'); - Assert.equals(3i64, Utf8.decode(bytes.asView(), codepoint)); - Assert.equals('バ'.code, cast codepoint); + Assert.equals('バ'.code, Utf8.codepoint(bytes.asView())); var bytes = Bytes.ofHex('f09d84b3'); - Assert.equals(4i64, Utf8.decode(bytes.asView(), codepoint)); - Assert.equals('𝄳'.code, cast codepoint); + Assert.equals('𝄳'.code, Utf8.codepoint(bytes.asView())); } public function test_decode_string() { diff --git a/test/native/tests/marshalling/view/TestViewExtensions.hx b/test/native/tests/marshalling/view/TestViewExtensions.hx index c9d2e2609..e9ff3f942 100644 --- a/test/native/tests/marshalling/view/TestViewExtensions.hx +++ b/test/native/tests/marshalling/view/TestViewExtensions.hx @@ -162,4 +162,54 @@ class TestViewExtensions extends Test { } } } + + function test_szToString_char_no_null() { + final vec = new Vector(4); + vec[0] = 't'.code; + vec[1] = 'e'.code; + vec[2] = 's'.code; + vec[3] = 't'.code; + + Assert.equals("test", vec.asView().szToString()); + } + + function test_szToString_char() { + final vec = new Vector(9); + vec[0] = 't'.code; + vec[1] = 'e'.code; + vec[2] = 's'.code; + vec[3] = 't'.code; + vec[4] = 0; + vec[5] = 't'.code; + vec[6] = 'e'.code; + vec[7] = 's'.code; + vec[8] = 't'.code; + + Assert.equals("test", vec.asView().szToString()); + } + + function test_szToString_char16_no_null() { + final vec = new Vector(4); + vec[0] = 't'.code; + vec[1] = 'e'.code; + vec[2] = 's'.code; + vec[3] = 't'.code; + + Assert.equals("test", vec.asView().szToString()); + } + + function test_szToString16_char() { + final vec = new Vector(9); + vec[0] = 't'.code; + vec[1] = 'e'.code; + vec[2] = 's'.code; + vec[3] = 
't'.code; + vec[4] = 0; + vec[5] = 't'.code; + vec[6] = 'e'.code; + vec[7] = 's'.code; + vec[8] = 't'.code; + + Assert.equals("test", vec.asView().szToString()); + } } \ No newline at end of file From 74e7e5725c11f92b3f38b951ef2a0d15e5d7b350 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Wed, 24 Dec 2025 12:31:53 +0000 Subject: [PATCH 18/32] Add a smart strings guard --- include/cpp/marshal/Marshal.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/cpp/marshal/Marshal.hpp b/include/cpp/marshal/Marshal.hpp index 585c72ba8..4bb3076be 100644 --- a/include/cpp/marshal/Marshal.hpp +++ b/include/cpp/marshal/Marshal.hpp @@ -35,6 +35,7 @@ inline cpp::marshal::View cpp::marshal::Marshal::asCharView(const ::String inline cpp::marshal::View cpp::marshal::Marshal::asWideCharView(const ::String& string) { +#if defined(HX_SMART_STRINGS) if (null() == string) { hx::NullReference("string", false); @@ -46,6 +47,9 @@ inline cpp::marshal::View cpp::marshal::Marshal::asWideCharView(const } return View(const_cast(string.raw_wptr()), string.length); +#else + return hx::Throw(HX_CSTRING("HX_SMART_STRINGS not defined")); +#endif } template From 2078eae9fd9f3a93658343df511d582050b2ea64 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Thu, 25 Dec 2025 21:14:33 +0000 Subject: [PATCH 19/32] const view --- include/cpp/marshal/Definitions.inc | 19 ++++++++------- include/cpp/marshal/View.hpp | 37 +++++++++++++++++++---------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/include/cpp/marshal/Definitions.inc b/include/cpp/marshal/Definitions.inc index fed1d91bd..953f6da0e 100644 --- a/include/cpp/marshal/Definitions.inc +++ b/include/cpp/marshal/Definitions.inc @@ -197,20 +197,21 @@ namespace cpp View(::cpp::Pointer _ptr, int64_t _length); - void clear(); - void fill(T value); - bool isEmpty(); - View slice(int64_t index); - View slice(int64_t index, int64_t length); - void copyTo(const View& destination); - bool tryCopyTo(const View& destination); - template View 
reinterpret(); - int compare(const View& inRHS); + void clear() const; + void fill(T value) const; + bool isEmpty() const; + View slice(int64_t index) const; + View slice(int64_t index, int64_t length) const; + void copyTo(const View& destination) const; + bool tryCopyTo(const View& destination) const; + template View reinterpret() const; + int compare(const View& inRHS) const; bool operator==(const View& inRHS) const; bool operator!=(const View& inRHS) const; T& operator[] (int64_t index); + const T& operator[] (int64_t index) const; }; struct Marshal final diff --git a/include/cpp/marshal/View.hpp b/include/cpp/marshal/View.hpp index 1e54d80b3..5fb5eb23d 100644 --- a/include/cpp/marshal/View.hpp +++ b/include/cpp/marshal/View.hpp @@ -8,7 +8,7 @@ template inline cpp::marshal::View::View(::cpp::Pointer _ptr, int64_t _length) : ptr(_ptr), length(_length) {} template -inline bool cpp::marshal::View::tryCopyTo(const View& destination) +inline bool cpp::marshal::View::tryCopyTo(const View& destination) const { if (destination.length < length) { @@ -21,7 +21,7 @@ inline bool cpp::marshal::View::tryCopyTo(const View& destination) } template -inline void cpp::marshal::View::copyTo(const View& destination) +inline void cpp::marshal::View::copyTo(const View& destination) const { if (tryCopyTo(destination) == false) { @@ -30,51 +30,51 @@ inline void cpp::marshal::View::copyTo(const View& destination) } template -inline void cpp::marshal::View::clear() +inline void cpp::marshal::View::clear() const { - std::memset(ptr, 0, sizeof(T) * length); + std::memset(ptr.ptr, 0, sizeof(T) * length); } template -inline void cpp::marshal::View::fill(T value) +inline void cpp::marshal::View::fill(T value) const { for (auto i = 0; i < length; i++) { - ptr[i] = value; + ptr.ptr[i] = value; } } template -inline bool cpp::marshal::View::isEmpty() +inline bool cpp::marshal::View::isEmpty() const { return length == 0; } template -inline cpp::marshal::View cpp::marshal::View::slice(int64_t index) 
+inline cpp::marshal::View cpp::marshal::View::slice(int64_t index) const { if (index < 0 || index > length) { hx::Throw(HX_CSTRING("View OOB")); } - return View(ptr + index, length - index); + return View(ptr.ptr + index, length - index); } template -inline cpp::marshal::View cpp::marshal::View::slice(int64_t inIndex, int64_t inLength) +inline cpp::marshal::View cpp::marshal::View::slice(int64_t inIndex, int64_t inLength) const { if (inIndex < 0 || inLength < 0 || inIndex > length || inIndex + inLength > length) { hx::Throw(HX_CSTRING("View OOB")); } - return View(ptr + inIndex, inLength); + return View(ptr.ptr + inIndex, inLength); } template template -inline cpp::marshal::View cpp::marshal::View::reinterpret() +inline cpp::marshal::View cpp::marshal::View::reinterpret() const { auto newPtr = ::cpp::Pointer(reinterpret_cast(ptr.ptr)); auto fromSize = sizeof(T); @@ -96,7 +96,7 @@ inline cpp::marshal::View cpp::marshal::View::reinterpret() } template -inline int cpp::marshal::View::compare(const View& inRHS) +inline int cpp::marshal::View::compare(const View& inRHS) const { auto common = length < inRHS.length ? 
length : inRHS.length; auto result = std::memcmp(ptr.ptr, inRHS.ptr.ptr, sizeof(T) * common); @@ -129,5 +129,16 @@ inline T& cpp::marshal::View::operator[](int64_t index) hx::Throw(HX_CSTRING("View OOB")); } + return ptr[index]; +} + +template +inline const T& cpp::marshal::View::operator[](int64_t index) const +{ + if (index < 0 || index >= length) + { + hx::Throw(HX_CSTRING("View OOB")); + } + return ptr[index]; } \ No newline at end of file From db7a3bbd0172b9c7ddc161b534a4d83a691efaa4 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Thu, 25 Dec 2025 21:52:29 +0000 Subject: [PATCH 20/32] const ref marshal read and writes --- include/cpp/marshal/Definitions.inc | 130 ++++++++++++++-------------- include/cpp/marshal/Marshal.hpp | 128 ++++++++++++++------------- 2 files changed, 131 insertions(+), 127 deletions(-) diff --git a/include/cpp/marshal/Definitions.inc b/include/cpp/marshal/Definitions.inc index 953f6da0e..b08cf9643 100644 --- a/include/cpp/marshal/Definitions.inc +++ b/include/cpp/marshal/Definitions.inc @@ -225,71 +225,71 @@ namespace cpp static View asCharView(const ::String& string); static View asWideCharView(const ::String& string); - template static T read(View view); - template static ::cpp::Pointer readPointer(View view); - static int8_t readInt8(View view); - static int16_t readInt16(View view); - static int32_t readInt32(View view); - static int64_t readInt64(View view); - static uint8_t readUInt8(View view); - static uint16_t readUInt16(View view); - static uint32_t readUInt32(View view); - static uint64_t readUInt64(View view); - static float readFloat32(View view); - static double readFloat64(View view); - - template static ::cpp::Pointer readLittleEndianPointer(View view); - static int16_t readLittleEndianInt16(View view); - static int32_t readLittleEndianInt32(View view); - static int64_t readLittleEndianInt64(View view); - static uint16_t readLittleEndianUInt16(View view); - static uint32_t readLittleEndianUInt32(View view); - static 
uint64_t readLittleEndianUInt64(View view); - static float readLittleEndianFloat32(View view); - static double readLittleEndianFloat64(View view); - - template static ::cpp::Pointer readBigEndianPointer(View view); - static int16_t readBigEndianInt16(View view); - static int32_t readBigEndianInt32(View view); - static int64_t readBigEndianInt64(View view); - static uint16_t readBigEndianUInt16(View view); - static uint32_t readBigEndianUInt32(View view); - static uint64_t readBigEndianUInt64(View view); - static float readBigEndianFloat32(View view); - static double readBigEndianFloat64(View view); - - template static void write(View view, const T& value); - template static void writePointer(View view, const Pointer& value); - static void writeInt8(View view, const int8_t& value); - static void writeInt16(View view, const int16_t& value); - static void writeInt32(View view, const int32_t& value); - static void writeInt64(View view, const int64_t& value); - static void writeUInt8(View view, const uint8_t& value); - static void writeUInt16(View view, const uint16_t& value); - static void writeUInt32(View view, const uint32_t& value); - static void writeUInt64(View view, const uint64_t& value); - static void writeFloat32(View view, const float& value); - static void writeFloat64(View view, const double& value); - - template static void writeLittleEndianPointer(View view, const Pointer& value); - static void writeLittleEndianInt16(View view, const int16_t& value); - static void writeLittleEndianInt32(View view, const int32_t& value); - static void writeLittleEndianInt64(View view, const int64_t& value); - static void writeLittleEndianUInt16(View view, const uint16_t& value); - static void writeLittleEndianUInt32(View view, const uint32_t& value); - static void writeLittleEndianUInt64(View view, const uint64_t& value); - static void writeLittleEndianFloat32(View view, const float& value); - static void writeLittleEndianFloat64(View view, const double& value); - - 
template static void writeBigEndianPointer(View view, const Pointer& value); - static void writeBigEndianInt16(View view, const int16_t& value); - static void writeBigEndianInt32(View view, const int32_t& value); - static void writeBigEndianInt64(View view, const int64_t& value); - static void writeBigEndianUInt16(View view, const uint16_t& value); - static void writeBigEndianUInt32(View view, const uint32_t& value); - static void writeBigEndianUInt64(View view, const uint64_t& value); - static void writeBigEndianFloat32(View view, const float& value); - static void writeBigEndianFloat64(View view, const double& value); + template static T read(const View& view); + template static ::cpp::Pointer readPointer(const View& view); + static int8_t readInt8(const View& view); + static int16_t readInt16(const View& view); + static int32_t readInt32(const View& view); + static int64_t readInt64(const View& view); + static uint8_t readUInt8(const View& view); + static uint16_t readUInt16(const View& view); + static uint32_t readUInt32(const View& view); + static uint64_t readUInt64(const View& view); + static float readFloat32(const View& view); + static double readFloat64(const View& view); + + template static ::cpp::Pointer readLittleEndianPointer(const View& view); + static int16_t readLittleEndianInt16(const View& view); + static int32_t readLittleEndianInt32(const View& view); + static int64_t readLittleEndianInt64(const View& view); + static uint16_t readLittleEndianUInt16(const View& view); + static uint32_t readLittleEndianUInt32(const View& view); + static uint64_t readLittleEndianUInt64(const View& view); + static float readLittleEndianFloat32(const View& view); + static double readLittleEndianFloat64(const View& view); + + template static ::cpp::Pointer readBigEndianPointer(const View& view); + static int16_t readBigEndianInt16(const View& view); + static int32_t readBigEndianInt32(const View& view); + static int64_t readBigEndianInt64(const View& view); + static 
uint16_t readBigEndianUInt16(const View& view); + static uint32_t readBigEndianUInt32(const View& view); + static uint64_t readBigEndianUInt64(const View& view); + static float readBigEndianFloat32(const View& view); + static double readBigEndianFloat64(const View& view); + + template static void write(const View& view, const T& value); + template static void writePointer(const View& view, const Pointer& value); + static void writeInt8(const View& view, const int8_t& value); + static void writeInt16(const View& view, const int16_t& value); + static void writeInt32(const View& view, const int32_t& value); + static void writeInt64(const View& view, const int64_t& value); + static void writeUInt8(const View& view, const uint8_t& value); + static void writeUInt16(const View& view, const uint16_t& value); + static void writeUInt32(const View& view, const uint32_t& value); + static void writeUInt64(const View& view, const uint64_t& value); + static void writeFloat32(const View& view, const float& value); + static void writeFloat64(const View& view, const double& value); + + template static void writeLittleEndianPointer(const View& view, const Pointer& value); + static void writeLittleEndianInt16(const View& view, const int16_t& value); + static void writeLittleEndianInt32(const View& view, const int32_t& value); + static void writeLittleEndianInt64(const View& view, const int64_t& value); + static void writeLittleEndianUInt16(const View& view, const uint16_t& value); + static void writeLittleEndianUInt32(const View& view, const uint32_t& value); + static void writeLittleEndianUInt64(const View& view, const uint64_t& value); + static void writeLittleEndianFloat32(const View& view, const float& value); + static void writeLittleEndianFloat64(const View& view, const double& value); + + template static void writeBigEndianPointer(const View& view, const Pointer& value); + static void writeBigEndianInt16(const View& view, const int16_t& value); + static void 
writeBigEndianInt32(const View& view, const int32_t& value); + static void writeBigEndianInt64(const View& view, const int64_t& value); + static void writeBigEndianUInt16(const View& view, const uint16_t& value); + static void writeBigEndianUInt32(const View& view, const uint32_t& value); + static void writeBigEndianUInt64(const View& view, const uint64_t& value); + static void writeBigEndianFloat32(const View& view, const float& value); + static void writeBigEndianFloat64(const View& view, const double& value); }; } } diff --git a/include/cpp/marshal/Marshal.hpp b/include/cpp/marshal/Marshal.hpp index 4bb3076be..18375cfe9 100644 --- a/include/cpp/marshal/Marshal.hpp +++ b/include/cpp/marshal/Marshal.hpp @@ -53,141 +53,145 @@ inline cpp::marshal::View cpp::marshal::Marshal::asWideCharView(const } template -inline T cpp::marshal::Marshal::read(View view) +inline T cpp::marshal::Marshal::read(const View& view) { if (view.length < sizeof(T)) { hx::Throw(HX_CSTRING("View too small")); } - return *(reinterpret_cast(view.ptr.ptr)); + T output{}; + + std::memcpy(&output, view.ptr.ptr, sizeof(T)); + + return output; } template -inline ::cpp::Pointer cpp::marshal::Marshal::readPointer(View view) +inline ::cpp::Pointer cpp::marshal::Marshal::readPointer(const View& view) { return read(view); } -inline int8_t cpp::marshal::Marshal::readInt8(View view) +inline int8_t cpp::marshal::Marshal::readInt8(const View& view) { return read(view); } -inline int16_t cpp::marshal::Marshal::readInt16(View view) +inline int16_t cpp::marshal::Marshal::readInt16(const View& view) { return read(view); } -inline int32_t cpp::marshal::Marshal::readInt32(View view) +inline int32_t cpp::marshal::Marshal::readInt32(const View& view) { return read(view); } -inline int64_t cpp::marshal::Marshal::readInt64(View view) +inline int64_t cpp::marshal::Marshal::readInt64(const View& view) { return read(view); } -inline uint8_t cpp::marshal::Marshal::readUInt8(View view) +inline uint8_t 
cpp::marshal::Marshal::readUInt8(const View& view) { return read(view); } -inline uint16_t cpp::marshal::Marshal::readUInt16(View view) +inline uint16_t cpp::marshal::Marshal::readUInt16(const View& view) { return read(view); } -inline uint32_t cpp::marshal::Marshal::readUInt32(View view) +inline uint32_t cpp::marshal::Marshal::readUInt32(const View& view) { return read(view); } -inline uint64_t cpp::marshal::Marshal::readUInt64(View view) +inline uint64_t cpp::marshal::Marshal::readUInt64(const View& view) { return read(view); } -inline float cpp::marshal::Marshal::readFloat32(View view) +inline float cpp::marshal::Marshal::readFloat32(const View& view) { return read(view); } -inline double cpp::marshal::Marshal::readFloat64(View view) +inline double cpp::marshal::Marshal::readFloat64(const View& view) { return read(view); } template -inline void cpp::marshal::Marshal::write(View view, const T& value) +inline void cpp::marshal::Marshal::write(const View& view, const T& value) { if (view.length < sizeof(T)) { hx::Throw(HX_CSTRING("View too small")); } - std::memcpy(view.ptr, reinterpret_cast(&value), sizeof(T)); + std::memcpy(view.ptr.ptr, reinterpret_cast(&value), sizeof(T)); } template -inline void cpp::marshal::Marshal::writePointer(View view, const ::cpp::Pointer& value) +inline void cpp::marshal::Marshal::writePointer(const View& view, const ::cpp::Pointer& value) { write(view, value.ptr); } -inline void cpp::marshal::Marshal::writeInt8(View view, const int8_t& value) +inline void cpp::marshal::Marshal::writeInt8(const View& view, const int8_t& value) { write(view, value); } -inline void cpp::marshal::Marshal::writeInt16(View view, const int16_t& value) +inline void cpp::marshal::Marshal::writeInt16(const View& view, const int16_t& value) { write(view, value); } -inline void cpp::marshal::Marshal::writeInt32(View view, const int32_t& value) +inline void cpp::marshal::Marshal::writeInt32(const View& view, const int32_t& value) { write(view, value); } -inline 
void cpp::marshal::Marshal::writeInt64(View view, const int64_t& value) +inline void cpp::marshal::Marshal::writeInt64(const View& view, const int64_t& value) { write(view, value); } -inline void cpp::marshal::Marshal::writeUInt8(View view, const uint8_t& value) +inline void cpp::marshal::Marshal::writeUInt8(const View& view, const uint8_t& value) { write(view, value); } -inline void cpp::marshal::Marshal::writeUInt16(View view, const uint16_t& value) +inline void cpp::marshal::Marshal::writeUInt16(const View& view, const uint16_t& value) { write(view, value); } -inline void cpp::marshal::Marshal::writeUInt32(View view, const uint32_t& value) +inline void cpp::marshal::Marshal::writeUInt32(const View& view, const uint32_t& value) { write(view, value); } -inline void cpp::marshal::Marshal::writeUInt64(View view, const uint64_t& value) +inline void cpp::marshal::Marshal::writeUInt64(const View& view, const uint64_t& value) { write(view, value); } -inline void cpp::marshal::Marshal::writeFloat32(View view, const float& value) +inline void cpp::marshal::Marshal::writeFloat32(const View& view, const float& value) { write(view, value); } -inline void cpp::marshal::Marshal::writeFloat64(View view, const double& value) +inline void cpp::marshal::Marshal::writeFloat64(const View& view, const double& value) { write(view, value); } template -inline ::cpp::Pointer cpp::marshal::Marshal::readBigEndianPointer(View view) +inline ::cpp::Pointer cpp::marshal::Marshal::readBigEndianPointer(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readPointer(view); @@ -196,7 +200,7 @@ inline ::cpp::Pointer cpp::marshal::Marshal::readBigEndianPointer(View view) +inline int16_t cpp::marshal::Marshal::readBigEndianInt16(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readInt16(view); @@ -205,7 +209,7 @@ inline int16_t cpp::marshal::Marshal::readBigEndianInt16(View view) #endif } -inline int32_t cpp::marshal::Marshal::readBigEndianInt32(View view) +inline int32_t 
cpp::marshal::Marshal::readBigEndianInt32(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readInt32(view); @@ -214,7 +218,7 @@ inline int32_t cpp::marshal::Marshal::readBigEndianInt32(View view) #endif } -inline int64_t cpp::marshal::Marshal::readBigEndianInt64(View view) +inline int64_t cpp::marshal::Marshal::readBigEndianInt64(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readInt64(view); @@ -223,7 +227,7 @@ inline int64_t cpp::marshal::Marshal::readBigEndianInt64(View view) #endif } -inline uint16_t cpp::marshal::Marshal::readBigEndianUInt16(View view) +inline uint16_t cpp::marshal::Marshal::readBigEndianUInt16(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readUInt16(view); @@ -232,7 +236,7 @@ inline uint16_t cpp::marshal::Marshal::readBigEndianUInt16(View view) #endif } -inline uint32_t cpp::marshal::Marshal::readBigEndianUInt32(View view) +inline uint32_t cpp::marshal::Marshal::readBigEndianUInt32(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readUInt32(view); @@ -241,7 +245,7 @@ inline uint32_t cpp::marshal::Marshal::readBigEndianUInt32(View view) #endif } -inline uint64_t cpp::marshal::Marshal::readBigEndianUInt64(View view) +inline uint64_t cpp::marshal::Marshal::readBigEndianUInt64(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readInt64(view); @@ -250,7 +254,7 @@ inline uint64_t cpp::marshal::Marshal::readBigEndianUInt64(View view) #endif } -inline float cpp::marshal::Marshal::readBigEndianFloat32(View view) +inline float cpp::marshal::Marshal::readBigEndianFloat32(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readFloat32(view); @@ -259,7 +263,7 @@ inline float cpp::marshal::Marshal::readBigEndianFloat32(View view) #endif } -inline double cpp::marshal::Marshal::readBigEndianFloat64(View view) +inline double cpp::marshal::Marshal::readBigEndianFloat64(const View& view) { #ifdef HXCPP_BIG_ENDIAN return readFloat64(view); @@ -269,7 +273,7 @@ inline double cpp::marshal::Marshal::readBigEndianFloat64(View view) } template -inline 
::cpp::Pointer cpp::marshal::Marshal::readLittleEndianPointer(View view) +inline ::cpp::Pointer cpp::marshal::Marshal::readLittleEndianPointer(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readPointer(view); @@ -278,7 +282,7 @@ inline ::cpp::Pointer cpp::marshal::Marshal::readLittleEndianPointer(View view) +inline int16_t cpp::marshal::Marshal::readLittleEndianInt16(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readInt16(view); @@ -287,7 +291,7 @@ inline int16_t cpp::marshal::Marshal::readLittleEndianInt16(View view) #endif } -inline int32_t cpp::marshal::Marshal::readLittleEndianInt32(View view) +inline int32_t cpp::marshal::Marshal::readLittleEndianInt32(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readInt32(view); @@ -296,7 +300,7 @@ inline int32_t cpp::marshal::Marshal::readLittleEndianInt32(View view) #endif } -inline int64_t cpp::marshal::Marshal::readLittleEndianInt64(View view) +inline int64_t cpp::marshal::Marshal::readLittleEndianInt64(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readInt64(view); @@ -305,7 +309,7 @@ inline int64_t cpp::marshal::Marshal::readLittleEndianInt64(View view) #endif } -inline uint16_t cpp::marshal::Marshal::readLittleEndianUInt16(View view) +inline uint16_t cpp::marshal::Marshal::readLittleEndianUInt16(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readUInt16(view); @@ -314,7 +318,7 @@ inline uint16_t cpp::marshal::Marshal::readLittleEndianUInt16(View view #endif } -inline uint32_t cpp::marshal::Marshal::readLittleEndianUInt32(View view) +inline uint32_t cpp::marshal::Marshal::readLittleEndianUInt32(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readUInt32(view); @@ -323,7 +327,7 @@ inline uint32_t cpp::marshal::Marshal::readLittleEndianUInt32(View view #endif } -inline uint64_t cpp::marshal::Marshal::readLittleEndianUInt64(View view) +inline uint64_t cpp::marshal::Marshal::readLittleEndianUInt64(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readInt64(view); @@ -332,7 +336,7 @@ inline 
uint64_t cpp::marshal::Marshal::readLittleEndianUInt64(View view #endif } -inline float cpp::marshal::Marshal::readLittleEndianFloat32(View view) +inline float cpp::marshal::Marshal::readLittleEndianFloat32(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readFloat32(view); @@ -341,7 +345,7 @@ inline float cpp::marshal::Marshal::readLittleEndianFloat32(View view) #endif } -inline double cpp::marshal::Marshal::readLittleEndianFloat64(View view) +inline double cpp::marshal::Marshal::readLittleEndianFloat64(const View& view) { #ifndef HXCPP_BIG_ENDIAN return readFloat64(view); @@ -351,7 +355,7 @@ inline double cpp::marshal::Marshal::readLittleEndianFloat64(View view) } template -inline void cpp::marshal::Marshal::writeLittleEndianPointer(View view, const ::cpp::Pointer& value) +inline void cpp::marshal::Marshal::writeLittleEndianPointer(const View& view, const ::cpp::Pointer& value) { #ifdef HXCPP_BIG_ENDIAN write(view, reverse(value.ptr)); @@ -360,7 +364,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianPointer(View view, #endif } -inline void cpp::marshal::Marshal::writeLittleEndianInt16(View view, const int16_t& value) +inline void cpp::marshal::Marshal::writeLittleEndianInt16(const View& view, const int16_t& value) { #ifdef HXCPP_BIG_ENDIAN writeInt16(view, reverse(value)); @@ -369,7 +373,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianInt16(View view, co #endif } -inline void cpp::marshal::Marshal::writeLittleEndianInt32(View view, const int32_t& value) +inline void cpp::marshal::Marshal::writeLittleEndianInt32(const View& view, const int32_t& value) { #ifdef HXCPP_BIG_ENDIAN writeInt32(view, reverse(value)); @@ -378,7 +382,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianInt32(View view, co #endif } -inline void cpp::marshal::Marshal::writeLittleEndianInt64(View view, const int64_t& value) +inline void cpp::marshal::Marshal::writeLittleEndianInt64(const View& view, const int64_t& value) { #ifdef HXCPP_BIG_ENDIAN writeInt64(view, 
reverse(value)); @@ -387,7 +391,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianInt64(View view, co #endif } -inline void cpp::marshal::Marshal::writeLittleEndianUInt16(View view, const uint16_t& value) +inline void cpp::marshal::Marshal::writeLittleEndianUInt16(const View& view, const uint16_t& value) { #ifdef HXCPP_BIG_ENDIAN writeUInt16(view, reverse(value)); @@ -396,7 +400,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianUInt16(View view, c #endif } -inline void cpp::marshal::Marshal::writeLittleEndianUInt32(View view, const uint32_t& value) +inline void cpp::marshal::Marshal::writeLittleEndianUInt32(const View& view, const uint32_t& value) { #ifdef HXCPP_BIG_ENDIAN writeUInt32(view, reverse(value)); @@ -405,7 +409,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianUInt32(View view, c #endif } -inline void cpp::marshal::Marshal::writeLittleEndianUInt64(View view, const uint64_t& value) +inline void cpp::marshal::Marshal::writeLittleEndianUInt64(const View& view, const uint64_t& value) { #ifdef HXCPP_BIG_ENDIAN writeUInt16(view, reverse(value)); @@ -414,7 +418,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianUInt64(View view, c #endif } -inline void cpp::marshal::Marshal::writeLittleEndianFloat32(View view, const float& value) +inline void cpp::marshal::Marshal::writeLittleEndianFloat32(const View& view, const float& value) { #ifdef HXCPP_BIG_ENDIAN writeFloat32(view, reverse(value)); @@ -423,7 +427,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianFloat32(View view, #endif } -inline void cpp::marshal::Marshal::writeLittleEndianFloat64(View view, const double& value) +inline void cpp::marshal::Marshal::writeLittleEndianFloat64(const View& view, const double& value) { #ifdef HXCPP_BIG_ENDIAN writeFloat64(view, reverse(value)); @@ -433,7 +437,7 @@ inline void cpp::marshal::Marshal::writeLittleEndianFloat64(View view, } template -inline void cpp::marshal::Marshal::writeBigEndianPointer(View view, const ::cpp::Pointer& value) 
+inline void cpp::marshal::Marshal::writeBigEndianPointer(const View& view, const ::cpp::Pointer& value) { #ifndef HXCPP_BIG_ENDIAN write(view, reverse(value.ptr)); @@ -442,7 +446,7 @@ inline void cpp::marshal::Marshal::writeBigEndianPointer(View view, con #endif } -inline void cpp::marshal::Marshal::writeBigEndianInt16(View view, const int16_t& value) +inline void cpp::marshal::Marshal::writeBigEndianInt16(const View& view, const int16_t& value) { #ifndef HXCPP_BIG_ENDIAN writeInt16(view, reverse(value)); @@ -451,7 +455,7 @@ inline void cpp::marshal::Marshal::writeBigEndianInt16(View view, const #endif } -inline void cpp::marshal::Marshal::writeBigEndianInt32(View view, const int32_t& value) +inline void cpp::marshal::Marshal::writeBigEndianInt32(const View& view, const int32_t& value) { #ifndef HXCPP_BIG_ENDIAN writeInt32(view, reverse(value)); @@ -460,7 +464,7 @@ inline void cpp::marshal::Marshal::writeBigEndianInt32(View view, const #endif } -inline void cpp::marshal::Marshal::writeBigEndianInt64(View view, const int64_t& value) +inline void cpp::marshal::Marshal::writeBigEndianInt64(const View& view, const int64_t& value) { #ifndef HXCPP_BIG_ENDIAN writeInt64(view, reverse(value)); @@ -469,7 +473,7 @@ inline void cpp::marshal::Marshal::writeBigEndianInt64(View view, const #endif } -inline void cpp::marshal::Marshal::writeBigEndianUInt16(View view, const uint16_t& value) +inline void cpp::marshal::Marshal::writeBigEndianUInt16(const View& view, const uint16_t& value) { #ifndef HXCPP_BIG_ENDIAN writeUInt16(view, reverse(value)); @@ -478,7 +482,7 @@ inline void cpp::marshal::Marshal::writeBigEndianUInt16(View view, cons #endif } -inline void cpp::marshal::Marshal::writeBigEndianUInt32(View view, const uint32_t& value) +inline void cpp::marshal::Marshal::writeBigEndianUInt32(const View& view, const uint32_t& value) { #ifndef HXCPP_BIG_ENDIAN writeUInt32(view, reverse(value)); @@ -487,7 +491,7 @@ inline void cpp::marshal::Marshal::writeBigEndianUInt32(View view, 
cons #endif } -inline void cpp::marshal::Marshal::writeBigEndianUInt64(View view, const uint64_t& value) +inline void cpp::marshal::Marshal::writeBigEndianUInt64(const View& view, const uint64_t& value) { #ifndef HXCPP_BIG_ENDIAN writeUInt16(view, reverse(value)); @@ -496,7 +500,7 @@ inline void cpp::marshal::Marshal::writeBigEndianUInt64(View view, cons #endif } -inline void cpp::marshal::Marshal::writeBigEndianFloat32(View view, const float& value) +inline void cpp::marshal::Marshal::writeBigEndianFloat32(const View& view, const float& value) { #ifndef HXCPP_BIG_ENDIAN writeFloat32(view, reverse(value)); @@ -505,7 +509,7 @@ inline void cpp::marshal::Marshal::writeBigEndianFloat32(View view, con #endif } -inline void cpp::marshal::Marshal::writeBigEndianFloat64(View view, const double& value) +inline void cpp::marshal::Marshal::writeBigEndianFloat64(const View& view, const double& value) { #ifndef HXCPP_BIG_ENDIAN writeFloat64(view, reverse(value)); From 819044ed1110af397c3482df889863be5836becb Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Fri, 26 Dec 2025 17:28:38 +0000 Subject: [PATCH 21/32] more const ref and less object copying --- include/cpp/encoding/Utf16.hpp | 8 ++-- include/cpp/encoding/Utf8.hpp | 8 ++-- include/cpp/marshal/Definitions.inc | 3 +- include/cpp/marshal/View.hpp | 15 +------- src/cpp/encoding/Utf16.cpp | 57 +++++++++++++---------------- src/cpp/encoding/Utf8.cpp | 55 ++++++++++++++++------------ 6 files changed, 68 insertions(+), 78 deletions(-) diff --git a/include/cpp/encoding/Utf16.hpp b/include/cpp/encoding/Utf16.hpp index 0e3d45c4b..992c0635d 100644 --- a/include/cpp/encoding/Utf16.hpp +++ b/include/cpp/encoding/Utf16.hpp @@ -14,11 +14,11 @@ namespace cpp static int getCharCount(const char32_t& codepoint); static int64_t getCharCount(const String& string); - static int encode(const char32_t& codepoint, cpp::marshal::View buffer); - static int64_t encode(const String& string, cpp::marshal::View buffer); + static int encode(const 
char32_t& codepoint, const cpp::marshal::View& buffer); + static int64_t encode(const String& string, const cpp::marshal::View& buffer); - static char32_t codepoint(cpp::marshal::View buffer); - static String decode(cpp::marshal::View buffer); + static char32_t codepoint(const cpp::marshal::View& buffer); + static String decode(const cpp::marshal::View& buffer); }; } } \ No newline at end of file diff --git a/include/cpp/encoding/Utf8.hpp b/include/cpp/encoding/Utf8.hpp index 809d90f68..11497a692 100644 --- a/include/cpp/encoding/Utf8.hpp +++ b/include/cpp/encoding/Utf8.hpp @@ -12,11 +12,11 @@ namespace cpp static int getCharCount(const char32_t& codepoint); static int64_t getCharCount(const String& string); - static int encode(const char32_t& codepoint, cpp::marshal::View buffer); - static int64_t encode(const String& string, cpp::marshal::View buffer); + static int encode(const char32_t& codepoint, const cpp::marshal::View& buffer); + static int64_t encode(const String& string, const cpp::marshal::View& buffer); - static char32_t codepoint(cpp::marshal::View buffer); - static String decode(cpp::marshal::View buffer); + static char32_t codepoint(const cpp::marshal::View& buffer); + static String decode(const cpp::marshal::View& buffer); }; } } \ No newline at end of file diff --git a/include/cpp/marshal/Definitions.inc b/include/cpp/marshal/Definitions.inc index b08cf9643..bde43e655 100644 --- a/include/cpp/marshal/Definitions.inc +++ b/include/cpp/marshal/Definitions.inc @@ -210,8 +210,7 @@ namespace cpp bool operator==(const View& inRHS) const; bool operator!=(const View& inRHS) const; - T& operator[] (int64_t index); - const T& operator[] (int64_t index) const; + T& operator[] (int64_t index) const; }; struct Marshal final diff --git a/include/cpp/marshal/View.hpp b/include/cpp/marshal/View.hpp index 5fb5eb23d..394837f72 100644 --- a/include/cpp/marshal/View.hpp +++ b/include/cpp/marshal/View.hpp @@ -122,23 +122,12 @@ inline bool 
cpp::marshal::View::operator!=(const View& inRHS) const } template -inline T& cpp::marshal::View::operator[](int64_t index) +inline T& cpp::marshal::View::operator[](int64_t index) const { if (index < 0 || index >= length) { hx::Throw(HX_CSTRING("View OOB")); } - return ptr[index]; + return ptr.ptr[index]; } - -template -inline const T& cpp::marshal::View::operator[](int64_t index) const -{ - if (index < 0 || index >= length) - { - hx::Throw(HX_CSTRING("View OOB")); - } - - return ptr[index]; -} \ No newline at end of file diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index ca5d742ee..3304bb74d 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -20,37 +20,38 @@ namespace return codepoint >= 0xd800 && codepoint < 0xdc00; } - bool isAsciiBuffer(View buffer) + bool isAsciiBuffer(const View& buffer) { - while (buffer.isEmpty() == false) + auto i = int64_t{ 0 }; + while (i < buffer.length) { - auto p = cpp::encoding::Utf16::codepoint(buffer); + auto p = cpp::encoding::Utf16::codepoint(buffer.slice(i)); if (p > 127) { return false; } - buffer = buffer.slice(cpp::encoding::Utf16::getByteCount(p)); + i += cpp::encoding::Utf16::getByteCount(p); } return true; } - String toAsciiString(View buffer) + String toAsciiString(const View& buffer) { auto bytes = buffer.length / sizeof(char16_t); auto chars = View(hx::InternalNew(bytes + 1, false), bytes * sizeof(char)); - auto output = chars.reinterpret(); + auto i = int64_t{ 0 }; + auto k = int64_t{ 0 }; - while (buffer.isEmpty() == false) + while (i < buffer.length) { - auto p = cpp::encoding::Utf16::codepoint(buffer); + auto p = cpp::encoding::Utf16::codepoint(buffer.slice(i)); - output[0] = static_cast(p); + chars[k++] = static_cast(p); - buffer = buffer.slice(cpp::encoding::Utf16::getByteCount(p)); - output = output.slice(1); + i += cpp::encoding::Utf16::getByteCount(p); } return String(chars.ptr.ptr, chars.length); @@ -69,15 +70,7 @@ bool cpp::encoding::Utf16::isEncoded(const 
String& string) int cpp::encoding::Utf16::getByteCount(const char32_t& codepoint) { - if (codepoint >= 0x10000) - { - if (codepoint < 0x110000) - { - return 4; - } - } - - return 2; + return codepoint <= 0xFFFF ? 2 : 4; } int64_t cpp::encoding::Utf16::getByteCount(const String& string) @@ -113,7 +106,7 @@ int64_t cpp::encoding::Utf16::getCharCount(const String& string) return getByteCount(string) / sizeof(char16_t); } -int64_t cpp::encoding::Utf16::encode(const String& string, cpp::marshal::View buffer) +int64_t cpp::encoding::Utf16::encode(const String& string, const cpp::marshal::View& buffer) { if (null() == string) { @@ -156,16 +149,17 @@ int64_t cpp::encoding::Utf16::encode(const String& string, cpp::marshal::View(string.raw_ptr()[i]), buffer)); + i += encode(static_cast(string.raw_ptr()[k]), buffer.slice(i)); } return bytes; } } -int cpp::encoding::Utf16::encode(const char32_t& codepoint, cpp::marshal::View buffer) +int cpp::encoding::Utf16::encode(const char32_t& codepoint, const cpp::marshal::View& buffer) { if (codepoint < 0xD800) { @@ -203,7 +197,7 @@ int cpp::encoding::Utf16::encode(const char32_t& codepoint, cpp::marshal::View buffer) +String cpp::encoding::Utf16::decode(const cpp::marshal::View& buffer) { if (buffer.isEmpty()) { @@ -227,19 +221,20 @@ String cpp::encoding::Utf16::decode(cpp::marshal::View buffer) auto backing = View(::String::allocChar16Ptr(chars), chars); auto output = backing.reinterpret(); + auto k = int64_t{ 0 }; - while (false == buffer.isEmpty()) + while (i < buffer.length) { - auto p = codepoint(buffer); + auto p = codepoint(buffer.slice(i)); - buffer = buffer.slice(getByteCount(p)); - output = output.slice(encode(p, output)); + i += getByteCount(p); + k += encode(p, output.slice(k)); } return String(backing.ptr.ptr, chars); } -char32_t cpp::encoding::Utf16::codepoint(cpp::marshal::View buffer) +char32_t cpp::encoding::Utf16::codepoint(const cpp::marshal::View& buffer) { auto first = static_cast(Marshal::readUInt16(buffer)); @@ 
-257,4 +252,4 @@ char32_t cpp::encoding::Utf16::codepoint(cpp::marshal::View buffer) { return static_cast(first); } -} +} \ No newline at end of file diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index b6d0f1407..a858c3a7e 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -5,18 +5,19 @@ using namespace cpp::marshal; namespace { - bool isAsciiBuffer(View buffer) + bool isAsciiBuffer(const View& buffer) { - while (buffer.isEmpty() == false) + auto i = int64_t{ 0 }; + while (i < buffer.length) { - auto p = cpp::encoding::Utf8::codepoint(buffer); + auto p = cpp::encoding::Utf8::codepoint(buffer.slice(i)); if (p > 127) { return false; } - buffer = buffer.slice(cpp::encoding::Utf8::getByteCount(p)); + i += cpp::encoding::Utf8::getByteCount(p); } return true; @@ -56,15 +57,17 @@ int64_t cpp::encoding::Utf8::getByteCount(const String& string) } #if defined(HX_SMART_STRINGS) - auto source = View(string.raw_wptr(), string.length).reinterpret(); - auto length = source.length; - auto bytes = int64_t{ 0 }; + auto source = View(string.raw_wptr(), string.length).reinterpret(); + auto length = source.length; + auto bytes = int64_t{ 0 }; + auto i = int64_t{ 0 }; - while (false == source.isEmpty()) + while (i < source.length) { - auto p = Utf16::codepoint(source); + auto slice = source.slice(i); + auto p = Utf16::codepoint(slice); - source = source.slice(Utf16::getByteCount(p)); + i += Utf16::getByteCount(p); bytes += getByteCount(p); } @@ -84,7 +87,7 @@ int64_t cpp::encoding::Utf8::getCharCount(const String& string) return getByteCount(string) / sizeof(char); } -int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View buffer) +int64_t cpp::encoding::Utf8::encode(const String& string, const cpp::marshal::View& buffer) { if (null() == string) { @@ -103,7 +106,7 @@ int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View(reinterpret_cast(const_cast(string.raw_ptr())), string.length * sizeof(char)); + 
auto src = cpp::marshal::View(reinterpret_cast(const_cast(string.raw_ptr())), string.length); if (src.tryCopyTo(buffer)) { @@ -123,22 +126,24 @@ int64_t cpp::encoding::Utf8::encode(const String& string, cpp::marshal::View(string.raw_wptr(), string.length).reinterpret(); + auto i = int64_t{ 0 }; + auto k = int64_t{ 0 }; - while (false == source.isEmpty()) + while (i < source.length) { - auto p = Utf16::codepoint(source); + auto p = Utf16::codepoint(source.slice(i)); - source = source.slice(Utf16::getByteCount(p)); - buffer = buffer.slice(encode(p, buffer)); + i += Utf16::getByteCount(p); + k += encode(p, buffer.slice(k)); } - return buffer.ptr.ptr - initialPtr; + return k; #else return hx::Throw(HX_CSTRING("Unexpected encoding error")); #endif } -int cpp::encoding::Utf8::encode(const char32_t& codepoint, cpp::marshal::View buffer) +int cpp::encoding::Utf8::encode(const char32_t& codepoint, const cpp::marshal::View& buffer) { if (codepoint <= 0x7F) { @@ -192,7 +197,7 @@ int cpp::encoding::Utf8::encode(const char32_t& codepoint, cpp::marshal::View buffer) +String cpp::encoding::Utf8::decode(const cpp::marshal::View& buffer) { if (buffer.isEmpty()) { @@ -217,19 +222,21 @@ String cpp::encoding::Utf8::decode(cpp::marshal::View buffer) auto backing = View(::String::allocChar16Ptr(chars), chars); auto output = backing.reinterpret(); + auto k = int64_t{ 0 }; - while (false == buffer.isEmpty()) + i = 0; + while (i < buffer.length) { - auto p = codepoint(buffer); + auto p = codepoint(buffer.slice(i)); - buffer = buffer.slice(getByteCount(p)); - output = output.slice(Utf16::encode(p, output)); + i += getByteCount(p); + k += Utf16::encode(p, output.slice(k)); } return String(backing.ptr.ptr, chars); } -char32_t cpp::encoding::Utf8::codepoint(cpp::marshal::View buffer) +char32_t cpp::encoding::Utf8::codepoint(const cpp::marshal::View& buffer) { auto b0 = static_cast(buffer[0]); From 16078b9ac71a96d6ba24dd062044277586bf573d Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Fri, 26 
Dec 2025 22:12:47 +0000 Subject: [PATCH 22/32] Fix index reuse issue --- src/cpp/encoding/Utf16.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index 3304bb74d..1887c403d 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -209,20 +209,20 @@ String cpp::encoding::Utf16::decode(const cpp::marshal::View& buffer) return toAsciiString(buffer); } - auto chars = int64_t{ 0 }; - auto i = int64_t{ 0 }; + auto i = int64_t{ 0 }; while (i < buffer.length) { auto p = codepoint(buffer.slice(i)); - chars += getCharCount(p); - i += getByteCount(p); + i += getByteCount(p); } + auto chars = i / sizeof(char16_t); auto backing = View(::String::allocChar16Ptr(chars), chars); auto output = backing.reinterpret(); auto k = int64_t{ 0 }; + i = 0; while (i < buffer.length) { auto p = codepoint(buffer.slice(i)); From 695e134657b2ae014de2d2c4cd88544eb0d62537 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Sat, 27 Dec 2025 21:39:57 +0000 Subject: [PATCH 23/32] separate build xml for encoding --- include/cpp/encoding/Ascii.hpp | 2 ++ include/cpp/encoding/Utf16.hpp | 2 ++ include/cpp/encoding/Utf8.hpp | 2 ++ include/hxcpp.h | 3 --- src/cpp/encoding/Ascii.cpp | 1 + src/cpp/encoding/Build.xml | 20 ++++++++++++++++++++ src/cpp/encoding/Utf16.cpp | 2 ++ src/cpp/encoding/Utf8.cpp | 3 +++ toolchain/haxe-target.xml | 7 ------- 9 files changed, 32 insertions(+), 10 deletions(-) create mode 100644 src/cpp/encoding/Build.xml diff --git a/include/cpp/encoding/Ascii.hpp b/include/cpp/encoding/Ascii.hpp index 0d89fa63c..cb9bc4222 100644 --- a/include/cpp/encoding/Ascii.hpp +++ b/include/cpp/encoding/Ascii.hpp @@ -1,5 +1,7 @@ #pragma once +#include + namespace cpp { namespace encoding diff --git a/include/cpp/encoding/Utf16.hpp b/include/cpp/encoding/Utf16.hpp index 992c0635d..089a1d9c4 100644 --- a/include/cpp/encoding/Utf16.hpp +++ b/include/cpp/encoding/Utf16.hpp @@ -1,5 +1,7 @@ #pragma once 
+#include + namespace cpp { namespace encoding diff --git a/include/cpp/encoding/Utf8.hpp b/include/cpp/encoding/Utf8.hpp index 11497a692..0e5c9341f 100644 --- a/include/cpp/encoding/Utf8.hpp +++ b/include/cpp/encoding/Utf8.hpp @@ -1,5 +1,7 @@ #pragma once +#include + namespace cpp { namespace encoding diff --git a/include/hxcpp.h b/include/hxcpp.h index 71618c1b3..68824a682 100755 --- a/include/hxcpp.h +++ b/include/hxcpp.h @@ -358,9 +358,6 @@ typedef PropertyAccessMode PropertyAccess; #include #include #include -#include -#include -#include #include #include #include diff --git a/src/cpp/encoding/Ascii.cpp b/src/cpp/encoding/Ascii.cpp index 7a0acd8bf..bd113ac23 100644 --- a/src/cpp/encoding/Ascii.cpp +++ b/src/cpp/encoding/Ascii.cpp @@ -1,4 +1,5 @@ #include +#include using namespace cpp::marshal; diff --git a/src/cpp/encoding/Build.xml b/src/cpp/encoding/Build.xml new file mode 100644 index 000000000..d3cad8862 --- /dev/null +++ b/src/cpp/encoding/Build.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index 1887c403d..286cb71ca 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include using namespace cpp::marshal; diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index a858c3a7e..9de96c761 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -1,4 +1,7 @@ #include +#include +#include +#include #include using namespace cpp::marshal; diff --git a/toolchain/haxe-target.xml b/toolchain/haxe-target.xml index 09d933727..8d7362e12 100644 --- a/toolchain/haxe-target.xml +++ b/toolchain/haxe-target.xml @@ -68,9 +68,6 @@ - - - @@ -202,10 +199,6 @@ - - - - From 111b3baa969a222662749b8fd8487cf19d709b56 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Sat, 27 Dec 2025 21:46:16 +0000 Subject: [PATCH 24/32] Don't return hx::Throw result in non smart string case --- 
include/cpp/marshal/Marshal.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/cpp/marshal/Marshal.hpp b/include/cpp/marshal/Marshal.hpp index 18375cfe9..69d6a227e 100644 --- a/include/cpp/marshal/Marshal.hpp +++ b/include/cpp/marshal/Marshal.hpp @@ -48,7 +48,9 @@ inline cpp::marshal::View cpp::marshal::Marshal::asWideCharView(const return View(const_cast(string.raw_wptr()), string.length); #else - return hx::Throw(HX_CSTRING("HX_SMART_STRINGS not defined")); + hx::Throw(HX_CSTRING("HX_SMART_STRINGS not defined")); + + return View(nullptr, 0); #endif } From ed3e5687a68c4270f7afb7fb30fe2af749d308b6 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Sat, 27 Dec 2025 22:04:07 +0000 Subject: [PATCH 25/32] Revert "separate build xml for encoding" This reverts commit 695e134657b2ae014de2d2c4cd88544eb0d62537. --- include/cpp/encoding/Ascii.hpp | 2 -- include/cpp/encoding/Utf16.hpp | 2 -- include/cpp/encoding/Utf8.hpp | 2 -- include/hxcpp.h | 3 +++ src/cpp/encoding/Ascii.cpp | 1 - src/cpp/encoding/Build.xml | 20 -------------------- src/cpp/encoding/Utf16.cpp | 2 -- src/cpp/encoding/Utf8.cpp | 3 --- toolchain/haxe-target.xml | 7 +++++++ 9 files changed, 10 insertions(+), 32 deletions(-) delete mode 100644 src/cpp/encoding/Build.xml diff --git a/include/cpp/encoding/Ascii.hpp b/include/cpp/encoding/Ascii.hpp index cb9bc4222..0d89fa63c 100644 --- a/include/cpp/encoding/Ascii.hpp +++ b/include/cpp/encoding/Ascii.hpp @@ -1,7 +1,5 @@ #pragma once -#include - namespace cpp { namespace encoding diff --git a/include/cpp/encoding/Utf16.hpp b/include/cpp/encoding/Utf16.hpp index 089a1d9c4..992c0635d 100644 --- a/include/cpp/encoding/Utf16.hpp +++ b/include/cpp/encoding/Utf16.hpp @@ -1,7 +1,5 @@ #pragma once -#include - namespace cpp { namespace encoding diff --git a/include/cpp/encoding/Utf8.hpp b/include/cpp/encoding/Utf8.hpp index 0e5c9341f..11497a692 100644 --- a/include/cpp/encoding/Utf8.hpp +++ b/include/cpp/encoding/Utf8.hpp @@ -1,7 +1,5 @@ #pragma 
once -#include - namespace cpp { namespace encoding diff --git a/include/hxcpp.h b/include/hxcpp.h index 68824a682..71618c1b3 100755 --- a/include/hxcpp.h +++ b/include/hxcpp.h @@ -358,6 +358,9 @@ typedef PropertyAccessMode PropertyAccess; #include #include #include +#include +#include +#include #include #include #include diff --git a/src/cpp/encoding/Ascii.cpp b/src/cpp/encoding/Ascii.cpp index bd113ac23..7a0acd8bf 100644 --- a/src/cpp/encoding/Ascii.cpp +++ b/src/cpp/encoding/Ascii.cpp @@ -1,5 +1,4 @@ #include -#include using namespace cpp::marshal; diff --git a/src/cpp/encoding/Build.xml b/src/cpp/encoding/Build.xml deleted file mode 100644 index d3cad8862..000000000 --- a/src/cpp/encoding/Build.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index 286cb71ca..1887c403d 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include using namespace cpp::marshal; diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index 9de96c761..a858c3a7e 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -1,7 +1,4 @@ #include -#include -#include -#include #include using namespace cpp::marshal; diff --git a/toolchain/haxe-target.xml b/toolchain/haxe-target.xml index 8d7362e12..09d933727 100644 --- a/toolchain/haxe-target.xml +++ b/toolchain/haxe-target.xml @@ -68,6 +68,9 @@ + + + @@ -199,6 +202,10 @@ + + + + From d477f6d4b190eeba10f797965779cf7e249bd23f Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Sat, 27 Dec 2025 23:12:07 +0000 Subject: [PATCH 26/32] better smart string handling and shuffle some functions in lower hxcpp --- include/cpp/marshal/Marshal.hpp | 2 +- include/hx/StdLibs.h | 5 + include/hxString.h | 2 + src/String.cpp | 599 ++++++++++++++++---------------- src/cpp/encoding/Utf16.cpp | 6 + src/cpp/encoding/Utf8.cpp | 10 +- 6 files changed, 322 
insertions(+), 302 deletions(-) diff --git a/include/cpp/marshal/Marshal.hpp b/include/cpp/marshal/Marshal.hpp index 69d6a227e..54df11f82 100644 --- a/include/cpp/marshal/Marshal.hpp +++ b/include/cpp/marshal/Marshal.hpp @@ -50,7 +50,7 @@ inline cpp::marshal::View cpp::marshal::Marshal::asWideCharView(const #else hx::Throw(HX_CSTRING("HX_SMART_STRINGS not defined")); - return View(nullptr, 0); + return View(cpp::Pointer(null()), 0); #endif } diff --git a/include/hx/StdLibs.h b/include/hx/StdLibs.h index 6a849a671..688ba27e2 100644 --- a/include/hx/StdLibs.h +++ b/include/hx/StdLibs.h @@ -141,6 +141,9 @@ HXCPP_EXTERN_CLASS_ATTRIBUTES String __hxcpp_get_kind(Dynamic inObject); // --- haxe.io.BytesData ---------------------------------------------------------------- +// These functions are no longer used in Haxe 5 in favour of the classes in cpp/encoding/ +#if (HXCPP_API_LEVEL<500) + HXCPP_EXTERN_CLASS_ATTRIBUTES void __hxcpp_bytes_of_string(Array &outBytes,const String &inString); HXCPP_EXTERN_CLASS_ATTRIBUTES void __hxcpp_string_of_bytes(Array &inBytes,String &outString,int pos,int len,bool inCopyPointer=false); // UTF8 processing @@ -149,6 +152,8 @@ HXCPP_EXTERN_CLASS_ATTRIBUTES Array __hxcpp_utf8_string_to_char_array(Strin HXCPP_EXTERN_CLASS_ATTRIBUTES String __hxcpp_char_bytes_to_utf8_string(String &inBytes); HXCPP_EXTERN_CLASS_ATTRIBUTES String __hxcpp_utf8_string_to_char_bytes(String &inUTF8); +#endif + #ifdef HXCPP_GC_GENERATIONAL #define HX_MAP_THIS this, h diff --git a/include/hxString.h b/include/hxString.h index e6cdf745e..a8f16cee8 100644 --- a/include/hxString.h +++ b/include/hxString.h @@ -148,8 +148,10 @@ class HXCPP_EXTERN_CLASS_ATTRIBUTES String ::String toString() { return *this; } +#if (HXCPP_API_LEVEL<500) ::String __URLEncode() const; ::String __URLDecode() const; +#else ::String toUpperCase() const; diff --git a/src/String.cpp b/src/String.cpp index fedef3873..637c6bc8e 100644 --- a/src/String.cpp +++ b/src/String.cpp @@ -520,130 +520,6 @@ 
inline String TCopyString(const T *inString,int inLength) #endif } - -String __hxcpp_char_array_to_utf8_string(Array &inChars,int inFirst, int inLen) -{ - int len = inChars->length; - if (inFirst<0) - inFirst = 0; - if (inLen<0) inLen = len; - if (inFirst+inLen>len) - inLen = len-inFirst; - if (inLen<=0) - return String::emptyString; - - int *base = &inChars[0]; - #ifdef HX_SMART_STRINGS - bool hasBig = false; - for(int i=0;i127) - { - hasBig = true; - break; - } - - if (hasBig) - { - char16_t *ptr = String::allocChar16Ptr(inLen); - for(int i=0;i __hxcpp_utf8_string_to_char_array(String &inString) -{ - #ifdef HX_SMART_STRINGS - Array result = Array_obj::__new(inString.length); - if (inString.isUTF16Encoded()) - { - const char16_t *ptr = inString.wc_str(); - for(int i=0;i result = Array_obj::__new(0,inString.length); - - const unsigned char *src = (const unsigned char *)inString.__s; - const unsigned char *end = src + inString.length; - while(srcpush(DecodeAdvanceUTF8(src)); - - if (src!=end) - hx::Throw(HX_CSTRING("Invalid UTF8")); - #endif - - return result; -} - - -String __hxcpp_char_bytes_to_utf8_string(String &inBytes) -{ - #ifdef HX_SMART_STRINGS - // This does not really make much sense - return inBytes; - #else - int len = inBytes.length; - char *result = TConvertToUTF8((unsigned char *)inBytes.__s,&len,0,true); - return String(result,len); - #endif -} - - -String __hxcpp_utf8_string_to_char_bytes(String &inUTF8) -{ - #ifdef HX_SMART_STRINGS - // This does not really make much sense - return inUTF8; - #else - const unsigned char *src = (unsigned char *)inUTF8.__s; - const unsigned char *end = src + inUTF8.length; - int char_count = 0; - while(src 255 ) - hx::Throw(HX_CSTRING("Utf8::decode invalid character")); - } - - if (src!=end) - hx::Throw(HX_CSTRING("Invalid UTF8")); - - char *result = hx::NewString(char_count); - - src = (unsigned char *)inUTF8.__s; - char_count = 0; - while(src bytes(0,length); - // utf8-encode - __hxcpp_bytes_of_string(bytes,*this); 
- - int extra = 0; - int utf8_chars = bytes->__length(); - for(int i=0;i>4 ]; - *ptr++ = hex[ b & 0x0f ]; - } - else - *ptr++ = bytes[i]; - } - return String(result,l); -} - String String::toUpperCase() const { #ifdef HX_SMART_STRINGS @@ -1110,73 +952,6 @@ static int hex(int inChar) return 0; } - -String String::__URLDecode() const -{ - // Create the decoded string; the decoded form might have fewer than - // [length] characters, but it won't have more. If it has fewer than - // [length], some memory will be wasted here, but on the assumption that - // most URLs have only a few '%NN' encodings in them, don't bother - // counting the number of characters in the resulting string first. - char *decoded = NewString(length), *d = decoded; - - bool hasBig = false; - - #ifdef HX_SMART_STRINGS - if (isUTF16Encoded()) - { - for (int i = 0; i < length; i++) - { - int c = __w[i]; - if (c > 127) - *d++ = '?'; - else if (c == '+') - *d++ = ' '; - else if ((c == '%') && (i < (length - 2))) - { - int ch = ((hex(__w[i + 1]) << 4) | (hex(__w[i + 2]))); - if (ch>127) - hasBig = true; - *d++ = ch; - i += 2; - } - else - *d++ = c; - } - } - else - #endif - { - for (int i = 0; i < length; i++) - { - int c = __s[i]; - if (c > 127) - *d++ = '?'; - else if (c == '+') - *d++ = ' '; - else if ((c == '%') && (i < (length - 2))) - { - int ch = ((hex(__s[i + 1]) << 4) | (hex(__s[i + 2]))); - #ifdef HX_SMART_STRINGS - if (ch>127) - hasBig = true; - #endif - *d++ = ch; - i += 2; - } - else - *d++ = c; - } - } - - #ifdef HX_SMART_STRINGS - if (hasBig) - return _hx_utf8_to_utf16((const unsigned char *)decoded, d-decoded,false); - #endif - - return String( decoded, (d - decoded) ); -} - ::String &::String::dup() { const char *s = __s; @@ -1478,50 +1253,6 @@ String String::charAt( int at ) const return fromCharCode(__s[at]); } -void __hxcpp_bytes_of_string(Array &outBytes,const String &inString) -{ - if (!inString.length) - return; - - #ifdef HX_SMART_STRINGS - if (inString.isUTF16Encoded()) - { - 
const char16_t *src = inString.raw_wptr(); - const char16_t *end = src + inString.length; - while(srcpush(c); - else if( c <= 0x7FF ) - { - outBytes->push( 0xC0 | (c >> 6) ); - outBytes->push( 0x80 | (c & 63) ); - } - else if( c <= 0xFFFF ) - { - outBytes->push( 0xE0 | (c >> 12) ); - outBytes->push( 0x80 | ((c >> 6) & 63) ); - outBytes->push( 0x80 | (c & 63) ); - } - else - { - outBytes->push( 0xF0 | (c >> 18) ); - outBytes->push( 0x80 | ((c >> 12) & 63) ); - outBytes->push( 0x80 | ((c >> 6) & 63) ); - outBytes->push( 0x80 | (c & 63) ); - } - } - } - else - #endif - { - outBytes->__SetSize(inString.length); - memcpy(outBytes->GetBase(), inString.raw_ptr(),inString.length); - } -} - #ifdef HX_SMART_STRINGS String _hx_utf8_to_utf16(const unsigned char *ptr, int inUtf8Len, bool addHash) { @@ -1567,37 +1298,6 @@ String _hx_utf8_to_utf16(const unsigned char *ptr, int inUtf8Len, bool addHash) } #endif - -void __hxcpp_string_of_bytes(Array &inBytes,String &outString,int pos,int len,bool inCopyPointer) -{ - if (inCopyPointer) - outString = String( (const char *)inBytes->GetBase(), len); - else if (len==0) - outString = String::emptyString; - else - { - const unsigned char *p0 = (const unsigned char *)inBytes->GetBase(); -#ifdef HX_SMART_STRINGS - bool hasWChar = false; - const unsigned char *p = p0 + pos; - for(int i=0;i127) - { - hasWChar = true; - break; - } - if (hasWChar) - { - outString = _hx_utf8_to_utf16(p0+pos,len,true); - } - else -#endif - outString = String( GCStringDup((const char *)p0+pos, len, 0), len); - } -} - - - const char * String::utf8_str(hx::IStringAlloc *inBuffer,bool throwInvalid, int *byteLength) const { #ifdef HX_SMART_STRINGS @@ -2556,3 +2256,302 @@ void String::__boot() &CreateEmptyString, &CreateString, 0, 0, 0 ); } + +// -------- HAXE < 5 FUNCTIONS ---------------------------------------- + +#if (HXCPP_API_LEVEL<500) + +String String::__URLEncode() const +{ + Array bytes(0, length); + // utf8-encode + __hxcpp_bytes_of_string(bytes, *this); + + 
int extra = 0; + int utf8_chars = bytes->__length(); + for (int i = 0; i < utf8_chars; i++) + if (!safeChars[bytes[i]]) + extra++; + if (extra == 0) + return *this; + + int l = utf8_chars + extra * 2; + char* result = hx::NewString(l); + char* ptr = result; + + for (int i = 0; i < utf8_chars; i++) + { + if (!safeChars[bytes[i]]) + { + static char hex[] = "0123456789ABCDEF"; + unsigned char b = bytes[i]; + *ptr++ = '%'; + *ptr++ = hex[b >> 4]; + *ptr++ = hex[b & 0x0f]; + } + else + *ptr++ = bytes[i]; + } + return String(result, l); +} + +String String::__URLDecode() const +{ + // Create the decoded string; the decoded form might have fewer than + // [length] characters, but it won't have more. If it has fewer than + // [length], some memory will be wasted here, but on the assumption that + // most URLs have only a few '%NN' encodings in them, don't bother + // counting the number of characters in the resulting string first. + char* decoded = NewString(length), * d = decoded; + + bool hasBig = false; + +#ifdef HX_SMART_STRINGS + if (isUTF16Encoded()) + { + for (int i = 0; i < length; i++) + { + int c = __w[i]; + if (c > 127) + *d++ = '?'; + else if (c == '+') + *d++ = ' '; + else if ((c == '%') && (i < (length - 2))) + { + int ch = ((hex(__w[i + 1]) << 4) | (hex(__w[i + 2]))); + if (ch > 127) + hasBig = true; + *d++ = ch; + i += 2; + } + else + *d++ = c; + } + } + else +#endif + { + for (int i = 0; i < length; i++) + { + int c = __s[i]; + if (c > 127) + *d++ = '?'; + else if (c == '+') + *d++ = ' '; + else if ((c == '%') && (i < (length - 2))) + { + int ch = ((hex(__s[i + 1]) << 4) | (hex(__s[i + 2]))); +#ifdef HX_SMART_STRINGS + if (ch > 127) + hasBig = true; +#endif + * d++ = ch; + i += 2; + } + else + *d++ = c; + } + } + +#ifdef HX_SMART_STRINGS + if (hasBig) + return _hx_utf8_to_utf16((const unsigned char*)decoded, d - decoded, false); +#endif + + return String(decoded, (d - decoded)); +} + +void __hxcpp_bytes_of_string(Array& outBytes, const String& inString) +{ 
+ if (!inString.length) + return; + +#ifdef HX_SMART_STRINGS + if (inString.isUTF16Encoded()) + { + const char16_t* src = inString.raw_wptr(); + const char16_t* end = src + inString.length; + while (src < end) + { + int c = Char16Advance(src); + + if (c <= 0x7F) + outBytes->push(c); + else if (c <= 0x7FF) + { + outBytes->push(0xC0 | (c >> 6)); + outBytes->push(0x80 | (c & 63)); + } + else if (c <= 0xFFFF) + { + outBytes->push(0xE0 | (c >> 12)); + outBytes->push(0x80 | ((c >> 6) & 63)); + outBytes->push(0x80 | (c & 63)); + } + else + { + outBytes->push(0xF0 | (c >> 18)); + outBytes->push(0x80 | ((c >> 12) & 63)); + outBytes->push(0x80 | ((c >> 6) & 63)); + outBytes->push(0x80 | (c & 63)); + } + } + } + else +#endif + { + outBytes->__SetSize(inString.length); + memcpy(outBytes->GetBase(), inString.raw_ptr(), inString.length); + } +} + +void __hxcpp_string_of_bytes(Array& inBytes, String& outString, int pos, int len, bool inCopyPointer) +{ + if (inCopyPointer) + outString = String((const char*)inBytes->GetBase(), len); + else if (len == 0) + outString = String::emptyString; + else + { + const unsigned char* p0 = (const unsigned char*)inBytes->GetBase(); +#ifdef HX_SMART_STRINGS + bool hasWChar = false; + const unsigned char* p = p0 + pos; + for (int i = 0; i < len; i++) + if (p[i] > 127) + { + hasWChar = true; + break; + } + if (hasWChar) + { + outString = _hx_utf8_to_utf16(p0 + pos, len, true); + } + else +#endif + outString = String(GCStringDup((const char*)p0 + pos, len, 0), len); + } +} + +String __hxcpp_char_array_to_utf8_string(Array& inChars, int inFirst, int inLen) +{ + int len = inChars->length; + if (inFirst < 0) + inFirst = 0; + if (inLen < 0) inLen = len; + if (inFirst + inLen > len) + inLen = len - inFirst; + if (inLen <= 0) + return String::emptyString; + + int* base = &inChars[0]; +#ifdef HX_SMART_STRINGS + bool hasBig = false; + for (int i = 0; i < inLen; i++) + if (base[i + inFirst] > 127) + { + hasBig = true; + break; + } + + if (hasBig) + { + 
char16_t* ptr = String::allocChar16Ptr(inLen); + for (int i = 0; i < inLen; i++) + ptr[i] = base[i + inFirst]; + return String(ptr, inLen); + } +#endif + char* result = TConvertToUTF8(base + inFirst, &len, 0, true); + return String(result, len); +} + +Array __hxcpp_utf8_string_to_char_array(String& inString) +{ +#ifdef HX_SMART_STRINGS + Array result = Array_obj::__new(inString.length); + if (inString.isUTF16Encoded()) + { + const char16_t* ptr = inString.wc_str(); + for (int i = 0; i < inString.length; i++) + result[i] = ptr[i]; + } + else + { + const char* ptr = inString.raw_ptr(); + for (int i = 0; i < inString.length; i++) + result[i] = ptr[i]; + } +#else + Array result = Array_obj::__new(0, inString.length); + + const unsigned char* src = (const unsigned char*)inString.__s; + const unsigned char* end = src + inString.length; + while (src < end) + result->push(DecodeAdvanceUTF8(src)); + + if (src != end) + hx::Throw(HX_CSTRING("Invalid UTF8")); +#endif + + return result; +} + + +String __hxcpp_char_bytes_to_utf8_string(String& inBytes) +{ +#ifdef HX_SMART_STRINGS + // This does not really make much sense + return inBytes; +#else + int len = inBytes.length; + char* result = TConvertToUTF8((unsigned char*)inBytes.__s, &len, 0, true); + return String(result, len); +#endif +} + +String __hxcpp_utf8_string_to_char_bytes(String& inUTF8) +{ +#ifdef HX_SMART_STRINGS + // This does not really make much sense + return inUTF8; +#else + const unsigned char* src = (unsigned char*)inUTF8.__s; + const unsigned char* end = src + inUTF8.length; + int char_count = 0; + while (src < end) + { + int c = DecodeAdvanceUTF8(src, end); + char_count++; + if (c == 8364) // euro symbol + c = 164; + else if (c == 0xFEFF) // BOM + { + char_count--; + } + else if (c > 255) + hx::Throw(HX_CSTRING("Utf8::decode invalid character")); + } + + if (src != end) + hx::Throw(HX_CSTRING("Invalid UTF8")); + + char* result = hx::NewString(char_count); + + src = (unsigned char*)inUTF8.__s; + char_count = 
0; + while (src < end) + { + int c = DecodeAdvanceUTF8(src); + if (c == 8364) // euro symbol + c = 164; + if (c != 0xFEFF) // BOM + result[char_count++] = c; + } + + result[char_count] = '\0'; + return String(result, char_count); +#endif +} + +#endif \ No newline at end of file diff --git a/src/cpp/encoding/Utf16.cpp b/src/cpp/encoding/Utf16.cpp index 1887c403d..f3b595a09 100644 --- a/src/cpp/encoding/Utf16.cpp +++ b/src/cpp/encoding/Utf16.cpp @@ -123,6 +123,7 @@ int64_t cpp::encoding::Utf16::encode(const String& string, const cpp::marshal::V return hx::Throw(HX_CSTRING("Buffer too small")); } +#if defined(HX_SMART_STRINGS) if (string.isUTF16Encoded()) { auto src = cpp::marshal::View(reinterpret_cast(const_cast(string.raw_wptr())), string.length * sizeof(char16_t)); @@ -137,6 +138,7 @@ int64_t cpp::encoding::Utf16::encode(const String& string, const cpp::marshal::V } } else +#endif { auto bytes = int64_t{ 0 }; for (auto i = 0; i < string.length; i++) @@ -209,6 +211,7 @@ String cpp::encoding::Utf16::decode(const cpp::marshal::View& buffer) return toAsciiString(buffer); } +#if defined(HX_SMART_STRINGS) auto i = int64_t{ 0 }; while (i < buffer.length) { @@ -232,6 +235,9 @@ String cpp::encoding::Utf16::decode(const cpp::marshal::View& buffer) } return String(backing.ptr.ptr, chars); +#else + return hx::Throw(HX_CSTRING("Not Implemented : UTF16 decode when HX_SMART_STRINGS is not defined")); +#endif } char32_t cpp::encoding::Utf16::codepoint(const cpp::marshal::View& buffer) diff --git a/src/cpp/encoding/Utf8.cpp b/src/cpp/encoding/Utf8.cpp index a858c3a7e..b6b7c4eb5 100644 --- a/src/cpp/encoding/Utf8.cpp +++ b/src/cpp/encoding/Utf8.cpp @@ -209,6 +209,7 @@ String cpp::encoding::Utf8::decode(const cpp::marshal::View& buffer) return Ascii::decode(buffer); } +#if defined(HX_SMART_STRINGS) auto chars = int64_t{ 0 }; auto i = int64_t{ 0 }; @@ -232,8 +233,15 @@ String cpp::encoding::Utf8::decode(const cpp::marshal::View& buffer) i += getByteCount(p); k += Utf16::encode(p, 
output.slice(k)); } - + return String(backing.ptr.ptr, chars); +#else + auto backing = View(hx::InternalNew(buffer.length, false), buffer.length); + + std::memcpy(backing.ptr.ptr, buffer.ptr.ptr, buffer.length); + + return String(backing.ptr.ptr, static_cast(buffer.length)); +#endif } char32_t cpp::encoding::Utf8::codepoint(const cpp::marshal::View& buffer) From 0ce64e22c84420b9cadf7d4c4fb2fa1bdfeff97e Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Sun, 28 Dec 2025 10:36:37 +0000 Subject: [PATCH 27/32] Revert some version guards --- include/hx/StdLibs.h | 4 - include/hxString.h | 4 +- src/String.cpp | 599 ++++++++++++++++++++++--------------------- 3 files changed, 302 insertions(+), 305 deletions(-) diff --git a/include/hx/StdLibs.h b/include/hx/StdLibs.h index 688ba27e2..b202fa7a9 100644 --- a/include/hx/StdLibs.h +++ b/include/hx/StdLibs.h @@ -141,9 +141,6 @@ HXCPP_EXTERN_CLASS_ATTRIBUTES String __hxcpp_get_kind(Dynamic inObject); // --- haxe.io.BytesData ---------------------------------------------------------------- -// These functions are no longer used in Haxe 5 in favour of the classes in cpp/encoding/ -#if (HXCPP_API_LEVEL<500) - HXCPP_EXTERN_CLASS_ATTRIBUTES void __hxcpp_bytes_of_string(Array &outBytes,const String &inString); HXCPP_EXTERN_CLASS_ATTRIBUTES void __hxcpp_string_of_bytes(Array &inBytes,String &outString,int pos,int len,bool inCopyPointer=false); // UTF8 processing @@ -152,7 +149,6 @@ HXCPP_EXTERN_CLASS_ATTRIBUTES Array __hxcpp_utf8_string_to_char_array(Strin HXCPP_EXTERN_CLASS_ATTRIBUTES String __hxcpp_char_bytes_to_utf8_string(String &inBytes); HXCPP_EXTERN_CLASS_ATTRIBUTES String __hxcpp_utf8_string_to_char_bytes(String &inUTF8); -#endif #ifdef HXCPP_GC_GENERATIONAL diff --git a/include/hxString.h b/include/hxString.h index a8f16cee8..9e82f2469 100644 --- a/include/hxString.h +++ b/include/hxString.h @@ -148,10 +148,10 @@ class HXCPP_EXTERN_CLASS_ATTRIBUTES String ::String toString() { return *this; } -#if (HXCPP_API_LEVEL<500) + ::String 
__URLEncode() const; ::String __URLDecode() const; -#else + ::String toUpperCase() const; diff --git a/src/String.cpp b/src/String.cpp index 637c6bc8e..fedef3873 100644 --- a/src/String.cpp +++ b/src/String.cpp @@ -520,6 +520,130 @@ inline String TCopyString(const T *inString,int inLength) #endif } + +String __hxcpp_char_array_to_utf8_string(Array &inChars,int inFirst, int inLen) +{ + int len = inChars->length; + if (inFirst<0) + inFirst = 0; + if (inLen<0) inLen = len; + if (inFirst+inLen>len) + inLen = len-inFirst; + if (inLen<=0) + return String::emptyString; + + int *base = &inChars[0]; + #ifdef HX_SMART_STRINGS + bool hasBig = false; + for(int i=0;i127) + { + hasBig = true; + break; + } + + if (hasBig) + { + char16_t *ptr = String::allocChar16Ptr(inLen); + for(int i=0;i __hxcpp_utf8_string_to_char_array(String &inString) +{ + #ifdef HX_SMART_STRINGS + Array result = Array_obj::__new(inString.length); + if (inString.isUTF16Encoded()) + { + const char16_t *ptr = inString.wc_str(); + for(int i=0;i result = Array_obj::__new(0,inString.length); + + const unsigned char *src = (const unsigned char *)inString.__s; + const unsigned char *end = src + inString.length; + while(srcpush(DecodeAdvanceUTF8(src)); + + if (src!=end) + hx::Throw(HX_CSTRING("Invalid UTF8")); + #endif + + return result; +} + + +String __hxcpp_char_bytes_to_utf8_string(String &inBytes) +{ + #ifdef HX_SMART_STRINGS + // This does not really make much sense + return inBytes; + #else + int len = inBytes.length; + char *result = TConvertToUTF8((unsigned char *)inBytes.__s,&len,0,true); + return String(result,len); + #endif +} + + +String __hxcpp_utf8_string_to_char_bytes(String &inUTF8) +{ + #ifdef HX_SMART_STRINGS + // This does not really make much sense + return inUTF8; + #else + const unsigned char *src = (unsigned char *)inUTF8.__s; + const unsigned char *end = src + inUTF8.length; + int char_count = 0; + while(src 255 ) + hx::Throw(HX_CSTRING("Utf8::decode invalid character")); + } + + if 
(src!=end) + hx::Throw(HX_CSTRING("Invalid UTF8")); + + char *result = hx::NewString(char_count); + + src = (unsigned char *)inUTF8.__s; + char_count = 0; + while(src bytes(0,length); + // utf8-encode + __hxcpp_bytes_of_string(bytes,*this); + + int extra = 0; + int utf8_chars = bytes->__length(); + for(int i=0;i>4 ]; + *ptr++ = hex[ b & 0x0f ]; + } + else + *ptr++ = bytes[i]; + } + return String(result,l); +} + String String::toUpperCase() const { #ifdef HX_SMART_STRINGS @@ -952,6 +1110,73 @@ static int hex(int inChar) return 0; } + +String String::__URLDecode() const +{ + // Create the decoded string; the decoded form might have fewer than + // [length] characters, but it won't have more. If it has fewer than + // [length], some memory will be wasted here, but on the assumption that + // most URLs have only a few '%NN' encodings in them, don't bother + // counting the number of characters in the resulting string first. + char *decoded = NewString(length), *d = decoded; + + bool hasBig = false; + + #ifdef HX_SMART_STRINGS + if (isUTF16Encoded()) + { + for (int i = 0; i < length; i++) + { + int c = __w[i]; + if (c > 127) + *d++ = '?'; + else if (c == '+') + *d++ = ' '; + else if ((c == '%') && (i < (length - 2))) + { + int ch = ((hex(__w[i + 1]) << 4) | (hex(__w[i + 2]))); + if (ch>127) + hasBig = true; + *d++ = ch; + i += 2; + } + else + *d++ = c; + } + } + else + #endif + { + for (int i = 0; i < length; i++) + { + int c = __s[i]; + if (c > 127) + *d++ = '?'; + else if (c == '+') + *d++ = ' '; + else if ((c == '%') && (i < (length - 2))) + { + int ch = ((hex(__s[i + 1]) << 4) | (hex(__s[i + 2]))); + #ifdef HX_SMART_STRINGS + if (ch>127) + hasBig = true; + #endif + *d++ = ch; + i += 2; + } + else + *d++ = c; + } + } + + #ifdef HX_SMART_STRINGS + if (hasBig) + return _hx_utf8_to_utf16((const unsigned char *)decoded, d-decoded,false); + #endif + + return String( decoded, (d - decoded) ); +} + ::String &::String::dup() { const char *s = __s; @@ -1253,6 +1478,50 @@ 
String String::charAt( int at ) const return fromCharCode(__s[at]); } +void __hxcpp_bytes_of_string(Array &outBytes,const String &inString) +{ + if (!inString.length) + return; + + #ifdef HX_SMART_STRINGS + if (inString.isUTF16Encoded()) + { + const char16_t *src = inString.raw_wptr(); + const char16_t *end = src + inString.length; + while(srcpush(c); + else if( c <= 0x7FF ) + { + outBytes->push( 0xC0 | (c >> 6) ); + outBytes->push( 0x80 | (c & 63) ); + } + else if( c <= 0xFFFF ) + { + outBytes->push( 0xE0 | (c >> 12) ); + outBytes->push( 0x80 | ((c >> 6) & 63) ); + outBytes->push( 0x80 | (c & 63) ); + } + else + { + outBytes->push( 0xF0 | (c >> 18) ); + outBytes->push( 0x80 | ((c >> 12) & 63) ); + outBytes->push( 0x80 | ((c >> 6) & 63) ); + outBytes->push( 0x80 | (c & 63) ); + } + } + } + else + #endif + { + outBytes->__SetSize(inString.length); + memcpy(outBytes->GetBase(), inString.raw_ptr(),inString.length); + } +} + #ifdef HX_SMART_STRINGS String _hx_utf8_to_utf16(const unsigned char *ptr, int inUtf8Len, bool addHash) { @@ -1298,6 +1567,37 @@ String _hx_utf8_to_utf16(const unsigned char *ptr, int inUtf8Len, bool addHash) } #endif + +void __hxcpp_string_of_bytes(Array &inBytes,String &outString,int pos,int len,bool inCopyPointer) +{ + if (inCopyPointer) + outString = String( (const char *)inBytes->GetBase(), len); + else if (len==0) + outString = String::emptyString; + else + { + const unsigned char *p0 = (const unsigned char *)inBytes->GetBase(); +#ifdef HX_SMART_STRINGS + bool hasWChar = false; + const unsigned char *p = p0 + pos; + for(int i=0;i127) + { + hasWChar = true; + break; + } + if (hasWChar) + { + outString = _hx_utf8_to_utf16(p0+pos,len,true); + } + else +#endif + outString = String( GCStringDup((const char *)p0+pos, len, 0), len); + } +} + + + const char * String::utf8_str(hx::IStringAlloc *inBuffer,bool throwInvalid, int *byteLength) const { #ifdef HX_SMART_STRINGS @@ -2256,302 +2556,3 @@ void String::__boot() &CreateEmptyString, &CreateString, 
0, 0, 0 ); } - -// -------- HAXE < 5 FUNCTIONS ---------------------------------------- - -#if (HXCPP_API_LEVEL<500) - -String String::__URLEncode() const -{ - Array bytes(0, length); - // utf8-encode - __hxcpp_bytes_of_string(bytes, *this); - - int extra = 0; - int utf8_chars = bytes->__length(); - for (int i = 0; i < utf8_chars; i++) - if (!safeChars[bytes[i]]) - extra++; - if (extra == 0) - return *this; - - int l = utf8_chars + extra * 2; - char* result = hx::NewString(l); - char* ptr = result; - - for (int i = 0; i < utf8_chars; i++) - { - if (!safeChars[bytes[i]]) - { - static char hex[] = "0123456789ABCDEF"; - unsigned char b = bytes[i]; - *ptr++ = '%'; - *ptr++ = hex[b >> 4]; - *ptr++ = hex[b & 0x0f]; - } - else - *ptr++ = bytes[i]; - } - return String(result, l); -} - -String String::__URLDecode() const -{ - // Create the decoded string; the decoded form might have fewer than - // [length] characters, but it won't have more. If it has fewer than - // [length], some memory will be wasted here, but on the assumption that - // most URLs have only a few '%NN' encodings in them, don't bother - // counting the number of characters in the resulting string first. 
- char* decoded = NewString(length), * d = decoded; - - bool hasBig = false; - -#ifdef HX_SMART_STRINGS - if (isUTF16Encoded()) - { - for (int i = 0; i < length; i++) - { - int c = __w[i]; - if (c > 127) - *d++ = '?'; - else if (c == '+') - *d++ = ' '; - else if ((c == '%') && (i < (length - 2))) - { - int ch = ((hex(__w[i + 1]) << 4) | (hex(__w[i + 2]))); - if (ch > 127) - hasBig = true; - *d++ = ch; - i += 2; - } - else - *d++ = c; - } - } - else -#endif - { - for (int i = 0; i < length; i++) - { - int c = __s[i]; - if (c > 127) - *d++ = '?'; - else if (c == '+') - *d++ = ' '; - else if ((c == '%') && (i < (length - 2))) - { - int ch = ((hex(__s[i + 1]) << 4) | (hex(__s[i + 2]))); -#ifdef HX_SMART_STRINGS - if (ch > 127) - hasBig = true; -#endif - * d++ = ch; - i += 2; - } - else - *d++ = c; - } - } - -#ifdef HX_SMART_STRINGS - if (hasBig) - return _hx_utf8_to_utf16((const unsigned char*)decoded, d - decoded, false); -#endif - - return String(decoded, (d - decoded)); -} - -void __hxcpp_bytes_of_string(Array& outBytes, const String& inString) -{ - if (!inString.length) - return; - -#ifdef HX_SMART_STRINGS - if (inString.isUTF16Encoded()) - { - const char16_t* src = inString.raw_wptr(); - const char16_t* end = src + inString.length; - while (src < end) - { - int c = Char16Advance(src); - - if (c <= 0x7F) - outBytes->push(c); - else if (c <= 0x7FF) - { - outBytes->push(0xC0 | (c >> 6)); - outBytes->push(0x80 | (c & 63)); - } - else if (c <= 0xFFFF) - { - outBytes->push(0xE0 | (c >> 12)); - outBytes->push(0x80 | ((c >> 6) & 63)); - outBytes->push(0x80 | (c & 63)); - } - else - { - outBytes->push(0xF0 | (c >> 18)); - outBytes->push(0x80 | ((c >> 12) & 63)); - outBytes->push(0x80 | ((c >> 6) & 63)); - outBytes->push(0x80 | (c & 63)); - } - } - } - else -#endif - { - outBytes->__SetSize(inString.length); - memcpy(outBytes->GetBase(), inString.raw_ptr(), inString.length); - } -} - -void __hxcpp_string_of_bytes(Array& inBytes, String& outString, int pos, int len, bool 
inCopyPointer) -{ - if (inCopyPointer) - outString = String((const char*)inBytes->GetBase(), len); - else if (len == 0) - outString = String::emptyString; - else - { - const unsigned char* p0 = (const unsigned char*)inBytes->GetBase(); -#ifdef HX_SMART_STRINGS - bool hasWChar = false; - const unsigned char* p = p0 + pos; - for (int i = 0; i < len; i++) - if (p[i] > 127) - { - hasWChar = true; - break; - } - if (hasWChar) - { - outString = _hx_utf8_to_utf16(p0 + pos, len, true); - } - else -#endif - outString = String(GCStringDup((const char*)p0 + pos, len, 0), len); - } -} - -String __hxcpp_char_array_to_utf8_string(Array& inChars, int inFirst, int inLen) -{ - int len = inChars->length; - if (inFirst < 0) - inFirst = 0; - if (inLen < 0) inLen = len; - if (inFirst + inLen > len) - inLen = len - inFirst; - if (inLen <= 0) - return String::emptyString; - - int* base = &inChars[0]; -#ifdef HX_SMART_STRINGS - bool hasBig = false; - for (int i = 0; i < inLen; i++) - if (base[i + inFirst] > 127) - { - hasBig = true; - break; - } - - if (hasBig) - { - char16_t* ptr = String::allocChar16Ptr(inLen); - for (int i = 0; i < inLen; i++) - ptr[i] = base[i + inFirst]; - return String(ptr, inLen); - } -#endif - char* result = TConvertToUTF8(base + inFirst, &len, 0, true); - return String(result, len); -} - -Array __hxcpp_utf8_string_to_char_array(String& inString) -{ -#ifdef HX_SMART_STRINGS - Array result = Array_obj::__new(inString.length); - if (inString.isUTF16Encoded()) - { - const char16_t* ptr = inString.wc_str(); - for (int i = 0; i < inString.length; i++) - result[i] = ptr[i]; - } - else - { - const char* ptr = inString.raw_ptr(); - for (int i = 0; i < inString.length; i++) - result[i] = ptr[i]; - } -#else - Array result = Array_obj::__new(0, inString.length); - - const unsigned char* src = (const unsigned char*)inString.__s; - const unsigned char* end = src + inString.length; - while (src < end) - result->push(DecodeAdvanceUTF8(src)); - - if (src != end) - 
hx::Throw(HX_CSTRING("Invalid UTF8")); -#endif - - return result; -} - - -String __hxcpp_char_bytes_to_utf8_string(String& inBytes) -{ -#ifdef HX_SMART_STRINGS - // This does not really make much sense - return inBytes; -#else - int len = inBytes.length; - char* result = TConvertToUTF8((unsigned char*)inBytes.__s, &len, 0, true); - return String(result, len); -#endif -} - -String __hxcpp_utf8_string_to_char_bytes(String& inUTF8) -{ -#ifdef HX_SMART_STRINGS - // This does not really make much sense - return inUTF8; -#else - const unsigned char* src = (unsigned char*)inUTF8.__s; - const unsigned char* end = src + inUTF8.length; - int char_count = 0; - while (src < end) - { - int c = DecodeAdvanceUTF8(src, end); - char_count++; - if (c == 8364) // euro symbol - c = 164; - else if (c == 0xFEFF) // BOM - { - char_count--; - } - else if (c > 255) - hx::Throw(HX_CSTRING("Utf8::decode invalid character")); - } - - if (src != end) - hx::Throw(HX_CSTRING("Invalid UTF8")); - - char* result = hx::NewString(char_count); - - src = (unsigned char*)inUTF8.__s; - char_count = 0; - while (src < end) - { - int c = DecodeAdvanceUTF8(src); - if (c == 8364) // euro symbol - c = 164; - if (c != 0xFEFF) // BOM - result[char_count++] = c; - } - - result[char_count] = '\0'; - return String(result, char_count); -#endif -} - -#endif \ No newline at end of file From e7d9ba41431ca71bcef3b6aaaf78efdaeea12072 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Sun, 28 Dec 2025 10:50:31 +0000 Subject: [PATCH 28/32] Add pointer check define --- test/native/compile.hxml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/native/compile.hxml b/test/native/compile.hxml index 825082805..063a21e4f 100644 --- a/test/native/compile.hxml +++ b/test/native/compile.hxml @@ -1,3 +1,4 @@ -m Native -L utest +-D HXCPP_CHECK_POINTER --cpp bin \ No newline at end of file From e50dab9c5bbcc9f34c58820ad3cabae7909c67bc Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Sun, 28 Dec 2025 12:24:27 +0000 Subject: [PATCH 29/32] 
I don't understand why this is needed No changes were made in this area, dev haxe + hxcpp also fails to build that test locally for me --- include/cpp/Pointer.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/cpp/Pointer.h b/include/cpp/Pointer.h index 48aaa50f7..117f0e8fe 100644 --- a/include/cpp/Pointer.h +++ b/include/cpp/Pointer.h @@ -350,6 +350,8 @@ class Pointer template inline void set_ref(O val) { } operator Dynamic () const { return CreateDynamicPointer(ptr); } + template + operator Pointer() const { return Pointer(static_cast(ptr)); } //operator hx::Val () const { return CreateDynamicPointer((void *)ptr); } operator void * () { return ptr; } void * get_raw() { return ptr; } From d31ea3aa9ccb0f6481b82c6a577946a2a453ff57 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Sun, 28 Dec 2025 13:11:28 +0000 Subject: [PATCH 30/32] remove addition since there must be something else weird going on --- include/cpp/Pointer.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/cpp/Pointer.h b/include/cpp/Pointer.h index 117f0e8fe..48aaa50f7 100644 --- a/include/cpp/Pointer.h +++ b/include/cpp/Pointer.h @@ -350,8 +350,6 @@ class Pointer template inline void set_ref(O val) { } operator Dynamic () const { return CreateDynamicPointer(ptr); } - template - operator Pointer() const { return Pointer(static_cast(ptr)); } //operator hx::Val () const { return CreateDynamicPointer((void *)ptr); } operator void * () { return ptr; } void * get_raw() { return ptr; } From d13d5a890a1cb286134cc25bec019d670cdfd484 Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Fri, 2 Jan 2026 00:00:49 +0000 Subject: [PATCH 31/32] Could it really be this? 
--- test/native/compile.hxml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/native/compile.hxml b/test/native/compile.hxml index 063a21e4f..a063c9266 100644 --- a/test/native/compile.hxml +++ b/test/native/compile.hxml @@ -1,4 +1,4 @@ -m Native -L utest --D HXCPP_CHECK_POINTER +-D HXCPP-DEBUGGER --cpp bin \ No newline at end of file From 8a7c915c65e55de27b2390a49ae6665a0d847f9e Mon Sep 17 00:00:00 2001 From: Aidan Lee Date: Fri, 2 Jan 2026 00:19:55 +0000 Subject: [PATCH 32/32] Dynamic char32_t support --- include/Array.h | 2 ++ include/Dynamic.h | 8 ++++++++ include/cpp/Variant.h | 6 ++++++ src/Dynamic.cpp | 5 +++++ 4 files changed, 21 insertions(+) diff --git a/include/Array.h b/include/Array.h index e95c433de..2c3f2d29e 100644 --- a/include/Array.h +++ b/include/Array.h @@ -40,6 +40,7 @@ template<> struct ReturnNull { typedef Dynamic type; }; template<> struct ReturnNull { typedef Dynamic type; }; template<> struct ReturnNull { typedef Dynamic type; }; template<> struct ReturnNull { typedef Dynamic type; }; +template<> struct ReturnNull { typedef Dynamic type; }; template<> struct ReturnNull { typedef Dynamic type; }; template<> struct ReturnNull { typedef Dynamic type; }; template<> struct ReturnNull { typedef Dynamic type; }; @@ -433,6 +434,7 @@ template<> struct ArrayClassId { enum { id=hx::clsIdArrayByte }; }; template<> struct ArrayClassId { enum { id=hx::clsIdArrayShort }; }; template<> struct ArrayClassId { enum { id=hx::clsIdArrayShort }; }; template<> struct ArrayClassId { enum { id = hx::clsIdArrayShort }; }; +template<> struct ArrayClassId { enum { id = hx::clsIdArrayInt }; }; template<> struct ArrayClassId { enum { id=hx::clsIdArrayInt }; }; template<> struct ArrayClassId { enum { id=hx::clsIdArrayInt }; }; template<> struct ArrayClassId { enum { id=hx::clsIdArrayFloat32 }; }; diff --git a/include/Dynamic.h b/include/Dynamic.h index 1d655f47b..efd6f1d8a 100644 --- a/include/Dynamic.h +++ b/include/Dynamic.h @@ -25,6 +25,7 @@ 
class HXCPP_EXTERN_CLASS_ATTRIBUTES Dynamic : public hx::ObjectPtr Dynamic(unsigned char inVal); Dynamic(signed char inVal); Dynamic(char16_t inVal); + Dynamic(char32_t inVal); Dynamic(const cpp::CppInt32__ &inVal); Dynamic(bool inVal); Dynamic(double inVal); @@ -73,6 +74,7 @@ class HXCPP_EXTERN_CLASS_ATTRIBUTES Dynamic : public hx::ObjectPtr inline operator char () const { return mPtr ? mPtr->__ToInt() : 0; } inline operator signed char () const { return mPtr ? mPtr->__ToInt() : 0; } inline operator char16_t () const { return mPtr ? mPtr->__ToInt() : 0; } + inline operator char32_t () const { return mPtr ? mPtr->__ToInt() : 0; } inline operator bool() const { return mPtr && mPtr->__ToInt(); } inline operator cpp::Int64() const { return mPtr ? mPtr->__ToInt64() : 0; } inline operator cpp::UInt64() const { return mPtr ? mPtr->__ToInt64() : 0; } @@ -174,6 +176,7 @@ class HXCPP_EXTERN_CLASS_ATTRIBUTES Dynamic : public hx::ObjectPtr bool operator op (signed char inRHS) const { return IsNumeric() && ((double)(*this) op (double)inRHS); } \ bool operator op (unsigned char inRHS) const { return IsNumeric() && ((double)(*this) op (double)inRHS); } \ bool operator op (char16_t inRHS) const { return IsNumeric() && ((double)(*this) op (double)inRHS); } \ + bool operator op (char32_t inRHS) const { return IsNumeric() && ((double)(*this) op (double)inRHS); } \ bool operator op (bool inRHS) const { return IsBool() && ((double)(*this) op (double)inRHS); } \ bool operator != (const String &inRHS) const { return !mPtr || ((String)(*this) != inRHS); } @@ -188,6 +191,7 @@ class HXCPP_EXTERN_CLASS_ATTRIBUTES Dynamic : public hx::ObjectPtr bool operator != (signed char inRHS) const { return !IsNumeric() || ((double)(*this) != (double)inRHS); } bool operator != (unsigned char inRHS) const { return !IsNumeric() || ((double)(*this) != (double)inRHS); } bool operator != (char16_t inRHS) const { return !IsNumeric() || ((double)(*this) != (double)inRHS); } + bool operator != (char32_t inRHS) 
const { return !IsNumeric() || ((double)(*this) != (double)inRHS); } bool operator != (bool inRHS) const { return !IsBool() || ((double)(*this) != (double)inRHS); } @@ -234,6 +238,7 @@ class HXCPP_EXTERN_CLASS_ATTRIBUTES Dynamic : public hx::ObjectPtr Dynamic operator+(const signed char &i) const; Dynamic operator+(const unsigned char &i) const; Dynamic operator+(const char16_t& i) const; + Dynamic operator+(const char32_t& i) const; Dynamic operator+(const double &d) const; Dynamic operator+(const float &d) const; Dynamic operator+(const cpp::Variant &d) const; @@ -277,6 +282,8 @@ class HXCPP_EXTERN_CLASS_ATTRIBUTES Dynamic : public hx::ObjectPtr { return mPtr->__GetType()==vtInt ? Dynamic((int)(*this) op inRHS) : Dynamic((double)(*this) op inRHS); } \ Dynamic operator op (const char16_t &inRHS) const \ { return mPtr->__GetType()==vtInt ? Dynamic((int)(*this) op inRHS) : Dynamic((double)(*this) op inRHS); } \ + Dynamic operator op (const char32_t &inRHS) const \ + { return mPtr->__GetType()==vtInt ? 
Dynamic((int)(*this) op inRHS) : Dynamic((double)(*this) op inRHS); } \ Dynamic operator op (const cpp::Int64 &inRHS) const \ { return Dynamic((double)(*this) op inRHS); } \ Dynamic operator op (const cpp::UInt64 &inRHS) const \ @@ -465,6 +472,7 @@ COMPARE_DYNAMIC_OP( > ) inline double operator op (const signed char &inLHS,const Dynamic &inRHS) { return inLHS op (double)inRHS; } \ inline double operator op (const unsigned char &inLHS,const Dynamic &inRHS) { return inLHS op (double)inRHS; } \ inline double operator op (const char16_t &inLHS,const Dynamic &inRHS) { return inLHS op (double)inRHS; } \ + inline double operator op (const char32_t &inLHS,const Dynamic &inRHS) { return inLHS op (double)inRHS; } \ ARITH_DYNAMIC( - ) ARITH_DYNAMIC( + ) diff --git a/include/cpp/Variant.h b/include/cpp/Variant.h index 7197927a7..e4f83e9eb 100644 --- a/include/cpp/Variant.h +++ b/include/cpp/Variant.h @@ -110,6 +110,7 @@ namespace cpp inline operator char () const { return asInt(); } inline operator signed char () const { return asInt(); } inline operator char16_t() const { return asInt(); } + inline operator char32_t() const { return asInt(); } inline operator cpp::Int64 () const { return asInt64(); } inline operator cpp::UInt64 () const { return asInt64(); } inline bool operator !() const { return !asInt(); } @@ -208,6 +209,7 @@ namespace cpp inline bool operator op (signed char inRHS) const { return isNumeric() && (asDouble() op (double)inRHS); } \ inline bool operator op (unsigned char inRHS) const { return isNumeric() && (asDouble() op (double)inRHS); } \ inline bool operator op (char16_t inRHS) const { return isNumeric() && (asDouble() op (double)inRHS); } \ + inline bool operator op (char32_t inRHS) const { return isNumeric() && (asDouble() op (double)inRHS); } \ inline bool operator op (bool inRHS) const { return isBool() && (asDouble() op (double)inRHS); } \ inline bool operator op (const Dynamic &inRHS) const { return Compare(inRHS) op 0; } \ @@ -281,6 +283,7 @@ 
namespace cpp inline double operator op (const signed char &inLHS,const cpp::Variant &inRHS) { return inLHS op (double)inRHS; } \ inline double operator op (const unsigned char &inLHS,const cpp::Variant &inRHS) { return inLHS op (double)inRHS; } \ inline double operator op (const char16_t &inLHS,const cpp::Variant &inRHS) { return inLHS op (double)inRHS; } \ + inline double operator op (const char32_t &inLHS,const cpp::Variant &inRHS) { return inLHS op (double)inRHS; } \ inline double operator op (const signed short &inLHS,const cpp::Variant &inRHS) { return inLHS op (double)inRHS; } \ inline double operator op (const unsigned short &inLHS,const cpp::Variant &inRHS) { return inLHS op (double)inRHS; } \ inline double operator op (const cpp::Int64 &inLHS,const cpp::Variant &inRHS) { return inLHS op (double)inRHS; } \ @@ -608,6 +611,7 @@ HX_VARIANT_OP_ISEQ(unsigned short) HX_VARIANT_OP_ISEQ(signed char) HX_VARIANT_OP_ISEQ(unsigned char) HX_VARIANT_OP_ISEQ(char16_t) +HX_VARIANT_OP_ISEQ(char32_t) HX_VARIANT_OP_ISEQ(bool) inline bool operator < (bool inLHS,const cpp::Variant &inRHS) { return false; } @@ -639,6 +643,8 @@ inline bool operator > (bool inLHS,const cpp::Variant &inRHS) { return false; } { return inRHS.isNumeric() && (inLHS op (double)inRHS); } \ inline bool operator op (char16_t inLHS,const ::cpp::Variant &inRHS) \ { return inRHS.isNumeric() && (inLHS op (double)inRHS); } \ + inline bool operator op (char32_t inLHS,const ::cpp::Variant &inRHS) \ + { return inRHS.isNumeric() && (inLHS op (double)inRHS); } \ inline bool operator op (const null &,const ::cpp::Variant &inRHS) \ { return false; } \ diff --git a/src/Dynamic.cpp b/src/Dynamic.cpp index daf7eae5a..1e3631b8f 100644 --- a/src/Dynamic.cpp +++ b/src/Dynamic.cpp @@ -402,6 +402,11 @@ Dynamic::Dynamic(char16_t inVal) mPtr = fromInt(inVal); } +Dynamic::Dynamic(char32_t inVal) +{ + mPtr = fromInt(inVal); +} + Dynamic::Dynamic(double inVal) { if ( (int)inVal==inVal && inVal>=-1 && inVal<256 )