Skip to content

Commit 90eab32

Browse files
committed
Add utf16ToUtf8() helper, instead of trying to use TextEncoder
1 parent 7f1f5a8 commit 90eab32

File tree

1 file changed

+36
-4
lines changed

1 file changed

+36
-4
lines changed

src/builtins.ts

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,6 @@ import {
118118
isPowerOf2
119119
} from "./util";
120120

121-
// Use the built-in `TextEncoder` for UTF-8 conversion
122-
declare let TextEncoder: any;
123-
124121
/** Internal names of various compiler built-ins. */
125122
export namespace BuiltinNames {
126123

@@ -3495,6 +3492,41 @@ function builtin_memory_data(ctx: BuiltinFunctionContext): ExpressionRef {
34953492
}
34963493
builtinFunctions.set(BuiltinNames.memory_data, builtin_memory_data);
34973494

3495+
/**
 * Encodes a UTF-16 string as UTF-8.
 *
 * A valid surrogate pair is combined into one supplementary code point and
 * written as four bytes. An unpaired surrogate (a high surrogate that is not
 * followed by a low surrogate, or a low surrogate on its own) has no well-formed
 * UTF-8 representation, so it is replaced with U+FFFD (bytes EF BF BD), which
 * is also what `TextEncoder` produces for such input.
 *
 * @param str String to encode.
 * @returns The encoded bytes, without a terminating NUL. The returned array is
 *   a view (`subarray`) onto a larger scratch buffer, so rely on its `length`
 *   rather than on the size of the underlying buffer.
 */
function utf16ToUtf8(str: string): Uint8Array {
  let length: i32 = str.length;
  // Worst case is three bytes per UTF-16 code unit: a BMP character needs at
  // most three bytes, and a surrogate pair (two code units) needs four.
  let result = new Uint8Array(length * 3);
  let utf8Length: i32 = 0; // number of bytes written so far
  for (let i: i32 = 0; i < length; ++i) {
    // UTF-16 decode
    let codePoint: u32 = str.charCodeAt(i);
    if (codePoint >= 0xD800 && codePoint < 0xE000) {
      // Surrogate code unit. Only a high surrogate (D800..DBFF) directly
      // followed by a low surrogate (DC00..DFFF) forms a valid pair; look ahead
      // only when there is a next code unit to read.
      let lowSurrogate: u32 = 0;
      if (codePoint < 0xDC00 && i + 1 < length) {
        lowSurrogate = str.charCodeAt(i + 1);
      }
      if (lowSurrogate >= 0xDC00 && lowSurrogate < 0xE000) {
        // valid surrogate pair - combine to get the code point
        codePoint = ((codePoint & 0x3FF) << 10) + (lowSurrogate & 0x3FF) + 0x10000;
        ++i; // the low surrogate has been consumed
      } else {
        // unpaired surrogate - substitute the replacement character
        codePoint = 0xFFFD;
      }
    }
    // UTF-8 encode
    if (codePoint < 0x0080) {
      // 1 byte: 0xxxxxxx
      result[utf8Length++] = codePoint;
    } else if (codePoint < 0x0800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      result[utf8Length++] = 0xC0 + (codePoint >> 6);
      result[utf8Length++] = 0x80 + (codePoint & 0x3F);
    } else if (codePoint < 0x10000) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      result[utf8Length++] = 0xE0 + ((codePoint >> 12) & 0x0F);
      result[utf8Length++] = 0x80 + ((codePoint >> 6) & 0x3F);
      result[utf8Length++] = 0x80 + (codePoint & 0x3F);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      result[utf8Length++] = 0xF0 + ((codePoint >> 18) & 0x07);
      result[utf8Length++] = 0x80 + ((codePoint >> 12) & 0x3F);
      result[utf8Length++] = 0x80 + ((codePoint >> 6) & 0x3F);
      result[utf8Length++] = 0x80 + (codePoint & 0x3F);
    }
  }
  return result.subarray(0, utf8Length);
}
3529+
34983530
// memory.dataUTF8(value) -> usize
34993531
function builtin_memory_dataUTF8(ctx: BuiltinFunctionContext): ExpressionRef {
35003532
let compiler = ctx.compiler;
@@ -3515,7 +3547,7 @@ function builtin_memory_dataUTF8(ctx: BuiltinFunctionContext): ExpressionRef {
35153547
return module.unreachable();
35163548
}
35173549
let str = (<StringLiteralExpression>arg0).value;
3518-
let array : Uint8Array = new TextEncoder('utf8').encode(str);
3550+
let array : Uint8Array = utf16ToUtf8(str);
35193551
let arrayNullTerminated = new Uint8Array(array.length + 1);
35203552
arrayNullTerminated.set(array);
35213553
offset = compiler.addAlignedMemorySegment(arrayNullTerminated, 1).offset;

0 commit comments

Comments
 (0)