@@ -118,9 +118,6 @@ import {
118118 isPowerOf2
119119} from "./util" ;
120120
121- // Use the built-in `TextEncoder` for UTF-8 conversion
122- declare let TextEncoder : any ;
123-
124121/** Internal names of various compiler built-ins. */
125122export namespace BuiltinNames {
126123
@@ -3495,6 +3492,41 @@ function builtin_memory_data(ctx: BuiltinFunctionContext): ExpressionRef {
34953492}
34963493builtinFunctions . set ( BuiltinNames . memory_data , builtin_memory_data ) ;
34973494
/**
 * Encodes a UTF-16 string as well-formed UTF-8.
 *
 * A valid surrogate pair is combined into a single supplementary code point and
 * written as four bytes. An unpaired surrogate (a high surrogate not followed by
 * a low surrogate, or a low surrogate on its own) has no valid UTF-8 form, so it
 * is replaced with U+FFFD REPLACEMENT CHARACTER, matching what the built-in
 * `TextEncoder` produces.
 *
 * @param str String to encode.
 * @returns The encoded bytes, without a terminating NUL. The result is a view
 *          over a larger scratch buffer.
 */
function utf16ToUtf8(str: string): Uint8Array {
  let len: i32 = str.length;
  // Worst case is three bytes per UTF-16 code unit; a surrogate pair spends two
  // code units on four bytes, so it never exceeds that bound.
  let result = new Uint8Array(len * 3);
  let utf8Length: i32 = 0; // number of bytes actually written
  for (let i: i32 = 0; i < len; ++i) {
    // UTF-16 decode
    let codePoint: u32 = str.charCodeAt(i);
    if (codePoint >= 0xD800 && codePoint < 0xE000) {
      // Surrogate range: only a high surrogate immediately followed by a low
      // surrogate is valid. Read the next unit only when it is in bounds.
      let codePoint2: u32 = 0;
      if (codePoint < 0xDC00 && i + 1 < len) {
        codePoint2 = str.charCodeAt(i + 1);
      }
      if (codePoint2 >= 0xDC00 && codePoint2 < 0xE000) {
        // valid surrogate pair - combine to get the code point
        codePoint = ((codePoint & 0x3FF) << 10) + (codePoint2 & 0x3FF) + 0x10000;
        ++i; // the low surrogate has been consumed
      } else {
        // unpaired surrogate - cannot be represented in well-formed UTF-8
        codePoint = 0xFFFD;
      }
    }
    // UTF-8 encode
    if (codePoint < 0x0080) {
      result[utf8Length++] = codePoint;
    } else if (codePoint < 0x0800) {
      result[utf8Length++] = 0xC0 + (codePoint >> 6);
      result[utf8Length++] = 0x80 + (codePoint & 0x3F);
    } else if (codePoint < 0x10000) {
      result[utf8Length++] = 0xE0 + ((codePoint >> 12) & 0x0F);
      result[utf8Length++] = 0x80 + ((codePoint >> 6) & 0x3F);
      result[utf8Length++] = 0x80 + (codePoint & 0x3F);
    } else {
      result[utf8Length++] = 0xF0 + ((codePoint >> 18) & 0x07);
      result[utf8Length++] = 0x80 + ((codePoint >> 12) & 0x3F);
      result[utf8Length++] = 0x80 + ((codePoint >> 6) & 0x3F);
      result[utf8Length++] = 0x80 + (codePoint & 0x3F);
    }
  }
  // Trim the scratch buffer down to the bytes that were written.
  return result.subarray(0, utf8Length);
}
3529+
34983530// memory.dataUTF8(value) -> usize
34993531function builtin_memory_dataUTF8 ( ctx : BuiltinFunctionContext ) : ExpressionRef {
35003532 let compiler = ctx . compiler ;
@@ -3515,7 +3547,7 @@ function builtin_memory_dataUTF8(ctx: BuiltinFunctionContext): ExpressionRef {
35153547 return module . unreachable ( ) ;
35163548 }
35173549 let str = ( < StringLiteralExpression > arg0 ) . value ;
3518- let array : Uint8Array = new TextEncoder ( 'utf8' ) . encode ( str ) ;
3550+ let array : Uint8Array = utf16ToUtf8 ( str ) ;
35193551 let arrayNullTerminated = new Uint8Array ( array . length + 1 ) ;
35203552 arrayNullTerminated . set ( array ) ;
35213553 offset = compiler . addAlignedMemorySegment ( arrayNullTerminated , 1 ) . offset ;
0 commit comments