Skip to content

Commit 231750d

Browse files
committed
Add memory.dataUTF8() built-in function
1 parent 5a125c7 commit 231750d

File tree

6 files changed

+283
-3
lines changed

6 files changed

+283
-3
lines changed

src/builtins.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ import {
118118
isPowerOf2
119119
} from "./util";
120120

121+
// Use the built-in `TextEncoder` for UTF-8 conversion
122+
declare let TextEncoder: any;
123+
121124
/** Internal names of various compiler built-ins. */
122125
export namespace BuiltinNames {
123126

@@ -749,6 +752,7 @@ export namespace BuiltinNames {
749752
export const memory_copy = "~lib/memory/memory.copy";
750753
export const memory_fill = "~lib/memory/memory.fill";
751754
export const memory_data = "~lib/memory/memory.data";
755+
export const memory_dataUTF8 = "~lib/memory/memory.dataUTF8";
752756

753757
// std/typedarray.ts
754758
export const Int8Array = "~lib/typedarray/Int8Array";
@@ -3491,6 +3495,41 @@ function builtin_memory_data(ctx: BuiltinFunctionContext): ExpressionRef {
34913495
}
34923496
builtinFunctions.set(BuiltinNames.memory_data, builtin_memory_data);
34933497

3498+
// memory.dataUTF8(value) -> usize
// Compiles a call to `memory.dataUTF8`: takes a string literal argument, encodes it with
// `TextEncoder`, appends a single zero byte, hands the bytes to
// `compiler.addAlignedMemorySegment` and evaluates to a usize constant holding the returned
// segment offset. Evaluates to `module.unreachable()` when the argument checks fail.
3499+
function builtin_memory_dataUTF8(ctx: BuiltinFunctionContext): ExpressionRef {
3500+
let compiler = ctx.compiler;
3501+
let module = compiler.module;
3502+
// Bitwise `|` (not `||`) evaluates both checks even when the first one already failed;
// a non-zero combined result aborts compilation of this call.
if (
3503+
checkTypeAbsent(ctx) |
3504+
checkArgsRequired(ctx, 1)
3505+
) return module.unreachable();
3506+
let operands = ctx.operands;
3507+
let usizeType = compiler.options.usizeType;
3508+
let offset: i64;
3509+
let arg0 = operands[0];
3510+
// Only a string literal is accepted; any other expression reports String_literal_expected
// at the argument's range.
if (!arg0.isLiteralKind(LiteralKind.String)) {
3511+
compiler.error(
3512+
DiagnosticCode.String_literal_expected,
3513+
arg0.range
3514+
);
3515+
return module.unreachable();
3516+
}
3517+
let str = (<StringLiteralExpression>arg0).value;
3518+
// NOTE(review): the standard TextEncoder constructor takes no arguments and always produces
// UTF-8, so the 'utf8' label looks redundant — confirm against the runtimes the compiler targets.
let array : Uint8Array = new TextEncoder('utf8').encode(str);
3519+
// A freshly constructed Uint8Array is zero-filled, so copying the encoded bytes into a buffer
// that is one byte longer leaves a trailing 0 as the terminator.
let arrayNullTerminated = new Uint8Array(array.length + 1);
3520+
arrayNullTerminated.set(array);
3521+
// The second argument is presumably the alignment in bytes (1 = byte-aligned) — confirm
// against addAlignedMemorySegment.
offset = compiler.addAlignedMemorySegment(arrayNullTerminated, 1).offset;
3522+
// FIXME: what if recompiles happen? recompiles are bad.
// NOTE(review): presumably each compilation of this call adds another segment — verify.
3523+
compiler.currentType = usizeType;
3524+
// Emit the offset as a constant of the target's pointer width; for 32-bit usize the high
// word of the offset is asserted to be zero before it is dropped.
if (usizeType == Type.usize32) {
3525+
assert(!i64_high(offset));
3526+
return module.i32(i64_low(offset));
3527+
} else {
3528+
return module.i64(i64_low(offset), i64_high(offset));
3529+
}
3530+
}
3531+
// Register the handler under the built-in's internal name.
builtinFunctions.set(BuiltinNames.memory_dataUTF8, builtin_memory_dataUTF8);
3532+
34943533
// === GC =====================================================================================
34953534

34963535
function builtin_i31_new(ctx: BuiltinFunctionContext): ExpressionRef {

std/assembly/index.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1798,6 +1798,8 @@ declare namespace memory {
17981798
export function data(size: i32, align?: i32): usize;
17991799
/** Gets a pointer to a pre-initialized static chunk of memory. Alignment defaults to the size of `T`. Arguments must be compile-time constants. */
18001800
export function data<T>(values: T[], align?: i32): usize;
1801+
/** Gets a pointer to a pre-initialized static chunk of memory containing the null-terminated UTF-8 encoding of `value`. The value must be a string literal. */
1802+
export function dataUTF8(value: string): usize;
18011803

18021804
export namespace atomic {
18031805
/** Performs a wait operation on a 32-bit integer value in memory suspending this agent if the condition is met. */

std/assembly/memory.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ export namespace memory {
7676
// @ts-ignore: decorator
7777
@builtin
7878
export declare function data<T>(size: T, align?: i32): usize;
79+
80+
/** Gets a pointer to a null-terminated UTF-8 constant in static memory. */
81+
// @ts-ignore: decorator
82+
@builtin
83+
export declare function dataUTF8(str : string): usize;
7984
}
8085

8186
// @ts-ignore: decorator

tests/compiler/memory.debug.wat

Lines changed: 125 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
(type $2 (func (param i32 i32 i32 i32)))
55
(import "env" "abort" (func $~lib/builtins/abort (param i32 i32 i32 i32)))
66
(global $memory/ptr (mut i32) (i32.const 80))
7-
(global $~lib/memory/__data_end i32 (i32.const 212))
8-
(global $~lib/memory/__stack_pointer (mut i32) (i32.const 32980))
9-
(global $~lib/memory/__heap_base i32 (i32.const 32980))
7+
(global $~lib/memory/__data_end i32 (i32.const 220))
8+
(global $~lib/memory/__stack_pointer (mut i32) (i32.const 32988))
9+
(global $~lib/memory/__heap_base i32 (i32.const 32988))
1010
(memory $0 1)
1111
(data $0 (i32.const 16) "\00\00\00\00")
1212
(data $1 (i32.const 28) ",\00\00\00\00\00\00\00\00\00\00\00\02\00\00\00\12\00\00\00m\00e\00m\00o\00r\00y\00.\00t\00s\00\00\00\00\00\00\00\00\00\00\00")
@@ -33,6 +33,8 @@
3333
(data $22 (i32.const 206) "\01")
3434
(data $23 (i32.const 207) "\01")
3535
(data $24 (i32.const 208) "\01")
36+
(data $25 (i32.const 209) ":)\00")
37+
(data $26 (i32.const 212) "\f0\9f\90\8c\00")
3638
(table $0 1 1 funcref)
3739
(elem $0 (i32.const 1))
3840
(export "memory" (memory $0))
@@ -475,6 +477,126 @@
475477
call $~lib/builtins/abort
476478
unreachable
477479
end
480+
i32.const 209
481+
global.set $memory/ptr
482+
global.get $memory/ptr
483+
i32.load8_u
484+
i32.const 58
485+
i32.eq
486+
i32.eqz
487+
if
488+
i32.const 0
489+
i32.const 48
490+
i32.const 66
491+
i32.const 1
492+
call $~lib/builtins/abort
493+
unreachable
494+
end
495+
global.get $memory/ptr
496+
i32.const 1
497+
i32.add
498+
i32.load8_u
499+
i32.const 41
500+
i32.eq
501+
i32.eqz
502+
if
503+
i32.const 0
504+
i32.const 48
505+
i32.const 67
506+
i32.const 1
507+
call $~lib/builtins/abort
508+
unreachable
509+
end
510+
global.get $memory/ptr
511+
i32.const 2
512+
i32.add
513+
i32.load8_u
514+
i32.const 0
515+
i32.eq
516+
i32.eqz
517+
if
518+
i32.const 0
519+
i32.const 48
520+
i32.const 68
521+
i32.const 1
522+
call $~lib/builtins/abort
523+
unreachable
524+
end
525+
i32.const 212
526+
global.set $memory/ptr
527+
global.get $memory/ptr
528+
i32.load8_u
529+
i32.const 240
530+
i32.eq
531+
i32.eqz
532+
if
533+
i32.const 0
534+
i32.const 48
535+
i32.const 71
536+
i32.const 1
537+
call $~lib/builtins/abort
538+
unreachable
539+
end
540+
global.get $memory/ptr
541+
i32.const 1
542+
i32.add
543+
i32.load8_u
544+
i32.const 159
545+
i32.eq
546+
i32.eqz
547+
if
548+
i32.const 0
549+
i32.const 48
550+
i32.const 72
551+
i32.const 1
552+
call $~lib/builtins/abort
553+
unreachable
554+
end
555+
global.get $memory/ptr
556+
i32.const 2
557+
i32.add
558+
i32.load8_u
559+
i32.const 144
560+
i32.eq
561+
i32.eqz
562+
if
563+
i32.const 0
564+
i32.const 48
565+
i32.const 73
566+
i32.const 1
567+
call $~lib/builtins/abort
568+
unreachable
569+
end
570+
global.get $memory/ptr
571+
i32.const 3
572+
i32.add
573+
i32.load8_u
574+
i32.const 140
575+
i32.eq
576+
i32.eqz
577+
if
578+
i32.const 0
579+
i32.const 48
580+
i32.const 74
581+
i32.const 1
582+
call $~lib/builtins/abort
583+
unreachable
584+
end
585+
global.get $memory/ptr
586+
i32.const 4
587+
i32.add
588+
i32.load8_u
589+
i32.const 0
590+
i32.eq
591+
i32.eqz
592+
if
593+
i32.const 0
594+
i32.const 48
595+
i32.const 75
596+
i32.const 1
597+
call $~lib/builtins/abort
598+
unreachable
599+
end
478600
)
479601
(func $~start
480602
call $start:memory

tests/compiler/memory.release.wat

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
(data $22 (i32.const 1214) "\01")
1515
(data $23 (i32.const 1215) "\01")
1616
(data $24 (i32.const 1216) "\01")
17+
(data $25 (i32.const 1217) ":)")
18+
(data $26 (i32.const 1220) "\f0\9f\90\8c")
1719
(export "memory" (memory $0))
1820
(start $~start)
1921
(func $start:memory
@@ -256,6 +258,102 @@
256258
global.set $memory/ptr
257259
i32.const 1215
258260
global.set $memory/ptr
261+
i32.const 1217
262+
global.set $memory/ptr
263+
i32.const 1217
264+
i32.load8_u
265+
i32.const 58
266+
i32.ne
267+
if
268+
i32.const 0
269+
i32.const 1056
270+
i32.const 66
271+
i32.const 1
272+
call $~lib/builtins/abort
273+
unreachable
274+
end
275+
global.get $memory/ptr
276+
i32.load8_u offset=1
277+
i32.const 41
278+
i32.ne
279+
if
280+
i32.const 0
281+
i32.const 1056
282+
i32.const 67
283+
i32.const 1
284+
call $~lib/builtins/abort
285+
unreachable
286+
end
287+
global.get $memory/ptr
288+
i32.load8_u offset=2
289+
if
290+
i32.const 0
291+
i32.const 1056
292+
i32.const 68
293+
i32.const 1
294+
call $~lib/builtins/abort
295+
unreachable
296+
end
297+
i32.const 1220
298+
global.set $memory/ptr
299+
i32.const 1220
300+
i32.load8_u
301+
i32.const 240
302+
i32.ne
303+
if
304+
i32.const 0
305+
i32.const 1056
306+
i32.const 71
307+
i32.const 1
308+
call $~lib/builtins/abort
309+
unreachable
310+
end
311+
global.get $memory/ptr
312+
i32.load8_u offset=1
313+
i32.const 159
314+
i32.ne
315+
if
316+
i32.const 0
317+
i32.const 1056
318+
i32.const 72
319+
i32.const 1
320+
call $~lib/builtins/abort
321+
unreachable
322+
end
323+
global.get $memory/ptr
324+
i32.load8_u offset=2
325+
i32.const 144
326+
i32.ne
327+
if
328+
i32.const 0
329+
i32.const 1056
330+
i32.const 73
331+
i32.const 1
332+
call $~lib/builtins/abort
333+
unreachable
334+
end
335+
global.get $memory/ptr
336+
i32.load8_u offset=3
337+
i32.const 140
338+
i32.ne
339+
if
340+
i32.const 0
341+
i32.const 1056
342+
i32.const 74
343+
i32.const 1
344+
call $~lib/builtins/abort
345+
unreachable
346+
end
347+
global.get $memory/ptr
348+
i32.load8_u offset=4
349+
if
350+
i32.const 0
351+
i32.const 1056
352+
i32.const 75
353+
i32.const 1
354+
call $~lib/builtins/abort
355+
unreachable
356+
end
259357
)
260358
(func $~start
261359
call $start:memory

tests/compiler/memory.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,17 @@ assert(ptr + 4 == (ptr = memory.data<u8>([1], 4)));
5959
assert(ptr + 2 == (ptr = memory.data<u8>([1], 2)));
6060
assert(ptr + 1 == (ptr = memory.data<u8>([1], 1)));
6161
assert(ptr + 1 == memory.data<u8>([1], 16));
62+
63+
// memory.dataUTF8 should encode string literals as null-terminated UTF-8
64+
65+
// ASCII input: one byte per character (':' = 0x3A, ')' = 0x29), then the 0 terminator.
ptr = memory.dataUTF8(":)");
66+
assert(load<u8>(ptr) == 0x3A);
67+
assert(load<u8>(ptr + 1) == 0x29);
68+
assert(load<u8>(ptr + 2) == 0);
69+
70+
// Input outside the Basic Multilingual Plane: U+1F40C encodes to the four bytes
// F0 9F 90 8C, then the 0 terminator.
ptr = memory.dataUTF8("🐌");
71+
assert(load<u8>(ptr) == 0xF0);
72+
assert(load<u8>(ptr + 1) == 0x9F);
73+
assert(load<u8>(ptr + 2) == 0x90);
74+
assert(load<u8>(ptr + 3) == 0x8C);
75+
assert(load<u8>(ptr + 4) == 0x00);

0 commit comments

Comments
 (0)