+ );
+}
+
+// When this file is run with `bun`, it outputs the 10th Fibonacci number (55)
+if ('Bun' in globalThis && globalThis.Bun.main.includes('forth')) {
+    // CLI entry point: tokenize -> compile -> run, with no-op visualization callbacks.
+    const noop = async () => { /* no-op */ };
+    tokenize(fib10, noop)
+        .then(tokens => compile(tokens, noop))
+        .then(program => vm(program, noop))
+        // Each stage is returned, so any failure lands in this one handler.
+        .catch(error => { console.error('Forth run failed:', error); process.exitCode = 1; });
+}
diff --git a/data/posts.ts b/data/posts.ts
index f958754..2f6fb7a 100644
--- a/data/posts.ts
+++ b/data/posts.ts
@@ -7,6 +7,7 @@ export const popularPosts = [
// Starred posts (not in any specific order)
export const postStars = [
+ "compiling-a-forth",
"counting-words-at-simd-speed",
"optimizing-my-disk-usage-program",
"maybe-the-fastest-disk-usage-program-on-macos",
diff --git a/lib/prism-forth.js b/lib/prism-forth.js
new file mode 100644
index 0000000..47a135a
--- /dev/null
+++ b/lib/prism-forth.js
@@ -0,0 +1,27 @@
+(function (Prism) {
+ Prism.languages.forth = {
+ "comment": {
+ pattern: /\\\s.*$/m,
+ greedy: true,
+ },
+ "string": {
+ pattern: /"(?:[^"\\]|\\.)*"/,
+ greedy: true,
+ },
+ "number": /\b-?\d+(?:\.\d+)?\b/,
+ "keyword":
+ /\b(?:VARIABLE|DO|LOOP|IF|THEN|ELSE|BEGIN|UNTIL|WHILE|REPEAT)\b/i,
+ "builtin":
+ /\b(?:@|!|\+|-|\*|\/|MOD|=|<|>|AND|OR|NOT|DUP|DROP|SWAP|OVER|ROT|\.)\b/,
+ "definition": {
+ pattern: /(:)\s+(\S+)/,
+ lookbehind: true,
+ inside: {
+ "keyword": /^:/,
+ "function": /\S+/,
+ },
+ },
+ "variable": /\b[A-Z]+\b/,
+ "punctuation": /[;]/,
+ };
+})(Prism);
diff --git a/pages/index.tsx b/pages/index.tsx
index 0b15960..91a46c8 100644
--- a/pages/index.tsx
+++ b/pages/index.tsx
@@ -72,7 +72,7 @@ export default function Home({ allPostsData, description, words }) {
My experimental package manager uses simple concurrency patterns to be faster than every package manager aside from Bun (which is 11% faster) when cold-installing from a lockfile.
- I've created a few small programming languages and related tools, including a Lisp-to-JavaScript compiler, which I turned into an optimizing compiler, and for which I wrote a bytecode VM.
+ I've created a few small programming languages and related tools, including a Forth compiler, a Lisp-to-JavaScript compiler, which I turned into an optimizing compiler, and for which I wrote a bytecode VM.
I also built an interpreted language with a C-style syntax, which I profiled and made faster; I later added a WebAssembly compiler and a line profiler. I also ported an expression engine to Rust.
diff --git a/posts/compiling-a-forth.md b/posts/compiling-a-forth.md
new file mode 100644
index 0000000..b329262
--- /dev/null
+++ b/posts/compiling-a-forth.md
@@ -0,0 +1,410 @@
+---
+title: "Compiling a Forth"
+date: "2025-10-06"
+tags: ["forth"]
+description: "A bytecode compiler and VM for a Forth-like language."
+---
+
+I was curious how Forth worked so I built a bytecode compiler and a VM for a Forth-like language, as well as some visualizations to show how it all works.
+
+You don't need to know anything about Forth to follow along, aside from the fact that it's a stack-oriented language.
+
+Here's a small program that prints the number three.
+
+```forth
+3 .
+```
+
+The number (`3`) is pushed to the data stack, and then the dot (`.`) pops it from the data stack and prints it.
+
+We'll need more Forth features than this to build interesting programs.
+
+Forth has two built-in stacks: the data stack (sometimes just called "the stack") and the return stack. When a word is called in Forth (words are like functions), the address of the next instruction is pushed to the return stack. When the word finishes executing, the return stack is popped into the instruction pointer.
+
+```forth
+\ (1) word declaration
+: PRINT10
+
+ \ (3) the word body is executed
+ 10 .
+
+ \ (4) ";" compiles an exit – at runtime it pops the return stack
+ \ into the instruction pointer.
+;
+
+\ (2) instruction pointer lands on a word,
+\ the next address is pushed to the return stack,
+\ and the instruction pointer is set to the word address
+PRINT10
+
+\ (5) next address is executed
+```
+
+As well as words, my compiler also supports `DO`/`LOOP`s. These use the return stack too. When `DO` executes, it pops the limit and the iterator from the data stack and stores them in the return stack. This allows the inner loop to freely operate on the data stack. When `LOOP` executes, it pops the limit and iterator from the return stack, adds one to the iterator and compares it to the limit (and exits or loops again).
+
+There are also variables, which can be declared with `VARIABLE X`, loaded with `X @`, and stored with `1 X !`.
+
+Putting these features together, here's how you can build `10` by adding `1` repeatedly.
+
+```forth
+VARIABLE A
+
+: RUN
+ 0 A ! \ initialize A
+ 10 0 DO \ push limit and iterator for DO
+ \ DO places these on the return stack
+ A @ 1 + A ! \ A = A + 1
+ LOOP \ increment i and exits when i == limit
+ A @ . \ prints 10
+;
+
+RUN
+```
+
+This set of features is enough for us to calculate numbers from the Fibonacci series, which is the example program I'll be using throughout the rest of this post.
+
+## Tokenizing
+
+Tokenization translates raw text into meaningful symbols.
+
+To turn source code into tokens, we scan through the code, skipping over whitespace and appending tokens to a list. Syntax that's a single character is turned straight into a token but multi-character syntax needs to be grouped together. For example, entire comments are discarded, and while they are being discarded, we need to track that we're "within" a comment.
+
+Identifiers, whether keywords like `DO` and `LOOP` or custom variables like `MYLONGVAR`, become single tokens.
+
+First, a visualization of what's happening:
+
+
+
+And here's a trimmed version of my tokenizer:
+
+```tsx
+function tokenize(source: string): Token[] {
+ const tokens: Token[] = [];
+
+ let index = 0;
+ while (index < source.length) {
+
+ // Consume and discard everything on a line after '\'
+ if (source[index] === "\\") {
+ const commentStart = index;
+ while (index < source.length && source[index] !== "\n") {
+ index++;
+ }
+ index++;
+ continue;
+ }
+
+ // Skip over whitespace
+ if (isWhitespace(source[index])) {
+ index++;
+ continue;
+ }
+
+ if (source[index] === "@") {
+ tokens.push({ type: "load" });
+ index++;
+ continue;
+ }
+
+ // Handle identifiers
+ if (isLetter(source[index])) {
+ const start = index;
+ let value = "";
+ while (isLetter(source[index])) {
+ value += source[index];
+ index++;
+ }
+
+ // Special-case the keywords
+ if (value === "DO") {
+ tokens.push({ type: "do" });
+ continue;
+ }
+ if (value === "LOOP") {
+ tokens.push({ type: "loop" });
+ continue;
+ }
+
+ tokens.push({ type: "identifier", value });
+ continue;
+ }
+
+ // .. trimmed other tokens, see source
+ }
+
+ return tokens;
+}
+```
+
+With our list of tokens, we're ready to start generating bytecode for the VM.
+
+## Generating Bytecode
+
+Usually, in a compiler, the step after tokenization is _parsing_, where an abstract syntax tree is built. However, the feature set of my Forth is so small that I decided to generate bytecode directly from the list of tokens.
+
+_After_ bytecode generation, my VM needs two things:
+
+- A list of operations for the VM's instruction pointer to navigate
+- The number of variables that the program refers to
+
+The latter tells the VM how many variables to allocate (a zero-initialized array). Variables in source (e.g., `A`, `B`) become integer indices into this array.
+
+This means that my bytecode generation step needs to keep track of variables that have been seen before so that I can output the correct memory address (i.e. an index into the variable table).
+
+I'll show the full list of bytecode operations and then a few of the steps for handling specific tokens.
+
+```tsx
+type Op = {
+ op: "lit", // Push value or address to DS
+ value: number;
+} | {
+ op: "load", // Pop address from DS, push value at address
+} | {
+ op: "store", // Pop address from DS, pop value from DS, store value at address
+} | {
+ op: "dup2", // Duplicate top two values on DS [a, b] -> [a, b, a, b]
+} | {
+ op: "add", // Pop top two values from DS, push sum to DS
+} | {
+ op: "eq", // Pop top two values from DS, push 1 if equal, 0 if not
+} | {
+ op: "jz", // Pop value from DS, if zero, jump to address
+ address: number;
+} | {
+ op: "jmp", // Jump to address
+ address: number;
+} | {
+ op: "call", // Push IP to RS, jump to address
+ address: number;
+} | {
+ op: "ret", // Pop IP from RS, jump to IP
+} | {
+ op: "rs_push", // Pop from DS, push to RS
+} | {
+ op: "rs_pop", // Pop from RS, push to DS
+} | {
+ op: "drop", // Discard top value from DS
+} | {
+ op: "print", // Pop value from DS, print it
+}
+```
+
+The bytecode generation step scans through the list of tokens and, as it processes them, it appends to a list of bytecode and increments the variable count to set up the correct references.
+
+Identifier tokens are either variable references, or words (function calls).
+
+```tsx
+let index = 0;
+while (index < tokens.length) {
+ const token = tokens[index];
+
+ if (token.type === "identifier") {
+ if (token.value === "VARIABLE") {
+ const nextToken = tokens[index + 1];
+
+ // Store a binding of variable name to memory address
+ variableTable[nextToken.value] = Object.keys(variableTable).length;
+ index += 2;
+ continue;
+ }
+
+ // If the variable has been declared as a word like `: FIB10`
+ // then we have previously stored the bytecode offset which we
+ // will set the instruction pointer to at runtime
+ if (wordTable[token.value] !== undefined) {
+ bytecode.push({ op: "call", address: wordTable[token.value] });
+ index++;
+ continue;
+ }
+
+ // If it's not a variable declaration, or a word, then we
+ // look up the memory address
+ bytecode.push({ op: "lit", value: variableTable[token.value] });
+ index++;
+ continue;
+ }
+
+ // ..
+}
+```
+
+Setting up the `DO`/`LOOP` bytecode generation was the trickiest part of this project. It's a minefield of possible off-by-one errors. It's also not easy to read and understand but I've chosen to put it here anyway because even just glancing over it should help you understand how the loop variables (limit, iterator) and instruction pointer jumps are combined to execute loops in Forth.
+
+```tsx
+ // ..
+
+ if (token.type === "do") {
+ index++;
+
+ // Expect: DS has [limit, start] (start is top)
+ // Move both to RS: start then limit (RS top becomes limit)
+ bytecode.push({ op: "rs_push" }) // start -> RS
+ bytecode.push({ op: "rs_push" }) // limit -> RS
+
+ // Mark first instruction of loop body
+ loopStart.push(bytecode.length);
+ continue;
+ }
+
+ if (token.type === "loop") {
+
+ // Pop limit and i from RS (RS top is limit)
+ bytecode.push({ op: "rs_pop" }) // limit -> DS
+ bytecode.push({ op: "rs_pop" }) // i -> DS
+
+ // Increment i
+ bytecode.push({ op: "lit", value: 1 })
+ bytecode.push({ op: "add" }) // i on DS
+
+ // Duplicate i and limit for compare and possible restore
+ bytecode.push({ op: "dup2" })
+ bytecode.push({ op: "eq" }) // eq flag on DS
+
+ const loopStartAddress = loopStart.pop(); // first instr of loop body
+
+ // Branch: continue when not equal (eq==0), exit when equal
+ const continueAddress = bytecode.length + 4; // skip equal-path (2 drops + jmp)
+ bytecode.push({ op: "jz", address: continueAddress })
+
+ // Equal path (fallthrough): cleanup and exit
+ bytecode.push({ op: "drop" }) // drop i
+ bytecode.push({ op: "drop" }) // drop limit
+ const afterBlockAddress = bytecode.length + 1 /* jmp */ + 3 /* continue block */;
+ bytecode.push({ op: "jmp", address: afterBlockAddress })
+
+ // Continue path:
+ // address == continueAddress
+ bytecode.push({ op: "rs_push" }) // i -> RS (top)
+ bytecode.push({ op: "rs_push" }) // limit -> RS
+ bytecode.push({ op: "jmp", address: loopStartAddress })
+
+ index++;
+ continue;
+ }
+
+ // ..
+```
+
+The rest of the token branches are more straightforward. Tokens like dot, store, load, and print all map directly to bytecode operations.
+
+The colon token branch sets the bytecode offset for the word name which allows identifiers to become word calls as we saw above.
+
+Now we've earned a visualization break.
+
+
+
+## VM
+
+Writing the VM felt a little bit like dessert. Manually stepping through the bytecode as I worked on the generation logic gave me fairly good confidence that I was heading in the right direction; I only came across one or two off-by-one bugs when putting the VM together. Essentially, I had designed it ahead-of-time.
+
+The VM scans through the bytecode operations using the instruction pointer (which starts at `0`). The instruction pointer can jump around as it encounters `jmp` (jump to offset) or `jz` (conditional jump).
+
+It manages the data stack, return stack, and the variable table (i.e. memory addresses).
+
+
+
+Here's a trimmed version of the VM:
+
+```tsx
+function vm(program: Program) {
+ const dataStack: number[] = [];
+ const returnStack: number[] = [];
+ const variableTable: number[] = new Array(program.variableCount).fill(0);
+
+ let ip = 0;
+ while (ip < program.bytecode.length) {
+ const cur = program.bytecode[ip];
+
+ if (cur.op === "lit") {
+ dataStack.push(cur.value); // Literal or memory address
+ ip++;
+ continue;
+ } else if (cur.op === "store") {
+ const address = dsPop();
+ const value = dsPop();
+ variableTable[address] = value;
+ ip++;
+ continue;
+ } else if (cur.op === "jmp") {
+ ip = cur.address;
+ continue;
+ } else if (cur.op === "jz") {
+ if (dsPop() === 0) {
+ ip = cur.address;
+ continue;
+ }
+ ip++;
+ continue;
+ } else if (cur.op === "call") {
+ ip++
+ returnStack.push(ip);
+ ip = cur.address;
+ continue;
+ } else if (cur.op === "ret") {
+ ip = rsPop();
+ continue;
+ }
+
+ // .. trimmed other ops, see source
+ }
+}
+```
+
+The code for my compiler and VM is [embedded in this website](https://github.com/healeycodes/healeycodes.com/blob/main/components/visuals/forth/components.tsx). I've been iterating on it by just running the TypeScript file:
+
+```bash
+bun ./components/visuals/forth/components.tsx
+55 # 10th Fibonacci number
+```
+
+The visuals are React components with sleeps. In order to display the progress of the different steps (tokenizing, bytecode generation, VM), I first got each working and then added a callback which takes the current data and then sleeps.
+
+So the VM function is actually async and accepts this callback:
+
+```tsx
+// VM
+async function vm(program: Program, callback:
+ (
+ highlight: { ip: number },
+ dataStack: number[],
+ returnStack: number[],
+ variableTable: number[]
+  ) => Promise<void>) {
+
+ // .. inside VM loop
+ await callback({ ip }, dataStack, returnStack, variableTable);
+ // ..
+
+}
+```
+
+And the component calls it and passes `setState` functions:
+
+```tsx
+// Component
+export function VM() {
+
+ // .. inside useEffect
+ await vm(program, async (highlight, newDataStack, newReturnStack, newVariableTable) => {
+ setHighlightIP(highlight.ip);
+ setDataStack([...newDataStack]);
+ setReturnStack([...newReturnStack]);
+ setVariableTable([...newVariableTable]);
+ await new Promise(resolve => setTimeout(resolve, 500));
+ });
+ // ..
+
+}
+```
+
+For the Forth code snippets in this post, I had to write [a Prism plugin](https://github.com/healeycodes/healeycodes.com/blob/main/lib/prism-forth.js) to get syntax highlighting working. Now that I've learned how to do this, I'll be using this method for syntax highlighting for the more esoteric (or, original) programming languages I write about!
+
+## Discrepancies
+
+I described my compiler/VM as _Forth-like_ because it's a little bit different from how Forth works.
+
+My implementation compiles to bytecode ahead-of-time. Forth is traditionally interactive. Words are interpreted and executed as they are entered, and only colon definitions are compiled. Forth uses threaded code where words contain lists of addresses pointing to other words instead of a different bytecode offset.
+
+Real Forth uses a dynamic dictionary that can be altered at runtime with new variables or word definitions. As I mentioned earlier, my word bodies are compiled with jump-over logic in the main execution stream. Also, my variables compile to `lit address` operations but real Forth variables return their address when executed directly.
+
+These are just a few of the differences but I feel like my Forth-like compiler and VM capture enough of the spirit of Forth!
From 2d55941943d06bcc56aff58bd2788d277c689ca2 Mon Sep 17 00:00:00 2001
From: Andrew Healey
Date: Mon, 6 Oct 2025 11:44:57 +0100
Subject: [PATCH 2/6] fix useEffect
---
components/visuals/forth/components.tsx | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/components/visuals/forth/components.tsx b/components/visuals/forth/components.tsx
index 04d10f2..aefcb29 100644
--- a/components/visuals/forth/components.tsx
+++ b/components/visuals/forth/components.tsx
@@ -553,12 +553,16 @@ export function Tokenizer() {
// Auto-start tokenization loop
useEffect(() => {
+ let shouldContinue = true;
const loop = async () => {
- while (true) {
+ while (shouldContinue) {
await runTokenizer();
}
};
loop();
+ return () => {
+ shouldContinue = false;
+ };
}, []);
const charNodes: ReactNode[] = [];
@@ -714,10 +718,16 @@ export function Compiler() {
};
useEffect(() => {
+ let shouldContinue = true;
const loop = async () => {
- while (true) {
- await runCompiler();
+ while (shouldContinue) {
+ while (true) {
+ await runCompiler();
+ }
}
+ return () => {
+ shouldContinue = false;
+ };
};
loop();
}, []);
@@ -881,10 +891,14 @@ export function VM() {
};
useEffect(() => {
+ let shouldContinue = true;
const loop = async () => {
- while (true) {
+ while (shouldContinue) {
await runVM();
}
+ return () => {
+ shouldContinue = false;
+ };
};
loop();
}, []);
From c5c928d4fe5ef491a786444a9800a253a21587f9 Mon Sep 17 00:00:00 2001
From: Andrew Healey
Date: Mon, 6 Oct 2025 13:06:03 +0100
Subject: [PATCH 3/6] better cancellation
---
components/visuals/forth/components.tsx | 70 +++++++++++++++----------
1 file changed, 43 insertions(+), 27 deletions(-)
diff --git a/components/visuals/forth/components.tsx b/components/visuals/forth/components.tsx
index aefcb29..29c2a14 100644
--- a/components/visuals/forth/components.tsx
+++ b/components/visuals/forth/components.tsx
@@ -530,14 +530,15 @@ export function Tokenizer() {
const [tokens, setTokens] = useState([]);
const [isRunning, setIsRunning] = useState(false);
- const runTokenizer = async () => {
- if (isRunning) return;
+ const runTokenizer = async (shouldStop: () => boolean) => {
+ if (isRunning || shouldStop()) return;
setIsRunning(true);
setTokens([]);
setHighlight({ start: 0, end: 0 });
try {
await tokenize(fib10, async (newHighlight, newTokens) => {
+ if (shouldStop()) return;
setHighlight(newHighlight);
setTokens([...newTokens]);
await new Promise(resolve => setTimeout(resolve, 150));
@@ -547,21 +548,26 @@ export function Tokenizer() {
}
// Sleep for 2 seconds before allowing next run
- await new Promise(resolve => setTimeout(resolve, 2000));
+ if (!shouldStop()) {
+ await new Promise(resolve => setTimeout(resolve, 2000));
+ }
setIsRunning(false);
};
// Auto-start tokenization loop
useEffect(() => {
- let shouldContinue = true;
+ // Only run in browser environment
+ if (typeof window === 'undefined') return;
+
+ let cancelled = false;
const loop = async () => {
- while (shouldContinue) {
- await runTokenizer();
+ while (!cancelled) {
+ await runTokenizer(() => cancelled);
}
};
loop();
return () => {
- shouldContinue = false;
+ cancelled = true;
};
}, []);
@@ -688,8 +694,8 @@ export function Compiler() {
const [bytecode, setBytecode] = useState([]);
const [isRunning, setIsRunning] = useState(false);
- const runCompiler = async () => {
- if (isRunning) return;
+ const runCompiler = async (shouldStop: () => boolean) => {
+ if (isRunning || shouldStop()) return;
setIsRunning(true);
setTokens([]);
setBytecode([]);
@@ -703,6 +709,7 @@ export function Compiler() {
// Then compile with highlighting
await compile(allTokens, async (highlight, newBytecode) => {
+ if (shouldStop()) return;
setHighlightRange({ start: highlight.tokenIdxStart, end: highlight.tokenIdxEnd });
setTokens([...allTokens]);
setBytecode([...newBytecode]);
@@ -713,23 +720,26 @@ export function Compiler() {
}
// Sleep for 2 seconds before next run
- await new Promise(resolve => setTimeout(resolve, 2000));
+ if (!shouldStop()) {
+ await new Promise(resolve => setTimeout(resolve, 2000));
+ }
setIsRunning(false);
};
useEffect(() => {
- let shouldContinue = true;
+ // Only run in browser environment
+ if (typeof window === 'undefined') return;
+
+ let cancelled = false;
const loop = async () => {
- while (shouldContinue) {
- while (true) {
- await runCompiler();
- }
+ while (!cancelled) {
+ await runCompiler(() => cancelled);
}
- return () => {
- shouldContinue = false;
- };
};
loop();
+ return () => {
+ cancelled = true;
+ };
}, []);
// Calculate which tokens to show
@@ -852,8 +862,8 @@ export function VM() {
const [variableTable, setVariableTable] = useState([]);
const [isRunning, setIsRunning] = useState(false);
- const runVM = async () => {
- if (isRunning) return;
+ const runVM = async (shouldStop: () => boolean) => {
+ if (isRunning || shouldStop()) return;
setIsRunning(true);
setBytecode([]);
setDataStack([]);
@@ -875,6 +885,7 @@ export function VM() {
// Run the VM with highlighting
await vm(program, async (highlight, newDataStack, newReturnStack, newVariableTable) => {
+ if (shouldStop()) return;
setHighlightIP(highlight.ip);
setDataStack([...newDataStack]);
setReturnStack([...newReturnStack]);
@@ -886,21 +897,26 @@ export function VM() {
}
// Sleep for 2 seconds before next run
- await new Promise(resolve => setTimeout(resolve, 2000));
+ if (!shouldStop()) {
+ await new Promise(resolve => setTimeout(resolve, 2000));
+ }
setIsRunning(false);
};
useEffect(() => {
- let shouldContinue = true;
+ // Only run in browser environment
+ if (typeof window === 'undefined') return;
+
+ let cancelled = false;
const loop = async () => {
- while (shouldContinue) {
- await runVM();
+ while (!cancelled) {
+ await runVM(() => cancelled);
}
- return () => {
- shouldContinue = false;
- };
};
loop();
+ return () => {
+ cancelled = true;
+ };
}, []);
// Calculate which bytecode lines to show (scrolled around IP)
From 9669b072832b630d29f0fa06372aec073cfa8072 Mon Sep 17 00:00:00 2001
From: Andrew Healey
Date: Mon, 6 Oct 2025 13:39:54 +0100
Subject: [PATCH 4/6] add debug logs
---
components/visuals/forth/components.tsx | 351 +++++++++++++-----------
1 file changed, 198 insertions(+), 153 deletions(-)
diff --git a/components/visuals/forth/components.tsx b/components/visuals/forth/components.tsx
index 29c2a14..666f014 100644
--- a/components/visuals/forth/components.tsx
+++ b/components/visuals/forth/components.tsx
@@ -525,52 +525,65 @@ async function vm(program: Program, callback: (highlight: { ip: number }, dataSt
}
}
-export function Tokenizer() {
- const [highlight, setHighlight] = useState<{ start: number, end: number }>({ start: 0, end: 0 });
- const [tokens, setTokens] = useState([]);
- const [isRunning, setIsRunning] = useState(false);
-
- const runTokenizer = async (shouldStop: () => boolean) => {
- if (isRunning || shouldStop()) return;
- setIsRunning(true);
- setTokens([]);
- setHighlight({ start: 0, end: 0 });
-
- try {
- await tokenize(fib10, async (newHighlight, newTokens) => {
- if (shouldStop()) return;
- setHighlight(newHighlight);
- setTokens([...newTokens]);
- await new Promise(resolve => setTimeout(resolve, 150));
- });
- } catch (error) {
- console.error('Tokenization error:', error);
- }
+const TOKENIZER_WAIT_TIME = 150;
+const TOKENIZER_FINISH_TIME = 2000;
- // Sleep for 2 seconds before allowing next run
- if (!shouldStop()) {
- await new Promise(resolve => setTimeout(resolve, 2000));
- }
- setIsRunning(false);
- };
+export function Tokenizer() {
+ const [terminal, setTerminal] = useState(null);
- // Auto-start tokenization loop
useEffect(() => {
- // Only run in browser environment
- if (typeof window === 'undefined') return;
-
+ console.log('[Tokenizer] Component mounted, starting loop');
let cancelled = false;
- const loop = async () => {
+ (async () => {
while (!cancelled) {
- await runTokenizer(() => cancelled);
+ console.log('[Tokenizer] Starting tokenization run');
+ await runTokenizer(() => cancelled, (node) => {
+ console.log('[Tokenizer] Setting terminal node');
+ setTerminal(node);
+ });
+ console.log('[Tokenizer] Tokenization run complete, waiting before next run');
+ await new Promise((resolve) => setTimeout(resolve, TOKENIZER_FINISH_TIME));
}
- };
- loop();
+ })();
+
return () => {
+ console.log('[Tokenizer] Component unmounting, cancelling');
cancelled = true;
- };
+ }
}, []);
+ return (
+
{terminal}
+ );
+}
+
+// Runs one animated tokenizer pass over `fib10`: after each step reported by
+// `tokenize`, the current frame is rendered and handed to `setTerminal`.
+async function runTokenizer(shouldStop: () => boolean, setTerminal: (node: React.ReactNode) => void) {
+    console.log('[runTokenizer] Starting tokenization process');
+    const tokens: Token[] = [];
+    let highlight = { start: 0, end: 0 };
+    const pause = () => new Promise(resolve => setTimeout(resolve, TOKENIZER_WAIT_TIME));
+    try {
+        await tokenize(fib10, async (newHighlight, newTokens) => {
+            // Once cancelled, skip rendering and the delay.
+            if (shouldStop()) {
+                console.log('[runTokenizer] Stopping due to cancellation');
+                return;
+            }
+            highlight = newHighlight;
+            tokens.length = 0;
+            tokens.push(...newTokens);
+            console.log(`[runTokenizer] Tokenized ${tokens.length} tokens, highlighting ${highlight.start}-${highlight.end}`);
+            setTerminal(renderTokenizer(highlight, tokens));
+            await pause();
+        });
+    } catch (error) {
+        console.error('[runTokenizer] Tokenization error:', error);
+    }
+}
+
+function renderTokenizer(highlight: { start: number, end: number }, tokens: Token[]): React.ReactNode {
const charNodes: ReactNode[] = [];
let inComment = false;
@@ -586,13 +599,13 @@ export function Tokenizer() {
const ch = fib10[i];
if (ch === '\n') {
- inComment = false; // Reset comment flag on new line
+ inComment = false;
charNodes.push( );
continue;
}
if (ch === '\\') {
- inComment = true; // Start comment from backslash
+ inComment = true;
}
const highlightClass = getHighlightClass(i);
@@ -619,7 +632,6 @@ export function Tokenizer() {
} else if (highlightClass === ' hl-end') {
inlineStyle.borderRadius = '0 2px 2px 0';
}
- // hl-mid has no border radius
}
charNodes.push(
@@ -627,7 +639,6 @@ export function Tokenizer() {
);
}
- // Get the latest 5 tokens for display, pad with empty slots if needed
const latestTokens = tokens.slice(-5);
const renderTokenLine = (i: number) => {
@@ -647,7 +658,6 @@ export function Tokenizer() {
prefix = 'symbol';
}
- // Fixed-width layout for alignment
const indexPart = `${tokenIndex.toString().padStart(2, '\u00A0')}:`;
const valuePart = `\u00A0${tokenStr}`;
const prefixPart = `(${prefix})`;
@@ -688,65 +698,84 @@ export function Tokenizer() {
);
}
-export function Compiler() {
- const [highlightRange, setHighlightRange] = useState<{ start: number, end: number }>({ start: -1, end: -1 });
- const [tokens, setTokens] = useState([]);
- const [bytecode, setBytecode] = useState([]);
- const [isRunning, setIsRunning] = useState(false);
-
- const runCompiler = async (shouldStop: () => boolean) => {
- if (isRunning || shouldStop()) return;
- setIsRunning(true);
- setTokens([]);
- setBytecode([]);
- setHighlightRange({ start: -1, end: -1 });
-
- try {
- // First tokenize the source
- const allTokens = await tokenize(fib10, async () => {
- // No-op callback for tokenization, just need the tokens
- });
-
- // Then compile with highlighting
- await compile(allTokens, async (highlight, newBytecode) => {
- if (shouldStop()) return;
- setHighlightRange({ start: highlight.tokenIdxStart, end: highlight.tokenIdxEnd });
- setTokens([...allTokens]);
- setBytecode([...newBytecode]);
- await new Promise(resolve => setTimeout(resolve, 500));
- });
- } catch (error) {
- console.error('Compilation error:', error);
- }
+const COMPILER_WAIT_TIME = 500;
+const COMPILER_FINISH_TIME = 2000;
- // Sleep for 2 seconds before next run
- if (!shouldStop()) {
- await new Promise(resolve => setTimeout(resolve, 2000));
- }
- setIsRunning(false);
- };
+export function Compiler() {
+ const [terminal, setTerminal] = useState(null);
useEffect(() => {
- // Only run in browser environment
- if (typeof window === 'undefined') return;
-
+ console.log('[Compiler] Component mounted, starting loop');
let cancelled = false;
- const loop = async () => {
+ (async () => {
while (!cancelled) {
- await runCompiler(() => cancelled);
+ console.log('[Compiler] Starting compilation run');
+ await runCompiler(() => cancelled, (node) => {
+ console.log('[Compiler] Setting terminal node');
+ setTerminal(node);
+ });
+ console.log('[Compiler] Compilation run complete, waiting before next run');
+ await new Promise((resolve) => setTimeout(resolve, COMPILER_FINISH_TIME));
}
- };
- loop();
+ })();
+
return () => {
+ console.log('[Compiler] Component unmounting, cancelling');
cancelled = true;
- };
+ }
}, []);
- // Calculate which tokens to show
+ return (
+
{terminal}
+ );
+}
+
+// Runs one animated compiler pass: tokenizes `fib10` up front (no animation),
+// then renders a frame through `setTerminal` after every step `compile` reports.
+async function runCompiler(shouldStop: () => boolean, setTerminal: (node: React.ReactNode) => void) {
+    console.log('[runCompiler] Starting compilation process');
+    let tokens: Token[] = [];
+    let bytecode: Bytecode[] = [];
+    let highlightRange = { start: -1, end: -1 };
+    const pause = () => new Promise(resolve => setTimeout(resolve, COMPILER_WAIT_TIME));
+
+    try {
+        // Only the finished token list is needed, so tokenization gets a no-op callback.
+        console.log('[runCompiler] Tokenizing source');
+        const allTokens = await tokenize(fib10, async () => { /* no-op */ });
+
+        // Bail out before compiling if cancellation was requested meanwhile.
+        if (shouldStop()) {
+            console.log('[runCompiler] Stopping due to cancellation after tokenization');
+            return;
+        }
+        tokens = allTokens;
+        console.log(`[runCompiler] Tokenized ${tokens.length} tokens`);
+
+        // Compile step by step, rendering the highlighted token range and bytecode so far.
+        await compile(allTokens, async (highlight, newBytecode) => {
+            if (shouldStop()) {
+                console.log('[runCompiler] Stopping due to cancellation during compilation');
+                return;
+            }
+
+            const { tokenIdxStart, tokenIdxEnd } = highlight;
+            highlightRange = { start: tokenIdxStart, end: tokenIdxEnd };
+            bytecode = newBytecode.slice();
+            console.log(`[runCompiler] Compiled ${bytecode.length} bytecode ops, highlighting tokens ${highlightRange.start}-${highlightRange.end}`);
+
+            setTerminal(renderCompiler(highlightRange, tokens, bytecode));
+            await pause();
+        });
+    } catch (error) {
+        console.error('[runCompiler] Compilation error:', error);
+    }
+}
+
+function renderCompiler(highlightRange: { start: number, end: number }, tokens: Token[], bytecode: Bytecode[]): React.ReactNode {
const visibleTokenCount = 10;
let startIndex = 0;
- // If we have a highlighted token and it would be below the visible area, scroll
if (highlightRange.start >= 0 && tokens.length > 0) {
const maxVisibleIndex = startIndex + visibleTokenCount - 1;
if (highlightRange.start > maxVisibleIndex) {
@@ -775,7 +804,6 @@ export function Compiler() {
prefix = 'symbol';
}
- // Fixed-width layout for alignment
const indexPart = `${tokenIndex.toString().padStart(2, '\u00A0')}:`;
const valuePart = `\u00A0${tokenStr}`;
const prefixPart = `(${prefix})`;
@@ -800,7 +828,6 @@ export function Compiler() {
}
};
- // Latest bytecode ops
const latestBytecode = bytecode.slice(-10);
const renderBytecodeLine = (i: number) => {
if (i < latestBytecode.length) {
@@ -854,76 +881,97 @@ export function Compiler() {
);
}
-export function VM() {
- const [highlightIP, setHighlightIP] = useState(-1);
- const [bytecode, setBytecode] = useState([]);
- const [dataStack, setDataStack] = useState([]);
- const [returnStack, setReturnStack] = useState([]);
- const [variableTable, setVariableTable] = useState([]);
- const [isRunning, setIsRunning] = useState(false);
-
- const runVM = async (shouldStop: () => boolean) => {
- if (isRunning || shouldStop()) return;
- setIsRunning(true);
- setBytecode([]);
- setDataStack([]);
- setReturnStack([]);
- setVariableTable([]);
- setHighlightIP(-1);
-
- try {
- // First tokenize and compile to get the program
- const allTokens = await tokenize(fib10, async () => {
- // No-op
- });
-
- const program = await compile(allTokens, async () => {
- // No-op
- });
-
- setBytecode(program.bytecode);
-
- // Run the VM with highlighting
- await vm(program, async (highlight, newDataStack, newReturnStack, newVariableTable) => {
- if (shouldStop()) return;
- setHighlightIP(highlight.ip);
- setDataStack([...newDataStack]);
- setReturnStack([...newReturnStack]);
- setVariableTable([...newVariableTable]);
- await new Promise(resolve => setTimeout(resolve, 750));
- });
- } catch (error) {
- console.error('VM execution error:', error);
- }
+// Delay after each rendered VM step, in milliseconds (used as a setTimeout duration in runVM).
+const VM_WAIT_TIME = 750;
+// Pause after a VM run finishes before the next run starts, in milliseconds (setTimeout duration).
+const VM_FINISH_TIME = 2000;
- // Sleep for 2 seconds before next run
- if (!shouldStop()) {
- await new Promise(resolve => setTimeout(resolve, 2000));
- }
- setIsRunning(false);
- };
+// Displays the most recent terminal node produced by runVM. The effect below
+// (empty dependency list) starts a loop that calls runVM over and over, waiting
+// VM_FINISH_TIME ms after each run, until the effect's cleanup sets `cancelled`.
+export function VM() {
+ const [terminal, setTerminal] = useState(null);
useEffect(() => {
- // Only run in browser environment
- if (typeof window === 'undefined') return;
-
+ console.log('[VM] Component mounted, starting loop');
let cancelled = false;
- const loop = async () => {
+ // The async loop is started immediately and never awaited; it exits once `cancelled` is true.
+ (async () => {
while (!cancelled) {
- await runVM(() => cancelled);
+ console.log('[VM] Starting VM run');
+ // First callback lets runVM poll for cancellation; second receives each rendered node.
+ await runVM(() => cancelled, (node) => {
+ console.log('[VM] Setting terminal node');
+ setTerminal(node);
+ });
+ console.log('[VM] VM run complete, waiting before next run');
+ await new Promise((resolve) => setTimeout(resolve, VM_FINISH_TIME));
}
- };
- loop();
+ })();
+
+ // Cleanup: flips the flag that both the loop condition and runVM's shouldStop callback read.
return () => {
+ console.log('[VM] Component unmounting, cancelling');
cancelled = true;
- };
+ }
}, []);
- // Calculate which bytecode lines to show (scrolled around IP)
+ return (
+
{terminal}
+ );
+}
+
+/**
+ * Tokenizes and compiles `fib10`, then runs the resulting program on the VM,
+ * handing a freshly rendered node to `setTerminal` after every VM step.
+ *
+ * @param shouldStop Polled after tokenizing, after compiling, and at the start of
+ *   every VM step. A `true` result makes the function return early (before the VM
+ *   starts) or makes that step skip rendering and the per-step delay.
+ * @param setTerminal Receives the node produced by `renderVM` for each step.
+ *
+ * Anything thrown along the way is logged with `console.error`, not rethrown.
+ */
+async function runVM(shouldStop: () => boolean, setTerminal: (node: React.ReactNode) => void) {
+  console.log('[runVM] Starting VM execution process');
+
+  try {
+    console.log('[runVM] Tokenizing and compiling source');
+
+    // Progress callbacks are deliberately empty here: only the final results are needed.
+    const sourceTokens = await tokenize(fib10, async () => {});
+    if (shouldStop()) {
+      console.log('[runVM] Stopping due to cancellation after tokenization');
+      return;
+    }
+
+    const program = await compile(sourceTokens, async () => {});
+    if (shouldStop()) {
+      console.log('[runVM] Stopping due to cancellation after compilation');
+      return;
+    }
+
+    const ops: Bytecode[] = program.bytecode;
+    console.log(`[runVM] Compiled program with ${ops.length} bytecode ops`);
+
+    // One callback invocation per VM step: snapshot the state, render it, then pause.
+    await vm(program, async (highlight, newDataStack, newReturnStack, newVariableTable) => {
+      if (shouldStop()) {
+        console.log('[runVM] Stopping due to cancellation during VM execution');
+        return;
+      }
+
+      // Copy the arrays so the rendered frame does not alias the arrays passed by the VM.
+      const ip: number = highlight.ip;
+      const dataSnapshot: number[] = [...newDataStack];
+      const returnSnapshot: number[] = [...newReturnStack];
+      const variableSnapshot: number[] = [...newVariableTable];
+      console.log(`[runVM] VM step at IP ${ip}, DS: [${dataSnapshot.join(',')}], RS: [${returnSnapshot.join(',')}]`);
+
+      setTerminal(renderVM(ip, ops, dataSnapshot, returnSnapshot, variableSnapshot));
+
+      await new Promise(resolve => setTimeout(resolve, VM_WAIT_TIME));
+    });
+  } catch (error) {
+    console.error('[runVM] VM execution error:', error);
+  }
+}
+
+function renderVM(highlightIP: number, bytecode: Bytecode[], dataStack: number[], returnStack: number[], variableTable: number[]): React.ReactNode {
const visibleBytecodeCount = 15;
let startIndex = 0;
- // If we have a highlighted IP and it would be below the visible area, scroll
if (highlightIP >= 0 && bytecode.length > 0) {
const maxVisibleIndex = startIndex + visibleBytecodeCount - 1;
if (highlightIP > maxVisibleIndex) {
@@ -931,7 +979,6 @@ export function VM() {
} else if (highlightIP < startIndex) {
startIndex = Math.max(0, highlightIP - Math.floor(visibleBytecodeCount / 2));
}
- // Ensure we don't scroll past the end!
if (startIndex + visibleBytecodeCount > bytecode.length) {
startIndex = Math.max(0, bytecode.length - visibleBytecodeCount);
}
@@ -976,9 +1023,8 @@ export function VM() {
}
};
- // Format stack display (show up to 5 items, most recent at top)
- const formatStack = (stack: number[], name: string) => {
- const displayStack = stack.slice(-5).reverse(); // Show last 5, most recent first
+ const formatStack = (stack: number[]) => {
+ const displayStack = stack.slice(-5).reverse();
const lines: ReactNode[] = [];
for (let i = 0; i < 5; i++) {
@@ -1003,7 +1049,6 @@ export function VM() {
return lines;
};
- // Format variable table display
const formatVariables = () => {
const lines: ReactNode[] = [];
@@ -1032,11 +1077,11 @@ export function VM() {