fix(benchmark tests): Ensure "benchmark end" event is emitted even if an error is thrown by the runner (#26018)

markfields · web-flow · commit bf869f58cbbb · 2025-12-12T12:24:47.000-08:00
One of my benchmark tests fails intermittently with the error `runtime
benchmarks Non-Compat completed with status 'failed' without reporting
any data.`

This indicates that the "benchmark end" event was never emitted. I added
a try-catch around the runner so the event is still emitted when it throws, to see if
that fixes it, along with a suggested refactor to share the result-emitting code.
diff --git a/tools/benchmark/src/ResultUtilities.ts b/tools/benchmark/src/ResultUtilities.ts
@@ -0,0 +1,46 @@
+/*!
+ * Copyright (c) Microsoft Corporation and contributors. All rights reserved.
+ * Licensed under the MIT License.
+ */
+
+import type { BenchmarkResult, BenchmarkError, BenchmarkData, CustomData } from "./ResultTypes";
+import { timer } from "./timer";
+
+/**
+ * Wraps a function that returns CustomData, measuring its execution time
+ * and capturing either its result or exception.
+ * This is a generic utility that is neither mocha-specific nor time benchmark-specific.
+ *
+ * @param f - The function to run. It may return its CustomData synchronously or as a promise.
+ * @returns A callback suitable for passing to emitResultsMocha. When `f` succeeds, the callback resolves
+ * to a BenchmarkData result holding the elapsed seconds and the CustomData. When `f` throws or rejects,
+ * it resolves to a BenchmarkError result together with the failure as `exception`.
+ */
+export function captureResults(
+	f: () => CustomData | Promise<CustomData>,
+): () => Promise<{ result: BenchmarkResult; exception?: Error }> {
+	return async () => {
+		const startTime = timer.now();
+
+		let customData: CustomData;
+		try {
+			customData = await f();
+		} catch (error) {
+			// Anything can be thrown, not just Error instances. Normalize the value so the reported
+			// message is always a string and `exception` really is an Error.
+			const exception = error instanceof Error ? error : new Error(String(error));
+			const benchmarkError: BenchmarkError = { error: exception.message };
+			return { result: benchmarkError, exception };
+		}
+
+		// Only reached on success, so time spent in a failed run is never reported.
+		const elapsedSeconds = timer.toSeconds(startTime, timer.now());
+
+		const result: BenchmarkData = {
+			elapsedSeconds,
+			customData,
+		};
+
+		return { result };
+	};
+}
diff --git a/tools/benchmark/src/mocha/customOutputRunner.ts b/tools/benchmark/src/mocha/customOutputRunner.ts
@@ -6,9 +6,11 @@
 import type { Test } from "mocha";
 
 import type { BenchmarkDescription, MochaExclusiveOptions, Titled } from "../Configuration";
-import type { BenchmarkData, BenchmarkError, CustomData } from "../ResultTypes";
+import type { CustomData } from "../ResultTypes";
+import { captureResults } from "../ResultUtilities";
 import { prettyNumber } from "../RunnerUtilities";
-import { timer } from "../timer";
+
+import { emitResultsMocha } from "./runnerUtilities";
 
 /**
  * Options to configure a benchmark that reports custom measurements.
@@ -48,26 +50,14 @@ export function benchmarkCustom(options: CustomBenchmarkOptions): Test {
 			},
 		};
 
-		const startTime = timer.now();
-
-		try {
-			await options.run(reporter);
-		} catch (error) {
-			const benchmarkError: BenchmarkError = { error: (error as Error).message };
-
-			test.emit("benchmark end", benchmarkError);
-
-			throw error;
-		}
-
-		const elapsedSeconds = timer.toSeconds(startTime, timer.now());
-
-		const results: BenchmarkData = {
-			elapsedSeconds,
-			customData,
-		};
-
-		test.emit("benchmark end", results);
+		// Emits the "benchmark end" event with the result
+		await emitResultsMocha(
+			captureResults(async () => {
+				await options.run(reporter);
+				return customData;
+			}),
+			test,
+		);
 	});
 	return test;
 }
diff --git a/tools/benchmark/src/mocha/runner.ts b/tools/benchmark/src/mocha/runner.ts
@@ -16,6 +16,8 @@ import type { BenchmarkResult } from "../ResultTypes";
 import { fail } from "../assert.js";
 import { Phase, runBenchmark } from "../runBenchmark";
 
+import { emitResultsMocha } from "./runnerUtilities";
+
 /**
  * This is wrapper for Mocha's it function that runs a performance benchmark.
  *
@@ -64,72 +66,92 @@ export function supportParentProcess<
 	const itFunction = args.only === true ? it.only : it;
 	const test = itFunction(args.title, async () => {
 		if (isParentProcess) {
-			// Instead of running the benchmark in this process, create a new process.
-			// See {@link isParentProcess} for why.
-			// Launch new process, with:
-			// - mocha filter to run only this test.
-			// - --parentProcess flag removed.
-			// - --childProcess flag added (so data will be returned via stdout as json)
-
-			// Pull the command (Node.js most likely) out of the first argument since spawnSync takes it separately.
-			const command = process.argv0 ?? fail("there must be a command");
-
-			// We expect all node-specific flags to be present in execArgv so they can be passed to the child process.
-			// At some point mocha was processing the expose-gc flag itself and not passing it here, unless explicitly
-			// put in mocha's --node-option flag.
-			const childArgs = [...process.execArgv, ...process.argv.slice(1)];
-			const processFlagIndex = childArgs.indexOf("--parentProcess");
-			childArgs[processFlagIndex] = "--childProcess";
-
-			// Remove arguments for any existing test filters.
-			for (const flag of ["--grep", "--fgrep"]) {
-				const flagIndex = childArgs.indexOf(flag);
-				if (flagIndex > 0) {
-					// Remove the flag, and the argument after it (all these flags take one argument)
-					childArgs.splice(flagIndex, 2);
-				}
-			}
+			await emitResultsMocha(async () => {
+				try {
+					// Instead of running the benchmark in this process, create a new process.
+					// See {@link isParentProcess} for why.
+					// Launch new process, with:
+					// - mocha filter to run only this test.
+					// - --parentProcess flag removed.
+					// - --childProcess flag added (so data will be returned via stdout as json)
 
-			// Add test filter so child process only run the current test.
-			childArgs.push("--fgrep", test.fullTitle());
-
-			// Remove arguments for debugging if they're present; in order to debug child processes we need
-			// to specify a new debugger port for each, or they'll fail to start. Doable, but leaving it out
-			// of scope for now.
-			let inspectArgIndex: number = -1;
-			while (
-				(inspectArgIndex = childArgs.findIndex((x) => x.match(/^(--inspect|--debug).*/))) >=
-				0
-			) {
-				childArgs.splice(inspectArgIndex, 1);
-			}
+					// Pull the command (Node.js most likely) out of the first argument since spawnSync takes it separately.
+					const command = process.argv0 ?? fail("there must be a command");
 
-			// Do this import only if isParentProcess to enable running in the web as long as isParentProcess is false.
-			const childProcess = await import("node:child_process");
-			const result = childProcess.spawnSync(command, childArgs, { encoding: "utf8" });
+					// We expect all node-specific flags to be present in execArgv so they can be passed to the child process.
+					// At some point mocha was processing the expose-gc flag itself and not passing it here, unless explicitly
+					// put in mocha's --node-option flag.
+					const childArgs = [...process.execArgv, ...process.argv.slice(1)];
+					const processFlagIndex = childArgs.indexOf("--parentProcess");
+					childArgs[processFlagIndex] = "--childProcess";
 
-			if (result.error) {
-				fail(`Child process reported an error: ${result.error.message}`);
-			}
+					// Remove arguments for any existing test filters.
+					for (const flag of ["--grep", "--fgrep"]) {
+						const flagIndex = childArgs.indexOf(flag);
+						if (flagIndex > 0) {
+							// Remove the flag, and the argument after it (all these flags take one argument)
+							childArgs.splice(flagIndex, 2);
+						}
+					}
 
-			if (result.stderr !== "") {
-				fail(`Child process logged errors: ${result.stderr}`);
-			}
+					// Add test filter so child process only run the current test.
+					childArgs.push("--fgrep", test.fullTitle());
 
-			// Find the json blob in the child's output.
-			const output =
-				result.stdout.split("\n").find((s) => s.startsWith("{")) ??
-				fail(`child process must output a json blob. Got:\n${result.stdout}`);
+					// Remove arguments for debugging if they're present; in order to debug child processes we need
+					// to specify a new debugger port for each, or they'll fail to start. Doable, but leaving it out
+					// of scope for now.
+					let inspectArgIndex: number = -1;
+					while (
+						(inspectArgIndex = childArgs.findIndex((x) =>
+							x.match(/^(--inspect|--debug).*/),
+						)) >= 0
+					) {
+						childArgs.splice(inspectArgIndex, 1);
+					}
 
-			test.emit("benchmark end", JSON.parse(output));
+					// Do this import only if isParentProcess to enable running in the web as long as isParentProcess is false.
+					const childProcess = await import("node:child_process");
+					const result = childProcess.spawnSync(command, childArgs, { encoding: "utf8" });
+
+					if (result.error) {
+						fail(`Child process reported an error: ${result.error.message}`);
+					}
+
+					if (result.stderr !== "") {
+						fail(`Child process logged errors: ${result.stderr}`);
+					}
+
+					// Find the json blob in the child's output.
+					const output =
+						result.stdout.split("\n").find((s) => s.startsWith("{")) ??
+						fail(`child process must output a json blob. Got:\n${result.stdout}`);
+
+					return { result: JSON.parse(output) as BenchmarkResult };
+				} catch (error) {
+					return {
+						result: { error: (error as Error).message },
+						exception: error as Error,
+					};
+				}
+			}, test);
 			return;
 		}
 
-		const stats = await args.run();
-		// Create and run a benchmark if we are in perfMode, else run the passed in function normally
-		if (isInPerformanceTestingMode) {
-			test.emit("benchmark end", stats);
-		}
+		// Only emit results in perfMode
+		await (isInPerformanceTestingMode
+			? emitResultsMocha(
+					async () =>
+						args.run().then(
+							(result) => ({ result }),
+							(error) => ({
+								result: { error: (error as Error).message },
+								exception: error as Error,
+							}),
+						),
+					test,
+			  )
+			: // In non-perf mode, just run the function without emitting
+			  args.run());
 	});
 	return test;
 }
diff --git a/tools/benchmark/src/mocha/runnerUtilities.ts b/tools/benchmark/src/mocha/runnerUtilities.ts
@@ -0,0 +1,42 @@
+/*!
+ * Copyright (c) Microsoft Corporation and contributors. All rights reserved.
+ * Licensed under the MIT License.
+ */
+
+import type { Test } from "mocha";
+
+import type { BenchmarkResult } from "../ResultTypes";
+
+/**
+ * Executes a function, emits the results to a mocha test, and throws any exception.
+ * This handles mocha-specific result emission.
+ *
+ * @param f - Produces the result to emit and, for a failed run, the exception to throw after emitting.
+ * @param test - The mocha test on which the "benchmark end" event is emitted.
+ * @throws The `exception` returned by `f`, or whatever `f` itself threw or rejected with.
+ *
+ * @remarks
+ * `f` is expected to capture its own failures and return them. If it throws or rejects anyway,
+ * an error result is still emitted before the failure propagates, so the event is never skipped.
+ */
+export async function emitResultsMocha(
+	f: () => Promise<{ result: BenchmarkResult; exception?: Error }>,
+	test: Test,
+): Promise<void> {
+	let outcome: { result: BenchmarkResult; exception?: Error };
+	try {
+		outcome = await f();
+	} catch (error) {
+		// Emit before rethrowing: skipping the event here would leave the test with no reported data.
+		const errorResult: BenchmarkResult = {
+			error: error instanceof Error ? error.message : String(error),
+		};
+		test.emit("benchmark end", errorResult);
+		throw error;
+	}
+
+	test.emit("benchmark end", outcome.result);
+	if (outcome.exception !== undefined) {
+		throw outcome.exception;
+	}
+}