Skip to content

Commit 9d1d2f5

Browse files
committed
fix: do not accidentally throw off serve results when test repairs fail
We accidentally regressed when we introduced the test command feature. See the rationale we had back then (shortly before we landed the test command feature): angular#69
1 parent 87201d7 commit 9d1d2f5

File tree

3 files changed

+18
-2
lines changed

3 files changed

+18
-2
lines changed

runner/eval-cli.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ interface Options {
4242
skipLighthouse?: boolean;
4343
maxTestRepairAttempts?: number;
4444
maxBuildRepairAttempts?: number;
45+
preserveBreakingTestRepairAttempts?: boolean;
4546
}
4647

4748
function builder(argv: Argv): Argv<Options> {
@@ -168,6 +169,13 @@ function builder(argv: Argv): Argv<Options> {
168169
description:
169170
'Number of repair attempts for discovered test failures (including a11y violations and ones from testCommand)',
170171
})
172+
.option('preserve-breaking-test-repair-attempts', {
173+
type: 'boolean',
174+
// See rationale for the default via:
175+
// https://github.com/angular/web-codegen-scorer/pull/69
176+
default: false,
177+
description: `Whether test repair attempts which break a build should be captured.`,
178+
})
171179
.strict()
172180
.version(false)
173181
.help()
@@ -221,6 +229,7 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
221229
skipLighthouse: cliArgs.skipLighthouse,
222230
maxBuildRepairAttempts: cliArgs.maxBuildRepairAttempts,
223231
maxTestRepairAttempts: cliArgs.maxTestRepairAttempts,
232+
preserveBreakingTestRepairAttempts: cliArgs.preserveBreakingTestRepairAttempts,
224233
abortSignal: abortCtrl.signal,
225234
});
226235

runner/orchestration/build-serve-test-loop.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,14 @@ export async function attemptBuildAndTest(
218218

219219
let hasBuildFailure = attempt.buildResult.status !== BuildResultStatus.SUCCESS;
220220
attempt.buildFailedDuringTestRepair = hasBuildFailure;
221-
attemptDetails.push(attempt);
222-
lastAttempt = attempt;
221+
222+
// By default, we don't preserve breaking test repair attempts as they significantly
223+
// impact evaluation results by e.g. lacking serve results.
224+
// TODO: In the future we should consider exploring this further, or at least capture tokens.
225+
if (!hasBuildFailure || config.preserveBreakingTestRepairAttempts) {
226+
attemptDetails.push(attempt);
227+
lastAttempt = attempt;
228+
}
223229
// If we somehow introduced build errors via the repair loop, we abort
224230
// further repairs and capture the failed build. This is useful insight
225231
// as LLMs seem to regress when asked to repair violations.

runner/shared-interfaces.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ export interface AssessmentConfig {
3030
skipLighthouse?: boolean;
3131
maxTestRepairAttempts?: number;
3232
maxBuildRepairAttempts?: number;
33+
preserveBreakingTestRepairAttempts?: boolean;
3334
abortSignal?: AbortSignal;
3435
}
3536

0 commit comments

Comments
 (0)