Revert "fix: do not accidentally throw off serve results when test repairs fail"

devversion · devversion · commit 0b7284453643 · 2025-11-12T13:03:39.000+01:00
This reverts commit 9d1d2f5. We will just reduce test repair attempts to 0 by default for now. The long-term solution is either: - we don't do repair attempts at all, or - we treat them as entirely separate assessments. That way they don't affect the score but can still be captured for the UI/report.
diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts
@@ -42,7 +42,6 @@ interface Options {
   skipLighthouse?: boolean;
   maxTestRepairAttempts?: number;
   maxBuildRepairAttempts?: number;
-  preserveBreakingTestRepairAttempts?: boolean;
 }
 
 function builder(argv: Argv): Argv<Options> {
@@ -169,13 +168,6 @@ function builder(argv: Argv): Argv<Options> {
         description:
           'Number of repair attempts for discovered test failures (including a11y violations and ones from testCommand)',
       })
-      .option('preserve-breaking-test-repair-attempts', {
-        type: 'boolean',
-        // See rationale for the default via:
-        // https://github.com/angular/web-codegen-scorer/pull/69
-        default: false,
-        description: `Whether test repair attempts which break a build should be captured.`,
-      })
       .strict()
       .version(false)
       .help()
@@ -229,7 +221,6 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
       skipLighthouse: cliArgs.skipLighthouse,
       maxBuildRepairAttempts: cliArgs.maxBuildRepairAttempts,
       maxTestRepairAttempts: cliArgs.maxTestRepairAttempts,
-      preserveBreakingTestRepairAttempts: cliArgs.preserveBreakingTestRepairAttempts,
       abortSignal: abortCtrl.signal,
     });
 
diff --git a/runner/orchestration/build-serve-test-loop.ts b/runner/orchestration/build-serve-test-loop.ts
@@ -218,14 +218,8 @@ export async function attemptBuildAndTest(
 
     let hasBuildFailure = attempt.buildResult.status !== BuildResultStatus.SUCCESS;
     attempt.buildFailedDuringTestRepair = hasBuildFailure;
-
-    // By default, we don't preserve breaking test repair attempts as they significantly
-    // impact evaluation results by e.g. lacking serve results.
-    // TODO: In the future we should consider exploring this further, or at least capture tokens.
-    if (!hasBuildFailure || config.preserveBreakingTestRepairAttempts) {
-      attemptDetails.push(attempt);
-      lastAttempt = attempt;
-    }
+    attemptDetails.push(attempt);
+    lastAttempt = attempt;
     // If we somehow introduced build errors via the repair loop, we abort
     // further repairs and capture the failed build. This is useful insight
     // as LLMs seem to regress when asked to repair violations.
diff --git a/runner/shared-interfaces.ts b/runner/shared-interfaces.ts
@@ -30,7 +30,6 @@ export interface AssessmentConfig {
   skipLighthouse?: boolean;
   maxTestRepairAttempts?: number;
   maxBuildRepairAttempts?: number;
-  preserveBreakingTestRepairAttempts?: boolean;
   abortSignal?: AbortSignal;
 }
 

Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,6 @@ export interface AssessmentConfig {`
`30`	`30`	`skipLighthouse?: boolean;`
`31`	`31`	`maxTestRepairAttempts?: number;`
`32`	`32`	`maxBuildRepairAttempts?: number;`
`33`		`- preserveBreakingTestRepairAttempts?: boolean;`
`34`	`33`	`abortSignal?: AbortSignal;`
`35`	`34`	`}`
`36`	`35`