Skip to content

Commit 29e33c3

Browse files
committed
feat: support configuring timeout for evals
Right now the timeout is hard-coded to 30min. This is way too large for non-remote environment evaluations. We previously only increased it for remote environments.
1 parent 89f56ed commit 29e33c3

File tree

3 files changed

+11
-4
lines changed

3 files changed

+11
-4
lines changed

runner/configuration/environment-config.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ export const environmentConfigSchema = z.object({
5959
codeRatingPrompt: z.string().optional(),
6060
/** When enabled, the system prompts for this environment won't be included in the report. */
6161
classifyPrompts: z.boolean().optional(),
62+
/**
63+
* Timeout in minutes for a single prompt evaluation.
64+
*
65+
* E.g. with a value of 10, a single app that takes longer than 10min will be aborted. Defaults to 10 when omitted.
66+
*/
67+
promptTimeoutMin: z.number().optional(),
6268
/** Executor to be used for this environment. */
6369
executor: executorSchema
6470
.optional()

runner/configuration/environment.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ export class Environment {
3636
readonly isBuiltIn: boolean;
3737
/** Configured executor. */
3838
readonly executor: Executor;
39+
/** Timeout for a single eval prompt in minutes. */
40+
readonly promptTimeoutMin: number | undefined;
3941

4042
constructor(
4143
rootPath: string,
@@ -62,6 +64,7 @@ export class Environment {
6264
this.classifyPrompts = config.classifyPrompts ?? false;
6365
this.isBuiltIn = rootPath.includes('node_modules');
6466
this.executor = config.executor;
67+
this.promptTimeoutMin = config.promptTimeoutMin;
6568
}
6669

6770
/** Prompts that should be executed as a part of the evaluation. */

runner/orchestration/generate.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,8 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
167167
workerConcurrencyQueue,
168168
progress,
169169
),
170-
// 30min max per app evaluation. We just want to make sure it never gets stuck.
171-
// Note that this timeout is expected to never be hit as individual action timeouts
172-
// should fire first. E.g. local executor build or test timeouts.
173-
30,
170+
// Timeout in minutes, used to prevent evaluations from getting stuck.
171+
env.promptTimeoutMin ?? 10,
174172
);
175173
return results;
176174
} catch (e: unknown) {

0 commit comments

Comments
 (0)