Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion runner/configuration/environment-config.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import z from 'zod';
import {createMessageBuilder, fromError} from 'zod-validation-error/v3';
import {UserFacingError} from '../utils/errors.js';
import {ratingSchema} from '../ratings/rating-types.js';
import {ratingOverrideSchema, ratingSchema} from '../ratings/rating-types.js';
import {EvalPrompt, EvalPromptWithMetadata, MultiStepPrompt} from './prompts.js';
import {executorSchema} from '../orchestration/executors/executor.js';
import {
Expand All @@ -21,6 +21,11 @@ export const environmentConfigSchema = z.object({
clientSideFramework: z.string(),
/** Ratings to run when evaluating the environment. */
ratings: z.array(ratingSchema),
/**
* Map used to override fields for specific ratings. The key is the unique ID of
* the rating and the value contains the fields to override.
*/
ratingOverrides: z.record(z.string(), ratingOverrideSchema).optional(),
/** Path to the prompt used by the LLM for generating files. */
generationSystemPrompt: z.string(),
/**
Expand Down
25 changes: 21 additions & 4 deletions runner/configuration/environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ export class Environment {

/** Prompts that should be executed as a part of the evaluation. */
executablePrompts = lazy(async () => {
return this.resolveExecutablePrompts(this.config.executablePrompts, this.config.ratings);
return this.resolveExecutablePrompts(this.config.executablePrompts, this.config);
});

systemPromptGeneration = lazy(async () => {
Expand Down Expand Up @@ -166,15 +166,32 @@ export class Environment {

/**
* Resolves the prompt configuration into prompt definitions.
* @param rootPath Root path of the project.
* @param prompts Prompts to be resolved.
* @param envRatings Environment-level ratings.
* @param config Configuration for the environment.
*/
private async resolveExecutablePrompts(
prompts: EnvironmentConfig['executablePrompts'],
envRatings: Rating[],
config: EnvironmentConfig,
): Promise<RootPromptDefinition[]> {
const result: Promise<RootPromptDefinition>[] = [];
let envRatings: Rating[];

if (config.ratingOverrides) {
Object.keys(config.ratingOverrides).forEach(id => {
if (!config.ratings.some(rating => rating.id === id)) {
throw new UserFacingError(
`Rating with an ID of "${id}" has not been configured. Cannot apply an override to it.`,
);
}
});

envRatings = config.ratings.map(rating => {
const override = config.ratingOverrides![rating.id];
return override ? {...rating, ...override} : rating;
});
} else {
envRatings = config.ratings;
}

for (const def of prompts) {
if (def instanceof MultiStepPrompt) {
Expand Down
6 changes: 6 additions & 0 deletions runner/ratings/rating-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ export const ratingSchema = z.union([
llmBasedRatingSchema,
]);

/**
 * Schema describing the subset of rating fields that can be overridden.
 * Every field is optional, so an override only needs to list the fields
 * it wants to change.
 *
 * NOTE(review): `z.custom` is called without a validation function here, so
 * `category` and `scoreReduction` are presumably only constrained at the type
 * level and not checked at runtime — confirm against the zod documentation.
 */
export const ratingOverrideSchema = z.object({
  /** Override for the rating's category. */
  category: z.custom<RatingCategory>().optional(),
  /** Override for the rating's score reduction, typed as a `<number>%` string. */
  scoreReduction: z.custom<`${number}%`>().optional(),
  /** Override for the rating's grouping labels. */
  groupingLabels: z.array(z.string()).optional(),
});

/** Result of a per-build rating. */
export type PerBuildRatingResult =
| {
Expand Down
Loading