@@ -15,6 +15,12 @@ import {lazy} from '../utils/lazy-creation.js';
1515import { EnvironmentConfig } from './environment-config.js' ;
1616import { EvalPromptWithMetadata , MultiStepPrompt } from './prompts.js' ;
1717import { renderPromptTemplate } from './prompt-templating.js' ;
18+ import { getSha256Hash } from '../utils/hashing.js' ;
19+
/** Shape of the configuration stored for a single rating category. */
interface CategoryConfig {
  /** Name of the category. */
  name: string;
  /** Point ceiling for the category (presumably the most points it can contribute; confirm against the scoring code). */
  maxPoints: number;
}
1824
1925/** Represents a single prompt evaluation environment. */
2026export class Environment {
@@ -40,10 +46,18 @@ export class Environment {
4046 readonly promptTimeoutMinutes : number | undefined ;
4147 /** Configuration for the individual rating categories. */
4248 readonly ratingCategories : {
43- [ RatingCategory . HIGH_IMPACT ] : { name : string ; maxPoints : number } ;
44- [ RatingCategory . MEDIUM_IMPACT ] : { name : string ; maxPoints : number } ;
45- [ RatingCategory . LOW_IMPACT ] : { name : string ; maxPoints : number } ;
49+ [ RatingCategory . HIGH_IMPACT ] : CategoryConfig ;
50+ [ RatingCategory . MEDIUM_IMPACT ] : CategoryConfig ;
51+ [ RatingCategory . LOW_IMPACT ] : CategoryConfig ;
4652 } ;
53+ /**
54+ * Hash of the environment-level ratings. Can be used to
55+ * validate that the ratings haven't changed between runs.
56+ */
57+ readonly ratingHash : string ;
58+
59+ /** Ratings configured at the environment level. */
60+ private readonly ratings : Rating [ ] ;
4761
4862 constructor (
4963 rootPath : string ,
@@ -71,12 +85,15 @@ export class Environment {
7185 this . isBuiltIn = rootPath . includes ( 'node_modules' ) ;
7286 this . executor = config . executor ;
7387 this . promptTimeoutMinutes = config . promptTimeoutMinutes ;
88+ this . ratings = this . resolveRatings ( config ) ;
89+ this . ratingHash = this . getRatingHash ( this . ratings ) ;
7490 this . ratingCategories = this . getRatingCategories ( config ) ;
91+ this . validateRatingHash ( this . ratingHash , config ) ;
7592 }
7693
7794 /** Prompts that should be executed as a part of the evaluation. */
7895 executablePrompts = lazy ( async ( ) => {
79- return this . resolveExecutablePrompts ( this . config . executablePrompts , this . config ) ;
96+ return this . resolveExecutablePrompts ( this . config . executablePrompts ) ;
8097 } ) ;
8198
8299 systemPromptGeneration = lazy ( async ( ) => {
@@ -178,27 +195,9 @@ export class Environment {
178195 */
179196 private async resolveExecutablePrompts (
180197 prompts : EnvironmentConfig [ 'executablePrompts' ] ,
181- config : EnvironmentConfig ,
182198 ) : Promise < RootPromptDefinition [ ] > {
183199 const result : Promise < RootPromptDefinition > [ ] = [ ] ;
184- let envRatings : Rating [ ] ;
185-
186- if ( config . ratingOverrides ) {
187- Object . keys ( config . ratingOverrides ) . forEach ( id => {
188- if ( ! config . ratings . some ( rating => rating . id === id ) ) {
189- throw new UserFacingError (
190- `Rating with an ID of "${ id } " has not been configured. Cannot apply an override to it.` ,
191- ) ;
192- }
193- } ) ;
194-
195- envRatings = config . ratings . map ( rating => {
196- const override = config . ratingOverrides ! [ rating . id ] ;
197- return override ? { ...rating , ...override } : rating ;
198- } ) ;
199- } else {
200- envRatings = config . ratings ;
201- }
200+ const envRatings = this . ratings ;
202201
203202 for ( const def of prompts ) {
204203 if ( def instanceof MultiStepPrompt ) {
@@ -378,6 +377,25 @@ export class Environment {
378377 return result ;
379378 }
380379
/**
 * Produces the environment-level ratings with any configured overrides applied.
 *
 * When the config has no `ratingOverrides`, `config.ratings` is returned as-is.
 * Otherwise every override key must match the ID of a configured rating, and
 * each rating that has an override is replaced by a shallow merge of the two
 * (override properties win).
 *
 * @param config Environment config providing `ratings` and optional `ratingOverrides`.
 * @returns The resolved ratings, in the same order as `config.ratings`.
 * @throws UserFacingError if an override targets a rating ID that is not configured.
 */
private resolveRatings(config: EnvironmentConfig) {
  const overrides = config.ratingOverrides;
  const baseRatings = config.ratings;

  if (!overrides) {
    return baseRatings;
  }

  // Reject overrides that point at unknown ratings before merging anything.
  for (const id of Object.keys(overrides)) {
    const isUnknown = baseRatings.every(rating => rating.id !== id);

    if (isUnknown) {
      throw new UserFacingError(
        `Rating with an ID of "${id} " has not been configured. Cannot apply an override to it.`,
      );
    }
  }

  return baseRatings.map(rating => {
    const override = overrides[rating.id];

    if (override) {
      return {...rating, ...override};
    }
    return rating;
  });
}
398+
381399 private getRatingCategories ( config : EnvironmentConfig ) {
382400 const overrides = config . categoryOverrides ;
383401
@@ -399,4 +417,29 @@ export class Environment {
399417 } ,
400418 } ;
401419 }
420+
/**
 * Computes a SHA-256 fingerprint of the given ratings.
 *
 * Each rating contributes one part built from its category, ID, score
 * reduction and grouping labels. Labels are sorted within a rating and the
 * parts are sorted before being joined with `|`, so neither the order of the
 * ratings nor the order of a rating's labels affects the result.
 *
 * @param ratings Ratings to fingerprint. The array and its elements are not mutated.
 * @returns Hash of the joined parts, as produced by `getSha256Hash`.
 */
private getRatingHash(ratings: Rating[]): string {
  const parts = ratings.map(rating => {
    // The fallback must be parenthesized: `labels || [].sort().join(',')` only
    // ever sorted the empty fallback and left real labels in declaration order.
    // Spreading into a new array keeps `sort()` from reordering the rating's own labels.
    const labels = [...(rating.groupingLabels || [])].sort().join(',');

    return `${rating.category} ;${rating.id} ;${rating.scoreReduction} ;${labels} `;
  });

  return getSha256Hash(parts.sort().join('|'));
}
432+
/**
 * Checks the computed rating hash against the one pinned in the config.
 * Does nothing when the config has no truthy `expectedRatingHash`.
 *
 * @param currentHash Hash computed from the currently resolved ratings.
 * @param config Environment config that may carry an `expectedRatingHash`.
 * @throws UserFacingError when an expected hash is set and differs from `currentHash`.
 */
private validateRatingHash(currentHash: string, config: EnvironmentConfig) {
  const expectedHash = config.expectedRatingHash;

  if (!expectedHash || expectedHash === currentHash) {
    return;
  }

  // Report the same value that was compared (`currentHash`), not `this.ratingHash`,
  // so the message stays accurate regardless of what the caller passes in.
  throw new UserFacingError(
    [
      `Rating hash for environment "${this.displayName} " does not match the expectation.`,
      `Expected: ${expectedHash} `,
      `Actual: ${currentHash} `,
      `Either update the \`expectedRatingHash\` field in the config or revert the ratings back to their previous configuration`,
    ].join('\n'),
  );
}
402445}
0 commit comments