Skip to content

Commit ec906f3

Browse files
committed
feat: add hash for configured ratings
Adds a hash that can be used to easily determine if the set of ratings and categories changed between different runs.
1 parent df47768 commit ec906f3

File tree

4 files changed

+27
-3
lines changed

4 files changed

+27
-3
lines changed

runner/orchestration/generate-eval-task.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ import {attemptBuildAndTest} from './build-serve-test-loop.js';
1818
import {rateGeneratedCode} from '../ratings/rate-code.js';
1919
import {DEFAULT_AUTORATER_MODEL_NAME} from '../configuration/constants.js';
2020
import assert from 'node:assert';
21+
import {Rating} from '../ratings/rating-types.js';
22+
import {getSha256Hash} from '../utils/hashing.js';
2123

2224
/**
2325
* Creates and executes a task to generate or load code for a given prompt,
@@ -189,6 +191,7 @@ export async function startEvaluationTask(
189191
},
190192
outputFiles: attempt.outputFiles,
191193
finalAttempt: attempt,
194+
ratingHash: getRatingHash(promptDef.ratings),
192195
score,
193196
repairAttempts: attempt.repairAttempts,
194197
attemptDetails,
@@ -203,3 +206,15 @@ export async function startEvaluationTask(
203206
await cleanup();
204207
return results;
205208
}
209+
210+
/**
 * Computes a hash describing the configured set of ratings, so that two runs
 * can be compared to see whether their ratings changed.
 *
 * Each rating contributes its category, id, score reduction and grouping
 * labels. Both the labels within a rating and the per-rating entries are
 * sorted before hashing, so the result does not depend on the order in which
 * ratings or labels were declared.
 *
 * @param ratings Ratings configured for the prompt being evaluated.
 * @returns Hash string produced by `getSha256Hash` for the serialized ratings.
 */
function getRatingHash(ratings: Rating[]): string {
  const parts = ratings.map(rating => {
    // The fallback must be parenthesized: `labels || [].sort().join(',')` only
    // sorts/joins the empty literal and leaves real labels in declaration order.
    // Sort a copy so the rating's own array is not reordered as a side effect.
    // NOTE(review): assumes `groupingLabels` is an optional array of strings — confirm against `Rating`.
    const labels = [...(rating.groupingLabels ?? [])].sort().join(',');

    return `${rating.category};${rating.id};${rating.scoreReduction};${labels}`;
  });

  // `parts` is a fresh local array, so sorting it in place is safe.
  return getSha256Hash(parts.sort().join('|'));
}

runner/orchestration/grouping.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
import {createHash} from 'crypto';
2-
import type {LlmRunner} from '../codegen/llm-runner.js';
31
import type {Environment} from '../configuration/environment.js';
42
import {calculateBuildAndCheckStats} from '../ratings/stats.js';
53
import type {AssessmentResult, RunGroup, RunInfo} from '../shared-interfaces.js';
64
import {RunnerName} from '../codegen/runner-creation.js';
5+
import {getSha256Hash} from '../utils/hashing.js';
76

87
/** Generates a unique grouping ID for a run. */
98
export function getRunGroupId(
@@ -30,7 +29,7 @@ export function getRunGroupId(
3029
`${options.labels?.sort().join('/')}/${options.model}/${options.runner}`;
3130

3231
// The group string above can get long. Hash it to something shorter and fixed length.
33-
return createHash('sha256').update(group).digest('hex');
32+
return getSha256Hash(group);
3433
}
3534

3635
/**

runner/shared-interfaces.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,8 @@ export interface AssessmentResult {
535535
repairAttempts: number;
536536
/** An array detailing each attempt (initial and repairs) made for this prompt. */
537537
attemptDetails: AttemptDetails[];
538+
/** Hash that can be used to determine if the set of ratings changed between assessment runs. */
539+
ratingHash?: string;
538540
/** Pre-computed user journeys. */
539541
userJourneys?: UserJourneysResult;
540542
/** The number of repair attempts made after the axe initial failures. */

runner/utils/hashing.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import {createHash} from 'node:crypto';
2+
3+
/**
 * Hashes a string with SHA-256.
 *
 * @param value Text to hash.
 * @returns The digest encoded as a hexadecimal string.
 */
export function getSha256Hash(value: string): string {
  const hasher = createHash('sha256');
  hasher.update(value);
  return hasher.digest('hex');
}

0 commit comments

Comments
 (0)