Skip to content

Commit 14a981f

Browse files
authored
test: enable shuffle mode and fix test isolation bugs (#6601)
1 parent aebd268 commit 14a981f

File tree

14 files changed

+257
-18
lines changed

14 files changed

+257
-18
lines changed

test/AGENTS.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ npm run test:integration
2626
- **NEVER** increase test timeouts - fix the slow test
2727
- **NEVER** use `.only()` or `.skip()` in committed code
2828
- **ALWAYS** clean up mocks in `afterEach`
29-
- **ALWAYS** use `--randomize` to ensure test independence
29+
- Tests run in **random order by default** (configured in vitest.config.ts)
30+
- Use `--sequence.shuffle=false` to disable when debugging specific failures
31+
- Use `--sequence.seed=12345` to reproduce a specific order
3032

3133
## Writing Tests
3234

@@ -67,6 +69,19 @@ axiosMock.post.mockResolvedValue({ data: { result: 'success' } });
6769
- Mock external dependencies but not the code being tested
6870
- Reset mocks between tests to prevent test pollution
6971

72+
**Critical: Mock Isolation**
73+
74+
`vi.clearAllMocks()` only clears call history, NOT mock implementations. Use `mockReset()` for full isolation:
75+
76+
```typescript
77+
beforeEach(() => {
78+
vi.clearAllMocks(); // Clears .mock.calls and .mock.results
79+
vi.mocked(myMock).mockReset(); // Also clears mockReturnValue/mockResolvedValue
80+
});
81+
```
82+
83+
For `vi.hoisted()` mocks or mocks with `mockReturnValue()`, you MUST call `mockReset()` in `beforeEach` to ensure test isolation when tests run in random order.
84+
7085
## Provider Testing
7186

7287
Every provider needs tests covering:

test/assertions/python.test.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,9 @@ describe('Python file references', { timeout: 15000 }, () => {
378378

379379
it('should handle when python file assertions throw an error', async () => {
380380
const output = 'Expected output';
381+
// Must mock path.resolve and path.extname to ensure test isolation
382+
vi.mocked(path.resolve).mockReturnValue('/path/to/assert.py');
383+
vi.mocked(path.extname).mockReturnValue('.py');
381384
vi.mocked(runPython).mockRejectedValue(
382385
new Error('The Python script `call_api` function must return a dict with an `output`'),
383386
);
@@ -437,6 +440,9 @@ describe('Python file references', { timeout: 15000 }, () => {
437440
],
438441
};
439442

443+
// Must mock path.resolve and path.extname to ensure test isolation
444+
vi.mocked(path.resolve).mockReturnValue('/path/to/assert.py');
445+
vi.mocked(path.extname).mockReturnValue('.py');
440446
vi.mocked(runPython).mockResolvedValueOnce(pythonResult as any);
441447

442448
const fileAssertion: Assertion = {

test/commands/modelScan.test.ts

Lines changed: 125 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,36 @@ describe('modelScanCommand', () => {
3434
let program: Command;
3535
let mockExit: MockInstance;
3636

37-
beforeEach(() => {
37+
beforeEach(async () => {
3838
program = new Command();
3939
mockExit = vi.spyOn(process, 'exit').mockImplementation(function () {
4040
return undefined as never;
4141
});
4242
vi.clearAllMocks();
43+
44+
// Reset mock implementations (clearAllMocks only clears call history, not implementations)
45+
vi.mocked(spawn).mockReset();
46+
const { getModelAuditCurrentVersion } = await import('../../src/updates');
47+
vi.mocked(getModelAuditCurrentVersion).mockReset();
48+
vi.mocked(getModelAuditCurrentVersion).mockResolvedValue('0.2.16');
49+
50+
// Reset ModelAudit mock to default (no existing scan found)
51+
const ModelAudit = (await import('../../src/models/modelAudit')).default;
52+
vi.mocked(ModelAudit.findByRevision).mockReset();
53+
vi.mocked(ModelAudit.findByRevision).mockResolvedValue(null);
54+
vi.mocked(ModelAudit.create).mockReset();
55+
vi.mocked(ModelAudit.create).mockResolvedValue({ id: 'scan-abc-2025-01-01T00:00:00' } as any);
56+
57+
// Reset HuggingFace mocks
58+
const { isHuggingFaceModel, getHuggingFaceMetadata, parseHuggingFaceModel } = await import(
59+
'../../src/util/huggingfaceMetadata'
60+
);
61+
vi.mocked(isHuggingFaceModel).mockReset();
62+
vi.mocked(isHuggingFaceModel).mockReturnValue(false);
63+
vi.mocked(getHuggingFaceMetadata).mockReset();
64+
vi.mocked(getHuggingFaceMetadata).mockResolvedValue(null);
65+
vi.mocked(parseHuggingFaceModel).mockReset();
66+
vi.mocked(parseHuggingFaceModel).mockReturnValue(null);
4367
});
4468

4569
afterEach(() => {
@@ -299,12 +323,36 @@ describe('Re-scan on version change behavior', () => {
299323
let program: Command;
300324
let mockExit: MockInstance;
301325

302-
beforeEach(() => {
326+
beforeEach(async () => {
303327
program = new Command();
304328
mockExit = vi.spyOn(process, 'exit').mockImplementation(function () {
305329
return undefined as never;
306330
});
307331
vi.clearAllMocks();
332+
333+
// Reset mock implementations (clearAllMocks only clears call history, not implementations)
334+
vi.mocked(spawn).mockReset();
335+
const { getModelAuditCurrentVersion } = await import('../../src/updates');
336+
vi.mocked(getModelAuditCurrentVersion).mockReset();
337+
vi.mocked(getModelAuditCurrentVersion).mockResolvedValue('0.2.16');
338+
339+
// Reset ModelAudit mock to default (no existing scan found)
340+
const ModelAudit = (await import('../../src/models/modelAudit')).default;
341+
vi.mocked(ModelAudit.findByRevision).mockReset();
342+
vi.mocked(ModelAudit.findByRevision).mockResolvedValue(null);
343+
vi.mocked(ModelAudit.create).mockReset();
344+
vi.mocked(ModelAudit.create).mockResolvedValue({ id: 'scan-abc-2025-01-01T00:00:00' } as any);
345+
346+
// Reset HuggingFace mocks
347+
const { isHuggingFaceModel, getHuggingFaceMetadata, parseHuggingFaceModel } = await import(
348+
'../../src/util/huggingfaceMetadata'
349+
);
350+
vi.mocked(isHuggingFaceModel).mockReset();
351+
vi.mocked(isHuggingFaceModel).mockReturnValue(false);
352+
vi.mocked(getHuggingFaceMetadata).mockReset();
353+
vi.mocked(getHuggingFaceMetadata).mockResolvedValue(null);
354+
vi.mocked(parseHuggingFaceModel).mockReset();
355+
vi.mocked(parseHuggingFaceModel).mockReturnValue(null);
308356
});
309357

310358
afterEach(() => {
@@ -495,8 +543,32 @@ describe('Re-scan on version change behavior', () => {
495543
});
496544

497545
describe('checkModelAuditInstalled', () => {
498-
beforeEach(() => {
546+
beforeEach(async () => {
499547
vi.clearAllMocks();
548+
549+
// Reset mock implementations (clearAllMocks only clears call history, not implementations)
550+
vi.mocked(spawn).mockReset();
551+
const { getModelAuditCurrentVersion } = await import('../../src/updates');
552+
vi.mocked(getModelAuditCurrentVersion).mockReset();
553+
vi.mocked(getModelAuditCurrentVersion).mockResolvedValue('0.2.16');
554+
555+
// Reset ModelAudit mock to default (no existing scan found)
556+
const ModelAudit = (await import('../../src/models/modelAudit')).default;
557+
vi.mocked(ModelAudit.findByRevision).mockReset();
558+
vi.mocked(ModelAudit.findByRevision).mockResolvedValue(null);
559+
vi.mocked(ModelAudit.create).mockReset();
560+
vi.mocked(ModelAudit.create).mockResolvedValue({ id: 'scan-abc-2025-01-01T00:00:00' } as any);
561+
562+
// Reset HuggingFace mocks
563+
const { isHuggingFaceModel, getHuggingFaceMetadata, parseHuggingFaceModel } = await import(
564+
'../../src/util/huggingfaceMetadata'
565+
);
566+
vi.mocked(isHuggingFaceModel).mockReset();
567+
vi.mocked(isHuggingFaceModel).mockReturnValue(false);
568+
vi.mocked(getHuggingFaceMetadata).mockReset();
569+
vi.mocked(getHuggingFaceMetadata).mockResolvedValue(null);
570+
vi.mocked(parseHuggingFaceModel).mockReset();
571+
vi.mocked(parseHuggingFaceModel).mockReturnValue(null);
500572
});
501573

502574
it('should return installed: true and version when getModelAuditCurrentVersion returns version', async () => {
@@ -541,12 +613,36 @@ describe('Command Options Validation', () => {
541613
let program: Command;
542614
let mockExit: MockInstance;
543615

544-
beforeEach(() => {
616+
beforeEach(async () => {
545617
program = new Command();
546618
mockExit = vi.spyOn(process, 'exit').mockImplementation(function () {
547619
return undefined as never;
548620
});
549621
vi.clearAllMocks();
622+
623+
// Reset mock implementations (clearAllMocks only clears call history, not implementations)
624+
vi.mocked(spawn).mockReset();
625+
const { getModelAuditCurrentVersion } = await import('../../src/updates');
626+
vi.mocked(getModelAuditCurrentVersion).mockReset();
627+
vi.mocked(getModelAuditCurrentVersion).mockResolvedValue('0.2.16');
628+
629+
// Reset ModelAudit mock to default (no existing scan found)
630+
const ModelAudit = (await import('../../src/models/modelAudit')).default;
631+
vi.mocked(ModelAudit.findByRevision).mockReset();
632+
vi.mocked(ModelAudit.findByRevision).mockResolvedValue(null);
633+
vi.mocked(ModelAudit.create).mockReset();
634+
vi.mocked(ModelAudit.create).mockResolvedValue({ id: 'scan-abc-2025-01-01T00:00:00' } as any);
635+
636+
// Reset HuggingFace mocks
637+
const { isHuggingFaceModel, getHuggingFaceMetadata, parseHuggingFaceModel } = await import(
638+
'../../src/util/huggingfaceMetadata'
639+
);
640+
vi.mocked(isHuggingFaceModel).mockReset();
641+
vi.mocked(isHuggingFaceModel).mockReturnValue(false);
642+
vi.mocked(getHuggingFaceMetadata).mockReset();
643+
vi.mocked(getHuggingFaceMetadata).mockResolvedValue(null);
644+
vi.mocked(parseHuggingFaceModel).mockReset();
645+
vi.mocked(parseHuggingFaceModel).mockReturnValue(null);
550646
});
551647

552648
afterEach(() => {
@@ -766,12 +862,36 @@ describe('Temp file JSON output (CLI UI fix)', () => {
766862
let program: Command;
767863
let mockExit: MockInstance;
768864

769-
beforeEach(() => {
865+
beforeEach(async () => {
770866
program = new Command();
771867
mockExit = vi.spyOn(process, 'exit').mockImplementation(function () {
772868
return undefined as never;
773869
});
774870
vi.clearAllMocks();
871+
872+
// Reset mock implementations (clearAllMocks only clears call history, not implementations)
873+
vi.mocked(spawn).mockReset();
874+
const { getModelAuditCurrentVersion } = await import('../../src/updates');
875+
vi.mocked(getModelAuditCurrentVersion).mockReset();
876+
vi.mocked(getModelAuditCurrentVersion).mockResolvedValue('0.2.16');
877+
878+
// Reset ModelAudit mock to default (no existing scan found)
879+
const ModelAudit = (await import('../../src/models/modelAudit')).default;
880+
vi.mocked(ModelAudit.findByRevision).mockReset();
881+
vi.mocked(ModelAudit.findByRevision).mockResolvedValue(null);
882+
vi.mocked(ModelAudit.create).mockReset();
883+
vi.mocked(ModelAudit.create).mockResolvedValue({ id: 'scan-abc-2025-01-01T00:00:00' } as any);
884+
885+
// Reset HuggingFace mocks
886+
const { isHuggingFaceModel, getHuggingFaceMetadata, parseHuggingFaceModel } = await import(
887+
'../../src/util/huggingfaceMetadata'
888+
);
889+
vi.mocked(isHuggingFaceModel).mockReset();
890+
vi.mocked(isHuggingFaceModel).mockReturnValue(false);
891+
vi.mocked(getHuggingFaceMetadata).mockReset();
892+
vi.mocked(getHuggingFaceMetadata).mockResolvedValue(null);
893+
vi.mocked(parseHuggingFaceModel).mockReset();
894+
vi.mocked(parseHuggingFaceModel).mockReturnValue(null);
775895
});
776896

777897
afterEach(() => {

test/evaluator.test.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,9 @@ describe('evaluator', () => {
339339

340340
beforeEach(() => {
341341
vi.clearAllMocks();
342+
// Reset runExtensionHook to default implementation (other tests may have overridden it)
343+
vi.mocked(runExtensionHook).mockReset();
344+
vi.mocked(runExtensionHook).mockImplementation((_extensions, _hookName, context) => context);
342345
// Reset cliState for each test to ensure clean state
343346
cliState.resume = false;
344347
cliState.basePath = '';
@@ -4032,6 +4035,9 @@ describe('evaluator defaultTest merging', () => {
40324035

40334036
beforeEach(() => {
40344037
vi.clearAllMocks();
4038+
// Reset runExtensionHook to default implementation (other tests may have overridden it)
4039+
vi.mocked(runExtensionHook).mockReset();
4040+
vi.mocked(runExtensionHook).mockImplementation((_extensions, _hookName, context) => context);
40354041
});
40364042

40374043
it('should merge defaultTest.options.provider with test case options', async () => {
@@ -4144,6 +4150,9 @@ describe('Evaluator with external defaultTest', () => {
41444150

41454151
beforeEach(() => {
41464152
vi.clearAllMocks();
4153+
// Reset runExtensionHook to default implementation (other tests may have overridden it)
4154+
vi.mocked(runExtensionHook).mockReset();
4155+
vi.mocked(runExtensionHook).mockImplementation((_extensions, _hookName, context) => context);
41474156
});
41484157

41494158
it('should handle string defaultTest gracefully', async () => {
@@ -4364,6 +4373,9 @@ describe('defaultTest normalization for extensions', () => {
43644373

43654374
beforeEach(() => {
43664375
vi.clearAllMocks();
4376+
// Reset runExtensionHook to default implementation (other tests may have overridden it)
4377+
vi.mocked(runExtensionHook).mockReset();
4378+
vi.mocked(runExtensionHook).mockImplementation((_extensions, _hookName, context) => context);
43674379
});
43684380

43694381
it('should initialize defaultTest when undefined and extensions are present', async () => {

test/globalConfig/accounts.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ describe('accounts', () => {
152152

153153
describe('setUserEmail', () => {
154154
it('should write email to global config', () => {
155+
// Must mock readGlobalConfig to ensure clean state (no leftover account properties from other tests)
156+
vi.mocked(readGlobalConfig).mockReturnValue({
157+
id: 'test-id',
158+
});
155159
const email = 'test@example.com';
156160
setUserEmail(email);
157161
expect(writeGlobalConfigPartial).toHaveBeenCalledWith({

test/providers/watsonx.test.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,14 @@ describe('WatsonXProvider', () => {
422422
return true;
423423
});
424424

425+
// Must mock WatsonXAI.newInstance to ensure test isolation
426+
const mockedWatsonXAIClient: Partial<any> = {
427+
generateText: vi.fn(),
428+
};
429+
vi.mocked(WatsonXAI.newInstance).mockImplementation(function () {
430+
return mockedWatsonXAIClient as any;
431+
});
432+
425433
const provider = new WatsonXProvider(modelName, { config });
426434
const generateTextSpy = vi.spyOn(await provider.getClient(), 'generateText');
427435
const response = await provider.callApi(prompt);

test/redteam/commands/generate.test.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,6 +1496,27 @@ describe('doGenerateRedteam', () => {
14961496
describe('doGenerateRedteam with external defaultTest', () => {
14971497
beforeEach(() => {
14981498
vi.clearAllMocks();
1499+
1500+
// Reset resolveConfigs with default implementation (tests depend on this being set)
1501+
vi.mocked(configModule.resolveConfigs).mockReset();
1502+
vi.mocked(configModule.resolveConfigs).mockResolvedValue({
1503+
basePath: '/mock/path',
1504+
testSuite: {
1505+
providers: [
1506+
{
1507+
id: () => 'test-provider',
1508+
callApi: vi.fn().mockResolvedValue({ output: 'test output' }),
1509+
cleanup: vi.fn().mockResolvedValue(undefined),
1510+
},
1511+
],
1512+
prompts: [],
1513+
tests: [],
1514+
},
1515+
config: {
1516+
redteam: {},
1517+
},
1518+
});
1519+
14991520
vi.mocked(fs.existsSync).mockImplementation(function () {
15001521
return false;
15011522
});

test/redteam/providers/iterative.test.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,13 @@ describe('RedteamIterativeProvider', () => {
4747
beforeEach(() => {
4848
vi.clearAllMocks();
4949

50+
// Reset hoisted mocks to ensure test isolation
51+
// mockReset clears both call history AND mock implementations
52+
mockGetProvider.mockReset();
53+
mockGetTargetResponse.mockReset();
54+
mockCheckPenalizedPhrases.mockReset();
55+
mockGetGraderById.mockReset();
56+
5057
mockRedteamProvider = {
5158
id: vi.fn().mockReturnValue('mock-redteam'),
5259
callApi: vi

test/server/routes/modelAudit.test.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import { spawn } from 'child_process';
22
import fs from 'fs';
3+
import os from 'os';
4+
import path from 'path';
35
import request from 'supertest';
46
import { beforeEach, describe, expect, it, vi } from 'vitest';
57
import { createApp } from '../../../src/server/server';
@@ -22,6 +24,10 @@ describe('Model Audit Routes', () => {
2224

2325
beforeEach(() => {
2426
vi.clearAllMocks();
27+
// Reset mock implementations to ensure test isolation when tests run in random order.
28+
// vi.clearAllMocks() only clears call history, not mockResolvedValue/mockReturnValue.
29+
mockedCheckModelAuditInstalled.mockReset();
30+
mockedSpawn.mockReset();
2531
app = createApp();
2632
});
2733

@@ -31,7 +37,7 @@ describe('Model Audit Routes', () => {
3137
mockedCheckModelAuditInstalled.mockResolvedValue({ installed: true, version: '0.2.20' });
3238

3339
// Create a temporary test file
34-
const testFilePath = '/tmp/test-model-audit-scan.pkl';
40+
const testFilePath = path.join(os.tmpdir(), 'test-model-audit-scan.pkl');
3541
fs.writeFileSync(testFilePath, 'test data');
3642

3743
const mockScanOutput = JSON.stringify({
@@ -70,7 +76,7 @@ describe('Model Audit Routes', () => {
7076
it('should handle request with empty options object', async () => {
7177
mockedCheckModelAuditInstalled.mockResolvedValue({ installed: true, version: '0.2.20' });
7278

73-
const testFilePath = '/tmp/test-model-audit-scan-2.pkl';
79+
const testFilePath = path.join(os.tmpdir(), 'test-model-audit-scan-2.pkl');
7480
fs.writeFileSync(testFilePath, 'test data');
7581

7682
const mockScanOutput = JSON.stringify({
@@ -120,7 +126,7 @@ describe('Model Audit Routes', () => {
120126
it('should return 400 when modelaudit is not installed', async () => {
121127
mockedCheckModelAuditInstalled.mockResolvedValue({ installed: false, version: null });
122128

123-
const testFilePath = '/tmp/test-model-audit-not-installed.pkl';
129+
const testFilePath = path.join(os.tmpdir(), 'test-model-audit-not-installed.pkl');
124130
fs.writeFileSync(testFilePath, 'test data');
125131

126132
const response = await request(app)

0 commit comments

Comments
 (0)