Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,16 @@ jobs:
continue-on-error: true

- name: Build libraries
id: build
run: npx nx affected -t build
continue-on-error: true

- name: Retry build with cache reset
if: steps.build.outcome == 'failure'
run: |
echo "Build failed, resetting NX cache and retrying..."
npx nx reset
npx nx affected -t build --skip-nx-cache

- name: Test libraries
run: npx nx affected -t test --passWithNoTests
6 changes: 6 additions & 0 deletions docs/draft/docs/libraries/vectoriadb.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ const db = new VectoriaDB({

Uses Hugging Face transformers for higher-quality embeddings.

**Note:** Transformer embeddings require installing the optional dependency:

```bash
npm install @huggingface/transformers
```

```typescript
const db = new VectoriaDB({
embedding: 'transformer',
Expand Down
6 changes: 6 additions & 0 deletions docs/live/docs/libraries/vectoriadb.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ const db = new VectoriaDB({

Uses Hugging Face transformers for higher-quality embeddings.

**Note:** Transformer embeddings require installing the optional dependency:

```bash
npm install @huggingface/transformers
```

```typescript
const db = new VectoriaDB({
embedding: 'transformer',
Expand Down
1 change: 1 addition & 0 deletions libs/enclave-vm/eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export default [
'error',
{
ignoredFiles: ['{projectRoot}/eslint.config.{js,cjs,mjs,ts,cts,mts}'],
ignoredDependencies: ['@huggingface/transformers'], // Optional peer dependency loaded dynamically
},
],
},
Expand Down
5 changes: 1 addition & 4 deletions libs/enclave-vm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,8 @@
"astring": "1.9.0",
"zod": "^4.1.13"
},
"optionalDependencies": {
"@huggingface/transformers": "^3.0.0"
},
"peerDependencies": {
"@huggingface/transformers": "^3.0.0"
"@huggingface/transformers": "^3.2.2"
},
"peerDependenciesMeta": {
"@huggingface/transformers": {
Expand Down
6 changes: 4 additions & 2 deletions libs/enclave-vm/src/scoring/scorers/local-llm.scorer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,10 @@ export class LocalLlmScorer extends BaseScorer {
*/
private async _initialize(): Promise<void> {
try {
// Dynamic import to avoid bundling issues
const { pipeline } = await import('@huggingface/transformers');
// Dynamic import using Function to avoid TypeScript checking for the optional dependency
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const transformers = await (Function('return import("@huggingface/transformers")')() as Promise<any>);
const { pipeline } = transformers;

// Use feature-extraction pipeline for both modes
// (classification mode uses embeddings + heuristics, similarity mode uses embeddings + VectoriaDB)
Expand Down
1 change: 1 addition & 0 deletions libs/vectoriadb/eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export default [
'error',
{
ignoredFiles: ['{projectRoot}/eslint.config.{js,cjs,mjs,ts,cts,mts}'],
ignoredDependencies: ['@huggingface/transformers'], // Optional peer dependency loaded dynamically
},
],
},
Expand Down
130 changes: 70 additions & 60 deletions libs/vectoriadb/jest.setup.ts
Original file line number Diff line number Diff line change
@@ -1,79 +1,89 @@
/**
* Jest setup file for vectoria tests
* Mocks the transformers.js library to avoid ONNX Runtime issues in test environment
* Injects a mock transformers module to avoid ONNX Runtime issues in test environment
*/

// Mock the transformers pipeline
jest.mock('@huggingface/transformers', () => {
// Helper to extract and normalize words from text
const extractWords = (text: string): string[] => {
return text
.toLowerCase()
.replace(/[^\w\s]/g, '')
.split(/\s+/)
.filter((word) => word.length > 2); // Filter out short words like "a", "an", "in"
};
import { EmbeddingService } from './src/embedding.service';

// Normalize word forms to their root (simple stemming)
const normalizeWord = (word: string): string => {
// Simple stemming - remove common suffixes
return word.replace(/ing$/, '').replace(/s$/, '').replace(/ed$/, '').replace(/er$/, '');
};
// Helper to extract and normalize words from text
const extractWords = (text: string): string[] => {
return text
.toLowerCase()
.replace(/[^\w\s]/g, '')
.split(/\s+/)
.filter((word) => word.length > 2); // Filter out short words like "a", "an", "in"
};

// Create a mock pipeline function that returns consistent embeddings
const createMockPipeline = () => {
return async (text: string | string[]) => {
const textStr = text.toString();
const words = extractWords(textStr);
const normalizedWords = words.map(normalizeWord);
// Normalize word forms to their root (simple stemming)
const normalizeWord = (word: string): string => {
// Simple stemming - remove common suffixes
return word.replace(/ing$/, '').replace(/s$/, '').replace(/ed$/, '').replace(/er$/, '');
};

// Create a 384-dimensional embedding (matching all-MiniLM-L6-v2)
const embedding = new Float32Array(384);
// Create a mock pipeline function that returns consistent embeddings
const createMockPipeline = () => {
return async (text: string | string[]) => {
const textStr = text.toString();
const words = extractWords(textStr);
const normalizedWords = words.map(normalizeWord);

// Each normalized word contributes to specific dimensions
// This ensures that texts with overlapping words have high similarity
normalizedWords.forEach((word) => {
// Calculate which dimensions this word affects
const wordHash = word.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0);
// Create a 384-dimensional embedding (matching all-MiniLM-L6-v2)
const embedding = new Float32Array(384);

// Each word contributes to ~50 dimensions centered around its hash position
for (let offset = -25; offset < 25; offset++) {
const dim = (wordHash + offset) % 384;
// Use Gaussian-like contribution
const contribution = Math.exp(-(offset * offset) / 100);
embedding[dim] += contribution;
}
});
// Each normalized word contributes to specific dimensions
// This ensures that texts with overlapping words have high similarity
normalizedWords.forEach((word) => {
// Calculate which dimensions this word affects
const wordHash = word.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0);

// Add small random-like component for uniqueness
for (let i = 0; i < 384; i++) {
// Deterministic "noise" based on text length and position
const noise = Math.sin(textStr.length * 100 + i) * 0.01;
embedding[i] += noise;
// Each word contributes to ~50 dimensions centered around its hash position
for (let offset = -25; offset < 25; offset++) {
const dim = (wordHash + offset) % 384;
// Use Gaussian-like contribution
const contribution = Math.exp(-(offset * offset) / 100);
embedding[dim] += contribution;
}
});

// Normalize the embedding to unit length (as transformers.js does)
let norm = 0;
for (let i = 0; i < embedding.length; i++) {
norm += embedding[i] * embedding[i];
}
norm = Math.sqrt(norm);
// Add small random-like component for uniqueness
for (let i = 0; i < 384; i++) {
// Deterministic "noise" based on text length and position
const noise = Math.sin(textStr.length * 100 + i) * 0.01;
embedding[i] += noise;
}

// Normalize the embedding to unit length (as transformers.js does)
let norm = 0;
for (let i = 0; i < embedding.length; i++) {
norm += embedding[i] * embedding[i];
}
norm = Math.sqrt(norm);

if (norm > 0) {
for (let i = 0; i < embedding.length; i++) {
embedding[i] /= norm;
}
if (norm > 0) {
for (let i = 0; i < embedding.length; i++) {
embedding[i] /= norm;
}
}

return {
data: embedding,
};
return {
data: embedding,
};
};
};

return {
pipeline: jest.fn(async () => {
return createMockPipeline();
}),
};
// Create mock transformers module
const mockTransformersModule = {
pipeline: jest.fn(async () => {
return createMockPipeline();
}),
};

// Inject mock transformers module before all tests
beforeAll(() => {
EmbeddingService.setTransformersModule(mockTransformersModule);
});

// Clear the mock after all tests
afterAll(() => {
EmbeddingService.clearTransformersModule();
});
7 changes: 6 additions & 1 deletion libs/vectoriadb/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,14 @@
"default": "./dist/src/index.js"
}
},
"dependencies": {
"peerDependencies": {
"@huggingface/transformers": "^3.2.2"
},
"peerDependenciesMeta": {
"@huggingface/transformers": {
"optional": true
}
},
"devDependencies": {
"typescript": "^5.9.3"
}
Expand Down
60 changes: 57 additions & 3 deletions libs/vectoriadb/src/embedding.service.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,34 @@
import { pipeline } from '@huggingface/transformers';
import { EmbeddingError } from './errors';
import { EmbeddingError, ConfigurationError } from './errors';

/**
* Service for generating embeddings using transformers.js
*
* NOTE: This service requires @huggingface/transformers to be installed.
* Install it with: npm install @huggingface/transformers
*
* For a zero-dependency alternative, use TFIDFEmbeddingService instead.
*/
export class EmbeddingService {
// Static transformers module for dependency injection (used in testing)
private static _transformersModule: any = null;

/**
* Inject a transformers module (for testing purposes)
* @internal
*/
static setTransformersModule(module: any): void {
EmbeddingService._transformersModule = module;
}

/**
* Clear the injected transformers module
* @internal
*/
static clearTransformersModule(): void {
EmbeddingService._transformersModule = null;
}

// Using 'any' because @huggingface/transformers is an optional dependency
private pipeline: any = null;
private modelName: string;
private cacheDir: string;
Expand All @@ -17,6 +41,30 @@ export class EmbeddingService {
this.cacheDir = cacheDir;
}

/**
* Dynamically import @huggingface/transformers
* This allows the package to be optional - only loaded when actually used
*/
private async loadTransformers(): Promise<any> {
// Use injected module if available (for testing)
if (EmbeddingService._transformersModule) {
return EmbeddingService._transformersModule.pipeline;
}

try {
// Dynamic import - package may not be installed
// Using Function() to bypass TypeScript's static analysis for optional dependency
const transformers = await (Function('return import("@huggingface/transformers")')() as Promise<any>);
return transformers.pipeline;
} catch (_error) {
throw new ConfigurationError(
'@huggingface/transformers is not installed. ' +
'Install it with: npm install @huggingface/transformers\n' +
'Or use TFIDFVectoria/TFIDFEmbeddingService for a zero-dependency alternative.',
);
}
}

/**
* Initialize the embedding model
*/
Expand All @@ -36,8 +84,11 @@ export class EmbeddingService {

private async _initialize(): Promise<void> {
try {
// Dynamically load transformers
const pipelineFn = await this.loadTransformers();

// Create feature extraction pipeline
this.pipeline = await pipeline('feature-extraction', this.modelName, {
this.pipeline = await pipelineFn('feature-extraction', this.modelName, {
// Use local models directory to cache models
cache_dir: this.cacheDir,
// // Don't require progress bars in production
Expand All @@ -54,6 +105,9 @@ export class EmbeddingService {
this.isInitialized = true;
} catch (error) {
this.initializationPromise = null;
if (error instanceof ConfigurationError) {
throw error;
}
throw new EmbeddingError(
`Failed to initialize embedding model: ${error instanceof Error ? error.message : String(error)}`,
error instanceof Error ? error : undefined,
Expand Down
3 changes: 0 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
"*.{json,css,scss,md,html,yml,yaml}": "prettier --write"
},
"private": true,
"dependencies": {
"@huggingface/transformers": "^3.2.2"
},
"devDependencies": {
"@eslint/js": "^9.8.0",
"@mdx-js/mdx": "^3.1.1",
Expand Down
Loading