Skip to content

Commit 45b17b8

Browse files
Merge remote-tracking branch 'origin/main' into feat/MCP-239-delete-vector-search-indexes
2 parents 7db2402 + 930b947 commit 45b17b8

36 files changed

+1092
-231
lines changed

.github/dependabot.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ updates:
44
directory: "/"
55
schedule:
66
interval: "weekly"
7+
cooldown:
8+
default-days: 7
9+
include:
10+
- "*"
711
ignore:
812
# We are ignoring major updates on yargs-parser because yargs-parser@22
913
# does not play nicely when bundled using webpack. Our VSCode extension

.github/workflows/code-health-fork.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ permissions: {}
1010
jobs:
1111
run-tests:
1212
name: Run MongoDB tests
13-
if: github.event.pull_request.user.login == 'dependabot[bot]' || github.event.pull_request.head.repo.full_name != github.repository
13+
# Code health disabled on forks for now
14+
# if: github.event.pull_request.user.login == 'dependabot[bot]' || github.event.pull_request.head.repo.full_name != github.repository
15+
if: github.event.pull_request.user.login == 'dependabot[bot]'
1416
strategy:
1517
matrix:
1618
os: [ubuntu-latest, macos-latest, windows-latest]

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "mongodb-mcp-server",
33
"description": "MongoDB Model Context Protocol Server",
4-
"version": "1.1.0-prerelease.1",
4+
"version": "1.1.0",
55
"type": "module",
66
"exports": {
77
".": {

src/common/atlas/performanceAdvisorUtils.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ export type SlowQueryLog = components["schemas"]["PerformanceAdvisorSlowQuery"];
99

1010
export const DEFAULT_SLOW_QUERY_LOGS_LIMIT = 50;
1111

12+
/**
 * Note text explaining that the "Weight" field of an index suggestion is
 * expressed in bytes and asking for it to be converted to MB or GB.
 * NOTE(review): presumably appended to the suggested-indexes tool output for
 * the consuming model — confirm against the callers, which are not shown here.
 */
export const SUGGESTED_INDEXES_COPY = `Note: The "Weight" field is measured in bytes, and represents the estimated number of bytes saved in disk reads per executed read query that would be saved by implementing an index suggestion. Please convert this to MB or GB for easier readability.`;
13+
/**
 * Note text stating that the tool only returns the most recent
 * `DEFAULT_SLOW_QUERY_LOGS_LIMIT` slow query logs and pointing to the Atlas UI
 * and the Performance Advisor documentation for more.
 * The limit is interpolated once, when this module is evaluated.
 */
export const SLOW_QUERY_LOGS_COPY = `Please notify the user that the MCP server tool limits slow query logs to the most recent ${DEFAULT_SLOW_QUERY_LOGS_LIMIT} slow query logs. This is a limitation of the MCP server tool only. More slow query logs and performance suggestions can be seen in the Atlas UI. Please give to the user the following docs about the performance advisor: https://www.mongodb.com/docs/atlas/performance-advisor/.`;
14+
1215
interface SuggestedIndexesResponse {
1316
content: components["schemas"]["PerformanceAdvisorResponse"];
1417
}

src/common/config.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ const OPTIONS = {
5858
boolean: [
5959
"apiDeprecationErrors",
6060
"apiStrict",
61+
"disableEmbeddingsValidation",
6162
"help",
6263
"indexCheck",
6364
"ipv6",
@@ -183,6 +184,7 @@ export interface UserConfig extends CliOptions {
183184
maxBytesPerQuery: number;
184185
atlasTemporaryDatabaseUserLifetimeMs: number;
185186
voyageApiKey: string;
187+
disableEmbeddingsValidation: boolean;
186188
vectorSearchDimensions: number;
187189
vectorSearchSimilarityFunction: "cosine" | "euclidean" | "dotProduct";
188190
}
@@ -216,6 +218,7 @@ export const defaultUserConfig: UserConfig = {
216218
maxBytesPerQuery: 16 * 1024 * 1024, // By default, we only return ~16 mb of data per query / aggregation
217219
atlasTemporaryDatabaseUserLifetimeMs: 4 * 60 * 60 * 1000, // 4 hours
218220
voyageApiKey: "",
221+
disableEmbeddingsValidation: false,
219222
vectorSearchDimensions: 1024,
220223
vectorSearchSimilarityFunction: "euclidean",
221224
};

src/common/connectionManager.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ export interface ConnectionState {
3232
connectedAtlasCluster?: AtlasClusterConnectionInfo;
3333
}
3434

35+
// Database name passed to `getSearchIndexes` when probing whether the
// connected cluster supports search indexes.
// NOTE(review): the unusual name is presumably chosen so that it cannot
// collide with a real user database — confirm.
const MCP_TEST_DATABASE = "#mongodb-mcp";
3536
export class ConnectionStateConnected implements ConnectionState {
3637
public tag = "connected" as const;
3738

@@ -46,11 +47,11 @@ export class ConnectionStateConnected implements ConnectionState {
4647
public async isSearchSupported(): Promise<boolean> {
4748
if (this._isSearchSupported === undefined) {
4849
try {
49-
const dummyDatabase = "test";
50-
const dummyCollection = "test";
5150
// If a cluster supports search indexes, the call below will succeed
52-
// with a cursor otherwise will throw an Error
53-
await this.serviceProvider.getSearchIndexes(dummyDatabase, dummyCollection);
51+
// with a cursor otherwise will throw an Error.
52+
// the Search Index Management Service might not be ready yet, but
53+
// we assume that the agent can retry in that situation.
54+
await this.serviceProvider.getSearchIndexes(MCP_TEST_DATABASE, "test");
5455
this._isSearchSupported = true;
5556
} catch {
5657
this._isSearchSupported = false;

src/common/errors.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ export enum ErrorCodes {
33
MisconfiguredConnectionString = 1_000_001,
44
ForbiddenCollscan = 1_000_002,
55
ForbiddenWriteOperation = 1_000_003,
6+
AtlasSearchNotSupported = 1_000_004,
67
}
78

89
export class MongoDBError<ErrorCode extends ErrorCodes = ErrorCodes> extends Error {

src/common/packageInfo.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// This file was generated by scripts/updatePackageVersion.ts - Do not edit it manually.
22
export const packageInfo = {
3-
version: "1.1.0-prerelease.1",
3+
version: "1.1.0",
44
mcpServerName: "MongoDB MCP Server",
55
};
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
import type { NodeDriverServiceProvider } from "@mongosh/service-provider-node-driver";
2+
import { BSON, type Document } from "bson";
3+
import type { UserConfig } from "../config.js";
4+
import type { ConnectionManager } from "../connectionManager.js";
5+
6+
/**
 * A single `type: "vector"` field entry taken from the `fields` array of a
 * vector search index definition.
 *
 * `path` is treated as a dot-separated path into a document, `numDimensions`
 * is the element count an embedding is compared against, and `quantization`
 * selects which validation rules are applied to the stored value.
 */
export type VectorFieldIndexDefinition = {
7+
type: "vector";
8+
path: string;
9+
numDimensions: number;
10+
quantization: "none" | "scalar" | "binary";
11+
similarity: "euclidean" | "cosine" | "dotProduct";
12+
};
13+
14+
/** Cache key for a collection's vector field definitions, in `database.collection` form. */
export type EmbeddingNamespace = `${string}.${string}`;
15+
/**
 * Caches, per namespace, the vector field definitions of the vector search
 * indexes of a collection and uses them to check that the embeddings stored
 * in a document match what the indexes expect.
 *
 * The cache is emptied whenever the connection manager emits
 * `"connection-close"`, and a single namespace can be evicted with
 * {@link cleanupEmbeddingsForNamespace}.
 */
export class VectorSearchEmbeddingsManager {
    /**
     * @param config User configuration; only `disableEmbeddingsValidation` is read.
     * @param connectionManager Source of the current connection state and of
     * the `"connection-close"` event that invalidates the cache.
     * @param embeddings Backing cache, keyed by `database.collection`.
     */
    constructor(
        private readonly config: UserConfig,
        private readonly connectionManager: ConnectionManager,
        private readonly embeddings: Map<EmbeddingNamespace, VectorFieldIndexDefinition[]> = new Map()
    ) {
        connectionManager.events.on("connection-close", () => {
            this.embeddings.clear();
        });
    }

    /** Drops the cached vector field definitions of a single namespace. */
    cleanupEmbeddingsForNamespace({ database, collection }: { database: string; collection: string }): void {
        const embeddingDefKey: EmbeddingNamespace = `${database}.${collection}`;
        this.embeddings.delete(embeddingDefKey);
    }

    /**
     * Returns the vector field definitions declared by the vector search
     * indexes of the given namespace, fetching and caching them on first use.
     *
     * @returns An empty array when embeddings validation is disabled, when
     * there is no active connection, or when the cluster does not support
     * search indexes.
     */
    async embeddingsForNamespace({
        database,
        collection,
    }: {
        database: string;
        collection: string;
    }): Promise<VectorFieldIndexDefinition[]> {
        // We only need the embeddings for validation now, so when validation is
        // disabled return before probing the cluster or listing any index.
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        const provider = await this.assertAtlasSearchIsAvailable();
        if (!provider) {
            return [];
        }

        const embeddingDefKey: EmbeddingNamespace = `${database}.${collection}`;
        const definition = this.embeddings.get(embeddingDefKey);
        if (definition) {
            // An empty array is also a valid (truthy) cache entry.
            return definition;
        }

        const allSearchIndexes = await provider.getSearchIndexes(database, collection);
        const vectorSearchIndexes = allSearchIndexes.filter((index) => index.type === "vectorSearch");
        const vectorFields = vectorSearchIndexes
            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
            .flatMap<Document>((index) => (index.latestDefinition?.fields as Document) ?? [])
            .filter((field) => this.isVectorFieldIndexDefinition(field));

        this.embeddings.set(embeddingDefKey, vectorFields);
        return vectorFields;
    }

    /**
     * Returns the vector field definitions of the namespace for which the
     * given document holds a value that fails embedding validation.
     *
     * @returns An empty array when validation is disabled, when search is not
     * available, or when every indexed vector field of the document is valid
     * or absent.
     */
    async findFieldsWithWrongEmbeddings(
        {
            database,
            collection,
        }: {
            database: string;
            collection: string;
        },
        document: Document
    ): Promise<VectorFieldIndexDefinition[]> {
        // While we can do our best effort to ensure that the embedding validation is correct
        // based on https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/
        // it's a complex process so we will also give the user the ability to disable this validation
        if (this.config.disableEmbeddingsValidation) {
            return [];
        }

        // embeddingsForNamespace already resolves to [] when search is not
        // available, so there is no need to probe the cluster here as well.
        const embeddings = await this.embeddingsForNamespace({ database, collection });
        return embeddings.filter((emb) => !this.documentPassesEmbeddingValidation(emb, document));
    }

    /**
     * Resolves to the service provider of the current connection when it is
     * connected and reports search support, or to `null` otherwise.
     */
    private async assertAtlasSearchIsAvailable(): Promise<NodeDriverServiceProvider | null> {
        const connectionState = this.connectionManager.currentConnectionState;
        if (connectionState.tag === "connected" && (await connectionState.isSearchSupported())) {
            return connectionState.serviceProvider;
        }

        return null;
    }

    /** Narrows an index field entry to a vector field, based solely on its `type` tag. */
    private isVectorFieldIndexDefinition(doc: Document): doc is VectorFieldIndexDefinition {
        return doc["type"] === "vector";
    }

    /**
     * Checks the value found at `definition.path` in `document` against the
     * index definition.
     *
     * A document that does not contain the path passes, and so does any value
     * when quantization is `"none"`. For `"scalar"` and `"binary"` the value
     * must be either a BSON binary vector or an array of numbers whose length
     * equals `definition.numDimensions`.
     */
    private documentPassesEmbeddingValidation(definition: VectorFieldIndexDefinition, document: Document): boolean {
        const fieldPath = definition.path.split(".");
        let fieldRef: unknown = document;

        for (const field of fieldPath) {
            if (fieldRef && typeof fieldRef === "object" && field in fieldRef) {
                fieldRef = (fieldRef as Record<string, unknown>)[field];
            } else {
                // The document does not have the indexed field: nothing to validate.
                return true;
            }
        }

        switch (definition.quantization) {
            // Because quantization is not defined by the user
            // we have to trust them in the format they use.
            case "none":
                return true;
            case "scalar":
            case "binary":
                if (fieldRef instanceof BSON.Binary) {
                    try {
                        const elements = fieldRef.toFloat32Array();
                        return elements.length === definition.numDimensions;
                    } catch {
                        // bits are also supported
                        try {
                            const bits = fieldRef.toBits();
                            return bits.length === definition.numDimensions;
                        } catch {
                            return false;
                        }
                    }
                }

                return (
                    Array.isArray(fieldRef) &&
                    fieldRef.length === definition.numDimensions &&
                    fieldRef.every((e) => this.isANumber(e))
                );
        }

        // Quantization values outside the known set are not validated.
        return true;
    }

    /** Tells whether a value is a JS number or one of the BSON numeric wrappers. */
    private isANumber(value: unknown): boolean {
        if (typeof value === "number") {
            return true;
        }

        return (
            value instanceof BSON.Int32 ||
            value instanceof BSON.Decimal128 ||
            value instanceof BSON.Double ||
            value instanceof BSON.Long
        );
    }
}

0 commit comments

Comments
 (0)