Skip to content

Commit 25e0367

Browse files
chore: PR feedback about generous config defaults
This commit implements the PR feedback: it makes the config defaults more generous and applies the recommended restrictions on the tool parameters so that memory usage stays capped.
1 parent 21f1d3e commit 25e0367

File tree

10 files changed

+544
-220
lines changed

10 files changed

+544
-220
lines changed

src/common/config.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import levenshtein from "ts-levenshtein";
99

1010
// From: https://github.com/mongodb-js/mongosh/blob/main/packages/cli-repl/src/arg-parser.ts
1111
const OPTIONS = {
12+
number: ["maxDocumentsPerQuery", "maxBytesPerQuery"],
1213
string: [
1314
"apiBaseUrl",
1415
"apiClientId",
@@ -98,6 +99,7 @@ const OPTIONS = {
9899

99100
interface Options {
100101
string: string[];
102+
number: string[];
101103
boolean: string[];
102104
array: string[];
103105
alias: Record<string, string>;
@@ -106,6 +108,7 @@ interface Options {
106108

107109
export const ALL_CONFIG_KEYS = new Set(
108110
(OPTIONS.string as readonly string[])
111+
.concat(OPTIONS.number)
109112
.concat(OPTIONS.array)
110113
.concat(OPTIONS.boolean)
111114
.concat(Object.keys(OPTIONS.alias))
@@ -204,8 +207,8 @@ export const defaultUserConfig: UserConfig = {
204207
idleTimeoutMs: 10 * 60 * 1000, // 10 minutes
205208
notificationTimeoutMs: 9 * 60 * 1000, // 9 minutes
206209
httpHeaders: {},
207-
maxDocumentsPerQuery: 10, // By default, we only fetch a maximum 10 documents per query / aggregation
208-
maxBytesPerQuery: 1 * 1024 * 1024, // By default, we only return ~1 mb of data per query / aggregation
210+
maxDocumentsPerQuery: 100, // By default, we only fetch a maximum 100 documents per query / aggregation
211+
maxBytesPerQuery: 16 * 1024 * 1024, // By default, we only return ~16 mb of data per query / aggregation
209212
atlasTemporaryDatabaseUserLifetimeMs: 4 * 60 * 60 * 1000, // 4 hours
210213
};
211214

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import { calculateObjectSize } from "bson";
2+
import type { AggregationCursor, FindCursor } from "mongodb";
3+
4+
export function getResponseBytesLimit(
5+
toolResponseBytesLimit: number | undefined | null,
6+
configuredMaxBytesPerQuery: unknown
7+
): {
8+
cappedBy: "config.maxBytesPerQuery" | "tool.responseBytesLimit" | undefined;
9+
limit: number;
10+
} {
11+
const configuredLimit: number = parseInt(String(configuredMaxBytesPerQuery), 10);
12+
13+
// Setting configured maxBytesPerQuery to negative, zero or nullish is
14+
// equivalent to disabling the max limit applied on documents
15+
const configuredLimitIsNotApplicable = Number.isNaN(configuredLimit) || configuredLimit <= 0;
16+
17+
// It's possible to have tool parameter responseBytesLimit as null or
18+
// negative values in which case we consider that no limit is to be
19+
// applied from tool call perspective unless we have a maxBytesPerQuery
20+
// configured.
21+
const toolResponseLimitIsNotApplicable = typeof toolResponseBytesLimit !== "number" || toolResponseBytesLimit <= 0;
22+
23+
if (configuredLimitIsNotApplicable) {
24+
return {
25+
cappedBy: toolResponseLimitIsNotApplicable ? undefined : "tool.responseBytesLimit",
26+
limit: toolResponseLimitIsNotApplicable ? 0 : toolResponseBytesLimit,
27+
};
28+
}
29+
30+
if (toolResponseLimitIsNotApplicable) {
31+
return { cappedBy: "config.maxBytesPerQuery", limit: configuredLimit };
32+
}
33+
34+
return {
35+
cappedBy: configuredLimit < toolResponseBytesLimit ? "config.maxBytesPerQuery" : "tool.responseBytesLimit",
36+
limit: Math.min(toolResponseBytesLimit, configuredLimit),
37+
};
38+
}
39+
40+
/**
 * This function attempts to put a guard rail against accidental memory overflow
 * on the MCP server.
 *
 * The cursor is iterated until we can predict that fetching next doc won't
 * exceed the derived limit on number of bytes for the tool call. The derived
 * limit takes into account the limit provided from the Tool's interface and the
 * configured maxBytesPerQuery for the server.
 *
 * Sizes are measured as BSON byte sizes (via `calculateObjectSize`), not as the
 * size of the serialized tool response.
 *
 * @param cursor - Find/aggregation cursor to drain.
 * @param toolResponseBytesLimit - Byte limit requested by the tool call;
 *   nullish/non-positive means the tool requests no limit.
 * @param configuredMaxBytesPerQuery - Raw server config value for the maximum
 *   bytes per query.
 * @param abortSignal - Optional signal; when aborted, iteration stops and the
 *   documents buffered so far are returned (not reported as capped).
 * @returns The buffered documents plus which limit (if any) capped them;
 *   `cappedBy` is `undefined` when the cursor was fully drained.
 */
export async function collectCursorUntilMaxBytesLimit<T = unknown>({
    cursor,
    toolResponseBytesLimit,
    configuredMaxBytesPerQuery,
    abortSignal,
}: {
    cursor: FindCursor<T> | AggregationCursor<T>;
    toolResponseBytesLimit: number | undefined | null;
    configuredMaxBytesPerQuery: unknown;
    abortSignal?: AbortSignal;
}): Promise<{ cappedBy: "config.maxDocumentsPerQuery" extends never ? never : "config.maxBytesPerQuery" | "tool.responseBytesLimit" | undefined; documents: T[] }> {
    const { limit: maxBytesPerQuery, cappedBy } = getResponseBytesLimit(
        toolResponseBytesLimit,
        configuredMaxBytesPerQuery
    );

    // It's possible to have no limit on the cursor response by setting both the
    // config.maxBytesPerQuery and tool.responseBytesLimit to nullish or
    // negative values.
    if (maxBytesPerQuery <= 0) {
        return {
            cappedBy,
            documents: await cursor.toArray(),
        };
    }

    let wasCapped: boolean = false;
    let totalBytes = 0;
    // Largest single document seen so far; used as a conservative estimate of
    // how big the *next* document might be.
    let biggestDocSizeSoFar = 0;
    const bufferedDocuments: T[] = [];
    while (true) {
        if (abortSignal?.aborted) {
            break;
        }

        // This is an eager attempt to validate that fetching the next document
        // won't exceed the applicable limit.
        if (totalBytes + biggestDocSizeSoFar >= maxBytesPerQuery) {
            wasCapped = true;
            break;
        }

        // If the cursor is empty then there is nothing for us to do anymore.
        const nextDocument = await cursor.tryNext();
        if (!nextDocument) {
            break;
        }

        // The eager estimate above can be wrong (the next doc may be bigger
        // than any seen so far), so re-check with the actual size. A document
        // that would push us past the limit is dropped, not buffered.
        const nextDocumentSize = calculateObjectSize(nextDocument);
        if (totalBytes + nextDocumentSize >= maxBytesPerQuery) {
            wasCapped = true;
            break;
        }

        totalBytes += nextDocumentSize;
        biggestDocSizeSoFar = Math.max(biggestDocSizeSoFar, nextDocumentSize);
        bufferedDocuments.push(nextDocument);
    }

    return {
        cappedBy: wasCapped ? cappedBy : undefined,
        documents: bufferedDocuments,
    };
}

src/helpers/constants.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,15 @@ export const QUERY_COUNT_MAX_TIME_MS_CAP: number = 10_000;
1212
* aggregation.
1313
*/
1414
export const AGG_COUNT_MAX_TIME_MS_CAP: number = 60_000;
15+
16+
export const ONE_MB: number = 1 * 1024 * 1024;
17+
18+
/**
19+
* A map of applied limit on cursors to a text that is supposed to be sent as
20+
* response to LLM
21+
*/
22+
export const CURSOR_LIMITS_TO_LLM_TEXT = {
23+
"config.maxDocumentsPerQuery": "server's configured - maxDocumentsPerQuery",
24+
"config.maxBytesPerQuery": "server's configured - maxBytesPerQuery",
25+
"tool.responseBytesLimit": "tool's parameter - responseBytesLimit",
26+
} as const;

src/helpers/iterateCursor.ts

Lines changed: 0 additions & 54 deletions
This file was deleted.

src/tools/mongodb/read/aggregate.ts

Lines changed: 50 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,16 @@ import { formatUntrustedData } from "../../tool.js";
88
import { checkIndexUsage } from "../../../helpers/indexCheck.js";
99
import { type Document, EJSON } from "bson";
1010
import { ErrorCodes, MongoDBError } from "../../../common/errors.js";
11-
import { iterateCursorUntilMaxBytes } from "../../../helpers/iterateCursor.js";
11+
import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js";
1212
import { operationWithFallback } from "../../../helpers/operationWithFallback.js";
13-
import { AGG_COUNT_MAX_TIME_MS_CAP } from "../../../helpers/constants.js";
13+
import { AGG_COUNT_MAX_TIME_MS_CAP, ONE_MB, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js";
1414

1515
export const AggregateArgs = {
1616
pipeline: z.array(z.object({}).passthrough()).describe("An array of aggregation stages to execute"),
17+
responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
18+
The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. \
19+
Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\
20+
`),
1721
};
1822

1923
export class AggregateTool extends MongoDBToolBase {
@@ -26,7 +30,7 @@ export class AggregateTool extends MongoDBToolBase {
2630
public operationType: OperationType = "read";
2731

2832
protected async execute(
29-
{ database, collection, pipeline }: ToolArgs<typeof this.argsShape>,
33+
{ database, collection, pipeline, responseBytesLimit }: ToolArgs<typeof this.argsShape>,
3034
{ signal }: ToolExecutionContext
3135
): Promise<CallToolResult> {
3236
let aggregationCursor: AggregationCursor | undefined;
@@ -50,29 +54,36 @@ export class AggregateTool extends MongoDBToolBase {
5054
}
5155
aggregationCursor = provider.aggregate(database, collection, cappedResultsPipeline);
5256

53-
const [totalDocuments, documents] = await Promise.all([
57+
const [totalDocuments, cursorResults] = await Promise.all([
5458
this.countAggregationResultDocuments({ provider, database, collection, pipeline }),
55-
iterateCursorUntilMaxBytes({
59+
collectCursorUntilMaxBytesLimit({
5660
cursor: aggregationCursor,
57-
maxBytesPerQuery: this.config.maxBytesPerQuery,
61+
configuredMaxBytesPerQuery: this.config.maxBytesPerQuery,
62+
toolResponseBytesLimit: responseBytesLimit,
5863
abortSignal: signal,
5964
}),
6065
]);
6166

62-
let messageDescription = `\
63-
The aggregation resulted in ${totalDocuments === undefined ? "indeterminable number of" : totalDocuments} documents.\
64-
`;
65-
if (documents.length) {
66-
messageDescription += ` \
67-
Returning ${documents.length} documents while respecting the applied limits. \
68-
Note to LLM: If entire aggregation result is needed then use "export" tool to export the aggregation results.\
69-
`;
70-
}
67+
// If the total number of documents that the aggregation would've
68+
// resulted in would be greater than the configured
69+
// maxDocumentsPerQuery then we know for sure that the results were
70+
// capped.
71+
const aggregationResultsCappedByMaxDocumentsLimit =
72+
this.config.maxDocumentsPerQuery > 0 &&
73+
!!totalDocuments &&
74+
totalDocuments > this.config.maxDocumentsPerQuery;
7175

7276
return {
7377
content: formatUntrustedData(
74-
messageDescription,
75-
documents.length > 0 ? EJSON.stringify(documents) : undefined
78+
this.generateMessage({
79+
aggResultsCount: totalDocuments,
80+
documents: cursorResults.documents,
81+
appliedLimits: [
82+
aggregationResultsCappedByMaxDocumentsLimit ? "config.maxDocumentsPerQuery" : undefined,
83+
cursorResults.cappedBy,
84+
].filter((limit): limit is keyof typeof CURSOR_LIMITS_TO_LLM_TEXT => !!limit),
85+
}),
86+
cursorResults.documents.length > 0 ? EJSON.stringify(cursorResults.documents) : undefined
7687
),
7788
};
7889
} finally {
@@ -132,4 +143,26 @@ Note to LLM: If entire aggregation result is needed then use "export" tool to ex
132143
return totalDocuments;
133144
}, undefined);
134145
}
146+
147+
    /**
     * Builds the summary line sent to the LLM for an aggregation response:
     * the total result count, how many documents are actually returned, and —
     * when any limit kicked in — which limits those were.
     *
     * @param aggResultsCount - Total documents the aggregation would produce;
     *   undefined when the count could not be determined.
     * @param documents - The documents actually being returned.
     * @param appliedLimits - Keys of the limits that capped the results; empty
     *   when nothing was capped.
     */
    private generateMessage({
        aggResultsCount,
        documents,
        appliedLimits,
    }: {
        aggResultsCount: number | undefined;
        documents: unknown[];
        appliedLimits: (keyof typeof CURSOR_LIMITS_TO_LLM_TEXT)[];
    }): string {
        // NOTE: the trailing "\" in these template literals suppresses the
        // newline, so each message renders as a single line.
        const appliedLimitText = appliedLimits.length
            ? `\
while respecting the applied limits of ${appliedLimits.map((limit) => CURSOR_LIMITS_TO_LLM_TEXT[limit]).join(", ")}. \
Note to LLM: If the entire query result is required then use "export" tool to export the query results.\
`
            : "";

        return `\
The aggregation resulted in ${aggResultsCount === undefined ? "indeterminable number of" : aggResultsCount} documents. \
Returning ${documents.length} documents${appliedLimitText ? ` ${appliedLimitText}` : "."}\
`;
    }
135168
}

0 commit comments

Comments
 (0)