Skip to content

Commit 3ada0ef

Browse files
authored
feat: add getSimplifiedSchema helper which returns a schema without any metadata COMPASS-6979 (#198)
1 parent d6c9a49 commit 3ada0ef

File tree

7 files changed

+425
-93
lines changed

7 files changed

+425
-93
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ below accordingly).
5555
3. Create a new file `parse-schema.js` and paste in the following code:
5656

5757
```javascript
58-
const parseSchema = require('mongodb-schema');
58+
const { parseSchema } = require('mongodb-schema');
5959
const { MongoClient } = require('mongodb');
6060

6161
const dbName = 'test';

src/index.ts

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { Readable, PassThrough } from 'stream';
33
import { pipeline } from 'stream/promises';
44

55
import stream from './stream';
6+
import type { ParseStreamOptions } from './stream';
67
import { SchemaAnalyzer } from './schema-analyzer';
78
import type {
89
ArraySchemaType,
@@ -13,7 +14,13 @@ import type {
1314
SchemaType,
1415
Schema,
1516
SchemaField,
16-
SchemaParseOptions
17+
SchemaParseOptions,
18+
SimplifiedSchemaBaseType,
19+
SimplifiedSchemaArrayType,
20+
SimplifiedSchemaDocumentType,
21+
SimplifiedSchemaType,
22+
SimplifiedSchemaField,
23+
SimplifiedSchema
1724
} from './schema-analyzer';
1825
import * as schemaStats from './stats';
1926

@@ -42,43 +49,47 @@ function getStreamSource(
4249
return streamSource;
4350
}
4451

45-
/**
46-
* Convenience shortcut for parsing schemas. Accepts an array, stream or
47-
* MongoDB cursor object to parse documents` from.
48-
*/
49-
async function parseSchema(
52+
async function schemaStream(
5053
source: Document[] | MongoDBCursor | Readable,
51-
options?: SchemaParseOptions
52-
): Promise<Schema> {
53-
// Shift parameters if no options are specified.
54-
if (typeof options === 'undefined') {
55-
options = {};
56-
}
57-
54+
options?: ParseStreamOptions
55+
) {
5856
const streamSource = getStreamSource(source);
5957

6058
const dest = new PassThrough({ objectMode: true });
6159
await pipeline(streamSource, stream(options), dest);
6260
for await (const result of dest) {
6361
return result;
6462
}
65-
throw new Error('unreachable'); // `dest` always emits one doc.
63+
throw new Error('unreachable'); // `dest` always emits exactly one doc.
64+
}
65+
66+
/**
67+
* Convenience shortcut for parsing schemas. Accepts an array, stream or
68+
* MongoDB cursor object to parse documents from.
69+
*/
70+
async function parseSchema(
71+
source: Document[] | MongoDBCursor | Readable,
72+
options?: SchemaParseOptions
73+
): Promise<Schema> {
74+
return await schemaStream(source, options);
6675
}
6776

6877
// Convenience shortcut for getting schema paths.
6978
async function getSchemaPaths(
7079
source: Document[] | MongoDBCursor | Readable
7180
): Promise<string[][]> {
72-
const streamSource = getStreamSource(source);
73-
74-
const dest = new PassThrough({ objectMode: true });
75-
await pipeline(streamSource, stream({
81+
return await schemaStream(source, {
7682
schemaPaths: true
77-
}), dest);
78-
for await (const result of dest) {
79-
return result;
80-
}
81-
throw new Error('unreachable'); // `dest` always emits one doc.
83+
});
84+
}
85+
86+
// Convenience shortcut for getting the simplified schema.
87+
async function getSimplifiedSchema(
88+
source: Document[] | MongoDBCursor | Readable
89+
): Promise<SimplifiedSchema> {
90+
return await schemaStream(source, {
91+
simplifiedSchema: true
92+
});
8293
}
8394

8495
export default parseSchema;
@@ -92,12 +103,20 @@ export type {
92103
SchemaType,
93104
Schema,
94105
SchemaField,
95-
SchemaParseOptions
106+
SchemaParseOptions,
107+
SimplifiedSchemaBaseType,
108+
SimplifiedSchemaArrayType,
109+
SimplifiedSchemaDocumentType,
110+
SimplifiedSchemaType,
111+
SimplifiedSchemaField,
112+
SimplifiedSchema
96113
};
97114

98115
export {
99116
stream,
117+
parseSchema,
100118
getSchemaPaths,
119+
getSimplifiedSchema,
101120
SchemaAnalyzer,
102121
schemaStats
103122
};

src/schema-analyzer.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,55 @@ function schemaToPaths(
270270
return paths;
271271
}
272272

273+
export type SimplifiedSchemaBaseType = {
274+
bsonType: SchemaBSONType;
275+
}
276+
export type SimplifiedSchemaArrayType = SimplifiedSchemaBaseType & {
277+
bsonType: 'Array';
278+
// eslint-disable-next-line no-use-before-define
279+
types: SimplifiedSchemaType[];
280+
}
281+
export type SimplifiedSchemaDocumentType = SimplifiedSchemaBaseType & {
282+
bsonType: 'Document';
283+
// eslint-disable-next-line no-use-before-define
284+
fields: SimplifiedSchema;
285+
}
286+
export type SimplifiedSchemaType = SimplifiedSchemaBaseType | SimplifiedSchemaArrayType | SimplifiedSchemaDocumentType;
287+
export type SimplifiedSchemaField = {
288+
types: SimplifiedSchemaType[];
289+
};
290+
export type SimplifiedSchema = {
291+
[fieldName: string]: SimplifiedSchemaField
292+
}
293+
294+
function simplifiedSchema(fields: SchemaAnalysisFieldsMap): SimplifiedSchema {
295+
function finalizeSchemaFieldTypes(types: SchemaAnalysisFieldTypes): SimplifiedSchemaType[] {
296+
return Object.values(types).map((type) => {
297+
return {
298+
bsonType: type.bsonType, // Note: `Object` is replaced with `Document`.
299+
...(isArrayType(type) ? {
300+
types: finalizeSchemaFieldTypes(type.types)
301+
} : {}),
302+
...(isDocumentType(type) ? { fields: finalizeDocumentFieldSchema(type.fields) } : {})
303+
};
304+
});
305+
}
306+
307+
function finalizeDocumentFieldSchema(fieldMap: SchemaAnalysisFieldsMap): SimplifiedSchema {
308+
const fieldSchema: SimplifiedSchema = {};
309+
Object.values(fieldMap).forEach((field: SchemaAnalysisField) => {
310+
const fieldTypes = finalizeSchemaFieldTypes(field.types);
311+
312+
fieldSchema[field.name] = {
313+
types: fieldTypes
314+
};
315+
});
316+
return fieldSchema;
317+
}
318+
319+
return finalizeDocumentFieldSchema(fields);
320+
}
321+
273322
function cropStringAt10kCharacters(value: string) {
274323
return value.charCodeAt(10000 - 1) === value.codePointAt(10000 - 1)
275324
? value.slice(0, 10000)
@@ -523,4 +572,11 @@ export class SchemaAnalyzer {
523572
getSchemaPaths(): string[][] {
524573
return schemaToPaths(this.schemaAnalysisRoot.fields);
525574
}
575+
576+
/**
577+
* Returns a simplified schema: only field names and their BSON types, without any other metadata.
578+
*/
579+
getSimplifiedSchema(): SimplifiedSchema {
580+
return simplifiedSchema(this.schemaAnalysisRoot.fields);
581+
}
526582
}

src/stream.ts

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,19 @@ import type {
77
import { SchemaAnalyzer } from './schema-analyzer';
88
import type { SchemaParseOptions } from './schema-analyzer';
99

10+
export type ParseStreamOptions = SchemaParseOptions & {
11+
simplifiedSchema?: boolean,
12+
schemaPaths?: boolean;
13+
};
14+
1015
export class ParseStream extends Duplex {
1116
analyzer: SchemaAnalyzer;
17+
options: ParseStreamOptions;
1218
schemaPaths = false;
13-
constructor(options?: SchemaParseOptions & {
14-
schemaPaths?: boolean;
15-
}) {
19+
20+
constructor(options?: ParseStreamOptions) {
1621
super({ objectMode: true });
17-
this.schemaPaths = !!options?.schemaPaths;
22+
this.options = options || {};
1823
this.analyzer = new SchemaAnalyzer(options);
1924
}
2025

@@ -27,8 +32,10 @@ export class ParseStream extends Duplex {
2732
_read() {}
2833

2934
_final(cb: () => void) {
30-
if (this.schemaPaths) {
35+
if (this.options.schemaPaths) {
3136
this.push(this.analyzer.getSchemaPaths());
37+
} else if (this.options.simplifiedSchema) {
38+
this.push(this.analyzer.getSimplifiedSchema());
3239
} else {
3340
this.push(this.analyzer.getResult());
3441
}
@@ -37,8 +44,6 @@ export class ParseStream extends Duplex {
3744
}
3845
}
3946

40-
export default function makeParseStream(options?: SchemaParseOptions & {
41-
schemaPaths?: boolean;
42-
}) {
47+
export default function makeParseStream(options?: ParseStreamOptions) {
4348
return new ParseStream(options);
4449
}

test/all-bson-types-fixture.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import {
2+
BSONRegExp,
3+
Binary,
4+
Code,
5+
DBRef,
6+
Decimal128,
7+
Double,
8+
Int32,
9+
Long,
10+
MaxKey,
11+
MinKey,
12+
ObjectId,
13+
Timestamp,
14+
UUID,
15+
BSONSymbol
16+
} from 'bson';
17+
18+
export const allBSONTypesDoc = {
19+
_id: new ObjectId('642d766b7300158b1f22e972'),
20+
double: new Double(1.2), // Double, 1, double
21+
string: 'Hello, world!', // String, 2, string
22+
object: { key: 'value' }, // Object, 3, object
23+
array: [1, 2, 3], // Array, 4, array
24+
binData: new Binary(Buffer.from([1, 2, 3])), // Binary data, 5, binData
25+
// Undefined, 6, undefined (deprecated)
26+
objectId: new ObjectId('642d766c7300158b1f22e975'), // ObjectId, 7, objectId
27+
boolean: true, // Boolean, 8, boolean
28+
date: new Date('2023-04-05T13:25:08.445Z'), // Date, 9, date
29+
null: null, // Null, 10, null
30+
regex: new BSONRegExp('pattern', 'i'), // Regular Expression, 11, regex
31+
// DBPointer, 12, dbPointer (deprecated)
32+
javascript: new Code('function() {}'), // JavaScript, 13, javascript
33+
symbol: new BSONSymbol('symbol'), // Symbol, 14, symbol (deprecated)
34+
javascriptWithScope: new Code('function() {}', { foo: 1, bar: 'a' }), // JavaScript code with scope 15 "javascriptWithScope" Deprecated in MongoDB 4.4.
35+
int: new Int32(12345), // 32-bit integer, 16, "int"
36+
timestamp: new Timestamp(new Long('7218556297505931265')), // Timestamp, 17, timestamp
37+
long: new Long('123456789123456789'), // 64-bit integer, 18, long
38+
decimal: new Decimal128(
39+
Buffer.from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
40+
), // Decimal128, 19, decimal
41+
minKey: new MinKey(), // Min key, -1, minKey
42+
maxKey: new MaxKey(), // Max key, 127, maxKey
43+
44+
binaries: {
45+
generic: new Binary(Buffer.from([1, 2, 3]), 0), // 0
46+
functionData: new Binary('//8=', 1), // 1
47+
binaryOld: new Binary('//8=', 2), // 2
48+
uuidOld: new Binary('c//SZESzTGmQ6OfR38A11A==', 3), // 3
49+
uuid: new UUID('AAAAAAAA-AAAA-4AAA-AAAA-AAAAAAAAAAAA'), // 4
50+
md5: new Binary('c//SZESzTGmQ6OfR38A11A==', 5), // 5
51+
encrypted: new Binary('c//SZESzTGmQ6OfR38A11A==', 6), // 6
52+
compressedTimeSeries: new Binary('c//SZESzTGmQ6OfR38A11A==', 7), // 7
53+
custom: new Binary('//8=', 128) // 128
54+
},
55+
56+
dbRef: new DBRef('namespace', new ObjectId('642d76b4b7ebfab15d3c4a78')) // not actually a separate type, just a convention
57+
};

test/all-bson-types.test.ts

Lines changed: 2 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,13 @@
1-
import {
2-
BSONRegExp,
3-
Binary,
4-
Code,
5-
DBRef,
6-
Decimal128,
7-
Double,
8-
Int32,
9-
Long,
10-
MaxKey,
11-
MinKey,
12-
ObjectId,
13-
Timestamp,
14-
UUID,
15-
BSONSymbol
16-
} from 'bson';
171
import assert from 'assert';
182

193
import type { Schema } from '../src/schema-analyzer';
204
import getSchema from '../src';
21-
22-
const allBsonTypes = [
23-
{
24-
_id: new ObjectId('642d766b7300158b1f22e972'),
25-
double: new Double(1.2), // Double, 1, double
26-
string: 'Hello, world!', // String, 2, string
27-
object: { key: 'value' }, // Object, 3, object
28-
array: [1, 2, 3], // Array, 4, array
29-
binData: new Binary(Buffer.from([1, 2, 3])), // Binary data, 5, binData
30-
// Undefined, 6, undefined (deprecated)
31-
objectId: new ObjectId('642d766c7300158b1f22e975'), // ObjectId, 7, objectId
32-
boolean: true, // Boolean, 8, boolean
33-
date: new Date('2023-04-05T13:25:08.445Z'), // Date, 9, date
34-
null: null, // Null, 10, null
35-
regex: new BSONRegExp('pattern', 'i'), // Regular Expression, 11, regex
36-
// DBPointer, 12, dbPointer (deprecated)
37-
javascript: new Code('function() {}'), // JavaScript, 13, javascript
38-
symbol: new BSONSymbol('symbol'), // Symbol, 14, symbol (deprecated)
39-
javascriptWithScope: new Code('function() {}', { foo: 1, bar: 'a' }), // JavaScript code with scope 15 "javascriptWithScope" Deprecated in MongoDB 4.4.
40-
int: new Int32(12345), // 32-bit integer, 16, "int"
41-
timestamp: new Timestamp(new Long('7218556297505931265')), // Timestamp, 17, timestamp
42-
long: new Long('123456789123456789'), // 64-bit integer, 18, long
43-
decimal: new Decimal128(
44-
Buffer.from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
45-
), // Decimal128, 19, decimal
46-
minKey: new MinKey(), // Min key, -1, minKey
47-
maxKey: new MaxKey(), // Max key, 127, maxKey
48-
49-
binaries: {
50-
generic: new Binary(Buffer.from([1, 2, 3]), 0), // 0
51-
functionData: new Binary('//8=', 1), // 1
52-
binaryOld: new Binary('//8=', 2), // 2
53-
uuidOld: new Binary('c//SZESzTGmQ6OfR38A11A==', 3), // 3
54-
uuid: new UUID('AAAAAAAA-AAAA-4AAA-AAAA-AAAAAAAAAAAA'), // 4
55-
md5: new Binary('c//SZESzTGmQ6OfR38A11A==', 5), // 5
56-
encrypted: new Binary('c//SZESzTGmQ6OfR38A11A==', 6), // 6
57-
compressedTimeSeries: new Binary('c//SZESzTGmQ6OfR38A11A==', 7), // 7
58-
custom: new Binary('//8=', 128) // 128
59-
},
60-
61-
dbRef: new DBRef('namespace', new ObjectId('642d76b4b7ebfab15d3c4a78')) // not actually a separate type, just a convention
62-
}
63-
];
5+
import { allBSONTypesDoc } from './all-bson-types-fixture';
646

657
describe('using a document with all bson types', function() {
668
let schema: Schema;
679
before(async function() {
68-
schema = await getSchema(allBsonTypes);
10+
schema = await getSchema([allBSONTypesDoc]);
6911
});
7012

7113
it('contains all of the types', function() {

0 commit comments

Comments
 (0)