Skip to content

Commit a3f9d31

Browse files
authored
feat: add getSchemaPaths helper (#192)
1 parent 944f574 commit a3f9d31

File tree

4 files changed

+212
-23
lines changed

4 files changed

+212
-23
lines changed

src/index.ts

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import type { AggregationCursor, Document, FindCursor } from 'mongodb';
2-
import { pipeline as callbackPipeline, Readable, PassThrough } from 'stream';
3-
import { promisify } from 'util';
2+
import { Readable, PassThrough } from 'stream';
3+
import { pipeline } from 'stream/promises';
44

55
import stream from './stream';
66
import { SchemaAnalyzer } from './schema-analyzer';
@@ -19,39 +19,62 @@ import * as schemaStats from './stats';
1919

2020
type MongoDBCursor = AggregationCursor | FindCursor;
2121

22+
/**
 * Normalizes the supported document inputs into a single `Readable`.
 *
 * @param source - An array of documents, a stream of documents, or a
 *   MongoDB cursor.
 * @returns `source.stream()` for a cursor, `source` itself when it is
 *   already a stream, or `Readable.from(source)` for an array.
 * @throws Error when `source` is none of the supported input types,
 *   including `null`, `undefined` and primitive values.
 */
function getStreamSource(
  source: Document[] | MongoDBCursor | Readable
): Readable {
  // The `in` operator throws a TypeError on `null`, `undefined` and
  // primitives, so only probe values it can be applied to. Anything else
  // falls through to the descriptive error below.
  const kind = typeof source;
  const supportsIn =
    source !== null && (kind === 'object' || kind === 'function');

  if (supportsIn) {
    if ('stream' in source) {
      // MongoDB Cursor.
      return source.stream();
    }
    if ('pipe' in source) {
      // Document stream.
      return source;
    }
    if (Array.isArray(source)) {
      // Array of documents.
      return Readable.from(source);
    }
  }

  throw new Error(
    'Unknown input type for `docs`. Must be an array, ' +
    'stream or MongoDB Cursor.'
  );
}
44+
2245
/**
2346
* Convenience shortcut for parsing schemas. Accepts an array, stream or
2447
* MongoDB cursor object to parse documents from.
2548
*/
2649
async function parseSchema(
27-
docs: Document[] | MongoDBCursor | Readable,
50+
source: Document[] | MongoDBCursor | Readable,
2851
options?: SchemaParseOptions
2952
): Promise<Schema> {
3053
// Shift parameters if no options are specified.
3154
if (typeof options === 'undefined') {
3255
options = {};
3356
}
3457

35-
let src: Readable;
36-
if ('stream' in docs) {
37-
// MongoDB Cursor.
38-
src = docs.stream();
39-
} else if ('pipe' in docs) {
40-
// Document stream.
41-
src = docs;
42-
} else if (Array.isArray(docs)) {
43-
// Array of documents.
44-
src = Readable.from(docs);
45-
} else {
46-
throw new Error(
47-
'Unknown input type for `docs`. Must be an array, ' +
48-
'stream or MongoDB Cursor.'
49-
);
58+
const streamSource = getStreamSource(source);
59+
60+
const dest = new PassThrough({ objectMode: true });
61+
await pipeline(streamSource, stream(options), dest);
62+
for await (const result of dest) {
63+
return result;
5064
}
65+
throw new Error('unreachable'); // `dest` always emits one doc.
66+
}
67+
68+
// Convenience shortcut for getting schema paths.
69+
async function getSchemaPaths(
70+
source: Document[] | MongoDBCursor | Readable
71+
): Promise<string[][]> {
72+
const streamSource = getStreamSource(source);
5173

5274
const dest = new PassThrough({ objectMode: true });
53-
const pipeline = promisify(callbackPipeline);
54-
await pipeline(src, stream(options), dest);
75+
await pipeline(streamSource, stream({
76+
schemaPaths: true
77+
}), dest);
5578
for await (const result of dest) {
5679
return result;
5780
}
@@ -74,6 +97,7 @@ export type {
7497

7598
export {
7699
stream,
100+
getSchemaPaths,
77101
SchemaAnalyzer,
78102
schemaStats
79103
};

src/schema-analyzer.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,46 @@ function isDocumentType(type: SchemaAnalysisType): type is SchemaAnalysisDocumen
225225
return (type as SchemaAnalysisDocumentType).name === 'Document';
226226
}
227227

228+
/**
 * Recursively extracts all of the schema field paths as string arrays.
 *
 * Every field contributes its own path (the parent path plus the field
 * name), followed by the paths of the fields nested in its `Document` type
 * and in the `Document` type found inside its `Array` type, when present.
 *
 * A field seen both as a document and as an array of documents can reach
 * the same nested path twice; each path is reported only once, at the
 * position of its first occurrence.
 *
 * @param fields - Map of analyzed fields for one document level.
 * @param parent - Path segments leading to `fields`; empty at the root.
 * @returns The path of every field, each parent before its children.
 */
function schemaToPaths(
  fields: SchemaAnalysisFieldsMap,
  parent: string[] = []
): string[][] {
  const paths: string[][] = [];

  // Paths are keyed by their JSON form: field names may contain dots, so
  // joining the segments with a separator would not be unambiguous.
  const seen = new Set<string>();
  const addPath = (candidate: string[]): void => {
    const key = JSON.stringify(candidate);
    if (!seen.has(key)) {
      seen.add(key);
      paths.push(candidate);
    }
  };

  for (const field of Object.values(fields)) {
    const path = [...parent, field.name];
    addPath(path);

    const fieldTypes = Object.values(field.types);

    // Recurse on doc.
    const doc = fieldTypes.find((f) => f.bsonType === 'Document') as
      | SchemaAnalysisDocumentType
      | undefined;

    if (doc) {
      for (const nested of schemaToPaths(doc.fields, path)) {
        addPath(nested);
      }
    }

    // Recurse on array.
    const array = fieldTypes.find((f) => f.bsonType === 'Array') as
      | SchemaAnalysisArrayType
      | undefined;

    if (array) {
      const arrayDoc = Object.values(array.types).find((f) => f.bsonType === 'Document') as
        | SchemaAnalysisDocumentType
        | undefined;

      if (arrayDoc) {
        // Documents inside the array share the array field's own path,
        // so these may repeat paths already found through `doc`.
        for (const nested of schemaToPaths(arrayDoc.fields, path)) {
          addPath(nested);
        }
      }
    }
  }

  return paths;
}
267+
228268
function cropStringAt10kCharacters(value: string) {
229269
return value.charCodeAt(10000 - 1) === value.codePointAt(10000 - 1)
230270
? value.slice(0, 10000)
@@ -474,4 +514,8 @@ export class SchemaAnalyzer {
474514
this.finalized = true;
475515
return this.schemaResult;
476516
}
517+
518+
/**
 * Returns the field paths of `this.schemaAnalysisRoot.fields`, as
 * produced by `schemaToPaths`.
 */
getSchemaPaths(): string[][] {
  const { fields } = this.schemaAnalysisRoot;
  return schemaToPaths(fields);
}
477521
}

src/stream.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@ import type { SchemaParseOptions } from './schema-analyzer';
99

1010
export class ParseStream extends Duplex {
1111
analyzer: SchemaAnalyzer;
12-
constructor(options?: SchemaParseOptions) {
12+
schemaPaths = false;
13+
constructor(options?: SchemaParseOptions & {
14+
schemaPaths?: boolean;
15+
}) {
1316
super({ objectMode: true });
17+
this.schemaPaths = !!options?.schemaPaths;
1418
this.analyzer = new SchemaAnalyzer(options);
1519
}
1620

@@ -23,12 +27,18 @@ export class ParseStream extends Duplex {
2327
_read() {}
2428

2529
_final(cb: () => void) {
26-
this.push(this.analyzer.getResult());
30+
if (this.schemaPaths) {
31+
this.push(this.analyzer.getSchemaPaths());
32+
} else {
33+
this.push(this.analyzer.getResult());
34+
}
2735
this.push(null);
2836
cb();
2937
}
3038
}
3139

32-
export default function makeParseStream(options?: SchemaParseOptions) {
40+
/**
 * Creates a `ParseStream` configured with `options`.
 *
 * @param options - Schema parse options, optionally extended with
 *   `schemaPaths`; passed unchanged to the `ParseStream` constructor.
 * @returns The newly constructed `ParseStream`.
 */
export default function makeParseStream(
  options?: SchemaParseOptions & { schemaPaths?: boolean }
) {
  const parseStream = new ParseStream(options);
  return parseStream;
}

test/get-schema-paths.test.ts

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import assert from 'assert';
2+
import type { Document } from 'bson';
3+
4+
import { getSchemaPaths } from '../src';
5+
6+
describe('getSchemaPaths', function() {
7+
let schemaPaths: string[][];
8+
9+
describe('with fields with dots in them', function() {
10+
const docs = [
11+
{
12+
pineapple: {
13+
orange: {
14+
apple: 1
15+
},
16+
'orange.with.dot': {
17+
bah: 2
18+
}
19+
}
20+
}
21+
];
22+
23+
before(async function() {
24+
schemaPaths = await getSchemaPaths(docs);
25+
});
26+
27+
it('returns an array of the fields', function() {
28+
assert.deepEqual(schemaPaths, [
29+
['pineapple'],
30+
['pineapple', 'orange'],
31+
['pineapple', 'orange', 'apple'],
32+
['pineapple', 'orange.with.dot'],
33+
['pineapple', 'orange.with.dot', 'bah']
34+
]);
35+
});
36+
});
37+
38+
describe('with multiple documents with different fields', function() {
39+
const docs = [
40+
{
41+
pineapple: {
42+
orange: {
43+
apple: 1
44+
}
45+
}
46+
},
47+
{
48+
pineapple: {
49+
orange: 'ok'
50+
}
51+
},
52+
{
53+
pineapple: ['test', '123'],
54+
clementine: false
55+
}
56+
];
57+
58+
before(async function() {
59+
schemaPaths = await getSchemaPaths(docs);
60+
});
61+
62+
it('returns all of the field paths', function() {
63+
assert.deepEqual(schemaPaths, [
64+
['pineapple'],
65+
['pineapple', 'orange'],
66+
['pineapple', 'orange', 'apple'],
67+
['clementine']
68+
]);
69+
});
70+
});
71+
72+
describe('with nested array documents', function() {
73+
const docs = [
74+
{
75+
orangutan: [{
76+
tuatara: 'yes',
77+
lizard: {
78+
snakes: false,
79+
birds: false
80+
}
81+
}]
82+
}
83+
];
84+
85+
before(async function() {
86+
schemaPaths = await getSchemaPaths(docs);
87+
});
88+
89+
it('returns all of the field paths', function() {
90+
assert.deepEqual(schemaPaths, [
91+
['orangutan'],
92+
['orangutan', 'tuatara'],
93+
['orangutan', 'lizard'],
94+
['orangutan', 'lizard', 'snakes'],
95+
['orangutan', 'lizard', 'birds']
96+
]);
97+
});
98+
});
99+
100+
describe('with no documents', function() {
101+
const docs: Document[] = [];
102+
103+
before(async function() {
104+
schemaPaths = await getSchemaPaths(docs);
105+
});
106+
107+
it('returns no paths', function() {
108+
assert.deepEqual(schemaPaths, []);
109+
});
110+
});
111+
});

0 commit comments

Comments
 (0)