Skip to content

Commit accee3d

Browse files
authored
feat!: update schema path property to be a string array to support field names with dots (#191)
1 parent b2fc41b commit accee3d

File tree

6 files changed

+35
-23
lines changed

6 files changed

+35
-23
lines changed
140 KB
Binary file not shown.

docs/mongodb-schema_diagram.png

-36.9 KB
Loading

src/schema-analyzer.ts

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ type TypeCastTypes = keyof TypeCastMap;
4545
type BSONValue = TypeCastMap[TypeCastTypes];
4646

4747
export type BaseSchemaType = {
48-
path: string;
48+
path: string[];
4949
name: string;
5050
count: number;
5151
probability: number;
@@ -88,7 +88,7 @@ export type SchemaType = BaseSchemaType | ConstantSchemaType | PrimitiveSchemaTy
8888
export type SchemaField = {
8989
name: string;
9090
count: number;
91-
path: string;
91+
path: string[];
9292
type: string | string[];
9393
probability: number;
9494
has_duplicates: boolean;
@@ -104,7 +104,7 @@ type SchemaBSONType = Exclude<keyof TypeCastMap, 'Object'> | 'Document';
104104

105105
type SchemaAnalysisBaseType = {
106106
name: string;
107-
path: string;
107+
path: string[];
108108
bsonType: SchemaBSONType;
109109
count: number;
110110
values?: ReturnType<typeof Reservoir>
@@ -141,7 +141,7 @@ type SchemaAnalysisFieldTypes = {
141141

142142
type SchemaAnalysisField = {
143143
name: string;
144-
path: string;
144+
path: string[];
145145
count: number;
146146
types: SchemaAnalysisFieldTypes;
147147
}
@@ -156,7 +156,7 @@ type SchemaAnalysisRoot = {
156156
count: number;
157157
}
158158

159-
type SemanticTypeFunction = ((value: BSONValue, path?: string) => boolean);
159+
type SemanticTypeFunction = ((value: BSONValue, path?: string[]) => boolean);
160160
type SemanticTypeMap = {
161161
[typeName: string]: SemanticTypeFunction | boolean;
162162
};
@@ -377,7 +377,7 @@ export class SchemaAnalyzer {
377377
}
378378
}
379379

380-
getSemanticType(value: BSONValue, path: string) {
380+
getSemanticType(value: BSONValue, path: string[]) {
381381
// Pass value to semantic type detectors, return first match or undefined.
382382
const returnValue = Object.entries(this.semanticTypes)
383383
.filter(([, v]) => {
@@ -395,7 +395,7 @@ export class SchemaAnalyzer {
395395
* nested arrays and documents, and passes the value down to `addToValue`.
396396
* Note: This mutates the `schema` argument.
397397
*/
398-
const addToType = (path: string, value: BSONValue, schema: SchemaAnalysisFieldTypes) => {
398+
const addToType = (path: string[], value: BSONValue, schema: SchemaAnalysisFieldTypes) => {
399399
const bsonType = getBSONType(value);
400400
// If semantic type detection is enabled, the type is the semantic type
401401
// or the original bson type if no semantic type was detected. If disabled,
@@ -405,7 +405,7 @@ export class SchemaAnalyzer {
405405
schema[typeName] = {
406406
name: typeName,
407407
bsonType: bsonType,
408-
path: path,
408+
path,
409409
count: 0
410410
};
411411
}
@@ -421,7 +421,9 @@ export class SchemaAnalyzer {
421421
} else if (isDocumentType(type)) {
422422
// Recurse into nested documents by calling `addToField` for all sub-fields.
423423
type.fields = type.fields ?? {};
424-
Object.entries(value as Document).forEach(([k, v]) => addToField(`${path}.${k}`, v, type.fields));
424+
Object.entries(value as Document).forEach(
425+
([fieldName, v]) => addToField(fieldName, [...path, fieldName], v, type.fields)
426+
);
425427
} else if (this.options.storeValues && !isNullType(type)) {
426428
// When the `storeValues` option is enabled, store some example values.
427429
if (!type.values) {
@@ -439,23 +441,23 @@ export class SchemaAnalyzer {
439441
* Handles a field from a document. Passes the value to `addToType`.
440442
* Note: This mutates the `schema` argument.
441443
*/
442-
const addToField = (path: string, value: BSONValue, schema: SchemaAnalysisFieldsMap) => {
443-
if (!schema[path]) {
444-
schema[path] = {
445-
name: path.split('.')?.pop() || path,
444+
const addToField = (fieldName: string, path: string[], value: BSONValue, schema: SchemaAnalysisFieldsMap) => {
445+
if (!schema[fieldName]) {
446+
schema[fieldName] = {
447+
name: fieldName,
446448
path: path,
447449
count: 0,
448450
types: {}
449451
};
450452
}
451-
const field = schema[path];
453+
const field = schema[fieldName];
452454

453455
field.count++;
454456
addToType(path, value, field.types);
455457
};
456458

457459
for (const key of Object.keys(doc)) {
458-
addToField(key, doc[key], this.schemaAnalysisRoot.fields);
460+
addToField(key, [key], doc[key], this.schemaAnalysisRoot.fields);
459461
}
460462
this.schemaAnalysisRoot.count += 1;
461463
}

test/basic-embedded-documents.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ describe('basic embedded documents', function() {
4949
];
5050

5151
const nestedFieldPaths = [
52-
'push_token.android',
53-
'push_token.apple'
52+
['push_token', 'android'],
53+
['push_token', 'apple']
5454
];
5555

56-
assert.deepEqual(schema.fields.map(v => v.name).sort(), fieldNames.sort());
56+
assert.deepEqual(schema.fields.map(v => v.name), fieldNames);
5757

5858
const types = schema.fields.find(v => v.name === 'push_token')?.types;
5959
const pushTokens = (types?.find(v => v.name === 'Document') as DocumentSchemaType)?.fields;
60-
assert.deepEqual(pushTokens?.map((v: SchemaField) => v.path).sort(), nestedFieldPaths.sort());
60+
assert.deepEqual(pushTokens?.map((v: SchemaField) => v.path), nestedFieldPaths);
6161
});
6262
});

test/nested-document-path.test.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ describe('nested document path', function() {
1010
foo: {
1111
bar: {
1212
baz: 1
13+
},
14+
'bar.with.dot': {
15+
bah: 2
1316
}
1417
}
1518
}
@@ -19,13 +22,20 @@ describe('nested document path', function() {
1922
schema = await getSchema(docs);
2023
});
2124

22-
it('should assemble the path correctly with dot-notation', function() {
25+
it('should assemble the path correctly for fields with dots in their names', function() {
2326
const foo = schema.fields.find(v => v.name === 'foo');
2427
const bar = (foo?.types.find(v => v.name === 'Document') as DocumentSchemaType)?.fields.find(v => v.name === 'bar');
2528
const baz = (bar?.types.find(v => v.name === 'Document') as DocumentSchemaType)?.fields.find(v => v.name === 'baz');
2629
assert.ok(foo);
2730
assert.ok(bar);
2831
assert.ok(baz);
29-
assert.equal(baz.path, 'foo.bar.baz');
32+
assert.deepEqual(baz.path, ['foo', 'bar', 'baz']);
33+
34+
const barWithDot = (foo.types.find(v => v.name === 'Document') as DocumentSchemaType)?.fields.find(v => v.name === 'bar.with.dot');
35+
assert.ok(barWithDot);
36+
const bah = (barWithDot.types.find(v => v.name === 'Document') as DocumentSchemaType)?.fields.find(v => v.name === 'bah');
37+
assert.ok(bah);
38+
assert.deepEqual(barWithDot.path, ['foo', 'bar.with.dot']);
39+
assert.deepEqual(bah.path, ['foo', 'bar.with.dot', 'bah']);
3040
});
3141
});

test/semantic-types.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ describe('options', function() {
9696
schema = await getSchema(docs, {
9797
semanticTypes: {
9898
Verification: function(value, key) {
99-
return !!key?.match(/verified/);
99+
return !!key?.pop?.()?.match(/verified/);
100100
}
101101
}
102102
});
@@ -115,7 +115,7 @@ describe('options', function() {
115115
semanticTypes: {
116116
email: true,
117117
Verification: function(value, key) {
118-
return !!key?.match(/verified/);
118+
return !!key?.pop?.()?.match(/verified/);
119119
}
120120
}
121121
});

0 commit comments

Comments
 (0)