Skip to content

Commit 103594e

Browse files
authored
Merge pull request #20 from jg-rp/map-regex
Fix regex mapping
2 parents 0bfab86 + efaa012 commit 103594e

File tree

10 files changed

+207
-99
lines changed

10 files changed

+207
-99
lines changed

CHANGELOG.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,20 @@
11
# JSON P3 Change Log
22

3+
## Version 1.3.2
4+
5+
**Fixes**
6+
7+
- Fixed more I-Regexp to RegExp pattern mapping issues. See [jsonpath-compliance-test-suite#77](https://github.com/jsonpath-standard/jsonpath-compliance-test-suite/pull/77).
8+
9+
**Compliance**
10+
11+
- We now check that regular expression patterns passed to `match` and `search` are valid according to RFC 9485. The standard behavior is to silently return `false` from these filter functions if the pattern is invalid. The `throwErrors` option can be passed to `Match` and/or `Search` to throw an error instead, and the `iRegexpCheck` option can be set to `false` to disable I-Regexp checks.
12+
313
## Version 1.3.1
414

515
**Fixes**
616

7-
- Fixed RegExp to I-Regex pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`.
17+
- Fixed I-Regexp to RegExp pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`.
818

919
## Version 1.3.0
1020

package-lock.json

Lines changed: 9 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "json-p3",
3-
"version": "1.3.1",
3+
"version": "1.3.2",
44
"author": "James Prior",
55
"license": "MIT",
66
"description": "JSONPath, JSON Pointer and JSON Patch",
@@ -67,6 +67,7 @@
6767
"eslint-plugin-promise": "^6.1.1",
6868
"eslint-plugin-sonarjs": "^0.23.0",
6969
"eslint-plugin-tsdoc": "^0.2.17",
70+
"iregexp-check": "^0.1.1",
7071
"jest": "^29.7.0",
7172
"prettier": "^3.1.1",
7273
"rollup": "^4.9.2",

src/path/errors.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,14 @@ export class JSONPathRecursionLimitError extends JSONPathError {
126126
this.message = withErrorContext(message, token);
127127
}
128128
}
129+
130+
/**
131+
* Error thrown due to invalid I-Regexp syntax.
132+
*/
133+
export class IRegexpError extends Error {
134+
constructor(readonly message: string) {
135+
super(message);
136+
Object.setPrototypeOf(this, new.target.prototype);
137+
this.name = "IRegexpError";
138+
}
139+
}

src/path/functions/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ export { Search } from "./search";
55
export { Value } from "./value";
66
export { FunctionExpressionType } from "./function";
77
export type { FilterFunction } from "./function";
8+
export type { MatchFilterFunctionOptions } from "./match";
9+
export type { SearchFilterFunctionOptions } from "./search";

src/path/functions/match.ts

Lines changed: 43 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
import { isString } from "../../types";
2+
import { IRegexpError } from "../errors";
13
import { LRUCache } from "../lru_cache";
24
import { FilterFunction, FunctionExpressionType } from "./function";
5+
import { mapRegexp } from "./pattern";
6+
import { check } from "iregexp-check";
37

48
export type MatchFilterFunctionOptions = {
59
/**
@@ -8,11 +12,21 @@ export type MatchFilterFunctionOptions = {
812
cacheSize?: number;
913

1014
/**
11-
* If _true_, throw errors from regex construction and matching.
12-
* The standard and default behavior is to ignore these errors
13-
* and return _false_.
15+
* If _true_, throw errors from regex checking, construction and matching.
16+
* The standard and default behavior is to ignore these errors and return
17+
* _false_.
1418
*/
1519
throwErrors?: boolean;
20+
21+
/**
22+
* If _true_, check that regexp patterns are valid according to I-Regexp.
23+
* The standard and default behavior is to silently return _false_ if a
24+
* pattern is invalid.
25+
*
26+
* If `iRegexpCheck` is _true_ and `throwErrors` is _true_, an `IRegexpError`
27+
* will be thrown.
28+
*/
29+
iRegexpCheck?: boolean;
1630
};
1731

1832
export class Match implements FilterFunction {
@@ -25,14 +39,17 @@ export class Match implements FilterFunction {
2539

2640
readonly cacheSize: number;
2741
readonly throwErrors: boolean;
42+
readonly iRegexpCheck: boolean;
2843
#cache: LRUCache<string, RegExp>;
2944

3045
constructor(readonly options: MatchFilterFunctionOptions = {}) {
3146
this.cacheSize = options.cacheSize ?? 10;
3247
this.throwErrors = options.throwErrors ?? false;
48+
this.iRegexpCheck = options.iRegexpCheck ?? true;
3349
this.#cache = new LRUCache(this.cacheSize);
3450
}
3551

52+
// eslint-disable-next-line sonarjs/cognitive-complexity
3653
public call(s: string, pattern: string): boolean {
3754
if (this.cacheSize > 0) {
3855
const re = this.#cache.get(pattern);
@@ -46,6 +63,24 @@ export class Match implements FilterFunction {
4663
}
4764
}
4865

66+
if (!isString(pattern)) {
67+
if (this.throwErrors) {
68+
throw new IRegexpError(
69+
`match() expected a string pattern, found ${pattern}`,
70+
);
71+
}
72+
return false;
73+
}
74+
75+
if (this.iRegexpCheck && !check(pattern)) {
76+
if (this.throwErrors) {
77+
throw new IRegexpError(
78+
`pattern ${pattern} is not a valid I-Regexp pattern`,
79+
);
80+
}
81+
return false;
82+
}
83+
4984
try {
5085
const re = new RegExp(this.fullMatch(pattern), "u");
5186
if (this.cacheSize > 0) this.#cache.set(pattern, re);
@@ -58,56 +93,11 @@ export class Match implements FilterFunction {
5893

5994
protected fullMatch(pattern: string): string {
6095
const parts: string[] = [];
61-
let nonCaptureGroup = false;
62-
63-
if (!pattern.startsWith("^") && !pattern.startsWith("^(")) {
64-
nonCaptureGroup = true;
65-
parts.push("^(?:");
66-
}
67-
parts.push(this.mapRegexp(pattern));
68-
69-
if (nonCaptureGroup && !pattern.endsWith("$") && !pattern.endsWith(")$")) {
70-
parts.push(")$");
71-
}
72-
73-
return parts.join("");
74-
}
75-
76-
// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
77-
protected mapRegexp(pattern: string): string {
78-
let escaped = false;
79-
let charClass = false;
80-
const parts: string[] = [];
81-
for (const ch of pattern) {
82-
switch (ch) {
83-
case ".":
84-
if (!escaped && !charClass) {
85-
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
86-
} else {
87-
parts.push(ch);
88-
escaped = false;
89-
}
90-
break;
91-
case "\\":
92-
escaped = true;
93-
parts.push(ch);
94-
break;
95-
case "[":
96-
charClass = true;
97-
escaped = false;
98-
parts.push(ch);
99-
break;
100-
case "]":
101-
charClass = false;
102-
escaped = false;
103-
parts.push(ch);
104-
break;
105-
default:
106-
escaped = false;
107-
parts.push(ch);
108-
break;
109-
}
110-
}
96+
const explicitCaret = pattern.startsWith("^");
97+
const explicitDollar = pattern.endsWith("$");
98+
if (!explicitCaret && !explicitDollar) parts.push("^(?:");
99+
parts.push(mapRegexp(pattern));
100+
if (!explicitCaret && !explicitDollar) parts.push(")$");
111101
return parts.join("");
112102
}
113103
}

src/path/functions/pattern.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
2+
export function mapRegexp(pattern: string): string {
3+
let escaped = false;
4+
let charClass = false;
5+
const parts: string[] = [];
6+
for (const ch of pattern) {
7+
if (escaped) {
8+
parts.push(ch);
9+
escaped = false;
10+
continue;
11+
}
12+
13+
switch (ch) {
14+
case ".":
15+
if (!charClass) {
16+
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
17+
} else {
18+
parts.push(ch);
19+
}
20+
break;
21+
case "\\":
22+
escaped = true;
23+
parts.push(ch);
24+
break;
25+
case "[":
26+
charClass = true;
27+
parts.push(ch);
28+
break;
29+
case "]":
30+
charClass = false;
31+
parts.push(ch);
32+
break;
33+
default:
34+
parts.push(ch);
35+
break;
36+
}
37+
}
38+
return parts.join("");
39+
}

src/path/functions/search.ts

Lines changed: 36 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
import { check } from "iregexp-check";
12
import { LRUCache } from "../lru_cache";
23
import { FilterFunction, FunctionExpressionType } from "./function";
4+
import { mapRegexp } from "./pattern";
5+
import { IRegexpError } from "../errors";
6+
import { isString } from "../../types";
37

48
export type SearchFilterFunctionOptions = {
59
/**
@@ -14,6 +18,16 @@ export type SearchFilterFunctionOptions = {
1418
* and return _false_.
1519
*/
1620
throwErrors?: boolean;
21+
22+
/**
23+
* If _true_, check that regexp patterns are valid according to I-Regexp.
24+
* The standard and default behavior is to silently return _false_ if a
25+
* pattern is invalid.
26+
*
27+
* If `iRegexpCheck` is _true_ and `throwErrors` is _true_, an `IRegexpError`
28+
* will be thrown.
29+
*/
30+
iRegexpCheck?: boolean;
1731
};
1832

1933
export class Search implements FilterFunction {
@@ -26,14 +40,17 @@ export class Search implements FilterFunction {
2640

2741
readonly cacheSize: number;
2842
readonly throwErrors: boolean;
43+
readonly iRegexpCheck: boolean;
2944
#cache: LRUCache<string, RegExp>;
3045

3146
constructor(readonly options: SearchFilterFunctionOptions = {}) {
3247
this.cacheSize = options.cacheSize ?? 10;
3348
this.throwErrors = options.throwErrors ?? false;
49+
this.iRegexpCheck = options.iRegexpCheck ?? true;
3450
this.#cache = new LRUCache(this.cacheSize);
3551
}
3652

53+
// eslint-disable-next-line sonarjs/cognitive-complexity
3754
public call(s: string, pattern: string): boolean {
3855
if (this.cacheSize > 0) {
3956
const re = this.#cache.get(pattern);
@@ -47,51 +64,31 @@ export class Search implements FilterFunction {
4764
}
4865
}
4966

67+
if (!isString(pattern)) {
68+
if (this.throwErrors) {
69+
throw new IRegexpError(
70+
`match() expected a string pattern, found ${pattern}`,
71+
);
72+
}
73+
return false;
74+
}
75+
76+
if (this.iRegexpCheck && !check(pattern)) {
77+
if (this.throwErrors) {
78+
throw new IRegexpError(
79+
`pattern ${pattern} is not a valid I-Regexp pattern`,
80+
);
81+
}
82+
return false;
83+
}
84+
5085
try {
51-
const re = new RegExp(this.mapRegexp(pattern), "u");
86+
const re = new RegExp(mapRegexp(pattern), "u");
5287
if (this.cacheSize > 0) this.#cache.set(pattern, re);
5388
return !!s.match(re);
5489
} catch (error) {
5590
if (this.throwErrors) throw error;
5691
return false;
5792
}
5893
}
59-
60-
// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
61-
protected mapRegexp(pattern: string): string {
62-
let escaped = false;
63-
let charClass = false;
64-
const parts: string[] = [];
65-
for (const ch of pattern) {
66-
switch (ch) {
67-
case ".":
68-
if (!escaped && !charClass) {
69-
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
70-
} else {
71-
parts.push(ch);
72-
escaped = false;
73-
}
74-
break;
75-
case "\\":
76-
escaped = true;
77-
parts.push(ch);
78-
break;
79-
case "[":
80-
charClass = true;
81-
escaped = false;
82-
parts.push(ch);
83-
break;
84-
case "]":
85-
charClass = false;
86-
escaped = false;
87-
parts.push(ch);
88-
break;
89-
default:
90-
escaped = false;
91-
parts.push(ch);
92-
break;
93-
}
94-
}
95-
return parts.join("");
96-
}
9794
}

0 commit comments

Comments
 (0)