Skip to content

Commit 8e235f6

Browse files
committed
Fix regex mapping
1 parent 0bfab86 commit 8e235f6

File tree

4 files changed

+82
-89
lines changed

4 files changed

+82
-89
lines changed

src/path/functions/match.ts

Lines changed: 6 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { LRUCache } from "../lru_cache";
22
import { FilterFunction, FunctionExpressionType } from "./function";
3+
import { mapRegexp } from "./pattern";
34

45
export type MatchFilterFunctionOptions = {
56
/**
@@ -58,56 +59,11 @@ export class Match implements FilterFunction {
5859

5960
/**
 * Build the ECMAScript pattern source used for whole-string matching.
 *
 * The pattern is translated with `mapRegexp` and the result is always
 * enveloped in `^(?:` ... `)$`, as described in RFC 9485 section 5.3.
 *
 * The envelope is applied unconditionally rather than being skipped when
 * the pattern already starts with `^` or ends with `$`:
 *  - a pattern with only one explicit anchor (e.g. `^ab` or `bc$`) would
 *    otherwise be left unanchored at the other end;
 *  - a pattern ending in an escaped dollar (`\$`) would be mistaken for
 *    one with an explicit end anchor;
 *  - an alternation such as `^a|b$` only anchors each branch on one side.
 * Redundant anchors inside the non-capturing group are harmless zero-width
 * assertions, so explicitly anchored patterns keep matching the same input.
 *
 * @param pattern - The regular expression source to translate.
 * @returns The mapped pattern, anchored at both ends.
 */
protected fullMatch(pattern: string): string {
  return "^(?:" + mapRegexp(pattern) + ")$";
}
11369
}

src/path/functions/pattern.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/**
 * Translate a regular expression pattern so that `.` is rewritten for use
 * in an ECMAScript regular expression.
 *
 * The pattern is walked one code point at a time:
 *  - the character immediately following a backslash is copied verbatim;
 *  - `[` and `]` switch character-class tracking on and off (a single flag,
 *    nesting is not tracked);
 *  - an unescaped `.` outside a character class is replaced with an
 *    expression matching either one character that is neither CR nor LF and
 *    is not in Unicode category `Cs`, or two consecutive `Cs` characters;
 *  - everything else is copied unchanged.
 *
 * The replacement uses `\p{...}` property escapes, so the result is meant to
 * be compiled with the `u` flag.
 *
 * See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
 *
 * @param pattern - The regular expression source to translate.
 * @returns The translated pattern source.
 */
export function mapRegexp(pattern: string): string {
  let output = "";
  let afterBackslash = false;
  let insideClass = false;

  for (const symbol of pattern) {
    // Whatever follows a backslash is part of the escape; copy it as is.
    if (afterBackslash) {
      output += symbol;
      afterBackslash = false;
      continue;
    }

    if (symbol === "\\") {
      afterBackslash = true;
    } else if (symbol === "[") {
      insideClass = true;
    } else if (symbol === "]") {
      insideClass = false;
    } else if (symbol === "." && !insideClass) {
      // Only a bare dot outside a character class is rewritten.
      output += "(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})";
      continue;
    }

    output += symbol;
  }

  return output;
}

src/path/functions/search.ts

Lines changed: 2 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { LRUCache } from "../lru_cache";
22
import { FilterFunction, FunctionExpressionType } from "./function";
3+
import { mapRegexp } from "./pattern";
34

45
export type SearchFilterFunctionOptions = {
56
/**
@@ -48,50 +49,12 @@ export class Search implements FilterFunction {
4849
}
4950

5051
try {
51-
const re = new RegExp(this.mapRegexp(pattern), "u");
52+
const re = new RegExp(mapRegexp(pattern), "u");
5253
if (this.cacheSize > 0) this.#cache.set(pattern, re);
5354
return !!s.match(re);
5455
} catch (error) {
5556
if (this.throwErrors) throw error;
5657
return false;
5758
}
5859
}
59-
60-
// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
61-
protected mapRegexp(pattern: string): string {
62-
let escaped = false;
63-
let charClass = false;
64-
const parts: string[] = [];
65-
for (const ch of pattern) {
66-
switch (ch) {
67-
case ".":
68-
if (!escaped && !charClass) {
69-
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
70-
} else {
71-
parts.push(ch);
72-
escaped = false;
73-
}
74-
break;
75-
case "\\":
76-
escaped = true;
77-
parts.push(ch);
78-
break;
79-
case "[":
80-
charClass = true;
81-
escaped = false;
82-
parts.push(ch);
83-
break;
84-
case "]":
85-
charClass = false;
86-
escaped = false;
87-
parts.push(ch);
88-
break;
89-
default:
90-
escaped = false;
91-
parts.push(ch);
92-
break;
93-
}
94-
}
95-
return parts.join("");
96-
}
9760
}

tests/path/regex_filters.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,41 @@ describe("match filter", () => {
2626
SyntaxError,
2727
);
2828
});
29+
test("don't replace dot in character group", () => {
30+
const env = new JSONPathEnvironment();
31+
const query = "$[?match(@, 'ab[.c]d')]";
32+
const data = ["abcd", "ab.d", "abxd"];
33+
const rv = env.query(query, data);
34+
expect(rv.values()).toStrictEqual(["abcd", "ab.d"]);
35+
});
36+
test("don't replace escaped dots", () => {
37+
const env = new JSONPathEnvironment();
38+
const query = "$[?match(@, 'ab\\\\.d')]";
39+
const data = ["abcd", "ab.d", "abxd"];
40+
const rv = env.query(query, data);
41+
expect(rv.values()).toStrictEqual(["ab.d"]);
42+
});
43+
test("handle escaped right square bracket in character group", () => {
44+
const env = new JSONPathEnvironment();
45+
const query = "$[?match(@, 'ab[\\\\].c]d')]";
46+
const data = ["abcd", "ab.d", "abxd"];
47+
const rv = env.query(query, data);
48+
expect(rv.values()).toStrictEqual(["abcd", "ab.d"]);
49+
});
50+
test("explicit start caret", () => {
51+
const env = new JSONPathEnvironment();
52+
const query = "$[?match(@, '^ab.*')]";
53+
const data = ["abcd", "ab.d", "axc"];
54+
const rv = env.query(query, data);
55+
expect(rv.values()).toStrictEqual(["abcd", "ab.d"]);
56+
});
57+
test("explicit end dollar", () => {
58+
const env = new JSONPathEnvironment();
59+
const query = "$[?match(@, '.*?bc$')]";
60+
const data = ["abcd", "abc", "axc"];
61+
const rv = env.query(query, data);
62+
expect(rv.values()).toStrictEqual(["abc"]);
63+
});
2964
});
3065

3166
describe("search filter", () => {

0 commit comments

Comments
 (0)