Skip to content

Commit 9cc9dae

Browse files
committed
Unescape strings without JSON.parse
1 parent a2d342a commit 9cc9dae

File tree

2 files changed

+205
-17
lines changed

2 files changed

+205
-17
lines changed

src/path/parse.ts

Lines changed: 204 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ export class Parser {
251251
new NameSelector(
252252
this.environment,
253253
stream.current,
254-
this.decodeString(stream.current, true),
254+
this.decodeString(stream.current),
255255
false,
256256
),
257257
);
@@ -278,7 +278,7 @@ export class Parser {
278278
new KeySelector(
279279
this.environment,
280280
stream.current,
281-
this.decodeString(stream.current, true),
281+
this.decodeString(stream.current),
282282
false,
283283
),
284284
);
@@ -529,21 +529,201 @@ export class Parser {
529529
return left;
530530
}
531531

532-
protected decodeString(token: Token, isName: boolean = false): string {
533-
try {
534-
return JSON.parse(
535-
token.kind === TokenKind.SINGLE_QUOTE_STRING
536-
? `"${token.value.replaceAll('"', '\\"').replaceAll("\\'", "'")}"`
537-
: `"${token.value}"`,
538-
);
539-
} catch {
540-
throw new JSONPathSyntaxError(
541-
`invalid ${isName ? "name selector" : "string literal"} '${
542-
token.value
543-
}'`,
544-
token,
545-
);
532+
/**
 * Decode the escape sequences in a quoted-string token's value.
 *
 * Values from single-quoted tokens are first rewritten to the
 * double-quoted form: every `"` gains a preceding backslash and every
 * `\'` loses its backslash. The result is then passed to
 * `unescapeString`, and any error it throws propagates to the caller.
 *
 * @param token - The string token to decode.
 * @returns The token's value with escape sequences replaced.
 */
protected decodeString(token: Token): string {
  if (token.kind !== TokenKind.SINGLE_QUOTE_STRING) {
    return this.unescapeString(token.value, token);
  }

  const doubleQuotedForm = token.value
    .replaceAll('"', '\\"')
    .replaceAll("\\'", "'");

  return this.unescapeString(doubleQuotedForm, token);
}
540+
541+
// eslint-disable-next-line sonarjs/cognitive-complexity
542+
/**
 * Replace backslash escape sequences in `value` with the characters they
 * denote.
 *
 * Recognised escapes are `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`
 * and `\uXXXX`. A `\uXXXX` high surrogate must be followed immediately by
 * a `\uXXXX` low surrogate; the pair is combined into one code point.
 *
 * @param value - The string contents to unescape.
 * @param token - The token `value` came from; attached to thrown errors.
 * @returns `value` with every escape sequence decoded.
 * @throws JSONPathSyntaxError if `value` contains an unknown or truncated
 *   escape sequence, a lone or mismatched surrogate escape, a `\uXXXX`
 *   escape that decodes to a code point at or below U+001F, or an
 *   unescaped character at or below U+001F.
 */
protected unescapeString(value: string, token: Token): string {
  const rv: string[] = [];
  const length = value.length;
  let index = 0;

  while (index < length) {
    const ch = value[index];

    if (ch === "\\") {
      // Step past the backslash; `index` now addresses the escape character.
      index += 1;

      switch (value[index]) {
        case '"':
          rv.push('"');
          break;
        case "\\":
          rv.push("\\");
          break;
        case "/":
          rv.push("/");
          break;
        case "b":
          rv.push("\x08");
          break;
        case "f":
          rv.push("\x0C");
          break;
        case "n":
          rv.push("\n");
          break;
        case "r":
          rv.push("\r");
          break;
        case "t":
          rv.push("\t");
          break;
        case "u": {
          // Four hex digits must follow the 'u'.
          if (index + 4 >= length) {
            throw new JSONPathSyntaxError(
              `invalid escape sequence at offset ${index}`,
              token,
            );
          }

          // Step past the 'u'; `index` now addresses the first hex digit.
          index += 1;

          let codepoint = this.parseInt16(
            value.slice(index, index + 4),
            token,
          );

          // A low surrogate can never start an escape. The NaN guard is
          // kept in case a subclass overrides `parseInt16`.
          if (isNaN(codepoint) || isLowSurrogate(codepoint)) {
            throw new JSONPathSyntaxError(
              `invalid escape sequence at offset ${index}`,
              token,
            );
          }

          // Only a high surrogate requires a second escape. A
          // non-surrogate followed by another `\u` (e.g. `\u0041\u0042`)
          // is valid; the second escape is decoded by the next loop
          // iteration.
          if (isHighSurrogate(codepoint)) {
            // Need `\u` plus four hex digits directly after the first four.
            if (
              index + 9 >= length ||
              value[index + 4] !== "\\" ||
              value[index + 5] !== "u"
            ) {
              throw new JSONPathSyntaxError(
                `invalid escape sequence at offset ${index}`,
                token,
              );
            }

            const lowSurrogate = this.parseInt16(
              value.slice(index + 6, index + 10),
              token,
            );

            if (isNaN(lowSurrogate) || !isLowSurrogate(lowSurrogate)) {
              throw new JSONPathSyntaxError(
                `invalid escape sequence at offset ${index + 4}`,
                token,
              );
            }

            // Combine the two 10-bit halves into a supplementary code point.
            codepoint =
              0x10000 +
              (((codepoint & 0x03ff) << 10) | (lowSurrogate & 0x03ff));

            // Skip the first four digits and the `\u` of the second escape.
            index += 6;
          }

          let unescaped: string;
          try {
            unescaped = String.fromCodePoint(codepoint);
          } catch {
            // TODO: after a surrogate pair, `index` addresses the second
            // escape's digits rather than the start of the sequence.
            throw new JSONPathSyntaxError(
              `invalid escape sequence at offset ${index}`,
              token,
            );
          }

          // Escaped control characters are rejected just like raw ones.
          if (codepoint <= 0x1f) {
            throw new JSONPathSyntaxError(
              `invalid character at offset ${index}`,
              token,
            );
          }

          rv.push(unescaped);

          // Leave `index` on the last hex digit; the shared increment at
          // the bottom of the loop moves past it.
          index += 3;
          break;
        }
        default:
          // Unknown escape character, or a backslash at the end of input.
          throw new JSONPathSyntaxError(
            `invalid escape sequence at offset ${index}`,
            token,
          );
      }
    } else {
      // Unescaped control characters are not allowed.
      const codepointAt = ch.codePointAt(0);
      if (codepointAt !== undefined && codepointAt <= 0x1f) {
        throw new JSONPathSyntaxError(
          `invalid character at offset ${index}`,
          token,
        );
      }
      rv.push(ch);
    }

    index += 1;
  }

  return rv.join("");
}
684+
685+
/**
 * Parse a string of hexadecimal digits into a number.
 *
 * The string is encoded as UTF-8 and processed one byte at a time, each
 * byte contributing four bits to the result. Both upper and lower case
 * `a`-`f` are accepted.
 *
 * @param digits - The hexadecimal digits to parse.
 * @param token - The token the digits came from; attached to thrown errors.
 * @returns The numeric value of `digits`.
 * @throws JSONPathSyntaxError if any byte is not an ASCII hex digit. The
 *   message includes the offending byte value in decimal.
 */
protected parseInt16(digits: string, token: Token): number {
  const bytes = new TextEncoder().encode(digits);
  let result = 0;

  for (const byte of bytes) {
    let nibble: number;

    if (byte >= 48 && byte <= 57) {
      nibble = byte - 48; // '0'..'9'
    } else if (byte >= 97 && byte <= 102) {
      nibble = byte - 97 + 10; // 'a'..'f'
    } else if (byte >= 65 && byte <= 70) {
      nibble = byte - 65 + 10; // 'A'..'F'
    } else {
      throw new JSONPathSyntaxError(
        `invalid \\uXXXX escape sequence (${byte})`,
        token,
      );
    }

    result = (result << 4) | nibble;
  }

  return result;
}
548728

549729
protected throwForNonComparable(expr: FilterExpression): void {
@@ -577,3 +757,11 @@ export class Parser {
577757
}
578758
}
579759
}
760+
761+
/**
 * Test whether `codepoint` is in the UTF-16 high (leading) surrogate
 * range, U+D800 to U+DBFF inclusive.
 *
 * @param codepoint - The value to test.
 * @returns `true` if `codepoint` is within the range, `false` otherwise.
 */
function isHighSurrogate(codepoint: number): boolean {
  const first = 0xd800;
  const last = 0xdbff;
  return first <= codepoint && last >= codepoint;
}
764+
765+
/**
 * Test whether `codepoint` is in the UTF-16 low (trailing) surrogate
 * range, U+DC00 to U+DFFF inclusive.
 *
 * @param codepoint - The value to test.
 * @returns `true` if `codepoint` is within the range, `false` otherwise.
 */
function isLowSurrogate(codepoint: number): boolean {
  const first = 0xdc00;
  const last = 0xdfff;
  return first <= codepoint && last >= codepoint;
}

tests/path/cts

0 commit comments

Comments
 (0)