@@ -251,7 +251,7 @@ export class Parser {
251251 new NameSelector (
252252 this . environment ,
253253 stream . current ,
254- this . decodeString ( stream . current , true ) ,
254+ this . decodeString ( stream . current ) ,
255255 false ,
256256 ) ,
257257 ) ;
@@ -278,7 +278,7 @@ export class Parser {
278278 new KeySelector (
279279 this . environment ,
280280 stream . current ,
281- this . decodeString ( stream . current , true ) ,
281+ this . decodeString ( stream . current ) ,
282282 false ,
283283 ) ,
284284 ) ;
@@ -529,21 +529,211 @@ export class Parser {
529529 return left ;
530530 }
531531
532- protected decodeString ( token : Token , isName : boolean = false ) : string {
533- try {
534- return JSON . parse (
535- token . kind === TokenKind . SINGLE_QUOTE_STRING
536- ? `"${ token . value . replaceAll ( '"' , '\\"' ) . replaceAll ( "\\'" , "'" ) } "`
537- : `"${ token . value } "` ,
/**
 * Decode the escape sequences found in a string token's value.
 *
 * Values from single-quoted tokens are first rewritten: every `"` gains a
 * leading backslash and every `\'` loses its backslash. The result (or the
 * untouched value for any other token kind) is then passed to
 * `unescapeString`.
 *
 * @param token - The string token to decode.
 * @returns Whatever `unescapeString` returns for the prepared value.
 */
protected decodeString(token: Token): string {
  let raw = token.value;

  if (token.kind === TokenKind.SINGLE_QUOTE_STRING) {
    raw = raw.replaceAll('"', '\\"').replaceAll("\\'", "'");
  }

  return this.unescapeString(raw, token);
}
540+
/**
 * Replace backslash escape sequences in _value_ with the characters they
 * stand for.
 *
 * Characters that are not part of an escape sequence are validated with
 * `stringFromCodePoint` and copied through unchanged.
 *
 * @param value - The string to unescape.
 * @param token - The token _value_ came from, attached to any error thrown.
 * @returns The unescaped string.
 * @throws JSONPathSyntaxError if a backslash is followed by anything other
 *   than `"`, `\`, `/`, `b`, `f`, `n`, `r`, `t` or `u`, or if one of the
 *   helper methods rejects a character or `\u` sequence.
 */
protected unescapeString(value: string, token: Token): string {
  // Single-character escapes and the text each one stands for.
  const simpleEscapes = new Map<string, string>([
    ['"', '"'],
    ["\\", "\\"],
    ["/", "/"],
    ["b", "\x08"],
    ["f", "\x0C"],
    ["n", "\n"],
    ["r", "\r"],
    ["t", "\t"],
  ]);

  const parts: string[] = [];

  for (let pos = 0; pos < value.length; pos += 1) {
    const unit = value.charAt(pos);

    if (unit !== "\\") {
      // Called for its validation side effect only; the result is discarded.
      this.stringFromCodePoint(unit.codePointAt(0), token);
      parts.push(unit);
      continue;
    }

    pos += 1; // Move past '\'
    const escaped = value.charAt(pos);

    if (escaped === "u") {
      // decodeHexChar reports the index of the last character it consumed.
      const [codepoint, end] = this.decodeHexChar(value, pos, token);
      pos = end;
      parts.push(this.stringFromCodePoint(codepoint, token));
      continue;
    }

    const replacement = simpleEscapes.get(escaped);
    if (replacement === undefined) {
      // TODO: This is unreachable. The lexer will catch unknown escape sequences.
      throw new JSONPathSyntaxError(
        `unknown escape sequence at index ${token.index + pos - 1}`,
        token,
      );
    }

    parts.push(replacement);
  }

  return parts.join("");
}
599+
/**
 * Decode the hex digits of a `\uXXXX` escape, or of a `\uXXXX\uXXXX`
 * surrogate pair, found in _value_.
 *
 * @param value - A string value containing the sequence to decode.
 * @param index - The index in _value_ of the `u` that follows the backslash.
 * @param token - The token for the string value, attached to any error thrown.
 * @returns A `[codepoint, index]` tuple, where the index is that of the last
 *   hex digit consumed.
 * @throws JSONPathSyntaxError if there are too few characters left, if the
 *   first code unit is a low surrogate, or if a high surrogate is not
 *   followed by a `\u` escape holding a low surrogate.
 */
protected decodeHexChar(
  value: string,
  index: number,
  token: Token,
): [number, number] {
  const size = value.length;

  // Four hex digits must follow the 'u'.
  if (index + 4 >= size) {
    throw new JSONPathSyntaxError(
      `incomplete escape sequence at index ${token.index + index - 1}`,
      token,
    );
  }

  const start = index + 1; // Move past 'u'
  const first = this.parseHexDigits(value.slice(start, start + 4), token);

  if (isLowSurrogate(first)) {
    throw new JSONPathSyntaxError(
      `unexpected low surrogate codepoint at index ${token.index + start - 2}`,
      token,
    );
  }

  if (!isHighSurrogate(first)) {
    // A lone, non-surrogate code unit: nothing more to consume.
    return [first, start + 3];
  }

  // A high surrogate must be followed immediately by a second `\uXXXX`.
  const hasSecondEscape =
    start + 9 < size && value[start + 4] === "\\" && value[start + 5] === "u";

  if (!hasSecondEscape) {
    throw new JSONPathSyntaxError(
      `incomplete escape sequence at index ${token.index + start - 2}`,
      token,
    );
  }

  const second = this.parseHexDigits(
    value.slice(start + 6, start + 10),
    token,
  );

  if (!isLowSurrogate(second)) {
    throw new JSONPathSyntaxError(
      `unexpected codepoint at index ${token.index + start + 4}`,
      token,
    );
  }

  // Combine the low ten bits of each surrogate into one code point.
  const combined = 0x10000 + (((first & 0x03ff) << 10) | (second & 0x03ff));

  return [combined, start + 9];
}
667+
/**
 * Parse a hexadecimal string as an integer.
 *
 * Only `0`-`9`, `a`-`f` and `A`-`F` are accepted. `parseInt(digits, 16)` is
 * deliberately not used because it accepts `+`, `-` and other things we
 * don't allow.
 *
 * @param digits - Hexadecimal digit string.
 * @param token - The token for the string value, attached to any error thrown.
 * @returns The number representation of _digits_ (`0` for an empty string).
 * @throws JSONPathSyntaxError if _digits_ contains a non-hex character.
 */
protected parseHexDigits(digits: string, token: Token): number {
  let codepoint = 0;

  // Read UTF-16 code units directly. Every valid hex digit is a single ASCII
  // code unit, so no byte-level encoding step is needed.
  for (let i = 0; i < digits.length; i += 1) {
    const unit = digits.charCodeAt(i);
    let nibble: number;

    if (unit >= 0x30 && unit <= 0x39) {
      nibble = unit - 0x30; // '0'-'9'
    } else if (unit >= 0x61 && unit <= 0x66) {
      nibble = unit - 0x61 + 10; // 'a'-'f'
    } else if (unit >= 0x41 && unit <= 0x46) {
      nibble = unit - 0x41 + 10; // 'A'-'F'
    } else {
      throw new JSONPathSyntaxError("invalid \\uXXXX escape sequence", token);
    }

    codepoint = (codepoint << 4) | nibble;
  }

  return codepoint;
}
721+
/**
 * Validate _codepoint_ and convert it to a string.
 *
 * @param codepoint - The code point to convert, possibly `undefined`.
 * @param token - The token for the string value, attached to any error thrown.
 * @returns The string for _codepoint_, via `String.fromCodePoint`.
 * @throws JSONPathSyntaxError if _codepoint_ is `undefined`, is `0x1f` or
 *   lower, or is rejected by `String.fromCodePoint`.
 */
protected stringFromCodePoint(
  codepoint: number | undefined,
  token: Token,
): string {
  if (codepoint === undefined) {
    throw new JSONPathSyntaxError(`invalid character`, token);
  }

  if (codepoint <= 0x1f) {
    throw new JSONPathSyntaxError(`invalid character`, token);
  }

  let text: string;
  try {
    text = String.fromCodePoint(codepoint);
  } catch {
    // This should not be reachable.
    throw new JSONPathSyntaxError("invalid escape sequence", token);
  }

  return text;
}
548738
549739 protected throwForNonComparable ( expr : FilterExpression ) : void {
@@ -577,3 +767,11 @@ export class Parser {
577767 }
578768 }
579769}
770+
/**
 * Test whether _codepoint_ lies in the range `0xD800` to `0xDBFF` inclusive.
 *
 * @param codepoint - The numeric value to test.
 * @returns `true` if _codepoint_ is within the range, `false` otherwise.
 */
export function isHighSurrogate(codepoint: number): boolean {
  if (codepoint < 0xd800) {
    return false;
  }
  return codepoint <= 0xdbff;
}
774+
/**
 * Test whether _codepoint_ lies in the range `0xDC00` to `0xDFFF` inclusive.
 *
 * @param codepoint - The numeric value to test.
 * @returns `true` if _codepoint_ is within the range, `false` otherwise.
 */
export function isLowSurrogate(codepoint: number): boolean {
  if (codepoint < 0xdc00) {
    return false;
  }
  return codepoint <= 0xdfff;
}
0 commit comments