@@ -538,16 +538,11 @@ export class Parser {
538538 ) ;
539539 }
540540
541- // eslint-disable-next-line sonarjs/cognitive-complexity
542541 protected unescapeString ( value : string , token : Token ) : string {
543542 const rv : string [ ] = [ ] ;
544543 const length = value . length ;
545544 let index = 0 ;
546- let digits : string ;
547545 let codepoint : number ;
548- let lowSurrogate : number ;
549- let unescaped : string ;
550- let codepointAt : number | undefined ;
551546
552547 while ( index < length ) {
553548 const ch = value [ index ] ;
@@ -581,98 +576,14 @@ export class Parser {
581576 rv . push ( "\t" ) ;
582577 break ;
583578 case "u" :
584- if ( index + 4 >= length ) {
585- throw new JSONPathSyntaxError (
586- `invalid escape sequence at offset ${ index } ` ,
587- token ,
588- ) ;
589- }
590-
591- index += 1 ;
592-
593- digits = value . slice ( index , index + 4 ) ;
594- codepoint = this . parseInt16 ( digits , token ) ;
595-
596- if ( isNaN ( codepoint ) ) {
597- throw new JSONPathSyntaxError (
598- `invalid escape sequence at offset ${ index } ` ,
599- token ,
600- ) ;
601- }
602-
603- if (
604- index + 5 < length &&
605- value [ index + 4 ] === "\\" &&
606- value [ index + 5 ] === "u"
607- ) {
608- // expect a surrogate pair
609- if ( index + 9 >= length || ! isHighSurrogate ( codepoint ) ) {
610- throw new JSONPathSyntaxError (
611- `invalid escape sequence at offset ${ index } ` ,
612- token ,
613- ) ;
614- }
615-
616- digits = value . slice ( index + 6 , index + 10 ) ;
617- lowSurrogate = this . parseInt16 ( digits , token ) ;
618-
619- if ( isNaN ( lowSurrogate ) || ! isLowSurrogate ( lowSurrogate ) ) {
620- throw new JSONPathSyntaxError (
621- `invalid escape sequence at offset ${ index + 4 } ` ,
622- token ,
623- ) ;
624- }
625-
626- codepoint =
627- 0x10000 +
628- ( ( ( codepoint & 0x03ff ) << 10 ) | ( lowSurrogate & 0x03ff ) ) ;
629-
630- index += 6 ;
631- } else if (
632- isHighSurrogate ( codepoint ) ||
633- isLowSurrogate ( codepoint )
634- ) {
635- throw new JSONPathSyntaxError (
636- `invalid escape sequence at offset ${ index } ` ,
637- token ,
638- ) ;
639- }
640-
641- try {
642- unescaped = String . fromCodePoint ( codepoint ) ;
643- } catch {
644- // TODO: offset is wrong
645- throw new JSONPathSyntaxError (
646- `invalid escape sequence at offset ${ index } ` ,
647- token ,
648- ) ;
649- }
650-
651- codepointAt = unescaped . codePointAt ( 0 ) ;
652- if ( codepointAt !== undefined && codepointAt <= 0x1f ) {
653- throw new JSONPathSyntaxError (
654- `invalid character at offset ${ index } ` ,
655- token ,
656- ) ;
657- }
658-
659- rv . push ( unescaped ) ;
660- index += 3 ;
579+ [ codepoint , index ] = this . decodeEscapeSequence ( value , index , token ) ;
580+ rv . push ( this . stringFromCodePoint ( codepoint , token ) ) ;
661581 break ;
662582 default :
663- throw new JSONPathSyntaxError (
664- `invalid escape sequence at offset ${ index } ` ,
665- token ,
666- ) ;
583+ throw new JSONPathSyntaxError ( "invalid escape sequence" , token ) ;
667584 }
668585 } else {
669- codepointAt = ch . codePointAt ( 0 ) ;
670- if ( codepointAt !== undefined && codepointAt <= 0x1f ) {
671- throw new JSONPathSyntaxError (
672- `invalid character at offset ${ index } ` ,
673- token ,
674- ) ;
675- }
586+ this . stringFromCodePoint ( ch . codePointAt ( 0 ) , token ) ;
676587 rv . push ( ch ) ;
677588 }
678589
@@ -682,6 +593,72 @@ export class Parser {
682593 return rv . join ( "" ) ;
683594 }
684595
/**
 * Decode a `\uXXXX` or `\uXXXX\uXXXX` escape sequence from _value_ at _index_.
 *
 * A single `\uXXXX` must not encode a surrogate. A high surrogate must be
 * followed immediately by a second `\uXXXX` that encodes a low surrogate, in
 * which case the pair is combined into one supplementary-plane code point.
 *
 * @param value - A string value containing the sequence to decode.
 * @param index - The index in _value_ of the `u` that follows the backslash.
 * @param token - The token for the string value, attached to any error thrown.
 * @returns A `[codepoint, index]` tuple, where `index` is the position of the
 *   last hexadecimal digit consumed from _value_.
 * @throws JSONPathSyntaxError if the sequence is truncated, is a lone or
 *   misplaced surrogate, or does not decode to a number.
 */
protected decodeEscapeSequence(
  value: string,
  index: number,
  token: Token,
): [number, number] {
  const length = value.length;

  // The `u` must be followed by at least four more characters.
  if (index + 4 >= length) {
    throw new JSONPathSyntaxError("invalid escape sequence", token);
  }

  index += 1; // Move past 'u'
  let codepoint = this.parseInt16(value.slice(index, index + 4), token);

  // Guard against NaN explicitly so callers never receive a non-numeric code
  // point. A low surrogate is never valid as the first code unit.
  if (Number.isNaN(codepoint) || isLowSurrogate(codepoint)) {
    throw new JSONPathSyntaxError("invalid escape sequence", token);
  }

  if (!isHighSurrogate(codepoint)) {
    // Basic Multilingual Plane code point; `index + 3` is its last hex digit.
    return [codepoint, index + 3];
  }

  // Expect a surrogate pair: `\u` at index + 4 and index + 5, then four hex
  // digits at index + 6 through index + 9.
  const hasSecondEscape =
    index + 9 < length &&
    value[index + 4] === "\\" &&
    value[index + 5] === "u";

  if (!hasSecondEscape) {
    throw new JSONPathSyntaxError("invalid escape sequence", token);
  }

  const lowSurrogate = this.parseInt16(
    value.slice(index + 6, index + 10),
    token,
  );

  // A NaN result also fails this range check.
  if (!isLowSurrogate(lowSurrogate)) {
    throw new JSONPathSyntaxError("invalid escape sequence", token);
  }

  // Combine the low ten bits of each surrogate and offset into the
  // supplementary planes.
  codepoint =
    0x10000 + (((codepoint & 0x03ff) << 10) | (lowSurrogate & 0x03ff));

  return [codepoint, index + 9];
}
651+
652+ /**
653+ * Parse a hexadecimal string as an integer.
654+ *
655+ * @param digits - Hexadecimal digit string.
656+ * @param token - The token for the string value.
657+ * @returns - The number representation of _digits_.
658+ *
659+ * Note that we're not using `parseInt(digits, 16)` because it accepts `+`
660+ * and `-` and things we don't allow.
661+ */
685662 protected parseInt16 ( digits : string , token : Token ) : number {
686663 const encoder = new TextEncoder ( ) ;
687664 let codepoint = 0 ;
@@ -726,6 +703,22 @@ export class Parser {
726703 return codepoint ;
727704 }
728705
/**
 * Check the codepoint is valid and return its string representation.
 *
 * @param codepoint - The code point to convert, or `undefined`.
 * @param token - The token for the string value, attached to any error thrown.
 * @returns The string produced by `String.fromCodePoint` for _codepoint_.
 * @throws JSONPathSyntaxError with "invalid character" if _codepoint_ is
 *   `undefined` or is less than or equal to 0x1F, or with "invalid escape
 *   sequence" if `String.fromCodePoint` rejects the value.
 */
protected stringFromCodePoint(
  codepoint: number | undefined,
  token: Token,
): string {
  if (codepoint === undefined || codepoint <= 0x1f) {
    throw new JSONPathSyntaxError("invalid character", token);
  }

  try {
    return String.fromCodePoint(codepoint);
  } catch {
    // Report a failed conversion as a syntax error against the token rather
    // than letting the underlying exception escape.
    throw new JSONPathSyntaxError("invalid escape sequence", token);
  }
}
721+
729722 protected throwForNonComparable ( expr : FilterExpression ) : void {
730723 if (
731724 ( expr instanceof RootQuery || expr instanceof RelativeQuery ) &&
@@ -758,10 +751,10 @@ export class Parser {
758751 }
759752}
760753
/**
 * Test whether _codepoint_ is a UTF-16 high (leading) surrogate.
 *
 * @param codepoint - The numeric code unit to test.
 * @returns `true` if _codepoint_ is in the range 0xD800 to 0xDBFF inclusive.
 */
export function isHighSurrogate(codepoint: number): boolean {
  return codepoint >= 0xd800 && codepoint <= 0xdbff;
}
764757
/**
 * Test whether _codepoint_ is a UTF-16 low (trailing) surrogate.
 *
 * @param codepoint - The numeric code unit to test.
 * @returns `true` if _codepoint_ is in the range 0xDC00 to 0xDFFF inclusive.
 */
export function isLowSurrogate(codepoint: number): boolean {
  return codepoint >= 0xdc00 && codepoint <= 0xdfff;
}
0 commit comments