github
diff --git a/‎javascript/ql/lib/semmle/javascript/dataflow/Nodes.qll‎
Lines changed: 6 additions & 1 deletion b/‎javascript/ql/lib/semmle/javascript/dataflow/Nodes.qll‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎javascript/ql/lib/semmle/javascript/security/TaintedUrlSuffix.qll‎
Lines changed: 3 additions & 102 deletions b/‎javascript/ql/lib/semmle/javascript/security/TaintedUrlSuffix.qll‎
Lines changed: 3 additions & 102 deletions
diff --git a/‎javascript/ql/lib/semmle/javascript/security/TaintedUrlSuffixCustomizations.qll‎
Lines changed: 153 additions & 0 deletions b/‎javascript/ql/lib/semmle/javascript/security/TaintedUrlSuffixCustomizations.qll‎
Lines changed: 153 additions & 0 deletions
diff --git a/‎javascript/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectCustomizations.qll‎
Lines changed: 8 additions & 7 deletions b/‎javascript/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectCustomizations.qll‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎javascript/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectQuery.qll‎
Lines changed: 4 additions & 6 deletions b/‎javascript/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectQuery.qll‎
Lines changed: 4 additions & 6 deletions
@@ -1611,7 +1611,12 @@ class RegExpConstructorInvokeNode extends DataFlow::InvokeNode {
    * Gets the AST of the regular expression created here, provided that the
    * first argument is a string literal.
    */
-  RegExpTerm getRoot() { result = this.getArgument(0).asExpr().(StringLiteral).asRegExp() }
+  RegExpTerm getRoot() {
+    result = this.getArgument(0).asExpr().(StringLiteral).asRegExp()
+    or
+    // In case someone writes `new RegExp(/foo/)` for some reason
+    result = this.getArgument(0).asExpr().(RegExpLiteral).getRoot()
+  }
 
   /**
    * Gets the flags provided in the second argument, or an empty string if no
 
@@ -4,114 +4,15 @@
  */
 
 import javascript
-private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
 
 /**
  * Provides a flow label for reasoning about URLs with a tainted query and fragment part,
  * which we collectively refer to as the "suffix" of the URL.
  */
 module TaintedUrlSuffix {
-  private import DataFlow
+  import TaintedUrlSuffixCustomizations::TaintedUrlSuffix
 
-  /**
-   * The flow label representing a URL with a tainted query and fragment part.
-   *
-   * Can also be accessed using `TaintedUrlSuffix::label()`.
-   */
-  class TaintedUrlSuffixLabel extends FlowLabel {
-    TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
-  }
-
-  /**
-   * Gets the flow label representing a URL with a tainted query and fragment part.
-   */
-  FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
-
-  /** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
-  ClientSideRemoteFlowSource source() {
-    result = DOM::locationRef().getAPropertyRead(["search", "hash"])
-    or
-    result = DOM::locationSource()
-    or
-    result.getKind().isUrl()
-  }
-
-  /**
-   * Holds if `node` should be a barrier for the given `label`.
-   *
-   * This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
-   * label.
-   */
-  predicate isBarrier(Node node, FlowLabel label) {
-    label = label() and
-    DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
-  }
-
-  /**
-   * Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
-   *
-   * This handles steps through string operations, promises, URL parsers, and URL accessors.
-   */
-  predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
-    // Transition from tainted-url-suffix to general taint when entering the second array element
-    // of a split('#') or split('?') array.
-    //
-    //   x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
-    //
-    // Technically we should also preverse tainted-url-suffix when entering the first array element of such
-    // a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
-    // (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
-    srclbl = label() and
-    dstlbl.isTaint() and
-    DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
-    or
-    // Transition from URL suffix to full taint when extracting the query/fragment part.
-    srclbl = label() and
-    dstlbl.isTaint() and
-    (
-      exists(MethodCallNode call, string name |
-        src = call.getReceiver() and
-        dst = call and
-        name = call.getMethodName()
-      |
-        // Substring that is not a prefix
-        name = StringOps::substringMethodName() and
-        not call.getArgument(0).getIntValue() = 0
-        or
-        // Replace '#' and '?' with nothing
-        name = "replace" and
-        call.getArgument(0).getStringValue() = ["#", "?"] and
-        call.getArgument(1).getStringValue() = ""
-        or
-        // The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
-        // The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
-        name = "get"
-        or
-        // Methods on URL objects from the Closure library
-        name = "getDecodedQuery"
-        or
-        name = "getFragment"
-        or
-        name = "getParameterValue"
-        or
-        name = "getParameterValues"
-        or
-        name = "getQueryData"
-      )
-      or
-      exists(PropRead read |
-        src = read.getBase() and
-        dst = read and
-        // Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
-        read.getPropertyName() = "query"
-      )
-      or
-      // Assume calls to regexp.exec always extract query/fragment parameters.
-      exists(MethodCallNode call |
-        call = any(RegExpLiteral re).flow().(DataFlow::SourceNode).getAMethodCall("exec") and
-        src = call.getArgument(0) and
-        dst = call
-      )
-    )
+  private class ConcreteTaintedUrlSuffixLabel extends TaintedUrlSuffixLabel {
+    ConcreteTaintedUrlSuffixLabel() { this = this }
   }
 }
@@ -0,0 +1,153 @@
+/**
+ * Provides a flow label for reasoning about URLs with a tainted query and fragment part,
+ * which we collectively refer to as the "suffix" of the URL.
+ */
+
+import javascript
+private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
+
+/**
+ * Provides a flow label for reasoning about URLs with a tainted query and fragment part,
+ * which we collectively refer to as the "suffix" of the URL.
+ */
+module TaintedUrlSuffix {
+  private import DataFlow
+
+  /**
+   * The flow label representing a URL with a tainted query and fragment part.
+   *
+   * Can also be accessed using `TaintedUrlSuffix::label()`.
+   */
+  abstract class TaintedUrlSuffixLabel extends FlowLabel {
+    TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
+  }
+
+  /**
+   * Gets the flow label representing a URL with a tainted query and fragment part.
+   */
+  FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
+
+  /** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
+  ClientSideRemoteFlowSource source() {
+    result = DOM::locationRef().getAPropertyRead(["search", "hash"])
+    or
+    result = DOM::locationSource()
+    or
+    result.getKind().isUrl()
+  }
+
+  /**
+   * Holds if `node` should be a barrier for the given `label`.
+   *
+   * This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
+   * label.
+   */
+  predicate isBarrier(Node node, FlowLabel label) {
+    label = label() and
+    DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
+  }
+
+  /**
+   * Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
+   *
+   * This handles steps through string operations, promises, URL parsers, and URL accessors.
+   */
+  predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
+    // Transition from tainted-url-suffix to general taint when entering the second array element
+    // of a split('#') or split('?') array.
+    //
+    //   x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
+    //
+    // Technically we should also preverse tainted-url-suffix when entering the first array element of such
+    // a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
+    // (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
+    srclbl = label() and
+    dstlbl.isTaint() and
+    DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
+    or
+    // Transition from URL suffix to full taint when extracting the query/fragment part.
+    srclbl = label() and
+    dstlbl.isTaint() and
+    (
+      exists(MethodCallNode call, string name |
+        src = call.getReceiver() and
+        dst = call and
+        name = call.getMethodName()
+      |
+        // Substring that is not a prefix
+        name = StringOps::substringMethodName() and
+        not call.getArgument(0).getIntValue() = 0
+        or
+        // Replace '#' and '?' with nothing
+        name = "replace" and
+        call.getArgument(0).getStringValue() = ["#", "?"] and
+        call.getArgument(1).getStringValue() = ""
+        or
+        // The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
+        // The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
+        name = "get"
+        or
+        // Methods on URL objects from the Closure library
+        name = "getDecodedQuery"
+        or
+        name = "getFragment"
+        or
+        name = "getParameterValue"
+        or
+        name = "getParameterValues"
+        or
+        name = "getQueryData"
+      )
+      or
+      exists(PropRead read |
+        src = read.getBase() and
+        dst = read and
+        // Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
+        read.getPropertyName() = "query"
+      )
+      or
+      exists(MethodCallNode call, DataFlow::RegExpCreationNode re |
+        (
+          call = re.getAMethodCall("exec") and
+          src = call.getArgument(0) and
+          dst = call
+          or
+          call.getMethodName() = ["match", "matchAll"] and
+          re.flowsTo(call.getArgument(0)) and
+          src = call.getReceiver() and
+          dst = call
+        )
+      |
+        captureAfterSuffixIndicator(re.getRoot().getAChild*())
+        or
+        // If the regexp is unknown, assume it will extract the URL suffix
+        not exists(re.getRoot())
+      )
+    )
+  }
+
+  /** Holds if the `n`th child of `seq` contains a character indicating that everything thereafter is part of the suffix */
+  private predicate containsSuffixIndicator(RegExpSequence seq, int n) {
+    // Also include '=' as it usually only appears in the URL suffix
+    seq.getChild(n).getAChild*().(RegExpConstant).getValue().regexpMatch(".*[?#=].*")
+  }
+
+  /** Holds if the `n`th child of `seq` contains a capture group. */
+  private predicate containsCaptureGroup(RegExpSequence seq, int n) {
+    seq.getChild(n).getAChild*().(RegExpGroup).isCapture()
+  }
+
+  /**
+   * Holds if `seq` contains a capture group that will likely match path of the URL suffix,
+   * thereby extracting tainted data.
+   *
+   * For example, `/#(.*)/.exec(url)` will extract the tainted URL suffix from `url`.
+   */
+  private predicate captureAfterSuffixIndicator(RegExpSequence seq) {
+    exists(int suffix, int capture |
+      containsSuffixIndicator(seq, suffix) and
+      containsCaptureGroup(seq, capture) and
+      suffix < capture
+    )
+  }
+}
@@ -5,6 +5,7 @@
  */
 
 import javascript
+private import semmle.javascript.security.TaintedUrlSuffixCustomizations
 
 module ClientSideUrlRedirect {
   /**
@@ -31,12 +32,12 @@ module ClientSideUrlRedirect {
   abstract class Sanitizer extends DataFlow::Node { }
 
   /**
+   * DEPRECATED. Replaced by functionality from the `TaintedUrlSuffix` library.
+   *
    * A flow label for values that represent the URL of the current document, and
    * hence are only partially user-controlled.
    */
-  abstract class DocumentUrl extends DataFlow::FlowLabel {
-    DocumentUrl() { this = "document.url" } // TODO: replace with TaintedUrlSuffix
-  }
+  deprecated class DocumentUrl = TaintedUrlSuffix::TaintedUrlSuffixLabel;
 
   /**
    * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
@@ -50,8 +51,8 @@ module ClientSideUrlRedirect {
     ActiveThreatModelSourceAsSource() { not this.(ClientSideRemoteFlowSource).getKind().isPath() }
 
     override DataFlow::FlowLabel getAFlowLabel() {
-      if this.(ClientSideRemoteFlowSource).getKind().isUrl()
-      then result instanceof DocumentUrl
+      if this = TaintedUrlSuffix::source()
+      then result = TaintedUrlSuffix::label()
       else result.isTaint()
     }
   }
@@ -60,7 +61,7 @@ module ClientSideUrlRedirect {
    * Holds if `node` extracts a part of a URL that does not contain the suffix.
    */
   pragma[inline]
-  predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
+  deprecated predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
     // Block flow through prefix-extraction `substring(0, ...)` and `split("#")[0]`
     node.getMethodName() = [StringOps::substringMethodName(), "split"] and
     not untrustedUrlSubstring(_, node)
@@ -70,7 +71,7 @@ module ClientSideUrlRedirect {
    * Holds if `substring` refers to a substring of `base` which is considered untrusted
    * when `base` is the current URL.
    */
-  predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
+  deprecated predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
     exists(DataFlow::MethodCallNode mcn, string methodName |
       mcn = substring and mcn.calls(base, methodName)
     |
 
@@ -10,9 +10,10 @@
 import javascript
 import UrlConcatenation
 import ClientSideUrlRedirectCustomizations::ClientSideUrlRedirect
+import semmle.javascript.security.TaintedUrlSuffix
 
 // Materialize flow labels
-private class ConcreteDocumentUrl extends DocumentUrl {
+deprecated private class ConcreteDocumentUrl extends DocumentUrl {
   ConcreteDocumentUrl() { this = this }
 }
 
@@ -35,8 +36,7 @@ module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
   }
 
   predicate isBarrier(DataFlow::Node node, DataFlow::FlowLabel state) {
-    isPrefixExtraction(node) and
-    state instanceof DocumentUrl
+    TaintedUrlSuffix::isBarrier(node, state)
   }
 
   predicate isBarrierOut(DataFlow::Node node) { hostnameSanitizingPrefixEdge(node, _) }
@@ -47,9 +47,7 @@ module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
     DataFlow::Node node1, DataFlow::FlowLabel state1, DataFlow::Node node2,
     DataFlow::FlowLabel state2
   ) {
-    untrustedUrlSubstring(node1, node2) and
-    state1 instanceof DocumentUrl and
-    state2.isTaint()
+    TaintedUrlSuffix::step(node1, node2, state1, state2)
     or
     exists(HtmlSanitizerCall call |
       node1 = call.getInput() and