|
| 1 | +/** |
| 2 | + * @name Regular expression always matches |
| 3 | + * @description Regular expression tests that always find a match indicate dead code or a logic error |
| 4 | + * @kind problem |
| 5 | + * @problem.severity warning |
| 6 | + * @id js/regex/always-matches |
| 7 | + * @tags correctness |
| 8 | + * regular-expressions |
| 9 | + * @precision high |
| 10 | + */ |
| 11 | + |
| 12 | +import javascript |
| 13 | + |
| 14 | +/** |
| 15 | + * Gets `actualRoot` itself, provided it is a root term, or any term reachable from it by descending only through alternations (`RegExpAlt`) and groups (`RegExpGroup`). |
| 16 | + * |
| 17 | + * For example, for `/(foo|bar)/` this gets `(foo|bar)`, `foo|bar`, `foo` and `bar`. |
| 18 | + */ |
| 19 | +RegExpTerm getEffectiveRootAux(RegExpTerm actualRoot) { |
| 20 | + actualRoot.isRootTerm() and |
| 21 | + result = actualRoot |
| 22 | + or |
| 23 | + result = getEffectiveRootAux(actualRoot).(RegExpAlt).getAChild() |
| 24 | + or |
| 25 | + result = getEffectiveRootAux(actualRoot).(RegExpGroup).getAChild() |
| 26 | +} |
| 27 | + |
| 28 | +/** |
| 29 | + * Gets a term reached from `actualRoot` through alternations and groups only that is itself neither an alternation nor a group. |
| 30 | + * |
| 31 | + * For example, for `/(foo|bar)/` this gets `foo` and `bar`, but not `(foo|bar)` or `foo|bar`. |
| 32 | + */ |
| 33 | +RegExpTerm getEffectiveRoot(RegExpTerm actualRoot) { |
| 34 | + exists(RegExpTerm candidate | candidate = getEffectiveRootAux(actualRoot) | |
| 35 | + not (candidate instanceof RegExpAlt or candidate instanceof RegExpGroup) and result = candidate |
| 36 | + ) |
| 37 | +} |
| 38 | + |
| 39 | +/** |
| 40 | + * Holds if `node` is a sequence with at least two children that contains both a `^` and a `$` anchor somewhere inside it; the anchors' positions are not checked, hence "possibly". |
| 41 | + */ |
| 42 | +predicate isPossiblyAnchoredOnBothEnds(RegExpSequence node) { |
| 43 | + node.getAChild*() instanceof RegExpCaret and |
| 44 | + node.getAChild*() instanceof RegExpDollar and |
| 45 | + node.getNumChild() >= 2 |
| 46 | +} |
| 47 | + |
| 48 | +/** |
| 49 | + * Holds if `term` is a `*`-quantified dot or universal character class, that is, a pattern obviously intended to match any string. |
| 50 | + */ |
| 51 | +predicate isUniversalRegExp(RegExpTerm term) { |
| 52 | + exists(RegExpTerm repeated | repeated = term.(RegExpStar).getAChild() | |
| 53 | + repeated.(RegExpCharacterClass).isUniversalClass() |
| 54 | + or |
| 55 | + repeated instanceof RegExpDot |
| 56 | + ) |
| 57 | +} |
| 58 | + |
| 59 | +/** |
| 60 | + * A call that searches for a regexp match within a string, but does not |
| 61 | + * extract the capture groups or the matched string itself. |
| 62 | + * |
| 63 | + * Because of the longest-match rule, calls that do more than purely test for a match |
| 64 | + * aren't necessarily broken just because the regexp can accept the empty string. Subclasses supply the regexp being searched for through `getRegExp()`. |
| 65 | + */ |
| 66 | +abstract class RegExpQuery extends DataFlow::CallNode { |
| 67 | + abstract RegExpTerm getRegExp(); |
| 68 | +} |
| 69 | + |
| 70 | +/** |
| 71 | + * A call to `RegExp.prototype.test`, recognised as a `test` method call on a reference to a regular-expression creation; `getRegExp()` yields that expression's root term. |
| 72 | + */ |
| 73 | +class RegExpTestCall extends DataFlow::MethodCallNode, RegExpQuery { |
| 74 | + DataFlow::RegExpCreationNode regexp; |
| 75 | + |
| 76 | + RegExpTestCall() { |
| 77 | + this = regexp.getAReference().getAMethodCall("test") |
| 78 | + } |
| 79 | + |
| 80 | + override RegExpTerm getRegExp() { |
| 81 | + result = regexp.getRoot() |
| 82 | + } |
| 83 | +} |
| 84 | + |
| 85 | +/** |
| 86 | + * A call to `String.prototype.search`, recognised as a method call named `search` whose first argument receives a reference to a regular-expression creation; `getRegExp()` yields that expression's root term. |
| 87 | + */ |
| 88 | +class RegExpSearchCall extends DataFlow::MethodCallNode, RegExpQuery { |
| 89 | + DataFlow::RegExpCreationNode regexp; |
| 90 | + |
| 91 | + RegExpSearchCall() { |
| 92 | + getMethodName() = "search" and |
| 93 | + regexp.getAReference().flowsTo(getArgument(0)) |
| 94 | + } |
| 95 | + |
| 96 | + override RegExpTerm getRegExp() { |
| 97 | + result = regexp.getRoot() |
| 98 | + } |
| 99 | +} |
| 100 | + |
| 101 | +/** |
| 102 | + * Holds if `t` is a zero-width assertion other than an anchor: a word boundary, a non-word boundary, or a `RegExpSubPattern` term. |
| 103 | + */ |
| 104 | +predicate isAssertion(RegExpTerm t) { |
| 105 | + t instanceof RegExpWordBoundary or |
| 106 | + t instanceof RegExpNonWordBoundary or |
| 107 | + t instanceof RegExpSubPattern |
| 108 | +} |
| 109 | + |
| 110 | +from RegExpTerm term, RegExpQuery call, string message |
| 111 | +where |
| 112 | + term.isNullable() and // the term can match the empty string |
| 113 | + not isAssertion(term.getAChild*()) and // an assertion can still make the empty match fail |
| 114 | + not isUniversalRegExp(term) and // `.*`-style patterns are meant to match anything |
| 115 | + term = getEffectiveRoot(call.getRegExp()) and |
| 116 | + ( |
| 117 | + call instanceof RegExpTestCall and |
| 118 | + not isPossiblyAnchoredOnBothEnds(term) and // `^...$` may constrain the whole string |
| 119 | + message = "This regular expression always matches when used in a test $@, as it can match an empty substring." |
| 120 | + or |
| 121 | + call instanceof RegExpSearchCall and |
| 122 | + not term.getAChild*() instanceof RegExpDollar and // with `$` the match need not be at index 0 |
| 123 | + message = "This regular expression always matches at index 0 when used $@, as it matches the empty substring." |
| 124 | + ) |
| 125 | +select term, message, call, "here" |
0 commit comments