@@ -72,6 +72,49 @@ private int ascii(string char) {
7272 )
7373}
7474
/**
 * Holds if the term `t` is able to match the empty string (epsilon).
 *
 * A term for which this holds places no restriction on the language of the
 * regular expression that encloses it.
 *
 * The predicate is a deliberate under-approximation: it only recognises the
 * term kinds listed below, and in particular it never holds for sub-patterns.
 */
predicate matchesEpsilon(RegExpTerm t) {
  // Quantifiers that explicitly permit zero repetitions.
  t instanceof RegExpStar
  or
  t instanceof RegExpOpt
  or
  t.(RegExpRange).getLowerBound() = 0
  or
  // Alternations, groups, `+` and `{n,m}`: it suffices that some child matches epsilon.
  (
    t instanceof RegExpAlt or
    t instanceof RegExpGroup or
    t instanceof RegExpPlus or
    t instanceof RegExpRange
  ) and
  matchesEpsilon(t.getAChild())
  or
  // A back reference matches epsilon when the group it refers to does.
  matchesEpsilon(t.(RegExpBackRef).getGroup())
  or
  // A sequence matches epsilon when it has at least one child and every child does.
  exists(RegExpSequence seq | seq = t |
    forex(RegExpTerm part | part = seq.getAChild() | matchesEpsilon(part))
  )
}
103+
/**
 * A positive lookahead or positive lookbehind whose operand can match the
 * empty string, as determined by `matchesEpsilon`.
 */
class EmptyPositiveSubPatttern extends RegExpSubPattern {
  EmptyPositiveSubPatttern() {
    // The operand must be able to match epsilon ...
    matchesEpsilon(this.getOperand()) and
    // ... and only the positive assertion kinds are considered.
    (
      this instanceof RegExpPositiveLookbehind
      or
      this instanceof RegExpPositiveLookahead
    )
  }
}
117+
75118/**
76119 * A branch in a disjunction that is the root node in a literal, or a literal
77120 * whose root node is not a disjunction.
@@ -659,6 +702,10 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
659702 exists ( RegExpDollar dollar | q1 = before ( dollar ) |
660703 lbl = Epsilon ( ) and q2 = Accept ( getRoot ( dollar ) )
661704 )
705+ or
706+ exists ( EmptyPositiveSubPatttern empty | q1 = before ( empty ) |
707+ lbl = Epsilon ( ) and q2 = after ( empty )
708+ )
662709}
663710
664711/**
0 commit comments