@@ -21,6 +21,7 @@ private import codeql.ruby.typetracking.TypeTracker
2121private import codeql.ruby.ApiGraphs
2222private import codeql.ruby.Concepts
2323private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
24+ private import codeql.ruby.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
2425private import codeql.ruby.TaintTracking
2526private import codeql.ruby.frameworks.core.String
2627
@@ -37,43 +38,6 @@ DataFlow::LocalSourceNode strStart() {
3738/** Gets a dataflow node for a regular expression literal. */
3839DataFlow:: LocalSourceNode regStart ( ) { result .asExpr ( ) .getExpr ( ) instanceof Ast:: RegExpLiteral }
3940
40- /**
41- * Holds if the analysis should track flow from `nodeFrom` to `nodeTo` on top of the ordinary type-tracking steps.
42- * `nodeFrom` and `nodeTo` has type `fromType` and `toType` respectively.
43- * The types are either "string" or "regexp".
44- */
45- predicate step (
46- DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo , string fromType , string toType
47- ) {
48- fromType = toType and
49- fromType = "string" and
50- (
51- // include taint flow through `String` summaries
52- TaintTracking:: localTaintStep ( nodeFrom , nodeTo ) and
53- nodeFrom .( DataFlowPrivate:: SummaryNode ) .getSummarizedCallable ( ) instanceof
54- String:: SummarizedCallable
55- or
56- // string concatenations, and
57- exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
58- op = nodeTo .asExpr ( ) and
59- op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
60- op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
61- )
62- or
63- // string interpolations
64- nodeFrom .asExpr ( ) =
65- nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
66- )
67- or
68- fromType = "string" and
69- toType = "reg" and
70- exists ( DataFlow:: CallNode call |
71- call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
72- nodeFrom = call .getArgument ( 0 ) and
73- nodeTo = call
74- )
75- }
76-
7741/** Gets a node where string values that flow to the node are interpreted as regular expressions. */
7842DataFlow:: Node stringSink ( ) {
7943 result instanceof RE:: RegExpInterpretation:: Range and
@@ -91,69 +55,172 @@ DataFlow::Node stringSink() {
9155/** Gets a node where regular expressions that flow to the node are used. */
9256DataFlow:: Node regSink ( ) { result = any ( RegexExecution exec ) .getRegex ( ) }
9357
94- /** Gets a node that is reachable by type-tracking from any string or regular expression. */
95- DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
96- t .start ( ) and
97- result = [ strStart ( ) , regStart ( ) ]
98- or
99- exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
100- or
101- exists ( TypeTracker t2 | t2 = t .continue ( ) | step ( forward ( t2 ) .getALocalUse ( ) , result , _, _) )
58+ private signature module ReachInputSig {
59+ DataFlow:: LocalSourceNode start ( TypeTracker t ) ;
60+
61+ DataFlow:: Node end ( ) ;
62+
63+ predicate additionalStep ( DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo ) ;
10264}
10365
104- /**
105- * Gets a node that is backwards reachable from any regular expression use,
106- * where that use is reachable by type-tracking from any string or regular expression.
107- */
108- DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
109- t .start ( ) and
110- result .flowsTo ( [ stringSink ( ) , regSink ( ) ] ) and
111- result = forward ( TypeTracker:: end ( ) )
112- or
113- exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
114- or
115- exists ( TypeBackTracker t2 | t2 = t .continue ( ) | step ( result .getALocalUse ( ) , backwards ( t2 ) , _, _) )
66+ private module Reach< ReachInputSig Input> {
67+ /** Gets a node that is forwards reachable by type-tracking. */
68+ pragma [ nomagic]
69+ private DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
70+ result = Input:: start ( t )
71+ or
72+ exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
73+ or
74+ exists ( TypeTracker t2 | t2 = t .continue ( ) | Input:: additionalStep ( forward ( t2 ) , result ) )
75+ }
76+
77+ bindingset [ result , tbt]
78+ pragma [ inline_late]
79+ pragma [ noopt]
80+ private DataFlow:: LocalSourceNode forwardLateInline ( TypeBackTracker tbt ) {
81+ exists ( TypeTracker tt |
82+ result = forward ( tt ) and
83+ tt = tbt .getACompatibleTypeTracker ( )
84+ )
85+ }
86+
87+ /** Gets a node that is backwards reachable by type-tracking. */
88+ pragma [ nomagic]
89+ private DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
90+ result = forwardLateInline ( t ) and
91+ (
92+ t .start ( ) and
93+ result .flowsTo ( Input:: end ( ) )
94+ or
95+ exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
96+ or
97+ exists ( TypeBackTracker t2 | t2 = t .continue ( ) | Input:: additionalStep ( result , backwards ( t2 ) ) )
98+ )
99+ }
100+
101+ bindingset [ result , tt]
102+ pragma [ inline_late]
103+ pragma [ noopt]
104+ private DataFlow:: LocalSourceNode backwardsInlineLate ( TypeTracker tt ) {
105+ exists ( TypeBackTracker tbt |
106+ result = backwards ( tbt ) and
107+ tt = tbt .getACompatibleTypeTracker ( )
108+ )
109+ }
110+
111+ pragma [ nomagic]
112+ predicate reached ( DataFlow:: LocalSourceNode n , TypeTracker t ) {
113+ n = forward ( t ) and
114+ n = backwardsInlineLate ( t )
115+ }
116+
117+ pragma [ nomagic]
118+ TypeTracker stepReached (
119+ TypeTracker t , DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo
120+ ) {
121+ exists ( StepSummary summary |
122+ StepSummary:: step ( nodeFrom , nodeTo , summary ) and
123+ reached ( nodeFrom , t ) and
124+ reached ( nodeTo , result ) and
125+ result = t .append ( summary )
126+ )
127+ or
128+ Input:: additionalStep ( nodeFrom , nodeTo ) and
129+ reached ( nodeFrom , pragma [ only_bind_into ] ( t ) ) and
130+ reached ( nodeTo , pragma [ only_bind_into ] ( t ) ) and
131+ result = t .continue ( )
132+ }
133+ }
134+
135+ pragma [ nomagic]
136+ private predicate regFromString ( DataFlow:: LocalSourceNode n , DataFlow:: CallNode call ) {
137+ exists ( DataFlow:: Node mid |
138+ n .flowsTo ( mid ) and
139+ call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
140+ mid = call .getArgument ( 0 )
141+ )
142+ }
143+
144+ private module StringReachInput implements ReachInputSig {
145+ DataFlow:: LocalSourceNode start ( TypeTracker t ) { result = strStart ( ) and t .start ( ) }
146+
147+ DataFlow:: Node end ( ) {
148+ result = stringSink ( ) or
149+ regFromString ( result , _)
150+ }
151+
152+ predicate additionalStep ( DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo ) {
153+ exists ( DataFlow:: Node mid | nodeFrom .flowsTo ( mid ) |
154+ // include taint flow through `String` summaries
155+ TaintTrackingPrivate:: summaryThroughStepTaint ( mid , nodeTo , any ( String:: SummarizedCallable c ) )
156+ or
157+ // string concatenations, and
158+ exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
159+ op = nodeTo .asExpr ( ) and
160+ op .getAnOperand ( ) = mid .asExpr ( ) and
161+ op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
162+ )
163+ or
164+ // string interpolations
165+ mid .asExpr ( ) = nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
166+ )
167+ }
116168}
117169
170+ private module StringReach = Reach< StringReachInput > ;
171+
118172/**
119173 * Gets a node that has been tracked from the string constant `start` to some node.
120174 * This is used to figure out where `start` is evaluated as a regular expression against an input string,
121175 * or where `start` is compiled into a regular expression.
122176 */
123177private DataFlow:: LocalSourceNode trackStrings ( DataFlow:: Node start , TypeTracker t ) {
124- result = backwards ( _) and
125- (
126- t .start ( ) and
127- start = result and
128- result = strStart ( )
129- or
130- exists ( TypeTracker t2 | result = trackStrings ( start , t2 ) .track ( t2 , t ) )
178+ t .start ( ) and
179+ start = result and
180+ result = strStart ( ) and
181+ StringReach:: reached ( result , t )
182+ or
183+ exists ( TypeTracker t2 | t = StringReach:: stepReached ( t2 , trackStrings ( start , t2 ) , result ) )
184+ }
185+
186+ pragma [ nomagic]
187+ private predicate regFromStringStart ( DataFlow:: Node start , TypeTracker t , DataFlow:: CallNode nodeTo ) {
188+ regFromString ( trackStrings ( start , t ) , nodeTo ) and
189+ exists ( t .continue ( ) )
190+ }
191+
192+ private module RegReachInput implements ReachInputSig {
193+ DataFlow:: LocalSourceNode start ( TypeTracker t ) {
194+ result = regStart ( ) and
195+ t .start ( )
131196 or
132- // an additional step from string to string
133- exists ( TypeTracker t2 | t2 = t .continue ( ) |
134- step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "string" )
135- )
136- )
197+ regFromStringStart ( _, t , result )
198+ }
199+
200+ DataFlow:: Node end ( ) { result = regSink ( ) }
201+
202+ predicate additionalStep ( DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo ) {
203+ none ( )
204+ }
137205}
138206
207+ private module RegReach = Reach< RegReachInput > ;
208+
139209/**
140210 * Gets a node that has been tracked from the regular expression `start` to some node.
141211 * This is used to figure out where `start` is executed against an input string.
142212 */
143213private DataFlow:: LocalSourceNode trackRegs ( DataFlow:: Node start , TypeTracker t ) {
144- result = backwards ( _ ) and
214+ RegReach :: reached ( result , t ) and
145215 (
146216 t .start ( ) and
147217 start = result and
148218 result = regStart ( )
149219 or
150- exists ( TypeTracker t2 | result = trackRegs ( start , t2 ) .track ( t2 , t ) )
151- or
152- // an additional step where a string is converted to a regular expression
153- exists ( TypeTracker t2 | t2 = t .continue ( ) |
154- step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "reg" )
155- )
220+ regFromStringStart ( start , t , result )
156221 )
222+ or
223+ exists ( TypeTracker t2 | t = RegReach:: stepReached ( t2 , trackRegs ( start , t2 ) , result ) )
157224}
158225
159226/** Gets a node that references a regular expression. */
0 commit comments