@@ -227,6 +227,7 @@ private module Cached {
227227 } or
228228 TSelfParameterNode ( MethodBase m ) or
229229 TBlockParameterNode ( MethodBase m ) or
230+ TSynthHashSplatParameterNode ( MethodBase m ) { m .getAParameter ( ) instanceof KeywordParameter } or
230231 TExprPostUpdateNode ( CfgNodes:: ExprCfgNode n ) {
231232 n instanceof Argument or
232233 n = any ( CfgNodes:: ExprNodes:: InstanceVariableAccessCfgNode v ) .getReceiver ( )
@@ -240,12 +241,13 @@ private module Cached {
240241 TSummaryParameterNode ( FlowSummaryImpl:: Public:: SummarizedCallable c , ParameterPosition pos ) {
241242 FlowSummaryImpl:: Private:: summaryParameterNodeRange ( c , pos )
242243 } or
243- THashSplatArgumentsNode ( CfgNodes:: ExprNodes:: CallCfgNode c ) {
244+ TSynthHashSplatArgumentsNode ( CfgNodes:: ExprNodes:: CallCfgNode c ) {
244245 exists ( Argument arg | arg .isArgumentOf ( c , any ( ArgumentPosition pos | pos .isKeyword ( _) ) ) )
245246 }
246247
247248 class TParameterNode =
248- TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or TSummaryParameterNode ;
249+ TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or
250+ TSynthHashSplatParameterNode or TSummaryParameterNode ;
249251
250252 private predicate defaultValueFlow ( NamedParameter p , ExprNode e ) {
251253 p .( OptionalParameter ) .getDefaultValue ( ) = e .getExprNode ( ) .getExpr ( )
@@ -328,18 +330,21 @@ private module Cached {
328330
329331 cached
330332 predicate isLocalSourceNode ( Node n ) {
331- n instanceof ParameterNode
332- or
333- n instanceof PostUpdateNodes:: ExprPostUpdateNode
334- or
335- // Nodes that can't be reached from another entry definition or expression.
336- not reachedFromExprOrEntrySsaDef ( n )
337- or
338- // Ensure all entry SSA definitions are local sources -- for parameters, this
339- // is needed by type tracking. Note that when the parameter has a default value,
340- // it will be reachable from an expression (the default value) and therefore
341- // won't be caught by the rule above.
342- entrySsaDefinition ( n )
333+ not n instanceof SynthHashSplatParameterNode and
334+ (
335+ n instanceof ParameterNode
336+ or
337+ n instanceof PostUpdateNodes:: ExprPostUpdateNode
338+ or
339+ // Nodes that can't be reached from another entry definition or expression.
340+ not reachedFromExprOrEntrySsaDef ( n )
341+ or
342+ // Ensure all entry SSA definitions are local sources -- for parameters, this
343+ // is needed by type tracking. Note that when the parameter has a default value,
344+ // it will be reachable from an expression (the default value) and therefore
345+ // won't be caught by the rule above.
346+ entrySsaDefinition ( n )
347+ )
343348 }
344349
345350 cached
@@ -415,7 +420,9 @@ predicate nodeIsHidden(Node n) {
415420 or
416421 n instanceof SynthReturnNode
417422 or
418- n instanceof HashSplatArgumentsNode
423+ n instanceof SynthHashSplatParameterNode
424+ or
425+ n instanceof SynthHashSplatArgumentsNode
419426}
420427
421428/** An SSA definition, viewed as a node in a data flow graph. */
@@ -570,6 +577,74 @@ private module ParameterNodes {
570577 }
571578 }
572579
580+ /**
581+ * For all methods containing keyword parameters, we construct a synthesized
582+ * (hidden) parameter node to contain all keyword arguments. This allows us
583+ * to handle cases like
584+ *
585+ * ```rb
586+ * def foo(p1:, p2:)
587+ * sink(p1)
588+ * sink(p2)
589+ * end
590+ *
591+ * args = {:p1 => taint(1), :p2 => taint(2) }
592+ * foo(**args)
593+ * ```
594+ *
595+ * by adding read steps out of the synthesized parameter node to the relevant
596+ * keyword parameters.
597+ *
598+ * Note that this will introduce a bit of redundancy in cases like
599+ *
600+ * ```rb
601+ * foo(p1: taint(1), p2: taint(2))
602+ * ```
603+ *
604+ * where direct keyword matching is possible, since we construct a synthesized hash
605+ * splat argument (`SynthHashSplatArgumentsNode`) at the call site, which means that
606+ * `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
607+ * nodes (and similarly for `p2`). However, this redunancy is OK since
608+ * (a) it means that type-tracking through keyword arguments also works in most cases,
609+ * (b) read/store steps can be avoided when direct keyword matching is possible, and
610+ * hence access path limits are not a concern, and
611+ * (c) since the synthesized nodes are hidden, the reported data-flow paths will be
612+ * collapsed anyway.
613+ */
614+ class SynthHashSplatParameterNode extends ParameterNodeImpl , TSynthHashSplatParameterNode {
615+ private MethodBase method ;
616+
617+ SynthHashSplatParameterNode ( ) { this = TSynthHashSplatParameterNode ( method ) }
618+
619+ final Callable getMethod ( ) { result = method }
620+
621+ /**
622+ * Gets a keyword parameter that will be the result of reading `c` out of this
623+ * synthesized node.
624+ */
625+ NormalParameterNode getAKeywordParameter ( ContentSet c ) {
626+ exists ( KeywordParameter p |
627+ p = result .getParameter ( ) and
628+ p = method .getAParameter ( )
629+ |
630+ c = getKeywordContent ( p .getName ( ) ) or
631+ c .isSingleton ( TUnknownElementContent ( ) )
632+ )
633+ }
634+
635+ override Parameter getParameter ( ) { none ( ) }
636+
637+ override predicate isSourceParameterOf ( Callable c , ParameterPosition pos ) {
638+ c = method and pos .isHashSplat ( )
639+ }
640+
641+ override CfgScope getCfgScope ( ) { result = method }
642+
643+ override Location getLocationImpl ( ) { result = method .getLocation ( ) }
644+
645+ override string toStringImpl ( ) { result = "**kwargs" }
646+ }
647+
573648 /** A parameter for a library callable with a flow summary. */
574649 class SummaryParameterNode extends ParameterNodeImpl , TSummaryParameterNode {
575650 private FlowSummaryImpl:: Public:: SummarizedCallable sc ;
@@ -689,10 +764,10 @@ private module ArgumentNodes {
689764 * part of the method signature, such that those cannot end up in the hash-splat
690765 * parameter.
691766 */
692- class HashSplatArgumentsNode extends ArgumentNode , THashSplatArgumentsNode {
767+ class SynthHashSplatArgumentsNode extends ArgumentNode , TSynthHashSplatArgumentsNode {
693768 CfgNodes:: ExprNodes:: CallCfgNode c ;
694769
695- HashSplatArgumentsNode ( ) { this = THashSplatArgumentsNode ( c ) }
770+ SynthHashSplatArgumentsNode ( ) { this = TSynthHashSplatArgumentsNode ( c ) }
696771
697772 override predicate argumentOf ( DataFlowCall call , ArgumentPosition pos ) {
698773 this .sourceArgumentOf ( call .asCall ( ) , pos )
@@ -704,10 +779,10 @@ private module ArgumentNodes {
704779 }
705780 }
706781
707- private class HashSplatArgumentsNodeImpl extends NodeImpl , THashSplatArgumentsNode {
782+ private class SynthHashSplatArgumentsNodeImpl extends NodeImpl , TSynthHashSplatArgumentsNode {
708783 CfgNodes:: ExprNodes:: CallCfgNode c ;
709784
710- HashSplatArgumentsNodeImpl ( ) { this = THashSplatArgumentsNode ( c ) }
785+ SynthHashSplatArgumentsNodeImpl ( ) { this = TSynthHashSplatArgumentsNode ( c ) }
711786
712787 override CfgScope getCfgScope ( ) { result = c .getExpr ( ) .getCfgScope ( ) }
713788
@@ -929,7 +1004,7 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
9291004 or
9301005 // Wrap all keyword arguments in a synthesized hash-splat argument node
9311006 exists ( CfgNodes:: ExprNodes:: CallCfgNode call , ArgumentPosition keywordPos , string name |
932- node2 = THashSplatArgumentsNode ( call ) and
1007+ node2 = TSynthHashSplatArgumentsNode ( call ) and
9331008 node1 .asExpr ( ) .( Argument ) .isArgumentOf ( call , keywordPos ) and
9341009 keywordPos .isKeyword ( name ) and
9351010 c = getKeywordContent ( name )
@@ -962,6 +1037,8 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
9621037 ) )
9631038 )
9641039 or
1040+ node2 = node1 .( SynthHashSplatParameterNode ) .getAKeywordParameter ( c )
1041+ or
9651042 FlowSummaryImpl:: Private:: Steps:: summaryReadStep ( node1 , c , node2 )
9661043}
9671044
0 commit comments