@@ -1495,162 +1495,158 @@ private module StdlibPrivate {
14951495 result = this .getArg ( any ( int i | i >= msgIndex ) )
14961496 }
14971497 }
1498- }
14991498
1500- // ---------------------------------------------------------------------------
1501- // re
1502- // ---------------------------------------------------------------------------
1503- /**
1504- * List of methods in the `re` module immediately executing a regular expression.
1505- *
1506- * See https://docs.python.org/3/library/re.html#module-contents
1507- */
1508- private class RegexExecutionMethod extends string {
1509- RegexExecutionMethod ( ) {
1510- this in [ "match" , "fullmatch" , "search" , "split" , "findall" , "finditer" , "sub" , "subn" ]
1499+ // ---------------------------------------------------------------------------
1500+ // re
1501+ // ---------------------------------------------------------------------------
1502+ /**
1503+ * List of methods in the `re` module immediately executing a regular expression.
1504+ *
1505+ * See https://docs.python.org/3/library/re.html#module-contents
1506+ */
1507+ private class RegexExecutionMethod extends string {
1508+ RegexExecutionMethod ( ) {
1509+ this in [ "match" , "fullmatch" , "search" , "split" , "findall" , "finditer" , "sub" , "subn" ]
1510+ }
15111511 }
1512- }
15131512
1514- /** Gets the index of the argument representing the string to be searched by a regex. */
1515- int stringArg ( RegexExecutionMethod method ) {
1516- method in [ "match" , "fullmatch" , "search" , "split" , "findall" , "finditer" ] and
1517- result = 1
1518- or
1519- method in [ "sub" , "subn" ] and
1520- result = 2
1521- }
1513+ /** Gets the index of the argument representing the string to be searched by a regex. */
1514+ int getStringArgIndex ( RegexExecutionMethod method ) {
1515+ method in [ "match" , "fullmatch" , "search" , "split" , "findall" , "finditer" ] and
1516+ result = 1
1517+ or
1518+ method in [ "sub" , "subn" ] and
1519+ result = 2
1520+ }
15221521
1523- /**
1524- * A a call to a method from the `re` module immediately executing a regular expression.
1525- *
1526- * See `RegexExecutionMethods`
1527- */
1528- private class DirectRegexExecution extends DataFlow:: CallCfgNode , RegexExecution:: Range {
1529- RegexExecutionMethod method ;
1522+ /**
1523+ * A a call to a method from the `re` module immediately executing a regular expression.
1524+ *
1525+ * See `RegexExecutionMethods`
1526+ */
1527+ private class DirectRegexExecution extends DataFlow:: CallCfgNode , RegexExecution:: Range {
1528+ RegexExecutionMethod method ;
15301529
1531- DirectRegex ( ) { this = API:: moduleImport ( "re" ) .getMember ( method ) .getACall ( ) }
1530+ DirectRegexExecution ( ) { this = API:: moduleImport ( "re" ) .getMember ( method ) .getACall ( ) }
15321531
1533- override DataFlow:: Node getRegexNode ( ) {
1534- result in [ this .getArg ( 0 ) , this .getArgByName ( "pattern" ) ]
1535- }
1532+ override DataFlow:: Node getRegex ( ) { result in [ this .getArg ( 0 ) , this .getArgByName ( "pattern" ) ] }
15361533
1537- override DataFlow:: Node getString ( ) {
1538- result in [ this .getArg ( stringArg ( method ) ) , this .getArgByName ( "string" ) ]
1534+ override DataFlow:: Node getString ( ) {
1535+ result in [ this .getArg ( getStringArgIndex ( method ) ) , this .getArgByName ( "string" ) ]
1536+ }
1537+
1538+ override string getName ( ) { result = "re." + method }
15391539 }
15401540
1541- override string getName ( ) { result = "re." + method }
1542- }
1541+ /** Helper module for tracking compiled regexes. */
1542+ private module CompiledRegexes {
1543+ private import semmle.python.dataflow.new.DataFlow4
1544+ private import semmle.python.RegexTreeView
15431545
1544- /** Helper module for tracking compiled regexes. */
1545- private module CompiledRegexes {
1546- private import semmle.python.dataflow.new.DataFlow4
1547- private import semmle.python.RegexTreeView
1546+ // TODO: This module should be refactored once API graphs are more expressive.
1547+ // For now it uses data flow, so we pick the verion with least change of collision (4) .
1548+ /** A configuration for finding uses of compiled regexes. */
1549+ class RegexDefinitionConfiguration extends DataFlow4:: Configuration {
1550+ RegexDefinitionConfiguration ( ) { this = "RegexDefinitionConfiguration" }
15481551
1549- // TODO: This module should be refactored once API graphs are more expressive.
1550- // For now it uses data flow, so we pick the verion with least change of collision (4) .
1551- /** A configuration for finding uses of compiled regexes. */
1552- class RegexDefinitionConfiguration extends DataFlow4:: Configuration {
1553- RegexDefinitionConfiguration ( ) { this = "RegexDefinitionConfiguration" }
1552+ override predicate isSource ( DataFlow:: Node source ) { source instanceof RegexDefinitonSource }
15541553
1555- override predicate isSource ( DataFlow:: Node source ) { source instanceof RegexDefinitonSource }
1554+ override predicate isSink ( DataFlow:: Node sink ) { sink instanceof RegexDefinitionSink }
1555+ }
15561556
1557- override predicate isSink ( DataFlow:: Node sink ) { sink instanceof RegexDefinitionSink }
1558- }
1557+ /** A regex compilation. */
1558+ class RegexDefinitonSource extends DataFlow:: CallCfgNode {
1559+ DataFlow:: Node regexNode ;
15591560
1560- /** A regex compilation. */
1561- class RegexDefinitonSource extends DataFlow:: CallCfgNode {
1562- DataFlow:: Node regexNode ;
1561+ RegexDefinitonSource ( ) {
1562+ this = API:: moduleImport ( "re" ) .getMember ( "compile" ) .getACall ( ) and
1563+ regexNode in [ this .getArg ( 0 ) , this .getArgByName ( "pattern" ) ]
1564+ }
15631565
1564- RegexDefinitonSource ( ) {
1565- this = API:: moduleImport ( "re" ) .getMember ( "compile" ) .getACall ( ) and
1566- regexNode in [ this .getArg ( 0 ) , this .getArgByName ( "pattern" ) ]
1566+ /** Gets the data flow node for the regex being compiled by this node. */
1567+ DataFlow:: Node getRegexNode ( ) { result = regexNode }
15671568 }
15681569
1569- /** Gets the data flow node for the regex being compiled by this node. */
1570- DataFlow:: Node getRegexNode ( ) { result = regexNode }
1571- }
1572-
1573- /** A use of a compiled regex. */
1574- class RegexDefinitionSink extends DataFlow:: Node {
1575- RegexExecutionMethod method ;
1576- DataFlow:: CallCfgNode executingCall ;
1570+ /** A use of a compiled regex. */
1571+ class RegexDefinitionSink extends DataFlow:: Node {
1572+ RegexExecutionMethod method ;
1573+ DataFlow:: CallCfgNode executingCall ;
15771574
1578- RegexDefinitionSink ( ) {
1579- executingCall =
1580- API:: moduleImport ( "re" ) .getMember ( "compile" ) .getReturn ( ) .getMember ( method ) .getACall ( ) and
1581- this = executingCall .getFunction ( ) .( DataFlow:: AttrRead ) .getObject ( )
1582- }
1575+ RegexDefinitionSink ( ) {
1576+ executingCall =
1577+ API:: moduleImport ( "re" ) .getMember ( "compile" ) .getReturn ( ) .getMember ( method ) .getACall ( ) and
1578+ this = executingCall .getFunction ( ) .( DataFlow:: AttrRead ) .getObject ( )
1579+ }
15831580
1584- /** Gets the method used to execute the regex. */
1585- RegexExecutionMethod getMethod ( ) { result = method }
1581+ /** Gets the method used to execute the regex. */
1582+ RegexExecutionMethod getMethod ( ) { result = method }
15861583
1587- /** Gets the data flow node for the executing call. */
1588- DataFlow:: CallCfgNode getExecutingCall ( ) { result = executingCall }
1584+ /** Gets the data flow node for the executing call. */
1585+ DataFlow:: CallCfgNode getExecutingCall ( ) { result = executingCall }
1586+ }
15891587 }
1590- }
15911588
1592- private import CompiledRegexes
1589+ private import CompiledRegexes
15931590
1594- /**
1595- * A call on compiled regular expression (obtained via `re.compile`) executing a
1596- * regular expression.
1597- *
1598- * Given the following example:
1599- *
1600- * ```py
1601- * pattern = re.compile(input)
1602- * pattern.match(s)
1603- * ```
1604- *
1605- * This class will identify that `re.compile` compiles `input` and afterwards
1606- * executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
1607- * and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument).
1608- *
1609- *
1610- * See `RegexExecutionMethods`
1611- *
1612- * See https://docs.python.org/3/library/re.html#regular-expression-objects
1613- */
1614- private class CompiledRegexExecution extends DataFlow:: CallCfgNode , RegexExecution:: Range {
1615- DataFlow:: Node regexNode ;
1616- RegexExecutionMethod method ;
1617-
1618- CompiledRegex ( ) {
1619- exists (
1620- RegexDefinitionConfiguration conf , RegexDefinitonSource source , RegexDefinitionSink sink
1621- |
1622- conf .hasFlow ( source , sink ) and
1623- regexNode = source .getRegexNode ( ) and
1624- method = sink .getMethod ( ) and
1625- this = sink .getExecutingCall ( )
1626- )
1627- }
1591+ /**
1592+ * A call on compiled regular expression (obtained via `re.compile`) executing a
1593+ * regular expression.
1594+ *
1595+ * Given the following example:
1596+ *
1597+ * ```py
1598+ * pattern = re.compile(input)
1599+ * pattern.match(s)
1600+ * ```
1601+ *
1602+ * This class will identify that `re.compile` compiles `input` and afterwards
1603+ * executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
1604+ * and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument).
1605+ *
1606+ *
1607+ * See `RegexExecutionMethods`
1608+ *
1609+ * See https://docs.python.org/3/library/re.html#regular-expression-objects
1610+ */
1611+ private class CompiledRegexExecution extends DataFlow:: CallCfgNode , RegexExecution:: Range {
1612+ DataFlow:: Node regexNode ;
1613+ RegexExecutionMethod method ;
16281614
1629- override DataFlow:: Node getRegexNode ( ) { result = regexNode }
1615+ CompiledRegexExecution ( ) {
1616+ exists (
1617+ RegexDefinitionConfiguration conf , RegexDefinitonSource source , RegexDefinitionSink sink
1618+ |
1619+ conf .hasFlow ( source , sink ) and
1620+ regexNode = source .getRegexNode ( ) and
1621+ method = sink .getMethod ( ) and
1622+ this = sink .getExecutingCall ( )
1623+ )
1624+ }
16301625
1631- override DataFlow:: Node getString ( ) {
1632- result in [ this .getArg ( stringArg ( method ) - 1 ) , this .getArgByName ( "string" ) ]
1633- }
1626+ override DataFlow:: Node getRegex ( ) { result = regexNode }
16341627
1635- override string getName ( ) { result = "re." + method }
1636- }
1628+ override DataFlow:: Node getString ( ) {
1629+ result in [ this .getArg ( getStringArgIndex ( method ) - 1 ) , this .getArgByName ( "string" ) ]
1630+ }
16371631
1638- /**
1639- * A call to 're.escape'.
1640- * See https://docs.python.org/3/library/re.html#re.escape
1641- */
1642- private class ReEscapeCall extends Escaping:: Range , DataFlow:: CallCfgNode {
1643- ReEscapeCall ( ) {
1644- this = API:: moduleImport ( "re" ) .getMember ( "escape" ) .getACall ( )
1632+ override string getName ( ) { result = "re." + method }
16451633 }
16461634
1647- override DataFlow:: Node getAnInput ( ) {
1648- result in [ this .getArg ( 0 ) , this .getArgByName ( "pattern" ) ]
1649- }
1635+ /**
1636+ * A call to 're.escape'.
1637+ * See https://docs.python.org/3/library/re.html#re.escape
1638+ */
1639+ private class ReEscapeCall extends Escaping:: Range , DataFlow:: CallCfgNode {
1640+ ReEscapeCall ( ) { this = API:: moduleImport ( "re" ) .getMember ( "escape" ) .getACall ( ) }
16501641
1651- override DataFlow:: Node getOutput ( ) { result = this }
1642+ override DataFlow:: Node getAnInput ( ) {
1643+ result in [ this .getArg ( 0 ) , this .getArgByName ( "pattern" ) ]
1644+ }
1645+
1646+ override DataFlow:: Node getOutput ( ) { result = this }
16521647
1653- override string getKind ( ) { result = Escaping:: getRegexKind ( ) }
1648+ override string getKind ( ) { result = Escaping:: getRegexKind ( ) }
1649+ }
16541650}
16551651
16561652// ---------------------------------------------------------------------------
0 commit comments