@@ -3442,8 +3442,11 @@ private module StdlibPrivate {
34423442
34433443 /**
34443444 * A call to the `parse` method on a SAX XML parser.
3445+ *
3446+ * See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse
34453447 */
3446- private class XMLSaxInstanceParsing extends DataFlow:: MethodCallNode , XML:: XMLParsing:: Range {
3448+ private class XMLSaxInstanceParsing extends DataFlow:: MethodCallNode , XML:: XMLParsing:: Range ,
3449+ FileSystemAccess:: Range {
34473450 XMLSaxInstanceParsing ( ) {
34483451 this =
34493452 API:: moduleImport ( "xml" )
@@ -3473,6 +3476,17 @@ private module StdlibPrivate {
34733476 // really give us any value, at least not as of right now).
34743477 none ( )
34753478 }
3479+
3480+ override DataFlow:: Node getAPathArgument ( ) {
3481+ // Every input of this `parse` call is reported as a path argument. A caller may
3482+ // instead pass a file-like object, which is not a file system access (and couldn't
3483+ // cause a path-injection), so this can give FPs for the path-injection query.
3484+ //
3485+ // Presumably, once we have proper flow-summary support for file-like objects,
3486+ // we can make the XXE/XML-bomb sinks allow an access-path, while the
3487+ // path-injection sink wouldn't, and then we will not end up with such FPs.
3488+ result = this .getAnInput ( )
3489+ }
34763490 }
34773491
34783492 /**
@@ -3513,13 +3527,40 @@ private module StdlibPrivate {
35133527 }
35143528 }
35153529
3530+ /**
3531+ * A call to `xml.sax.parse`, which takes either a filename or a file-like object as
3532+ * argument. To capture the filename for path-injection, we have this subclass.
3533+ *
3534+ * See https://docs.python.org/3/library/xml.sax.html#xml.sax.parse
3535+ *
3536+ * Note: every input of the call is reported as a path argument (see comment below).
3537+ */
3538+ private class FileAccessFromXMLSaxParsing extends XMLSaxParsing , FileSystemAccess:: Range {
3539+ FileAccessFromXMLSaxParsing ( ) {
3540+ this = API:: moduleImport ( "xml" ) .getMember ( "sax" ) .getMember ( "parse" ) .getACall ( )
3541+ // I considered whether we should try to reduce FPs from people passing file-like
3542+ // objects, which will not be a file system access (and couldn't cause a
3543+ // path-injection).
3544+ //
3545+ // I suppose that once we have proper flow-summary support for file-like objects,
3546+ // we can make the XXE/XML-bomb sinks allow an access-path, while the
3547+ // path-injection sink wouldn't, and then we will not end up with such FPs.
3548+ }
3549+
3550+ override DataFlow:: Node getAPathArgument ( ) { result = this .getAnInput ( ) }
3551+ }
3552+
35163553 // ---------------------------------------------------------------------------
35173554 // xml.dom.*
35183555 // ---------------------------------------------------------------------------
35193556 /**
35203557 * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
35213558 *
35223559 * Both of these modules are based on SAX parsers.
3560+ *
3561+ * See
3562+ * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
3563+ * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
35233564 */
35243565 private class XMLDomParsing extends DataFlow:: CallCfgNode , XML:: XMLParsing:: Range {
35253566 XMLDomParsing ( ) {
@@ -3556,6 +3597,35 @@ private module StdlibPrivate {
35563597
35573598 override DataFlow:: Node getOutput ( ) { result = this }
35583599 }
3600+
3601+ /**
3602+ * A call to the `parse` function from `xml.dom.minidom` or `xml.dom.pulldom`
3603+ * (`parseString` is not matched here), which takes either a filename or a file-like
3604+ * object as argument. To capture the filename for path-injection, we have this subclass.
3605+ *
3606+ * See
3607+ * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
3608+ * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
3609+ */
3610+ private class FileAccessFromXMLDomParsing extends XMLDomParsing , FileSystemAccess:: Range {
3611+ FileAccessFromXMLDomParsing ( ) {
3612+ this =
3613+ API:: moduleImport ( "xml" )
3614+ .getMember ( "dom" )
3615+ .getMember ( [ "minidom" , "pulldom" ] )
3616+ .getMember ( "parse" )
3617+ .getACall ( )
3618+ // I considered whether we should try to reduce FPs from people passing file-like
3619+ // objects, which will not be a file system access (and couldn't cause a
3620+ // path-injection).
3621+ //
3622+ // I suppose that once we have proper flow-summary support for file-like objects,
3623+ // we can make the XXE/XML-bomb sinks allow an access-path, while the
3624+ // path-injection sink wouldn't, and then we will not end up with such FPs.
3625+ }
3626+
3627+ override DataFlow:: Node getAPathArgument ( ) { result = this .getAnInput ( ) }
3628+ }
35593629}
35603630
35613631// ---------------------------------------------------------------------------
0 commit comments