Skip to content

Commit b4c0065

Browse files
committed
Python: Extend FileSystemAccess for xml.sax and xml.dom.* parsing
1 parent 1d7cec6 commit b4c0065

File tree

3 files changed

+90
-20
lines changed

3 files changed

+90
-20
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3442,8 +3442,11 @@ private module StdlibPrivate {
34423442

34433443
/**
34443444
* A call to the `parse` method on a SAX XML parser.
3445+
*
3446+
* See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse
34453447
*/
3446-
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
3448+
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range,
3449+
FileSystemAccess::Range {
34473450
XMLSaxInstanceParsing() {
34483451
this =
34493452
API::moduleImport("xml")
@@ -3473,6 +3476,17 @@ private module StdlibPrivate {
34733476
// really give us any value, at least not as of right now).
34743477
none()
34753478
}
3479+
3480+
override DataFlow::Node getAPathArgument() {
3481+
// I considered whether we should try to reduce FPs from people passing file-like
3482+
// objects, which will not be a file system access (and couldn't cause a
3483+
// path-injection).
3484+
//
3485+
// I suppose that once we have proper flow-summary support for file-like objects,
3486+
// we can make the XXE/XML-bomb sinks allow an access-path, while the
3487+
// path-injection sink wouldn't, and then we will not end up with such FPs.
3488+
result = this.getAnInput()
3489+
}
34763490
}
34773491

34783492
/**
@@ -3513,13 +3527,40 @@ private module StdlibPrivate {
35133527
}
35143528
}
35153529

3530+
/**
3531+
* A call to `xml.sax.parse`, which takes either a filename or a file-like object as
3532+
* argument. To capture the filename for path-injection, we have this subclass.
3533+
*
3534+
* See
3535+
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
3536+
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
3537+
*/
3538+
private class FileAccessFromXMLSaxParsing extends XMLSaxParsing, FileSystemAccess::Range {
3539+
FileAccessFromXMLSaxParsing() {
3540+
this = API::moduleImport("xml").getMember("sax").getMember("parse").getACall()
3541+
// I considered whether we should try to reduce FPs from people passing file-like
3542+
// objects, which will not be a file system access (and couldn't cause a
3543+
// path-injection).
3544+
//
3545+
// I suppose that once we have proper flow-summary support for file-like objects,
3546+
// we can make the XXE/XML-bomb sinks allow an access-path, while the
3547+
// path-injection sink wouldn't, and then we will not end up with such FPs.
3548+
}
3549+
3550+
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
3551+
}
3552+
35163553
// ---------------------------------------------------------------------------
35173554
// xml.dom.*
35183555
// ---------------------------------------------------------------------------
35193556
/**
35203557
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
35213558
*
35223559
* Both of these modules are based on SAX parsers.
3560+
*
3561+
* See
3562+
* - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
3563+
* - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
35233564
*/
35243565
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
35253566
XMLDomParsing() {
@@ -3556,6 +3597,35 @@ private module StdlibPrivate {
35563597

35573598
override DataFlow::Node getOutput() { result = this }
35583599
}
3600+
3601+
/**
3602+
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or
3603+
* `xml.dom.pulldom`, which takes either a filename or a file-like object as argument.
3604+
* To capture the filename for path-injection, we have this subclass.
3605+
*
3606+
* See
3607+
* - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
3608+
* - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
3609+
*/
3610+
private class FileAccessFromXMLDomParsing extends XMLDomParsing, FileSystemAccess::Range {
3611+
FileAccessFromXMLDomParsing() {
3612+
this =
3613+
API::moduleImport("xml")
3614+
.getMember("dom")
3615+
.getMember(["minidom", "pulldom"])
3616+
.getMember("parse")
3617+
.getACall()
3618+
// I considered whether we should try to reduce FPs from people passing file-like
3619+
// objects, which will not be a file system access (and couldn't cause a
3620+
// path-injection).
3621+
//
3622+
// I suppose that once we have proper flow-summary support for file-like objects,
3623+
// we can make the XXE/XML-bomb sinks allow an access-path, while the
3624+
// path-injection sink wouldn't, and then we will not end up with such FPs.
3625+
}
3626+
3627+
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
3628+
}
35593629
}
35603630

35613631
// ---------------------------------------------------------------------------

python/ql/test/library-tests/frameworks/stdlib/xml_dom.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
x = "some xml"
77

88
# minidom
9-
xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..)
10-
xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..)
9+
xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
10+
xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
1111

1212
xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..)
1313
xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..)
1414

1515

1616
# pulldom
17-
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..)
18-
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..)
17+
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
18+
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
1919

2020
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..)
2121
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..)
@@ -24,8 +24,8 @@
2424
# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/)
2525
parser = xml.sax.make_parser()
2626
parser.setFeature(xml.sax.handler.feature_external_ges, True)
27-
xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..)
28-
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..)
27+
xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
28+
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
2929

30-
xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..)
31-
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..)
30+
xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
31+
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)

python/ql/test/library-tests/frameworks/stdlib/xml_sax.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,49 +10,49 @@ def __init__(self):
1010
def characters(self, data):
1111
self._result.append(data)
1212

13-
xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
14-
xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
13+
xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
14+
xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
1515

1616
xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
1717
xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
1818

1919
parser = xml.sax.make_parser()
20-
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
21-
parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
20+
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
21+
parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
2222

2323
# You can make it vuln to both XXE and DTD retrieval by setting this flag
2424
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
2525
parser = xml.sax.make_parser()
2626
parser.setFeature(xml.sax.handler.feature_external_ges, True)
27-
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
27+
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
2828

2929
parser = xml.sax.make_parser()
3030
parser.setFeature(xml.sax.handler.feature_external_ges, False)
31-
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
31+
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
3232

3333
# Forward Type Tracking test
3434
def func(cond):
3535
parser = xml.sax.make_parser()
3636
if cond:
3737
parser.setFeature(xml.sax.handler.feature_external_ges, True)
38-
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
38+
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
3939
else:
40-
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
40+
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
4141

4242
# make it vuln, then making it safe
4343
# a bit of an edge-case, but is nice to be able to handle.
4444
parser = xml.sax.make_parser()
4545
parser.setFeature(xml.sax.handler.feature_external_ges, True)
4646
parser.setFeature(xml.sax.handler.feature_external_ges, False)
47-
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
47+
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
4848

4949
def check_conditional_assignment(cond):
5050
parser = xml.sax.make_parser()
5151
if cond:
5252
parser.setFeature(xml.sax.handler.feature_external_ges, True)
5353
else:
5454
parser.setFeature(xml.sax.handler.feature_external_ges, False)
55-
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
55+
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
5656

5757
def check_conditional_assignment2(cond):
5858
parser = xml.sax.make_parser()
@@ -61,4 +61,4 @@ def check_conditional_assignment2(cond):
6161
else:
6262
flag_value = False
6363
parser.setFeature(xml.sax.handler.feature_external_ges, flag_value)
64-
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
64+
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)

0 commit comments

Comments
 (0)