Skip to content

Commit 3278793

Browse files
committed
Python: Handle more functions and kw-args
1 parent 2451123 commit 3278793

File tree

6 files changed

+114
-16
lines changed

6 files changed

+114
-16
lines changed

python/ql/src/experimental/semmle/python/frameworks/Xml.qll

Lines changed: 72 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,21 @@ private module Xml {
5353
API::moduleImport("xml")
5454
.getMember("etree")
5555
.getMember("ElementTree")
56-
.getMember(["fromstring", "fromstringlist", "XML", "parse"])
56+
.getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
5757
.getACall()
5858
}
5959

60-
override DataFlow::Node getAnInput() { result = this.getArg(0) }
60+
override DataFlow::Node getAnInput() {
61+
result in [
62+
this.getArg(0),
63+
// fromstring / XML / XMLID
64+
this.getArgByName("text"),
65+
// fromstringlist
66+
this.getArgByName("sequence"),
67+
// parse / iterparse
68+
this.getArgByName("source"),
69+
]
70+
}
6171

6272
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
6373
not exists(this.getArgByName("parser")) and
@@ -163,8 +173,8 @@ private module Xml {
163173
* parsed_xml = BadHandler._result
164174
* ```
165175
*/
166-
private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
167-
XMLSaxParsing() {
176+
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
177+
XMLSaxInstanceParsing() {
168178
this =
169179
API::moduleImport("xml")
170180
.getMember("sax")
@@ -174,7 +184,40 @@ private module Xml {
174184
.getACall()
175185
}
176186

177-
override DataFlow::Node getAnInput() { result = this.getArg(0) }
187+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
188+
189+
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
190+
// always vuln to these
191+
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
192+
or
193+
// can be vuln to other things if features has been turned on
194+
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
195+
(kind.isXxe() or kind.isDtdRetrieval())
196+
}
197+
}
198+
199+
/**
200+
* A call to either `parse` or `parseString` from `xml.sax` module.
201+
*
202+
* See:
203+
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
204+
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
205+
*/
206+
private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
207+
XMLSaxParsing() {
208+
this =
209+
API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
210+
}
211+
212+
override DataFlow::Node getAnInput() {
213+
result in [
214+
this.getArg(0),
215+
// parseString
216+
this.getArgByName("string"),
217+
// parse
218+
this.getArgByName("source"),
219+
]
220+
}
178221

179222
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
180223
// always vuln to these
@@ -262,11 +305,21 @@ private module Xml {
262305
this =
263306
API::moduleImport("lxml")
264307
.getMember("etree")
265-
.getMember(["fromstring", "fromstringlist", "XML", "parse"])
308+
.getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"])
266309
.getACall()
267310
}
268311

269-
override DataFlow::Node getAnInput() { result = this.getArg(0) }
312+
override DataFlow::Node getAnInput() {
313+
result in [
314+
this.getArg(0),
315+
// fromstring / XML
316+
this.getArgByName("text"),
317+
// fromstringlist
318+
this.getArgByName("strings"),
319+
// parse / parseid
320+
this.getArgByName("source"),
321+
]
322+
}
270323

271324
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
272325
exists(XML::XMLParser xmlParser |
@@ -293,7 +346,9 @@ private module Xml {
293346
private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
294347
XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }
295348

296-
override DataFlow::Node getAnInput() { result = this.getArg(0) }
349+
override DataFlow::Node getAnInput() {
350+
result in [this.getArg(0), this.getArgByName("xml_input")]
351+
}
297352

298353
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
299354
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
@@ -317,7 +372,15 @@ private module Xml {
317372
}
318373

319374
override DataFlow::Node getAnInput() {
320-
result in [this.getArg(0), this.getArgByName("string"), this.getArgByName("file")]
375+
result in [
376+
this.getArg(0),
377+
// parseString
378+
this.getArgByName("string"),
379+
// minidom.parse
380+
this.getArgByName("file"),
381+
// pulldom.parse
382+
this.getArgByName("stream_or_string"),
383+
]
321384
}
322385

323386
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }

python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,19 @@
55

66
# different parsing methods
77
lxml.etree.fromstring(x) # $ input=x vuln='XXE'
8+
lxml.etree.fromstring(text=x) # $ input=x vuln='XXE'
89

910
lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE'
11+
lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE'
1012

1113
lxml.etree.XML(x) # $ input=x vuln='XXE'
14+
lxml.etree.XML(text=x) # $ input=x vuln='XXE'
1215

13-
lxml.etree.parse(StringIO(x)).getroot() # $ input=StringIO(..) vuln='XXE'
16+
lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE'
17+
lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE'
18+
19+
lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE'
20+
lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE'
1421

1522
# With default parsers (nothing changed)
1623
parser = lxml.etree.XMLParser()

python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,25 @@
77

88
# minidom
99
xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
10+
xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
11+
1012
xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
13+
xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
14+
1115

1216
# pulldom
1317
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
18+
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
19+
1420
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
21+
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
22+
1523

1624
# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/)
1725
parser = xml.sax.make_parser()
1826
parser.setFeature(xml.sax.handler.feature_external_ges, True)
27+
xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
1928
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
29+
30+
xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
31+
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'

python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,23 @@
55

66
# Parsing in different ways
77
xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
8-
xml.etree.ElementTree.fromstringlist(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
8+
xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
9+
10+
xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup'
11+
xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup'
12+
913
xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
14+
xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
15+
16+
xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
17+
xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
18+
1019
xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
20+
xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
21+
22+
xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
23+
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
24+
1125

1226
# With parsers (no options available to disable/enable security features)
1327
parser = xml.etree.ElementTree.XMLParser()

python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@ def __init__(self):
1010
def characters(self, data):
1111
self._result.append(data)
1212

13-
def parse(self, f):
14-
xml.sax.parse(f, self) # $ MISSING: input=f vuln='Billion Laughs' vuln='Quadratic Blowup'
15-
self._result
13+
xml.sax.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
14+
xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
1615

17-
MainHandler().parse(StringIO(x))
16+
xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
17+
xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
1818

1919
parser = xml.sax.make_parser()
2020
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
21+
parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
2122

2223
# You can make it vuln to both XXE and DTD retrieval by setting this flag
2324
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
@@ -30,7 +31,6 @@ def parse(self, f):
3031
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
3132

3233
# Forward Type Tracking test
33-
3434
def func(cond):
3535
parser = xml.sax.make_parser()
3636
if cond:

python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,6 @@
33
x = "some xml"
44

55
xmltodict.parse(x) # $ input=x
6+
xmltodict.parse(xml_input=x) # $ input=x
7+
68
xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'

0 commit comments

Comments
 (0)