Skip to content

Commit c0a6f9f

Browse files
committed
Python: Restructure lxml modeling
and handle parser being passed as positional argument
1 parent c0a2c25 commit c0a6f9f

File tree

2 files changed

+94
-71
lines changed
  • python/ql
    • src/experimental/semmle/python/frameworks
    • test/experimental/library-tests/frameworks/XML

2 files changed

+94
-71
lines changed

python/ql/src/experimental/semmle/python/frameworks/Xml.qll

Lines changed: 93 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -293,55 +293,104 @@ private module SaxBasedParsing {
293293

294294
private module Lxml {
295295
/**
296-
* A call to `lxml.etree.get_default_parser`.
296+
* Provides models for `lxml.etree` parsers
297297
*
298-
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
298+
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
299299
*/
300-
private class LXMLDefaultParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
301-
LXMLDefaultParser() {
302-
this = API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
300+
module XMLParser {
301+
/**
302+
* A source of instances of `lxml.etree` parsers, extend this class to model new instances.
303+
*
304+
* This can include instantiations of the class, return values from function
305+
* calls, or a special parameter that will be set when functions are called by an external
306+
* library.
307+
*
308+
* Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
309+
*/
310+
abstract class InstanceSource extends DataFlow::LocalSourceNode {
311+
/** Holds if this instance is vulnerable to `kind`. */
312+
abstract predicate vulnerable(XML::XMLVulnerabilityKind kind);
303313
}
304314

305-
override DataFlow::Node getAnInput() { none() }
315+
/**
316+
* A call to `lxml.etree.XMLParser`.
317+
*
318+
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
319+
*/
320+
private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
321+
LXMLParser() {
322+
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
323+
}
306324

307-
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
308-
// as highlighted by
309-
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
310-
// by default XXE is allow. so as long as the default parser has not been
311-
// overridden, the result is also vuln to XXE.
312-
kind.isXxe()
313-
// TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`.
325+
// NOTE: it's not possible to change settings of a parser after constructing it
326+
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
327+
kind.isXxe() and
328+
(
329+
// resolve_entities has default True
330+
not exists(this.getArgByName("resolve_entities"))
331+
or
332+
this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
333+
)
334+
or
335+
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
336+
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t)
337+
or
338+
kind.isDtdRetrieval() and
339+
this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
340+
this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
341+
}
314342
}
315-
}
316343

317-
/**
318-
* A call to `lxml.etree.XMLParser`.
319-
*
320-
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
321-
*/
322-
private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
323-
LXMLParser() {
324-
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
325-
}
344+
/**
345+
* A call to `lxml.etree.get_default_parser`.
346+
*
347+
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
348+
*/
349+
private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
350+
LXMLDefaultParser() {
351+
this =
352+
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
353+
}
326354

327-
override DataFlow::Node getAnInput() { none() }
355+
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
356+
// as highlighted by
357+
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
358+
// by default XXE is allow. so as long as the default parser has not been
359+
// overridden, the result is also vuln to XXE.
360+
kind.isXxe()
361+
// TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`.
362+
}
363+
}
328364

329-
// NOTE: it's not possible to change settings of a parser after constructing it
330-
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
331-
kind.isXxe() and
332-
(
333-
// resolve_entities has default True
334-
not exists(this.getArgByName("resolve_entities"))
335-
or
336-
this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
337-
)
338-
or
339-
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
340-
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t)
365+
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
366+
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
367+
t.start() and
368+
result = origin
341369
or
342-
kind.isDtdRetrieval() and
343-
this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
344-
this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
370+
exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
371+
}
372+
373+
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
374+
DataFlow::Node instance(InstanceSource origin) {
375+
instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
376+
}
377+
378+
/** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
379+
DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) {
380+
exists(InstanceSource origin | result = instance(origin) and origin.vulnerable(kind))
381+
}
382+
383+
/**
384+
* A call to the `feed` method of an `lxml` parser.
385+
*/
386+
private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
387+
LXMLParserFeedCall() { this.calls(instance(_), "feed") }
388+
389+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
390+
391+
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
392+
this.calls(instanceVulnerableTo(kind), "feed")
393+
}
345394
}
346395
}
347396

@@ -376,40 +425,13 @@ private module Lxml {
376425
]
377426
}
378427

379-
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
380-
// TODO: This should be done with type-tracking
381-
exists(XML::XMLParser xmlParser |
382-
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
383-
)
384-
or
385-
kind.isXxe() and not exists(this.getArgByName("parser"))
386-
}
387-
}
388-
389-
/**
390-
* A call to the `feed` method of an `lxml` parser.
391-
*/
392-
private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
393-
LXMLEtreeParserFeedCall() {
394-
exists(API::Node parserInstance |
395-
parserInstance =
396-
API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getReturn()
397-
or
398-
parserInstance =
399-
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getReturn()
400-
|
401-
this = parserInstance.getMember("feed").getACall()
402-
)
403-
}
404-
405-
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
428+
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
406429

407430
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
408-
// TODO: This should be done with type-tracking
409-
exists(XML::XMLParser xmlParser |
410-
xmlParser = this.getObject().getALocalSource() and
411-
xmlParser.vulnerable(kind)
412-
)
431+
this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
432+
or
433+
kind.isXxe() and
434+
not exists(this.getParserArg())
413435
}
414436
}
415437
}

python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
# XXE-safe
3636
parser = lxml.etree.XMLParser(resolve_entities=False)
37+
lxml.etree.fromstring(x, parser) # $ input=x
3738
lxml.etree.fromstring(x, parser=parser) # $ input=x
3839

3940
# XXE-vuln

0 commit comments

Comments
 (0)