@@ -293,55 +293,104 @@ private module SaxBasedParsing {
293293
294294private module Lxml {
295295 /**
296- * A call to `lxml.etree.get_default_parser`.
296+ * Provides models for `lxml.etree` parsers
297297 *
298- * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
298+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
299299 */
300- private class LXMLDefaultParser extends DataFlow:: CallCfgNode , XML:: XMLParser:: Range {
301- LXMLDefaultParser ( ) {
302- this = API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "get_default_parser" ) .getACall ( )
300+ module XMLParser {
301+ /**
302+ * A source of instances of `lxml.etree` parsers, extend this class to model new instances.
303+ *
304+ * This can include instantiations of the class, return values from function
305+ * calls, or a special parameter that will be set when functions are called by an external
306+ * library.
307+ *
308+ * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
309+ */
310+ abstract class InstanceSource extends DataFlow:: LocalSourceNode {
311+ /** Holds if this instance is vulnerable to `kind`. */
312+ abstract predicate vulnerable ( XML:: XMLVulnerabilityKind kind ) ;
303313 }
304314
305- override DataFlow:: Node getAnInput ( ) { none ( ) }
315+ /**
316+ * A call to `lxml.etree.XMLParser`.
317+ *
318+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
319+ */
320+ private class LXMLParser extends InstanceSource , DataFlow:: CallCfgNode {
321+ LXMLParser ( ) {
322+ this = API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "XMLParser" ) .getACall ( )
323+ }
306324
307- override predicate vulnerable ( XML:: XMLVulnerabilityKind kind ) {
308- // as highlighted by
309- // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
310- // by default XXE is allow. so as long as the default parser has not been
311- // overridden, the result is also vuln to XXE.
312- kind .isXxe ( )
313- // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`.
325+ // NOTE: it's not possible to change settings of a parser after constructing it
326+ override predicate vulnerable ( XML:: XMLVulnerabilityKind kind ) {
327+ kind .isXxe ( ) and
328+ (
329+ // resolve_entities has default True
330+ not exists ( this .getArgByName ( "resolve_entities" ) )
331+ or
332+ this .getArgByName ( "resolve_entities" ) .getALocalSource ( ) .asExpr ( ) = any ( True t )
333+ )
334+ or
335+ ( kind .isBillionLaughs ( ) or kind .isQuadraticBlowup ( ) ) and
336+ this .getArgByName ( "huge_tree" ) .getALocalSource ( ) .asExpr ( ) = any ( True t )
337+ or
338+ kind .isDtdRetrieval ( ) and
339+ this .getArgByName ( "load_dtd" ) .getALocalSource ( ) .asExpr ( ) = any ( True t ) and
340+ this .getArgByName ( "no_network" ) .getALocalSource ( ) .asExpr ( ) = any ( False t )
341+ }
314342 }
315- }
316343
317- /**
318- * A call to `lxml.etree.XMLParser`.
319- *
320- * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
321- */
322- private class LXMLParser extends DataFlow:: CallCfgNode , XML:: XMLParser:: Range {
323- LXMLParser ( ) {
324- this = API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "XMLParser" ) .getACall ( )
325- }
344+ /**
345+ * A call to `lxml.etree.get_default_parser`.
346+ *
347+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
348+ */
349+ private class LXMLDefaultParser extends InstanceSource , DataFlow:: CallCfgNode {
350+ LXMLDefaultParser ( ) {
351+ this =
352+ API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "get_default_parser" ) .getACall ( )
353+ }
326354
327- override DataFlow:: Node getAnInput ( ) { none ( ) }
355+ override predicate vulnerable ( XML:: XMLVulnerabilityKind kind ) {
356+ // as highlighted by
357+ // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
358+ // by default XXE is allow. so as long as the default parser has not been
359+ // overridden, the result is also vuln to XXE.
360+ kind .isXxe ( )
361+ // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`.
362+ }
363+ }
328364
329- // NOTE: it's not possible to change settings of a parser after constructing it
330- override predicate vulnerable ( XML:: XMLVulnerabilityKind kind ) {
331- kind .isXxe ( ) and
332- (
333- // resolve_entities has default True
334- not exists ( this .getArgByName ( "resolve_entities" ) )
335- or
336- this .getArgByName ( "resolve_entities" ) .getALocalSource ( ) .asExpr ( ) = any ( True t )
337- )
338- or
339- ( kind .isBillionLaughs ( ) or kind .isQuadraticBlowup ( ) ) and
340- this .getArgByName ( "huge_tree" ) .getALocalSource ( ) .asExpr ( ) = any ( True t )
365+ /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
366+ private DataFlow:: TypeTrackingNode instance ( DataFlow:: TypeTracker t , InstanceSource origin ) {
367+ t .start ( ) and
368+ result = origin
341369 or
342- kind .isDtdRetrieval ( ) and
343- this .getArgByName ( "load_dtd" ) .getALocalSource ( ) .asExpr ( ) = any ( True t ) and
344- this .getArgByName ( "no_network" ) .getALocalSource ( ) .asExpr ( ) = any ( False t )
370+ exists ( DataFlow:: TypeTracker t2 | result = instance ( t2 , origin ) .track ( t2 , t ) )
371+ }
372+
373+ /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
374+ DataFlow:: Node instance ( InstanceSource origin ) {
375+ instance ( DataFlow:: TypeTracker:: end ( ) , origin ) .flowsTo ( result )
376+ }
377+
378+ /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
379+ DataFlow:: Node instanceVulnerableTo ( XML:: XMLVulnerabilityKind kind ) {
380+ exists ( InstanceSource origin | result = instance ( origin ) and origin .vulnerable ( kind ) )
381+ }
382+
383+ /**
384+ * A call to the `feed` method of an `lxml` parser.
385+ */
386+ private class LXMLParserFeedCall extends DataFlow:: MethodCallNode , XML:: XMLParsing:: Range {
387+ LXMLParserFeedCall ( ) { this .calls ( instance ( _) , "feed" ) }
388+
389+ override DataFlow:: Node getAnInput ( ) { result in [ this .getArg ( 0 ) , this .getArgByName ( "data" ) ] }
390+
391+ override predicate vulnerable ( XML:: XMLVulnerabilityKind kind ) {
392+ this .calls ( instanceVulnerableTo ( kind ) , "feed" )
393+ }
345394 }
346395 }
347396
@@ -376,40 +425,13 @@ private module Lxml {
376425 ]
377426 }
378427
379- override predicate vulnerable ( XML:: XMLVulnerabilityKind kind ) {
380- // TODO: This should be done with type-tracking
381- exists ( XML:: XMLParser xmlParser |
382- xmlParser = this .getArgByName ( "parser" ) .getALocalSource ( ) and xmlParser .vulnerable ( kind )
383- )
384- or
385- kind .isXxe ( ) and not exists ( this .getArgByName ( "parser" ) )
386- }
387- }
388-
389- /**
390- * A call to the `feed` method of an `lxml` parser.
391- */
392- private class LXMLEtreeParserFeedCall extends DataFlow:: MethodCallNode , XML:: XMLParsing:: Range {
393- LXMLEtreeParserFeedCall ( ) {
394- exists ( API:: Node parserInstance |
395- parserInstance =
396- API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "XMLParser" ) .getReturn ( )
397- or
398- parserInstance =
399- API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "get_default_parser" ) .getReturn ( )
400- |
401- this = parserInstance .getMember ( "feed" ) .getACall ( )
402- )
403- }
404-
405- override DataFlow:: Node getAnInput ( ) { result in [ this .getArg ( 0 ) , this .getArgByName ( "data" ) ] }
428+ DataFlow:: Node getParserArg ( ) { result in [ this .getArg ( 1 ) , this .getArgByName ( "parser" ) ] }
406429
407430 override predicate vulnerable ( XML:: XMLVulnerabilityKind kind ) {
408- // TODO: This should be done with type-tracking
409- exists ( XML:: XMLParser xmlParser |
410- xmlParser = this .getObject ( ) .getALocalSource ( ) and
411- xmlParser .vulnerable ( kind )
412- )
431+ this .getParserArg ( ) = XMLParser:: instanceVulnerableTo ( kind )
432+ or
433+ kind .isXxe ( ) and
434+ not exists ( this .getParserArg ( ) )
413435 }
414436 }
415437}
0 commit comments