@@ -69,7 +69,7 @@ module Lxml {
6969 */
7070 class XPathCall extends XML:: XPathExecution:: Range , DataFlow:: CallCfgNode {
7171 XPathCall ( ) {
72- this . ( DataFlow :: MethodCallNode ) . calls ( [ Element:: instance ( ) , ElementTree:: instance ( ) ] , "xpath" )
72+ this = [ Element:: instance ( ) , ElementTree:: instance ( ) ] . getMember ( "xpath" ) . getACall ( )
7373 }
7474
7575 override DataFlow:: Node getXPath ( ) { result in [ this .getArg ( 0 ) , this .getArgByName ( "_path" ) ] }
@@ -201,6 +201,7 @@ module Lxml {
201201 * A call to either of:
202202 * - `lxml.etree.fromstring`
203203 * - `lxml.etree.fromstringlist`
204+ * -
204205 * - `lxml.etree.XML`
205206 * - `lxml.etree.XMLID`
206207 * - `lxml.etree.parse`
@@ -209,23 +210,27 @@ module Lxml {
209210 * See
210211 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
211212 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstringlist
213+ * - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.HTML
212214 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XML
213215 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XMLID
216+ * - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLDTDID
214217 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
215218 * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
216219 */
217220 private class LxmlParsing extends DataFlow:: CallCfgNode , XML:: XmlParsing:: Range {
218221 string functionName ;
219222
220223 LxmlParsing ( ) {
221- functionName in [ "fromstring" , "fromstringlist" , "XML" , "XMLID" , "parse" , "parseid" ] and
224+ functionName in [
225+ "fromstring" , "fromstringlist" , "HTML" , "XML" , "XMLID" , "XMLDTDID" , "parse" , "parseid"
226+ ] and
222227 this = etreeRef ( ) .getMember ( functionName ) .getACall ( )
223228 }
224229
225230 override DataFlow:: Node getAnInput ( ) {
226231 result in [
227232 this .getArg ( 0 ) ,
228- // fromstring / XML / XMLID
233+ // fromstring / HTML / XML / XMLID / XMLDTDID
229234 this .getArgByName ( "text" ) ,
230235 // fromstringlist
231236 this .getArgByName ( "strings" ) ,
@@ -240,7 +245,8 @@ module Lxml {
240245 this .getParserArg ( ) = XmlParser:: instanceVulnerableTo ( kind )
241246 or
242247 kind .isXxe ( ) and
243- not exists ( this .getParserArg ( ) )
248+ not exists ( this .getParserArg ( ) ) and
249+ not functionName = "HTML"
244250 }
245251
246252 override predicate mayExecuteInput ( ) { none ( ) }
@@ -314,78 +320,97 @@ module Lxml {
314320 /** Provides models for instances of the `lxml.etree.Element` class. */
315321 module Element {
316322 /** Gets a reference to the `Element` class. */
317- API:: Node classRef ( ) { result = etreeRef ( ) .getMember ( "Element" ) }
318-
319- abstract class InstanceSource extends DataFlow:: LocalSourceNode { }
323+ API:: Node classRef ( ) { result = etreeRef ( ) .getMember ( [ "Element" , "_Element" ] ) }
320324
321- /** Gets a reference to an `lxml.etree.Element` instance. */
322- private DataFlow:: TypeTrackingNode instance ( DataFlow:: TypeTracker t ) {
323- t .start ( ) and
324- result instanceof InstanceSource
325- or
326- exists ( DataFlow:: TypeTracker t2 | result = instance ( t2 ) .track ( t2 , t ) )
325+ abstract class InstanceSource instanceof API:: Node {
326+ string toString ( ) { result = super .toString ( ) }
327327 }
328328
329329 /** Gets a reference to an `lxml.etree.Element` instance. */
330- DataFlow :: Node instance ( ) { instance ( DataFlow :: TypeTracker :: end ( ) ) . flowsTo ( result ) }
330+ API :: Node instance ( ) { result instanceof InstanceSource }
331331
332332 /** An `Element` instantiated directly. */
333333 private class ElementInstance extends InstanceSource {
334- ElementInstance ( ) { this = classRef ( ) .getACall ( ) }
334+ ElementInstance ( ) { this = classRef ( ) .getAnInstance ( ) }
335335 }
336336
337337 /** The result of a parse operation that returns an `Element`. */
338- private class ParseResult extends InstanceSource , DataFlow :: MethodCallNode {
338+ private class ParseResult extends InstanceSource {
339339 ParseResult ( ) {
340- this .calls ( XmlParser:: instance ( _) , "close" )
340+ // TODO: The XmlParser module does not currently use API graphs
341+ this =
342+ [
343+ etreeRef ( ) .getMember ( "XMLParser" ) .getAnInstance ( ) ,
344+ etreeRef ( ) .getMember ( "get_default_parser" ) .getReturn ( )
345+ ] .getMember ( "close" ) .getReturn ( )
341346 or
342347 // TODO: `XMLID` and `parseid` returns a tuple of which the first element is an `Element`
343- this = etreeRef ( ) .getMember ( [ "XML" , "HTML" , "fromstring" , "fromstringlist" ] ) .getACall ( )
348+ this = etreeRef ( ) .getMember ( [ "XML" , "HTML" , "fromstring" , "fromstringlist" ] ) .getReturn ( )
344349 }
345350 }
346351
347- /** An element index such as `etree.parse(...)[0]` */
348- private class ElementIndex extends InstanceSource , DataFlow:: Node {
349- ElementIndex ( ) { this .asExpr ( ) .( Subscript ) .getObject ( ) = instance ( ) .asExpr ( ) }
350- }
351-
352352 /** A call to a method on an `Element` that returns another `Element`. */
353- private class ElementMethod extends InstanceSource , DataFlow :: MethodCallNode {
353+ private class ElementMethod extends InstanceSource {
354354 ElementMethod ( ) {
355- // TODO: methods that return iterators of `Element`s - `findall`, `finditer`, `iter`, a few others
356- // an `Element` itself can be used as an iterator of its children.
357- this .calls ( instance ( ) , [ "find" , "getnext" , "getprevious" , "getparent" ] )
355+ // an Element is an iterator of Elements
356+ this = instance ( ) .getASubscript ( )
357+ or
358+ // methods that return an Element
359+ this = instance ( ) .getMember ( [ "find" , "getnext" , "getprevious" , "getparent" ] ) .getReturn ( )
360+ or
361+ // methods that return an iterator of Elements
362+ this =
363+ instance ( )
364+ .getMember ( [
365+ "cssselect" , "findall" , "getchildren" , "getiterator" , "iter" , "iterancestors" ,
366+ "iterdecendants" , "iterchildren" , "itersiblings" , "iterfind" , "xpath"
367+ ] )
368+ .getReturn ( )
369+ .getASubscript ( )
358370 }
359371 }
360372
361373 /** A call to a method on an `ElementTree` that returns an `Element`. */
362- private class ElementTreeMethod extends InstanceSource , DataFlow:: MethodCallNode {
363- ElementTreeMethod ( ) { this .calls ( ElementTree:: instance ( ) , "getroot" ) }
374+ private class ElementTreeMethod extends InstanceSource {
375+ ElementTreeMethod ( ) {
376+ this = ElementTree:: instance ( ) .getMember ( [ "getroot" , "find" ] ) .getReturn ( )
377+ or
378+ this =
379+ ElementTree:: instance ( )
380+ .getMember ( [ "findall" , "getiterator" , "iter" , "iterfind" , "xpath" ] )
381+ .getReturn ( )
382+ .getASubscript ( )
383+ }
364384 }
365385
366386 /** An additional taint step from an `Element` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase */
367387 private class ElementTaintStep extends TaintTracking:: AdditionalTaintStep {
368388 override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
369- exists ( DataFlow:: MethodCallNode call | nodeTo = call and nodeFrom = instance ( ) |
389+ exists ( DataFlow:: MethodCallNode call |
390+ nodeTo = call and instance ( ) .asSource ( ) .flowsTo ( nodeFrom )
391+ |
370392 call .calls ( nodeFrom ,
371- // We don't consider sibling nodes to be tainted (getnext, getprevious, itersiblings)
393+ // We consider a node to be tainted if there could be taint anywhere in the element tree
394+ // So sibling nodes (e.g. `getnext`) are also tainted
395+ // This ensures nodes like `elem[0].getnext()` are tracked.
372396 [
373397 "cssselect" , "find" , "findall" , "findtext" , "get" , "getchildren" , "getiterator" ,
374- "getparent" , "getroottree" , "items" , "iter" , "iterancestors" , "iterchildren" ,
375- "iterdescendants" , "iterfind" , "itertext" , "keys" , "values" , "xpath"
398+ "getnext" , "getparent" , "getprevious" , "getroottree" , "items" , "iter" ,
399+ "iterancestors" , "iterchildren" , "iterdescendants" , "itersiblings" , "iterfind" ,
400+ "itertext" , "keys" , "values" , "xpath"
376401 ] )
377402 )
378403 or
379- exists ( DataFlow:: AttrRead attr | nodeTo = attr and nodeFrom = instance ( ) |
380- attr .accesses ( nodeFrom , [ "attrib" , "base" , "nsmap" , "tag" , "tail" , "text" ] )
404+ exists ( DataFlow:: AttrRead attr | nodeTo = attr and instance ( ) . asSource ( ) . flowsTo ( nodeFrom ) |
405+ attr .accesses ( nodeFrom , [ "attrib" , "base" , "nsmap" , "prefix" , " tag", "tail" , "text" ] )
381406 )
382407 }
383408 }
384409 }
385410
386411 /** Provides models for instances of the `lxml.etree.ElementTree` class. */
387412 module ElementTree {
388- API:: Node classRef ( ) { result = etreeRef ( ) .getMember ( "ElementTree" ) }
413+ API:: Node classRef ( ) { result = etreeRef ( ) .getMember ( [ "ElementTree" , "_ElementTree" ] ) }
389414
390415 /**
391416 * A source of instances of `lxml.etree.ElementTree` instances, extend this class to model new instances.
@@ -396,50 +421,42 @@ module Lxml {
396421 *
397422 * Use the predicate `ElementTree::instance()` to get references to instances of `lxml.etree.ElementTree` instances.
398423 */
399- abstract class InstanceSource extends DataFlow:: LocalSourceNode { }
400-
401- /** Gets a reference to an `lxml.etree.ElementTree` instance.` */
402- private DataFlow:: TypeTrackingNode instance ( DataFlow:: TypeTracker t ) {
403- t .start ( ) and
404- result instanceof InstanceSource
405- or
406- exists ( DataFlow:: TypeTracker t2 | result = instance ( t2 ) .track ( t2 , t ) )
424+ abstract class InstanceSource instanceof API:: Node {
425+ string toString ( ) { result = super .toString ( ) }
407426 }
408427
409428 /** Gets a reference to an `lxml.etree.ElementTree` instance. */
410- DataFlow :: Node instance ( ) { instance ( DataFlow :: TypeTracker :: end ( ) ) . flowsTo ( result ) }
429+ API :: Node instance ( ) { result instanceof InstanceSource }
411430
412431 /** An `ElementTree` instantiated directly. */
413432 private class ElementTreeInstance extends InstanceSource {
414- ElementTreeInstance ( ) { this = classRef ( ) .getACall ( ) }
433+ ElementTreeInstance ( ) { this = classRef ( ) .getAnInstance ( ) }
415434 }
416435
417436 /** The result of a parst operation that returns an `ElementTree` */
418- private class ParseResult extends InstanceSource , DataFlow :: MethodCallNode {
419- ParseResult ( ) { this = etreeRef ( ) .getMember ( "parse" ) .getACall ( ) }
437+ private class ParseResult extends InstanceSource {
438+ ParseResult ( ) { this = etreeRef ( ) .getMember ( "parse" ) .getReturn ( ) }
420439 }
421440
422441 /** A call to a method on an `Element` that returns another `Element`. */
423- private class ElementMethod extends InstanceSource , DataFlow:: MethodCallNode {
424- ElementMethod ( ) {
425- // TODO: methods that return iterators of `Element`s - `findall`, `finditer`, `iter`, a few others
426- // an `Element` itself can be used as an iterator of its children.
427- this .calls ( Element:: instance ( ) , "getroottree" )
428- }
442+ private class ElementMethod extends InstanceSource {
443+ ElementMethod ( ) { this = Element:: instance ( ) .getMember ( "getroottree" ) .getReturn ( ) }
429444 }
430445
431446 /** An additional taint step from an `ElementTree` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree._ElementTree */
432447 private class ElementTaintStep extends TaintTracking:: AdditionalTaintStep {
433448 override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
434- exists ( DataFlow:: MethodCallNode call | nodeTo = call and nodeFrom = instance ( ) |
449+ exists ( DataFlow:: MethodCallNode call |
450+ nodeTo = call and instance ( ) .asSource ( ) .flowsTo ( nodeFrom )
451+ |
435452 call .calls ( nodeFrom ,
436453 [
437454 "find" , "findall" , "findtext" , "get" , "getiterator" , "getroot" , "iter" , "iterfind" ,
438455 "xpath"
439456 ] )
440457 )
441458 or
442- exists ( DataFlow:: AttrRead attr | nodeTo = attr and nodeFrom = instance ( ) |
459+ exists ( DataFlow:: AttrRead attr | nodeTo = attr and instance ( ) . asSource ( ) . flowsTo ( nodeFrom ) |
443460 attr .accesses ( nodeFrom , "docinfo" )
444461 )
445462 }
0 commit comments