55
66private import python
77private import semmle.python.dataflow.new.DataFlow
8- // Need to import since frameworks can extend `RemoteFlowSource ::Range`
8+ // Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource ::Range`
99private import semmle.python.Frameworks
10- private import semmle.python.Concepts
11- private import semmle.python.security.SensitiveData as OldSensitiveData
10+ private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
11+
12+ // We export these explicitly, so we don't also export the `HeuristicNames` module.
13+ class SensitiveDataClassification = SensitiveDataHeuristics:: SensitiveDataClassification ;
14+
15+ module SensitiveDataClassification = SensitiveDataHeuristics:: SensitiveDataClassification;
1216
1317/**
1418 * A data flow source of sensitive data, such as secrets, certificates, or passwords.
@@ -22,13 +26,9 @@ class SensitiveDataSource extends DataFlow::Node {
2226 SensitiveDataSource ( ) { this = range }
2327
2428 /**
25- * INTERNAL: Do not use.
26- *
27- * This will be rewritten to have better types soon, and therefore should only be used internally until then.
28- *
2929 * Gets the classification of the sensitive data.
3030 */
31- string getClassification ( ) { result = range .getClassification ( ) }
31+ SensitiveDataClassification getClassification ( ) { result = range .getClassification ( ) }
3232}
3333
3434/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
@@ -41,26 +41,225 @@ module SensitiveDataSource {
4141 */
4242 abstract class Range extends DataFlow:: Node {
4343 /**
44- * INTERNAL: Do not use.
45- *
46- * This will be rewritten to have better types soon, and therefore should only be used internally until then.
47- *
4844 * Gets the classification of the sensitive data.
4945 */
50- abstract string getClassification ( ) ;
46+ abstract SensitiveDataClassification getClassification ( ) ;
5147 }
5248}
5349
54- private class PortOfOldModeling extends SensitiveDataSource:: Range {
55- OldSensitiveData:: SensitiveData:: Source oldSensitiveSource ;
50+ /** Actual sensitive data modeling */
51+ private module SensitiveDataModeling {
52+ private import SensitiveDataHeuristics:: HeuristicNames
5653
57- PortOfOldModeling ( ) { this .asCfgNode ( ) = oldSensitiveSource }
54+ /**
55+ * Gets a reference to a function that is considered to be a sensitive source of
56+ * `classification`.
57+ */
58+ private DataFlow:: LocalSourceNode sensitiveFunction (
59+ DataFlow:: TypeTracker t , SensitiveDataClassification classification
60+ ) {
61+ t .start ( ) and
62+ exists ( Function f |
63+ nameIndicatesSensitiveData ( f .getName ( ) , classification ) and
64+ result .asExpr ( ) = f .getDefinition ( )
65+ )
66+ or
67+ exists ( DataFlow:: TypeTracker t2 | result = sensitiveFunction ( t2 , classification ) .track ( t2 , t ) )
68+ }
5869
59- override string getClassification ( ) {
60- exists ( OldSensitiveData:: SensitiveData classification |
61- oldSensitiveSource .isSourceOf ( classification )
62- |
63- classification = "sensitive.data." + result
70+ /**
71+ * Gets a reference to a function that is considered to be a sensitive source of
72+ * `classification`.
73+ */
74+ DataFlow:: Node sensitiveFunction ( SensitiveDataClassification classification ) {
75+ sensitiveFunction ( DataFlow:: TypeTracker:: end ( ) , classification ) .flowsTo ( result )
76+ }
77+
78+ /**
79+ * Gets a reference to a string constant that, if used as the key in a lookup,
80+ * indicates the presence of sensitive data with `classification`.
81+ */
82+ private DataFlow:: LocalSourceNode sensitiveLookupStringConst (
83+ DataFlow:: TypeTracker t , SensitiveDataClassification classification
84+ ) {
85+ t .start ( ) and
86+ nameIndicatesSensitiveData ( result .asExpr ( ) .( StrConst ) .getText ( ) , classification )
87+ or
88+ exists ( DataFlow:: TypeTracker t2 |
89+ result = sensitiveLookupStringConst ( t2 , classification ) .track ( t2 , t )
6490 )
6591 }
92+
93+ /**
94+ * Gets a reference to a string constant that, if used as the key in a lookup,
95+ * indicates the presence of sensitive data with `classification`.
96+ *
97+ * Also see `extraStepForCalls`.
98+ */
99+ DataFlow:: Node sensitiveLookupStringConst ( SensitiveDataClassification classification ) {
100+ sensitiveLookupStringConst ( DataFlow:: TypeTracker:: end ( ) , classification ) .flowsTo ( result )
101+ }
102+
103+ /** A function call that is considered a source of sensitive data. */
104+ class SensitiveFunctionCall extends SensitiveDataSource:: Range , DataFlow:: CallCfgNode {
105+ SensitiveDataClassification classification ;
106+
107+ SensitiveFunctionCall ( ) {
108+ this .getFunction ( ) = sensitiveFunction ( classification )
109+ or
110+ // to cover functions that we don't have the definition for, and where the
111+ // reference to the function has not already been marked as being sensitive
112+ nameIndicatesSensitiveData ( this .getFunction ( ) .asCfgNode ( ) .( NameNode ) .getId ( ) , classification )
113+ }
114+
115+ override SensitiveDataClassification getClassification ( ) { result = classification }
116+ }
117+
118+ /**
119+ * Tracks any modeled source of sensitive data (with any classification),
120+ * to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
121+ */
122+ private DataFlow:: LocalSourceNode possibleSensitiveCallable ( DataFlow:: TypeTracker t ) {
123+ t .start ( ) and
124+ result instanceof SensitiveDataSource
125+ or
126+ exists ( DataFlow:: TypeTracker t2 | result = possibleSensitiveCallable ( t2 ) .track ( t2 , t ) )
127+ }
128+
129+ /**
130+ * Tracks any modeled source of sensitive data (with any classification),
131+ * to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
132+ */
133+ private DataFlow:: Node possibleSensitiveCallable ( ) {
134+ possibleSensitiveCallable ( DataFlow:: TypeTracker:: end ( ) ) .flowsTo ( result )
135+ }
136+
137+ /**
138+ * Holds if the step from `nodeFrom` to `nodeTo` should be considered a
139+ * taint-flow step for sensitive-data, to ensure calls are handled correctly.
140+ *
141+ * To handle calls properly, while preserving a good source for path explanations,
142+ * you need to include this predicate as an additional taint step in your taint-tracking
143+ * configurations.
144+ *
145+ * The core problem can be illustrated by the example below. If we consider the
146+ * `print` a sink, what path and what source do we want to show? My initial approach
147+ * would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
148+ * lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
149+ * like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
150+ * with a non-optimal path, which starts at _bad source_, and therefore doesn't show
151+ * how we figured out that `non_sensitive_name`
152+ * could be a function that returns a password (and in cases where there is many calls to
153+ * `my_func` it will be annoying for someone to figure this out manually).
154+ *
155+ * By including this additional taint-step in the taint-tracking configuration, it's possible
156+ * to get a path explanation going from _good source_ to the sink.
157+ *
158+ * ```python
159+ * def my_func(non_sensitive_name):
160+ * x = non_sensitive_name() # <-- bad source
161+ * print(x) # <-- sink
162+ *
163+ * import not_found
164+ * f = not_found.get_passwd # <-- good source
165+ * my_func(f)
166+ * ```
167+ */
168+ predicate extraStepForCalls ( DataFlow:: Node nodeFrom , DataFlow:: CallCfgNode nodeTo ) {
169+ // However, we do still use the type-tracking approach to limit the size of this
170+ // predicate.
171+ nodeTo .getFunction ( ) = nodeFrom and
172+ nodeFrom = possibleSensitiveCallable ( )
173+ }
174+
175+ /**
176+ * Any kind of variable assignment (also including with/for) where the name indicates
177+ * it contains sensitive data.
178+ *
179+ * Note: We _could_ make any access to a variable with a sensitive name a source of
180+ * sensitive data, but to make path explanations in data-flow/taint-tracking good,
181+ * we don't want that, since it works against allowing users to understand the flow
182+ * in the program (which is the whole point).
183+ *
184+ * Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
185+ * the variable is marked as the source (as compared to marking the variable as the
186+ * source).
187+ */
188+ class SensitiveVariableAssignment extends SensitiveDataSource:: Range {
189+ SensitiveDataClassification classification ;
190+
191+ SensitiveVariableAssignment ( ) {
192+ exists ( DefinitionNode def |
193+ nameIndicatesSensitiveData ( def .( NameNode ) .getId ( ) , classification ) and
194+ (
195+ this .asCfgNode ( ) = def .getValue ( )
196+ or
197+ this .asCfgNode ( ) = def .getValue ( ) .( ForNode ) .getSequence ( )
198+ ) and
199+ not this .asExpr ( ) instanceof FunctionExpr and
200+ not this .asExpr ( ) instanceof ClassExpr
201+ )
202+ or
203+ exists ( With with |
204+ nameIndicatesSensitiveData ( with .getOptionalVars ( ) .( Name ) .getId ( ) , classification ) and
205+ this .asExpr ( ) = with .getContextExpr ( )
206+ )
207+ }
208+
209+ override SensitiveDataClassification getClassification ( ) { result = classification }
210+ }
211+
212+ /** An attribute access that is considered a source of sensitive data. */
213+ class SensitiveAttributeAccess extends SensitiveDataSource:: Range {
214+ SensitiveDataClassification classification ;
215+
216+ SensitiveAttributeAccess ( ) {
217+ // Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
218+ // I considered excluding any `from ... import something_sensitive`, but then realized that
219+ // we should flag up `form ... import password as ...` as a password
220+ nameIndicatesSensitiveData ( this .( DataFlow:: AttrRead ) .getAttributeName ( ) , classification )
221+ or
222+ // Things like `getattr(foo, <reference-to-string>)`
223+ this .( DataFlow:: AttrRead ) .getAttributeNameExpr ( ) = sensitiveLookupStringConst ( classification )
224+ }
225+
226+ override SensitiveDataClassification getClassification ( ) { result = classification }
227+ }
228+
229+ /** A subscript, where the key indicates the result will be sensitive data. */
230+ class SensitiveSubscript extends SensitiveDataSource:: Range {
231+ SensitiveDataClassification classification ;
232+
233+ SensitiveSubscript ( ) {
234+ this .asCfgNode ( ) .( SubscriptNode ) .getIndex ( ) =
235+ sensitiveLookupStringConst ( classification ) .asCfgNode ( )
236+ }
237+
238+ override SensitiveDataClassification getClassification ( ) { result = classification }
239+ }
240+
241+ /** A call to `get` on an object, where the key indicates the result will be sensitive data. */
242+ class SensitiveGetCall extends SensitiveDataSource:: Range , DataFlow:: CallCfgNode {
243+ SensitiveDataClassification classification ;
244+
245+ SensitiveGetCall ( ) {
246+ this .getFunction ( ) .asCfgNode ( ) .( AttrNode ) .getName ( ) = "get" and
247+ this .getArg ( 0 ) = sensitiveLookupStringConst ( classification )
248+ }
249+
250+ override SensitiveDataClassification getClassification ( ) { result = classification }
251+ }
252+
253+ /** A parameter where the name indicates it will receive sensitive data. */
254+ class SensitiveParameter extends SensitiveDataSource:: Range , DataFlow:: ParameterNode {
255+ SensitiveDataClassification classification ;
256+
257+ SensitiveParameter ( ) {
258+ nameIndicatesSensitiveData ( this .getParameter ( ) .getName ( ) , classification )
259+ }
260+
261+ override SensitiveDataClassification getClassification ( ) { result = classification }
262+ }
66263}
264+
265+ predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling:: extraStepForCalls / 2 ;
0 commit comments