|
| 1 | +/** |
| 2 | + * Definitions for reasoning about untrusted data used in APIs defined outside the |
| 3 | + * database. |
| 4 | + */ |
| 5 | + |
| 6 | +import python |
| 7 | +import semmle.python.dataflow.new.DataFlow |
| 8 | +import semmle.python.dataflow.new.TaintTracking |
| 9 | +import semmle.python.Concepts |
| 10 | +import semmle.python.dataflow.new.RemoteFlowSources |
| 11 | +private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate |
| 12 | +private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate |
| 13 | +private import semmle.python.types.Builtins |
| 14 | +private import semmle.python.objects.ObjectInternal |
| 15 | + |
| 16 | +// IMPLEMENTATION NOTES: |
| 17 | +// |
| 18 | +// This query uses *both* the new data-flow library, and points-to. Why? To get this |
| 19 | +// finished quickly, so it can provide value for our field team and ourselves. |
| 20 | +// |
| 21 | +// In the long run, it should not need to use points-to for anything. Possibly this can |
| 22 | +// even be helpful in figuring out what we need from TypeTrackers and the new data-flow |
| 23 | +// library to be fully operational. |
| 24 | +// |
| 25 | +// At least it will allow us to provide a baseline comparison against a solution that |
| 26 | +// doesn't use points-to at all. |
| 27 | +// |
| 28 | +// There are a few dirty things we do here: |
| 29 | +// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed |
| 30 | +// publicly, but we really want access to them. |
| 31 | +// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and |
| 32 | +// `DataFlowCallable` |
| 33 | +// 3. ObjectInternal: to provide better names for built-in functions and methods. If we |
| 34 | +// really wanted to polish our points-to implementation, we could move this |
| 35 | +// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but that will |
| 36 | +// probably require some more work: for this query, it's totally ok to use |
| 37 | +// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to |
| 38 | +// figure out if that is desirable in general. I simply cut a corner here! |
| 39 | +// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :( |
| 40 | +/** |
| 41 | + * An extension point for callables that are considered "safe" external APIs from a security perspective. Extend this class and override `getSafeCallable` to contribute additional safe callables; arguments passed to such callables are not treated as `ExternalAPIDataNode`s. |
| 42 | + */ |
| 43 | +class SafeExternalAPI extends Unit { |
| 44 | + /** Gets a callable that is considered a "safe" external API. */ abstract DataFlowPrivate::DataFlowCallable getSafeCallable(); |
| 45 | +} |
| 46 | + |
| 47 | +/** The built-in callables treated as "safe" external APIs by default: `len`, `isinstance`, `getattr`, `hasattr`, and the `dict` item accessors `__getitem__` and `__setitem__`. */ |
| 48 | +private class DefaultSafeExternalAPI extends SafeExternalAPI { |
| 49 | + override DataFlowPrivate::DataFlowCallable getSafeCallable() { |
| 50 | + exists(CallableValue safe | result.getCallableValue() = safe | |
| 51 | + exists(ClassValue dictClass, string method | |
| 52 | + method in ["__getitem__", "__setitem__"] and dictClass = Value::named("dict") |
| 53 | + | |
| 54 | + safe = dictClass.lookup(method) |
| 55 | + ) |
| 56 | + or |
| 57 | + safe = Value::named(["len", "isinstance", "getattr", "hasattr"]) |
| 58 | + ) |
| 59 | + } |
| 60 | +} |
| 61 | + |
| 62 | +/** A node representing data being passed to an external API through a call: an argument of a call whose target is not a `SafeExternalAPI` callable and whose callable value is absent, built-in, defined in the standard library, or defined in a file without a relative path. */ |
| 63 | +class ExternalAPIDataNode extends DataFlow::Node { |
| 64 | + DataFlowPrivate::DataFlowCall call; // the call that this node is an argument of |
| 65 | + DataFlowPrivate::DataFlowCallable callable; // the callable targeted by `call` |
| 66 | + int i; // the argument index of this node within `call` |
| 67 | + |
| 68 | + ExternalAPIDataNode() { |
| 69 | + exists(call.getLocation().getFile().getRelativePath()) and // only calls located in a file that has a relative path |
| 70 | + callable = call.getCallable() and |
| 71 | + not any(SafeExternalAPI safe).getSafeCallable() = callable and // skip callables declared safe |
| 72 | + exists(Value cv | cv = callable.getCallableValue() | |
| 73 | + cv.isAbsent() |
| 74 | + or |
| 75 | + cv.isBuiltin() |
| 76 | + or |
| 77 | + cv.(CallableValue).getScope().getLocation().getFile().inStdlib() |
| 78 | + or |
| 79 | + not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath()) // scope's file has no relative path |
| 80 | + ) and |
| 81 | + // TODO: this ignores some complexity of keyword arguments (especially keyword-only args) |
| 82 | + this = call.getArg(i) and |
| 83 | + // Not already modeled as a taint step |
| 84 | + not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and |
| 85 | + // For `list.append(x)`, we have an additional taint step from x -> [post] list. |
| 86 | + // Since we have modeled this explicitly, I don't see any cases where we would want to report this. |
| 87 | + not exists(DataFlow::Node prev, DataFlow::PostUpdateNode post | |
| 88 | + post.getPreUpdateNode() = this and |
| 89 | + TaintTrackingPrivate::defaultAdditionalTaintStep(prev, post) |
| 90 | + ) |
| 91 | + } |
| 92 | + |
| 93 | + /** Gets the index of the argument position at which this node is passed to the call. */ |
| 94 | + int getIndex() { result = i } |
| 95 | + |
| 96 | + /** Gets the callable to which this argument is passed. */ |
| 97 | + DataFlowPrivate::DataFlowCallable getCallable() { result = callable } |
| 98 | +} |
| 99 | + |
| 100 | +/** A taint-tracking configuration whose sources are `RemoteFlowSource`s and whose sinks are `ExternalAPIDataNode`s. */ |
| 101 | +class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration { |
| 102 | + UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfig" } |
| 103 | + |
| 104 | + override predicate isSink(DataFlow::Node sink) { sink = any(ExternalAPIDataNode arg) } |
| 105 | + |
| 106 | + override predicate isSource(DataFlow::Node source) { source = any(RemoteFlowSource remote) } |
| 107 | +} |
| 108 | + |
| 109 | +/** An `ExternalAPIDataNode` that is reached by flow in `UntrustedDataToExternalAPIConfig`, i.e. untrusted data being passed to an external API. */ |
| 110 | +class UntrustedExternalAPIDataNode extends ExternalAPIDataNode { |
| 111 | + UntrustedExternalAPIDataNode() { exists(UntrustedDataToExternalAPIConfig cfg | cfg.hasFlow(_, this)) } |
| 112 | + |
| 113 | + /** Gets a source of untrusted data that flows to this external API data node. */ |
| 114 | + DataFlow::Node getAnUntrustedSource() { |
| 115 | + exists(UntrustedDataToExternalAPIConfig cfg | cfg.hasFlow(result, this)) |
| 116 | + } |
| 117 | +} |
| 118 | + |
| 119 | +private newtype TExternalAPI = |
| 120 | + TExternalAPIParameter(DataFlowPrivate::DataFlowCallable callable, int index) { |
| 121 | + exists(UntrustedExternalAPIDataNode arg | |
| 122 | + arg.getIndex() = index and |
| 123 | + arg.getCallable() = callable |
| 124 | + ) |
| 125 | + } |
| 126 | + |
| 127 | +/** An external API, identified by a callable and a parameter index, that is used with untrusted data. */ |
| 128 | +class ExternalAPIUsedWithUntrustedData extends TExternalAPI { |
| 129 | + /** Gets a node where possibly untrusted data is passed to this external API. */ |
| 130 | + UntrustedExternalAPIDataNode getUntrustedDataNode() { |
| 131 | + TExternalAPIParameter(result.getCallable(), result.getIndex()) = this |
| 132 | + } |
| 133 | + |
| 134 | + /** Gets the number of distinct untrusted sources that flow to this external API. */ |
| 135 | + int getNumberOfUntrustedSources() { |
| 136 | + result = count(this.getUntrustedDataNode().getAnUntrustedSource()) |
| 137 | + } |
| 138 | + |
| 139 | + /** Gets a textual representation of this element, of the form `<callable name> [param <index>]`. */ |
| 140 | + string toString() { |
| 141 | + exists( |
| 142 | + DataFlowPrivate::DataFlowCallable target, int position, string targetName, |
| 143 | + string positionLabel |
| 144 | + | |
| 145 | + result = targetName + " [" + positionLabel + "]" and |
| 146 | + this = TExternalAPIParameter(target, position) and |
| 147 | + positionLabel = "param " + position and |
| 148 | + exists(CallableValue callee | target.getCallableValue() = callee | |
| 149 | + not exists(callee.getScope()) and |
| 150 | + ( |
| 151 | + not callee instanceof BuiltinMethodValue and |
| 152 | + not callee instanceof BuiltinFunctionValue and |
| 153 | + targetName = callee.toString() |
| 154 | + or |
| 155 | + callee instanceof BuiltinMethodValue and |
| 156 | + targetName = pretty_builtin_method_value(callee) |
| 157 | + or |
| 158 | + callee instanceof BuiltinFunctionValue and |
| 159 | + targetName = pretty_builtin_function_value(callee) |
| 160 | + ) |
| 161 | + or |
| 162 | + targetName = |
| 163 | + callee.getScope().getEnclosingModule().getName() + "." + callee.getScope().getQualifiedName() |
| 164 | + ) |
| 165 | + ) |
| 166 | + } |
| 167 | +} |
| 168 | + |
| 169 | +/** Gets a display name for the built-in function `bfv`, qualified with its module when one is found. */ |
| 170 | +private string pretty_builtin_function_value(BuiltinFunctionValue bfv) { |
| 171 | + exists(Builtin func | bfv.(BuiltinFunctionObjectInternal).getBuiltin() = func | |
| 172 | + prefix_with_module_if_found(func) = result |
| 173 | + ) |
| 174 | +} |
| 175 | + |
| 176 | +/** Gets a display name for the built-in method `bmv`: `<class>.<method>` (with the class module-qualified when found) if a declaring built-in class is found, otherwise just the method name. */ |
| 177 | +private string pretty_builtin_method_value(BuiltinMethodValue bmv) { |
| 178 | + exists(Builtin method | bmv.(BuiltinMethodObjectInternal).getBuiltin() = method | |
| 179 | + not exists(Builtin owner | owner.isClass() and owner.getMember(method.getName()) = method) and |
| 180 | + result = method.getName() |
| 181 | + or |
| 182 | + exists(Builtin owner | owner.getMember(method.getName()) = method and owner.isClass() | |
| 183 | + result = prefix_with_module_if_found(owner) + "." + method.getName() |
| 184 | + ) |
| 185 | + ) |
| 186 | +} |
| 187 | + |
| 188 | +/** Gets the name of `b`, prefixed with the name of a built-in module that has `b` as a member when such a module is found; otherwise gets the bare name, so this always has a result. */ |
| 189 | +private string prefix_with_module_if_found(Builtin b) { |
| 190 | + not exists(Builtin container | container.isModule() and container.getMember(b.getName()) = b) and |
| 191 | + result = b.getName() |
| 192 | + or |
| 193 | + exists(Builtin container | container.getMember(b.getName()) = b and container.isModule() | |
| 194 | + result = container.getName() + "." + b.getName() |
| 195 | + ) |
| 196 | +} |
| 197 | + |
0 commit comments