Skip to content

Commit 4a2ab49

Browse files
am0o0tausbn
authored and committed
Better structure for the pandas DataFrame model: it is now much more readable, and we can also find many more DataFrame objects
1 parent 8b93e81 commit 4a2ab49

File tree

1 file changed

+81
-76
lines changed

1 file changed

+81
-76
lines changed

python/ql/lib/semmle/python/frameworks/Pandas.qll

Lines changed: 81 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -35,96 +35,99 @@ private module Pandas {
3535
override string getFormat() { result = "pickle" }
3636
}
3737

38+
/**
39+
* Provides security related models for `pandas.DataFrame`.
40+
* See https://pandas.pydata.org/docs/reference/frame.html
41+
*/
3842
module DataFrame {
3943
/**
4044
* A `pandas.DataFrame` Object.
45+
*
46+
* Extend this class to model new APIs.
4147
* See https://pandas.pydata.org/docs/reference/frame.html
4248
*/
43-
abstract class Range extends API::Node {
49+
abstract class DataFrame extends API::Node {
4450
override string toString() { result = this.(API::Node).toString() }
4551
}
46-
}
4752

48-
/**
49-
* The `pandas.DataFrame` Objects including secondary `pandas.DataFrame` Objects.
50-
* Use this class where you want to find all `pandas.DataFrame` Objects.
51-
* See https://pandas.pydata.org/pandas-docs/stable/reference/frame.html
52-
*/
53-
class DataFrame extends API::Node {
54-
DataFrame() {
55-
this = any(DataFrame::Range df)
56-
or
57-
exists(API::Node dataFrame | dataFrame = any(DataFrame::Range df) |
53+
/**
54+
* A `pandas.DataFrame` instantiation.
55+
* See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
56+
*/
57+
class DataFrameConstructor extends DataFrame {
58+
DataFrameConstructor() {
59+
this = API::moduleImport("pandas").getMember("DataFrame").getReturn()
60+
}
61+
}
62+
63+
/**
64+
* The `pandas.read_*` functions that return a `pandas.DataFrame`.
65+
* See https://pandas.pydata.org/docs/reference/io.html
66+
*/
67+
class InputRead extends DataFrame {
68+
InputRead() {
5869
this =
59-
dataFrame
70+
API::moduleImport("pandas")
6071
.getMember([
61-
"copy", "from_records", "from_dict", "from_spmatrix", "assign", "select_dtypes",
62-
"set_flags", "astype", "infer_objects", "head", "xs", "get", "isin", "where",
63-
"mask", "query", "add", "mul", "truediv", "mod", "pow", "dot", "radd", "rsub",
64-
"rdiv", "rfloordiv", "rtruediv", "rpow", "lt", "gt", "le", "ne", "agg", "combine",
65-
"apply", "aggregate", "transform", "all", "any", "clip", "corr", "cov", "cummax",
66-
"cummin", "cumprod", "describe", "mode", "pct_change", "quantile", "rank",
67-
"round", "sem", "add_prefix", "add_suffix", "at_time", "between_time", "drop",
68-
"drop_duplicates", "filter", "first", "head", "idxmin", "last", "reindex",
69-
"reindex_like", "reset_index", "sample", "set_axis", "tail", "take", "truncate",
70-
"bfill", "dropna", "ffill", "fillna", "interpolate", "isna", "isnull", "notna",
71-
"notnull", "pad", "replace", "droplevel", "pivot", "pivot_table",
72-
"reorder_levels", "sort_values", "sort_index", "nlargest", "nsmallest",
73-
"swaplevel", "stack", "unstack", "isnull", "notna", "notnull", "replace",
74-
"droplevel", "pivot", "pivot_table", "reorder_levels", "sort_values",
75-
"sort_index", "nlargest", "nsmallest", "swaplevel", "stack", "unstack", "melt",
76-
"explode", "squeeze", "T", "transpose", "compare", "join", "from_spmatrix",
77-
"shift", "asof", "merge", "from_dict", "tz_convert", "to_period", "asfreq",
78-
"to_dense", "tz_localize", "box", "__dataframe__"
72+
"read_csv", "read_fwf", "read_pickle", "read_table", "read_clipboard",
73+
"read_excel", "read_xml", "read_parquet", "read_orc", "read_spss",
74+
"read_sql_table", "read_sql_query", "read_sql", "read_gbq", "read_stata"
7975
])
8076
.getReturn()
81-
)
77+
or
78+
this = API::moduleImport("pandas").getMember("read_html").getReturn().getASubscript()
79+
or
80+
exists(API::Node readSas, API::CallNode readSasCall |
81+
readSas = API::moduleImport("pandas").getMember("read_sas") and
82+
this = readSas.getReturn() and
83+
readSasCall = readSas.getACall()
84+
|
85+
// Returns a DataFrame if iterator=False and chunksize=None; with the default values it also returns a DataFrame.
86+
(
87+
not readSasCall.getParameter(5, "iterator").asSink().asExpr().(BooleanLiteral)
88+
instanceof True
89+
or
90+
not exists(readSasCall.getParameter(5, "iterator").asSink())
91+
) and
92+
not exists(
93+
readSasCall.getParameter(4, "chunksize").asSink().asExpr().(IntegerLiteral).getN()
94+
)
95+
)
96+
}
8297
}
8398

84-
override string toString() { result = this.(API::Node).toString() }
85-
}
86-
87-
/**
88-
* A `pandas.DataFrame` instantiation.
89-
* See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
90-
*/
91-
class DataFrameConstructor extends DataFrame::Range {
92-
DataFrameConstructor() { this = API::moduleImport("pandas").getMember("DataFrame").getReturn() }
93-
}
94-
95-
/**
96-
* The `pandas.read_*` functions that return a `pandas.DataFrame`.
97-
* See https://pandas.pydata.org/docs/reference/io.html
98-
*/
99-
class InputRead extends DataFrame::Range {
100-
InputRead() {
101-
this =
102-
API::moduleImport("pandas")
103-
.getMember([
104-
"read_csv", "read_fwf", "read_pickle", "read_table", "read_clipboard", "read_excel",
105-
"read_xml", "read_parquet", "read_orc", "read_spss", "read_sql_table",
106-
"read_sql_query", "read_sql", "read_gbq", "read_stata"
107-
])
108-
.getReturn()
109-
or
110-
this = API::moduleImport("pandas").getMember("read_html").getReturn().getASubscript()
111-
or
112-
exists(API::Node readSas, API::CallNode readSasCall |
113-
readSas = API::moduleImport("pandas").getMember("read_sas") and
114-
this = readSas.getReturn() and
115-
readSasCall = readSas.getACall()
116-
|
117-
// Returns DataFrame if iterator=False and chunksize=None, With default values it returns DataFrame.
118-
(
119-
not readSasCall.getParameter(5, "iterator").asSink().asExpr().(BooleanLiteral) instanceof
120-
True
121-
or
122-
not exists(readSasCall.getParameter(5, "iterator").asSink())
123-
) and
124-
not exists(
125-
readSasCall.getParameter(4, "chunksize").asSink().asExpr().(IntegerLiteral).getN()
99+
/**
100+
* The `pandas.DataFrame.*` methods that return a `pandas.DataFrame` object.
101+
* See https://pandas.pydata.org/docs/reference/frame.html
102+
*/
103+
class DataFrameMethods extends DataFrame {
104+
DataFrameMethods() {
105+
exists(API::Node dataFrame | dataFrame = any(DataFrame df) |
106+
this =
107+
dataFrame
108+
.getMember([
109+
"copy", "from_records", "from_dict", "from_spmatrix", "assign", "select_dtypes",
110+
"set_flags", "astype", "infer_objects", "head", "xs", "get", "isin", "where",
111+
"mask", "query", "add", "mul", "truediv", "mod", "pow", "dot", "radd", "rsub",
112+
"rdiv", "rfloordiv", "rtruediv", "rpow", "lt", "gt", "le", "ne", "agg",
113+
"combine", "apply", "aggregate", "transform", "all", "any", "clip", "corr",
114+
"cov", "cummax", "cummin", "cumprod", "describe", "mode", "pct_change",
115+
"quantile", "rank", "round", "sem", "add_prefix", "add_suffix", "at_time",
116+
"between_time", "drop", "drop_duplicates", "filter", "first", "head", "idxmin",
117+
"last", "reindex", "reindex_like", "reset_index", "sample", "set_axis", "tail",
118+
"take", "truncate", "bfill", "dropna", "ffill", "fillna", "interpolate", "isna",
119+
"isnull", "notna", "notnull", "pad", "replace", "droplevel", "pivot",
120+
"pivot_table", "reorder_levels", "sort_values", "sort_index", "nlargest",
121+
"nsmallest", "swaplevel", "stack", "unstack", "isnull", "notna", "notnull",
122+
"replace", "droplevel", "pivot", "pivot_table", "reorder_levels", "sort_values",
123+
"sort_index", "nlargest", "nsmallest", "swaplevel", "stack", "unstack", "melt",
124+
"explode", "squeeze", "T", "transpose", "compare", "join", "from_spmatrix",
125+
"shift", "asof", "merge", "from_dict", "tz_convert", "to_period", "asfreq",
126+
"to_dense", "tz_localize", "box", "__dataframe__"
127+
])
128+
.getReturn()
126129
)
127-
)
130+
}
128131
}
129132
}
130133

@@ -134,7 +137,9 @@ private module Pandas {
134137
* https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.eval.html
135138
*/
136139
class DataFlowQueryCall extends CodeExecution::Range, API::CallNode {
137-
DataFlowQueryCall() { this = any(DataFrame df).getMember(["query", "eval"]).getACall() }
140+
DataFlowQueryCall() {
141+
this = any(DataFrame::DataFrame df).getMember(["query", "eval"]).getACall()
142+
}
138143

139144
override DataFlow::Node getCode() { result = this.getParameter(0, "expr").asSink() }
140145
}

0 commit comments

Comments
 (0)