@@ -34,4 +34,118 @@ private module Pandas {
3434
3535 override string getFormat ( ) { result = "pickle" }
3636 }
37+
module DataFrame {
  /**
   * A primary source of a `pandas.DataFrame` object.
   *
   * Extend this class to model additional API nodes that should be
   * treated as `pandas.DataFrame` values.
   * See https://pandas.pydata.org/docs/reference/frame.html
   */
  abstract class Range extends API::Node {
    // Delegates to the `toString` of `API::Node` through an explicit cast.
    override string toString() {
      result = this.(API::Node).toString()
    }
  }
}
47+
/**
 * A `pandas.DataFrame` object: either a primary source modelled by
 * `DataFrame::Range`, or the return value of one of the listed members
 * called directly on such a source.
 * Use this class where you want to find all `pandas.DataFrame` objects.
 * See https://pandas.pydata.org/pandas-docs/stable/reference/frame.html
 */
class DataFrame extends API::Node {
  DataFrame() {
    // Primary sources (constructor calls, `read_*` functions, ...).
    this = any(DataFrame::Range df)
    or
    // Secondary objects: the result of calling one of these members on a
    // primary source. Each member name is listed exactly once.
    this =
      any(DataFrame::Range df)
          .getMember([
              "copy", "from_records", "from_dict", "from_spmatrix", "assign", "select_dtypes",
              "set_flags", "astype", "infer_objects", "head", "xs", "get", "isin", "where",
              "mask", "query", "add", "mul", "truediv", "mod", "pow", "dot", "radd", "rsub",
              "rdiv", "rfloordiv", "rtruediv", "rpow", "lt", "gt", "le", "ne", "agg", "combine",
              "apply", "aggregate", "transform", "all", "any", "clip", "corr", "cov", "cummax",
              "cummin", "cumprod", "describe", "mode", "pct_change", "quantile", "rank",
              "round", "sem", "add_prefix", "add_suffix", "at_time", "between_time", "drop",
              "drop_duplicates", "filter", "first", "idxmin", "last", "reindex",
              "reindex_like", "reset_index", "sample", "set_axis", "tail", "take", "truncate",
              "bfill", "dropna", "ffill", "fillna", "interpolate", "isna", "isnull", "notna",
              "notnull", "pad", "replace", "droplevel", "pivot", "pivot_table",
              "reorder_levels", "sort_values", "sort_index", "nlargest", "nsmallest",
              "swaplevel", "stack", "unstack", "melt", "explode", "squeeze", "T", "transpose",
              "compare", "join", "shift", "asof", "merge", "tz_convert", "to_period", "asfreq",
              "to_dense", "tz_localize", "box", "__dataframe__"
            ])
          .getReturn()
  }

  // Delegates to the `toString` of `API::Node` through an explicit cast.
  override string toString() { result = this.(API::Node).toString() }
}
86+
/**
 * The result of calling the `pandas.DataFrame` constructor.
 * See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
 */
class DataFrameConstructor extends DataFrame::Range {
  DataFrameConstructor() {
    exists(API::Node ctor |
      ctor = API::moduleImport("pandas").getMember("DataFrame") and
      this = ctor.getReturn()
    )
  }
}
94+
/**
 * The result of a `pandas.read_*` function that yields a `pandas.DataFrame`.
 * See https://pandas.pydata.org/docs/reference/io.html
 */
class InputRead extends DataFrame::Range {
  InputRead() {
    // Readers whose return value is modelled directly as a DataFrame.
    this =
      API::moduleImport("pandas")
          .getMember([
              "read_csv", "read_fwf", "read_pickle", "read_table", "read_clipboard", "read_excel",
              "read_xml", "read_parquet", "read_orc", "read_spss", "read_sql_table",
              "read_sql_query", "read_sql", "read_gbq", "read_stata"
            ])
          .getReturn()
    or
    // For `read_html`, a subscript of the return value is the DataFrame.
    this = API::moduleImport("pandas").getMember("read_html").getReturn().getASubscript()
    or
    // `read_sas` returns a DataFrame only if `iterator=False` and `chunksize=None`
    // (the defaults). The return node is bound to the very call whose arguments are
    // inspected, so that one call with default arguments does not make the results
    // of every other `read_sas` call count as DataFrames.
    exists(API::CallNode readSasCall |
      readSasCall = API::moduleImport("pandas").getMember("read_sas").getACall() and
      this = readSasCall.getReturn()
    |
      (
        // `iterator` is not the literal `True` ...
        not readSasCall.getParameter(5, "iterator").asSink().asExpr().(BooleanLiteral) instanceof
          True
        or
        // ... or is not passed at all.
        not exists(readSasCall.getParameter(5, "iterator").asSink())
      ) and
      // `chunksize` is not given as an integer literal.
      not exists(
        readSasCall.getParameter(4, "chunksize").asSink().asExpr().(IntegerLiteral).getN()
      )
    )
  }
}
130+
/**
 * A call to `pandas.DataFrame.query` or `pandas.DataFrame.eval`, modelled as
 * code execution of its first argument (`expr`).
 * See https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
 * https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.eval.html
 */
class DataFlowQueryCall extends CodeExecution::Range, API::CallNode {
  DataFlowQueryCall() {
    exists(DataFrame frame | this = frame.getMember(["query", "eval"]).getACall())
  }

  // The expression string: positional argument 0 or keyword `expr`.
  override DataFlow::Node getCode() {
    result = this.getParameter(0, "expr").asSink()
  }
}
141+
/**
 * A call to the module-level `pandas.eval` function, modelled as code
 * execution of its first argument (`expr`).
 * See https://pandas.pydata.org/docs/reference/api/pandas.eval.html
 */
class PandasEval extends CodeExecution::Range, API::CallNode {
  PandasEval() {
    exists(API::Node evalFunction |
      evalFunction = API::moduleImport("pandas").getMember("eval") and
      this = evalFunction.getACall()
    )
  }

  // The expression string: positional argument 0 or keyword `expr`.
  override DataFlow::Node getCode() {
    result = this.getParameter(0, "expr").asSink()
  }
}
37151}
0 commit comments