Use date_as_object and timestamp_as_object in the pandas conversion of the pysql client

sander-goos · susodapop · commit ae435ba3bab4 · 2022-06-02T11:14:59.000-05:00
Use the date_as_object and timestamp_as_object options in the to_pandas conversion from Arrow to prevent errors with out-of-bounds values (see also: https://issues.apache.org/jira/browse/ARROW-5359). Add test cases to TimestampTestsMixin (it would be nice if we could replace those with unit tests).
diff --git a/cmdexec/clients/python/src/databricks/sql/client.py b/cmdexec/clients/python/src/databricks/sql/client.py
@@ -537,14 +537,8 @@ def _convert_arrow_table(self, table):
 
         # Need to rename columns, as the to_pandas function cannot handle duplicate column names
         table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
-        df = table_renamed.to_pandas(types_mapper=dtype_mapping.get)
-
-        for (i, col) in enumerate(df.columns):
-            # Check for 0 because .dt doesn't work on empty series
-            if self.description[i][1] == 'timestamp' and len(df) > 0:
-                # We store the dtype as object so we don't use the pandas datetime dtype but
-                # a native datetime.datetime
-                df[col] = pandas.Series(df[col].dt.to_pydatetime(), dtype='object')
+        df = table_renamed.to_pandas(
+            types_mapper=dtype_mapping.get, date_as_object=True, timestamp_as_object=True)
 
         res = df.to_numpy(na_value=None)
         return [ResultRow(*v) for v in res]