Skip to content

Commit 63661f2

Browse files
committed
prev was better
1 parent 45b85a8 commit 63661f2

File tree

1 file changed

+37
-33
lines changed

1 file changed

+37
-33
lines changed

src/databricks/sql/client.py

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,42 +1407,46 @@ def _convert_arrow_table(self, table: "pyarrow.Table"):
14071407
column_names = [c[0] for c in self.description]
14081408
ResultRow = Row(*column_names)
14091409

1410-
# if self.connection.disable_pandas is True:
1410+
if self.connection.disable_pandas is True:
1411+
start_time = time.time()
1412+
columns_as_lists = [col.to_pylist() for col in table.itercolumns()]
1413+
res = [ResultRow(*row) for row in zip(*columns_as_lists)]
1414+
end_time = time.time()
1415+
print(f"Time taken to convert arrow table to list: {end_time - start_time} seconds")
1416+
return res
1417+
14111418
start_time = time.time()
1412-
columns_as_lists = [col.to_pylist() for col in table.itercolumns()]
1413-
res = [ResultRow(*row) for row in zip(*columns_as_lists)]
1419+
# Need to use nullable types, as otherwise type can change when there are missing values.
1420+
# See https://arrow.apache.org/docs/python/pandas.html#nullable-types
1421+
# NOTE: This API is experimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
1422+
dtype_mapping = {
1423+
pyarrow.int8(): pandas.Int8Dtype(),
1424+
pyarrow.int16(): pandas.Int16Dtype(),
1425+
pyarrow.int32(): pandas.Int32Dtype(),
1426+
pyarrow.int64(): pandas.Int64Dtype(),
1427+
pyarrow.uint8(): pandas.UInt8Dtype(),
1428+
pyarrow.uint16(): pandas.UInt16Dtype(),
1429+
pyarrow.uint32(): pandas.UInt32Dtype(),
1430+
pyarrow.uint64(): pandas.UInt64Dtype(),
1431+
pyarrow.bool_(): pandas.BooleanDtype(),
1432+
pyarrow.float32(): pandas.Float32Dtype(),
1433+
pyarrow.float64(): pandas.Float64Dtype(),
1434+
pyarrow.string(): pandas.StringDtype(),
1435+
}
1436+
1437+
# Need to rename columns, as the to_pandas function cannot handle duplicate column names
1438+
table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
1439+
df = table_renamed.to_pandas(
1440+
types_mapper=dtype_mapping.get,
1441+
date_as_object=True,
1442+
timestamp_as_object=True,
1443+
)
1444+
1445+
res = df.to_numpy(na_value=None, dtype="object")
1446+
tmp_res = [ResultRow(*v) for v in res]
14141447
end_time = time.time()
14151448
print(f"Time taken to convert arrow table to list: {end_time - start_time} seconds")
1416-
return res
1417-
1418-
# # Need to use nullable types, as otherwise type can change when there are missing values.
1419-
# # See https://arrow.apache.org/docs/python/pandas.html#nullable-types
1420-
# # NOTE: This API is experimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
1421-
# dtype_mapping = {
1422-
# pyarrow.int8(): pandas.Int8Dtype(),
1423-
# pyarrow.int16(): pandas.Int16Dtype(),
1424-
# pyarrow.int32(): pandas.Int32Dtype(),
1425-
# pyarrow.int64(): pandas.Int64Dtype(),
1426-
# pyarrow.uint8(): pandas.UInt8Dtype(),
1427-
# pyarrow.uint16(): pandas.UInt16Dtype(),
1428-
# pyarrow.uint32(): pandas.UInt32Dtype(),
1429-
# pyarrow.uint64(): pandas.UInt64Dtype(),
1430-
# pyarrow.bool_(): pandas.BooleanDtype(),
1431-
# pyarrow.float32(): pandas.Float32Dtype(),
1432-
# pyarrow.float64(): pandas.Float64Dtype(),
1433-
# pyarrow.string(): pandas.StringDtype(),
1434-
# }
1435-
1436-
# # Need to rename columns, as the to_pandas function cannot handle duplicate column names
1437-
# table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
1438-
# df = table_renamed.to_pandas(
1439-
# types_mapper=dtype_mapping.get,
1440-
# date_as_object=True,
1441-
# timestamp_as_object=True,
1442-
# )
1443-
1444-
# res = df.to_numpy(na_value=None, dtype="object")
1445-
# return [ResultRow(*v) for v in res]
1449+
return tmp_res
14461450

14471451
@property
14481452
def rownumber(self):

0 commit comments

Comments
 (0)