@@ -1407,42 +1407,46 @@ def _convert_arrow_table(self, table: "pyarrow.Table"):
14071407 column_names = [c [0 ] for c in self .description ]
14081408 ResultRow = Row (* column_names )
14091409
1410- # if self.connection.disable_pandas is True:
1410+ if self .connection .disable_pandas is True :
1411+ start_time = time .time ()
1412+ columns_as_lists = [col .to_pylist () for col in table .itercolumns ()]
1413+ res = [ResultRow (* row ) for row in zip (* columns_as_lists )]
1414+ end_time = time .time ()
1415+ print (f"Time taken to convert arrow table to list: { end_time - start_time } seconds" )
1416+ return res
1417+
14111418 start_time = time .time ()
1412- columns_as_lists = [col .to_pylist () for col in table .itercolumns ()]
1413- res = [ResultRow (* row ) for row in zip (* columns_as_lists )]
1419+ # Need to use nullable types, as otherwise type can change when there are missing values.
1420+ # See https://arrow.apache.org/docs/python/pandas.html#nullable-types
1421+ # NOTE: This API is experimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
1422+ dtype_mapping = {
1423+ pyarrow .int8 (): pandas .Int8Dtype (),
1424+ pyarrow .int16 (): pandas .Int16Dtype (),
1425+ pyarrow .int32 (): pandas .Int32Dtype (),
1426+ pyarrow .int64 (): pandas .Int64Dtype (),
1427+ pyarrow .uint8 (): pandas .UInt8Dtype (),
1428+ pyarrow .uint16 (): pandas .UInt16Dtype (),
1429+ pyarrow .uint32 (): pandas .UInt32Dtype (),
1430+ pyarrow .uint64 (): pandas .UInt64Dtype (),
1431+ pyarrow .bool_ (): pandas .BooleanDtype (),
1432+ pyarrow .float32 (): pandas .Float32Dtype (),
1433+ pyarrow .float64 (): pandas .Float64Dtype (),
1434+ pyarrow .string (): pandas .StringDtype (),
1435+ }
1436+
1437+ # Need to rename columns, as the to_pandas function cannot handle duplicate column names
1438+ table_renamed = table .rename_columns ([str (c ) for c in range (table .num_columns )])
1439+ df = table_renamed .to_pandas (
1440+ types_mapper = dtype_mapping .get ,
1441+ date_as_object = True ,
1442+ timestamp_as_object = True ,
1443+ )
1444+
1445+ res = df .to_numpy (na_value = None , dtype = "object" )
1446+ tmp_res = [ResultRow (* v ) for v in res ]
14141447 end_time = time .time ()
14151448 print (f"Time taken to convert arrow table to list: { end_time - start_time } seconds" )
1416- return res
1417-
1418- # # Need to use nullable types, as otherwise type can change when there are missing values.
1419- # # See https://arrow.apache.org/docs/python/pandas.html#nullable-types
1420- # # NOTE: This api is epxerimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
1421- # dtype_mapping = {
1422- # pyarrow.int8(): pandas.Int8Dtype(),
1423- # pyarrow.int16(): pandas.Int16Dtype(),
1424- # pyarrow.int32(): pandas.Int32Dtype(),
1425- # pyarrow.int64(): pandas.Int64Dtype(),
1426- # pyarrow.uint8(): pandas.UInt8Dtype(),
1427- # pyarrow.uint16(): pandas.UInt16Dtype(),
1428- # pyarrow.uint32(): pandas.UInt32Dtype(),
1429- # pyarrow.uint64(): pandas.UInt64Dtype(),
1430- # pyarrow.bool_(): pandas.BooleanDtype(),
1431- # pyarrow.float32(): pandas.Float32Dtype(),
1432- # pyarrow.float64(): pandas.Float64Dtype(),
1433- # pyarrow.string(): pandas.StringDtype(),
1434- # }
1435-
1436- # # Need to rename columns, as the to_pandas function cannot handle duplicate column names
1437- # table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
1438- # df = table_renamed.to_pandas(
1439- # types_mapper=dtype_mapping.get,
1440- # date_as_object=True,
1441- # timestamp_as_object=True,
1442- # )
1443-
1444- # res = df.to_numpy(na_value=None, dtype="object")
1445- # return [ResultRow(*v) for v in res]
1449+ return tmp_res
14461450
14471451 @property
14481452 def rownumber (self ):
0 commit comments