@@ -1402,17 +1402,42 @@ def _convert_columnar_table(self, table):
14021402 result .append (ResultRow (* curr_row ))
14031403
14041404 return result
1405+
def print_mem(self):
    """Print the current process's memory and CPU usage to stdout.

    Debug-only helper: reports the resident set size (RSS) in MB and a
    CPU-utilization sample for the running process. Requires the
    third-party ``psutil`` package (imported lazily so the dependency is
    only needed when this helper is actually called).

    Returns:
        None. Output goes to stdout via ``print``.
    """
    import os
    import psutil

    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    # RSS is reported in bytes; convert to MB for readability.
    total_mem_mb = mem_info.rss / 1024 / 1024
    # NOTE: interval=0.1 makes cpu_percent() BLOCK for ~100 ms per call —
    # acceptable for ad-hoc debugging, but do not call this on a hot path.
    cpu_percent = process.cpu_percent(interval=0.1)
    print(f"Total memory usage: {total_mem_mb:.2f} MB")
    print(f"CPU percent: {cpu_percent:.2f} %")
14051420
14061421 def _convert_arrow_table (self , table : "pyarrow.Table" ):
1422+ import sys
1423+ from pympler import asizeof
1424+
1425+ self .print_mem ()
1426+ print (f"Memory size table: { table .nbytes / (1024 ** 2 ):.2f} MB" )
1427+ # Convert to MB for easier reading
14071428 column_names = [c [0 ] for c in self .description ]
14081429 ResultRow = Row (* column_names )
14091430
14101431 if self .connection .disable_pandas is True :
14111432 start_time = time .time ()
14121433 columns_as_lists = [col .to_pylist () for col in table .itercolumns ()]
1434+ self .print_mem ()
1435+ print (f"Memory size columns_as_lists: { sum (sys .getsizeof (col ) for col in columns_as_lists ) / (1024 ** 2 ):.2f} MB" )
14131436 res = [ResultRow (* row ) for row in zip (* columns_as_lists )]
1437+ self .print_mem ()
14141438 end_time = time .time ()
14151439 print (f"Time taken to convert arrow table to list: { end_time - start_time } seconds" )
1440+ print (f"Memory size res: { sum (sys .getsizeof (row ) for row in res ) / (1024 ** 2 ):.2f} MB" )
14161441 return res
14171442
14181443 start_time = time .time ()
@@ -1436,14 +1461,23 @@ def _convert_arrow_table(self, table: "pyarrow.Table"):
14361461
14371462 # Need to rename columns, as the to_pandas function cannot handle duplicate column names
14381463 table_renamed = table .rename_columns ([str (c ) for c in range (table .num_columns )])
1464+ print (f"Memory size table_renamed: { table_renamed .nbytes / (1024 ** 2 ):.2f} MB" )
14391465 df = table_renamed .to_pandas (
14401466 types_mapper = dtype_mapping .get ,
14411467 date_as_object = True ,
14421468 timestamp_as_object = True ,
1469+ self_destruct = True ,
14431470 )
1471+ print (f"Memory size df: { df .memory_usage (deep = True ).sum () / (1024 ** 2 ):.2f} MB" )
1472+ self .print_mem ()
1473+ # del table_renamed
14441474
14451475 res = df .to_numpy (na_value = None , dtype = "object" )
1476+ print (f"Memory size res: { res .nbytes / (1024 ** 2 ):.2f} MB" )
1477+ self .print_mem ()
1478+ # del df
14461479 tmp_res = [ResultRow (* v ) for v in res ]
1480+ self .print_mem ()
14471481 end_time = time .time ()
14481482 print (f"Time taken to convert arrow table to list: { end_time - start_time } seconds" )
14491483 return tmp_res
@@ -1471,7 +1505,7 @@ def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
14711505 and not self .has_been_closed_server_side
14721506 and self .has_more_rows
14731507 ):
1474- print (f"TOTAL DATA ROWS { TOTAL_SIZE } " )
1508+ # print(f"TOTAL DATA ROWS {TOTAL_SIZE}")
14751509 self ._fill_results_buffer ()
14761510 partial_results = self .results .next_n_rows (n_remaining_rows )
14771511 results .append (partial_results )
@@ -1515,11 +1549,11 @@ def fetchall_arrow(self) -> "pyarrow.Table":
15151549 self ._next_row_index += results .num_rows
15161550
15171551 # partial_result_chunks = [results]
1518- print ("Server side has more rows" , self .has_more_rows )
1552+ # print("Server side has more rows", self.has_more_rows)
15191553 TOTAL_SIZE = results .num_rows
15201554
15211555 while not self .has_been_closed_server_side and self .has_more_rows :
1522- print (f"TOTAL DATA ROWS { TOTAL_SIZE } " )
1556+ # print(f"TOTAL DATA ROWS {TOTAL_SIZE}")
15231557 self ._fill_results_buffer ()
15241558 partial_results = self .results .remaining_rows ()
15251559 results .append (partial_results )
0 commit comments