@@ -88,7 +88,7 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]:
8888
8989 yield batch
9090
91- def to_arrow_table (self ) -> pyarrow .Table :
91+ def to_arrow_table (self , limit : Optional [ int ] = None ) -> pyarrow .Table :
9292 # Need to provide schema if no result rows, as arrow can't infer
9393 # If there are rows, it is safest to infer schema from batches.
9494 # Any discrepancies between predicted schema and actual schema will produce errors.
@@ -97,9 +97,12 @@ def to_arrow_table(self) -> pyarrow.Table:
9797 peek_value = list (peek_it )
9898 # TODO: Enforce our internal schema on the table for consistency
9999 if len (peek_value ) > 0 :
100- return pyarrow .Table .from_batches (
101- itertools .chain (peek_value , batches ), # reconstruct
102- )
100+ batches = itertools .chain (peek_value , batches ) # reconstruct
101+ if limit :
102+ batches = pyarrow_utils .truncate_pyarrow_iterable (
103+ batches , max_results = limit
104+ )
105+ return pyarrow .Table .from_batches (batches )
103106 else :
104107 try :
105108 return self ._schema .to_pyarrow ().empty_table ()
@@ -108,7 +111,7 @@ def to_arrow_table(self) -> pyarrow.Table:
108111 return self ._schema .to_pyarrow (use_storage_types = True ).empty_table ()
109112
def to_pandas(self, limit: Optional[int] = None) -> pd.DataFrame:
    """Materialize the query results as a pandas DataFrame.

    Args:
        limit: Optional cap on the number of rows converted; ``None``
            converts every row.

    Returns:
        A DataFrame produced from the (possibly truncated) Arrow table,
        converted with this result's schema applied.
    """
    arrow_table = self.to_arrow_table(limit=limit)
    return io_pandas.arrow_to_pandas(arrow_table, self._schema)
112115
113116 def to_pandas_batches (
114117 self , page_size : Optional [int ] = None , max_results : Optional [int ] = None
0 commit comments