Skip to content

Commit 1d00d0e

Browse files
committed
#46 Improve Readability of TableRead Implementation
1 parent dd9dfaa commit 1d00d0e

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

pypaimon/api/table_read.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@
3131
class TableRead(ABC):
3232
"""To read data from data splits."""
3333

34-
@abstractmethod
35-
def to_arrow_batch_reader(self, splits: List[Split]) -> pa.RecordBatchReader:
36-
"""Read data from splits and converted to pyarrow.RecordBatchReader format."""
37-
3834
@abstractmethod
3935
def to_arrow(self, splits: List[Split]) -> pa.Table:
4036
"""Read data from splits and converted to pyarrow.Table format."""
4137

38+
@abstractmethod
39+
def to_arrow_batch_reader(self, splits: List[Split]) -> pa.RecordBatchReader:
40+
"""Read data from splits and converted to pyarrow.RecordBatchReader format."""
41+
4242
@abstractmethod
4343
def to_pandas(self, splits: List[Split]) -> pd.DataFrame:
4444
"""Read data from splits and converted to pandas.DataFrame format."""

pypaimon/py4j/java_implementation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,16 +181,16 @@ def __init__(self, j_table_read, j_read_type, catalog_options):
181181
self._j_bytes_reader = get_gateway().jvm.InvocationUtil.createParallelBytesReader(
182182
j_table_read, j_read_type, TableRead._get_max_workers(catalog_options))
183183

184+
def to_arrow(self, splits) -> pa.Table:
185+
record_batch_reader = self.to_arrow_batch_reader(splits)
186+
return pa.Table.from_batches(record_batch_reader, schema=self._arrow_schema)
187+
184188
def to_arrow_batch_reader(self, splits) -> pa.RecordBatchReader:
185189
j_splits = list(map(lambda s: s.to_j_split(), splits))
186190
self._j_bytes_reader.setSplits(j_splits)
187191
batch_iterator = self._batch_generator()
188192
return pa.RecordBatchReader.from_batches(self._arrow_schema, batch_iterator)
189193

190-
def to_arrow(self, splits) -> pa.Table:
191-
record_batch_reader = self.to_arrow_batch_reader(splits)
192-
return pa.Table.from_batches(record_batch_reader, schema=self._arrow_schema)
193-
194194
def to_pandas(self, splits: List[Split]) -> pd.DataFrame:
195195
return self.to_arrow(splits).to_pandas()
196196

0 commit comments

Comments
 (0)