Skip to content

Commit c74719b

Browse files
committed
Add read API to convert result to Polars
1 parent 03108ec commit c74719b

File tree

4 files changed

+16
-0
lines changed

4 files changed

+16
-0
lines changed

dev/dev-requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,3 +28,4 @@ pytz>=2018.3
2828
pytest~=7.0
2929
duckdb>=0.5.0,<2.0.0
3030
ray~=2.10.0
31+
polars~=1.15.0

paimon_python_api/table_read.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#################################################################################
1818

1919
import pandas as pd
20+
import polars as pl
2021
import pyarrow as pa
2122
import ray
2223

@@ -41,6 +42,10 @@ def to_arrow_batch_reader(self, splits: List[Split]) -> pa.RecordBatchReader:
4142
def to_pandas(self, splits: List[Split]) -> pd.DataFrame:
4243
"""Read data from splits and converted to pandas.DataFrame format."""
4344

45+
@abstractmethod
46+
def to_polars(self, splits: List[Split]) -> pl.DataFrame:
47+
"""Read data from splits and converted to polars.DataFrame format."""
48+
4449
@abstractmethod
4550
def to_duckdb(
4651
self,

paimon_python_java/pypaimon.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818

1919
import duckdb
2020
import pandas as pd
21+
import polars as pl
2122
import pyarrow as pa
2223
import ray
2324

@@ -164,6 +165,9 @@ def to_arrow_batch_reader(self, splits):
164165
def to_pandas(self, splits: List[Split]) -> pd.DataFrame:
165166
return self.to_arrow(splits).to_pandas()
166167

168+
def to_polars(self, splits: List[Split]) -> pl.DataFrame:
169+
return pl.from_arrow(self.to_arrow(splits))
170+
167171
def to_duckdb(
168172
self,
169173
splits: List[Split],

paimon_python_java/tests/test_write_and_read.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,10 @@
2222
import unittest
2323
import pandas as pd
2424
import pyarrow as pa
25+
import polars as pl
2526
from py4j.protocol import Py4JJavaError
2627

28+
from polars import testing as pl_testing
2729
from paimon_python_api import Schema
2830
from paimon_python_java import Catalog
2931
from paimon_python_java.java_gateway import get_gateway
@@ -297,6 +299,10 @@ def testAllWriteAndReadApi(self):
297299
pd.testing.assert_frame_equal(
298300
actual.reset_index(drop=True), all_data.reset_index(drop=True))
299301

302+
# to_polars
303+
pl_df = table_read.to_polars(splits)
304+
pl_testing.assert_frame_equal(pl_df, pl.from_pandas(all_data))
305+
300306
# to_duckdb
301307
duckdb_con = table_read.to_duckdb(splits, 'duckdb_table')
302308
# select *

0 commit comments

Comments
 (0)