Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 41 additions & 3 deletions tests/fast/arrow/test_filter_pushdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
import duckdb

pa = pytest.importorskip("pyarrow")
pd = pytest.importorskip("pyarrow.dataset")
pa_lib = pytest.importorskip("pyarrow.lib")
pq = pytest.importorskip("pyarrow.parquet")
ds = pytest.importorskip("pyarrow.dataset")
np = pytest.importorskip("numpy")
re = pytest.importorskip("re")

Expand All @@ -26,7 +27,7 @@ def create_pyarrow_table(rel):

def create_pyarrow_dataset(rel):
    """Build a pyarrow dataset from ``rel``.

    ``rel`` is first converted with ``create_pyarrow_table``; the result is
    then wrapped with ``pd.dataset`` (``pd`` is this file's alias for the
    ``pyarrow.dataset`` module).
    """
    table = create_pyarrow_table(rel)
    # Single return: the block previously carried two consecutive returns
    # (``ds.dataset`` and ``pd.dataset``), the second of which could never run.
    # Both aliases point at ``pyarrow.dataset``, so the result is unchanged.
    return pd.dataset(table)


def test_decimal_filter_pushdown(duckdb_cursor):
Expand Down Expand Up @@ -549,7 +550,7 @@ def test_9371(self, duckdb_cursor, tmp_path):
df = df.set_index("ts") # SET INDEX! (It all works correctly when the index is not set)
df.to_parquet(str(file_path))

my_arrow_dataset = ds.dataset(str(file_path))
my_arrow_dataset = pd.dataset(str(file_path))
res = duckdb_cursor.execute("SELECT * FROM my_arrow_dataset WHERE ts = ?", parameters=[dt]).fetch_arrow_table()
output = duckdb_cursor.sql("select * from res").fetchall()
expected = [(1, dt), (2, dt), (3, dt)]
Expand Down Expand Up @@ -1018,3 +1019,40 @@ def test_dynamic_filter(self, duckdb_cursor):
duckdb_cursor.register("t", t)
res = duckdb_cursor.sql("SELECT a FROM t ORDER BY a LIMIT 11").fetchall()
assert len(res) == 11

def test_binary_view_filter(self, duckdb_cursor):
    """An equality filter on a binary view column returns the single matching row.

    The filter is not pushed down, because pyarrow does not support filtering on view columns yet.
    """
    values = pa.array([b"abc", b"efg"], type=pa.binary_view())
    # The SQL text below names ``dset``, so this local has to keep that exact name.
    dset = pd.dataset(pa.table({"col": values}))
    matching = duckdb_cursor.sql("select * from dset where col = 'abc'::binary")
    assert len(matching) == 1

def test_string_view_filter(self, duckdb_cursor):
    """An equality filter on a string view column returns the single matching row.

    The filter is not pushed down, because pyarrow does not support filtering on view columns yet.
    """
    values = pa.array(["abc", "efg"], type=pa.string_view())
    # The SQL text below names ``dset``, so this local has to keep that exact name.
    dset = pd.dataset(pa.table({"col": values}))
    matching = duckdb_cursor.sql("select * from dset where col = 'abc'")
    assert len(matching) == 1

@pytest.mark.xfail(raises=pa_lib.ArrowNotImplementedError)
def test_canary_for_pyarrow_string_view_filter_support(self, duckdb_cursor):
    """Canary: starts to xpass once pyarrow supports filters on string view columns."""
    # Equality predicate comparing the column against a string scalar.
    predicate = pd.field("col") == pd.scalar("val1")
    # Dataset whose only column is a string view.
    view_column = pa.array(["val1", "val2"], type=pa.string_view())
    dataset = pd.dataset(pa.table({"col": view_column}))
    # Creating the scanner is the step that currently fails.
    dataset.scanner(columns=["col"], filter=predicate)

@pytest.mark.xfail(raises=pa_lib.ArrowNotImplementedError)
def test_canary_for_pyarrow_binary_view_filter_support(self, duckdb_cursor):
    """Canary: starts to xpass once pyarrow supports filters on binary view columns."""
    # Equality predicate comparing the column against a binary view scalar.
    needle = pa.scalar(b"bin1", pa.binary_view())
    predicate = pd.field("col") == pd.scalar(needle)
    # Dataset whose only column is a binary view.
    view_column = pa.array([b"bin1", b"bin2"], type=pa.binary_view())
    dataset = pd.dataset(pa.table({"col": view_column}))
    # Creating the scanner is the step that currently fails.
    dataset.scanner(columns=["col"], filter=predicate)
Loading