Commit 6c3567b

call API on local data for compiler.py
1 parent 8e4ea88 commit 6c3567b

File tree

1 file changed: +5 -26 lines changed


bigframes/core/compile/polars/compiler.py

Lines changed: 5 additions & 26 deletions
@@ -622,32 +622,11 @@ def compile_readlocal(self, node: nodes.ReadLocalNode):
             for scan_item in node.scan_list.items
         }
 
-        # Workaround for PyArrow bug https://github.com/apache/arrow/issues/45262
-        # Convert JSON columns to strings before Polars processing
-        arrow_data = node.local_data_source.data
-        schema = arrow_data.schema
-
-        # Check if any columns are JSON type
-        json_field_indices = [
-            i
-            for i, field in enumerate(schema)
-            if pa.types.is_extension_type(field.type)
-            and field.type.extension_name == "google:sqlType:json"
-        ]
-
-        if json_field_indices:
-            # Convert JSON columns to string columns
-            new_arrays = []
-            new_fields = []
-            for i, field in enumerate(schema):
-                if i in json_field_indices:
-                    # Cast JSON to string
-                    new_arrays.append(arrow_data.column(i).cast(pa.string()))
-                    new_fields.append(pa.field(field.name, pa.string()))
-                else:
-                    new_arrays.append(arrow_data.column(i))
-                    new_fields.append(field)
-            arrow_data = pa.table(new_arrays, schema=pa.schema(new_fields))
+        if hasattr(node.local_data_source, "to_arrow"):
+            schema, batches = node.local_data_source.to_arrow(json_type="string")
+            arrow_data = pa.Table.from_batches(batches, schema)
+        else:
+            arrow_data = node.local_data_source.data
 
         lazy_frame = cast(pl.DataFrame, pl.from_arrow(arrow_data)).lazy()
         lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read)
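
For context, the deleted block is equivalent to the standalone helper sketched below: it scans an Arrow schema for columns tagged with the "google:sqlType:json" extension type and casts them to plain strings, sidestepping PyArrow bug apache/arrow#45262 before the data reaches Polars. This is a minimal sketch distilled from the removed lines, not code that ships in bigframes; the helper name json_columns_to_string is invented here.

    import pyarrow as pa


    def json_columns_to_string(table: pa.Table) -> pa.Table:
        """Cast any "google:sqlType:json" extension columns to plain strings."""
        new_arrays = []
        new_fields = []
        for i, field in enumerate(table.schema):
            if (
                pa.types.is_extension_type(field.type)
                and field.type.extension_name == "google:sqlType:json"
            ):
                # The JSON extension type wraps string storage, so casting to
                # pa.string() keeps the serialized JSON text intact.
                new_arrays.append(table.column(i).cast(pa.string()))
                new_fields.append(pa.field(field.name, pa.string()))
            else:
                new_arrays.append(table.column(i))
                new_fields.append(field)
        return pa.table(new_arrays, schema=pa.schema(new_fields))

Tables without JSON columns pass through with arrays and fields copied unchanged, so the helper is safe to call unconditionally.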

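The replacement code duck-types against a to_arrow(json_type=...) method and falls back to the raw .data table when it is absent. The class below is a hypothetical sketch of that assumed interface, not the actual bigframes implementation; it reuses the json_columns_to_string helper from the sketch above, and the name LocalDataSourceSketch is invented.

    from typing import Iterable, Tuple

    import pyarrow as pa


    class LocalDataSourceSketch:
        """Hypothetical stand-in for a local data source exposing to_arrow()."""

        def __init__(self, data: pa.Table):
            self.data = data

        def to_arrow(
            self, json_type: str = "string"
        ) -> Tuple[pa.Schema, Iterable[pa.RecordBatch]]:
            table = self.data
            if json_type == "string":
                # Materialize JSON extension columns as plain strings,
                # matching what the deleted inline workaround produced.
                table = json_columns_to_string(table)
            return table.schema, table.to_batches()


    # Usage mirroring the new call site in compile_readlocal:
    source = LocalDataSourceSketch(pa.table({"x": [1, 2]}))
    schema, batches = source.to_arrow(json_type="string")
    arrow_data = pa.Table.from_batches(list(batches), schema)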