Skip to content

Commit 52bce76

Browse files
fix extension type issue
1 parent 235e5e5 commit 52bce76

File tree

3 files changed

+25
-5
lines changed

3 files changed

+25
-5
lines changed

bigframes/core/schema.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,16 +95,16 @@ def to_bigquery(
9595
for item in self.items
9696
)
9797

98-
def to_pyarrow(self, use_storage_type: bool = False) -> pyarrow.Schema:
98+
def to_pyarrow(self, use_storage_types: bool = False) -> pyarrow.Schema:
9999
fields = []
100100
for item in self.items:
101101
pa_type = bigframes.dtypes.bigframes_dtype_to_arrow_dtype(item.dtype)
102+
if use_storage_types:
103+
pa_type = bigframes.dtypes.to_storage_type(pa_type)
102104
fields.append(
103105
pyarrow.field(
104106
item.column,
105-
pa_type.storage_type
106-
if use_storage_type and isinstance(pa_type, pyarrow.ExtensionType)
107-
else pa_type,
107+
type=pa_type,
108108
nullable=not pyarrow.types.is_list(pa_type),
109109
)
110110
)

bigframes/dtypes.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,24 @@ def bigframes_dtype_to_arrow_dtype(
553553
)
554554

555555

556+
def to_storage_type(
    arrow_type: pa.DataType,
) -> pa.DataType:
    """Return ``arrow_type`` with extension types replaced by their storage types.

    Some pyarrow versions don't fully support extension types, for example
    when generating an empty table from a schema. This helper strips the
    extension wrappers, descending into list and struct types so that nested
    extension types are replaced as well.

    Args:
        arrow_type: The arrow type to convert.

    Returns:
        An equivalent arrow type in which extension types (at the top level
        or nested inside list/struct types) are replaced by their storage
        types. Types that are not extension, list or struct types are
        returned unchanged.
    """
    if isinstance(arrow_type, pa.ExtensionType):
        # The storage type can itself contain extension types, so keep
        # unwrapping instead of returning it as-is.
        return to_storage_type(arrow_type.storage_type)
    if pa.types.is_list(arrow_type):
        # Narrow the static type for type checkers; is_list already holds.
        assert isinstance(arrow_type, pa.ListType)
        # Rebuild from the value *field* rather than only the value type so
        # the item name, nullability and metadata are carried over.
        value_field = arrow_type.value_field
        return pa.list_(value_field.with_type(to_storage_type(value_field.type)))
    if pa.types.is_struct(arrow_type):
        # Narrow the static type for type checkers; is_struct already holds.
        assert isinstance(arrow_type, pa.StructType)
        return pa.struct(
            [
                field.with_type(to_storage_type(field.type))
                for field in bigframes.core.backports.pyarrow_struct_type_fields(
                    arrow_type
                )
            ]
        )
    return arrow_type
572+
573+
556574
def arrow_type_to_literal(
557575
arrow_type: pa.DataType,
558576
) -> Any:

bigframes/session/executor.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import bigframes.core
3131
from bigframes.core import bq_data, local_data, pyarrow_utils
3232
import bigframes.core.schema
33+
import bigframes.dtypes
3334
import bigframes.session._io.pandas as io_pandas
3435
import bigframes.session.execution_spec as ex_spec
3536

@@ -103,7 +104,8 @@ def to_arrow_table(self) -> pyarrow.Table:
103104
try:
104105
return self._schema.to_pyarrow().empty_table()
105106
except pa.ArrowNotImplementedError:
106-
return self._schema.to_pyarrow(use_storage_type=True).empty_table()
107+
# Some pyarrow versions have a bug where empty_table() only supports base storage types, not extension types.
108+
return self._schema.to_pyarrow(use_storage_types=True).empty_table()
107109

108110
def to_pandas(self) -> pd.DataFrame:
    """Return the result as a pandas DataFrame.

    Builds the arrow table with ``to_arrow_table`` and hands it, together
    with this object's schema, to ``io_pandas.arrow_to_pandas``.
    """
    arrow_table = self.to_arrow_table()
    return io_pandas.arrow_to_pandas(arrow_table, self._schema)

0 commit comments

Comments
 (0)