Commit 5955bfe

code refactor

1 parent 3b86941 commit 5955bfe

2 files changed: +8 -32 lines changed

bigframes/core/blocks.py

Lines changed: 5 additions & 12 deletions

@@ -720,17 +720,12 @@ def to_pandas_batches(
         series_map = {}
         for col in itertools.chain(self.value_columns, self.index_columns):
             dtype = self.expr.get_column_type(col)
-            if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype):
-                # Due to a limitation in Apache Arrow (#45262), JSON columns are not
-                # natively supported by the to_pandas_batches() method, which is
-                # used by the anywidget backend.
-                # Workaround for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
-                # PyArrow doesn't support creating an empty array with db_dtypes.JSONArrowType,
-                # especially when nested.
+            try:
+                series_map[col] = pd.Series([], dtype=dtype)
+            except pa.ArrowNotImplementedError:
+                # PyArrow doesn't support creating an empty array with
+                # db_dtypes.JSONArrowType, especially when nested.
                 # Create with string type and then cast.
-
-                # MyPy doesn't automatically narrow the type of 'dtype' here,
-                # so we add an explicit check.
                 if isinstance(dtype, pd.ArrowDtype):
                     safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string(
                         dtype.pyarrow_dtype
@@ -742,8 +737,6 @@ def to_pandas_batches(
                     # contains_db_dtypes_json_dtype is accurate,
                     # but it's here for MyPy's sake.
                     series_map[col] = pd.Series([], dtype=dtype)
-            else:
-                series_map[col] = pd.Series([], dtype=dtype)
         empty_val = pd.DataFrame(series_map)
         dfs = map(
             lambda a: a[0],
bigframes/session/loader.py

Lines changed: 3 additions & 20 deletions

@@ -45,7 +45,6 @@
 import google.cloud.bigquery.table
 from google.cloud.bigquery_storage_v1 import types as bq_storage_types
 import pandas
-import pyarrow as pa
 
 import bigframes._tools
 import bigframes._tools.strings
@@ -1307,22 +1306,6 @@ def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict:
     return configuration
 
 
-def _has_json_arrow_type(arrow_type: pa.DataType) -> bool:
-    """
-    Searches recursively for JSON array type within a PyArrow DataType.
-    """
-    if arrow_type == bigframes.dtypes.JSON_ARROW_TYPE:
-        return True
-    if pa.types.is_list(arrow_type):
-        return _has_json_arrow_type(arrow_type.value_type)
-    if pa.types.is_struct(arrow_type):
-        for i in range(arrow_type.num_fields):
-            if _has_json_arrow_type(arrow_type.field(i).type):
-                return True
-        return False
-    return False
-
-
 def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype):
     """
     Determines whether a datatype is supported by bq load jobs.
@@ -1339,9 +1322,9 @@ def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype):
     if column_type == bigframes.dtypes.JSON_DTYPE:
         return
 
-    if isinstance(column_type, pandas.ArrowDtype) and _has_json_arrow_type(
-        column_type.pyarrow_dtype
-    ):
+    if isinstance(
+        column_type, pandas.ArrowDtype
+    ) and bigframes.dtypes.contains_db_dtypes_json_dtype(column_type):
        raise NotImplementedError(
            f"Nested JSON types, found in column `{name}`: `{column_type}`', "
            f"are currently unsupported for upload. {constants.FEEDBACK_LINK}"
