Skip to content

Commit 39cf595

Browse files
committed
revert 1 file to match main branch
1 parent 3cc643d commit 39cf595

File tree

11 files changed

+13
-217
lines changed

11 files changed

+13
-217
lines changed

bigframes/bigquery/_operations/ai.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,6 @@ def generate(
123123
if output_schema is None:
124124
output_schema_str = None
125125
else:
126-
# Validate output schema types
127-
for col_name, col_type in output_schema.items():
128-
if col_type.upper() == "JSON":
129-
raise ValueError(
130-
"JSON type is not supported in output_schema. "
131-
"Supported types are: STRING, INT64, FLOAT64, BOOL, ARRAY, and STRUCT."
132-
)
133126
output_schema_str = ", ".join(
134127
[f"{name} {sql_type}" for name, sql_type in output_schema.items()]
135128
)

bigframes/core/compile/polars/compiler.py

Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,13 @@
4545
polars_installed = True
4646
if TYPE_CHECKING:
4747
import polars as pl
48-
import pyarrow as pa
4948
else:
5049
try:
5150
import bigframes._importing
5251

52+
# Use import_polars() instead of importing directly so that we check
53+
# the version numbers.
5354
pl = bigframes._importing.import_polars()
54-
import pyarrow as pa
5555
except Exception:
5656
polars_installed = False
5757

@@ -427,21 +427,6 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
427427
assert isinstance(op, json_ops.JSONDecode)
428428
return input.str.json_decode(_DTYPE_MAPPING[op.to_type])
429429

430-
@compile_op.register(json_ops.ToJSONString)
431-
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
432-
# Convert JSON to string representation
433-
return input.cast(pl.String())
434-
435-
@compile_op.register(json_ops.ParseJSON)
436-
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
437-
# In Polars, JSON is stored as string, so no decoding needed
438-
return input
439-
440-
@compile_op.register(json_ops.JSONExtract)
441-
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
442-
assert isinstance(op, json_ops.JSONExtract)
443-
return input.str.json_path_match(op.json_path)
444-
445430
@compile_op.register(arr_ops.ToArrayOp)
446431
def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr:
447432
return pl.concat_list(*inputs)
@@ -621,14 +606,9 @@ def compile_readlocal(self, node: nodes.ReadLocalNode):
621606
scan_item.source_id: scan_item.id.sql
622607
for scan_item in node.scan_list.items
623608
}
624-
625-
if hasattr(node.local_data_source, "to_arrow"):
626-
schema, batches = node.local_data_source.to_arrow(json_type="string")
627-
arrow_data = pa.Table.from_batches(batches, schema)
628-
else:
629-
arrow_data = node.local_data_source.data
630-
631-
lazy_frame = cast(pl.DataFrame, pl.from_arrow(arrow_data)).lazy()
609+
lazy_frame = cast(
610+
pl.DataFrame, pl.from_arrow(node.local_data_source.data)
611+
).lazy()
632612
lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read)
633613
if node.offsets_col:
634614
lazy_frame = lazy_frame.with_columns(

bigframes/dataframe.py

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -783,8 +783,7 @@ def __repr__(self) -> str:
783783

784784
opts = bigframes.options.display
785785
max_results = opts.max_rows
786-
787-
# anywidget mode uses the same display logic as the "deferred" mode
786+
# anywdiget mode uses the same display logic as the "deferred" mode  [NOTE(review): "anywdiget" looks like a typo for "anywidget" in the reverted source — confirm against main branch]
788787
# for faster execution
789788
if opts.repr_mode in ("deferred", "anywidget"):
790789
return formatter.repr_query_job(self._compute_dry_run())
@@ -856,28 +855,14 @@ def _repr_html_(self) -> str:
856855

857856
from bigframes import display
858857

859-
# The anywidget frontend doesn't support the db_dtypes JSON type, so
860-
# convert to strings for display.
861-
json_cols = [
862-
series_name
863-
for series_name, series in df.items()
864-
if bigframes.dtypes.contains_db_dtypes_json_dtype(series.dtype)
865-
]
866-
if json_cols:
867-
warnings.warn(
868-
"Converting JSON columns to strings for display. "
869-
"This is temporary and will be removed when the frontend supports JSON types."
870-
)
871-
for col in json_cols:
872-
df[col] = df[col]._apply_unary_op(ops.json_ops.ToJSONString())
873-
874858
# Always create a new widget instance for each display call
875859
# This ensures that each cell gets its own widget and prevents
876860
# unintended sharing between cells
877861
widget = display.TableWidget(df.copy())
878862

879863
ipython_display(widget)
880864
return "" # Return empty string since we used display()
865+
881866
except (AttributeError, ValueError, ImportError):
882867
# Fallback if anywidget is not available
883868
warnings.warn(
@@ -1963,22 +1948,7 @@ def _to_pandas_batches(
19631948
*,
19641949
allow_large_results: Optional[bool] = None,
19651950
) -> blocks.PandasBatches:
1966-
# Workaround for PyArrow bug https://github.com/apache/arrow/issues/45262
1967-
# JSON columns are not supported in to_pandas_batches
1968-
json_cols = [
1969-
str(col_name) # Cast to string
1970-
for col_name, dtype in self.dtypes.items()
1971-
if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype)
1972-
]
1973-
1974-
df = self
1975-
if json_cols:
1976-
# Convert JSON columns to strings before materialization
1977-
df = df.copy()
1978-
for col in json_cols:
1979-
df[col] = df[col].astype("string")
1980-
1981-
return df._block.to_pandas_batches(
1951+
return self._block.to_pandas_batches(
19821952
page_size=page_size,
19831953
max_results=max_results,
19841954
allow_large_results=allow_large_results,

bigframes/display/anywidget.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,12 +209,6 @@ def _reset_batches_for_new_page_size(self) -> None:
209209

210210
def _set_table_html(self) -> None:
211211
"""Sets the current html data based on the current page and page size."""
212-
# For empty dataframe, render empty table with headers.
213-
if self.row_count == 0:
214-
page_data = self._cached_data
215-
else:
216-
start = self.page * self.page_size
217-
end = start + self.page_size
218212
if self._error_message:
219213
self.table_html = (
220214
f"<div class='bigframes-error-message'>{self._error_message}</div>"
@@ -256,5 +250,8 @@ def _page_size_changed(self, _change: Dict[str, Any]) -> None:
256250
# Reset the page to 0 when page size changes to avoid invalid page states
257251
self.page = 0
258252

253+
# Reset batches to use new page size for future data fetching
254+
self._reset_batches_for_new_page_size()
255+
259256
# Update the table display
260257
self._set_table_html()

bigframes/ml/llm.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -731,17 +731,8 @@ def predict(
731731
"ground_with_google_search": ground_with_google_search,
732732
}
733733
if output_schema:
734-
supported_dtypes = (
735-
"int64",
736-
"float64",
737-
"bool",
738-
"string",
739-
"array<type>",
740-
"struct<column type>",
741-
)
742734
output_schema = {
743-
k: utils.standardize_type(v, supported_dtypes=supported_dtypes)
744-
for k, v in output_schema.items()
735+
k: utils.standardize_type(v) for k, v in output_schema.items()
745736
}
746737
options["output_schema"] = output_schema
747738
return self._predict_and_retry(

bigframes/operations/output_schemas.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414

1515
import pyarrow as pa
1616

17-
from bigframes import dtypes
18-
1917

2018
def parse_sql_type(sql: str) -> pa.DataType:
2119
"""
@@ -45,9 +43,6 @@ def parse_sql_type(sql: str) -> pa.DataType:
4543
if sql.upper() == "BOOL":
4644
return pa.bool_()
4745

48-
if sql.upper() == "JSON":
49-
return dtypes.JSON_ARROW_TYPE
50-
5146
if sql.upper().startswith("ARRAY<") and sql.endswith(">"):
5247
inner_type = sql[len("ARRAY<") : -1]
5348
return pa.list_(parse_sql_type(inner_type))

bigframes/series.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,6 @@ def astype(
610610
if errors not in ["raise", "null"]:
611611
raise ValueError("Argument 'errors' must be one of 'raise' or 'null'")
612612
dtype = bigframes.dtypes.bigframes_type(dtype)
613-
614613
return self._apply_unary_op(
615614
bigframes.operations.AsTypeOp(to_type=dtype, safe=(errors == "null"))
616615
)

tests/system/small/test_dataframe.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6142,15 +6142,3 @@ def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
61426142

61436143
with pytest.raises(KeyError):
61446144
bf_df.agg(agg_funcs)
6145-
6146-
6147-
def test_to_pandas_batches_with_json_columns(session):
6148-
"""Test that JSON columns are properly handled in to_pandas_batches."""
6149-
# Create a DataFrame with JSON column
6150-
df = session.read_gbq('SELECT JSON \'{"key": "value"}\' as json_col')
6151-
6152-
# This should not raise an error
6153-
batches = df._to_pandas_batches(page_size=10)
6154-
next(batches)
6155-
6156-
# TODO

tests/system/small/test_series.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4077,6 +4077,7 @@ def test_json_astype_others(data, to_type, errors):
40774077
pytest.param(["10.2", None], dtypes.INT_DTYPE, id="to_int"),
40784078
pytest.param(["false", None], dtypes.FLOAT_DTYPE, id="to_float"),
40794079
pytest.param(["10.2", None], dtypes.BOOL_DTYPE, id="to_bool"),
4080+
pytest.param(["true", None], dtypes.STRING_DTYPE, id="to_string"),
40804081
],
40814082
)
40824083
def test_json_astype_others_raise_error(data, to_type):

tests/unit/test_dataframe.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -181,26 +181,3 @@ def test_dataframe_ai_property_future_warning(
181181

182182
with pytest.warns(FutureWarning):
183183
dataframe.ai
184-
185-
186-
@pytest.fixture()
187-
def json_df(polars_session: bigframes.session.Session) -> bigframes.dataframe.DataFrame:
188-
"""Create a DataFrame with a JSON column for testing."""
189-
import bigframes.dtypes
190-
191-
pandas_df = pd.DataFrame(
192-
{
193-
"a": [1],
194-
"b": ['{"c": 2, "d": 3}'],
195-
}
196-
)
197-
pandas_df["b"] = pandas_df["b"].astype(bigframes.dtypes.JSON_DTYPE)
198-
return polars_session.read_pandas(pandas_df)
199-
200-
201-
def test_to_pandas_batches_with_json_column(json_df: bigframes.dataframe.DataFrame):
202-
"""Test that JSON columns are converted to strings in to_pandas_batches."""
203-
batches = list(json_df._to_pandas_batches(page_size=10))
204-
assert len(batches) > 0
205-
# Verify the JSON column is now string type
206-
assert batches[0]["b"].dtype == pd.StringDtype(storage="pyarrow")

0 commit comments

Comments
 (0)