Skip to content

Commit db29dc6

Browse files
authored
Merge branch 'main' into migrate-coalesce-op-to-sqlglot
2 parents 9a06288 + 764e318 commit db29dc6

File tree

19 files changed

+288
-43
lines changed

19 files changed

+288
-43
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.28.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.27.0...v2.28.0) (2025-11-03)
8+
9+
10+
### Features
11+
12+
* Add bigframes.bigquery.st_simplify ([#2210](https://github.com/googleapis/python-bigquery-dataframes/issues/2210)) ([ecee2bc](https://github.com/googleapis/python-bigquery-dataframes/commit/ecee2bc6ada0bc968fc56ed7194dc8c043547e93))
13+
* Add Series.dt.day_name ([#2218](https://github.com/googleapis/python-bigquery-dataframes/issues/2218)) ([5e006e4](https://github.com/googleapis/python-bigquery-dataframes/commit/5e006e404b65c32e5b1d342ebfcfce59ee592c8c))
14+
* Polars engine supports std, var ([#2215](https://github.com/googleapis/python-bigquery-dataframes/issues/2215)) ([ef5e83a](https://github.com/googleapis/python-bigquery-dataframes/commit/ef5e83acedf005cbe1e6ad174bec523ac50517d7))
15+
* Support INFORMATION_SCHEMA views in `read_gbq` ([#1895](https://github.com/googleapis/python-bigquery-dataframes/issues/1895)) ([d97cafc](https://github.com/googleapis/python-bigquery-dataframes/commit/d97cafcb5921fca2351b18011b0e54e2631cc53d))
16+
* Support some python standard lib callables in apply/combine ([#2187](https://github.com/googleapis/python-bigquery-dataframes/issues/2187)) ([86a2756](https://github.com/googleapis/python-bigquery-dataframes/commit/86a27564b48b854a32b3d11cd2105aa0fa496279))
17+
18+
19+
### Bug Fixes
20+
21+
* Correct connection normalization in blob system tests ([#2222](https://github.com/googleapis/python-bigquery-dataframes/issues/2222)) ([a0e1e50](https://github.com/googleapis/python-bigquery-dataframes/commit/a0e1e50e47c758bdceb54d04180ed36b35cf2e35))
22+
* Improve error handling in blob operations ([#2194](https://github.com/googleapis/python-bigquery-dataframes/issues/2194)) ([d410046](https://github.com/googleapis/python-bigquery-dataframes/commit/d4100466612df0523d01ed01ca1e115dabd6ef45))
23+
* Resolve AttributeError in TableWidget and improve initialization ([#1937](https://github.com/googleapis/python-bigquery-dataframes/issues/1937)) ([4c4c9b1](https://github.com/googleapis/python-bigquery-dataframes/commit/4c4c9b14657b7cda1940ef39e7d4db20a9ff5308))
24+
25+
26+
### Documentation
27+
28+
* Update bq_dataframes_llm_output_schema.ipynb ([#2004](https://github.com/googleapis/python-bigquery-dataframes/issues/2004)) ([316ba9f](https://github.com/googleapis/python-bigquery-dataframes/commit/316ba9f557d792117d5a7845d7567498f78dd513))
29+
730
## [2.27.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.26.0...v2.27.0) (2025-10-24)
831

932

bigframes/core/blocks.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
import bigframes.operations.aggregations as agg_ops
6969
from bigframes.session import dry_runs, execution_spec
7070
from bigframes.session import executor as executors
71+
from bigframes.session._io import pandas as io_pandas
7172

7273
# Type constraint for wherever column labels are used
7374
Label = typing.Hashable
@@ -711,12 +712,15 @@ def to_pandas_batches(
711712
# To reduce the number of edge cases to consider when working with the
712713
# results of this, always return at least one DataFrame. See:
713714
# b/428918844.
714-
empty_val = pd.DataFrame(
715-
{
716-
col: pd.Series([], dtype=self.expr.get_column_type(col))
717-
for col in itertools.chain(self.value_columns, self.index_columns)
718-
}
719-
)
715+
try:
716+
empty_arrow_table = self.expr.schema.to_pyarrow().empty_table()
717+
except pa.ArrowNotImplementedError:
718+
# Bug with some pyarrow versions (https://github.com/apache/arrow/issues/45262):
719+
# empty_table only supports base storage types, not extension types.
720+
empty_arrow_table = self.expr.schema.to_pyarrow(
721+
use_storage_types=True
722+
).empty_table()
723+
empty_val = io_pandas.arrow_to_pandas(empty_arrow_table, self.expr.schema)
720724
dfs = map(
721725
lambda a: a[0],
722726
itertools.zip_longest(

bigframes/core/nodes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1627,7 +1627,7 @@ class ResultNode(UnaryNode):
16271627
# TODO: CTE definitions
16281628

16291629
def _validate(self):
1630-
for ref, name in self.output_cols:
1630+
for ref, _ in self.output_cols:
16311631
assert ref.id in self.child.ids
16321632

16331633
@property

bigframes/core/rewrite/identifiers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ def remap_variables(
5757
new_root = root.transform_children(lambda node: remapped_children[node])
5858

5959
# Step 3: Transform the current node using the mappings from its children.
60+
# "reversed" is required for InNode so that in case of a duplicate column ID,
61+
# the left child's mapping is the one that's kept.
6062
downstream_mappings: dict[identifiers.ColumnId, identifiers.ColumnId] = {
61-
k: v for mapping in new_child_mappings for k, v in mapping.items()
63+
k: v for mapping in reversed(new_child_mappings) for k, v in mapping.items()
6264
}
6365
if isinstance(new_root, nodes.InNode):
6466
new_root = typing.cast(nodes.InNode, new_root)

bigframes/functions/function_typing.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,22 @@ class UnsupportedTypeError(ValueError):
6060
def __init__(self, type_, supported_types):
6161
self.type = type_
6262
self.supported_types = supported_types
63+
64+
types_to_format = supported_types
65+
if isinstance(supported_types, dict):
66+
types_to_format = supported_types.keys()
67+
68+
supported_types_str = ", ".join(
69+
sorted(
70+
[
71+
getattr(supported, "__name__", supported)
72+
for supported in types_to_format
73+
]
74+
)
75+
)
76+
6377
super().__init__(
64-
f"'{type_}' must be one of the supported types ({supported_types}) "
78+
f"'{getattr(type_, '__name__', type_)}' must be one of the supported types ({supported_types_str}) "
6579
"or a list of one of those types."
6680
)
6781

bigframes/session/loader.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
import google.cloud.bigquery.table
4646
from google.cloud.bigquery_storage_v1 import types as bq_storage_types
4747
import pandas
48-
import pyarrow as pa
4948

5049
import bigframes._tools
5150
import bigframes._tools.strings
@@ -1307,22 +1306,6 @@ def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict:
13071306
return configuration
13081307

13091308

1310-
def _has_json_arrow_type(arrow_type: pa.DataType) -> bool:
1311-
"""
1312-
Searches recursively for the JSON Arrow type within a PyArrow DataType.
1313-
"""
1314-
if arrow_type == bigframes.dtypes.JSON_ARROW_TYPE:
1315-
return True
1316-
if pa.types.is_list(arrow_type):
1317-
return _has_json_arrow_type(arrow_type.value_type)
1318-
if pa.types.is_struct(arrow_type):
1319-
for i in range(arrow_type.num_fields):
1320-
if _has_json_arrow_type(arrow_type.field(i).type):
1321-
return True
1322-
return False
1323-
return False
1324-
1325-
13261309
def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype):
13271310
"""
13281311
Determines whether a datatype is supported by bq load jobs.
@@ -1339,7 +1322,9 @@ def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype):
13391322
if column_type == bigframes.dtypes.JSON_DTYPE:
13401323
return
13411324

1342-
if isinstance(column_type, pandas.ArrowDtype) and _has_json_arrow_type(
1325+
if isinstance(
1326+
column_type, pandas.ArrowDtype
1327+
) and bigframes.dtypes.contains_db_dtypes_json_arrow_type(
13431328
column_type.pyarrow_dtype
13441329
):
13451330
raise NotImplementedError(

bigframes/version.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
__version__ = "2.27.0"
15+
__version__ = "2.28.0"
1616

1717
# {x-release-please-start-date}
18-
__release_date__ = "2025-10-24"
18+
__release_date__ = "2025-11-03"
1919
# {x-release-please-end}

tests/system/conftest.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,23 @@ def _hash_digest_file(hasher, filepath):
7070
hasher.update(chunk)
7171

7272

73+
@pytest.fixture(scope="session")
74+
def normalize_connection_id():
75+
"""Normalizes the connection ID by casefolding only the LOCATION component.
76+
77+
Connection format: PROJECT.LOCATION.CONNECTION_NAME
78+
Only LOCATION is case-insensitive; PROJECT and CONNECTION_NAME must be lowercase.
79+
"""
80+
81+
def normalize(connection_id: str) -> str:
82+
parts = connection_id.split(".")
83+
if len(parts) == 3:
84+
return f"{parts[0]}.{parts[1].casefold()}.{parts[2]}"
85+
return connection_id # Return unchanged if invalid format
86+
87+
return normalize
88+
89+
7390
@pytest.fixture(scope="session")
7491
def tokyo_location() -> str:
7592
return TOKYO_LOCATION

tests/system/large/blob/test_function.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def images_output_uris(images_output_folder: str) -> list[str]:
5252
]
5353

5454

55+
@pytest.mark.skip(reason="b/457416070")
5556
def test_blob_exif(
5657
bq_connection: str,
5758
session: bigframes.Session,
@@ -103,6 +104,7 @@ def test_blob_exif_verbose(
103104
assert content_series.dtype == dtypes.JSON_DTYPE
104105

105106

107+
@pytest.mark.skip(reason="b/457416070")
106108
def test_blob_image_blur_to_series(
107109
images_mm_df: bpd.DataFrame,
108110
bq_connection: str,
@@ -136,6 +138,7 @@ def test_blob_image_blur_to_series(
136138
assert not actual.blob.size().isna().any()
137139

138140

141+
@pytest.mark.skip(reason="b/457416070")
139142
def test_blob_image_blur_to_series_verbose(
140143
images_mm_df: bpd.DataFrame,
141144
bq_connection: str,
@@ -163,6 +166,7 @@ def test_blob_image_blur_to_series_verbose(
163166
assert not actual.blob.size().isna().any()
164167

165168

169+
@pytest.mark.skip(reason="b/457416070")
166170
def test_blob_image_blur_to_folder(
167171
images_mm_df: bpd.DataFrame,
168172
bq_connection: str,
@@ -195,6 +199,7 @@ def test_blob_image_blur_to_folder(
195199
assert not actual.blob.size().isna().any()
196200

197201

202+
@pytest.mark.skip(reason="b/457416070")
198203
def test_blob_image_blur_to_folder_verbose(
199204
images_mm_df: bpd.DataFrame,
200205
bq_connection: str,
@@ -254,6 +259,7 @@ def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connectio
254259
assert content_series.dtype == dtypes.BYTES_DTYPE
255260

256261

262+
@pytest.mark.skip(reason="b/457416070")
257263
def test_blob_image_resize_to_series(
258264
images_mm_df: bpd.DataFrame,
259265
bq_connection: str,
@@ -291,6 +297,7 @@ def test_blob_image_resize_to_series(
291297
assert not actual.blob.size().isna().any()
292298

293299

300+
@pytest.mark.skip(reason="b/457416070")
294301
def test_blob_image_resize_to_series_verbose(
295302
images_mm_df: bpd.DataFrame,
296303
bq_connection: str,
@@ -325,6 +332,7 @@ def test_blob_image_resize_to_series_verbose(
325332
assert not actual.blob.size().isna().any()
326333

327334

335+
@pytest.mark.skip(reason="b/457416070")
328336
def test_blob_image_resize_to_folder(
329337
images_mm_df: bpd.DataFrame,
330338
bq_connection: str,
@@ -358,6 +366,7 @@ def test_blob_image_resize_to_folder(
358366
assert not actual.blob.size().isna().any()
359367

360368

369+
@pytest.mark.skip(reason="b/457416070")
361370
def test_blob_image_resize_to_folder_verbose(
362371
images_mm_df: bpd.DataFrame,
363372
bq_connection: str,
@@ -420,6 +429,7 @@ def test_blob_image_resize_to_bq_verbose(
420429
assert content_series.dtype == dtypes.BYTES_DTYPE
421430

422431

432+
@pytest.mark.skip(reason="b/457416070")
423433
def test_blob_image_normalize_to_series(
424434
images_mm_df: bpd.DataFrame,
425435
bq_connection: str,
@@ -492,6 +502,7 @@ def test_blob_image_normalize_to_series_verbose(
492502
assert hasattr(content_series, "blob")
493503

494504

505+
@pytest.mark.skip(reason="b/457416070")
495506
def test_blob_image_normalize_to_folder(
496507
images_mm_df: bpd.DataFrame,
497508
bq_connection: str,
@@ -598,6 +609,7 @@ def test_blob_image_normalize_to_bq_verbose(
598609
assert content_series.dtype == dtypes.BYTES_DTYPE
599610

600611

612+
@pytest.mark.skip(reason="b/457416070")
601613
def test_blob_pdf_extract(
602614
pdf_mm_df: bpd.DataFrame,
603615
bq_connection: str,
@@ -633,6 +645,7 @@ def test_blob_pdf_extract(
633645
), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. "
634646

635647

648+
@pytest.mark.skip(reason="b/457416070")
636649
def test_blob_pdf_extract_verbose(
637650
pdf_mm_df: bpd.DataFrame,
638651
bq_connection: str,
@@ -670,6 +683,7 @@ def test_blob_pdf_extract_verbose(
670683
), f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. "
671684

672685

686+
@pytest.mark.skip(reason="b/457416070")
673687
def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str):
674688
actual = (
675689
pdf_mm_df["pdf"]
@@ -709,6 +723,7 @@ def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str):
709723
), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. "
710724

711725

726+
@pytest.mark.skip(reason="b/457416070")
712727
def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str):
713728
actual = (
714729
pdf_mm_df["pdf"]

tests/system/small/bigquery/test_ai.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ def test_ai_if(session):
273273
assert result.dtype == dtypes.BOOL_DTYPE
274274

275275

276+
@pytest.mark.skip(reason="b/457416070")
276277
def test_ai_if_multi_model(session):
277278
df = session.from_glob_path(
278279
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
@@ -293,6 +294,7 @@ def test_ai_classify(session):
293294
assert result.dtype == dtypes.STRING_DTYPE
294295

295296

297+
@pytest.mark.skip(reason="b/457416070")
296298
def test_ai_classify_multi_model(session):
297299
df = session.from_glob_path(
298300
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"

0 commit comments

Comments
 (0)