Skip to content

Commit 25a1be0

Browse files
authored
Merge branch 'main' into output_schema
2 parents c0f71ef + 8804ada commit 25a1be0

File tree

55 files changed

+745
-324
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+745
-324
lines changed

CHANGELOG.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,38 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.18.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.17.0...v2.18.0) (2025-09-03)
8+
9+
10+
### ⚠ BREAKING CHANGES
11+
12+
* Add `allow_large_results` option to `read_gbq_query`, aligning with `bpd.options.compute.allow_large_results` option ([#1935](https://github.com/googleapis/python-bigquery-dataframes/issues/1935))
13+
14+
### Features
15+
16+
* Add `allow_large_results` option to `read_gbq_query`, aligning with `bpd.options.compute.allow_large_results` option ([#1935](https://github.com/googleapis/python-bigquery-dataframes/issues/1935)) ([a7963fe](https://github.com/googleapis/python-bigquery-dataframes/commit/a7963fe57a0e141debf726f0bc7b0e953ebe9634))
17+
* Add parameter shuffle for ml.model_selection.train_test_split ([#2030](https://github.com/googleapis/python-bigquery-dataframes/issues/2030)) ([2c72c56](https://github.com/googleapis/python-bigquery-dataframes/commit/2c72c56fb5893eb01d5aec6273d11945c9c532c5))
18+
* Can pivot unordered, unindexed dataframe ([#2040](https://github.com/googleapis/python-bigquery-dataframes/issues/2040)) ([1a0f710](https://github.com/googleapis/python-bigquery-dataframes/commit/1a0f710ac11418fd71ab3373f3f6002fa581b180))
19+
* Local date accessor execution support ([#2034](https://github.com/googleapis/python-bigquery-dataframes/issues/2034)) ([7ac6fe1](https://github.com/googleapis/python-bigquery-dataframes/commit/7ac6fe16f7f2c09d2efac6ab813ec841c21baef8))
20+
* Support args in dataframe apply method ([#2026](https://github.com/googleapis/python-bigquery-dataframes/issues/2026)) ([164c481](https://github.com/googleapis/python-bigquery-dataframes/commit/164c4818bc4ff2990dca16b9f22a798f47e0a60b))
21+
* Support args in series apply method ([#2013](https://github.com/googleapis/python-bigquery-dataframes/issues/2013)) ([d9d725c](https://github.com/googleapis/python-bigquery-dataframes/commit/d9d725cfbc3dca9e66b460cae4084e25162f2acf))
22+
* Support callable for dataframe mask method ([#2020](https://github.com/googleapis/python-bigquery-dataframes/issues/2020)) ([9d4504b](https://github.com/googleapis/python-bigquery-dataframes/commit/9d4504be310d38b63515d67c0f60d2e48e68c7b5))
23+
* Support multi-column assignment for DataFrame ([#2028](https://github.com/googleapis/python-bigquery-dataframes/issues/2028)) ([ba0d23b](https://github.com/googleapis/python-bigquery-dataframes/commit/ba0d23b59c44ba5a46ace8182ad0e0cfc703b3ab))
24+
* Support string matching in local executor ([#2032](https://github.com/googleapis/python-bigquery-dataframes/issues/2032)) ([c0b54f0](https://github.com/googleapis/python-bigquery-dataframes/commit/c0b54f03849ee3115413670e690e68f3ef10f2ec))
25+
26+
27+
### Bug Fixes
28+
29+
* Fix scalar op lowering tree walk ([#2029](https://github.com/googleapis/python-bigquery-dataframes/issues/2029)) ([935af10](https://github.com/googleapis/python-bigquery-dataframes/commit/935af107ef98837fb2b81d72185d0b6a9e09fbcf))
30+
* Read_csv fails when checking the file size for wildcard GCS files ([#2019](https://github.com/googleapis/python-bigquery-dataframes/issues/2019)) ([b0d620b](https://github.com/googleapis/python-bigquery-dataframes/commit/b0d620bbe8227189bbdc2ba5a913b03c70575296))
31+
* Resolve the validation issue for the `other` arg in the DataFrame `where` method ([#2042](https://github.com/googleapis/python-bigquery-dataframes/issues/2042)) ([8689199](https://github.com/googleapis/python-bigquery-dataframes/commit/8689199aa82212ed300fff592097093812e0290e))
32+
33+
34+
### Performance Improvements
35+
36+
* Improve axis=1 aggregation performance ([#2036](https://github.com/googleapis/python-bigquery-dataframes/issues/2036)) ([fbb2094](https://github.com/googleapis/python-bigquery-dataframes/commit/fbb209468297a8057d9d49c40e425c3bfdeb92bd))
37+
* Improve iter_nodes_topo performance using Kahn's algorithm ([#2038](https://github.com/googleapis/python-bigquery-dataframes/issues/2038)) ([3961637](https://github.com/googleapis/python-bigquery-dataframes/commit/39616374bba424996ebeb9a12096bfaf22660b44))
38+
739
## [2.17.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.16.0...v2.17.0) (2025-08-22)
840

941

bigframes/_config/display_options.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class DisplayOptions:
3535
progress_bar: Optional[str] = "auto"
3636
repr_mode: Literal["head", "deferred", "anywidget"] = "head"
3737

38+
max_colwidth: Optional[int] = 50
3839
max_info_columns: int = 100
3940
max_info_rows: Optional[int] = 200000
4041
memory_usage: bool = True
@@ -52,6 +53,8 @@ def pandas_repr(display_options: DisplayOptions):
5253
so that we don't override pandas behavior.
5354
"""
5455
with pd.option_context(
56+
"display.max_colwidth",
57+
display_options.max_colwidth,
5558
"display.max_columns",
5659
display_options.max_columns,
5760
"display.max_rows",

bigframes/bigquery/__init__.py

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
such as array functions:
1717
https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions. """
1818

19+
import sys
20+
1921
from bigframes.bigquery._operations.approx_agg import approx_top_count
2022
from bigframes.bigquery._operations.array import (
2123
array_agg,
@@ -52,43 +54,51 @@
5254
from bigframes.bigquery._operations.search import create_vector_index, vector_search
5355
from bigframes.bigquery._operations.sql import sql_scalar
5456
from bigframes.bigquery._operations.struct import struct
57+
from bigframes.core import log_adapter
5558

56-
__all__ = [
59+
_functions = [
5760
# approximate aggregate ops
58-
"approx_top_count",
61+
approx_top_count,
5962
# array ops
60-
"array_agg",
61-
"array_length",
62-
"array_to_string",
63+
array_agg,
64+
array_length,
65+
array_to_string,
6366
# datetime ops
64-
"unix_micros",
65-
"unix_millis",
66-
"unix_seconds",
67+
unix_micros,
68+
unix_millis,
69+
unix_seconds,
6770
# geo ops
68-
"st_area",
69-
"st_buffer",
70-
"st_centroid",
71-
"st_convexhull",
72-
"st_difference",
73-
"st_distance",
74-
"st_intersection",
75-
"st_isclosed",
76-
"st_length",
71+
st_area,
72+
st_buffer,
73+
st_centroid,
74+
st_convexhull,
75+
st_difference,
76+
st_distance,
77+
st_intersection,
78+
st_isclosed,
79+
st_length,
7780
# json ops
78-
"json_extract",
79-
"json_extract_array",
80-
"json_extract_string_array",
81-
"json_query",
82-
"json_query_array",
83-
"json_set",
84-
"json_value",
85-
"json_value_array",
86-
"parse_json",
81+
json_extract,
82+
json_extract_array,
83+
json_extract_string_array,
84+
json_query,
85+
json_query_array,
86+
json_set,
87+
json_value,
88+
json_value_array,
89+
parse_json,
8790
# search ops
88-
"create_vector_index",
89-
"vector_search",
91+
create_vector_index,
92+
vector_search,
9093
# sql ops
91-
"sql_scalar",
94+
sql_scalar,
9295
# struct ops
93-
"struct",
96+
struct,
9497
]
98+
99+
__all__ = [f.__name__ for f in _functions]
100+
101+
_module = sys.modules[__name__]
102+
for f in _functions:
103+
_decorated_object = log_adapter.method_logger(f, custom_base_name="bigquery")
104+
setattr(_module, f.__name__, _decorated_object)

bigframes/bigquery/_operations/search.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
import google.cloud.bigquery as bigquery
2222

23-
import bigframes.core.sql
2423
import bigframes.ml.utils as utils
2524

2625
if typing.TYPE_CHECKING:

bigframes/bigquery/_operations/sql.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
import google.cloud.bigquery
2222

2323
import bigframes.core.compile.sqlglot.sqlglot_ir as sqlglot_ir
24-
import bigframes.core.sql
25-
import bigframes.dataframe
2624
import bigframes.dtypes
2725
import bigframes.operations
2826
import bigframes.series

bigframes/core/agg_expressions.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import abc
18+
import dataclasses
19+
import functools
20+
import itertools
21+
import typing
22+
from typing import Callable, Mapping, TypeVar
23+
24+
from bigframes import dtypes
25+
from bigframes.core import expression
26+
import bigframes.core.identifiers as ids
27+
import bigframes.operations.aggregations as agg_ops
28+
29+
TExpression = TypeVar("TExpression", bound="Aggregation")
30+
31+
32+
@dataclasses.dataclass(frozen=True)
33+
class Aggregation(expression.Expression):
34+
"""Represents windowing or aggregation over a column."""
35+
36+
op: agg_ops.WindowOp = dataclasses.field()
37+
38+
@property
39+
def column_references(self) -> typing.Tuple[ids.ColumnId, ...]:
40+
return tuple(
41+
itertools.chain.from_iterable(
42+
map(lambda x: x.column_references, self.inputs)
43+
)
44+
)
45+
46+
@functools.cached_property
47+
def is_resolved(self) -> bool:
48+
return all(input.is_resolved for input in self.inputs)
49+
50+
@functools.cached_property
51+
def output_type(self) -> dtypes.ExpressionType:
52+
if not self.is_resolved:
53+
raise ValueError(f"Type of expression {self.op} has not been fixed.")
54+
55+
input_types = [input.output_type for input in self.inputs]
56+
57+
return self.op.output_type(*input_types)
58+
59+
@property
60+
@abc.abstractmethod
61+
def inputs(
62+
self,
63+
) -> typing.Tuple[expression.Expression, ...]:
64+
...
65+
66+
@property
67+
def free_variables(self) -> typing.Tuple[str, ...]:
68+
return tuple(
69+
itertools.chain.from_iterable(map(lambda x: x.free_variables, self.inputs))
70+
)
71+
72+
@property
73+
def is_const(self) -> bool:
74+
return all(child.is_const for child in self.inputs)
75+
76+
@abc.abstractmethod
77+
def replace_args(self: TExpression, *arg) -> TExpression:
78+
...
79+
80+
def transform_children(
81+
self: TExpression, t: Callable[[expression.Expression], expression.Expression]
82+
) -> TExpression:
83+
return self.replace_args(*(t(arg) for arg in self.inputs))
84+
85+
def bind_variables(
86+
self: TExpression,
87+
bindings: Mapping[str, expression.Expression],
88+
allow_partial_bindings: bool = False,
89+
) -> TExpression:
90+
return self.transform_children(
91+
lambda x: x.bind_variables(bindings, allow_partial_bindings)
92+
)
93+
94+
def bind_refs(
95+
self: TExpression,
96+
bindings: Mapping[ids.ColumnId, expression.Expression],
97+
allow_partial_bindings: bool = False,
98+
) -> TExpression:
99+
return self.transform_children(
100+
lambda x: x.bind_refs(bindings, allow_partial_bindings)
101+
)
102+
103+
104+
@dataclasses.dataclass(frozen=True)
105+
class NullaryAggregation(Aggregation):
106+
op: agg_ops.NullaryWindowOp = dataclasses.field()
107+
108+
@property
109+
def inputs(
110+
self,
111+
) -> typing.Tuple[expression.Expression, ...]:
112+
return ()
113+
114+
def replace_args(self, *arg) -> NullaryAggregation:
115+
return self
116+
117+
118+
@dataclasses.dataclass(frozen=True)
119+
class UnaryAggregation(Aggregation):
120+
op: agg_ops.UnaryWindowOp
121+
arg: expression.Expression
122+
123+
@property
124+
def inputs(
125+
self,
126+
) -> typing.Tuple[expression.Expression, ...]:
127+
return (self.arg,)
128+
129+
def replace_args(self, arg: expression.Expression) -> UnaryAggregation:
130+
return UnaryAggregation(
131+
self.op,
132+
arg,
133+
)
134+
135+
136+
@dataclasses.dataclass(frozen=True)
137+
class BinaryAggregation(Aggregation):
138+
op: agg_ops.BinaryAggregateOp = dataclasses.field()
139+
left: expression.Expression = dataclasses.field()
140+
right: expression.Expression = dataclasses.field()
141+
142+
@property
143+
def inputs(
144+
self,
145+
) -> typing.Tuple[expression.Expression, ...]:
146+
return (self.left, self.right)
147+
148+
def replace_args(
149+
self, larg: expression.Expression, rarg: expression.Expression
150+
) -> BinaryAggregation:
151+
return BinaryAggregation(self.op, larg, rarg)

bigframes/core/array_value.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import pandas
2525
import pyarrow as pa
2626

27+
from bigframes.core import agg_expressions
2728
import bigframes.core.expression as ex
2829
import bigframes.core.guid
2930
import bigframes.core.identifiers as ids
@@ -190,7 +191,7 @@ def row_count(self) -> ArrayValue:
190191
child=self.node,
191192
aggregations=(
192193
(
193-
ex.NullaryAggregation(agg_ops.size_op),
194+
agg_expressions.NullaryAggregation(agg_ops.size_op),
194195
ids.ColumnId(bigframes.core.guid.generate_guid()),
195196
),
196197
),
@@ -379,7 +380,7 @@ def drop_columns(self, columns: Iterable[str]) -> ArrayValue:
379380

380381
def aggregate(
381382
self,
382-
aggregations: typing.Sequence[typing.Tuple[ex.Aggregation, str]],
383+
aggregations: typing.Sequence[typing.Tuple[agg_expressions.Aggregation, str]],
383384
by_column_ids: typing.Sequence[str] = (),
384385
dropna: bool = True,
385386
) -> ArrayValue:
@@ -420,15 +421,15 @@ def project_window_op(
420421
"""
421422

422423
return self.project_window_expr(
423-
ex.UnaryAggregation(op, ex.deref(column_name)),
424+
agg_expressions.UnaryAggregation(op, ex.deref(column_name)),
424425
window_spec,
425426
never_skip_nulls,
426427
skip_reproject_unsafe,
427428
)
428429

429430
def project_window_expr(
430431
self,
431-
expression: ex.Aggregation,
432+
expression: agg_expressions.Aggregation,
432433
window: WindowSpec,
433434
never_skip_nulls=False,
434435
skip_reproject_unsafe: bool = False,

bigframes/core/bigframe_node.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,12 @@
2020
import functools
2121
import itertools
2222
import typing
23-
from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Tuple, Union
23+
from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Tuple
2424

2525
from bigframes.core import expression, field, identifiers
2626
import bigframes.core.schema as schemata
2727
import bigframes.dtypes
2828

29-
if typing.TYPE_CHECKING:
30-
import bigframes.session
31-
3229
COLUMN_SET = frozenset[identifiers.ColumnId]
3330

3431
T = typing.TypeVar("T")
@@ -281,8 +278,8 @@ def field_by_id(self) -> Mapping[identifiers.ColumnId, field.Field]:
281278
@property
282279
def _node_expressions(
283280
self,
284-
) -> Sequence[Union[expression.Expression, expression.Aggregation]]:
285-
"""List of scalar expressions. Intended for checking engine compatibility with used ops."""
281+
) -> Sequence[expression.Expression]:
282+
"""List of expressions. Intended for checking engine compatibility with used ops."""
286283
return ()
287284

288285
# Plan algorithms

0 commit comments

Comments
 (0)