Skip to content

Commit 1377016

Browse files
Merge branch 'main' into better_uniform_sample
2 parents 0656401 + 798af4a commit 1377016

File tree

200 files changed

+49485
-1798
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

200 files changed

+49485
-1798
lines changed

.github/ISSUE_TEMPLATE/bug_report.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,12 @@ import bigframes
2929
import google.cloud.bigquery
3030
import pandas
3131
import pyarrow
32-
import sqlglot
3332

3433
print(f"Python: {sys.version}")
3534
print(f"bigframes=={bigframes.__version__}")
3635
print(f"google-cloud-bigquery=={google.cloud.bigquery.__version__}")
3736
print(f"pandas=={pandas.__version__}")
3837
print(f"pyarrow=={pyarrow.__version__}")
39-
print(f"sqlglot=={sqlglot.__version__}")
4038
```
4139

4240
#### Steps to reproduce

.librarian/state.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
22
libraries:
33
- id: bigframes
4-
version: 2.31.0
4+
version: 2.32.0
55
last_generated_commit: ""
66
apis: []
77
source_roots:

CHANGELOG.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,34 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.32.0](https://github.com/googleapis/google-cloud-python/compare/bigframes-v2.31.0...bigframes-v2.32.0) (2026-01-05)
8+
9+
10+
### Documentation
11+
12+
* generate sitemap.xml for better search indexing (#2351) ([7d2990f1c48c6d74e2af6bee3af87f90189a3d9b](https://github.com/googleapis/google-cloud-python/commit/7d2990f1c48c6d74e2af6bee3af87f90189a3d9b))
13+
* update supported pandas APIs documentation links (#2330) ([ea71936ce240b2becf21b552d4e41e8ef4418e2d](https://github.com/googleapis/google-cloud-python/commit/ea71936ce240b2becf21b552d4e41e8ef4418e2d))
14+
* Add time series analysis notebook (#2328) ([369f1c0aff29d197b577ec79e401b107985fe969](https://github.com/googleapis/google-cloud-python/commit/369f1c0aff29d197b577ec79e401b107985fe969))
15+
16+
17+
### Features
18+
19+
* Enable multi-column sorting in anywidget mode (#2360) ([1feb956e4762e30276e5b380c0633e6ed7881357](https://github.com/googleapis/google-cloud-python/commit/1feb956e4762e30276e5b380c0633e6ed7881357))
20+
* display series in anywidget mode (#2346) ([7395d418550058c516ad878e13567256f4300a37](https://github.com/googleapis/google-cloud-python/commit/7395d418550058c516ad878e13567256f4300a37))
21+
* Refactor TableWidget and to_pandas_batches (#2250) ([b8f09015a7c8e6987dc124e6df925d4f6951b1da](https://github.com/googleapis/google-cloud-python/commit/b8f09015a7c8e6987dc124e6df925d4f6951b1da))
22+
* Auto-plan complex reduction expressions (#2298) ([4d5de14ccdd05b1ac8f50c3fe71c35ab9e5150c1](https://github.com/googleapis/google-cloud-python/commit/4d5de14ccdd05b1ac8f50c3fe71c35ab9e5150c1))
23+
* Display custom single index column in anywidget mode (#2311) ([f27196260743883ed8131d5fd33a335e311177e4](https://github.com/googleapis/google-cloud-python/commit/f27196260743883ed8131d5fd33a335e311177e4))
24+
* add fit_predict method to ml unsupervised models (#2320) ([59df7f70a12ef702224ad61e597bd775208dac45](https://github.com/googleapis/google-cloud-python/commit/59df7f70a12ef702224ad61e597bd775208dac45))
25+
26+
27+
### Bug Fixes
28+
29+
* vendor sqlglot bigquery dialect and remove package dependency (#2354) ([b321d72d5eb005b6e9295541a002540f05f72209](https://github.com/googleapis/google-cloud-python/commit/b321d72d5eb005b6e9295541a002540f05f72209))
30+
* bigframes.ml fit with eval data in partial mode avoids join on null index (#2355) ([7171d21b8c8d5a2d61081f41fa1109b5c9c4bc5f](https://github.com/googleapis/google-cloud-python/commit/7171d21b8c8d5a2d61081f41fa1109b5c9c4bc5f))
31+
* Improve strictness of nan vs None usage (#2326) ([481d938fb0b840e17047bc4b57e61af15b976e54](https://github.com/googleapis/google-cloud-python/commit/481d938fb0b840e17047bc4b57e61af15b976e54))
32+
* Correct DataFrame widget rendering in Colab (#2319) ([7f1d3df3839ec58f52e48df088057fc0df967da9](https://github.com/googleapis/google-cloud-python/commit/7f1d3df3839ec58f52e48df088057fc0df967da9))
33+
* Fix pd.timedelta handling in polars compiler with polars 1.36 (#2325) ([252644826289d9db7a8548884de880b3a4fccafd](https://github.com/googleapis/google-cloud-python/commit/252644826289d9db7a8548884de880b3a4fccafd))
34+
735
## [2.31.0](https://github.com/googleapis/google-cloud-python/compare/bigframes-v2.30.0...bigframes-v2.31.0) (2025-12-10)
836

937

LICENSE

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,3 +318,29 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
318318
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
319319
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
320320
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
321+
322+
---
323+
324+
Files: The bigframes_vendored.sqlglot module.
325+
326+
MIT License
327+
328+
Copyright (c) 2025 Toby Mao
329+
330+
Permission is hereby granted, free of charge, to any person obtaining a copy
331+
of this software and associated documentation files (the "Software"), to deal
332+
in the Software without restriction, including without limitation the rights
333+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
334+
copies of the Software, and to permit persons to whom the Software is
335+
furnished to do so, subject to the following conditions:
336+
337+
The above copyright notice and this permission notice shall be included in all
338+
copies or substantial portions of the Software.
339+
340+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
341+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
342+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
343+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
344+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
345+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
346+
SOFTWARE.

README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ It also contains code derived from the following third-party packages:
8282
* `Python <https://www.python.org/>`_
8383
* `scikit-learn <https://scikit-learn.org/>`_
8484
* `XGBoost <https://xgboost.readthedocs.io/en/stable/>`_
85+
* `SQLGlot <https://sqlglot.com/sqlglot.html>`_
8586

8687
For details, see the `third_party
8788
<https://github.com/googleapis/python-bigquery-dataframes/tree/main/third_party/bigframes_vendored>`_

bigframes/bigquery/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
from bigframes.bigquery._operations.search import create_vector_index, vector_search
6161
from bigframes.bigquery._operations.sql import sql_scalar
6262
from bigframes.bigquery._operations.struct import struct
63-
from bigframes.core import log_adapter
63+
from bigframes.core.logging import log_adapter
6464

6565
_functions = [
6666
# approximate aggregate ops

bigframes/bigquery/_operations/ai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
from bigframes import clients, dataframe, dtypes
2727
from bigframes import pandas as bpd
2828
from bigframes import series, session
29-
from bigframes.core import convert, log_adapter
29+
from bigframes.core import convert
30+
from bigframes.core.logging import log_adapter
3031
from bigframes.ml import core as ml_core
3132
from bigframes.operations import ai_ops, output_schemas
3233

bigframes/bigquery/_operations/ml.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import google.cloud.bigquery
2121
import pandas as pd
2222

23-
import bigframes.core.log_adapter as log_adapter
23+
import bigframes.core.logging.log_adapter as log_adapter
2424
import bigframes.core.sql.ml
2525
import bigframes.dataframe as dataframe
2626
import bigframes.ml.base

bigframes/core/block_transforms.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -625,21 +625,7 @@ def skew(
625625
# counts, moment3 for each column
626626
aggregations = []
627627
for col in original_columns:
628-
delta3_expr = _mean_delta_to_power(3, col)
629-
count_agg = agg_expressions.UnaryAggregation(
630-
agg_ops.count_op,
631-
ex.deref(col),
632-
)
633-
moment3_agg = agg_expressions.UnaryAggregation(
634-
agg_ops.mean_op,
635-
delta3_expr,
636-
)
637-
variance_agg = agg_expressions.UnaryAggregation(
638-
agg_ops.PopVarOp(),
639-
ex.deref(col),
640-
)
641-
skew_expr = _skew_from_moments_and_count(count_agg, moment3_agg, variance_agg)
642-
aggregations.append(skew_expr)
628+
aggregations.append(skew_expr(ex.deref(col)))
643629

644630
block = block.aggregate(
645631
aggregations, grouping_column_ids, column_labels=column_labels
@@ -663,16 +649,7 @@ def kurt(
663649
# counts, moment4 for each column
664650
kurt_exprs = []
665651
for col in original_columns:
666-
delta_4_expr = _mean_delta_to_power(4, col)
667-
count_agg = agg_expressions.UnaryAggregation(agg_ops.count_op, ex.deref(col))
668-
moment4_agg = agg_expressions.UnaryAggregation(agg_ops.mean_op, delta_4_expr)
669-
variance_agg = agg_expressions.UnaryAggregation(
670-
agg_ops.PopVarOp(), ex.deref(col)
671-
)
672-
673-
# Corresponds to order of aggregations in preceding loop
674-
kurt_expr = _kurt_from_moments_and_count(count_agg, moment4_agg, variance_agg)
675-
kurt_exprs.append(kurt_expr)
652+
kurt_exprs.append(kurt_expr(ex.deref(col)))
676653

677654
block = block.aggregate(
678655
kurt_exprs, grouping_column_ids, column_labels=column_labels
@@ -686,13 +663,38 @@ def kurt(
686663
return block
687664

688665

666+
def skew_expr(expr: ex.Expression) -> ex.Expression:
667+
delta3_expr = _mean_delta_to_power(3, expr)
668+
count_agg = agg_expressions.UnaryAggregation(
669+
agg_ops.count_op,
670+
expr,
671+
)
672+
moment3_agg = agg_expressions.UnaryAggregation(
673+
agg_ops.mean_op,
674+
delta3_expr,
675+
)
676+
variance_agg = agg_expressions.UnaryAggregation(
677+
agg_ops.PopVarOp(),
678+
expr,
679+
)
680+
return _skew_from_moments_and_count(count_agg, moment3_agg, variance_agg)
681+
682+
683+
def kurt_expr(expr: ex.Expression) -> ex.Expression:
684+
delta_4_expr = _mean_delta_to_power(4, expr)
685+
count_agg = agg_expressions.UnaryAggregation(agg_ops.count_op, expr)
686+
moment4_agg = agg_expressions.UnaryAggregation(agg_ops.mean_op, delta_4_expr)
687+
variance_agg = agg_expressions.UnaryAggregation(agg_ops.PopVarOp(), expr)
688+
return _kurt_from_moments_and_count(count_agg, moment4_agg, variance_agg)
689+
690+
689691
def _mean_delta_to_power(
690692
n_power: int,
691-
val_id: str,
693+
col_expr: ex.Expression,
692694
) -> ex.Expression:
693695
"""Calculate (x-mean(x))^n. Useful for calculating moment statistics such as skew and kurtosis."""
694-
mean_expr = agg_expressions.UnaryAggregation(agg_ops.mean_op, ex.deref(val_id))
695-
delta = ops.sub_op.as_expr(val_id, mean_expr)
696+
mean_expr = agg_expressions.UnaryAggregation(agg_ops.mean_op, col_expr)
697+
delta = ops.sub_op.as_expr(col_expr, mean_expr)
696698
return ops.pow_op.as_expr(delta, ex.const(n_power))
697699

698700

bigframes/core/bq_data.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,21 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable:
6464
else tuple(table.clustering_fields),
6565
)
6666

67+
@staticmethod
68+
def from_ref_and_schema(
69+
table_ref: bq.TableReference,
70+
schema: Sequence[bq.SchemaField],
71+
cluster_cols: Optional[Sequence[str]] = None,
72+
) -> GbqTable:
73+
return GbqTable(
74+
project_id=table_ref.project,
75+
dataset_id=table_ref.dataset_id,
76+
table_id=table_ref.table_id,
77+
physical_schema=tuple(schema),
78+
is_physically_stored=True,
79+
cluster_cols=tuple(cluster_cols) if cluster_cols else None,
80+
)
81+
6782
def get_table_ref(self) -> bq.TableReference:
6883
return bq.TableReference(
6984
bq.DatasetReference(self.project_id, self.dataset_id), self.table_id

0 commit comments

Comments
 (0)