Commit 3b2587c

feat: Add read_arrow methods to Session and pandas
Adds a `read_arrow` method to `bigframes.session.Session` and a `bigframes.pandas.read_arrow` function for creating BigQuery DataFrames `DataFrame` objects from PyArrow Tables. The implementation refactors the existing logic in `bigframes.session._io.bigquery.read_gbq_query` that converts Arrow data into BigFrames DataFrames.

Includes:

- New file `bigframes/session/_io/arrow.py` with the core conversion logic.
- `read_arrow(pa.Table) -> bpd.DataFrame` in the `Session` class.
- `read_arrow(pa.Table) -> bpd.DataFrame` in the `pandas` module.
- Unit and system tests for the new functionality.
- Docstrings for the new methods and functions.

Note: Unit tests for direct DataFrame operations (`shape`, `to_pandas`) on the result of `read_arrow` are currently failing because of the complexity of mocking the session and executor for `LocalDataNode` interactions. System tests are recommended for full end-to-end validation.
1 parent 38cc43f commit 3b2587c
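The commit message pins down the new surface as `read_arrow(pa.Table) -> bpd.DataFrame` on both `Session` and the `pandas` module. A minimal usage sketch under that assumption (the table contents are illustrative):

```python
import pyarrow as pa

import bigframes.pandas as bpd

# A small PyArrow Table built client-side.
table = pa.Table.from_pydict({"id": [1, 2, 3], "name": ["a", "b", "c"]})

# Module-level entry point added by this commit.
df = bpd.read_arrow(table)

# Session-level equivalent.
session = bpd.get_global_session()
df2 = session.read_arrow(table)
```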

File tree

105 files changed: +5154 -1476 lines changed

CHANGELOG.md

Lines changed: 48 additions & 0 deletions

@@ -4,6 +4,54 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [2.8.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.7.0...v2.8.0) (2025-06-23)
+
+
+### ⚠ BREAKING CHANGES
+
+* add required param 'engine' to multimodal functions ([#1834](https://github.com/googleapis/python-bigquery-dataframes/issues/1834))
+
+
+### Features
+
+* Add `bpd.options.compute.maximum_result_rows` option to limit client data download ([#1829](https://github.com/googleapis/python-bigquery-dataframes/issues/1829)) ([e22a3f6](https://github.com/googleapis/python-bigquery-dataframes/commit/e22a3f61a02cc1b7a5155556e5a07a1a2fea1d82))
+* Add `bpd.options.display.repr_mode = "anywidget"` to create an interactive display of the results ([#1820](https://github.com/googleapis/python-bigquery-dataframes/issues/1820)) ([be0a3cf](https://github.com/googleapis/python-bigquery-dataframes/commit/be0a3cf7711dadc68d8366ea90b99855773e2a2e))
+* Add DataFrame.ai.forecast() support ([#1828](https://github.com/googleapis/python-bigquery-dataframes/issues/1828)) ([7bc7f36](https://github.com/googleapis/python-bigquery-dataframes/commit/7bc7f36fc20d233f4cf5ed688cc5dcaf100ce4fb))
+* Add describe() method to Series ([#1827](https://github.com/googleapis/python-bigquery-dataframes/issues/1827)) ([a4205f8](https://github.com/googleapis/python-bigquery-dataframes/commit/a4205f882012820c034cb15d73b2768ec4ad3ac8))
+* Add required param 'engine' to multimodal functions ([#1834](https://github.com/googleapis/python-bigquery-dataframes/issues/1834)) ([37666e4](https://github.com/googleapis/python-bigquery-dataframes/commit/37666e4c137d52c28ab13477dfbcc6e92b913334))
+
+
+### Performance Improvements
+
+* Produce simpler sql ([#1836](https://github.com/googleapis/python-bigquery-dataframes/issues/1836)) ([cf9c22a](https://github.com/googleapis/python-bigquery-dataframes/commit/cf9c22a09c4e668a598fa1dad0f6a07b59bc6524))
+
+
+### Documentation
+
+* Add ai.forecast notebook ([#1840](https://github.com/googleapis/python-bigquery-dataframes/issues/1840)) ([2430497](https://github.com/googleapis/python-bigquery-dataframes/commit/24304972fdbdfd12c25c7f4ef5a7b280f334801a))
+
+## [2.7.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.6.0...v2.7.0) (2025-06-16)
+
+
+### Features
+
+* Add bbq.json_query_array and warn bbq.json_extract_array deprecated ([#1811](https://github.com/googleapis/python-bigquery-dataframes/issues/1811)) ([dc9eb27](https://github.com/googleapis/python-bigquery-dataframes/commit/dc9eb27fa75e90c2c95a0619551bf67aea6ef63b))
+* Add bbq.json_value_array and deprecate bbq.json_extract_string_array ([#1818](https://github.com/googleapis/python-bigquery-dataframes/issues/1818)) ([019051e](https://github.com/googleapis/python-bigquery-dataframes/commit/019051e453d81769891aa398475ebd04d1826e81))
+* Add groupby cumcount ([#1798](https://github.com/googleapis/python-bigquery-dataframes/issues/1798)) ([18f43e8](https://github.com/googleapis/python-bigquery-dataframes/commit/18f43e8b58e03a27b021bce07566a3d006ac3679))
+* Support custom build service account in `remote_function` ([#1796](https://github.com/googleapis/python-bigquery-dataframes/issues/1796)) ([e586151](https://github.com/googleapis/python-bigquery-dataframes/commit/e586151df81917b49f702ae496aaacbd02931636))
+
+
+### Bug Fixes
+
+* Correct read_csv behaviours with use_cols, names, index_col ([#1804](https://github.com/googleapis/python-bigquery-dataframes/issues/1804)) ([855031a](https://github.com/googleapis/python-bigquery-dataframes/commit/855031a316a6957731a5d1c5e59dedb9757d9f7a))
+* Fix single row broadcast with null index ([#1803](https://github.com/googleapis/python-bigquery-dataframes/issues/1803)) ([080eb7b](https://github.com/googleapis/python-bigquery-dataframes/commit/080eb7be3cde591e08cad0d5c52c68cc0b25ade8))
+
+
+### Documentation
+
+* Document how to use ai.map() for information extraction ([#1808](https://github.com/googleapis/python-bigquery-dataframes/issues/1808)) ([b586746](https://github.com/googleapis/python-bigquery-dataframes/commit/b5867464a5bf30300dcfc069eda546b11f03146c))
+* Rearrange README.rst to include a short code sample ([#1812](https://github.com/googleapis/python-bigquery-dataframes/issues/1812)) ([f6265db](https://github.com/googleapis/python-bigquery-dataframes/commit/f6265dbb8e22de81bb59c7def175cd325e85c041))
+* Use pandas API instead of pandas-like or pandas-compatible ([#1825](https://github.com/googleapis/python-bigquery-dataframes/issues/1825)) ([aa32369](https://github.com/googleapis/python-bigquery-dataframes/commit/aa323694e161f558bc5e60490c2f21008961e2ca))
+
 ## [2.6.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.5.0...v2.6.0) (2025-06-09)
 
 
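Of the 2.8.0 features above, `Series.describe()` mirrors the pandas API; a minimal sketch (the summary statistics are assumed to follow the pandas convention):

```python
import bigframes.pandas as bpd

s = bpd.Series([1, 2, 3, 4, 5])
# New in 2.8.0: describe() on Series.
print(s.describe().to_pandas())
```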

bigframes/_config/compute_options.py

Lines changed: 39 additions & 30 deletions

@@ -55,29 +55,7 @@ class ComputeOptions:
        {'test2': 'abc', 'test3': False}
 
    Attributes:
-        maximum_bytes_billed (int, Options):
-            Limits the bytes billed for query jobs. Queries that will have
-            bytes billed beyond this limit will fail (without incurring a
-            charge). If unspecified, this will be set to your project default.
-            See `maximum_bytes_billed`: https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed.
-
-        enable_multi_query_execution (bool, Options):
-            If enabled, large queries may be factored into multiple smaller queries
-            in order to avoid generating queries that are too complex for the query
-            engine to handle. However this comes at the cost of increase cost and latency.
-
-        extra_query_labels (Dict[str, Any], Options):
-            Stores additional custom labels for query configuration.
-
-        semantic_ops_confirmation_threshold (int, optional):
-            .. deprecated:: 1.42.0
-                Semantic operators are deprecated. Please use AI operators instead
-
-        semantic_ops_threshold_autofail (bool):
-            .. deprecated:: 1.42.0
-                Semantic operators are deprecated. Please use AI operators instead
-
-        ai_ops_confirmation_threshold (int, optional):
+        ai_ops_confirmation_threshold (int | None):
            Guards against unexpected processing of large amount of rows by semantic operators.
            If the number of rows exceeds the threshold, the user will be asked to confirm
            their operations to resume. The default value is 0. Set the value to None
@@ -87,26 +65,57 @@ class ComputeOptions:
            Guards against unexpected processing of large amount of rows by semantic operators.
            When set to True, the operation automatically fails without asking for user inputs.
 
-        allow_large_results (bool):
+        allow_large_results (bool | None):
            Specifies whether query results can exceed 10 GB. Defaults to False. Setting this
            to False (the default) restricts results to 10 GB for potentially faster execution;
            BigQuery will raise an error if this limit is exceeded. Setting to True removes
            this result size limit.
+
+        enable_multi_query_execution (bool | None):
+            If enabled, large queries may be factored into multiple smaller queries
+            in order to avoid generating queries that are too complex for the query
+            engine to handle. However this comes at the cost of increase cost and latency.
+
+        extra_query_labels (Dict[str, Any] | None):
+            Stores additional custom labels for query configuration.
+
+        maximum_bytes_billed (int | None):
+            Limits the bytes billed for query jobs. Queries that will have
+            bytes billed beyond this limit will fail (without incurring a
+            charge). If unspecified, this will be set to your project default.
+            See `maximum_bytes_billed`: https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed.
+
+        maximum_result_rows (int | None):
+            Limits the number of rows in an execution result. When converting
+            a BigQuery DataFrames object to a pandas DataFrame or Series (e.g.,
+            using ``.to_pandas()``, ``.peek()``, ``.__repr__()``, direct
+            iteration), the data is downloaded from BigQuery to the client
+            machine. This option restricts the number of rows that can be
+            downloaded. If the number of rows to be downloaded exceeds this
+            limit, a ``bigframes.exceptions.MaximumResultRowsExceeded``
+            exception is raised.
+
+        semantic_ops_confirmation_threshold (int | None):
+            .. deprecated:: 1.42.0
+                Semantic operators are deprecated. Please use AI operators instead
+
+        semantic_ops_threshold_autofail (bool):
+            .. deprecated:: 1.42.0
+                Semantic operators are deprecated. Please use AI operators instead
    """
 
-    maximum_bytes_billed: Optional[int] = None
+    ai_ops_confirmation_threshold: Optional[int] = 0
+    ai_ops_threshold_autofail: bool = False
+    allow_large_results: Optional[bool] = None
    enable_multi_query_execution: bool = False
    extra_query_labels: Dict[str, Any] = dataclasses.field(
        default_factory=dict, init=False
    )
+    maximum_bytes_billed: Optional[int] = None
+    maximum_result_rows: Optional[int] = None
    semantic_ops_confirmation_threshold: Optional[int] = 0
    semantic_ops_threshold_autofail = False
 
-    ai_ops_confirmation_threshold: Optional[int] = 0
-    ai_ops_threshold_autofail: bool = False
-
-    allow_large_results: Optional[bool] = None
-
    def assign_extra_query_labels(self, **kwargs: Any) -> None:
        """
        Assigns additional custom labels for query configuration. The method updates the
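A sketch of the new `maximum_result_rows` guard in use; the option and exception names come from the docstring above, while the table and the limit are illustrative:

```python
import bigframes.exceptions
import bigframes.pandas as bpd

# Cap client-side downloads at 1,000 rows.
bpd.options.compute.maximum_result_rows = 1_000

df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")
try:
    local_df = df.to_pandas()  # downloads rows to the client machine
except bigframes.exceptions.MaximumResultRowsExceeded:
    print("Result exceeds the row limit; filter or aggregate first.")
```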

bigframes/_config/display_options.py

Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@ class DisplayOptions:
    max_columns: int = 20
    max_rows: int = 25
    progress_bar: Optional[str] = "auto"
-    repr_mode: Literal["head", "deferred"] = "head"
+    repr_mode: Literal["head", "deferred", "anywidget"] = "head"
 
    max_info_columns: int = 100
    max_info_rows: Optional[int] = 200000
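The new `"anywidget"` mode in use; a sketch assuming the optional `anywidget` package is installed and the code runs in a notebook:

```python
import bigframes.pandas as bpd

# Opt in to the interactive, widget-based representation (new in 2.8.0).
bpd.options.display.repr_mode = "anywidget"

df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")
df  # now renders as an interactive widget instead of a static preview
```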

bigframes/core/bigframe_node.py

Lines changed: 20 additions & 3 deletions

@@ -20,9 +20,19 @@
 import functools
 import itertools
 import typing
-from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Set, Tuple
-
-from bigframes.core import field, identifiers
+from typing import (
+    Callable,
+    Dict,
+    Generator,
+    Iterable,
+    Mapping,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+)
+
+from bigframes.core import expression, field, identifiers
 import bigframes.core.schema as schemata
 import bigframes.dtypes
 
@@ -278,6 +288,13 @@ def _dtype_lookup(self) -> dict[identifiers.ColumnId, bigframes.dtypes.Dtype]:
    def field_by_id(self) -> Mapping[identifiers.ColumnId, field.Field]:
        return {field.id: field for field in self.fields}
 
+    @property
+    def _node_expressions(
+        self,
+    ) -> Sequence[Union[expression.Expression, expression.Aggregation]]:
+        """List of scalar expressions. Intended for checking engine compatibility with used ops."""
+        return ()
+
    # Plan algorithms
    def unique_nodes(
        self: BigFrameNode,
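A hypothetical consumer of the new `_node_expressions` hook: an execution engine could walk a plan and vet every scalar expression for op support. `unique_nodes` appears in the surrounding context; the helper below is illustrative, not part of this commit:

```python
def plan_is_supported(root, supported_expression_types) -> bool:
    # Inspect every node's scalar expressions (the default is an empty tuple).
    return all(
        isinstance(expr, supported_expression_types)
        for node in root.unique_nodes()
        for expr in node._node_expressions
    )
```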

bigframes/core/blocks.py

Lines changed: 19 additions & 73 deletions

@@ -36,8 +36,7 @@
 import google.cloud.bigquery as bigquery
 import numpy
 import pandas as pd
-# pyarrow is imported below where needed, but not at top-level if only used for type hints by Session
-# import pyarrow as pa
+import pyarrow as pa
 
 from bigframes import session
 from bigframes._config import sampling_options
@@ -160,89 +159,38 @@ def __init__(
    @classmethod
    def from_local(
        cls,
-        data: Union[pd.DataFrame, pd.Series],
+        data: pd.DataFrame,
        session: bigframes.Session,
        *,
        cache_transpose: bool = True,
    ) -> Block:
-        # Assumes caller has already converted datatypes to bigframes ones where appropriate (e.g. for pandas inputs)
-        index_cols: typing.Sequence[str]
-        value_cols: typing.Sequence[str]
-        index_names: typing.Sequence[typing.Optional[Label]]
-        column_names: pd.Index
-        managed_data: local_data.ManagedArrowTable
-        array_value_column_ids: typing.Sequence[str]
-
-
-        if isinstance(data, pd.Series):
-            # Standardize column names to avoid collisions, eg. index named "value" and series also named "value"
-            original_index_names = list(name if name is not None else f"level_{i}" for i, name in enumerate(data.index.names))
-            original_series_name = data.name if data.name is not None else "value"
-
-            # Ensure series name doesn't clash with index names
-            series_name_std = utils.get_standardized_id(original_series_name)
-            index_names_std = [utils.get_standardized_id(name) for name in original_index_names]
-            while series_name_std in index_names_std:
-                series_name_std = series_name_std + "_series"
-
-            value_cols = [series_name_std]
-            index_cols = index_names_std
-
-            pd_data_reset = data.rename(series_name_std).reset_index(names=index_names_std)
-            managed_data = local_data.ManagedArrowTable.from_pandas(pd_data_reset)
-            index_names = list(data.index.names)
-            column_names = pd.Index([data.name])
-            array_value_column_ids = [*index_cols, *value_cols]
-
-        elif isinstance(data, pd.DataFrame):
-            original_index_names = list(name if name is not None else f"level_{i}" for i, name in enumerate(data.index.names))
-            original_column_names = list(data.columns)
-
-            # Standardize all names
-            index_names_std = [utils.get_standardized_id(name) for name in original_index_names]
-            column_names_std = [utils.get_standardized_id(name) for name in original_column_names]
-
-            # Resolve clashes between index and column names after standardization
-            final_column_names_std = []
-            for name_std in column_names_std:
-                temp_name_std = name_std
-                while temp_name_std in index_names_std:
-                    temp_name_std = temp_name_std + "_col"
-                final_column_names_std.append(temp_name_std)
-
-            value_cols = final_column_names_std
-            index_cols = index_names_std
-
-            pd_data_prepared = data.copy(deep=False)
-            pd_data_prepared.columns = value_cols
-            pd_data_prepared = pd_data_prepared.reset_index(names=index_cols)
-
-            managed_data = local_data.ManagedArrowTable.from_pandas(pd_data_prepared)
-            index_names = list(data.index.names)
-            column_names = data.columns.copy()
-            array_value_column_ids = [*index_cols, *value_cols]
-        else:
-            raise TypeError(
-                f"data must be pandas DataFrame or Series. Got: {type(data)}"
-            )
-
-        array_value = core.ArrayValue.from_managed(managed_data, session=session, default_column_ids=array_value_column_ids)
-
+        # Assumes caller has already converted datatypes to bigframes ones.
+        pd_data = data
+        column_labels = pd_data.columns
+        index_labels = list(pd_data.index.names)
+
+        # unique internal ids
+        column_ids = [f"column_{i}" for i in range(len(pd_data.columns))]
+        index_ids = [f"level_{level}" for level in range(pd_data.index.nlevels)]
+
+        pd_data = pd_data.set_axis(column_ids, axis=1)
+        pd_data = pd_data.reset_index(names=index_ids)
+        managed_data = local_data.ManagedArrowTable.from_pandas(pd_data)
+        array_value = core.ArrayValue.from_managed(managed_data, session=session)
        block = cls(
            array_value,
-            column_labels=column_names,
-            index_columns=index_cols,
-            index_labels=index_names,
+            column_labels=column_labels,
+            index_columns=index_ids,
+            index_labels=index_labels,
        )
-
        if cache_transpose:
            try:
                # this cache will help when aligning on axis=1
                block = block.with_transpose_cache(
                    cls.from_local(data.T, session, cache_transpose=False)
                )
            except Exception:
-                pass  # Transposition might fail for various reasons, non-critical.
+                pass
        return block
 
    @property
@@ -3397,5 +3345,3 @@ def _pd_index_to_array_value(
        rows.append(row)
 
    return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
-
-[end of bigframes/core/blocks.py]
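The refactored `from_local` replaces label standardization with positional internal ids, which sidesteps name collisions entirely. A pandas-only illustration of the id scheme used above:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Same scheme as from_local: positional ids for columns and index levels.
column_ids = [f"column_{i}" for i in range(len(df.columns))]
index_ids = [f"level_{level}" for level in range(df.index.nlevels)]

flat = df.set_axis(column_ids, axis=1).reset_index(names=index_ids)
print(list(flat.columns))  # ['level_0', 'column_0', 'column_1']
```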

bigframes/core/compile/compiler.py

Lines changed: 1 addition & 0 deletions

@@ -65,6 +65,7 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
    ordering: Optional[bf_ordering.RowOrdering] = result_node.order_by
    result_node = dataclasses.replace(result_node, order_by=None)
    result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node))
+    result_node = cast(nodes.ResultNode, rewrites.defer_selection(result_node))
    sql = compile_result_node(result_node)
    # Return the ordering iff no extra columns are needed to define the row order
    if ordering is not None:

bigframes/core/compile/googlesql/query.py

Lines changed: 8 additions & 2 deletions

@@ -83,7 +83,13 @@ def _select_field(self, field) -> SelectExpression:
            return SelectExpression(expression=expr.ColumnExpression(name=field))
 
        else:
-            alias = field[1] if (field[0] != field[1]) else None
+            alias = (
+                expr.AliasExpression(field[1])
+                if isinstance(field[1], str)
+                else field[1]
+                if (field[0] != field[1])
+                else None
+            )
            return SelectExpression(
                expression=expr.ColumnExpression(name=field[0]), alias=alias
            )
@@ -119,7 +125,7 @@ def sql(self) -> str:
        return "\n".join(text)
 
 
-@dataclasses.dataclass
+@dataclasses.dataclass(frozen=True)
class SelectExpression(abc.SQLSyntax):
    """This class represents `select_expression`."""
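Why `frozen=True` matters here: frozen dataclasses are immutable and hashable, so equal select expressions can live in sets and dicts and be deduplicated. A simplified stand-in (not the real `SelectExpression`):

```python
import dataclasses
from typing import Optional

@dataclasses.dataclass(frozen=True)
class SelectExpr:  # simplified stand-in for SelectExpression
    name: str
    alias: Optional[str] = None

a = SelectExpr("col_a", "a")
b = SelectExpr("col_a", "a")
assert a == b              # value equality from the dataclass
assert len({a, b}) == 1    # hashable, so duplicates collapse in a set
```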
