Skip to content

Commit d97cfac

Browse files
committed
Merge branch 'main' into shuowei-anywidget-col
2 parents b75cb8c + 196f6df commit d97cfac

File tree

47 files changed

+864
-252
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+864
-252
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.28.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.27.0...v2.28.0) (2025-11-03)
8+
9+
10+
### Features
11+
12+
* Add bigframes.bigquery.st_simplify ([#2210](https://github.com/googleapis/python-bigquery-dataframes/issues/2210)) ([ecee2bc](https://github.com/googleapis/python-bigquery-dataframes/commit/ecee2bc6ada0bc968fc56ed7194dc8c043547e93))
13+
* Add Series.dt.day_name ([#2218](https://github.com/googleapis/python-bigquery-dataframes/issues/2218)) ([5e006e4](https://github.com/googleapis/python-bigquery-dataframes/commit/5e006e404b65c32e5b1d342ebfcfce59ee592c8c))
14+
* Polars engine supports std, var ([#2215](https://github.com/googleapis/python-bigquery-dataframes/issues/2215)) ([ef5e83a](https://github.com/googleapis/python-bigquery-dataframes/commit/ef5e83acedf005cbe1e6ad174bec523ac50517d7))
15+
* Support INFORMATION_SCHEMA views in `read_gbq` ([#1895](https://github.com/googleapis/python-bigquery-dataframes/issues/1895)) ([d97cafc](https://github.com/googleapis/python-bigquery-dataframes/commit/d97cafcb5921fca2351b18011b0e54e2631cc53d))
16+
* Support some python standard lib callables in apply/combine ([#2187](https://github.com/googleapis/python-bigquery-dataframes/issues/2187)) ([86a2756](https://github.com/googleapis/python-bigquery-dataframes/commit/86a27564b48b854a32b3d11cd2105aa0fa496279))
17+
18+
19+
### Bug Fixes
20+
21+
* Correct connection normalization in blob system tests ([#2222](https://github.com/googleapis/python-bigquery-dataframes/issues/2222)) ([a0e1e50](https://github.com/googleapis/python-bigquery-dataframes/commit/a0e1e50e47c758bdceb54d04180ed36b35cf2e35))
22+
* Improve error handling in blob operations ([#2194](https://github.com/googleapis/python-bigquery-dataframes/issues/2194)) ([d410046](https://github.com/googleapis/python-bigquery-dataframes/commit/d4100466612df0523d01ed01ca1e115dabd6ef45))
23+
* Resolve AttributeError in TableWidget and improve initialization ([#1937](https://github.com/googleapis/python-bigquery-dataframes/issues/1937)) ([4c4c9b1](https://github.com/googleapis/python-bigquery-dataframes/commit/4c4c9b14657b7cda1940ef39e7d4db20a9ff5308))
24+
25+
26+
### Documentation
27+
28+
* Update bq_dataframes_llm_output_schema.ipynb ([#2004](https://github.com/googleapis/python-bigquery-dataframes/issues/2004)) ([316ba9f](https://github.com/googleapis/python-bigquery-dataframes/commit/316ba9f557d792117d5a7845d7567498f78dd513))
29+
730
## [2.27.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.26.0...v2.27.0) (2025-10-24)
831

932

bigframes/core/blocks.py

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
import bigframes.operations.aggregations as agg_ops
6969
from bigframes.session import dry_runs, execution_spec
7070
from bigframes.session import executor as executors
71+
from bigframes.session._io import pandas as io_pandas
7172

7273
# Type constraint for wherever column labels are used
7374
Label = typing.Hashable
@@ -711,40 +712,15 @@ def to_pandas_batches(
711712
# To reduce the number of edge cases to consider when working with the
712713
# results of this, always return at least one DataFrame. See:
713714
# b/428918844.
714-
empty_val = pd.DataFrame(
715-
{
716-
col: pd.Series([], dtype=self.expr.get_column_type(col))
717-
for col in itertools.chain(self.value_columns, self.index_columns)
718-
}
719-
)
720-
series_map = {}
721-
for col in itertools.chain(self.value_columns, self.index_columns):
722-
dtype = self.expr.get_column_type(col)
723-
if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype):
724-
# Due to a limitation in Apache Arrow (#45262), JSON columns are not
725-
# natively supported by the to_pandas_batches() method, which is
726-
# used by the anywidget backend.
727-
# Workaround for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
728-
# PyArrow doesn't support creating an empty array with db_dtypes.JSONArrowType,
729-
# especially when nested.
730-
# Create with string type and then cast.
731-
732-
# MyPy doesn't automatically narrow the type of 'dtype' here,
733-
# so we add an explicit check.
734-
if isinstance(dtype, pd.ArrowDtype):
735-
safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string(
736-
dtype.pyarrow_dtype
737-
)
738-
safe_dtype = pd.ArrowDtype(safe_pa_type)
739-
series_map[col] = pd.Series([], dtype=safe_dtype).astype(dtype)
740-
else:
741-
# This branch should ideally not be reached if
742-
# contains_db_dtypes_json_dtype is accurate,
743-
# but it's here for MyPy's sake.
744-
series_map[col] = pd.Series([], dtype=dtype)
745-
else:
746-
series_map[col] = pd.Series([], dtype=dtype)
747-
empty_val = pd.DataFrame(series_map)
715+
try:
716+
empty_arrow_table = self.expr.schema.to_pyarrow().empty_table()
717+
except pa.ArrowNotImplementedError:
718+
# Bug with some pyarrow versions(https://github.com/apache/arrow/issues/45262),
719+
# empty_table only supports base storage types, not extension types.
720+
empty_arrow_table = self.expr.schema.to_pyarrow(
721+
use_storage_types=True
722+
).empty_table()
723+
empty_val = io_pandas.arrow_to_pandas(empty_arrow_table, self.expr.schema)
748724
dfs = map(
749725
lambda a: a[0],
750726
itertools.zip_longest(
@@ -2020,6 +1996,31 @@ def _generate_resample_label(
20201996
Literal["epoch", "start", "start_day", "end", "end_day"],
20211997
] = "start_day",
20221998
) -> Block:
1999+
if not isinstance(rule, str):
2000+
raise NotImplementedError(
2001+
f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
2002+
)
2003+
2004+
if rule in ("ME", "YE", "QE", "BME", "BA", "BQE", "W"):
2005+
raise NotImplementedError(
2006+
f"Offset strings 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', 'W' are not currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
2007+
)
2008+
2009+
if closed == "right":
2010+
raise NotImplementedError(
2011+
f"Only closed='left' is currently supported. {constants.FEEDBACK_LINK}",
2012+
)
2013+
2014+
if label == "right":
2015+
raise NotImplementedError(
2016+
f"Only label='left' is currently supported. {constants.FEEDBACK_LINK}",
2017+
)
2018+
2019+
if origin not in ("epoch", "start", "start_day"):
2020+
raise NotImplementedError(
2021+
f"Only origin='epoch', 'start', 'start_day' are currently supported, but got {repr(origin)}. {constants.FEEDBACK_LINK}"
2022+
)
2023+
20232024
# Validate and resolve the index or column to use for grouping
20242025
if on is None:
20252026
if len(self.index_columns) == 0:

bigframes/core/compile/polars/compiler.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,9 +535,11 @@ def compile_agg_op(
535535
if isinstance(op, agg_ops.StdOp):
536536
return pl.std(inputs[0])
537537
if isinstance(op, agg_ops.VarOp):
538-
return pl.var(inputs[0])
538+
# polars var doesn't support decimal, so use std instead
539+
return pl.std(inputs[0]).pow(2)
539540
if isinstance(op, agg_ops.PopVarOp):
540-
return pl.var(inputs[0], ddof=0)
541+
# polars var doesn't support decimal, so use std instead
542+
return pl.std(inputs[0], ddof=0).pow(2)
541543
if isinstance(op, agg_ops.FirstNonNullOp):
542544
return pl.col(*inputs).drop_nulls().first()
543545
if isinstance(op, agg_ops.LastNonNullOp):

bigframes/core/compile/sqlglot/aggregations/op_registration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,5 @@ def arg_checker(*args, **kwargs):
5252
def __getitem__(self, op: str | agg_ops.WindowOp) -> CompilationFunc:
5353
key = op if isinstance(op, type) else type(op)
5454
if str(key) not in self._registered_ops:
55-
raise ValueError(f"{key} is already not registered")
55+
raise ValueError(f"{key} is not registered")
5656
return self._registered_ops[str(key)]

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,20 @@ def _(
239239
return apply_window_if_present(sge.func("MIN", column.expr), window)
240240

241241

242+
@UNARY_OP_REGISTRATION.register(agg_ops.PopVarOp)
243+
def _(
244+
op: agg_ops.PopVarOp,
245+
column: typed_expr.TypedExpr,
246+
window: typing.Optional[window_spec.WindowSpec] = None,
247+
) -> sge.Expression:
248+
expr = column.expr
249+
if column.dtype == dtypes.BOOL_DTYPE:
250+
expr = sge.Cast(this=expr, to="INT64")
251+
252+
expr = sge.func("VAR_POP", expr)
253+
return apply_window_if_present(expr, window)
254+
255+
242256
@UNARY_OP_REGISTRATION.register(agg_ops.QuantileOp)
243257
def _(
244258
op: agg_ops.QuantileOp,
@@ -278,6 +292,22 @@ def _(
278292
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
279293

280294

295+
@UNARY_OP_REGISTRATION.register(agg_ops.StdOp)
296+
def _(
297+
op: agg_ops.StdOp,
298+
column: typed_expr.TypedExpr,
299+
window: typing.Optional[window_spec.WindowSpec] = None,
300+
) -> sge.Expression:
301+
expr = column.expr
302+
if column.dtype == dtypes.BOOL_DTYPE:
303+
expr = sge.Cast(this=expr, to="INT64")
304+
305+
expr = sge.func("STDDEV", expr)
306+
if op.should_floor_result or column.dtype == dtypes.TIMEDELTA_DTYPE:
307+
expr = sge.Cast(this=sge.func("FLOOR", expr), to="INT64")
308+
return apply_window_if_present(expr, window)
309+
310+
281311
@UNARY_OP_REGISTRATION.register(agg_ops.ShiftOp)
282312
def _(
283313
op: agg_ops.ShiftOp,
@@ -331,3 +361,17 @@ def _(
331361
expression=shifted,
332362
unit=sge.Identifier(this="MICROSECOND"),
333363
)
364+
365+
366+
@UNARY_OP_REGISTRATION.register(agg_ops.VarOp)
367+
def _(
368+
op: agg_ops.VarOp,
369+
column: typed_expr.TypedExpr,
370+
window: typing.Optional[window_spec.WindowSpec] = None,
371+
) -> sge.Expression:
372+
expr = column.expr
373+
if column.dtype == dtypes.BOOL_DTYPE:
374+
expr = sge.Cast(this=expr, to="INT64")
375+
376+
expr = sge.func("VAR_SAMP", expr)
377+
return apply_window_if_present(expr, window)

bigframes/core/compile/sqlglot/expressions/comparison_ops.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
109109
return sge.LTE(this=left_expr, expression=right_expr)
110110

111111

112+
@register_binary_op(ops.maximum_op)
113+
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
114+
return sge.Greatest(expressions=[left.expr, right.expr])
115+
116+
112117
@register_binary_op(ops.minimum_op)
113118
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
114119
return sge.Least(this=left.expr, expressions=right.expr)

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2525

2626
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
27+
register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op
2728
register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op
2829
register_ternary_op = scalar_compiler.scalar_op_compiler.register_ternary_op
2930

@@ -159,6 +160,13 @@ def _(*cases_and_outputs: TypedExpr) -> sge.Expression:
159160
)
160161

161162

163+
@register_binary_op(ops.coalesce_op)
164+
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
165+
if left.expr == right.expr:
166+
return left.expr
167+
return sge.Coalesce(this=left.expr, expressions=[right.expr])
168+
169+
162170
@register_nary_op(ops.RowKey)
163171
def _(*values: TypedExpr) -> sge.Expression:
164172
# All inputs into hash must be non-null or resulting hash will be null

bigframes/core/nodes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1627,7 +1627,7 @@ class ResultNode(UnaryNode):
16271627
# TODO: CTE definitions
16281628

16291629
def _validate(self):
1630-
for ref, name in self.output_cols:
1630+
for ref, _ in self.output_cols:
16311631
assert ref.id in self.child.ids
16321632

16331633
@property

bigframes/core/rewrite/identifiers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ def remap_variables(
5757
new_root = root.transform_children(lambda node: remapped_children[node])
5858

5959
# Step 3: Transform the current node using the mappings from its children.
60+
# "reversed" is required for InNode so that in case of a duplicate column ID,
61+
# the left child's mapping is the one that's kept.
6062
downstream_mappings: dict[identifiers.ColumnId, identifiers.ColumnId] = {
61-
k: v for mapping in new_child_mappings for k, v in mapping.items()
63+
k: v for mapping in reversed(new_child_mappings) for k, v in mapping.items()
6264
}
6365
if isinstance(new_root, nodes.InNode):
6466
new_root = typing.cast(nodes.InNode, new_root)

bigframes/dataframe.py

Lines changed: 5 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4182,10 +4182,12 @@ def _split(
41824182
return [DataFrame(block) for block in blocks]
41834183

41844184
@validations.requires_ordering()
4185-
def _resample(
4185+
def resample(
41864186
self,
41874187
rule: str,
41884188
*,
4189+
closed: Optional[Literal["right", "left"]] = None,
4190+
label: Optional[Literal["right", "left"]] = None,
41894191
on: blocks.Label = None,
41904192
level: Optional[LevelsType] = None,
41914193
origin: Union[
@@ -4195,64 +4197,10 @@ def _resample(
41954197
Literal["epoch", "start", "start_day", "end", "end_day"],
41964198
] = "start_day",
41974199
) -> bigframes.core.groupby.DataFrameGroupBy:
4198-
"""Internal function to support resample. Resample time-series data.
4199-
4200-
**Examples:**
4201-
4202-
>>> import bigframes.pandas as bpd
4203-
>>> data = {
4204-
... "timestamp_col": pd.date_range(
4205-
... start="2021-01-01 13:00:00", periods=30, freq="1s"
4206-
... ),
4207-
... "int64_col": range(30),
4208-
... "int64_too": range(10, 40),
4209-
... }
4210-
4211-
Resample on a DataFrame with index:
4212-
4213-
>>> df = bpd.DataFrame(data).set_index("timestamp_col")
4214-
>>> df._resample(rule="7s").min()
4215-
int64_col int64_too
4216-
2021-01-01 12:59:55 0 10
4217-
2021-01-01 13:00:02 2 12
4218-
2021-01-01 13:00:09 9 19
4219-
2021-01-01 13:00:16 16 26
4220-
2021-01-01 13:00:23 23 33
4221-
<BLANKLINE>
4222-
[5 rows x 2 columns]
4223-
4224-
Resample with column and origin set to 'start':
4225-
4226-
>>> df = bpd.DataFrame(data)
4227-
>>> df._resample(rule="7s", on = "timestamp_col", origin="start").min()
4228-
int64_col int64_too
4229-
2021-01-01 13:00:00 0 10
4230-
2021-01-01 13:00:07 7 17
4231-
2021-01-01 13:00:14 14 24
4232-
2021-01-01 13:00:21 21 31
4233-
2021-01-01 13:00:28 28 38
4234-
<BLANKLINE>
4235-
[5 rows x 2 columns]
4236-
4237-
Args:
4238-
rule (str):
4239-
The offset string representing target conversion.
4240-
on (str, default None):
4241-
For a DataFrame, column to use instead of index for resampling. Column
4242-
must be datetime-like.
4243-
level (str or int, default None):
4244-
For a MultiIndex, level (name or number) to use for resampling.
4245-
level must be datetime-like.
4246-
origin(str, default 'start_day'):
4247-
The timestamp on which to adjust the grouping. Must be one of the following:
4248-
'epoch': origin is 1970-01-01
4249-
'start': origin is the first value of the timeseries
4250-
'start_day': origin is the first day at midnight of the timeseries
4251-
Returns:
4252-
DataFrameGroupBy: DataFrameGroupBy object.
4253-
"""
42544200
block = self._block._generate_resample_label(
42554201
rule=rule,
4202+
closed=closed,
4203+
label=label,
42564204
on=on,
42574205
level=level,
42584206
origin=origin,

0 commit comments

Comments
 (0)