Commit 5d9e208

Merge remote-tracking branch 'github/main' into polars_semi
2 parents: 98b300c + ac55aae

File tree: 49 files changed, +1621 / -388 lines


CHANGELOG.md

Lines changed: 23 additions & 0 deletions
@@ -4,6 +4,29 @@

 [1]: https://pypi.org/project/bigframes/#history

+## [2.7.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.6.0...v2.7.0) (2025-06-16)
+
+
+### Features
+
+* Add bbq.json_query_array and warn bbq.json_extract_array deprecated ([#1811](https://github.com/googleapis/python-bigquery-dataframes/issues/1811)) ([dc9eb27](https://github.com/googleapis/python-bigquery-dataframes/commit/dc9eb27fa75e90c2c95a0619551bf67aea6ef63b))
+* Add bbq.json_value_array and deprecate bbq.json_extract_string_array ([#1818](https://github.com/googleapis/python-bigquery-dataframes/issues/1818)) ([019051e](https://github.com/googleapis/python-bigquery-dataframes/commit/019051e453d81769891aa398475ebd04d1826e81))
+* Add groupby cumcount ([#1798](https://github.com/googleapis/python-bigquery-dataframes/issues/1798)) ([18f43e8](https://github.com/googleapis/python-bigquery-dataframes/commit/18f43e8b58e03a27b021bce07566a3d006ac3679))
+* Support custom build service account in `remote_function` ([#1796](https://github.com/googleapis/python-bigquery-dataframes/issues/1796)) ([e586151](https://github.com/googleapis/python-bigquery-dataframes/commit/e586151df81917b49f702ae496aaacbd02931636))
+
+
+### Bug Fixes
+
+* Correct read_csv behaviours with use_cols, names, index_col ([#1804](https://github.com/googleapis/python-bigquery-dataframes/issues/1804)) ([855031a](https://github.com/googleapis/python-bigquery-dataframes/commit/855031a316a6957731a5d1c5e59dedb9757d9f7a))
+* Fix single row broadcast with null index ([#1803](https://github.com/googleapis/python-bigquery-dataframes/issues/1803)) ([080eb7b](https://github.com/googleapis/python-bigquery-dataframes/commit/080eb7be3cde591e08cad0d5c52c68cc0b25ade8))
+
+
+### Documentation
+
+* Document how to use ai.map() for information extraction ([#1808](https://github.com/googleapis/python-bigquery-dataframes/issues/1808)) ([b586746](https://github.com/googleapis/python-bigquery-dataframes/commit/b5867464a5bf30300dcfc069eda546b11f03146c))
+* Rearrange README.rst to include a short code sample ([#1812](https://github.com/googleapis/python-bigquery-dataframes/issues/1812)) ([f6265db](https://github.com/googleapis/python-bigquery-dataframes/commit/f6265dbb8e22de81bb59c7def175cd325e85c041))
+* Use pandas API instead of pandas-like or pandas-compatible ([#1825](https://github.com/googleapis/python-bigquery-dataframes/issues/1825)) ([aa32369](https://github.com/googleapis/python-bigquery-dataframes/commit/aa323694e161f558bc5e60490c2f21008961e2ca))
+
 ## [2.6.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.5.0...v2.6.0) (2025-06-09)
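Two of the feature entries above deprecate `bbq` functions in favor of newer JSON APIs, warning at call time rather than breaking callers. A minimal sketch of that call-time deprecation pattern, using only the standard `warnings` module (the function name and pass-through behavior here are hypothetical stand-ins, not the bigframes code):

```python
import warnings


def json_extract_string_array_sketch(values):
    # Hypothetical stand-in for a deprecated API: warn once per call,
    # then still perform the old behavior so existing code keeps working.
    warnings.warn(
        "`json_extract_string_array` is deprecated and will be removed in a "
        "future version. Use `json_value_array` instead.",
        category=UserWarning,
        stacklevel=2,
    )
    return values  # the real function would compute its usual result


# Callers can verify (or silence) the warning with warnings.catch_warnings.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    result = json_extract_string_array_sketch(["[1, 2]"])
```

Warning at call time (rather than import time) means only code paths that actually use the deprecated function see the message.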

README.rst

Lines changed: 2 additions & 1 deletion
@@ -6,7 +6,8 @@ BigQuery DataFrames (BigFrames)

 BigQuery DataFrames (also known as BigFrames) provides a Pythonic DataFrame
 and machine learning (ML) API powered by the BigQuery engine.

-* ``bigframes.pandas`` provides a pandas-compatible API for analytics.
+* ``bigframes.pandas`` provides a pandas API for analytics. Many workloads can be
+  migrated from pandas to bigframes by just changing a few imports.
 * ``bigframes.ml`` provides a scikit-learn-like API for ML.

 BigQuery DataFrames is an open-source package.

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -43,6 +43,7 @@
     json_query_array,
     json_set,
     json_value,
+    json_value_array,
     parse_json,
 )
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
@@ -71,6 +72,7 @@
     "json_query_array",
     "json_set",
     "json_value",
+    "json_value_array",
     "parse_json",
     # search ops
     "create_vector_index",

bigframes/bigquery/_operations/json.py

Lines changed: 65 additions & 1 deletion
@@ -196,6 +196,10 @@ def json_extract_string_array(
     values in the array. This function uses single quotes and brackets to escape
     invalid JSONPath characters in JSON keys.

+    .. deprecated:: 2.6.0
+        The ``json_extract_string_array`` is deprecated and will be removed in a future version.
+        Use ``json_value_array`` instead.
+
     **Examples:**

         >>> import bigframes.pandas as bpd
@@ -233,6 +237,11 @@ def json_extract_string_array(
     Returns:
         bigframes.series.Series: A new Series with the parsed arrays from the input.
     """
+    msg = (
+        "The `json_extract_string_array` is deprecated and will be removed in a future version. "
+        "Use `json_value_array` instead."
+    )
+    warnings.warn(bfe.format_message(msg), category=UserWarning)
     array_series = input._apply_unary_op(
         ops.JSONExtractStringArray(json_path=json_path)
     )
@@ -334,7 +343,7 @@ def json_query_array(

 def json_value(
     input: series.Series,
-    json_path: str,
+    json_path: str = "$",
 ) -> series.Series:
     """Extracts a JSON scalar value and converts it to a SQL ``STRING`` value. In
     addtion, this function:
@@ -366,6 +375,61 @@ def json_value(
     return input._apply_unary_op(ops.JSONValue(json_path=json_path))


+def json_value_array(
+    input: series.Series,
+    json_path: str = "$",
+) -> series.Series:
+    """
+    Extracts a JSON array of scalar values and converts it to a SQL ``ARRAY<STRING>``
+    value. In addition, this function:
+
+    - Removes the outermost quotes and unescapes the values.
+    - Returns a SQL ``NULL`` if the selected value isn't an array or not an array
+      containing only scalar values.
+    - Uses double quotes to escape invalid ``JSON_PATH`` characters in JSON keys.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> bpd.options.display.progress_bar = None
+
+        >>> s = bpd.Series(['[1, 2, 3]', '[4, 5]'])
+        >>> bbq.json_value_array(s)
+        0    ['1' '2' '3']
+        1        ['4' '5']
+        dtype: list<item: string>[pyarrow]
+
+        >>> s = bpd.Series([
+        ...     '{"fruits": ["apples", "oranges", "grapes"]',
+        ...     '{"fruits": ["guava", "grapes"]}'
+        ... ])
+        >>> bbq.json_value_array(s, "$.fruits")
+        0    ['apples' 'oranges' 'grapes']
+        1              ['guava' 'grapes']
+        dtype: list<item: string>[pyarrow]
+
+        >>> s = bpd.Series([
+        ...     '{"fruits": {"color": "red", "names": ["apple","cherry"]}}',
+        ...     '{"fruits": {"color": "green", "names": ["guava", "grapes"]}}'
+        ... ])
+        >>> bbq.json_value_array(s, "$.fruits.names")
+        0    ['apple' 'cherry']
+        1    ['guava' 'grapes']
+        dtype: list<item: string>[pyarrow]
+
+    Args:
+        input (bigframes.series.Series):
+            The Series containing JSON data (as native JSON objects or JSON-formatted strings).
+        json_path (str):
+            The JSON path identifying the data that you want to obtain from the input.
+
+    Returns:
+        bigframes.series.Series: A new Series with the parsed arrays from the input.
+    """
+    return input._apply_unary_op(ops.JSONValueArray(json_path=json_path))
+
+
 @utils.preview(name="The JSON-related API `parse_json`")
 def parse_json(
     input: series.Series,
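In bigframes, `json_value_array` compiles down to BigQuery's `JSON_VALUE_ARRAY` SQL function, so nothing runs locally. The semantics the docstring describes can still be approximated in plain Python for intuition. This sketch (`json_value_array_py` is a hypothetical helper, not bigframes code) handles only simple `$.a.b` dot paths, not the full JSONPath quoting rules:

```python
import json
from typing import List, Optional


def json_value_array_py(doc: str, json_path: str = "$") -> Optional[List[str]]:
    """Approximate BigQuery JSON_VALUE_ARRAY for simple dot paths."""
    try:
        value = json.loads(doc)
    except json.JSONDecodeError:
        return None  # invalid JSON document yields SQL NULL in BigQuery
    # Walk a simple "$.a.b" dot path (no quoted keys or brackets handled here).
    for key in [p for p in json_path.lstrip("$").split(".") if p]:
        if not isinstance(value, dict) or key not in value:
            return None
        value = value[key]
    # NULL unless the selected value is an array containing only scalars.
    if not isinstance(value, list) or any(
        isinstance(v, (dict, list)) for v in value
    ):
        return None
    return [str(v) for v in value]


print(json_value_array_py('[1, 2, 3]'))  # ['1', '2', '3']
print(json_value_array_py('{"fruits": ["guava", "grapes"]}', "$.fruits"))
```

Note the second docstring example above feeds one malformed row (a missing closing brace); in BigQuery that row simply produces `NULL`, which the `try`/`except` branch here mirrors.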

bigframes/core/bigframe_node.py

Lines changed: 20 additions & 3 deletions
@@ -20,9 +20,19 @@
 import functools
 import itertools
 import typing
-from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Set, Tuple
-
-from bigframes.core import field, identifiers
+from typing import (
+    Callable,
+    Dict,
+    Generator,
+    Iterable,
+    Mapping,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+)
+
+from bigframes.core import expression, field, identifiers
 import bigframes.core.schema as schemata
 import bigframes.dtypes

@@ -278,6 +288,13 @@ def _dtype_lookup(self) -> dict[identifiers.ColumnId, bigframes.dtypes.Dtype]:
     def field_by_id(self) -> Mapping[identifiers.ColumnId, field.Field]:
         return {field.id: field for field in self.fields}

+    @property
+    def _node_expressions(
+        self,
+    ) -> Sequence[Union[expression.Expression, expression.Aggregation]]:
+        """List of scalar expressions. Intended for checking engine compatibility with used ops."""
+        return ()
+
     # Plan algorithms
     def unique_nodes(
         self: BigFrameNode,
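The new `_node_expressions` property defaults to an empty tuple on the base `BigFrameNode`, so only node types that actually carry scalar expressions need to override it; an engine can then walk a plan tree and collect every expression to check whether it supports the ops used. A generic sketch of that base-default pattern, with hypothetical node classes and string stand-ins for expressions:

```python
import dataclasses
from typing import List, Sequence, Tuple


@dataclasses.dataclass(frozen=True)
class Node:
    children: Tuple["Node", ...] = ()

    @property
    def node_expressions(self) -> Sequence[str]:
        # Base default mirrors the commit's `_node_expressions`: most nodes
        # carry no scalar expressions, so subclasses override only when needed.
        return ()


@dataclasses.dataclass(frozen=True)
class ProjectionNode(Node):
    exprs: Tuple[str, ...] = ()

    @property
    def node_expressions(self) -> Sequence[str]:
        return self.exprs


def all_expressions(root: Node) -> List[str]:
    """Collect expressions over the whole tree, e.g. for engine-support checks."""
    out = list(root.node_expressions)
    for child in root.children:
        out.extend(all_expressions(child))
    return out


leaf = Node()
plan = ProjectionNode(children=(leaf,), exprs=("a + b", "upper(c)"))
print(all_expressions(plan))  # ['a + b', 'upper(c)']
```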

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 12 additions & 0 deletions
@@ -1448,6 +1448,11 @@ def json_value_op_impl(x: ibis_types.Value, op: ops.JSONValue):
     return json_value(json_obj=x, json_path=op.json_path)


+@scalar_op_compiler.register_unary_op(ops.JSONValueArray, pass_op=True)
+def json_value_array_op_impl(x: ibis_types.Value, op: ops.JSONValueArray):
+    return json_value_array(json_obj=x, json_path=op.json_path)
+
+
 # Blob Ops
 @scalar_op_compiler.register_unary_op(ops.obj_fetch_metadata_op)
 def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value):
@@ -2157,6 +2162,13 @@ def json_value(  # type: ignore[empty-body]
     """Retrieve value of a JSON field as plain STRING."""


+@ibis_udf.scalar.builtin(name="json_value_array")
+def json_value_array(  # type: ignore[empty-body]
+    json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.String
+) -> ibis_dtypes.Array[ibis_dtypes.String]:
+    """Extracts a JSON array and converts it to a SQL ARRAY of STRINGs."""
+
+
 @ibis_udf.scalar.builtin(name="INT64")
 def cast_json_to_int64(json_str: ibis_dtypes.JSON) -> ibis_dtypes.Int64:  # type: ignore[empty-body]
     """Converts a JSON number to a SQL INT64 value."""
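The `register_unary_op` decorator seen above is a registry pattern: each op class maps to a compile function, and `pass_op=True` lets implementations read op parameters such as `json_path`. A self-contained sketch of that mechanism (class names and the SQL it emits are illustrative, not the ibis-backed bigframes compiler):

```python
from typing import Any, Callable, Dict, Type


class ScalarOpCompiler:
    """Minimal sketch of the registry behind `register_unary_op`."""

    def __init__(self) -> None:
        self._registry: Dict[str, Callable[[Any, Any], str]] = {}

    def register_unary_op(
        self, op_type: Type, pass_op: bool = False
    ) -> Callable:
        def decorator(impl: Callable) -> Callable:
            # Normalize every implementation to a (value, op) signature.
            if pass_op:
                self._registry[op_type.__name__] = impl
            else:
                self._registry[op_type.__name__] = lambda x, op: impl(x)
            return impl

        return decorator

    def compile(self, x: Any, op: Any) -> str:
        return self._registry[type(op).__name__](x, op)


class JSONValueArray:
    def __init__(self, json_path: str = "$") -> None:
        self.json_path = json_path


compiler = ScalarOpCompiler()


@compiler.register_unary_op(JSONValueArray, pass_op=True)
def json_value_array_op_impl(x, op):
    # Stand-in for the ibis builtin call in the real compiler.
    return f"JSON_VALUE_ARRAY({x}, '{op.json_path}')"


print(compiler.compile("col", JSONValueArray("$.fruits")))
# JSON_VALUE_ARRAY(col, '$.fruits')
```

Dispatching on the op's class keeps the compiler open for extension: supporting a new op, as this commit does, is one new registration rather than an edit to a central switch.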

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 33 additions & 5 deletions
@@ -119,15 +119,33 @@ def _remap_variables(self, node: nodes.ResultNode) -> nodes.ResultNode:
         return typing.cast(nodes.ResultNode, result_node)

     def _compile_result_node(self, root: nodes.ResultNode) -> str:
-        sqlglot_ir = self.compile_node(root.child)
-
+        # Have to bind schema as the final step before compilation.
+        root = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(root))
         selected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
             (name, scalar_compiler.compile_scalar_expression(ref))
             for ref, name in root.output_cols
         )
-        sqlglot_ir = sqlglot_ir.select(selected_cols)
+        # Skip squashing selections to ensure the right ordering and limit keys
+        sqlglot_ir = self.compile_node(root.child).select(
+            selected_cols, squash_selections=False
+        )
+
+        if root.order_by is not None:
+            ordering_cols = tuple(
+                sge.Ordered(
+                    this=scalar_compiler.compile_scalar_expression(
+                        ordering.scalar_expression
+                    ),
+                    desc=ordering.direction.is_ascending is False,
+                    nulls_first=ordering.na_last is False,
+                )
+                for ordering in root.order_by.all_ordering_columns
+            )
+            sqlglot_ir = sqlglot_ir.order_by(ordering_cols)
+
+        if root.limit is not None:
+            sqlglot_ir = sqlglot_ir.limit(root.limit)

-        # TODO: add order_by, limit to sqlglot_expr
         return sqlglot_ir.sql

     @functools.lru_cache(maxsize=5000)
@@ -190,9 +208,19 @@ def compile_projection(
         )
         return child.project(projected_cols)

+    @_compile_node.register
+    def compile_concat(
+        self, node: nodes.ConcatNode, *children: ir.SQLGlotIR
+    ) -> ir.SQLGlotIR:
+        output_ids = [id.sql for id in node.output_ids]
+        return ir.SQLGlotIR.from_union(
+            [child.expr for child in children],
+            output_ids=output_ids,
+            uid_gen=self.uid_gen,
+        )
+

 def _replace_unsupported_ops(node: nodes.BigFrameNode):
     node = nodes.bottom_up(node, rewrite.rewrite_slice)
-    node = nodes.bottom_up(node, schema_binding.bind_schema_to_expressions)
     node = nodes.bottom_up(node, rewrite.rewrite_range_rolling)
     return node
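The key change in `_compile_result_node` is that `ORDER BY` and `LIMIT` are now appended only at the outermost query, after the child plan has been compiled, resolving the old TODO. Stripped of the sqlglot IR, the shape of that finalization step looks like this (pure string-building sketch with hypothetical names, not the bigframes compiler):

```python
from dataclasses import dataclass
from typing import Optional, Tuple


@dataclass(frozen=True)
class OrderingColumn:
    name: str
    ascending: bool = True
    na_last: bool = True


def finalize_sql(
    base_sql: str,
    order_by: Tuple[OrderingColumn, ...] = (),
    limit: Optional[int] = None,
) -> str:
    """Append ORDER BY / LIMIT only on the outermost query, as the commit does."""
    sql = base_sql
    if order_by:
        parts = [
            f"`{c.name}` {'ASC' if c.ascending else 'DESC'} "
            f"{'NULLS LAST' if c.na_last else 'NULLS FIRST'}"
            for c in order_by
        ]
        sql += " ORDER BY " + ", ".join(parts)
    if limit is not None:
        sql += f" LIMIT {limit}"
    return sql


print(finalize_sql("SELECT a, b FROM t", (OrderingColumn("a", ascending=False),), 10))
# SELECT a, b FROM t ORDER BY `a` DESC NULLS LAST LIMIT 10
```

Keeping these clauses out of inner subqueries matters because SQL engines are free to ignore ordering in subqueries; only the outermost `ORDER BY` is guaranteed to shape the result.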

bigframes/core/compile/sqlglot/scalar_compiler.py

Lines changed: 22 additions & 7 deletions
@@ -13,15 +13,25 @@
 # limitations under the License.
 from __future__ import annotations

+import dataclasses
 import functools

 import sqlglot.expressions as sge

+from bigframes import dtypes
 from bigframes.core import expression
 import bigframes.core.compile.sqlglot.sqlglot_ir as ir
 import bigframes.operations as ops


+@dataclasses.dataclass(frozen=True)
+class TypedExpr:
+    """SQLGlot expression with type."""
+
+    expr: sge.Expression
+    dtype: dtypes.ExpressionType
+
+
 @functools.singledispatch
 def compile_scalar_expression(
     expression: expression.Expression,
@@ -50,9 +60,12 @@ def compile_constant_expression(


 @compile_scalar_expression.register
-def compile_op_expression(expr: expression.OpExpression):
+def compile_op_expression(expr: expression.OpExpression) -> sge.Expression:
     # Non-recursively compiles the children scalar expressions.
-    args = tuple(map(compile_scalar_expression, expr.inputs))
+    args = tuple(
+        TypedExpr(compile_scalar_expression(input), input.output_type)
+        for input in expr.inputs
+    )

     op = expr.op
     op_name = expr.op.__class__.__name__
@@ -79,8 +92,10 @@ def compile_op_expression(expr: expression.OpExpression):


 # TODO: add parenthesize for operators
-def compile_addop(
-    op: ops.AddOp, left: sge.Expression, right: sge.Expression
-) -> sge.Expression:
-    # TODO: support addop for string dtype.
-    return sge.Add(this=left, expression=right)
+def compile_addop(op: ops.AddOp, left: TypedExpr, right: TypedExpr) -> sge.Expression:
+    if left.dtype == dtypes.STRING_DTYPE and right.dtype == dtypes.STRING_DTYPE:
+        # String addition
+        return sge.Concat(expressions=[left.expr, right.expr])
+
+    # Numerical addition
+    return sge.Add(this=left.expr, expression=right.expr)
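Wrapping each compiled child in `TypedExpr` is what lets `compile_addop` become type-aware: with the dtype carried alongside the SQL expression, `+` can lower to string concatenation or numeric addition as appropriate. A self-contained sketch of that dispatch, emitting SQL strings instead of sqlglot nodes (the dtype labels are illustrative):

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class TypedExpr:
    """SQL fragment paired with its dtype, mirroring the commit's TypedExpr."""

    sql: str
    dtype: str  # e.g. "string" or "int64"


def compile_addop(left: TypedExpr, right: TypedExpr) -> str:
    # With dtypes attached, `+` picks CONCAT for string operands and
    # arithmetic addition otherwise, as the new sqlglot compile_addop does.
    if left.dtype == "string" and right.dtype == "string":
        return f"CONCAT({left.sql}, {right.sql})"
    return f"({left.sql} + {right.sql})"


print(compile_addop(TypedExpr("first_name", "string"), TypedExpr("last_name", "string")))
# CONCAT(first_name, last_name)
print(compile_addop(TypedExpr("x", "int64"), TypedExpr("y", "int64")))
# (x + y)
```

Without the dtype, the compiler would have to guess from the SQL text alone; this is why the commit threads `output_type` through every compiled child rather than passing bare expressions.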
