refactor: add agg_ops.MinOp and MaxOp for sqlglot compiler

chelsea-lin · chelsea-lin · commit 0776b8742f0f · 2025-09-17T23:57:25.000Z
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -20,7 +20,7 @@ repos:
     hooks:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer
-        exclude: "^tests/unit/core/compile/sqlglot/snapshots"
+        exclude: "^tests/unit/core/compile/sqlglot/.*snapshots"
     -   id: check-yaml
 -   repo: https://github.com/pycqa/isort
     rev: 5.12.0
diff --git a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py
@@ -46,18 +46,22 @@ def _(
     return apply_window_if_present(sge.func("COUNT", column.expr), window)
 
 
-@UNARY_OP_REGISTRATION.register(agg_ops.SumOp)
+@UNARY_OP_REGISTRATION.register(agg_ops.MaxOp)
 def _(
-    op: agg_ops.SumOp,
+    op: agg_ops.MaxOp,
     column: typed_expr.TypedExpr,
     window: typing.Optional[window_spec.WindowSpec] = None,
 ) -> sge.Expression:
-    expr = column.expr
-    if column.dtype == dtypes.BOOL_DTYPE:
-        expr = sge.Cast(this=column.expr, to="INT64")
-    # Will be null if all inputs are null. Pandas defaults to zero sum though.
-    expr = apply_window_if_present(sge.func("SUM", expr), window)
-    return sge.func("IFNULL", expr, ir._literal(0, column.dtype))
+    return apply_window_if_present(sge.func("MAX", column.expr), window)
+
+
+@UNARY_OP_REGISTRATION.register(agg_ops.MinOp)
+def _(
+    op: agg_ops.MinOp,
+    column: typed_expr.TypedExpr,
+    window: typing.Optional[window_spec.WindowSpec] = None,
+) -> sge.Expression:
+    return apply_window_if_present(sge.func("MIN", column.expr), window)
 
 
 @UNARY_OP_REGISTRATION.register(agg_ops.SizeUnaryOp)
@@ -67,3 +71,17 @@ def _(
     window: typing.Optional[window_spec.WindowSpec] = None,
 ) -> sge.Expression:
     return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
+
+
+@UNARY_OP_REGISTRATION.register(agg_ops.SumOp)
+def _(
+    op: agg_ops.SumOp,
+    column: typed_expr.TypedExpr,
+    window: typing.Optional[window_spec.WindowSpec] = None,
+) -> sge.Expression:
+    expr = column.expr
+    if column.dtype == dtypes.BOOL_DTYPE:
+        expr = sge.Cast(this=column.expr, to="INT64")
+    # Will be null if all inputs are null. Pandas defaults to zero sum though.
+    expr = apply_window_if_present(sge.func("SUM", expr), window)
+    return sge.func("IFNULL", expr, ir._literal(0, column.dtype))
diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py
@@ -633,6 +633,8 @@ def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression:
     elif dtype == dtypes.JSON_DTYPE:
         return sge.ParseJSON(this=sge.convert(str(value)))
     elif dtype == dtypes.TIMEDELTA_DTYPE:
+        if isinstance(value, int):
+            return sge.convert(value)
         return sge.convert(utils.timedelta_to_micros(value))
     elif dtypes.is_struct_like(dtype):
         items = [
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/out.sql
@@ -0,0 +1,12 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `int64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    COUNT(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `int64_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/out.sql
@@ -0,0 +1,12 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `int64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    MAX(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `int64_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/out.sql
@@ -0,0 +1,12 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `int64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    MIN(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `int64_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_size_unary/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_size_unary/out.sql
@@ -1,12 +1,12 @@
 WITH `bfcte_0` AS (
   SELECT
-    `string_col` AS `bfcol_0`
+    `float64_col` AS `bfcol_0`
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
     COUNT(1) AS `bfcol_1`
   FROM `bfcte_0`
 )
 SELECT
-  `bfcol_1` AS `string_col_agg`
+  `bfcol_1` AS `float64_col`
 FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/out.sql
@@ -1,12 +1,15 @@
 WITH `bfcte_0` AS (
   SELECT
-    `int64_col` AS `bfcol_0`
+    `bool_col` AS `bfcol_0`,
+    `int64_col` AS `bfcol_1`
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
-    COALESCE(SUM(`bfcol_0`), 0) AS `bfcol_1`
+    COALESCE(SUM(`bfcol_1`), 0) AS `bfcol_4`,
+    COALESCE(SUM(CAST(`bfcol_0` AS INT64)), 0) AS `bfcol_5`
   FROM `bfcte_0`
 )
 SELECT
-  `bfcol_1` AS `int64_col_agg`
+  `bfcol_4` AS `int64_col`,
+  `bfcol_5` AS `bool_col`
 FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py
@@ -12,40 +12,67 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import typing
+
 import pytest
 
-from bigframes.core import agg_expressions, array_value, expression, identifiers, nodes
+from bigframes.core import agg_expressions as agg_exprs
+from bigframes.core import array_value, identifiers, nodes
 from bigframes.operations import aggregations as agg_ops
 import bigframes.pandas as bpd
 
 pytest.importorskip("pytest_snapshot")
 
 
-def _apply_unary_op(obj: bpd.DataFrame, op: agg_ops.UnaryWindowOp, arg: str) -> str:
-    agg_node = nodes.AggregateNode(
-        obj._block.expr.node,
-        aggregations=(
-            (
-                agg_expressions.UnaryAggregation(op, expression.deref(arg)),
-                identifiers.ColumnId(arg + "_agg"),
-            ),
-        ),
-    )
+def _apply_unary_agg_ops(
+    obj: bpd.DataFrame,
+    ops_list: typing.Sequence[agg_exprs.UnaryAggregation],
+    new_names: typing.Sequence[str],
+) -> str:
+    aggs = [(op, identifiers.ColumnId(name)) for op, name in zip(ops_list, new_names)]
+
+    agg_node = nodes.AggregateNode(obj._block.expr.node, aggregations=tuple(aggs))
     result = array_value.ArrayValue(agg_node)
 
     sql = result.session._executor.to_sql(result, enable_cache=False)
     return sql
 
 
-def test_size(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, agg_ops.SizeUnaryOp(), "string_col")
+def test_count(scalar_types_df: bpd.DataFrame, snapshot):
+    col_name = "int64_col"
+    bf_df = scalar_types_df[[col_name]]
+    agg_expr = agg_ops.CountOp().as_expr(col_name)
+    sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name])
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_max(scalar_types_df: bpd.DataFrame, snapshot):
+    col_name = "int64_col"
+    bf_df = scalar_types_df[[col_name]]
+    agg_expr = agg_ops.MaxOp().as_expr(col_name)
+    sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name])
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_min(scalar_types_df: bpd.DataFrame, snapshot):
+    col_name = "int64_col"
+    bf_df = scalar_types_df[[col_name]]
+    agg_expr = agg_ops.MinOp().as_expr(col_name)
+    sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_sum(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["int64_col"]]
-    sql = _apply_unary_op(bf_df, agg_ops.SumOp(), "int64_col")
+    bf_df = scalar_types_df[["int64_col", "bool_col"]]
+    agg_ops_map = {
+        "int64_col": agg_ops.SumOp().as_expr("int64_col"),
+        "bool_col": agg_ops.SumOp().as_expr("bool_col"),
+    }
+    sql = _apply_unary_agg_ops(
+        bf_df, list(agg_ops_map.values()), list(agg_ops_map.keys())
+    )
 
     snapshot.assert_match(sql, "out.sql")