Enable MedianOp on non-numeric columns for the Ibis compiler too

chelsea-lin · chelsea-lin · commit 1edee547964b · 2025-09-23T20:34:20.000Z
diff --git a/bigframes/core/compile/ibis_compiler/aggregate_compiler.py b/bigframes/core/compile/ibis_compiler/aggregate_compiler.py
@@ -175,15 +175,11 @@ def _(
 
 
 @compile_unary_agg.register
-@numeric_op
 def _(
     op: agg_ops.MedianOp,
     column: ibis_types.NumericColumn,
     window=None,
 ) -> ibis_types.NumericValue:
-    # TODO(swast): Allow switching between exact and approximate median.
-    # For now, the best we can do is an approximate median when we're doing
-    # an aggregation, as PERCENTILE_CONT is only an analytic function.
     return cast(ibis_types.NumericValue, column.approx_median())
 
 
diff --git a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py
@@ -62,9 +62,6 @@ def _(
     column: typed_expr.TypedExpr,
     window: typing.Optional[window_spec.WindowSpec] = None,
 ) -> sge.Expression:
-    # TODO(swast): Allow switching between exact and approximate median.
-    # For now, the best we can do is an approximate median when we're doing
-    # an aggregation, as PERCENTILE_CONT is only an analytic function.
     approx_quantiles = sge.func("APPROX_QUANTILES", column.expr, sge.convert(2))
     return sge.Bracket(
         this=approx_quantiles, expressions=[sge.func("OFFSET", sge.convert(1))]
diff --git a/tests/system/small/engines/test_aggregation.py b/tests/system/small/engines/test_aggregation.py
@@ -92,15 +92,6 @@ def test_sql_engines_median_op_aggregates(
     node = apply_agg_to_all_valid(
         scalars_array_value,
         agg_ops.MedianOp(),
-        # Exclude columns are not supported by Ibis.
-        excluded_cols=[
-            "bytes_col",
-            "date_col",
-            "datetime_col",
-            "time_col",
-            "timestamp_col",
-            "string_col",
-        ],
     ).node
     left_engine = direct_gbq_execution.DirectGbqExecutor(bigquery_client)
     right_engine = direct_gbq_execution.DirectGbqExecutor(
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
@@ -1919,10 +1919,22 @@ def test_mean(scalars_dfs):
     assert math.isclose(pd_result, bf_result)
 
 
-def test_median(scalars_dfs):
+@pytest.mark.parametrize(
+    ("col_name"),
+    [
+        "int64_col",
+        # Non-numeric columns
+        "bytes_col",
+        "date_col",
+        "datetime_col",
+        "time_col",
+        "timestamp_col",
+        "string_col",
+    ],
+)
+def test_median(scalars_dfs, col_name):
     scalars_df, scalars_pandas_df = scalars_dfs
-    col_name = "int64_col"
-    bf_result = scalars_df[col_name].median()
+    bf_result = scalars_df[col_name].median(exact=False)
     pd_max = scalars_pandas_df[col_name].max()
     pd_min = scalars_pandas_df[col_name].min()
     # Median is approximate, so just check for plausibility.
@@ -1932,7 +1944,7 @@ def test_median(scalars_dfs):
 def test_median_exact(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "int64_col"
-    bf_result = scalars_df[col_name].median(exact=True)
+    bf_result = scalars_df[col_name].median()
     pd_result = scalars_pandas_df[col_name].median()
     assert math.isclose(pd_result, bf_result)
 
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_median/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_median/out.sql
@@ -1,30 +1,18 @@
 WITH `bfcte_0` AS (
   SELECT
-    `bytes_col` AS `bfcol_0`,
-    `date_col` AS `bfcol_1`,
-    `datetime_col` AS `bfcol_2`,
-    `int64_col` AS `bfcol_3`,
-    `string_col` AS `bfcol_4`,
-    `time_col` AS `bfcol_5`,
-    `timestamp_col` AS `bfcol_6`
+    `date_col` AS `bfcol_0`,
+    `int64_col` AS `bfcol_1`,
+    `string_col` AS `bfcol_2`
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
-    APPROX_QUANTILES(`bfcol_3`, 2)[OFFSET(1)] AS `bfcol_7`,
-    APPROX_QUANTILES(`bfcol_0`, 2)[OFFSET(1)] AS `bfcol_8`,
-    APPROX_QUANTILES(`bfcol_1`, 2)[OFFSET(1)] AS `bfcol_9`,
-    APPROX_QUANTILES(`bfcol_2`, 2)[OFFSET(1)] AS `bfcol_10`,
-    APPROX_QUANTILES(`bfcol_5`, 2)[OFFSET(1)] AS `bfcol_11`,
-    APPROX_QUANTILES(`bfcol_6`, 2)[OFFSET(1)] AS `bfcol_12`,
-    APPROX_QUANTILES(`bfcol_4`, 2)[OFFSET(1)] AS `bfcol_13`
+    APPROX_QUANTILES(`bfcol_1`, 2)[OFFSET(1)] AS `bfcol_3`,
+    APPROX_QUANTILES(`bfcol_0`, 2)[OFFSET(1)] AS `bfcol_4`,
+    APPROX_QUANTILES(`bfcol_2`, 2)[OFFSET(1)] AS `bfcol_5`
   FROM `bfcte_0`
 )
 SELECT
-  `bfcol_7` AS `int64_col`,
-  `bfcol_8` AS `bytes_col`,
-  `bfcol_9` AS `date_col`,
-  `bfcol_10` AS `datetime_col`,
-  `bfcol_11` AS `time_col`,
-  `bfcol_12` AS `timestamp_col`,
-  `bfcol_13` AS `string_col`
+  `bfcol_3` AS `int64_col`,
+  `bfcol_4` AS `date_col`,
+  `bfcol_5` AS `string_col`
 FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py
@@ -60,12 +60,7 @@ def test_median(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df
     ops_map = {
         "int64_col": agg_ops.MedianOp().as_expr("int64_col"),
-        # Includes columns are not supported by Ibis but supported by BigQuery.
-        "bytes_col": agg_ops.MedianOp().as_expr("bytes_col"),
         "date_col": agg_ops.MedianOp().as_expr("date_col"),
-        "datetime_col": agg_ops.MedianOp().as_expr("datetime_col"),
-        "time_col": agg_ops.MedianOp().as_expr("time_col"),
-        "timestamp_col": agg_ops.MedianOp().as_expr("timestamp_col"),
         "string_col": agg_ops.MedianOp().as_expr("string_col"),
     }
     sql = _apply_unary_agg_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))