Create a name-to-id mapping

chelsea-lin · chelsea-lin · commit ab29b7590834 · 2025-09-18T00:43:16.000Z
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/out.sql
@@ -6,15 +6,22 @@ WITH `bfcte_0` AS (
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
-    AVG(`bfcol_1`) AS `bfcol_6`,
-    AVG(CAST(`bfcol_0` AS INT64)) AS `bfcol_7`,
-    AVG(`bfcol_2`) AS `bfcol_8`,
-    FLOOR(AVG(`bfcol_1`)) AS `bfcol_9`
+    *,
+    `bfcol_1` AS `bfcol_6`,
+    `bfcol_0` AS `bfcol_7`,
+    `bfcol_2` AS `bfcol_8`
   FROM `bfcte_0`
+), `bfcte_2` AS (
+  SELECT
+    AVG(`bfcol_6`) AS `bfcol_12`,
+    AVG(CAST(`bfcol_7` AS INT64)) AS `bfcol_13`,
+    FLOOR(AVG(`bfcol_8`)) AS `bfcol_14`,
+    FLOOR(AVG(`bfcol_6`)) AS `bfcol_15`
+  FROM `bfcte_1`
 )
 SELECT
-  `bfcol_6` AS `int64_col`,
-  `bfcol_7` AS `bool_col`,
-  `bfcol_8` AS `duration_col`,
-  `bfcol_9` AS `int64_col_w_floor`
-FROM `bfcte_1`
+  `bfcol_12` AS `int64_col`,
+  `bfcol_13` AS `bool_col`,
+  `bfcol_14` AS `duration_col`,
+  `bfcol_15` AS `int64_col_w_floor`
+FROM `bfcte_2`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py
@@ -57,17 +57,24 @@ def test_max(scalar_types_df: bpd.DataFrame, snapshot):
 
 
 def test_mean(scalar_types_df: bpd.DataFrame, snapshot):
-    # bf_df = scalar_types_df[["int64_col", "bool_col", "duration_col"]]
-    bf_df = scalar_types_df[["duration_col"]]
+    col_names = ["int64_col", "bool_col", "duration_col"]
+    bf_df = scalar_types_df[col_names]
     bf_df["duration_col"] = bpd.to_timedelta(bf_df["duration_col"], unit="us")
 
+    # to_timedelta creates a new mapping for the expression.
+    col_names.insert(0, "rowindex")
+    name2id = {
+        col_name: col_id
+        for col_name, col_id in zip(col_names, bf_df._block.expr.column_ids)
+    }
+
     agg_ops_map = {
-        # "int64_col": agg_ops.MeanOp().as_expr("int64_col"),
-        # "bool_col": agg_ops.MeanOp().as_expr("bool_col"),
-        "duration_col": agg_ops.MeanOp().as_expr("duration_col"),
-        # "int64_col_w_floor": agg_ops.MeanOp(should_floor_result=True).as_expr(
-        #     "int64_col"
-        # ),
+        "int64_col": agg_ops.MeanOp().as_expr(name2id["int64_col"]),
+        "bool_col": agg_ops.MeanOp().as_expr(name2id["bool_col"]),
+        "duration_col": agg_ops.MeanOp().as_expr(name2id["duration_col"]),
+        "int64_col_w_floor": agg_ops.MeanOp(should_floor_result=True).as_expr(
+            name2id["int64_col"]
+        ),
     }
     sql = _apply_unary_agg_ops(
         bf_df, list(agg_ops_map.values()), list(agg_ops_map.keys())