add documentation

TrevorBergeron · TrevorBergeron · commit 1e53fa95d109 · 2025-12-12T01:36:37.000Z
diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py
@@ -266,11 +266,28 @@ def compute_values(self, assignments: Sequence[ex.Expression]):
         )
 
     def compute_general_expression(self, assignments: Sequence[ex.Expression]):
+        """
+        Applies arbitrary column expressions to the current execution block.
+
+        This method transforms the logical plan by applying a sequence of expressions that
+        preserve the length of the input columns. It supports both scalar operations
+        and window functions. Each expression is assigned a unique internal column identifier.
+
+        Args:
+            assignments (Sequence[ex.Expression]): A sequence of expression objects
+                representing the transformations to apply to the columns.
+
+        Returns:
+            Tuple[ArrayValue, Tuple[str, ...]]: A tuple containing:
+                - An `ArrayValue` wrapping the new root node of the updated logical plan.
+                - A tuple of strings representing the unique column IDs generated for
+                  each expression in the assignments.
+        """
         named_exprs = [
             nodes.ColumnDef(expr, ids.ColumnId.unique()) for expr in assignments
         ]
         # TODO: Push this to rewrite later to go from block expression to planning form
-        new_root = expression_factoring.plan_general_col_exprs(self.node, named_exprs)
+        new_root = expression_factoring.apply_col_exprs_to_plan(self.node, named_exprs)
 
         target_ids = tuple(named_expr.id for named_expr in named_exprs)
         return (ArrayValue(new_root), target_ids)
@@ -282,7 +299,29 @@ def compute_general_reduction(
         *,
         dropna: bool = False,
     ):
-        # Warning: this function does not check if the expression is a valid reduction, and may fail spectacularly on invalid inputs
+        """
+        Applies arbitrary aggregation expressions to the block, optionally grouped by keys.
+
+        This method handles reduction operations (e.g., sum, mean, count) that collapse
+        multiple input rows into a single scalar value per group. If grouping keys are
+        provided, the operation is performed per group; otherwise, it is a global reduction.
+
+        Args:
+            assignments (Sequence[ex.Expression]): A sequence of aggregation expressions
+                to be calculated.
+            by_column_ids (typing.Sequence[str], optional): A sequence of column IDs
+                to use as grouping keys. Defaults to an empty tuple (global reduction).
+            dropna (bool, optional): If True, rows containing null values in the
+                `by_column_ids` columns will be filtered out before the reduction
+                is applied. Defaults to False.
+
+        Returns:
+            Tuple[ArrayValue, Tuple[str, ...]]: A tuple containing:
+                - An `ArrayValue` wrapping the new root node representing the
+                  aggregation/group-by result.
+                - A tuple of strings representing the unique column IDs assigned to the
+                  resulting aggregate columns.
+        """
         plan = self.node
         if dropna:
             for col_id in by_column_ids:
@@ -292,7 +331,7 @@ def compute_general_reduction(
             nodes.ColumnDef(expr, ids.ColumnId.unique()) for expr in assignments
         ]
         # TODO: Push this to rewrite later to go from block expression to planning form
-        new_root = expression_factoring.plan_general_aggregation(
+        new_root = expression_factoring.apply_agg_exprs_to_plan(
             plan, named_exprs, grouping_keys=[ex.deref(by) for by in by_column_ids]
         )
         target_ids = tuple(named_expr.id for named_expr in named_exprs)
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
@@ -1146,13 +1146,15 @@ def project_exprs(
             index_labels=self._index_labels,
         )
 
-    # This is a new experimental version of the project_exprs that supports mixing analytic and scalar expressions
     def project_block_exprs(
         self,
         exprs: Sequence[ex.Expression],
         labels: Union[Sequence[Label], pd.Index],
         drop=False,
     ) -> Block:
+        """
+        Version of project_exprs that supports mixing analytic and scalar expressions.
+        """
         new_array, _ = self.expr.compute_general_expression(exprs)
         if drop:
             new_array = new_array.drop_columns(self.value_columns)
@@ -1167,7 +1169,6 @@ def project_block_exprs(
             index_labels=self._index_labels,
         )
 
-    # This is a new experimental version of the aggregate that supports mixing analytic and scalar expressions\
     def reduce_general(
         self,
         aggregations: typing.Sequence[ex.Expression] = (),
@@ -1176,6 +1177,9 @@ def reduce_general(
         *,
         dropna: bool = True,
     ) -> typing.Tuple[Block, typing.Sequence[str]]:
+        """
+        Version of the aggregate that supports mixing analytic and scalar expressions.
+        """
         if column_labels is None:
             column_labels = pd.Index(range(len(aggregations)))
 
diff --git a/bigframes/core/expression_factoring.py b/bigframes/core/expression_factoring.py
@@ -41,7 +41,7 @@
 _MAX_INLINE_COMPLEXITY = 10
 
 
-def plan_general_col_exprs(
+def apply_col_exprs_to_plan(
     plan: nodes.BigFrameNode, col_exprs: Sequence[nodes.ColumnDef]
 ) -> nodes.BigFrameNode:
     # TODO: Jointly fragmentize expressions to more efficiently reuse common sub-expressions
@@ -55,7 +55,7 @@ def plan_general_col_exprs(
     return push_into_tree(plan, fragments, target_ids)
 
 
-def plan_general_aggregation(
+def apply_agg_exprs_to_plan(
     plan: nodes.BigFrameNode,
     agg_defs: Sequence[nodes.ColumnDef],
     grouping_keys: Sequence[expression.DerefOp],
@@ -69,7 +69,7 @@ def plan_general_aggregation(
         nodes.ColumnDef(windowize(cdef.expression, window_def), cdef.id)
         for cdef in all_inputs
     ]
-    plan = plan_general_col_exprs(plan, windowized_inputs)
+    plan = apply_col_exprs_to_plan(plan, windowized_inputs)
     all_aggs = list(
         itertools.chain(*(factored_agg.agg_exprs for factored_agg in factored_aggs))
     )
@@ -113,6 +113,14 @@ def fragmentize_expression(root: nodes.ColumnDef) -> Sequence[nodes.ColumnDef]:
 
 @dataclasses.dataclass(frozen=True, eq=False)
 class FactoredAggregation:
+    """
+    A three-part decomposition of a general aggregating expression.
+
+    1. agg_inputs: A set of (*col) -> col transformations that preprocess the inputs for the aggregation ops.
+    2. agg_exprs: A set of pure aggregation ops (e.g. sum, mean, min, max) referencing the outputs of (1).
+    3. root_scalar_expr: The final step; takes the outputs of (2) and applies a scalar expression to produce the final result.
+    """
+
     # pure scalar expression
     root_scalar_expr: nodes.ColumnDef
     # pure agg expression, only refs cols and consts