Commit 05177d6

Merge branch 'main' into udf-type

2 parents 5fa3c98 + 62a189f

File tree: 13 files changed (+443, -86 lines)


bigframes/core/compile/polars/compiler.py

Lines changed: 1 addition & 1 deletion
@@ -646,7 +646,7 @@ def _aggregate(
     def compile_explode(self, node: nodes.ExplodeNode):
         assert node.offsets_col is None
         df = self.compile_node(node.child)
-        cols = [pl.col(col.id.sql) for col in node.column_ids]
+        cols = [col.id.sql for col in node.column_ids]
         return df.explode(cols)

     @compile_node.register
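
Side note on the change above: `polars.DataFrame.explode` accepts plain column-name strings, so the `pl.col(...)` wrapping was unnecessary. A minimal standalone sketch of the behavior (illustrative data, not BigQuery DataFrames code):

    import polars as pl

    df = pl.DataFrame({"letter": ["a", "b"], "nums": [[1, 2], [3, 4]]})
    # Passing the column name as a string explodes each list element
    # into its own row, the same as passing a pl.col(...) expression.
    print(df.explode(["nums"]))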

bigframes/core/indexes/base.py

Lines changed: 13 additions & 44 deletions
@@ -27,16 +27,12 @@
 import pandas

 from bigframes import dtypes
-from bigframes.core.array_value import ArrayValue
 import bigframes.core.block_transforms as block_ops
 import bigframes.core.blocks as blocks
 import bigframes.core.expression as ex
-import bigframes.core.identifiers as ids
-import bigframes.core.nodes as nodes
 import bigframes.core.ordering as order
 import bigframes.core.utils as utils
 import bigframes.core.validations as validations
-import bigframes.core.window_spec as window_spec
 import bigframes.dtypes
 import bigframes.formatting_helpers as formatter
 import bigframes.operations as ops
@@ -272,37 +268,20 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
         # Get the index column from the block
         index_column = self._block.index_columns[0]

-        # Apply row numbering to the original data
-        row_number_column_id = ids.ColumnId.unique()
-        window_node = nodes.WindowOpNode(
-            child=self._block._expr.node,
-            expression=ex.NullaryAggregation(agg_ops.RowNumberOp()),
-            window_spec=window_spec.unbound(),
-            output_name=row_number_column_id,
-            never_skip_nulls=True,
-        )
-
-        windowed_array = ArrayValue(window_node)
-        windowed_block = blocks.Block(
-            windowed_array,
-            index_columns=self._block.index_columns,
-            column_labels=self._block.column_labels.insert(
-                len(self._block.column_labels), None
-            ),
-            index_labels=self._block._index_labels,
+        # Use promote_offsets to get row numbers (similar to argmax/argmin implementation)
+        block_with_offsets, offsets_id = self._block.promote_offsets(
+            "temp_get_loc_offsets_"
         )

         # Create expression to find matching positions
         match_expr = ops.eq_op.as_expr(ex.deref(index_column), ex.const(key))
-        windowed_block, match_col_id = windowed_block.project_expr(match_expr)
+        block_with_offsets, match_col_id = block_with_offsets.project_expr(match_expr)

         # Filter to only rows where the key matches
-        filtered_block = windowed_block.filter_by_id(match_col_id)
+        filtered_block = block_with_offsets.filter_by_id(match_col_id)

-        # Check if key exists at all by counting on the filtered block
-        count_agg = ex.UnaryAggregation(
-            agg_ops.count_op, ex.deref(row_number_column_id.name)
-        )
+        # Check if key exists at all by counting
+        count_agg = ex.UnaryAggregation(agg_ops.count_op, ex.deref(offsets_id))
         count_result = filtered_block._expr.aggregate([(count_agg, "count")])
         count_scalar = self._block.session._executor.execute(
             count_result
@@ -313,9 +292,7 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:

         # If only one match, return integer position
         if count_scalar == 1:
-            min_agg = ex.UnaryAggregation(
-                agg_ops.min_op, ex.deref(row_number_column_id.name)
-            )
+            min_agg = ex.UnaryAggregation(agg_ops.min_op, ex.deref(offsets_id))
             position_result = filtered_block._expr.aggregate([(min_agg, "position")])
             position_scalar = self._block.session._executor.execute(
                 position_result
@@ -325,32 +302,24 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
         # Handle multiple matches based on index monotonicity
         is_monotonic = self.is_monotonic_increasing or self.is_monotonic_decreasing
         if is_monotonic:
-            return self._get_monotonic_slice(filtered_block, row_number_column_id)
+            return self._get_monotonic_slice(filtered_block, offsets_id)
         else:
             # Return boolean mask for non-monotonic duplicates
-            mask_block = windowed_block.select_columns([match_col_id])
-            # Reset the index to use positional integers instead of original index values
+            mask_block = block_with_offsets.select_columns([match_col_id])
             mask_block = mask_block.reset_index(drop=True)
-            # Ensure correct dtype and name to match pandas behavior
             result_series = bigframes.series.Series(mask_block)
             return result_series.astype("boolean")

-    def _get_monotonic_slice(
-        self, filtered_block, row_number_column_id: "ids.ColumnId"
-    ) -> slice:
+    def _get_monotonic_slice(self, filtered_block, offsets_id: str) -> slice:
         """Helper method to get a slice for monotonic duplicates with an optimized query."""
         # Combine min and max aggregations into a single query for efficiency
         min_max_aggs = [
             (
-                ex.UnaryAggregation(
-                    agg_ops.min_op, ex.deref(row_number_column_id.name)
-                ),
+                ex.UnaryAggregation(agg_ops.min_op, ex.deref(offsets_id)),
                 "min_pos",
             ),
             (
-                ex.UnaryAggregation(
-                    agg_ops.max_op, ex.deref(row_number_column_id.name)
-                ),
+                ex.UnaryAggregation(agg_ops.max_op, ex.deref(offsets_id)),
                 "max_pos",
             ),
         ]
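
The three return paths above mirror the pandas `Index.get_loc` contract that this implementation follows: an integer for a unique key, a slice for duplicates on a monotonic index, and a boolean mask otherwise. Illustrated with plain pandas:

    import pandas as pd

    print(pd.Index(["a", "b", "c"]).get_loc("b"))       # 1 (unique key)
    print(pd.Index(["a", "b", "b", "c"]).get_loc("b"))  # slice(1, 3, None) (monotonic dups)
    print(pd.Index(["b", "a", "b"]).get_loc("b"))       # [ True False  True] (non-monotonic)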

bigframes/exceptions.py

Lines changed: 7 additions & 0 deletions
@@ -107,6 +107,13 @@ class FunctionAxisOnePreviewWarning(PreviewWarning):
     """Remote Function and Managed UDF with axis=1 preview."""


+class FunctionPackageVersionWarning(PreviewWarning):
+    """
+    Managed UDF package versions for Numpy, Pandas, and Pyarrow may not
+    precisely match users' local environment or the exact versions specified.
+    """
+
+
 def format_message(message: str, fill: bool = True):
     """Formats a warning message with ANSI color codes for the warning color.
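
Because the new warning subclasses `PreviewWarning`, callers can silence it with the standard `warnings` machinery if the version mismatch is acceptable; a minimal sketch:

    import warnings

    import bigframes.exceptions as bfe

    # Suppress only the package-version warning; other BigQuery DataFrames
    # warnings remain visible.
    warnings.filterwarnings("ignore", category=bfe.FunctionPackageVersionWarning)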

bigframes/functions/_function_client.py

Lines changed: 9 additions & 27 deletions
@@ -19,7 +19,6 @@
 import logging
 import os
 import random
-import re
 import shutil
 import string
 import tempfile
@@ -247,7 +246,7 @@ def provision_bq_managed_function(
         # Augment user package requirements with any internal package
         # requirements.
         packages = _utils._get_updated_package_requirements(
-            packages, is_row_processor, capture_references
+            packages, is_row_processor, capture_references, ignore_package_version=True
         )
         if packages:
             managed_function_options["packages"] = packages
@@ -270,28 +269,6 @@ def provision_bq_managed_function(
         )

         udf_name = func.__name__
-        if capture_references:
-            # This code path ensures that if the udf body contains any
-            # references to variables and/or imports outside the body, they are
-            # captured as well.
-            import cloudpickle
-
-            pickled = cloudpickle.dumps(func)
-            udf_code = textwrap.dedent(
-                f"""
-                import cloudpickle
-                {udf_name} = cloudpickle.loads({pickled})
-                """
-            )
-        else:
-            # This code path ensures that if the udf body is self contained,
-            # i.e. there are no references to variables or imports outside the
-            # body.
-            udf_code = textwrap.dedent(inspect.getsource(func))
-            match = re.search(r"^def ", udf_code, flags=re.MULTILINE)
-            if match is None:
-                raise ValueError("The UDF is not defined correctly.")
-            udf_code = udf_code[match.start() :]

         with_connection_clause = (
             (
@@ -301,6 +278,13 @@ def provision_bq_managed_function(
             else ""
         )

+        # Generate the complete Python code block for the managed Python UDF,
+        # including the user's function, necessary imports, and the BigQuery
+        # handler wrapper.
+        python_code_block = bff_template.generate_managed_function_code(
+            func, udf_name, is_row_processor, capture_references
+        )
+
         create_function_ddl = (
             textwrap.dedent(
                 f"""
@@ -311,13 +295,11 @@ def provision_bq_managed_function(
            OPTIONS ({managed_function_options_str})
            AS r'''
            __UDF_PLACE_HOLDER__
-            def bigframes_handler(*args):
-                return {udf_name}(*args)
            '''
        """
            )
            .strip()
-            .replace("__UDF_PLACE_HOLDER__", udf_code)
+            .replace("__UDF_PLACE_HOLDER__", python_code_block)
        )

        self._ensure_dataset_exists()
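
The DDL assembly is plain string templating; a standalone sketch of the pattern, with a simplified and hypothetical DDL body (the real statement carries the full signature, OPTIONS, and language clause):

    import textwrap

    # Hypothetical generated code block; in the real flow this comes from
    # bff_template.generate_managed_function_code.
    python_code_block = "def bigframes_handler(*args):\n    return my_udf(*args)"

    ddl = (
        textwrap.dedent(
            """
            CREATE OR REPLACE FUNCTION example(x INT64) RETURNS INT64
            AS r'''
            __UDF_PLACE_HOLDER__
            '''
            """
        )
        .strip()
        .replace("__UDF_PLACE_HOLDER__", python_code_block)
    )
    print(ddl)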

bigframes/functions/_function_session.py

Lines changed: 2 additions & 2 deletions
@@ -867,15 +867,15 @@ def wrapper(func):
                 warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning)
                 py_sig = py_sig.replace(return_annotation=output_type)

-            udf_sig = udf_def.UdfSignature.from_py_signature(py_sig)
-
             # The function will actually be receiving a pandas Series, but allow
             # both BigQuery DataFrames and pandas object types for compatibility.
             is_row_processor = False
             if new_sig := _convert_row_processor_sig(py_sig):
                 py_sig = new_sig
                 is_row_processor = True

+            udf_sig = udf_def.UdfSignature.from_py_signature(py_sig)
+
             managed_function_client = _function_client.FunctionClient(
                 dataset_ref.project,
                 bq_location,
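
The reorder matters because `_convert_row_processor_sig` can rewrite `py_sig`; deriving `udf_sig` beforehand would capture the pre-conversion signature. A toy illustration of that staleness problem using plain `inspect` (hypothetical function, not bigframes code):

    import inspect

    def f(row) -> float:
        ...

    sig = inspect.signature(f)
    stale = sig.return_annotation          # captured before the rewrite
    sig = sig.replace(return_annotation=str)
    print(stale, sig.return_annotation)    # <class 'float'> <class 'str'>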

bigframes/functions/_utils.py

Lines changed: 30 additions & 9 deletions
@@ -19,6 +19,7 @@
 import sys
 import typing
 from typing import cast, Optional, Set
+import warnings

 import cloudpickle
 import google.api_core.exceptions
@@ -27,6 +28,7 @@
 import pandas
 import pyarrow

+import bigframes.exceptions as bfe
 import bigframes.formatting_helpers as bf_formatting
 from bigframes.functions import function_typing

@@ -62,21 +64,40 @@ def get_remote_function_locations(bq_location):


 def _get_updated_package_requirements(
-    package_requirements=None, is_row_processor=False, capture_references=True
+    package_requirements=None,
+    is_row_processor=False,
+    capture_references=True,
+    ignore_package_version=False,
 ):
     requirements = []
     if capture_references:
         requirements.append(f"cloudpickle=={cloudpickle.__version__}")

     if is_row_processor:
-        # bigframes function will send an entire row of data as json, which
-        # would be converted to a pandas series and processed Ensure numpy
-        # versions match to avoid unpickling problems. See internal issue
-        # b/347934471.
-        requirements.append(f"numpy=={numpy.__version__}")
-        requirements.append(f"pandas=={pandas.__version__}")
-        requirements.append(f"pyarrow=={pyarrow.__version__}")
-
+        if ignore_package_version:
+            # TODO(jialuo): Add back the version after b/410924784 is resolved.
+            # Due to current limitations on the packages version in Python UDFs,
+            # we use `ignore_package_version` to optionally omit the version for
+            # managed functions only.
+            msg = bfe.format_message(
+                "numpy, pandas, and pyarrow versions in the function execution"
+                " environment may not precisely match your local environment."
+            )
+            warnings.warn(msg, category=bfe.FunctionPackageVersionWarning)
+            requirements.append("pandas")
+            requirements.append("pyarrow")
+            requirements.append("numpy")
+        else:
+            # bigframes function will send an entire row of data as json, which
+            # would be converted to a pandas series and processed Ensure numpy
+            # versions match to avoid unpickling problems. See internal issue
+            # b/347934471.
+            requirements.append(f"pandas=={pandas.__version__}")
+            requirements.append(f"pyarrow=={pyarrow.__version__}")
+            requirements.append(f"numpy=={numpy.__version__}")
+
+    # TODO(b/435023957): Fix the issue of potential duplicate package versions
+    # when `package_requirements` also contains `pandas/pyarrow/numpy`.
     if package_requirements:
         requirements.extend(package_requirements)
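
A minimal standalone sketch of the new branching (simplified from the function above; the pinned versions are whatever the local environment reports):

    import cloudpickle
    import numpy
    import pandas
    import pyarrow

    def updated_requirements(is_row_processor=True, ignore_package_version=False):
        reqs = [f"cloudpickle=={cloudpickle.__version__}"]
        if is_row_processor:
            if ignore_package_version:
                # Managed UDF path: names only, the runtime picks the versions.
                reqs += ["pandas", "pyarrow", "numpy"]
            else:
                # Cloud function path: pin the local versions to keep
                # pickling consistent across environments.
                reqs += [
                    f"pandas=={pandas.__version__}",
                    f"pyarrow=={pyarrow.__version__}",
                    f"numpy=={numpy.__version__}",
                ]
        return reqs

    print(updated_requirements(ignore_package_version=True))
    # ['cloudpickle==...', 'pandas', 'pyarrow', 'numpy']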

bigframes/functions/function_template.py

Lines changed: 53 additions & 0 deletions
@@ -17,6 +17,7 @@
 import inspect
 import logging
 import os
+import re
 import textwrap
 from typing import Tuple

@@ -291,3 +292,55 @@ def generate_cloud_function_main_code(
     logger.debug(f"Wrote {os.path.abspath(main_py)}:\n{open(main_py).read()}")

     return handler_func_name
+
+
+def generate_managed_function_code(
+    def_,
+    udf_name: str,
+    is_row_processor: bool,
+    capture_references: bool,
+) -> str:
+    """Generates the Python code block for managed Python UDF."""
+
+    if capture_references:
+        # This code path ensures that if the udf body contains any
+        # references to variables and/or imports outside the body, they are
+        # captured as well.
+        import cloudpickle
+
+        pickled = cloudpickle.dumps(def_)
+        func_code = textwrap.dedent(
+            f"""
+            import cloudpickle
+            {udf_name} = cloudpickle.loads({pickled})
+            """
+        )
+    else:
+        # This code path ensures that if the udf body is self contained,
+        # i.e. there are no references to variables or imports outside the
+        # body.
+        func_code = textwrap.dedent(inspect.getsource(def_))
+        match = re.search(r"^def ", func_code, flags=re.MULTILINE)
+        if match is None:
+            raise ValueError("The UDF is not defined correctly.")
+        func_code = func_code[match.start() :]
+
+    if is_row_processor:
+        udf_code = textwrap.dedent(inspect.getsource(get_pd_series))
+        udf_code = udf_code[udf_code.index("def") :]
+        bigframes_handler_code = textwrap.dedent(
+            f"""def bigframes_handler(str_arg):
+            return {udf_name}({get_pd_series.__name__}(str_arg))"""
+        )
+    else:
+        udf_code = ""
+        bigframes_handler_code = textwrap.dedent(
+            f"""def bigframes_handler(*args):
+            return {udf_name}(*args)"""
+        )
+
+    udf_code_block = textwrap.dedent(
+        f"{udf_code}\n{func_code}\n{bigframes_handler_code}"
+    )
+
+    return udf_code_block
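
For a self-contained UDF (`capture_references=False`, `is_row_processor=False`), the generated block is roughly the function source followed by the handler wrapper. For example, given a hypothetical `def add_one(x: int) -> int: return x + 1`, the returned string is approximately:

    def add_one(x: int) -> int:
        return x + 1
    def bigframes_handler(*args):
        return add_one(*args)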

bigframes/functions/function_typing.py

Lines changed: 2 additions & 1 deletion
@@ -61,7 +61,8 @@ def __init__(self, type_, supported_types):
         self.type = type_
         self.supported_types = supported_types
         super().__init__(
-            f"'{type_}' is not one of the supported types {supported_types}"
+            f"'{type_}' must be one of the supported types ({supported_types}) "
+            "or a list of one of those types."
         )
