
Commit 3e61817

resolve the comments
1 parent 4688d70 commit 3e61817

File tree

4 files changed: +34 -41 lines changed


bigframes/exceptions.py

Lines changed: 2 additions & 2 deletions

@@ -105,8 +105,8 @@ class FunctionAxisOnePreviewWarning(PreviewWarning):
 
 class FunctionPackageVersionWarning(PreviewWarning):
     """
-    Managed UDF package versions may not precisely match users' local
-    environment or the exact versions specified.
+    Managed UDF package versions for Numpy, Pandas, and Pyarrow may not
+    precisely match users' local environment or the exact versions specified.
     """
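Since this docstring describes a user-facing preview warning, callers who have accepted the version-drift caveat can silence it with the standard warnings machinery. A minimal sketch, assuming PreviewWarning is importable from bigframes.exceptions as this diff suggests; the filter call itself is plain standard-library warnings:

import warnings

from bigframes import exceptions as bfe

# Silence only the bigframes preview warnings (including the managed-UDF
# package-version drift warning); all other warnings still surface.
warnings.filterwarnings("ignore", category=bfe.PreviewWarning)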

bigframes/functions/_function_client.py

Lines changed: 1 addition & 1 deletion

@@ -246,7 +246,7 @@ def provision_bq_managed_function(
         # Augment user package requirements with any internal package
         # requirements.
         packages = _utils._get_updated_package_requirements(
-            packages, is_row_processor, capture_references, ignore_numpy_version=True
+            packages, is_row_processor, capture_references, ignore_package_version=True
         )
         if packages:
             managed_function_options["packages"] = packages

bigframes/functions/_utils.py

Lines changed: 17 additions & 16 deletions

@@ -66,32 +66,33 @@ def _get_updated_package_requirements(
     package_requirements=None,
     is_row_processor=False,
     capture_references=True,
-    ignore_numpy_version=False,
+    ignore_package_version=False,
 ):
     requirements = []
     if capture_references:
         requirements.append(f"cloudpickle=={cloudpickle.__version__}")
 
     if is_row_processor:
-        # bigframes function will send an entire row of data as json, which
-        # would be converted to a pandas series and processed Ensure numpy
-        # versions match to avoid unpickling problems. See internal issue
-        # b/347934471.
-        requirements.append(f"pandas=={pandas.__version__}")
-        requirements.append(f"pyarrow=={pyarrow.__version__}")
-        # TODO(jialuo): Add back the version after b/410924784 is resolved.
-        # Due to current limitations on the numpy version in Python UDFs, we use
-        # `ignore_numpy_version` to optionally omit the version for managed
-        # functions only.
-        if ignore_numpy_version:
+        if ignore_package_version:
+            # TODO(jialuo): Add back the version after b/410924784 is resolved.
+            # Due to current limitations on the packages version in Python UDFs,
+            # we use `ignore_package_version` to optionally omit the version for
+            # managed functions only.
             msg = bfe.format_message(
-                "Numpy version may not precisely match your local environment."
+                "Numpy, Pandas, and Pyarrow version may not precisely match your local environment."
             )
             warnings.warn(msg, category=bfe.PreviewWarning)
-            numpy_package = "numpy"
+            requirements.append("pandas")
+            requirements.append("pyarrow")
+            requirements.append("numpy")
         else:
-            numpy_package = f"numpy=={numpy.__version__}"
-        requirements.append(numpy_package)
+            # bigframes function will send an entire row of data as json, which
+            # would be converted to a pandas series and processed Ensure numpy
+            # versions match to avoid unpickling problems. See internal issue
+            # b/347934471.
+            requirements.append(f"pandas=={pandas.__version__}")
+            requirements.append(f"pyarrow=={pyarrow.__version__}")
+            requirements.append(f"numpy=={numpy.__version__}")
 
     if package_requirements:
         requirements.extend(package_requirements)
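To make the behavior change concrete, the sketch below reproduces just the row-processor branch of _get_updated_package_requirements in isolation. The function name build_row_processor_requirements is invented for illustration, the cloudpickle pin and user-supplied requirements handled by the real helper are omitted, and the pinned versions in the sample output are simply whatever is installed locally:

import numpy
import pandas
import pyarrow


def build_row_processor_requirements(ignore_package_version: bool) -> list[str]:
    """Simplified sketch of the pinning behavior introduced in this commit."""
    if ignore_package_version:
        # Managed (BigQuery Python UDF) path: list the packages without
        # versions until b/410924784 allows pinning them again.
        return ["pandas", "pyarrow", "numpy"]
    # Remote-function path: pin to the local environment so unpickling the
    # serialized row behaves identically on both sides (b/347934471).
    return [
        f"pandas=={pandas.__version__}",
        f"pyarrow=={pyarrow.__version__}",
        f"numpy=={numpy.__version__}",
    ]


print(build_row_processor_requirements(ignore_package_version=True))
# ['pandas', 'pyarrow', 'numpy']
print(build_row_processor_requirements(ignore_package_version=False))
# e.g. ['pandas==2.2.2', 'pyarrow==16.1.0', 'numpy==1.26.4'] (local versions)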

tests/system/large/functions/test_managed_function.py

Lines changed: 14 additions & 22 deletions

@@ -655,11 +655,9 @@ def test_managed_function_df_apply_axis_1(session, dataset_id, scalars_dfs):
     try:
 
         def serialize_row(row):
-            # Explicitly casting types ensures consistent behavior between
-            # BigFrames and pandas. Without it, BigFrames return plain Python
-            # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
-            # which could lead to mismatches and requires further investigation.
-            # See b/435021126.
+            # TODO(b/435021126): Remove explicit type conversion of the field
+            # "name" after the issue has been addressed. It is added only to
+            # accept partial pandas parity for the time being.
             custom = {
                 "name": int(row.name),
                 "index": [idx for idx in row.index],
@@ -705,7 +703,7 @@ def serialize_row(row):
     finally:
         # clean up the gcp assets created for the managed function.
         cleanup_function_assets(
-            serialize_row_mf, session.bqclient, session.cloudfunctionsclient
+            serialize_row_mf, session.bqclient, ignore_failures=False
         )
 
 
@@ -716,11 +714,9 @@ def test_managed_function_df_apply_axis_1_aggregates(session, dataset_id, scalar
     try:
 
         def analyze(row):
-            # Explicitly casting types ensures consistent behavior between
-            # BigFrames and pandas. Without it, BigFrames return plain Python
-            # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
-            # which could lead to mismatches and requires further investigation.
-            # See b/435021126.
+            # TODO(b/435021126): Remove explicit type conversion of the field
+            # "name" after the issue has been addressed. It is added only to
+            # accept partial pandas parity for the time being.
             return str(
                 {
                     "dtype": row.dtype,
@@ -735,7 +731,7 @@ def analyze(row):
 
         with pytest.warns(
             bfe.PreviewWarning,
-            match=("Numpy version may not precisely match your local environment."),
+            match=("Numpy, Pandas, and Pyarrow version may not precisely match."),
         ):
 
             analyze_mf = session.udf(
@@ -756,9 +752,7 @@ def analyze(row):
 
     finally:
         # clean up the gcp assets created for the managed function.
-        cleanup_function_assets(
-            analyze_mf, session.bqclient, session.cloudfunctionsclient
-        )
+        cleanup_function_assets(analyze_mf, session.bqclient, ignore_failures=False)
 
 
 @pytest.mark.parametrize(
@@ -834,11 +828,9 @@ def test_managed_function_df_apply_axis_1_complex(session, dataset_id, pd_df):
     try:
 
         def serialize_row(row):
-            # Explicitly casting types ensures consistent behavior between
-            # BigFrames and pandas. Without it, BigFrames return plain Python
-            # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
-            # which could lead to mismatches and requires further investigation.
-            # See b/435021126.
+            # TODO(b/435021126): Remove explicit type conversion of the field
+            # "name" after the issue has been addressed. It is added only to
+            # accept partial pandas parity for the time being.
             custom = {
                 "name": int(row.name),
                 "index": [idx for idx in row.index],
@@ -876,7 +868,7 @@ def serialize_row(row):
     finally:
         # clean up the gcp assets created for the managed function.
         cleanup_function_assets(
-            serialize_row_mf, session.bqclient, session.cloudfunctionsclient
+            serialize_row_mf, session.bqclient, ignore_failures=False
         )
 
 
@@ -942,5 +934,5 @@ def float_parser(row):
     finally:
         # clean up the gcp assets created for the managed function.
         cleanup_function_assets(
-            float_parser_mf, session.bqclient, session.cloudfunctionsclient
+            float_parser_mf, session.bqclient, ignore_failures=False
         )
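A note on the updated assertion in these tests: the match argument of pytest.warns is applied with re.search, so the shortened pattern still matches the full warning text emitted by _get_updated_package_requirements, and the trailing period acts as a regex wildcard rather than a literal end of sentence. A minimal, bigframes-independent sketch using the message wording from the diff above:

import re
import warnings

import pytest

MESSAGE = (
    "Numpy, Pandas, and Pyarrow version may not precisely match "
    "your local environment."
)

# pytest.warns searches the pattern within the warning message, so a prefix
# of the message (with "." matching any character) is enough to pass.
with pytest.warns(
    UserWarning,
    match="Numpy, Pandas, and Pyarrow version may not precisely match.",
):
    warnings.warn(MESSAGE, category=UserWarning)

# Equivalent check with the standard library: re.search, not re.fullmatch.
assert re.search("may not precisely match.", MESSAGE)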
