
Commit 3e61817

resolve the comments
1 parent 4688d70 commit 3e61817

File tree

4 files changed: +34 -41 lines changed


bigframes/exceptions.py

Lines changed: 2 additions & 2 deletions

@@ -105,8 +105,8 @@ class FunctionAxisOnePreviewWarning(PreviewWarning):
 
 class FunctionPackageVersionWarning(PreviewWarning):
     """
-    Managed UDF package versions may not precisely match users' local
-    environment or the exact versions specified.
+    Managed UDF package versions for Numpy, Pandas, and Pyarrow may not
+    precisely match users' local environment or the exact versions specified.
     """
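Since this docstring describes a user-facing preview warning, callers who have accepted the version-drift caveat can silence it with the standard warnings machinery. A minimal sketch, assuming PreviewWarning is importable from bigframes.exceptions as this diff suggests; the filter call itself is plain standard-library warnings:

import warnings

from bigframes import exceptions as bfe

# Silence only the bigframes preview warnings (including the managed-UDF
# package-version drift warning); all other warnings still surface.
warnings.filterwarnings("ignore", category=bfe.PreviewWarning)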

bigframes/functions/_function_client.py

Lines changed: 1 addition & 1 deletion

@@ -246,7 +246,7 @@ def provision_bq_managed_function(
         # Augment user package requirements with any internal package
         # requirements.
         packages = _utils._get_updated_package_requirements(
-            packages, is_row_processor, capture_references, ignore_numpy_version=True
+            packages, is_row_processor, capture_references, ignore_package_version=True
         )
         if packages:
             managed_function_options["packages"] = packages

bigframes/functions/_utils.py

Lines changed: 17 additions & 16 deletions

@@ -66,32 +66,33 @@ def _get_updated_package_requirements(
     package_requirements=None,
     is_row_processor=False,
     capture_references=True,
-    ignore_numpy_version=False,
+    ignore_package_version=False,
 ):
     requirements = []
     if capture_references:
         requirements.append(f"cloudpickle=={cloudpickle.__version__}")
 
     if is_row_processor:
-        # bigframes function will send an entire row of data as json, which
-        # would be converted to a pandas series and processed Ensure numpy
-        # versions match to avoid unpickling problems. See internal issue
-        # b/347934471.
-        requirements.append(f"pandas=={pandas.__version__}")
-        requirements.append(f"pyarrow=={pyarrow.__version__}")
-        # TODO(jialuo): Add back the version after b/410924784 is resolved.
-        # Due to current limitations on the numpy version in Python UDFs, we use
-        # `ignore_numpy_version` to optionally omit the version for managed
-        # functions only.
-        if ignore_numpy_version:
+        if ignore_package_version:
+            # TODO(jialuo): Add back the version after b/410924784 is resolved.
+            # Due to current limitations on the packages version in Python UDFs,
+            # we use `ignore_package_version` to optionally omit the version for
+            # managed functions only.
             msg = bfe.format_message(
-                "Numpy version may not precisely match your local environment."
+                "Numpy, Pandas, and Pyarrow version may not precisely match your local environment."
             )
             warnings.warn(msg, category=bfe.PreviewWarning)
-            numpy_package = "numpy"
+            requirements.append("pandas")
+            requirements.append("pyarrow")
+            requirements.append("numpy")
         else:
-            numpy_package = f"numpy=={numpy.__version__}"
-        requirements.append(numpy_package)
+            # bigframes function will send an entire row of data as json, which
+            # would be converted to a pandas series and processed Ensure numpy
+            # versions match to avoid unpickling problems. See internal issue
+            # b/347934471.
+            requirements.append(f"pandas=={pandas.__version__}")
+            requirements.append(f"pyarrow=={pyarrow.__version__}")
+            requirements.append(f"numpy=={numpy.__version__}")
 
     if package_requirements:
         requirements.extend(package_requirements)
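To make the behavior change concrete, the sketch below reproduces just the row-processor branch of _get_updated_package_requirements in isolation. The function name build_row_processor_requirements is invented for illustration, the cloudpickle pin and user-supplied requirements handled by the real helper are omitted, and the pinned versions in the sample output are simply whatever is installed locally:

import numpy
import pandas
import pyarrow


def build_row_processor_requirements(ignore_package_version: bool) -> list[str]:
    """Simplified sketch of the pinning behavior introduced in this commit."""
    if ignore_package_version:
        # Managed (BigQuery Python UDF) path: list the packages without
        # versions until b/410924784 allows pinning them again.
        return ["pandas", "pyarrow", "numpy"]
    # Remote-function path: pin to the local environment so unpickling the
    # serialized row behaves identically on both sides (b/347934471).
    return [
        f"pandas=={pandas.__version__}",
        f"pyarrow=={pyarrow.__version__}",
        f"numpy=={numpy.__version__}",
    ]


print(build_row_processor_requirements(ignore_package_version=True))
# ['pandas', 'pyarrow', 'numpy']
print(build_row_processor_requirements(ignore_package_version=False))
# e.g. ['pandas==2.2.2', 'pyarrow==16.1.0', 'numpy==1.26.4'] (local versions)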

tests/system/large/functions/test_managed_function.py

Lines changed: 14 additions & 22 deletions

@@ -655,11 +655,9 @@ def test_managed_function_df_apply_axis_1(session, dataset_id, scalars_dfs):
     try:
 
         def serialize_row(row):
-            # Explicitly casting types ensures consistent behavior between
-            # BigFrames and pandas. Without it, BigFrames return plain Python
-            # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
-            # which could lead to mismatches and requires further investigation.
-            # See b/435021126.
+            # TODO(b/435021126): Remove explicit type conversion of the field
+            # "name" after the issue has been addressed. It is added only to
+            # accept partial pandas parity for the time being.
             custom = {
                 "name": int(row.name),
                 "index": [idx for idx in row.index],
@@ -705,7 +703,7 @@ def serialize_row(row):
     finally:
         # clean up the gcp assets created for the managed function.
         cleanup_function_assets(
-            serialize_row_mf, session.bqclient, session.cloudfunctionsclient
+            serialize_row_mf, session.bqclient, ignore_failures=False
         )
 
 
@@ -716,11 +714,9 @@ def test_managed_function_df_apply_axis_1_aggregates(session, dataset_id, scalar
     try:
 
         def analyze(row):
-            # Explicitly casting types ensures consistent behavior between
-            # BigFrames and pandas. Without it, BigFrames return plain Python
-            # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
-            # which could lead to mismatches and requires further investigation.
-            # See b/435021126.
+            # TODO(b/435021126): Remove explicit type conversion of the field
+            # "name" after the issue has been addressed. It is added only to
+            # accept partial pandas parity for the time being.
             return str(
                 {
                     "dtype": row.dtype,
@@ -735,7 +731,7 @@ def analyze(row):
 
         with pytest.warns(
             bfe.PreviewWarning,
-            match=("Numpy version may not precisely match your local environment."),
+            match=("Numpy, Pandas, and Pyarrow version may not precisely match."),
         ):
 
             analyze_mf = session.udf(
@@ -756,9 +752,7 @@ def analyze(row):
 
     finally:
         # clean up the gcp assets created for the managed function.
-        cleanup_function_assets(
-            analyze_mf, session.bqclient, session.cloudfunctionsclient
-        )
+        cleanup_function_assets(analyze_mf, session.bqclient, ignore_failures=False)
 
 
 @pytest.mark.parametrize(
@@ -834,11 +828,9 @@ def test_managed_function_df_apply_axis_1_complex(session, dataset_id, pd_df):
     try:
 
         def serialize_row(row):
-            # Explicitly casting types ensures consistent behavior between
-            # BigFrames and pandas. Without it, BigFrames return plain Python
-            # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
-            # which could lead to mismatches and requires further investigation.
-            # See b/435021126.
+            # TODO(b/435021126): Remove explicit type conversion of the field
+            # "name" after the issue has been addressed. It is added only to
+            # accept partial pandas parity for the time being.
             custom = {
                 "name": int(row.name),
                 "index": [idx for idx in row.index],
@@ -876,7 +868,7 @@ def serialize_row(row):
     finally:
         # clean up the gcp assets created for the managed function.
         cleanup_function_assets(
-            serialize_row_mf, session.bqclient, session.cloudfunctionsclient
+            serialize_row_mf, session.bqclient, ignore_failures=False
         )
 
 
@@ -942,5 +934,5 @@ def float_parser(row):
     finally:
         # clean up the gcp assets created for the managed function.
         cleanup_function_assets(
-            float_parser_mf, session.bqclient, session.cloudfunctionsclient
+            float_parser_mf, session.bqclient, ignore_failures=False
         )
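A note on the updated assertion in these tests: the match argument of pytest.warns is applied with re.search, so the shortened pattern still matches the full warning text emitted by _get_updated_package_requirements, and the trailing period acts as a regex wildcard rather than a literal end of sentence. A minimal, bigframes-independent sketch using the message wording from the diff above:

import re
import warnings

import pytest

MESSAGE = (
    "Numpy, Pandas, and Pyarrow version may not precisely match "
    "your local environment."
)

# pytest.warns searches the pattern within the warning message, so a prefix
# of the message (with "." matching any character) is enough to pass.
with pytest.warns(
    UserWarning,
    match="Numpy, Pandas, and Pyarrow version may not precisely match.",
):
    warnings.warn(MESSAGE, category=UserWarning)

# Equivalent check with the standard library: re.search, not re.fullmatch.
assert re.search("may not precisely match.", MESSAGE)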
