From 109f4862a62a989b13e667bb112793dd2e22e2d4 Mon Sep 17 00:00:00 2001 From: jialuo Date: Wed, 30 Jul 2025 23:44:27 +0000 Subject: [PATCH 1/3] fix: add warnings for duplicated or conflicting type hints in bigframes function --- bigframes/exceptions.py | 4 ++ bigframes/functions/_function_session.py | 20 +++++++ bigframes/functions/_utils.py | 14 +++++ .../large/functions/test_managed_function.py | 41 ++++++++++---- .../large/functions/test_remote_function.py | 53 +++++++++++++------ 5 files changed, 108 insertions(+), 24 deletions(-) diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index eda24a74f0..afe3e12266 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -41,6 +41,10 @@ class PreviewWarning(Warning): """The feature is in preview.""" +class FunctionRedundantTypeHintWarning(UserWarning): + """Redundant or conflicting type hints in a BigFrames function.""" + + class NullIndexPreviewWarning(PreviewWarning): """Unused. Kept for backwards compatibility. diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 22e6981c38..cb371ca994 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -534,6 +534,11 @@ def wrapper(func): **signature_kwargs, ) if input_types is not None: + if _utils.has_input_type(py_sig): + msg = bfe.format_message( + "Redundant or conflicting input types detected, using the one from the decorator." + ) + warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning) if not isinstance(input_types, collections.abc.Sequence): input_types = [input_types] py_sig = py_sig.replace( @@ -543,6 +548,11 @@ def wrapper(func): ] ) if output_type: + if _utils.has_output_type(py_sig): + msg = bfe.format_message( + "Redundant or conflicting return type detected, using the one from the decorator." + ) + warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning) py_sig = py_sig.replace(return_annotation=output_type) # Try to get input types via type annotations. @@ -836,6 +846,11 @@ def wrapper(func): **signature_kwargs, ) if input_types is not None: + if _utils.has_input_type(py_sig): + msg = bfe.format_message( + "Redundant or conflicting input types detected, using the one from the decorator." + ) + warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning) if not isinstance(input_types, collections.abc.Sequence): input_types = [input_types] py_sig = py_sig.replace( @@ -845,6 +860,11 @@ def wrapper(func): ] ) if output_type: + if _utils.has_output_type(py_sig): + msg = bfe.format_message( + "Redundant or conflicting return type detected, using the one from the decorator." + ) + warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning) py_sig = py_sig.replace(return_annotation=output_type) udf_sig = udf_def.UdfSignature.from_py_signature(py_sig) diff --git a/bigframes/functions/_utils.py b/bigframes/functions/_utils.py index 69cf74ada0..02109bd1ab 100644 --- a/bigframes/functions/_utils.py +++ b/bigframes/functions/_utils.py @@ -14,6 +14,7 @@ import hashlib +import inspect import json import sys import typing @@ -269,3 +270,16 @@ def post_process(input): return bbq.json_extract_string_array(input, value_dtype=result_dtype) return post_process + + +def has_input_type(signature: inspect.Signature) -> bool: + """Checks if any parameter in the signature has a type annotation.""" + for param in signature.parameters.values(): + if param.annotation is not inspect.Parameter.empty: + return True + return False + + +def has_output_type(signature: inspect.Signature) -> bool: + """Checks if the signature has a return type annotation.""" + return signature.return_annotation is not inspect.Parameter.empty diff --git a/tests/system/large/functions/test_managed_function.py b/tests/system/large/functions/test_managed_function.py index c58610d1ff..752b5b1cd0 100644 --- a/tests/system/large/functions/test_managed_function.py +++ b/tests/system/large/functions/test_managed_function.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + import google.api_core.exceptions import pandas import pyarrow @@ -31,12 +33,21 @@ def test_managed_function_array_output(session, scalars_dfs, dataset_id): try: - @session.udf( - dataset=dataset_id, - name=prefixer.create_prefix(), + with warnings.catch_warnings(record=True) as record: + + @session.udf( + dataset=dataset_id, + name=prefixer.create_prefix(), + ) + def featurize(x: int) -> list[float]: + return [float(i) for i in [x, x + 1, x + 2]] + + input_type_warning = "Redundant or conflicting input types detected." + return_type_warning = "Redundant or conflicting return type detected" + assert not any(input_type_warning in str(warning.message) for warning in record) + assert not any( + return_type_warning in str(warning.message) for warning in record ) - def featurize(x: int) -> list[float]: - return [float(i) for i in [x, x + 1, x + 2]] scalars_df, scalars_pandas_df = scalars_dfs @@ -222,7 +233,10 @@ def add(x: int, y: int) -> int: def test_managed_function_series_combine_array_output(session, dataset_id, scalars_dfs): try: - def add_list(x: int, y: int) -> list[int]: + # The type hints in this function's signature are redundant. The + # `input_types` and `output_type` arguments from udf decorator take + # precedence and will be used instead. + def add_list(x, y: bool) -> list[bool]: return [x, y] scalars_df, scalars_pandas_df = scalars_dfs @@ -234,9 +248,18 @@ def add_list(x: int, y: int) -> list[int]: # Make sure there are NA values in the test column. assert any([pandas.isna(val) for val in bf_df[int_col_name_with_nulls]]) - add_list_managed_func = session.udf( - dataset=dataset_id, name=prefixer.create_prefix() - )(add_list) + with warnings.catch_warnings(record=True) as record: + add_list_managed_func = session.udf( + input_types=[int, int], + output_type=list[int], + dataset=dataset_id, + name=prefixer.create_prefix(), + )(add_list) + + input_type_warning = "Redundant or conflicting input types detected" + assert any(input_type_warning in str(warning.message) for warning in record) + return_type_warning = "Redundant or conflicting return type detected" + assert any(return_type_warning in str(warning.message) for warning in record) # After filtering out nulls the managed function application should work # similar to pandas. diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index f3e97aeb85..c4b07bfba9 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -843,22 +843,31 @@ def test_remote_function_with_external_package_dependencies( ): try: - def pd_np_foo(x): + # The return type hint in this function's signature is redundant. The + # `output_type` argument from remote_function decorator takes precedence + # and will be used instead. + def pd_np_foo(x) -> None: import numpy as mynp import pandas as mypd return mypd.Series([x, mynp.sqrt(mynp.abs(x))]).sum() - # Create the remote function with the name provided explicitly - pd_np_foo_remote = session.remote_function( - input_types=[int], - output_type=float, - dataset=dataset_id, - bigquery_connection=bq_cf_connection, - reuse=False, - packages=["numpy", "pandas >= 2.0.0"], - cloud_function_service_account="default", - )(pd_np_foo) + with warnings.catch_warnings(record=True) as record: + # Create the remote function with the name provided explicitly + pd_np_foo_remote = session.remote_function( + input_types=[int], + output_type=float, + dataset=dataset_id, + bigquery_connection=bq_cf_connection, + reuse=False, + packages=["numpy", "pandas >= 2.0.0"], + cloud_function_service_account="default", + )(pd_np_foo) + + input_type_warning = "Redundant or conflicting input types detected" + assert not any(input_type_warning in str(warning.message) for warning in record) + return_type_warning = "Redundant or conflicting return type detected" + assert any(return_type_warning in str(warning.message) for warning in record) # The behavior of the created remote function should be as expected scalars_df, scalars_pandas_df = scalars_dfs @@ -1999,10 +2008,24 @@ def test_remote_function_unnamed_removed_w_session_cleanup(): # create a clean session session = bigframes.connect() - # create an unnamed remote function in the session - @session.remote_function(reuse=False, cloud_function_service_account="default") - def foo(x: int) -> int: - return x + 1 + with warnings.catch_warnings(record=True) as record: + # create an unnamed remote function in the session. + # The type hints in this function's signature are redundant. The + # `input_types` and `output_type` arguments from remote_function + # decorator take precedence and will be used instead. + @session.remote_function( + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account="default", + ) + def foo(x: int) -> int: + return x + 1 + + input_type_warning = "Redundant or conflicting input types detected" + assert any(input_type_warning in str(warning.message) for warning in record) + return_type_warning = "Redundant or conflicting return type detected" + assert any(return_type_warning in str(warning.message) for warning in record) # ensure that remote function artifacts are created assert foo.bigframes_remote_function is not None From 26499ff446e83a5f03521652523b4efa9ae72e5a Mon Sep 17 00:00:00 2001 From: jialuo Date: Wed, 6 Aug 2025 19:38:53 +0000 Subject: [PATCH 2/3] only warm conflict --- bigframes/exceptions.py | 8 ++--- bigframes/functions/_function_session.py | 32 ++++++++--------- bigframes/functions/_utils.py | 36 ++++++++++++++----- .../large/functions/test_managed_function.py | 11 +++--- .../large/functions/test_remote_function.py | 15 ++++---- 5 files changed, 62 insertions(+), 40 deletions(-) diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index afe3e12266..415b89053f 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -41,10 +41,6 @@ class PreviewWarning(Warning): """The feature is in preview.""" -class FunctionRedundantTypeHintWarning(UserWarning): - """Redundant or conflicting type hints in a BigFrames function.""" - - class NullIndexPreviewWarning(PreviewWarning): """Unused. Kept for backwards compatibility. @@ -107,6 +103,10 @@ class FunctionAxisOnePreviewWarning(PreviewWarning): """Remote Function and Managed UDF with axis=1 preview.""" +class FunctionConflictTypeHintWarning(UserWarning): + """Conflicting type hints in a BigFrames function.""" + + def format_message(message: str, fill: bool = True): """Formats a warning message with ANSI color codes for the warning color. diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index cb371ca994..71fa69b17b 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -534,13 +534,13 @@ def wrapper(func): **signature_kwargs, ) if input_types is not None: - if _utils.has_input_type(py_sig): - msg = bfe.format_message( - "Redundant or conflicting input types detected, using the one from the decorator." - ) - warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning) if not isinstance(input_types, collections.abc.Sequence): input_types = [input_types] + if _utils.has_conflict_input_type(py_sig, input_types): + msg = bfe.format_message( + "Conflicting input types detected, using the one from the decorator." + ) + warnings.warn(msg, category=bfe.FunctionConflictTypeHintWarning) py_sig = py_sig.replace( parameters=[ par.replace(annotation=itype) @@ -548,11 +548,11 @@ def wrapper(func): ] ) if output_type: - if _utils.has_output_type(py_sig): + if _utils.has_conflict_output_type(py_sig, output_type): msg = bfe.format_message( - "Redundant or conflicting return type detected, using the one from the decorator." + "Conflicting return type detected, using the one from the decorator." ) - warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning) + warnings.warn(msg, category=bfe.FunctionConflictTypeHintWarning) py_sig = py_sig.replace(return_annotation=output_type) # Try to get input types via type annotations. @@ -846,13 +846,13 @@ def wrapper(func): **signature_kwargs, ) if input_types is not None: - if _utils.has_input_type(py_sig): - msg = bfe.format_message( - "Redundant or conflicting input types detected, using the one from the decorator." - ) - warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning) if not isinstance(input_types, collections.abc.Sequence): input_types = [input_types] + if _utils.has_conflict_input_type(py_sig, input_types): + msg = bfe.format_message( + "Conflicting input types detected, using the one from the decorator." + ) + warnings.warn(msg, category=bfe.FunctionConflictTypeHintWarning) py_sig = py_sig.replace( parameters=[ par.replace(annotation=itype) @@ -860,11 +860,11 @@ def wrapper(func): ] ) if output_type: - if _utils.has_output_type(py_sig): + if _utils.has_conflict_output_type(py_sig, output_type): msg = bfe.format_message( - "Redundant or conflicting return type detected, using the one from the decorator." + "Conflicting return type detected, using the one from the decorator." ) - warnings.warn(msg, category=bfe.FunctionRedundantTypeHintWarning) + warnings.warn(msg, category=bfe.FunctionConflictTypeHintWarning) py_sig = py_sig.replace(return_annotation=output_type) udf_sig = udf_def.UdfSignature.from_py_signature(py_sig) diff --git a/bigframes/functions/_utils.py b/bigframes/functions/_utils.py index 02109bd1ab..aef3a4188d 100644 --- a/bigframes/functions/_utils.py +++ b/bigframes/functions/_utils.py @@ -18,7 +18,7 @@ import json import sys import typing -from typing import cast, Optional, Set +from typing import Any, cast, Optional, Sequence, Set import cloudpickle import google.api_core.exceptions @@ -272,14 +272,34 @@ def post_process(input): return post_process -def has_input_type(signature: inspect.Signature) -> bool: - """Checks if any parameter in the signature has a type annotation.""" - for param in signature.parameters.values(): +def has_conflict_input_type( + signature: inspect.Signature, + input_types: Sequence[Any], +) -> bool: + """Checks if the parameters have any conflict with the input_types.""" + params = list(signature.parameters.values()) + + if len(params) != len(input_types): + return True + + # Check for conflicts type hints. + for i, param in enumerate(params): if param.annotation is not inspect.Parameter.empty: - return True + if param.annotation != input_types[i]: + return True + + # No conflicts were found after checking all parameters. return False -def has_output_type(signature: inspect.Signature) -> bool: - """Checks if the signature has a return type annotation.""" - return signature.return_annotation is not inspect.Parameter.empty +def has_conflict_output_type( + signature: inspect.Signature, + output_type: Any, +) -> bool: + """Checks if the return type annotation conflicts with the output_type.""" + return_annotation = signature.return_annotation + + if return_annotation is inspect.Parameter.empty: + return False + + return return_annotation != output_type diff --git a/tests/system/large/functions/test_managed_function.py b/tests/system/large/functions/test_managed_function.py index 752b5b1cd0..0ab5fde9b3 100644 --- a/tests/system/large/functions/test_managed_function.py +++ b/tests/system/large/functions/test_managed_function.py @@ -42,8 +42,9 @@ def test_managed_function_array_output(session, scalars_dfs, dataset_id): def featurize(x: int) -> list[float]: return [float(i) for i in [x, x + 1, x + 2]] - input_type_warning = "Redundant or conflicting input types detected." - return_type_warning = "Redundant or conflicting return type detected" + # No following conflict warning when there is no redundant type hints. + input_type_warning = "Conflicting input types detected" + return_type_warning = "Conflicting return type detected" assert not any(input_type_warning in str(warning.message) for warning in record) assert not any( return_type_warning in str(warning.message) for warning in record @@ -233,7 +234,7 @@ def add(x: int, y: int) -> int: def test_managed_function_series_combine_array_output(session, dataset_id, scalars_dfs): try: - # The type hints in this function's signature are redundant. The + # The type hints in this function's signature has conflicts. The # `input_types` and `output_type` arguments from udf decorator take # precedence and will be used instead. def add_list(x, y: bool) -> list[bool]: @@ -256,9 +257,9 @@ def add_list(x, y: bool) -> list[bool]: name=prefixer.create_prefix(), )(add_list) - input_type_warning = "Redundant or conflicting input types detected" + input_type_warning = "Conflicting input types detected" assert any(input_type_warning in str(warning.message) for warning in record) - return_type_warning = "Redundant or conflicting return type detected" + return_type_warning = "Conflicting return type detected" assert any(return_type_warning in str(warning.message) for warning in record) # After filtering out nulls the managed function application should work diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index c4b07bfba9..0d6029bd2c 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -843,7 +843,7 @@ def test_remote_function_with_external_package_dependencies( ): try: - # The return type hint in this function's signature is redundant. The + # The return type hint in this function's signature has conflict. The # `output_type` argument from remote_function decorator takes precedence # and will be used instead. def pd_np_foo(x) -> None: @@ -864,9 +864,9 @@ def pd_np_foo(x) -> None: cloud_function_service_account="default", )(pd_np_foo) - input_type_warning = "Redundant or conflicting input types detected" + input_type_warning = "Conflicting input types detected" assert not any(input_type_warning in str(warning.message) for warning in record) - return_type_warning = "Redundant or conflicting return type detected" + return_type_warning = "Conflicting return type detected" assert any(return_type_warning in str(warning.message) for warning in record) # The behavior of the created remote function should be as expected @@ -2022,10 +2022,11 @@ def test_remote_function_unnamed_removed_w_session_cleanup(): def foo(x: int) -> int: return x + 1 - input_type_warning = "Redundant or conflicting input types detected" - assert any(input_type_warning in str(warning.message) for warning in record) - return_type_warning = "Redundant or conflicting return type detected" - assert any(return_type_warning in str(warning.message) for warning in record) + # No following warning with only redundant type hints (no conflict). + input_type_warning = "Conflicting input types detected" + assert not any(input_type_warning in str(warning.message) for warning in record) + return_type_warning = "Conflicting return type detected" + assert not any(return_type_warning in str(warning.message) for warning in record) # ensure that remote function artifacts are created assert foo.bigframes_remote_function is not None From db20d0850b058cea799a600a2e1f2adeb97262e6 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 6 Aug 2025 19:45:00 +0000 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index fc62594d81..174f3e852a 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -106,7 +106,7 @@ class FunctionAxisOnePreviewWarning(PreviewWarning): class FunctionConflictTypeHintWarning(UserWarning): """Conflicting type hints in a BigFrames function.""" - + class FunctionPackageVersionWarning(PreviewWarning): """ Managed UDF package versions for Numpy, Pandas, and Pyarrow may not