@@ -659,6 +659,7 @@ def serialize_row(row):
659659 # BigFrames and pandas. Without it, BigFrames return plain Python
660660 # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
661661 # which could lead to mismatches and requires further investigation.
662+ # See b/435021126.
662663 custom = {
663664 "name" : int (row .name ),
664665 "index" : [idx for idx in row .index ],
@@ -719,6 +720,7 @@ def analyze(row):
719720 # BigFrames and pandas. Without it, BigFrames return plain Python
720721 # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
721722 # which could lead to mismatches and requires further investigation.
723+ # See b/435021126.
722724 return str (
723725 {
724726 "dtype" : row .dtype ,
@@ -731,12 +733,17 @@ def analyze(row):
731733 }
732734 )
733735
734- analyze_mf = session .udf (
735- input_types = bigframes .series .Series ,
736- output_type = str ,
737- dataset = dataset_id ,
738- name = prefixer .create_prefix (),
739- )(analyze )
736+ with pytest .warns (
737+ bfe .PreviewWarning ,
738+ match = ("Numpy version may not precisely match your local environment." ),
739+ ):
740+
741+ analyze_mf = session .udf (
742+ input_types = bigframes .series .Series ,
743+ output_type = str ,
744+ dataset = dataset_id ,
745+ name = prefixer .create_prefix (),
746+ )(analyze )
740747
741748 assert getattr (analyze_mf , "is_row_processor" )
742749
@@ -831,6 +838,7 @@ def serialize_row(row):
831838 # BigFrames and pandas. Without it, BigFrames return plain Python
832839 # types, e.g. 0, while pandas return NumPy types, e.g. np.int64(0),
833840 # which could lead to mismatches and requires further investigation.
841+ # See b/435021126.
834842 custom = {
835843 "name" : int (row .name ),
836844 "index" : [idx for idx in row .index ],
@@ -870,3 +878,69 @@ def serialize_row(row):
870878 cleanup_function_assets (
871879 serialize_row_mf , session .bqclient , session .cloudfunctionsclient
872880 )
881+
882+
@pytest.mark.skip(reason="Revert after this bug b/435018880 is fixed.")
def test_managed_function_df_apply_axis_1_na_nan_inf(dataset_id, session):
    """Check that special float values produced by user code are honored.

    A row-processing managed function parses the ``text`` column into a
    float and is applied with ``axis=1`` through both pandas and BigFrames.
    The inputs cover regular numbers as well as nan, inf, -inf, ``numpy.nan``
    and ``pandas.NA``. The BigFrames result is compared against the pandas
    result and against the ``num`` column computed directly in BigQuery.

    Args:
        dataset_id: Dataset in which the managed function is created.
        session: BigFrames session used to read data and deploy the function.
    """
    bf_df = session.read_gbq(
        """\
        SELECT "1" AS text, 1 AS num
        UNION ALL
        SELECT "2.5" AS text, 2.5 AS num
        UNION ALL
        SELECT "nan" AS text, IEEE_DIVIDE(0, 0) AS num
        UNION ALL
        SELECT "inf" AS text, IEEE_DIVIDE(1, 0) AS num
        UNION ALL
        SELECT "-inf" AS text, IEEE_DIVIDE(-1, 0) AS num
        UNION ALL
        SELECT "numpy nan" AS text, IEEE_DIVIDE(0, 0) AS num
        UNION ALL
        SELECT "pandas na" AS text, NULL AS num
        """
    )

    pd_df = bf_df.to_pandas()

    def float_parser(row):
        # Imports live inside the function body; presumably so the deployed
        # function does not rely on module-level names -- TODO confirm.
        import numpy as mynp
        import pandas as mypd

        if row["text"] == "pandas na":
            return mypd.NA
        if row["text"] == "numpy nan":
            return mynp.nan
        return float(row["text"])

    # Deploy before entering the try block: if deployment itself fails there
    # is no function handle to clean up, and the `finally` clause must not
    # replace the real error with an UnboundLocalError on `float_parser_mf`.
    float_parser_mf = session.udf(
        input_types=bigframes.series.Series,
        output_type=float,
        dataset=dataset_id,
        name=prefixer.create_prefix(),
    )(float_parser)

    try:
        assert getattr(float_parser_mf, "is_row_processor")

        pd_result = pd_df.apply(float_parser, axis=1)
        bf_result = bf_df.apply(float_parser_mf, axis=1).to_pandas()

        # bf_result.dtype is 'Float64' while pd_result.dtype is 'object'
        # , ignore this mismatch by using check_dtype=False.
        pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False)

        # Let's also assert that the data is consistent in this round trip
        # (BQ -> BigFrames -> BQ -> GCF -> BQ -> BigFrames) w.r.t. their
        # expected values in BQ.
        bq_result = bf_df["num"].to_pandas()
        bq_result.name = None
        pandas.testing.assert_series_equal(bq_result, bf_result)
    finally:
        # clean up the gcp assets created for the managed function.
        cleanup_function_assets(
            float_parser_mf, session.bqclient, session.cloudfunctionsclient
        )
0 commit comments