From b5414624b00b3f1cee2fbf27efa5222701f45256 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Mon, 19 May 2025 18:38:59 +0000 Subject: [PATCH 1/2] fix: Series.str.isdigit in unicode subscripts --- bigframes/core/compile/scalar_op_compiler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 6576276b11..75ee4be1b7 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -488,9 +488,9 @@ def isalpha_op_impl(x: ibis_types.Value): @scalar_op_compiler.register_unary_op(ops.isdigit_op) def isdigit_op_impl(x: ibis_types.Value): - # Based on docs, should include superscript/subscript-ed numbers - # Tests however pass only when set to Nd unicode class - return typing.cast(ibis_types.StringValue, x).re_search(r"^(\p{Nd})+$") + return typing.cast(ibis_types.StringValue, x).re_search( + r"^[\p{Nd}\x{00B9}\x{00B2}\x{00B3}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}]+$" + ) @scalar_op_compiler.register_unary_op(ops.isdecimal_op) From 1e00eccc38c9a4b9111f7c6ac183d9c61bf9dff9 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Mon, 21 Jul 2025 19:15:38 +0000 Subject: [PATCH 2/2] fix test --- tests/system/small/operations/test_strings.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index 209bc87f9b..a720614892 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -324,13 +324,10 @@ def test_isalpha(weird_strings, weird_strings_pd): ) -@pytest.mark.skipif( - "dev" in pa.__version__, - # b/333484335 pyarrow is inconsistent on the behavior - reason="pyarrow dev version is inconsistent on isdigit behavior.", -) def test_isdigit(weird_strings, weird_strings_pd): - pd_result = weird_strings_pd.str.isdigit() + # check the behavior against normal pandas str, since pyarrow has a bug with superscripts/fractions b/333484335 + # astype object instead of str to support pd.NA + pd_result = weird_strings_pd.astype(object).str.isdigit() bf_result = weird_strings.str.isdigit().to_pandas() pd.testing.assert_series_equal(