From f2adc92305924ec90375f0f6a5fbca8be840eb88 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 4 Jan 2025 10:51:07 -0500 Subject: [PATCH 1/3] TST(string dtype): Resolve replace xfails --- pandas/tests/frame/methods/test_replace.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index b2320798ea9a2..2f7dffa48ada2 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -334,7 +334,6 @@ def test_regex_replace_str_to_numeric(self, mix_abc): return_value = res3.replace(regex=r"\s*\.\s*", value=0, inplace=True) assert return_value is None expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]}) - # TODO(infer_string) expec["c"] = expec["c"].astype(object) tm.assert_frame_equal(res, expec) tm.assert_frame_equal(res2, expec) @@ -1478,8 +1477,6 @@ def test_replace_regex_dtype_frame(self, regex): df2 = DataFrame({"A": ["0"], "B": ["1"]}) if regex: - # TODO(infer_string): both string columns get cast to object, - # while only needed for column A expected_df2 = DataFrame({"A": [1], "B": ["1"]}, dtype=object) else: expected_df2 = DataFrame({"A": Series([1], dtype=object), "B": ["1"]}) From d9ead320f520ee8da2b6e64d3a2f4803948866d0 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 4 Jan 2025 10:58:30 -0500 Subject: [PATCH 2/3] Add test --- pandas/tests/frame/methods/test_replace.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 2f7dffa48ada2..138620a3a098e 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1468,18 +1468,23 @@ def test_regex_replace_scalar( tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("regex", [False, True]) - def test_replace_regex_dtype_frame(self, regex): + @pytest.mark.parametrize("value", [1, "1"]) + def test_replace_regex_dtype_frame(self, regex, value): # GH-48644 df1 = DataFrame({"A": ["0"], "B": ["0"]}) - expected_df1 = DataFrame({"A": [1], "B": [1]}, dtype=object) - result_df1 = df1.replace(to_replace="0", value=1, regex=regex) + # When value is an integer, coerce result to object. + # When value is a string, infer the correct string dtype. + dtype = object if value == 1 else None + + expected_df1 = DataFrame({"A": [value], "B": [value]}, dtype=dtype) + result_df1 = df1.replace(to_replace="0", value=value, regex=regex) tm.assert_frame_equal(result_df1, expected_df1) df2 = DataFrame({"A": ["0"], "B": ["1"]}) if regex: - expected_df2 = DataFrame({"A": [1], "B": ["1"]}, dtype=object) + expected_df2 = DataFrame({"A": [1], "B": ["1"]}, dtype=dtype) else: - expected_df2 = DataFrame({"A": Series([1], dtype=object), "B": ["1"]}) + expected_df2 = DataFrame({"A": Series([1], dtype=dtype), "B": ["1"]}) result_df2 = df2.replace(to_replace="0", value=1, regex=regex) tm.assert_frame_equal(result_df2, expected_df2) From 2981e296c0f456d6b5370296a3afc70d2aaab031 Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 5 Jan 2025 15:15:51 -0500 Subject: [PATCH 3/3] fixup --- pandas/tests/frame/methods/test_replace.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 138620a3a098e..e5bd8a9c45b55 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1482,10 +1482,10 @@ def test_replace_regex_dtype_frame(self, regex, value): df2 = DataFrame({"A": ["0"], "B": ["1"]}) if regex: - expected_df2 = DataFrame({"A": [1], "B": ["1"]}, dtype=dtype) + expected_df2 = DataFrame({"A": [value], "B": ["1"]}, dtype=dtype) else: - expected_df2 = DataFrame({"A": Series([1], dtype=dtype), "B": ["1"]}) - result_df2 = df2.replace(to_replace="0", value=1, regex=regex) + expected_df2 = DataFrame({"A": Series([value], dtype=dtype), "B": ["1"]}) + result_df2 = df2.replace(to_replace="0", value=value, regex=regex) tm.assert_frame_equal(result_df2, expected_df2) def test_replace_with_value_also_being_replaced(self):