66import numpy as np
77import pytest
88
9- from pandas ._config import using_string_dtype
10-
119import pandas as pd
1210from pandas import (
1311 DataFrame ,
@@ -30,7 +28,6 @@ def mix_abc() -> dict[str, list[float | str]]:
3028
3129
3230class TestDataFrameReplace :
33- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
3431 def test_replace_inplace (self , datetime_frame , float_string_frame ):
3532 datetime_frame .loc [datetime_frame .index [:5 ], "A" ] = np .nan
3633 datetime_frame .loc [datetime_frame .index [- 5 :], "A" ] = np .nan
@@ -46,7 +43,9 @@ def test_replace_inplace(self, datetime_frame, float_string_frame):
4643 mf .iloc [- 10 :, mf .columns .get_loc ("A" )] = np .nan
4744
4845 result = float_string_frame .replace (np .nan , 0 )
49- expected = float_string_frame .fillna (value = 0 )
46+ expected = float_string_frame .copy ()
47+ expected ["foo" ] = expected ["foo" ].astype (object )
48+ expected = expected .fillna (value = 0 )
5049 tm .assert_frame_equal (result , expected )
5150
5251 tsframe = datetime_frame .copy ()
@@ -291,22 +290,20 @@ def test_regex_replace_dict_nested_non_first_character(
291290 expected = DataFrame ({"first" : [".bc" , "bc." , "c.b" ]}, dtype = dtype )
292291 tm .assert_frame_equal (result , expected )
293292
294- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
295293 def test_regex_replace_dict_nested_gh4115 (self ):
296- df = DataFrame ({"Type" : ["Q" , "T" , "Q" , "Q" , "T" ], "tmp" : 2 })
297- expected = DataFrame (
298- {"Type" : Series ([0 , 1 , 0 , 0 , 1 ], dtype = df .Type .dtype ), "tmp" : 2 }
294+ df = DataFrame (
295+ {"Type" : Series (["Q" , "T" , "Q" , "Q" , "T" ], dtype = object ), "tmp" : 2 }
299296 )
297+ expected = DataFrame ({"Type" : Series ([0 , 1 , 0 , 0 , 1 ], dtype = object ), "tmp" : 2 })
300298 result = df .replace ({"Type" : {"Q" : 0 , "T" : 1 }})
301299 tm .assert_frame_equal (result , expected )
302300
303- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
304301 def test_regex_replace_list_to_scalar (self , mix_abc ):
305302 df = DataFrame (mix_abc )
306303 expec = DataFrame (
307304 {
308305 "a" : mix_abc ["a" ],
309- "b" : np . array ([np .nan ] * 4 , dtype = object ),
306+ "b" : Series ([np .nan ] * 4 , dtype = "str" ),
310307 "c" : [np .nan , np .nan , np .nan , "d" ],
311308 }
312309 )
@@ -326,7 +323,6 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
326323 tm .assert_frame_equal (res2 , expec )
327324 tm .assert_frame_equal (res3 , expec )
328325
329- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
330326 def test_regex_replace_str_to_numeric (self , mix_abc ):
331327 # what happens when you try to replace a numeric value with a regex?
332328 df = DataFrame (mix_abc )
@@ -338,11 +334,12 @@ def test_regex_replace_str_to_numeric(self, mix_abc):
338334 return_value = res3 .replace (regex = r"\s*\.\s*" , value = 0 , inplace = True )
339335 assert return_value is None
340336 expec = DataFrame ({"a" : mix_abc ["a" ], "b" : ["a" , "b" , 0 , 0 ], "c" : mix_abc ["c" ]})
337+ # TODO(infer_string)
338+ expec ["c" ] = expec ["c" ].astype (object )
341339 tm .assert_frame_equal (res , expec )
342340 tm .assert_frame_equal (res2 , expec )
343341 tm .assert_frame_equal (res3 , expec )
344342
345- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
346343 def test_regex_replace_regex_list_to_numeric (self , mix_abc ):
347344 df = DataFrame (mix_abc )
348345 res = df .replace ([r"\s*\.\s*" , "b" ], 0 , regex = True )
@@ -535,31 +532,37 @@ def test_replace_series_dict(self):
535532 result = df .replace (s , df .mean ())
536533 tm .assert_frame_equal (result , expected )
537534
538- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
539- def test_replace_convert (self ):
540- # gh 3907
541- df = DataFrame ([["foo" , "bar" , "bah" ], ["bar" , "foo" , "bah" ]])
535+ def test_replace_convert (self , any_string_dtype ):
536+ # gh 3907 (pandas >= 3.0 no longer converts dtypes)
537+ df = DataFrame (
538+ [["foo" , "bar" , "bah" ], ["bar" , "foo" , "bah" ]], dtype = any_string_dtype
539+ )
542540 m = {"foo" : 1 , "bar" : 2 , "bah" : 3 }
543541 rep = df .replace (m )
544- expec = df .dtypes
545- res = rep .dtypes
546- tm .assert_series_equal (expec , res )
542+ assert (rep .dtypes == object ).all ()
547543
548- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
549544 def test_replace_mixed (self , float_string_frame ):
550545 mf = float_string_frame
551546 mf .iloc [5 :20 , mf .columns .get_loc ("foo" )] = np .nan
552547 mf .iloc [- 10 :, mf .columns .get_loc ("A" )] = np .nan
553548
554549 result = float_string_frame .replace (np .nan , - 18 )
555- expected = float_string_frame .fillna (value = - 18 )
550+ expected = float_string_frame .copy ()
551+ expected ["foo" ] = expected ["foo" ].astype (object )
552+ expected = expected .fillna (value = - 18 )
556553 tm .assert_frame_equal (result , expected )
557- tm .assert_frame_equal (result .replace (- 18 , np .nan ), float_string_frame )
554+ expected2 = float_string_frame .copy ()
555+ expected2 ["foo" ] = expected2 ["foo" ].astype (object )
556+ tm .assert_frame_equal (result .replace (- 18 , np .nan ), expected2 )
558557
559558 result = float_string_frame .replace (np .nan , - 1e8 )
560- expected = float_string_frame .fillna (value = - 1e8 )
559+ expected = float_string_frame .copy ()
560+ expected ["foo" ] = expected ["foo" ].astype (object )
561+ expected = expected .fillna (value = - 1e8 )
561562 tm .assert_frame_equal (result , expected )
562- tm .assert_frame_equal (result .replace (- 1e8 , np .nan ), float_string_frame )
563+ expected2 = float_string_frame .copy ()
564+ expected2 ["foo" ] = expected2 ["foo" ].astype (object )
565+ tm .assert_frame_equal (result .replace (- 1e8 , np .nan ), expected2 )
563566
564567 def test_replace_mixed_int_block_upcasting (self ):
565568 # int block upcasting
@@ -601,8 +604,7 @@ def test_replace_mixed_int_block_splitting(self):
601604 result = df .replace (0 , 0.5 )
602605 tm .assert_frame_equal (result , expected )
603606
604- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
605- def test_replace_mixed2 (self , using_infer_string ):
607+ def test_replace_mixed2 (self ):
606608 # to object block upcasting
607609 df = DataFrame (
608610 {
@@ -621,7 +623,7 @@ def test_replace_mixed2(self, using_infer_string):
621623
622624 expected = DataFrame (
623625 {
624- "A" : Series (["foo" , "bar" ]),
626+ "A" : Series (["foo" , "bar" ], dtype = "object" ),
625627 "B" : Series ([0 , "foo" ], dtype = "object" ),
626628 }
627629 )
@@ -917,16 +919,16 @@ def test_replace_limit(self):
917919 # TODO
918920 pass
919921
920- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
921- def test_replace_dict_no_regex (self ):
922+ def test_replace_dict_no_regex (self , any_string_dtype ):
922923 answer = Series (
923924 {
924925 0 : "Strongly Agree" ,
925926 1 : "Agree" ,
926927 2 : "Neutral" ,
927928 3 : "Disagree" ,
928929 4 : "Strongly Disagree" ,
929- }
930+ },
931+ dtype = any_string_dtype ,
930932 )
931933 weights = {
932934 "Agree" : 4 ,
@@ -935,19 +937,20 @@ def test_replace_dict_no_regex(self):
935937 "Strongly Agree" : 5 ,
936938 "Strongly Disagree" : 1 ,
937939 }
938- expected = Series ({0 : 5 , 1 : 4 , 2 : 3 , 3 : 2 , 4 : 1 }, dtype = answer . dtype )
940+ expected = Series ({0 : 5 , 1 : 4 , 2 : 3 , 3 : 2 , 4 : 1 }, dtype = object )
939941 result = answer .replace (weights )
940942 tm .assert_series_equal (result , expected )
941943
942- def test_replace_series_no_regex (self ):
944+ def test_replace_series_no_regex (self , any_string_dtype ):
943945 answer = Series (
944946 {
945947 0 : "Strongly Agree" ,
946948 1 : "Agree" ,
947949 2 : "Neutral" ,
948950 3 : "Disagree" ,
949951 4 : "Strongly Disagree" ,
950- }
952+ },
953+ dtype = any_string_dtype ,
951954 )
952955 weights = Series (
953956 {
@@ -1043,16 +1046,15 @@ def test_nested_dict_overlapping_keys_replace_str(self):
10431046 expected = df .replace ({"a" : dict (zip (astr , bstr ))})
10441047 tm .assert_frame_equal (result , expected )
10451048
1046- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
1047- def test_replace_swapping_bug (self , using_infer_string ):
1049+ def test_replace_swapping_bug (self ):
10481050 df = DataFrame ({"a" : [True , False , True ]})
10491051 res = df .replace ({"a" : {True : "Y" , False : "N" }})
1050- expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]})
1052+ expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]}, dtype = object )
10511053 tm .assert_frame_equal (res , expect )
10521054
10531055 df = DataFrame ({"a" : [0 , 1 , 0 ]})
10541056 res = df .replace ({"a" : {0 : "Y" , 1 : "N" }})
1055- expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]})
1057+ expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]}, dtype = object )
10561058 tm .assert_frame_equal (res , expect )
10571059
10581060 def test_replace_datetimetz (self ):
@@ -1186,7 +1188,7 @@ def test_replace_commutative(self, df, to_replace, exp):
11861188 )
11871189 def test_replace_replacer_dtype (self , replacer ):
11881190 # GH26632
1189- df = DataFrame (["a" ])
1191+ df = DataFrame (["a" ], dtype = object )
11901192 result = df .replace ({"a" : replacer , "b" : replacer })
11911193 expected = DataFrame ([replacer ], dtype = object )
11921194 tm .assert_frame_equal (result , expected )
@@ -1266,7 +1268,6 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data):
12661268 assert return_value is None
12671269 tm .assert_frame_equal (df , expected )
12681270
1269- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
12701271 def test_replace_value_category_type (self ):
12711272 """
12721273 Test for #23305: to ensure category dtypes are maintained
@@ -1322,7 +1323,7 @@ def test_replace_value_category_type(self):
13221323 lambda x : x .astype ("category" ).cat .rename_categories ({"cat2" : "catX" })
13231324 )
13241325
1325- result = result .astype ({"col1" : "int64" , "col3" : "float64" , "col5" : "object " })
1326+ result = result .astype ({"col1" : "int64" , "col3" : "float64" , "col5" : "str " })
13261327 tm .assert_frame_equal (result , expected )
13271328
13281329 def test_replace_dict_category_type (self ):
@@ -1363,12 +1364,11 @@ def test_replace_with_compiled_regex(self):
13631364 expected = DataFrame (["z" , "b" , "c" ])
13641365 tm .assert_frame_equal (result , expected )
13651366
1366- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
13671367 def test_replace_intervals (self ):
13681368 # https://github.com/pandas-dev/pandas/issues/35931
13691369 df = DataFrame ({"a" : [pd .Interval (0 , 1 ), pd .Interval (0 , 1 )]})
13701370 result = df .replace ({"a" : {pd .Interval (0 , 1 ): "x" }})
1371- expected = DataFrame ({"a" : ["x" , "x" ]})
1371+ expected = DataFrame ({"a" : ["x" , "x" ]}, dtype = object )
13721372 tm .assert_frame_equal (result , expected )
13731373
13741374 def test_replace_unicode (self ):
@@ -1468,17 +1468,21 @@ def test_regex_replace_scalar(
14681468 expected .loc [expected ["a" ] == "." , "a" ] = expected_replace_val
14691469 tm .assert_frame_equal (result , expected )
14701470
@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_frame(self, regex):
    """Replace the string ``"0"`` with the integer ``1``, with and without regex.

    Parameters
    ----------
    regex : bool
        Passed through to ``DataFrame.replace``.
    """
    # GH-48644
    df1 = DataFrame({"A": ["0"], "B": ["0"]})
    expected_df1 = DataFrame({"A": [1], "B": [1]}, dtype=object)
    result_df1 = df1.replace(to_replace="0", value=1, regex=regex)
    tm.assert_frame_equal(result_df1, expected_df1)

    df2 = DataFrame({"A": ["0"], "B": ["1"]})
    if regex:
        # TODO(infer_string): both string columns get cast to object,
        # while only needed for column A
        expected_df2 = DataFrame({"A": [1], "B": ["1"]}, dtype=object)
    else:
        # Only column A is pinned to object; B keeps the dtype inferred
        # for a plain list of strings.
        expected_df2 = DataFrame({"A": Series([1], dtype=object), "B": ["1"]})
    result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
    tm.assert_frame_equal(result_df2, expected_df2)
14841488
0 commit comments