@@ -65,7 +65,7 @@ def test_repr(dtype):
6565 assert repr (df ) == expected
6666
6767 if dtype .na_value is np .nan :
68- expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: string "
68+ expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: str "
6969 else :
7070 expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
7171 assert repr (df .A ) == expected
@@ -75,7 +75,7 @@ def test_repr(dtype):
7575 expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
7676 elif dtype .storage == "pyarrow" and dtype .na_value is np .nan :
7777 arr_name = "ArrowStringArrayNumpySemantics"
78- expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string "
78+ expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: str "
7979 else :
8080 arr_name = "StringArray"
8181 expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
@@ -492,7 +492,7 @@ def test_fillna_args(dtype):
492492 tm .assert_extension_array_equal (res , expected )
493493
494494 if dtype .storage == "pyarrow" :
495- msg = "Invalid value '1' for dtype string "
495+ msg = "Invalid value '1' for dtype str "
496496 else :
497497 msg = "Cannot set non-string value '1' into a StringArray."
498498 with pytest .raises (TypeError , match = msg ):
@@ -514,7 +514,7 @@ def test_arrow_array(dtype):
514514 assert arr .equals (expected )
515515
516516
517- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
517+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
518518@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
519519def test_arrow_roundtrip (dtype , string_storage , using_infer_string ):
520520 # roundtrip possible from arrow 1.0.0
@@ -529,14 +529,17 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
529529 assert table .field ("a" ).type == "large_string"
530530 with pd .option_context ("string_storage" , string_storage ):
531531 result = table .to_pandas ()
532- assert isinstance (result ["a" ].dtype , pd .StringDtype )
533- expected = df .astype (f"string[{ string_storage } ]" )
534- tm .assert_frame_equal (result , expected )
535- # ensure the missing value is represented by NA and not np.nan or None
536- assert result .loc [2 , "a" ] is result ["a" ].dtype .na_value
532+ if dtype .na_value is np .nan and not using_string_dtype ():
533+ assert result ["a" ].dtype == "object"
534+ else :
535+ assert isinstance (result ["a" ].dtype , pd .StringDtype )
536+ expected = df .astype (f"string[{ string_storage } ]" )
537+ tm .assert_frame_equal (result , expected )
538+ # ensure the missing value is represented by NA and not np.nan or None
539+ assert result .loc [2 , "a" ] is result ["a" ].dtype .na_value
537540
538541
539- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
542+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
540543@pytest .mark .filterwarnings ("ignore:Passing a BlockManager:DeprecationWarning" )
541544def test_arrow_load_from_zero_chunks (dtype , string_storage , using_infer_string ):
542545 # GH-41040
@@ -553,9 +556,13 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
553556 table = pa .table ([pa .chunked_array ([], type = pa .string ())], schema = table .schema )
554557 with pd .option_context ("string_storage" , string_storage ):
555558 result = table .to_pandas ()
556- assert isinstance (result ["a" ].dtype , pd .StringDtype )
557- expected = df .astype (f"string[{ string_storage } ]" )
558- tm .assert_frame_equal (result , expected )
559+
560+ if dtype .na_value is np .nan and not using_string_dtype ():
561+ assert result ["a" ].dtype == "object"
562+ else :
563+ assert isinstance (result ["a" ].dtype , pd .StringDtype )
564+ expected = df .astype (f"string[{ string_storage } ]" )
565+ tm .assert_frame_equal (result , expected )
559566
560567
561568def test_value_counts_na (dtype ):
0 commit comments