@@ -1670,7 +1670,11 @@ def analyze(row):
16701670 (3 , 4 ): ["pq" , "rs" , "tu" ],
16711671 (5.0 , "six" , 7 ): [8 , 9 , 10 ],
16721672 'raise Exception("hacked!")' : [11 , 12 , 13 ],
1673- }
1673+ },
1674+ # The default pandas index has a non-numpy type, whereas the bigframes
1675+ # index is always a numpy-based type, so use an index that is compatible
1676+ # with bigframes. See more details in b/369689696.
1677+ index = pandas .Index ([0 , 1 , 2 ], dtype = pandas .Int64Dtype ()),
16741678 ),
16751679 id = "all-kinds-of-column-names" ,
16761680 ),
@@ -1681,17 +1685,22 @@ def analyze(row):
16811685 "y" : [1.5 , 3.75 , 5 ],
16821686 "z" : ["pq" , "rs" , "tu" ],
16831687 },
1684- index = pandas .MultiIndex .from_tuples (
1685- [
1686- ("a" , 100 ),
1687- ("a" , 200 ),
1688- ("b" , 300 ),
1689- ]
1688+ index = pandas .MultiIndex .from_frame (
1689+ pandas .DataFrame (
1690+ {
1691+ "idx0" : pandas .Series (
1692+ ["a" , "a" , "b" ], dtype = pandas .StringDtype ()
1693+ ),
1694+ "idx1" : pandas .Series (
1695+ [100 , 200 , 300 ], dtype = pandas .Int64Dtype ()
1696+ ),
1697+ }
1698+ )
16901699 ),
16911700 ),
16921701 id = "multiindex" ,
16931702 marks = pytest .mark .skip (
1694- reason = "TODO(b/368639580) revert this skip after fix "
1703+ reason = "TODO: revert this skip after this pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/59908 "
16951704 ),
16961705 ),
16971706 pytest .param (
@@ -1701,6 +1710,10 @@ def analyze(row):
17011710 [20 , 3.75 , "rs" ],
17021711 [30 , 8.0 , "tu" ],
17031712 ],
1713+ # The default pandas index has a non-numpy type, whereas the bigframes
1714+ # index is always a numpy-based type, so use an index that is compatible
1715+ # with bigframes. See more details in b/369689696.
1716+ index = pandas .Index ([0 , 1 , 2 ], dtype = pandas .Int64Dtype ()),
17041717 columns = pandas .MultiIndex .from_arrays (
17051718 [
17061719 ["first" , "last_two" , "last_two" ],
@@ -1729,10 +1742,8 @@ def test_df_apply_axis_1_complex(session, pd_df):
17291742
17301743 def serialize_row (row ):
17311744 custom = {
1732- "name" : row .name .item () if hasattr (row .name , "item" ) else row .name ,
1733- "index" : [
1734- idx .item () if hasattr (idx , "item" ) else idx for idx in row .index
1735- ],
1745+ "name" : row .name ,
1746+ "index" : [idx for idx in row .index ],
17361747 "values" : [
17371748 val .item () if hasattr (val , "item" ) else val for val in row .values
17381749 ],
@@ -1756,12 +1767,7 @@ def serialize_row(row):
17561767 bf_result = bf_df .apply (serialize_row_remote , axis = 1 ).to_pandas ()
17571768 pd_result = pd_df .apply (serialize_row , axis = 1 )
17581769
1759- # bf_result.dtype is 'string[pyarrow]' while pd_result.dtype is 'object'
1760- # , ignore this mismatch by using check_dtype=False.
1761- #
1762- # bf_result.index[0].dtype is 'string[pyarrow]' while
1763- # pd_result.index[0].dtype is 'object', ignore this mismatch by using
1764- # check_index_type=False.
1770+ # ignore known dtype difference between pandas and bigframes
17651771 pandas .testing .assert_series_equal (
17661772 pd_result , bf_result , check_dtype = False , check_index_type = False
17671773 )
0 commit comments