Skip to content

Commit cde4ff3

Browse files
fix transpose test
1 parent 7ff08da commit cde4ff3

19 files changed

227 additions and 256 deletions

bigframes/testing/utils.py

Lines changed: 31 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@
7373
def assert_dfs_equivalent(pd_df: pd.DataFrame, bf_df: bpd.DataFrame, **kwargs):
7474
bf_df_local = bf_df.to_pandas()
7575
ignore_order = not bf_df._session._strictly_ordered
76-
assert_pandas_df_equal(bf_df_local, pd_df, ignore_order=ignore_order, **kwargs)
76+
assert_frame_equal(bf_df_local, pd_df, ignore_order=ignore_order, **kwargs)
7777

7878

7979
def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwargs):
@@ -82,21 +82,40 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar
8282
assert_series_equal(bf_df_local, pd_series, ignore_order=ignore_order, **kwargs)
8383

8484

85-
def assert_pandas_df_equal(df0, df1, ignore_order: bool = False, **kwargs):
85+
def _normalize_all_nulls(col: pd.Series) -> pd.Series:
86+
if col.dtype == bigframes.dtypes.FLOAT_DTYPE:
87+
col = col.astype("float64")
88+
if pd_types.is_object_dtype(col):
89+
col = col.fillna(float("nan"))
90+
return col
91+
92+
93+
def assert_frame_equal(
94+
left: pd.DataFrame,
95+
right: pd.DataFrame,
96+
*,
97+
ignore_order: bool = False,
98+
nulls_are_nan: bool = True,
99+
**kwargs,
100+
):
86101
if ignore_order:
87102
# Sort by a column to get consistent results.
88-
if df0.index.name != "rowindex":
89-
df0 = df0.sort_values(
90-
list(df0.columns.drop("geography_col", errors="ignore"))
103+
if left.index.name != "rowindex":
104+
left = left.sort_values(
105+
list(left.columns.drop("geography_col", errors="ignore"))
91106
).reset_index(drop=True)
92-
df1 = df1.sort_values(
93-
list(df1.columns.drop("geography_col", errors="ignore"))
107+
right = right.sort_values(
108+
list(right.columns.drop("geography_col", errors="ignore"))
94109
).reset_index(drop=True)
95110
else:
96-
df0 = df0.sort_index()
97-
df1 = df1.sort_index()
111+
left = left.sort_index()
112+
right = right.sort_index()
113+
114+
if nulls_are_nan:
115+
left = left.apply(_normalize_all_nulls)
116+
right = right.apply(_normalize_all_nulls)
98117

99-
pd.testing.assert_frame_equal(df0, df1, **kwargs)
118+
pd.testing.assert_frame_equal(left, right, **kwargs)
100119

101120

102121
def assert_series_equal(
@@ -116,14 +135,8 @@ def assert_series_equal(
116135
right = right.sort_index()
117136

118137
if nulls_are_nan:
119-
if left.dtype == bigframes.dtypes.FLOAT_DTYPE:
120-
left = left.astype("float64")
121-
if right.dtype == bigframes.dtypes.FLOAT_DTYPE:
122-
right = right.astype("float64")
123-
if pd_types.is_object_dtype(left):
124-
left = left.fillna(float("nan"))
125-
if pd_types.is_object_dtype(right):
126-
right = right.fillna(float("nan"))
138+
left = _normalize_all_nulls(left)
139+
right = _normalize_all_nulls(right)
127140

128141
pd.testing.assert_series_equal(left, right, **kwargs)
129142

tests/system/large/functions/test_remote_function.py

Lines changed: 21 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
import bigframes.pandas as bpd
3737
import bigframes.series
3838
from bigframes.testing.utils import (
39-
assert_pandas_df_equal,
39+
assert_frame_equal,
4040
cleanup_function_assets,
4141
delete_cloud_function,
4242
get_cloud_functions,
@@ -214,7 +214,7 @@ def square(x):
214214
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
215215
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
216216

217-
assert_pandas_df_equal(bf_result, pd_result)
217+
assert_frame_equal(bf_result, pd_result)
218218
finally:
219219
# clean up the gcp assets created for the remote function
220220
cleanup_function_assets(square, session.bqclient, session.cloudfunctionsclient)
@@ -261,7 +261,7 @@ def add_one(x):
261261
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
262262
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
263263

264-
assert_pandas_df_equal(bf_result, pd_result)
264+
assert_frame_equal(bf_result, pd_result)
265265
finally:
266266
# clean up the gcp assets created for the remote function
267267
cleanup_function_assets(
@@ -349,7 +349,7 @@ def square(x):
349349
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
350350
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
351351

352-
assert_pandas_df_equal(bf_result, pd_result)
352+
assert_frame_equal(bf_result, pd_result)
353353
finally:
354354
# clean up the gcp assets created for the remote function
355355
cleanup_function_assets(square, session.bqclient, session.cloudfunctionsclient)
@@ -403,7 +403,7 @@ def sign(num):
403403
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
404404
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
405405

406-
assert_pandas_df_equal(bf_result, pd_result)
406+
assert_frame_equal(bf_result, pd_result)
407407
finally:
408408
# clean up the gcp assets created for the remote function
409409
cleanup_function_assets(
@@ -453,7 +453,7 @@ def circumference(radius):
453453
pd_result_col = pd_result_col.astype(pandas.Float64Dtype())
454454
pd_result = pd_float64_col_filtered.to_frame().assign(result=pd_result_col)
455455

456-
assert_pandas_df_equal(bf_result, pd_result)
456+
assert_frame_equal(bf_result, pd_result)
457457
finally:
458458
# clean up the gcp assets created for the remote function
459459
cleanup_function_assets(
@@ -503,7 +503,7 @@ def find_team(num):
503503
pd_result_col = pd_result_col.astype(pandas.StringDtype(storage="pyarrow"))
504504
pd_result = pd_float64_col_filtered.to_frame().assign(result=pd_result_col)
505505

506-
assert_pandas_df_equal(bf_result, pd_result)
506+
assert_frame_equal(bf_result, pd_result)
507507
finally:
508508
# clean up the gcp assets created for the remote function
509509
cleanup_function_assets(
@@ -591,7 +591,7 @@ def inner_test():
591591
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
592592
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
593593

594-
assert_pandas_df_equal(bf_result, pd_result)
594+
assert_frame_equal(bf_result, pd_result)
595595

596596
# Test that the remote function works as expected
597597
inner_test()
@@ -683,7 +683,7 @@ def is_odd(num):
683683
pd_result_col = pd_int64_col.mask(is_odd)
684684
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
685685

686-
assert_pandas_df_equal(bf_result, pd_result)
686+
assert_frame_equal(bf_result, pd_result)
687687
finally:
688688
# clean up the gcp assets created for the remote function
689689
cleanup_function_assets(
@@ -727,7 +727,7 @@ def is_odd(num):
727727
pd_result_col = pd_int64_col[pd_int64_col.notnull()].mask(is_odd, -1)
728728
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
729729

730-
assert_pandas_df_equal(bf_result, pd_result)
730+
assert_frame_equal(bf_result, pd_result)
731731
finally:
732732
# clean up the gcp assets created for the remote function
733733
cleanup_function_assets(
@@ -770,7 +770,7 @@ def test_remote_udf_lambda(session, scalars_dfs, dataset_id, bq_cf_connection):
770770
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
771771
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
772772

773-
assert_pandas_df_equal(bf_result, pd_result)
773+
assert_frame_equal(bf_result, pd_result)
774774
finally:
775775
# clean up the gcp assets created for the remote function
776776
cleanup_function_assets(
@@ -829,7 +829,7 @@ def square(x):
829829
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
830830
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
831831

832-
assert_pandas_df_equal(bf_result, pd_result)
832+
assert_frame_equal(bf_result, pd_result)
833833
finally:
834834
# clean up the gcp assets created for the remote function
835835
cleanup_function_assets(
@@ -884,7 +884,7 @@ def pd_np_foo(x) -> None:
884884
# comparing for the purpose of this test
885885
pd_result.result = pd_result.result.astype(pandas.Float64Dtype())
886886

887-
assert_pandas_df_equal(bf_result, pd_result)
887+
assert_frame_equal(bf_result, pd_result)
888888
finally:
889889
# clean up the gcp assets created for the remote function
890890
cleanup_function_assets(
@@ -928,7 +928,7 @@ def test_internal(rf, udf):
928928
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
929929
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
930930

931-
assert_pandas_df_equal(bf_result, pd_result)
931+
assert_frame_equal(bf_result, pd_result)
932932

933933
# Create an explicit name for the remote function
934934
prefixer = test_utils.prefixer.Prefixer("foo", "")
@@ -1109,7 +1109,7 @@ def square(x):
11091109
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
11101110
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
11111111

1112-
assert_pandas_df_equal(bf_result, pd_result)
1112+
assert_frame_equal(bf_result, pd_result)
11131113
finally:
11141114
# clean up the gcp assets created for the remote function
11151115
cleanup_function_assets(square, session.bqclient, session.cloudfunctionsclient)
@@ -1150,7 +1150,7 @@ def square(x):
11501150
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
11511151
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
11521152

1153-
assert_pandas_df_equal(bf_result, pd_result)
1153+
assert_frame_equal(bf_result, pd_result)
11541154
finally:
11551155
# clean up the gcp assets created for the remote function
11561156
cleanup_function_assets(square, session.bqclient, session.cloudfunctionsclient)
@@ -1225,7 +1225,7 @@ def square(x):
12251225
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
12261226
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
12271227

1228-
assert_pandas_df_equal(bf_result, pd_result)
1228+
assert_frame_equal(bf_result, pd_result)
12291229
finally:
12301230
# clean up the gcp assets created for the remote function
12311231
cleanup_function_assets(square, session.bqclient, session.cloudfunctionsclient)
@@ -1283,7 +1283,7 @@ def square_num(x):
12831283
pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x)
12841284
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
12851285

1286-
assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
1286+
assert_frame_equal(bf_result, pd_result, check_dtype=False)
12871287
finally:
12881288
# clean up the gcp assets created for the remote function
12891289
cleanup_function_assets(
@@ -1357,7 +1357,7 @@ def square_num(x):
13571357
pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x)
13581358
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
13591359

1360-
assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
1360+
assert_frame_equal(bf_result, pd_result, check_dtype=False)
13611361
finally:
13621362
# clean up the gcp assets created for the remote function
13631363
cleanup_function_assets(
@@ -1416,7 +1416,7 @@ def square_num(x):
14161416
pd_result_col = df["num"].apply(lambda x: x if x is None else x * x)
14171417
pd_result = df.assign(result=pd_result_col)
14181418

1419-
assert_pandas_df_equal(
1419+
assert_frame_equal(
14201420
bf_result, pd_result, check_dtype=False, check_index_type=False
14211421
)
14221422

@@ -1504,7 +1504,7 @@ def square_num(x):
15041504
pd_result_col = pd_int64_col.apply(square_num)
15051505
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
15061506

1507-
assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
1507+
assert_frame_equal(bf_result, pd_result, check_dtype=False)
15081508
finally:
15091509
# clean up the gcp assets created for the remote function
15101510
cleanup_function_assets(

tests/system/small/bigquery/test_vector_search.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323

2424
import bigframes.bigquery as bbq
2525
import bigframes.pandas as bpd
26-
from bigframes.testing.utils import assert_pandas_df_equal
26+
from bigframes.testing.utils import assert_frame_equal
2727

2828
# Need at least 5,000 rows to create a vector index.
2929
VECTOR_DF = pd.DataFrame(
@@ -154,7 +154,7 @@ def test_vector_search_basic_params_with_df():
154154
},
155155
index=pd.Index([1, 0, 0, 1], dtype="Int64"),
156156
)
157-
assert_pandas_df_equal(
157+
assert_frame_equal(
158158
expected.sort_values("id"),
159159
vector_search_result.sort_values("id"),
160160
check_dtype=False,

tests/system/small/functions/test_remote_function.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
from bigframes.functions import _utils as bff_utils
3535
from bigframes.functions import function as bff
3636
import bigframes.session._io.bigquery
37-
from bigframes.testing.utils import assert_pandas_df_equal, get_function_name
37+
from bigframes.testing.utils import assert_frame_equal, get_function_name
3838

3939
_prefixer = test_utils.prefixer.Prefixer("bigframes", "")
4040

@@ -159,7 +159,7 @@ def square(x):
159159
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
160160
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
161161

162-
assert_pandas_df_equal(bf_result, pd_result)
162+
assert_frame_equal(bf_result, pd_result)
163163

164164

165165
@pytest.mark.flaky(retries=2, delay=120)
@@ -208,7 +208,7 @@ def square(x):
208208
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
209209
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
210210

211-
assert_pandas_df_equal(bf_result, pd_result)
211+
assert_frame_equal(bf_result, pd_result)
212212

213213

214214
@pytest.mark.flaky(retries=2, delay=120)
@@ -300,7 +300,7 @@ def square(x):
300300
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
301301
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
302302

303-
assert_pandas_df_equal(bf_result, pd_result)
303+
assert_frame_equal(bf_result, pd_result)
304304

305305

306306
@pytest.mark.flaky(retries=2, delay=120)
@@ -388,7 +388,7 @@ def square(x):
388388
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
389389
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
390390

391-
assert_pandas_df_equal(bf_result, pd_result)
391+
assert_frame_equal(bf_result, pd_result)
392392

393393

394394
@pytest.mark.flaky(retries=2, delay=120)
@@ -437,7 +437,7 @@ def square(x):
437437
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
438438
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
439439

440-
assert_pandas_df_equal(bf_result, pd_result)
440+
assert_frame_equal(bf_result, pd_result)
441441

442442

443443
@pytest.mark.flaky(retries=2, delay=120)
@@ -482,7 +482,7 @@ def square(x):
482482
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
483483
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
484484

485-
assert_pandas_df_equal(bf_result, pd_result)
485+
assert_frame_equal(bf_result, pd_result)
486486

487487

488488
@pytest.mark.flaky(retries=2, delay=120)
@@ -517,7 +517,7 @@ def add_one(x):
517517
for col in pd_result:
518518
pd_result[col] = pd_result[col].astype(pd_int64_df_filtered[col].dtype)
519519

520-
assert_pandas_df_equal(bf_result, pd_result)
520+
assert_frame_equal(bf_result, pd_result)
521521

522522

523523
@pytest.mark.flaky(retries=2, delay=120)
@@ -552,7 +552,7 @@ def add_one(x):
552552
for col in pd_result:
553553
pd_result[col] = pd_result[col].astype(pd_int64_df_filtered[col].dtype)
554554

555-
assert_pandas_df_equal(bf_result, pd_result)
555+
assert_frame_equal(bf_result, pd_result)
556556

557557

558558
@pytest.mark.flaky(retries=2, delay=120)
@@ -585,7 +585,7 @@ def add_one(x):
585585
for col in pd_result:
586586
pd_result[col] = pd_result[col].astype(pd_int64_df[col].dtype)
587587

588-
assert_pandas_df_equal(bf_result, pd_result)
588+
assert_frame_equal(bf_result, pd_result)
589589

590590

591591
@pytest.mark.flaky(retries=2, delay=120)
@@ -738,7 +738,7 @@ def square1(x):
738738
s2_result_col = int64_col_filtered.apply(square2)
739739
s2_result = int64_col_filtered.to_frame().assign(result=s2_result_col)
740740

741-
assert_pandas_df_equal(s1_result.to_pandas(), s2_result.to_pandas())
741+
assert_frame_equal(s1_result.to_pandas(), s2_result.to_pandas())
742742

743743

744744
def test_read_gbq_function_runs_existing_udf(session):
@@ -937,7 +937,7 @@ def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id):
937937
indirect_df = indirect_df.assign(y=indirect_df.x.apply(square))
938938
converted_indirect_df = indirect_df.to_pandas()
939939

940-
assert_pandas_df_equal(
940+
assert_frame_equal(
941941
direct_df, converted_indirect_df, ignore_order=True, check_index_type=False
942942
)
943943

0 commit comments

Comments
 (0)