
Commit a2c9679

fix more tests
1 parent 595b6a5 commit a2c9679

File tree

4 files changed: +43 -10 lines


bigframes/dtypes.py

Lines changed: 3 additions & 0 deletions
@@ -247,6 +247,7 @@ class SimpleDtypeInfo:
     "decimal128(38, 9)[pyarrow]",
     "decimal256(76, 38)[pyarrow]",
     "binary[pyarrow]",
+    "duration[us][pyarrow]",
 ]

 DTYPE_STRINGS = typing.get_args(DtypeString)

@@ -421,6 +422,8 @@ def is_bool_coercable(type_: ExpressionType) -> bool:
 # special case - both "Int64" and "int64[pyarrow]" are accepted
 BIGFRAMES_STRING_TO_BIGFRAMES["int64[pyarrow]"] = INT_DTYPE

+BIGFRAMES_STRING_TO_BIGFRAMES["duration[us][pyarrow]"] = TIMEDELTA_DTYPE
+
 # For the purposes of dataframe.memory_usage
 DTYPE_BYTE_SIZES = {
     type_info.dtype: type_info.logical_bytes for type_info in SIMPLE_TYPES
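
For context on the new entry: "duration[us][pyarrow]" is the exact string name pandas gives its Arrow-backed timedelta dtype, which is why that literal is the key registered in BIGFRAMES_STRING_TO_BIGFRAMES. A minimal sketch in plain pandas/pyarrow (independent of this commit) showing where the string comes from:

    import pandas as pd
    import pyarrow as pa

    # pandas renders an ArrowDtype as "<arrow type>[pyarrow]", so an Arrow
    # duration with microsecond resolution prints as "duration[us][pyarrow]".
    td_dtype = pd.ArrowDtype(pa.duration("us"))
    assert str(td_dtype) == "duration[us][pyarrow]"

    # A series of microsecond-resolution timedeltas backed by Arrow storage.
    s = pd.Series(
        [pd.Timedelta(seconds=1), pd.Timedelta(milliseconds=250)],
        dtype=td_dtype,
    )
    print(s.dtype)  # duration[us][pyarrow]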

tests/system/small/pandas/core/methods/test_describe.py

Lines changed: 7 additions & 1 deletion
@@ -21,7 +21,13 @@ def test_df_describe_non_temporal(scalars_dfs):
     pytest.importorskip("pandas", minversion="2.0.0")
     scalars_df, scalars_pandas_df = scalars_dfs
     # excluding temporal columns here because BigFrames cannot perform percentiles operations on them
-    unsupported_columns = ["datetime_col", "timestamp_col", "time_col", "date_col"]
+    unsupported_columns = [
+        "datetime_col",
+        "timestamp_col",
+        "time_col",
+        "date_col",
+        "duration_col",
+    ]
     bf_result = scalars_df.drop(columns=unsupported_columns).describe().to_pandas()

     modified_pd_df = scalars_pandas_df.drop(columns=unsupported_columns)
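
In sketch form, the comparison pattern this test follows (scalars_df and scalars_pandas_df come from the scalars_dfs fixture; the column list mirrors the diff above):

    # Drop every column whose percentiles BigFrames cannot compute, then
    # compare describe() output against plain pandas on the same columns.
    unsupported_columns = [
        "datetime_col",
        "timestamp_col",
        "time_col",
        "date_col",
        "duration_col",
    ]
    bf_result = scalars_df.drop(columns=unsupported_columns).describe().to_pandas()
    pd_result = scalars_pandas_df.drop(columns=unsupported_columns).describe()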

tests/system/small/test_dataframe_io.py

Lines changed: 8 additions & 4 deletions
@@ -999,7 +999,7 @@ def test_to_sql_query_unnamed_index_included(
     scalars_df_default_index: bpd.DataFrame,
     scalars_pandas_df_default_index: pd.DataFrame,
 ):
-    bf_df = scalars_df_default_index.reset_index(drop=True)
+    bf_df = scalars_df_default_index.reset_index(drop=True).drop(columns="duration_col")
     sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=True)
     assert len(idx_labels) == 1
     assert len(idx_ids) == 1

@@ -1017,7 +1017,9 @@ def test_to_sql_query_named_index_included(
     scalars_df_default_index: bpd.DataFrame,
     scalars_pandas_df_default_index: pd.DataFrame,
 ):
-    bf_df = scalars_df_default_index.set_index("rowindex_2", drop=True)
+    bf_df = scalars_df_default_index.set_index("rowindex_2", drop=True).drop(
+        columns="duration_col"
+    )
     sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=True)
     assert len(idx_labels) == 1
     assert len(idx_ids) == 1

@@ -1034,7 +1036,7 @@ def test_to_sql_query_unnamed_index_excluded(
     scalars_df_default_index: bpd.DataFrame,
     scalars_pandas_df_default_index: pd.DataFrame,
 ):
-    bf_df = scalars_df_default_index.reset_index(drop=True)
+    bf_df = scalars_df_default_index.reset_index(drop=True).drop(columns="duration_col")
     sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=False)
     assert len(idx_labels) == 0
     assert len(idx_ids) == 0

@@ -1051,7 +1053,9 @@ def test_to_sql_query_named_index_excluded(
     scalars_df_default_index: bpd.DataFrame,
     scalars_pandas_df_default_index: pd.DataFrame,
 ):
-    bf_df = scalars_df_default_index.set_index("rowindex_2", drop=True)
+    bf_df = scalars_df_default_index.set_index("rowindex_2", drop=True).drop(
+        columns="duration_col"
+    )
     sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=False)
     assert len(idx_labels) == 0
     assert len(idx_ids) == 0
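
All four tests exercise the same contract of the private _to_sql_query helper, sketched below (bf_df is any bigframes DataFrame with a single index; the behavior is inferred from the assertions in this file, not from separate documentation):

    # include_index=True: the single index column is materialized in the
    # generated SQL, so one id/label pair comes back.
    sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=True)
    assert len(idx_ids) == 1 and len(idx_labels) == 1

    # include_index=False: no index columns appear in the query.
    sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=False)
    assert len(idx_ids) == 0 and len(idx_labels) == 0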

tests/system/small/test_session.py

Lines changed: 25 additions & 5 deletions
@@ -1820,6 +1820,7 @@ def test_read_parquet_gcs(
     df_out = df_out.assign(
         datetime_col=df_out["datetime_col"].astype("timestamp[us][pyarrow]"),
         timestamp_col=df_out["timestamp_col"].astype("timestamp[us, tz=UTC][pyarrow]"),
+        duration_col=df_out["duration_col"].astype("duration[us][pyarrow]"),
     )

     # Make sure we actually have at least some values before comparing.

@@ -1868,7 +1869,8 @@ def test_read_parquet_gcs_compressed(
     # DATETIME gets loaded as TIMESTAMP in parquet. See:
     # https://cloud.google.com/bigquery/docs/exporting-data#parquet_export_details
     df_out = df_out.assign(
-        datetime_col=df_out["datetime_col"].astype("timestamp[us][pyarrow]")
+        datetime_col=df_out["datetime_col"].astype("timestamp[us][pyarrow]"),
+        duration_col=df_out["duration_col"].astype("duration[us][pyarrow]"),
     )

     # Make sure we actually have at least some values before comparing.

@@ -1926,9 +1928,23 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder):

     # The auto detects of BigQuery load job have restrictions to detect the bytes,
     # datetime, numeric and geometry types, so they're skipped here.
-    df = df.drop(columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"])
+    df = df.drop(
+        columns=[
+            "bytes_col",
+            "datetime_col",
+            "numeric_col",
+            "geography_col",
+            "duration_col",
+        ]
+    )
     scalars_df = scalars_df.drop(
-        columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"]
+        columns=[
+            "bytes_col",
+            "datetime_col",
+            "numeric_col",
+            "geography_col",
+            "duration_col",
+        ]
     )
     assert df.shape[0] == scalars_df.shape[0]
     pd.testing.assert_series_equal(

@@ -1962,11 +1978,15 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder):
     # The auto detects of BigQuery load job have restrictions to detect the bytes,
     # numeric and geometry types, so they're skipped here.
     df = df.drop(columns=["bytes_col", "numeric_col", "geography_col"])
-    scalars_df = scalars_df.drop(columns=["bytes_col", "numeric_col", "geography_col"])
+    scalars_df = scalars_df.drop(
+        columns=["bytes_col", "numeric_col", "geography_col", "duration_col"]
+    )

     # pandas read_json does not respect the dtype overrides for these columns
     df = df.drop(columns=["date_col", "datetime_col", "time_col"])
-    scalars_df = scalars_df.drop(columns=["date_col", "datetime_col", "time_col"])
+    scalars_df = scalars_df.drop(
+        columns=["date_col", "datetime_col", "time_col", "duration_col"]
+    )

     assert df.shape[0] == scalars_df.shape[0]
     pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
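
The parquet tests above share one normalization step after the GCS round trip, sketched here (df_out is the frame read back with read_parquet; the rationale follows the comments in the diff: DATETIME is exported as TIMESTAMP in parquet, and the duration column likewise comes back needing an explicit cast to the Arrow-backed dtype):

    # Re-align round-tripped dtypes with the originals before comparing.
    df_out = df_out.assign(
        datetime_col=df_out["datetime_col"].astype("timestamp[us][pyarrow]"),
        duration_col=df_out["duration_col"].astype("duration[us][pyarrow]"),
    )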
