From e15ee50c842aa344402bfbb7200f0f6114b1b0fd Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 18:26:30 +0000 Subject: [PATCH 1/9] feat: support date data type for to_datetime() --- bigframes/core/tools/datetimes.py | 12 ++++++++---- bigframes/operations/datetime_ops.py | 2 ++ tests/system/small/operations/test_datetimes.py | 17 +++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 26afdc7910..7edf2fa2e4 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -13,7 +13,7 @@ # limitations under the License. from collections.abc import Mapping -from datetime import datetime +from datetime import date, datetime from typing import Optional, Union import bigframes_vendored.constants as constants @@ -28,7 +28,7 @@ def to_datetime( arg: Union[ - Union[int, float, str, datetime], + Union[int, float, str, datetime, date], vendored_pandas_datetimes.local_iterables, bigframes.series.Series, bigframes.dataframe.DataFrame, @@ -38,7 +38,7 @@ def to_datetime( format: Optional[str] = None, unit: Optional[str] = None, ) -> Union[pd.Timestamp, datetime, bigframes.series.Series]: - if isinstance(arg, (int, float, str, datetime)): + if isinstance(arg, (int, float, str, datetime, date)): return pd.to_datetime( arg, utc=utc, @@ -62,7 +62,11 @@ def to_datetime( f"Unit parameter is not supported for non-numerical input types. {constants.FEEDBACK_LINK}" ) - if arg.dtype in (bigframes.dtypes.TIMESTAMP_DTYPE, bigframes.dtypes.DATETIME_DTYPE): + if arg.dtype in ( + bigframes.dtypes.TIMESTAMP_DTYPE, + bigframes.dtypes.DATETIME_DTYPE, + bigframes.dtypes.DATE_DTYPE, + ): to_type = ( bigframes.dtypes.TIMESTAMP_DTYPE if utc else bigframes.dtypes.DATETIME_DTYPE ) diff --git a/bigframes/operations/datetime_ops.py b/bigframes/operations/datetime_ops.py index 7c760b689b..6f44952488 100644 --- a/bigframes/operations/datetime_ops.py +++ b/bigframes/operations/datetime_ops.py @@ -50,6 +50,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT dtypes.FLOAT_DTYPE, dtypes.INT_DTYPE, dtypes.STRING_DTYPE, + dtypes.DATE_DTYPE, ): raise TypeError("expected string or numeric input") return pd.ArrowDtype(pa.timestamp("us", tz=None)) @@ -67,6 +68,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT dtypes.FLOAT_DTYPE, dtypes.INT_DTYPE, dtypes.STRING_DTYPE, + dtypes.DATE_DTYPE, ): raise TypeError("expected string or numeric input") return pd.ArrowDtype(pa.timestamp("us", tz="UTC")) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 4e2beb9c19..03c4f650c1 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -13,12 +13,14 @@ # limitations under the License. import datetime +import typing import numpy from pandas import testing import pandas as pd import pytest +import bigframes.pandas as bpd import bigframes.series from bigframes.testing.utils import assert_series_equal @@ -548,3 +550,18 @@ def test_timedelta_dt_accessors_on_wrong_type_raise_exception(scalars_dfs, acces with pytest.raises(TypeError): access(bf_df["timestamp_col"]) + + +@pytest.mark.parametrize("utc", [True, False]) +@pytest.mark.parametrize("col", ["date_col", "datetime_col"]) +def test_to_datetime(scalars_dfs, col, utc): + bf_df, pd_df = scalars_dfs + + actual_result = typing.cast( + bigframes.series.Series, bpd.to_datetime(bf_df[col], utc=utc) + ).to_pandas() + + expected_result = pd.Series(pd.to_datetime(pd_df[col], utc=utc)) + testing.assert_series_equal( + actual_result, expected_result, check_dtype=False, check_index_type=False + ) From a99dacbb4908d9632cd0b50583d43845f1da0274 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 19:59:02 +0000 Subject: [PATCH 2/9] add test preconditions --- tests/system/small/operations/test_datetimes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 03c4f650c1..923c4ddca1 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -13,9 +13,11 @@ # limitations under the License. import datetime +import sys import typing import numpy +from packaging import version from pandas import testing import pandas as pd import pytest @@ -555,6 +557,9 @@ def test_timedelta_dt_accessors_on_wrong_type_raise_exception(scalars_dfs, acces @pytest.mark.parametrize("utc", [True, False]) @pytest.mark.parametrize("col", ["date_col", "datetime_col"]) def test_to_datetime(scalars_dfs, col, utc): + if version.Version(sys.version) <= version.Version("3.9"): + pytest.skip("timezone comparison is not well-supported.") + bf_df, pd_df = scalars_dfs actual_result = typing.cast( From f0eecff3ca4da474bdf8713e7a9377c85399c802 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 20:04:02 +0000 Subject: [PATCH 3/9] fix test --- tests/system/small/operations/test_datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 923c4ddca1..b2e4b05ad2 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -557,7 +557,7 @@ def test_timedelta_dt_accessors_on_wrong_type_raise_exception(scalars_dfs, acces @pytest.mark.parametrize("utc", [True, False]) @pytest.mark.parametrize("col", ["date_col", "datetime_col"]) def test_to_datetime(scalars_dfs, col, utc): - if version.Version(sys.version) <= version.Version("3.9"): + if sys.version_info <= (3, 9): pytest.skip("timezone comparison is not well-supported.") bf_df, pd_df = scalars_dfs From c80bb2c79b32711f64a1defeb686ede9b447a83e Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 20:05:34 +0000 Subject: [PATCH 4/9] fix lint --- tests/system/small/operations/test_datetimes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index b2e4b05ad2..1992086d69 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -17,7 +17,6 @@ import typing import numpy -from packaging import version from pandas import testing import pandas as pd import pytest From e9194d35c5173443c3e699e83040d8d031cfa934 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 21:01:17 +0000 Subject: [PATCH 5/9] fix tests --- tests/system/small/operations/test_datetimes.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 1992086d69..0150d20b9c 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -553,19 +553,19 @@ def test_timedelta_dt_accessors_on_wrong_type_raise_exception(scalars_dfs, acces access(bf_df["timestamp_col"]) -@pytest.mark.parametrize("utc", [True, False]) -@pytest.mark.parametrize("col", ["date_col", "datetime_col"]) -def test_to_datetime(scalars_dfs, col, utc): - if sys.version_info <= (3, 9): - pytest.skip("timezone comparison is not well-supported.") - +@pytest.mark.parametrize( + "col", + # TODO(b/431276706) test timestamp_col too. + ["date_col", "datetime_col"], +) +def test_to_datetime(scalars_dfs, col): bf_df, pd_df = scalars_dfs actual_result = typing.cast( - bigframes.series.Series, bpd.to_datetime(bf_df[col], utc=utc) + bigframes.series.Series, bpd.to_datetime(bf_df[col]) ).to_pandas() - expected_result = pd.Series(pd.to_datetime(pd_df[col], utc=utc)) + expected_result = pd.Series(pd.to_datetime(pd_df[col])) testing.assert_series_equal( actual_result, expected_result, check_dtype=False, check_index_type=False ) From b85b09bd68333f0fea83f44ea858ad1f832b4bb5 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 21:03:23 +0000 Subject: [PATCH 6/9] fix lint --- tests/system/small/operations/test_datetimes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 0150d20b9c..f48f86d9c7 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -13,7 +13,6 @@ # limitations under the License. import datetime -import sys import typing import numpy From 9189762a4149119513dbeb3b998cd24054e0e59e Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 21:41:13 +0000 Subject: [PATCH 7/9] fix test --- tests/system/small/operations/test_datetimes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index f48f86d9c7..8ce0cb9beb 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -16,6 +16,7 @@ import typing import numpy +from packaging import version from pandas import testing import pandas as pd import pytest @@ -558,6 +559,8 @@ def test_timedelta_dt_accessors_on_wrong_type_raise_exception(scalars_dfs, acces ["date_col", "datetime_col"], ) def test_to_datetime(scalars_dfs, col): + if version.Version(pd.__version__) <= version.Version("2.1.0"): + pytest.skip("timezone conversion bug") bf_df, pd_df = scalars_dfs actual_result = typing.cast( From 371e315089e443c118ef1d020dbba3924c7ad59f Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 23:03:12 +0000 Subject: [PATCH 8/9] update docstring --- .../bigframes_vendored/pandas/core/tools/datetimes.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py index d6048d1208..7b235f5082 100644 --- a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py +++ b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py @@ -1,17 +1,22 @@ # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/tools/datetimes.py -from datetime import datetime +from datetime import date, datetime from typing import List, Mapping, Tuple, Union import pandas as pd -from bigframes import constants, series +from bigframes import constants, dataframe, series local_iterables = Union[List, Tuple, pd.Series, pd.DataFrame, Mapping] def to_datetime( - arg, + arg: Union[ + Union[int, float, str, datetime, date], + local_iterables, + series.Series, + dataframe.DataFrame, + ], *, utc=False, format=None, From 7839f64ad770f06848b7ca7a412f6a3b55efc63e Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 11 Jul 2025 23:12:48 +0000 Subject: [PATCH 9/9] update docstring --- third_party/bigframes_vendored/pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py index 7b235f5082..9c17b9632e 100644 --- a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py +++ b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py @@ -63,7 +63,7 @@ def to_datetime( dtype: timestamp[us, tz=UTC][pyarrow] Args: - arg (int, float, str, datetime, list, tuple, 1-d array, Series): + arg (int, float, str, datetime, date, list, tuple, 1-d array, Series): The object to convert to a datetime. utc (bool, default False): Control timezone-related parsing, localization and conversion. If True, the