diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 26afdc7910..7edf2fa2e4 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -13,7 +13,7 @@ # limitations under the License. from collections.abc import Mapping -from datetime import datetime +from datetime import date, datetime from typing import Optional, Union import bigframes_vendored.constants as constants @@ -28,7 +28,7 @@ def to_datetime( arg: Union[ - Union[int, float, str, datetime], + Union[int, float, str, datetime, date], vendored_pandas_datetimes.local_iterables, bigframes.series.Series, bigframes.dataframe.DataFrame, @@ -38,7 +38,7 @@ def to_datetime( format: Optional[str] = None, unit: Optional[str] = None, ) -> Union[pd.Timestamp, datetime, bigframes.series.Series]: - if isinstance(arg, (int, float, str, datetime)): + if isinstance(arg, (int, float, str, datetime, date)): return pd.to_datetime( arg, utc=utc, @@ -62,7 +62,11 @@ def to_datetime( f"Unit parameter is not supported for non-numerical input types. {constants.FEEDBACK_LINK}" ) - if arg.dtype in (bigframes.dtypes.TIMESTAMP_DTYPE, bigframes.dtypes.DATETIME_DTYPE): + if arg.dtype in ( + bigframes.dtypes.TIMESTAMP_DTYPE, + bigframes.dtypes.DATETIME_DTYPE, + bigframes.dtypes.DATE_DTYPE, + ): to_type = ( bigframes.dtypes.TIMESTAMP_DTYPE if utc else bigframes.dtypes.DATETIME_DTYPE ) diff --git a/bigframes/operations/datetime_ops.py b/bigframes/operations/datetime_ops.py index 7c760b689b..6f44952488 100644 --- a/bigframes/operations/datetime_ops.py +++ b/bigframes/operations/datetime_ops.py @@ -50,6 +50,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT dtypes.FLOAT_DTYPE, dtypes.INT_DTYPE, dtypes.STRING_DTYPE, + dtypes.DATE_DTYPE, ): raise TypeError("expected string or numeric input") return pd.ArrowDtype(pa.timestamp("us", tz=None)) @@ -67,6 +68,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT dtypes.FLOAT_DTYPE, dtypes.INT_DTYPE, dtypes.STRING_DTYPE, + dtypes.DATE_DTYPE, ): raise TypeError("expected string or numeric input") return pd.ArrowDtype(pa.timestamp("us", tz="UTC")) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 4e2beb9c19..8ce0cb9beb 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -13,12 +13,15 @@ # limitations under the License. import datetime +import typing import numpy +from packaging import version from pandas import testing import pandas as pd import pytest +import bigframes.pandas as bpd import bigframes.series from bigframes.testing.utils import assert_series_equal @@ -548,3 +551,23 @@ def test_timedelta_dt_accessors_on_wrong_type_raise_exception(scalars_dfs, acces with pytest.raises(TypeError): access(bf_df["timestamp_col"]) + + +@pytest.mark.parametrize( + "col", + # TODO(b/431276706) test timestamp_col too. + ["date_col", "datetime_col"], +) +def test_to_datetime(scalars_dfs, col): + if version.Version(pd.__version__) <= version.Version("2.1.0"): + pytest.skip("timezone conversion bug") + bf_df, pd_df = scalars_dfs + + actual_result = typing.cast( + bigframes.series.Series, bpd.to_datetime(bf_df[col]) + ).to_pandas() + + expected_result = pd.Series(pd.to_datetime(pd_df[col])) + testing.assert_series_equal( + actual_result, expected_result, check_dtype=False, check_index_type=False + ) diff --git a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py index d6048d1208..9c17b9632e 100644 --- a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py +++ b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py @@ -1,17 +1,22 @@ # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/tools/datetimes.py -from datetime import datetime +from datetime import date, datetime from typing import List, Mapping, Tuple, Union import pandas as pd -from bigframes import constants, series +from bigframes import constants, dataframe, series local_iterables = Union[List, Tuple, pd.Series, pd.DataFrame, Mapping] def to_datetime( - arg, + arg: Union[ + Union[int, float, str, datetime, date], + local_iterables, + series.Series, + dataframe.DataFrame, + ], *, utc=False, format=None, @@ -58,7 +63,7 @@ def to_datetime( dtype: timestamp[us, tz=UTC][pyarrow] Args: - arg (int, float, str, datetime, list, tuple, 1-d array, Series): + arg (int, float, str, datetime, date, list, tuple, 1-d array, Series): The object to convert to a datetime. utc (bool, default False): Control timezone-related parsing, localization and conversion. If True, the