From 55fefe7b390df9f991319794c848178515ea6018 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 22 Jul 2025 18:36:23 +0000 Subject: [PATCH 1/2] fix: support setitem with NaTType --- bigframes/core/array_value.py | 4 ---- bigframes/dtypes.py | 4 ++-- tests/system/small/test_dataframe.py | 8 +++++++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py index b47637cb59..0625a26fdb 100644 --- a/bigframes/core/array_value.py +++ b/bigframes/core/array_value.py @@ -324,10 +324,6 @@ def create_constant( value: typing.Any, dtype: typing.Optional[bigframes.dtypes.Dtype], ) -> Tuple[ArrayValue, str]: - if pandas.isna(value): - # Need to assign a data type when value is NaN. - dtype = dtype or bigframes.dtypes.DEFAULT_DTYPE - return self.project_to_id(ex.const(value, dtype)) def select_columns( diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 0be31505df..47f137ff1b 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -674,14 +674,14 @@ def infer_literal_type(literal) -> typing.Optional[Dtype]: pa.field(key, field_type, nullable=(not pa.types.is_list(field_type))) ) return pd.ArrowDtype(pa.struct(fields)) - if pd.isna(literal): - return None # Null value without a definite type # Make sure to check datetime before date as datetimes are also dates if isinstance(literal, (datetime.datetime, pd.Timestamp)): if literal.tzinfo is not None: return TIMESTAMP_DTYPE else: return DATETIME_DTYPE + if pd.isna(literal): + return None # Null value without a definite type from_python_type = _infer_dtype_from_python_type(type(literal)) if from_python_type is not None: return from_python_type diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index caf39bd9e9..f60ed756e3 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3764,6 +3764,12 @@ def test_at_no_duplicate(scalars_df_index, scalars_pandas_df_index): assert bf_result == pd_result +def test_setitem_w_timestamp_none(): + b_df = bpd.DataFrame({'rowindex': [1, 2, 3]}) + b_df['temp_timestamp'] = pd.Timestamp(ts_input=None, unit="us", tz="utc") + assert b_df['temp_timestamp'].dtype == "timestamp[us][pyarrow]" + + def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_df = scalars_df.copy() @@ -5714,4 +5720,4 @@ def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs): } with pytest.raises(KeyError): - bf_df.agg(agg_funcs) + bf_df.agg(agg_funcs) \ No newline at end of file From a5af0d2e57eed98f788da3a42fa175ef13d93bc6 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 22 Jul 2025 19:51:44 +0000 Subject: [PATCH 2/2] fix mypy and format --- bigframes/core/array_value.py | 1 - tests/system/small/test_dataframe.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py index 0625a26fdb..aac33cf0a6 100644 --- a/bigframes/core/array_value.py +++ b/bigframes/core/array_value.py @@ -21,7 +21,6 @@ import warnings import google.cloud.bigquery -import pandas import pyarrow as pa import bigframes.core.expression as ex diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index f60ed756e3..8d8d9d02a8 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3765,9 +3765,9 @@ def test_at_no_duplicate(scalars_df_index, scalars_pandas_df_index): def test_setitem_w_timestamp_none(): - b_df = bpd.DataFrame({'rowindex': [1, 2, 3]}) - b_df['temp_timestamp'] = pd.Timestamp(ts_input=None, unit="us", tz="utc") - assert b_df['temp_timestamp'].dtype == "timestamp[us][pyarrow]" + b_df = bpd.DataFrame({"rowindex": [1, 2, 3]}) + b_df["temp_timestamp"] = pd.Timestamp(ts_input=pd.NaT, unit="us", tz="utc") # type: ignore + assert b_df["temp_timestamp"].dtype == "timestamp[us][pyarrow]" def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): @@ -5720,4 +5720,4 @@ def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs): } with pytest.raises(KeyError): - bf_df.agg(agg_funcs) \ No newline at end of file + bf_df.agg(agg_funcs)