|
68 | 68 | import bigframes.operations.aggregations as agg_ops |
69 | 69 | from bigframes.session import dry_runs, execution_spec |
70 | 70 | from bigframes.session import executor as executors |
| 71 | +from bigframes.session._io import pandas as io_pandas |
71 | 72 |
|
72 | 73 | # Type constraint for wherever column labels are used |
73 | 74 | Label = typing.Hashable |
@@ -711,40 +712,15 @@ def to_pandas_batches( |
711 | 712 | # To reduce the number of edge cases to consider when working with the |
712 | 713 | # results of this, always return at least one DataFrame. See: |
713 | 714 | # b/428918844. |
714 | | - empty_val = pd.DataFrame( |
715 | | - { |
716 | | - col: pd.Series([], dtype=self.expr.get_column_type(col)) |
717 | | - for col in itertools.chain(self.value_columns, self.index_columns) |
718 | | - } |
719 | | - ) |
720 | | - series_map = {} |
721 | | - for col in itertools.chain(self.value_columns, self.index_columns): |
722 | | - dtype = self.expr.get_column_type(col) |
723 | | - if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype): |
724 | | - # Due to a limitation in Apache Arrow (#45262), JSON columns are not |
725 | | - # natively supported by the to_pandas_batches() method, which is |
726 | | - # used by the anywidget backend. |
727 | | - # Workaround for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 |
728 | | - # PyArrow doesn't support creating an empty array with db_dtypes.JSONArrowType, |
729 | | - # especially when nested. |
730 | | - # Create with string type and then cast. |
731 | | - |
732 | | - # MyPy doesn't automatically narrow the type of 'dtype' here, |
733 | | - # so we add an explicit check. |
734 | | - if isinstance(dtype, pd.ArrowDtype): |
735 | | - safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string( |
736 | | - dtype.pyarrow_dtype |
737 | | - ) |
738 | | - safe_dtype = pd.ArrowDtype(safe_pa_type) |
739 | | - series_map[col] = pd.Series([], dtype=safe_dtype).astype(dtype) |
740 | | - else: |
741 | | - # This branch should ideally not be reached if |
742 | | - # contains_db_dtypes_json_dtype is accurate, |
743 | | - # but it's here for MyPy's sake. |
744 | | - series_map[col] = pd.Series([], dtype=dtype) |
745 | | - else: |
746 | | - series_map[col] = pd.Series([], dtype=dtype) |
747 | | - empty_val = pd.DataFrame(series_map) |
| 715 | + try: |
| 716 | + empty_arrow_table = self.expr.schema.to_pyarrow().empty_table() |
| 717 | + except pa.ArrowNotImplementedError: |
| 718 | +        # Bug with some pyarrow versions (https://github.com/apache/arrow/issues/45262),
| 719 | + # empty_table only supports base storage types, not extension types. |
| 720 | + empty_arrow_table = self.expr.schema.to_pyarrow( |
| 721 | + use_storage_types=True |
| 722 | + ).empty_table() |
| 723 | + empty_val = io_pandas.arrow_to_pandas(empty_arrow_table, self.expr.schema) |
748 | 724 | dfs = map( |
749 | 725 | lambda a: a[0], |
750 | 726 | itertools.zip_longest( |
@@ -2020,6 +1996,31 @@ def _generate_resample_label( |
2020 | 1996 | Literal["epoch", "start", "start_day", "end", "end_day"], |
2021 | 1997 | ] = "start_day", |
2022 | 1998 | ) -> Block: |
| 1999 | + if not isinstance(rule, str): |
| 2000 | + raise NotImplementedError( |
| 2001 | + f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}" |
| 2002 | + ) |
| 2003 | + |
| 2004 | + if rule in ("ME", "YE", "QE", "BME", "BA", "BQE", "W"): |
| 2005 | + raise NotImplementedError( |
| 2006 | + f"Offset strings 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', 'W' are not currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}" |
| 2007 | + ) |
| 2008 | + |
| 2009 | + if closed == "right": |
| 2010 | + raise NotImplementedError( |
| 2011 | + f"Only closed='left' is currently supported. {constants.FEEDBACK_LINK}", |
| 2012 | + ) |
| 2013 | + |
| 2014 | + if label == "right": |
| 2015 | + raise NotImplementedError( |
| 2016 | + f"Only label='left' is currently supported. {constants.FEEDBACK_LINK}", |
| 2017 | + ) |
| 2018 | + |
| 2019 | + if origin not in ("epoch", "start", "start_day"): |
| 2020 | + raise NotImplementedError( |
| 2021 | + f"Only origin='epoch', 'start', 'start_day' are currently supported, but got {repr(origin)}. {constants.FEEDBACK_LINK}" |
| 2022 | + ) |
| 2023 | + |
2023 | 2024 | # Validate and resolve the index or column to use for grouping |
2024 | 2025 | if on is None: |
2025 | 2026 | if len(self.index_columns) == 0: |
|
0 commit comments