|
24 | 24 | import dataclasses |
25 | 25 | import functools |
26 | 26 | import itertools |
| 27 | +import os |
27 | 28 | import random |
28 | 29 | import typing |
29 | 30 | from typing import Iterable, List, Literal, Mapping, Optional, Sequence, Tuple |
|
41 | 42 | import bigframes.core.guid as guid |
42 | 43 | import bigframes.core.join_def as join_defs |
43 | 44 | import bigframes.core.ordering as ordering |
| 45 | +import bigframes.core.schema as bf_schema |
44 | 46 | import bigframes.core.tree_properties as tree_properties |
45 | 47 | import bigframes.core.utils |
46 | 48 | import bigframes.core.utils as utils |
47 | 49 | import bigframes.dtypes |
| 50 | +import bigframes.features |
48 | 51 | import bigframes.operations as ops |
49 | 52 | import bigframes.operations.aggregations as agg_ops |
50 | 53 | import bigframes.session._io.pandas |
@@ -411,7 +414,32 @@ def _to_dataframe(self, result) -> pd.DataFrame: |
411 | 414 | """Convert BigQuery data to pandas DataFrame with specific dtypes.""" |
412 | 415 | dtypes = dict(zip(self.index_columns, self.index.dtypes)) |
413 | 416 | dtypes.update(zip(self.value_columns, self.dtypes)) |
414 | | - return self.session._rows_to_dataframe(result, dtypes) |
| 417 | + result_dataframe = self.session._rows_to_dataframe(result, dtypes) |
| 418 | + # Runs strict validations to ensure internal type predictions and ibis are completely in sync. |
| 419 | + # Do not execute these validations outside of the test suite. |
| 420 | + if "PYTEST_CURRENT_TEST" in os.environ: |
| 421 | + self._validate_result_schema(result_dataframe) |
| 422 | + return result_dataframe |
| 423 | + |
def _validate_result_schema(self, result_df: pd.DataFrame) -> None:
    """Check that a materialized result matches the expected schemas.

    A schema is built from the column names and dtypes of ``result_df`` and
    compared against the expression node's schema (``self.expr.node.schema``)
    and the compiled schema (``self.expr._compiled_schema``).

    Args:
        result_df: DataFrame whose ``dtypes`` are validated.

    Raises:
        ValueError: If the schema built from ``result_df`` differs from the
            internal schema or from the ibis schema.
    """
    # Validation is skipped when the ``is_arrow_list_dtype_usable`` feature
    # flag is false, so bail out before doing any schema work.
    if not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable:
        return

    ibis_schema = self.expr._compiled_schema
    internal_schema = self.expr.node.schema
    actual_schema = bf_schema.ArraySchema(
        tuple(
            bf_schema.SchemaItem(name, dtype)  # type: ignore
            for name, dtype in result_df.dtypes.items()
        )
    )

    # The internal schema is checked first, matching the original order of
    # the two comparisons.
    if internal_schema != actual_schema:
        raise ValueError(
            f"This error should only occur while testing. BigFrames internal schema: {internal_schema} does not match actual schema: {actual_schema}"
        )
    if ibis_schema != actual_schema:
        raise ValueError(
            f"This error should only occur while testing. Ibis schema: {ibis_schema} does not match actual schema: {actual_schema}"
        )
415 | 443 |
|
416 | 444 | def to_pandas( |
417 | 445 | self, |
@@ -1204,7 +1232,7 @@ def _standard_stats(self, column_id) -> typing.Sequence[agg_ops.UnaryAggregateOp |
1204 | 1232 | # TODO: annotate aggregations themselves with this information |
1205 | 1233 | dtype = self.expr.get_column_type(column_id) |
1206 | 1234 | stats: list[agg_ops.UnaryAggregateOp] = [agg_ops.count_op] |
1207 | | - if dtype not in bigframes.dtypes.UNORDERED_DTYPES: |
| 1235 | + if bigframes.dtypes.is_orderable(dtype): |
1208 | 1236 | stats += [agg_ops.min_op, agg_ops.max_op] |
1209 | 1237 | if dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE: |
1210 | 1238 | # Notable exclusions: |
|
0 commit comments