
Commit bdb1476

fix result schema extra column handling
1 parent cf3b7e3 commit bdb1476

File tree

bigframes/core/schema.py
bigframes/session/bq_caching_executor.py

2 files changed: +18 -5 lines


bigframes/core/schema.py

Lines changed: 2 additions & 2 deletions

@@ -17,7 +17,7 @@
 from dataclasses import dataclass
 import functools
 import typing
-from typing import Dict, List, Sequence
+from typing import Dict, List
 
 import google.cloud.bigquery
 import pyarrow
@@ -35,7 +35,7 @@ class SchemaItem:
 
 @dataclass(frozen=True)
 class ArraySchema:
-    items: Sequence[SchemaItem]
+    items: tuple[SchemaItem, ...]
 
     def __iter__(self):
         yield from self.items
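
The switch from Sequence to tuple matters because ArraySchema is a frozen dataclass: freezing only blocks attribute reassignment, while hashability and true immutability depend on the field values themselves. A Sequence annotation lets a mutable list slip through; the tuple annotation makes the expectation explicit and type-checkable. A minimal sketch with toy stand-in classes (not the bigframes ones):

from dataclasses import dataclass
from typing import Sequence


# Toy stand-ins for ArraySchema, for illustration only.
@dataclass(frozen=True)
class WithSequence:
    items: Sequence[str]          # a mutable list still satisfies Sequence


@dataclass(frozen=True)
class WithTuple:
    items: tuple[str, ...]        # an immutable, hashable value


print(hash(WithTuple(items=("a", "b"))))   # works: tuple fields hash fine

try:
    hash(WithSequence(items=["a", "b"]))   # a list field makes the instance unhashable
except TypeError as exc:
    print(exc)                             # "unhashable type: 'list'"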

bigframes/session/bq_caching_executor.py

Lines changed: 16 additions & 3 deletions

@@ -659,6 +659,8 @@ def _execute_plan_gbq(
         # we could actually cache even when caching is not explicitly requested, but being conservative for now
         result_bq_data = None
         if query_job and query_job.destination:
+            # we might add extra sql columns in compilation, esp if caching w ordering, infer a bigframes type for them
+            result_bf_schema = _result_schema(og_schema, list(compiled.sql_schema))
             dst = query_job.destination
             result_bq_data = bq_data.BigqueryDataSource(
                 table=bq_data.GbqTable(
@@ -669,9 +671,9 @@ def _execute_plan_gbq(
                     is_physically_stored=True,
                     cluster_cols=tuple(cluster_cols),
                 ),
-                schema=og_schema,
+                schema=result_bf_schema,
                 ordering=compiled.row_order,
-                n_rows=iterator.num_results,
+                n_rows=iterator.total_rows,
             )
 
         if cache_spec is not None:
@@ -685,14 +687,25 @@ def _execute_plan_gbq(
                 project_id=self.bqclient.project,
                 storage_client=self.bqstoragereadclient,
                 query_job=query_job,
+                selected_fields=tuple(col for col in og_schema.names),
             )
         else:
             return executor.LocalExecuteResult(
-                data=iterator.to_arrow(),
+                data=iterator.to_arrow().select(og_schema.names),
                 bf_schema=plan.schema,
             )
 
 
+def _result_schema(
+    logical_schema: schemata.ArraySchema, sql_schema: list[bigquery.SchemaField]
+) -> schemata.ArraySchema:
+    inferred_schema = bigframes.dtypes.bf_type_from_type_kind(sql_schema)
+    inferred_schema.update(logical_schema._mapping)
+    return schemata.ArraySchema(
+        tuple(schemata.SchemaItem(col, dtype) for col, dtype in inferred_schema.items())
+    )
+
+
 def _if_schema_match(
     table_schema: Tuple[bigquery.SchemaField, ...], schema: schemata.ArraySchema
 ) -> bool:
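
Taken together, the change handles SQL-level columns that compilation may append (for example an ordering column when caching with ordering): the stored result's schema now covers those extras with an inferred bigframes type while logical columns keep their original dtype, and rows fetched locally are trimmed back to the logical columns. A rough sketch of both behaviours, with hypothetical column names and pyarrow standing in for the query result iterator:

import pyarrow as pa

# Hypothetical query result: compilation appended an ordering column that is
# not part of the user-visible (logical) schema.
result = pa.table({
    "col_a": [1, 2, 3],
    "col_b": ["x", "y", "z"],
    "bf_ordering_col": [0, 1, 2],          # hypothetical extra column
})

logical_names = ["col_a", "col_b"]

# Local path: trim back to the logical columns, mirroring
# iterator.to_arrow().select(og_schema.names) in the diff above.
trimmed = result.select(logical_names)
assert trimmed.column_names == logical_names

# Schema path: infer a type for every SQL column, then let the logical schema
# override the columns it knows about (the same overlay _result_schema applies).
inferred = {name: str(result.schema.field(name).type) for name in result.column_names}
logical_dtypes = {"col_a": "Int64", "col_b": "string"}   # hypothetical bigframes dtypes
inferred.update(logical_dtypes)
print(inferred)   # the extra ordering column keeps its inferred type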
