Skip to content

Commit 1f85658

Browse files
fix name mappings for remote sources
1 parent bf8c827 commit 1f85658

File tree

5 files changed

+28
-11
lines changed

5 files changed

+28
-11
lines changed

bigframes/core/bq_data.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,9 @@ def get_arrow_batches(
207207
batches = _iter_streams(session.streams, storage_read_client)
208208

209209
def process_batch(pa_batch):
210-
return pyarrow_utils.cast_batch(pa_batch, data.schema.to_pyarrow())
210+
return pyarrow_utils.cast_batch(
211+
pa_batch.select(columns), data.schema.select(columns).to_pyarrow()
212+
)
211213

212214
batches = map(process_batch, batches)
213215

bigframes/core/pyarrow_utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,13 @@ def cast_batch(batch: pa.RecordBatch, schema: pa.Schema) -> pa.RecordBatch:
8484
)
8585

8686

87+
def rename_batch(batch: pa.RecordBatch, names: list[str]) -> pa.RecordBatch:
88+
if batch.schema.names == names:
89+
return batch
90+
# TODO: Use RecordBatch.rename_columns once min pyarrow>=16.0
91+
return pa.RecordBatch.from_arrays(batch.columns, names)
92+
93+
8794
def truncate_pyarrow_iterable(
8895
batches: Iterable[pa.RecordBatch], max_results: int
8996
) -> Iterator[pa.RecordBatch]:

bigframes/session/bq_caching_executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ def _execute_plan_gbq(
687687
project_id=self.bqclient.project,
688688
storage_client=self.bqstoragereadclient,
689689
query_job=query_job,
690-
selected_fields=tuple(col for col in og_schema.names),
690+
selected_fields=tuple((col, col) for col in og_schema.names),
691691
)
692692
else:
693693
return executor.LocalExecuteResult(

bigframes/session/executor.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -220,14 +220,16 @@ def __init__(
220220
*,
221221
query_job: Optional[bigquery.QueryJob] = None,
222222
limit: Optional[int] = None,
223-
selected_fields: Optional[Sequence[str]] = None,
223+
selected_fields: Optional[Sequence[tuple[str, str]]] = None,
224224
):
225225
self._data = data
226226
self._project_id = project_id
227227
self._query_job = query_job
228228
self._storage_client = storage_client
229229
self._limit = limit
230-
self._selected_fields = selected_fields
230+
self._selected_fields = selected_fields or [
231+
(name, name) for name in data.schema.names
232+
]
231233

232234
@property
233235
def query_job(self) -> Optional[bigquery.QueryJob]:
@@ -240,20 +242,24 @@ def total_bytes_processed(self) -> Optional[int]:
240242
return None
241243

242244
@property
245+
@functools.cache
243246
def schema(self) -> bigframes.core.schema.ArraySchema:
244-
schema = self._data.schema
245-
if self._selected_fields:
246-
return schema.select(self._selected_fields)
247-
return schema
247+
source_ids = [selection[0] for selection in self._selected_fields]
248+
return self._data.schema.select(source_ids).rename(dict(self._selected_fields))
248249

249250
def batches(self) -> ResultsIterator:
250251
read_batches = bq_data.get_arrow_batches(
251252
self._data,
252-
self._selected_fields or self._data.schema.names,
253+
[x[0] for x in self._selected_fields],
253254
self._storage_client,
254255
self._project_id,
255256
)
256-
arrow_batches = read_batches.iter
257+
arrow_batches: Iterator[pa.RecordBatch] = map(
258+
functools.partial(
259+
pyarrow_utils.rename_batch, names=list(self.schema.names)
260+
),
261+
read_batches.iter,
262+
)
257263
approx_bytes: Optional[int] = read_batches.approx_bytes
258264
approx_rows: Optional[int] = self._data.n_rows or read_batches.approx_rows
259265

bigframes/session/read_api_execution.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ def execute(
5656
project_id=self.project,
5757
storage_client=self.bqstoragereadclient,
5858
limit=peek,
59-
selected_fields=[item.source_id for item in node.scan_list.items],
59+
selected_fields=[
60+
(item.source_id, item.id.sql) for item in node.scan_list.items
61+
],
6062
)
6163

6264
def _try_adapt_plan(

0 commit comments

Comments (0)