5050import bigframes .core .identifiers
5151import bigframes .core .join_def as join_defs
5252import bigframes .core .ordering as ordering
53+ import bigframes .core .pyarrow_utils as pyarrow_utils
5354import bigframes .core .schema as bf_schema
5455import bigframes .core .sql as sql
5556import bigframes .core .utils as utils
@@ -156,6 +157,36 @@ def __init__(
156157 self ._view_ref : Optional [bigquery .TableReference ] = None
157158 self ._view_ref_dry_run : Optional [bigquery .TableReference ] = None
158159
@classmethod
def from_pyarrow(
    cls,
    data: pa.Table,
    session: bigframes.Session,
) -> Block:
    """Build a Block from an in-memory pyarrow table.

    The columns of ``data`` become the value columns of the Block, labelled
    with their pyarrow column names. A column with a generated id is added
    through ``pyarrow_utils.append_offsets`` and used as the single,
    unlabeled index column.

    Args:
        data: The pyarrow table to wrap.
        session: The session handed to ``core.ArrayValue.from_managed``.

    Returns:
        A new Block whose only index column is the generated offsets column.
    """
    # Read the labels first so the synthetic offsets column added below is
    # never reported as a value column.
    value_labels = data.column_names

    # TODO(tswast): Use array_value.promote_offsets() instead once that node is
    # supported by the local engine.
    offsets_id = bigframes.core.guid.generate_guid()

    # TODO(https://github.com/googleapis/python-bigquery-dataframes/issues/859):
    # Allow users to specify the "total ordering" column(s) or allow multiple
    # such columns.
    table_with_offsets = pyarrow_utils.append_offsets(data, offsets_col=offsets_id)

    # from_pyarrow will normalize the types for us.
    managed_table = local_data.ManagedArrowTable.from_pyarrow(table_with_offsets)
    return cls(
        core.ArrayValue.from_managed(managed_table, session=session),
        column_labels=value_labels,
        index_columns=[offsets_id],
        index_labels=[None],
    )
189+
159190 @classmethod
160191 def from_local (
161192 cls ,
@@ -1210,7 +1241,10 @@ def select_column(self, id: str) -> Block:
12101241 return self .select_columns ([id ])
12111242
def select_columns(self, ids: typing.Sequence[str]) -> Block:
    """Return a new Block holding the index columns plus the requested columns.

    Args:
        ids: Ids of the value columns to keep, in the order they should
            appear. An id may be listed more than once.

    Returns:
        A Block with the same index columns and index names as this one,
        and with column labels looked up for ``ids``.
    """
    wanted_ids = [*self.index_columns, *ids]
    # Renames are allowed because the same column may end up being selected
    # multiple times.
    selected_expr = self._expr.select_columns(wanted_ids, allow_renames=True)
    selected_labels = self._get_labels_for_columns(ids)
    return Block(
        selected_expr,
        self.index_columns,
        selected_labels,
        self.index.names,
    )
12161250
@@ -1996,7 +2030,7 @@ def _generate_resample_label(
19962030 return block .set_index ([resample_label_id ])
19972031
19982032 def _create_stack_column (self , col_label : typing .Tuple , stack_labels : pd .Index ):
1999- dtype = None
2033+ input_dtypes = []
20002034 input_columns : list [Optional [str ]] = []
20012035 for uvalue in utils .index_as_tuples (stack_labels ):
20022036 label_to_match = (* col_label , * uvalue )
@@ -2006,15 +2040,18 @@ def _create_stack_column(self, col_label: typing.Tuple, stack_labels: pd.Index):
20062040 matching_ids = self .label_to_col_id .get (label_to_match , [])
20072041 input_id = matching_ids [0 ] if len (matching_ids ) > 0 else None
20082042 if input_id :
2009- if dtype and dtype != self ._column_type (input_id ):
2010- raise NotImplementedError (
2011- "Cannot stack columns with non-matching dtypes."
2012- )
2013- else :
2014- dtype = self ._column_type (input_id )
2043+ input_dtypes .append (self ._column_type (input_id ))
20152044 input_columns .append (input_id )
20162045 # Input column i is the first one that
2017- return tuple (input_columns ), dtype or pd .Float64Dtype ()
2046+ if len (input_dtypes ) > 0 :
2047+ output_dtype = bigframes .dtypes .lcd_type (* input_dtypes )
2048+ if output_dtype is None :
2049+ raise NotImplementedError (
2050+ "Cannot stack columns with non-matching dtypes."
2051+ )
2052+ else :
2053+ output_dtype = pd .Float64Dtype ()
2054+ return tuple (input_columns ), output_dtype
20182055
20192056 def _column_type (self , col_id : str ) -> bigframes .dtypes .Dtype :
20202057 col_offset = self .value_columns .index (col_id )
0 commit comments