Skip to content

Commit ae5fc8f

Browse files
refactor: convert ordering definitions to use flexible scalar expressions (#490)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent 429a4a5 commit ae5fc8f

File tree

20 files changed

+324
-258
lines changed

20 files changed

+324
-258
lines changed

bigframes/core/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
import bigframes.core.join_def as join_def
3131
import bigframes.core.local_data as local_data
3232
import bigframes.core.nodes as nodes
33-
from bigframes.core.ordering import OrderingColumnReference
33+
from bigframes.core.ordering import OrderingExpression
3434
import bigframes.core.ordering as orderings
3535
import bigframes.core.rewrite
3636
import bigframes.core.schema as schemata
@@ -162,7 +162,7 @@ def filter_by_id(self, predicate_id: str, keep_null: bool = False) -> ArrayValue
162162
def filter(self, predicate: ex.Expression):
163163
return ArrayValue(nodes.FilterNode(child=self.node, predicate=predicate))
164164

165-
def order_by(self, by: Sequence[OrderingColumnReference]) -> ArrayValue:
165+
def order_by(self, by: Sequence[OrderingExpression]) -> ArrayValue:
166166
return ArrayValue(nodes.OrderByNode(child=self.node, by=tuple(by)))
167167

168168
def reversed(self) -> ArrayValue:

bigframes/core/block_transforms.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,7 @@ def _interpolate_column(
176176
) -> typing.Tuple[blocks.Block, str]:
177177
if interpolate_method not in ["linear", "nearest", "ffill"]:
178178
raise ValueError("interpolate method not supported")
179-
window_ordering = (ordering.OrderingColumnReference(x_values),)
179+
window_ordering = (ordering.OrderingExpression(ex.free_var(x_values)),)
180180
backwards_window = windows.WindowSpec(following=0, ordering=window_ordering)
181181
forwards_window = windows.WindowSpec(preceding=0, ordering=window_ordering)
182182

@@ -338,8 +338,8 @@ def value_counts(
338338
if sort:
339339
block = block.order_by(
340340
[
341-
ordering.OrderingColumnReference(
342-
count_id,
341+
ordering.OrderingExpression(
342+
ex.free_var(count_id),
343343
direction=ordering.OrderingDirection.ASC
344344
if ascending
345345
else ordering.OrderingDirection.DESC,
@@ -398,8 +398,8 @@ def rank(
398398
window = windows.WindowSpec(
399399
# BigQuery has syntax to reorder nulls with "NULLS FIRST/LAST", but that is unavailable through ibis presently, so must order on a separate nullity expression first.
400400
ordering=(
401-
ordering.OrderingColumnReference(
402-
col,
401+
ordering.OrderingExpression(
402+
ex.free_var(col),
403403
ordering.OrderingDirection.ASC
404404
if ascending
405405
else ordering.OrderingDirection.DESC,
@@ -481,8 +481,8 @@ def nsmallest(
481481
if keep == "last":
482482
block = block.reversed()
483483
order_refs = [
484-
ordering.OrderingColumnReference(
485-
col_id, direction=ordering.OrderingDirection.ASC
484+
ordering.OrderingExpression(
485+
ex.free_var(col_id), direction=ordering.OrderingDirection.ASC
486486
)
487487
for col_id in column_ids
488488
]
@@ -511,8 +511,8 @@ def nlargest(
511511
if keep == "last":
512512
block = block.reversed()
513513
order_refs = [
514-
ordering.OrderingColumnReference(
515-
col_id, direction=ordering.OrderingDirection.DESC
514+
ordering.OrderingExpression(
515+
ex.free_var(col_id), direction=ordering.OrderingDirection.DESC
516516
)
517517
for col_id in column_ids
518518
]
@@ -804,9 +804,9 @@ def _idx_extrema(
804804
)
805805
# Have to find the min for each
806806
order_refs = [
807-
ordering.OrderingColumnReference(value_col, direction),
807+
ordering.OrderingExpression(ex.free_var(value_col), direction),
808808
*[
809-
ordering.OrderingColumnReference(idx_col)
809+
ordering.OrderingExpression(ex.free_var(idx_col))
810810
for idx_col in original_block.index_columns
811811
],
812812
]

bigframes/core/blocks.py

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ def cols_matching_label(self, partial_label: Label) -> typing.Sequence[str]:
270270

271271
def order_by(
272272
self,
273-
by: typing.Sequence[ordering.OrderingColumnReference],
273+
by: typing.Sequence[ordering.OrderingExpression],
274274
) -> Block:
275275
return Block(
276276
self._expr.order_by(by),
@@ -610,7 +610,9 @@ def _split(
610610
string_ordering_col, random_state_col, ops.strconcat_op
611611
)
612612
block, hash_string_sum_col = block.apply_unary_op(string_sum_col, ops.hash_op)
613-
block = block.order_by([ordering.OrderingColumnReference(hash_string_sum_col)])
613+
block = block.order_by(
614+
[ordering.OrderingExpression(ex.free_var(hash_string_sum_col))]
615+
)
614616

615617
intervals = []
616618
cur = 0
@@ -628,15 +630,17 @@ def _split(
628630
sliced_blocks = [
629631
sliced_block.order_by(
630632
[
631-
ordering.OrderingColumnReference(idx_col)
633+
ordering.OrderingExpression(ex.free_var(idx_col))
632634
for idx_col in sliced_block.index_columns
633635
]
634636
)
635637
for sliced_block in sliced_blocks
636638
]
637639
elif sort is False:
638640
sliced_blocks = [
639-
sliced_block.order_by([ordering.OrderingColumnReference(ordering_col)])
641+
sliced_block.order_by(
642+
[ordering.OrderingExpression(ex.free_var(ordering_col))]
643+
)
640644
for sliced_block in sliced_blocks
641645
]
642646

@@ -1706,7 +1710,10 @@ def merge(
17061710
if sort:
17071711
# sort uses coalesced join keys always
17081712
joined_expr = joined_expr.order_by(
1709-
[ordering.OrderingColumnReference(col_id) for col_id in coalesced_ids],
1713+
[
1714+
ordering.OrderingExpression(ex.free_var(col_id))
1715+
for col_id in coalesced_ids
1716+
],
17101717
)
17111718

17121719
joined_expr = joined_expr.select_columns(result_columns)
@@ -2028,7 +2035,10 @@ def join_mono_indexed(
20282035
)
20292036
if sort:
20302037
combined_expr = combined_expr.order_by(
2031-
[ordering.OrderingColumnReference(col_id) for col_id in coalesced_join_cols]
2038+
[
2039+
ordering.OrderingExpression(ex.free_var(col_id))
2040+
for col_id in coalesced_join_cols
2041+
]
20322042
)
20332043
block = Block(
20342044
combined_expr,
@@ -2117,7 +2127,10 @@ def join_multi_indexed(
21172127
)
21182128
if sort:
21192129
combined_expr = combined_expr.order_by(
2120-
[ordering.OrderingColumnReference(col_id) for col_id in coalesced_join_cols]
2130+
[
2131+
ordering.OrderingExpression(ex.free_var(col_id))
2132+
for col_id in coalesced_join_cols
2133+
]
21212134
)
21222135

21232136
if left.index.nlevels == 1:

0 commit comments

Comments
 (0)