Skip to content

Commit 46dbc85

Browse files
committed
docs: enhance function docstrings to clarify order_by parameter usage
1 parent 031f05c commit 46dbc85

File tree

1 file changed

+81
-13
lines changed

1 file changed

+81
-13
lines changed

python/datafusion/functions.py

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,10 @@ def window(
441441
lag use::
442442
443443
df.select(functions.lag(col("a")).partition_by(col("b")).build())
444+
445+
The ``order_by`` parameter accepts column names or expressions, e.g.::
446+
447+
window("lag", [col("a")], order_by="ts")
444448
"""
445449
args = [a.expr for a in args]
446450
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
@@ -1739,7 +1743,11 @@ def array_agg(
17391743
expression: Values to combine into an array
17401744
distinct: If True, a single entry for each distinct value will be in the result
17411745
filter: If provided, only compute against rows for which the filter is True
1742-
order_by: Order the resultant array values
1746+
order_by: Order the resultant array values. Accepts column names or expressions.
1747+
1748+
For example::
1749+
1750+
df.select(array_agg(col("a"), order_by="b"))
17431751
"""
17441752
order_by_raw = sort_list_to_raw_sort_list(order_by)
17451753
filter_raw = filter.expr if filter is not None else None
@@ -2236,8 +2244,13 @@ def first_value(
22362244
Args:
22372245
expression: Expression from which to return the first value
22382246
filter: If provided, only compute against rows for which the filter is True
2239-
order_by: Set the ordering of the expression to evaluate
2247+
order_by: Set the ordering of the expression to evaluate. Accepts
2248+
column names or expressions.
22402249
null_treatment: Assign whether to respect or ignore null values.
2250+
2251+
For example::
2252+
2253+
df.select(first_value(col("a"), order_by="ts"))
22412254
"""
22422255
order_by_raw = sort_list_to_raw_sort_list(order_by)
22432256
filter_raw = filter.expr if filter is not None else None
@@ -2268,8 +2281,13 @@ def last_value(
22682281
Args:
22692282
expression: Expression from which to return the last value
22702283
filter: If provided, only compute against rows for which the filter is True
2271-
order_by: Set the ordering of the expression to evaluate
2284+
order_by: Set the ordering of the expression to evaluate. Accepts
2285+
column names or expressions.
22722286
null_treatment: Assign whether to respect or ignore null values.
2287+
2288+
For example::
2289+
2290+
df.select(last_value(col("a"), order_by="ts"))
22732291
"""
22742292
order_by_raw = sort_list_to_raw_sort_list(order_by)
22752293
filter_raw = filter.expr if filter is not None else None
@@ -2302,8 +2320,13 @@ def nth_value(
23022320
expression: Expression from which to return the n-th value
23032321
n: Index of value to return. Starts at 1.
23042322
filter: If provided, only compute against rows for which the filter is True
2305-
order_by: Set the ordering of the expression to evaluate
2323+
order_by: Set the ordering of the expression to evaluate. Accepts
2324+
column names or expressions.
23062325
null_treatment: Assign whether to respect or ignore null values.
2326+
2327+
For example::
2328+
2329+
df.select(nth_value(col("a"), 2, order_by="ts"))
23072330
"""
23082331
order_by_raw = sort_list_to_raw_sort_list(order_by)
23092332
filter_raw = filter.expr if filter is not None else None
@@ -2438,7 +2461,12 @@ def lead(
24382461
shift_offset: Number of rows following the current row.
24392462
default_value: Value to return if shift_offset row does not exist.
24402463
partition_by: Expressions to partition the window frame on.
2441-
order_by: Set ordering within the window frame.
2464+
order_by: Set ordering within the window frame. Accepts
2465+
column names or expressions.
2466+
2467+
For example::
2468+
2469+
lead(col("b"), order_by="ts")
24422470
"""
24432471
if not isinstance(default_value, pa.Scalar) and default_value is not None:
24442472
default_value = pa.scalar(default_value)
@@ -2488,7 +2516,12 @@ def lag(
24882516
shift_offset: Number of rows before the current row.
24892517
default_value: Value to return if shift_offset row does not exist.
24902518
partition_by: Expressions to partition the window frame on.
2491-
order_by: Set ordering within the window frame.
2519+
order_by: Set ordering within the window frame. Accepts
2520+
column names or expressions.
2521+
2522+
For example::
2523+
2524+
lag(col("b"), order_by="ts")
24922525
"""
24932526
if not isinstance(default_value, pa.Scalar):
24942527
default_value = pa.scalar(default_value)
@@ -2528,7 +2561,12 @@ def row_number(
25282561
25292562
Args:
25302563
partition_by: Expressions to partition the window frame on.
2531-
order_by: Set ordering within the window frame.
2564+
order_by: Set ordering within the window frame. Accepts
2565+
column names or expressions.
2566+
2567+
For example::
2568+
2569+
row_number(order_by="points")
25322570
"""
25332571
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
25342572
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2567,7 +2605,12 @@ def rank(
25672605
25682606
Args:
25692607
partition_by: Expressions to partition the window frame on.
2570-
order_by: Set ordering within the window frame.
2608+
order_by: Set ordering within the window frame. Accepts
2609+
column names or expressions.
2610+
2611+
For example::
2612+
2613+
rank(order_by="points")
25712614
"""
25722615
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
25732616
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2601,7 +2644,12 @@ def dense_rank(
26012644
26022645
Args:
26032646
partition_by: Expressions to partition the window frame on.
2604-
order_by: Set ordering within the window frame.
2647+
order_by: Set ordering within the window frame. Accepts
2648+
column names or expressions.
2649+
2650+
For example::
2651+
2652+
dense_rank(order_by="points")
26052653
"""
26062654
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
26072655
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2636,7 +2684,12 @@ def percent_rank(
26362684
26372685
Args:
26382686
partition_by: Expressions to partition the window frame on.
2639-
order_by: Set ordering within the window frame.
2687+
order_by: Set ordering within the window frame. Accepts
2688+
column names or expressions.
2689+
2690+
For example::
2691+
2692+
percent_rank(order_by="points")
26402693
"""
26412694
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
26422695
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2671,7 +2724,12 @@ def cume_dist(
26712724
26722725
Args:
26732726
partition_by: Expressions to partition the window frame on.
2674-
order_by: Set ordering within the window frame.
2727+
order_by: Set ordering within the window frame. Accepts
2728+
column names or expressions.
2729+
2730+
For example::
2731+
2732+
cume_dist(order_by="points")
26752733
"""
26762734
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
26772735
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2710,7 +2768,12 @@ def ntile(
27102768
Args:
27112769
groups: Number of groups for the n-tile to be divided into.
27122770
partition_by: Expressions to partition the window frame on.
2713-
order_by: Set ordering within the window frame.
2771+
order_by: Set ordering within the window frame. Accepts
2772+
column names or expressions.
2773+
2774+
For example::
2775+
2776+
ntile(3, order_by="points")
27142777
"""
27152778
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
27162779
order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -2743,7 +2806,12 @@ def string_agg(
27432806
expression: Values to concatenate into a single string
27442807
delimiter: Text to place between each value of expression
27452808
filter: If provided, only compute against rows for which the filter is True
2746-
order_by: Set the ordering of the expression to evaluate
2809+
order_by: Set the ordering of the expression to evaluate. Accepts
2810+
column names or expressions.
2811+
2812+
For example::
2813+
2814+
df.select(string_agg(col("a"), ",", order_by="b"))
27472815
"""
27482816
order_by_raw = sort_list_to_raw_sort_list(order_by)
27492817
filter_raw = filter.expr if filter is not None else None

0 commit comments

Comments
 (0)