Skip to content

Commit d473cc2

Browse files
committed
feat: add to_time, to_local_time, to_date, to_char functions
Additionally fix conditional on formatters (since it is *args it cannot be None). Refactor name to avoid possible collision with f.
1 parent d87c6e8 commit d473cc2

File tree

3 files changed

+116
-11
lines changed

3 files changed

+116
-11
lines changed

python/datafusion/functions.py

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from __future__ import annotations
2020

21-
from typing import TYPE_CHECKING, Any
21+
from typing import TYPE_CHECKING, Any, Sequence
2222

2323
import pyarrow as pa
2424

@@ -42,7 +42,6 @@
4242

4343
if TYPE_CHECKING:
4444
from datafusion.context import SessionContext
45-
4645
__all__ = [
4746
"abs",
4847
"acos",
@@ -268,7 +267,11 @@
268267
"sum",
269268
"tan",
270269
"tanh",
270+
"to_char",
271+
"to_date",
271272
"to_hex",
273+
"to_local_time",
274+
"to_time",
272275
"to_timestamp",
273276
"to_timestamp_micros",
274277
"to_timestamp_millis",
@@ -290,6 +293,7 @@
290293
]
291294

292295

296+
293297
def isnan(expr: Expr) -> Expr:
294298
"""Returns true if a given number is +NaN or -NaN otherwise returns false."""
295299
return Expr(f.isnan(expr.expr))
@@ -1009,6 +1013,57 @@ def now() -> Expr:
10091013
"""
10101014
return Expr(f.now())
10111015

1016+
def to_char(arg: Expr, format: Expr) -> Expr:
1017+
"""Returns a string representation of a date, time, timestamp or duration
1018+
based on ``format``.
1019+
1020+
For usage of ``format`` see the Rust chrono crate's ``strftime`` module.
1021+
1022+
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
1023+
"""
1024+
return Expr(f.to_char(arg.expr, format.expr))
1025+
1026+
def to_date(arg: Expr, *formatters: Expr) -> Expr:
1027+
"""Converts a value to a date (YYYY-MM-DD).
1028+
1029+
Supports strings, numeric and timestamp types as input.
1030+
Integers and doubles are interpreted as days since the unix epoch.
1031+
Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20')
1032+
if ``formatters`` are not provided.
1033+
1034+
For usage of ``formatters`` see the Rust chrono crate's ``strftime`` module.
1035+
1036+
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
1037+
"""
1038+
if not formatters:
1039+
return Expr(f.to_date(arg.expr))
1040+
formatters = [fmt.expr for fmt in formatters]
1041+
return Expr(f.to_date(arg.expr, *formatters))
1042+
1043+
1044+
def to_local_time(arg: Expr) -> Expr:
1045+
"""Converts a timestamp with a timezone to a timestamp without a timezone.
1046+
1047+
This function handles daylight saving time changes.
1048+
"""
1049+
return Expr(f.to_local_time(arg.expr))
1050+
1051+
1052+
def to_time(arg: Expr, *formatters: Expr) -> Expr:
1053+
"""Converts a value to a time. Supports strings and timestamps as input.
1054+
1055+
If ``formatters`` is not provided strings are parsed as HH:MM:SS, HH:MM or
1056+
HH:MM:SS.nnnnnnnnn.
1057+
1058+
For usage of ``formatters`` see the Rust chrono crate's ``strftime`` module.
1059+
1060+
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
1061+
"""
1062+
if not formatters:
1063+
return Expr(f.to_time(arg.expr))
1064+
formatters = [fmt.expr for fmt in formatters]
1065+
return Expr(f.to_time(arg.expr, *formatters))
1066+
10121067

10131068
def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
10141069
"""Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
@@ -1017,10 +1072,10 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
10171072
10181073
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
10191074
"""
1020-
if formatters is None:
1021-
return f.to_timestamp(arg.expr)
1075+
if not formatters:
1076+
return Expr(f.to_timestamp(arg.expr))
10221077

1023-
formatters = [f.expr for f in formatters]
1078+
formatters = [fmt.expr for fmt in formatters]
10241079
return Expr(f.to_timestamp(arg.expr, *formatters))
10251080

10261081

@@ -1029,7 +1084,7 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:
10291084
10301085
See :py:func:`to_timestamp` for a description on how to use formatters.
10311086
"""
1032-
formatters = [f.expr for f in formatters]
1087+
formatters = [fmt.expr for fmt in formatters]
10331088
return Expr(f.to_timestamp_millis(arg.expr, *formatters))
10341089

10351090

@@ -1038,7 +1093,7 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:
10381093
10391094
See :py:func:`to_timestamp` for a description on how to use formatters.
10401095
"""
1041-
formatters = [f.expr for f in formatters]
1096+
formatters = [fmt.expr for fmt in formatters]
10421097
return Expr(f.to_timestamp_micros(arg.expr, *formatters))
10431098

10441099

@@ -1047,7 +1102,7 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:
10471102
10481103
See :py:func:`to_timestamp` for a description on how to use formatters.
10491104
"""
1050-
formatters = [f.expr for f in formatters]
1105+
formatters = [fmt.expr for fmt in formatters]
10511106
return Expr(f.to_timestamp_nanos(arg.expr, *formatters))
10521107

10531108

@@ -1056,13 +1111,13 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:
10561111
10571112
See :py:func:`to_timestamp` for a description on how to use formatters.
10581113
"""
1059-
formatters = [f.expr for f in formatters]
1114+
formatters = [fmt.expr for fmt in formatters]
10601115
return Expr(f.to_timestamp_seconds(arg.expr, *formatters))
10611116

10621117

10631118
def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
10641119
"""Converts a string and optional formats to a Unixtime."""
1065-
args = [f.expr for f in format_arguments]
1120+
args = [fmt.expr for fmt in format_arguments]
10661121
return Expr(f.to_unixtime(string.expr, *args))
10671122

10681123

@@ -1071,6 +1126,9 @@ def current_date() -> Expr:
10711126
return Expr(f.current_date())
10721127

10731128

1129+
today = current_date
1130+
1131+
10741132
def current_time() -> Expr:
10751133
"""Returns current UTC time as a Time64 value."""
10761134
return Expr(f.current_time())

python/tests/test_functions.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
import math
18-
from datetime import datetime, timezone
18+
from datetime import date, datetime, time, timezone
1919

2020
import numpy as np
2121
import pyarrow as pa
@@ -952,6 +952,12 @@ def test_temporal_functions(df):
952952
f.to_timestamp_nanos(
953953
literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f")
954954
),
955+
f.to_time(literal("12:30:45")),
956+
f.to_time(literal("12-30-45"), literal("%H-%M-%S")),
957+
f.to_date(literal("2017-05-31")),
958+
f.to_date(literal("2017-05-31"), literal("%Y-%m-%d")),
959+
f.to_local_time(column("d")),
960+
f.to_char(column("d"), literal('%d-%m-%Y'))
955961
)
956962
result = df.collect()
957963
assert len(result) == 1
@@ -1026,6 +1032,39 @@ def test_temporal_functions(df):
10261032
[datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3,
10271033
type=pa.timestamp("ns"),
10281034
)
1035+
assert result.column(17) == pa.array(
1036+
[time(12, 30, 45)] * 3,
1037+
type=pa.time64("ns"),
1038+
)
1039+
assert result.column(18) == pa.array(
1040+
[time(12, 30, 45)] * 3,
1041+
type=pa.time64("ns"),
1042+
)
1043+
assert result.column(19) == pa.array(
1044+
[date(2017, 5, 31)] * 3,
1045+
type=pa.date32(),
1046+
)
1047+
assert result.column(20) == pa.array(
1048+
[date(2017, 5, 31)] * 3,
1049+
type=pa.date32(),
1050+
)
1051+
assert result.column(21) == pa.array(
1052+
[
1053+
datetime(2022, 12, 31, tzinfo=DEFAULT_TZ),
1054+
datetime(2027, 6, 26, tzinfo=DEFAULT_TZ),
1055+
datetime(2020, 7, 2, tzinfo=DEFAULT_TZ),
1056+
],
1057+
type=pa.timestamp("us"),
1058+
)
1059+
1060+
assert result.column(22) == pa.array(
1061+
[
1062+
"31-12-2022",
1063+
"26-06-2027",
1064+
"02-07-2020",
1065+
],
1066+
type=pa.string(),
1067+
)
10291068

10301069

10311070
def test_arrow_cast(df):

src/functions.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,9 @@ expr_fn!(
601601
"Converts the number to its equivalent hexadecimal representation."
602602
);
603603
expr_fn!(now);
604+
expr_fn_vec!(to_date);
605+
expr_fn_vec!(to_local_time);
606+
expr_fn_vec!(to_time);
604607
expr_fn_vec!(to_timestamp);
605608
expr_fn_vec!(to_timestamp_millis);
606609
expr_fn_vec!(to_timestamp_nanos);
@@ -613,6 +616,7 @@ expr_fn!(date_part, part date);
613616
expr_fn!(date_trunc, part date);
614617
expr_fn!(date_bin, stride source origin);
615618
expr_fn!(make_date, year month day);
619+
expr_fn!(to_char, datetime format);
616620

617621
expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.");
618622
expr_fn_vec!(
@@ -1045,6 +1049,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
10451049
m.add_wrapped(wrap_pyfunction!(tan))?;
10461050
m.add_wrapped(wrap_pyfunction!(tanh))?;
10471051
m.add_wrapped(wrap_pyfunction!(to_hex))?;
1052+
m.add_wrapped(wrap_pyfunction!(to_char))?;
1053+
m.add_wrapped(wrap_pyfunction!(to_date))?;
1054+
m.add_wrapped(wrap_pyfunction!(to_local_time))?;
1055+
m.add_wrapped(wrap_pyfunction!(to_time))?;
10481056
m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
10491057
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
10501058
m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?;

0 commit comments

Comments
 (0)