From 902bb077b83ded6060fd9fe5a693bf30026a1680 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Tue, 30 Dec 2025 21:05:39 +0530 Subject: [PATCH 1/5] fix: Return Int for Date - Date instead of duration --- .../expr-common/src/type_coercion/binary.rs | 21 ++++ .../physical-expr/src/expressions/binary.rs | 98 +++++++++++++++++++ .../test_files/datetime/arith_date_date.slt | 11 +-- .../test_files/datetime/dates.slt | 23 +++-- 4 files changed, 140 insertions(+), 13 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index de16e9e01073e..032e507fb94a0 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -261,6 +261,17 @@ impl<'a> BinaryTypeCoercer<'a> { }) } Plus | Minus | Multiply | Divide | Modulo => { + // Special case: Date - Date should return Int64 (days difference) + // This aligns with PostgreSQL, DuckDB, and MySQL behavior + // See: https://www.postgresql.org/docs/current/functions-datetime.html + if matches!(self.op, Minus) && is_date_minus_date(lhs, rhs) { + return Ok(Signature { + lhs: lhs.clone(), + rhs: rhs.clone(), + ret: Int64, + }); + } + if let Ok(ret) = self.get_result(lhs, rhs) { // Temporal arithmetic, e.g. Date32 + Interval Ok(Signature{ @@ -281,6 +292,7 @@ impl<'a> BinaryTypeCoercer<'a> { ret, }) } else if let Some(coerced) = temporal_coercion_strict_timezone(lhs, rhs) { + // Temporal arithmetic by first coercing to a common time representation // e.g. Date32 - Timestamp let ret = self.get_result(&coerced, &coerced).map_err(|e| { @@ -351,6 +363,15 @@ fn is_decimal(data_type: &DataType) -> bool { ) } +/// Returns true if both operands are Date types (Date32 or Date64) +/// Used to detect Date - Date operations which should return Int64 (days difference) +fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool { + matches!( + (lhs, rhs), + (DataType::Date32, DataType::Date32) | (DataType::Date64, DataType::Date64) + ) +} + /// Coercion rules for mathematics operators between decimal and non-decimal types. fn math_decimal_coercion( lhs_type: &DataType, diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 8df09c22bbd8d..36185c6918f73 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -30,6 +30,7 @@ use arrow::datatypes::*; use arrow::error::ArrowError; use datafusion_common::cast::as_boolean_array; use datafusion_common::{Result, ScalarValue, internal_err, not_impl_err}; + use datafusion_expr::binary::BinaryTypeCoercer; use datafusion_expr::interval_arithmetic::{Interval, apply_operator}; use datafusion_expr::sort_properties::ExprProperties; @@ -162,6 +163,98 @@ fn boolean_op( op(ll, rr).map(|t| Arc::new(t) as _) } +/// Returns true if both operands are Date types (Date32 or Date64) +/// Used to detect Date - Date operations which should return Int64 (days difference) +fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool { + matches!( + (lhs, rhs), + (DataType::Date32, DataType::Date32) | (DataType::Date64, DataType::Date64) + ) +} + +/// Computes the difference between two dates and returns the result as Int64 (days) +/// This aligns with PostgreSQL, DuckDB, and MySQL behavior where date - date returns an integer +/// +/// Implementation: Uses Arrow's sub_wrapping to get Duration, then converts to Int64 days +fn apply_date_subtraction( + lhs: &ColumnarValue, + rhs: &ColumnarValue, +) -> Result { + use arrow::compute::kernels::numeric::sub_wrapping; + + // Use Arrow's sub_wrapping to compute the Duration result + let duration_result = apply(lhs, rhs, sub_wrapping)?; + + // Convert Duration to Int64 (days) + match duration_result { + ColumnarValue::Array(array) => { + let int64_array = duration_to_days(&array)?; + Ok(ColumnarValue::Array(int64_array)) + } + ColumnarValue::Scalar(scalar) => { + // Convert scalar Duration to Int64 days + let array = scalar.to_array_of_size(1)?; + let int64_array = duration_to_days(&array)?; + let int64_scalar = ScalarValue::try_from_array(int64_array.as_ref(), 0)?; + Ok(ColumnarValue::Scalar(int64_scalar)) + } + } +} + +/// Converts a Duration array to Int64 days +/// Handles different Duration time units (Second, Millisecond, Microsecond, Nanosecond) +fn duration_to_days(array: &ArrayRef) -> Result { + use datafusion_common::cast::{ + as_duration_microsecond_array, as_duration_millisecond_array, + as_duration_nanosecond_array, as_duration_second_array, + }; + + const SECONDS_PER_DAY: i64 = 86_400; + const MILLIS_PER_DAY: i64 = 86_400_000; + const MICROS_PER_DAY: i64 = 86_400_000_000; + const NANOS_PER_DAY: i64 = 86_400_000_000_000; + + match array.data_type() { + DataType::Duration(TimeUnit::Second) => { + let duration_array = as_duration_second_array(array)?; + let result: Int64Array = duration_array + .iter() + .map(|v| v.map(|val| val / SECONDS_PER_DAY)) + .collect(); + Ok(Arc::new(result)) + } + DataType::Duration(TimeUnit::Millisecond) => { + let duration_array = as_duration_millisecond_array(array)?; + let result: Int64Array = duration_array + .iter() + .map(|v| v.map(|val| val / MILLIS_PER_DAY)) + .collect(); + Ok(Arc::new(result)) + } + DataType::Duration(TimeUnit::Microsecond) => { + let duration_array = as_duration_microsecond_array(array)?; + let result: Int64Array = duration_array + .iter() + .map(|v| v.map(|val| val / MICROS_PER_DAY)) + .collect(); + Ok(Arc::new(result)) + } + DataType::Duration(TimeUnit::Nanosecond) => { + let duration_array = as_duration_nanosecond_array(array)?; + let result: Int64Array = duration_array + .iter() + .map(|v| v.map(|val| val / NANOS_PER_DAY)) + .collect(); + Ok(Arc::new(result)) + } + other => internal_err!( + "duration_to_days expected Duration type, got: {}", + other + ), + } +} + + impl PhysicalExpr for BinaryExpr { /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { @@ -251,6 +344,11 @@ impl PhysicalExpr for BinaryExpr { match self.op { Operator::Plus if self.fail_on_overflow => return apply(&lhs, &rhs, add), Operator::Plus => return apply(&lhs, &rhs, add_wrapping), + // Special case: Date - Date returns Int64 (days difference) + // This aligns with PostgreSQL, DuckDB, and MySQL behavior + Operator::Minus if is_date_minus_date(&left_data_type, &right_data_type) => { + return apply_date_subtraction(&lhs, &rhs); + } Operator::Minus if self.fail_on_overflow => return apply(&lhs, &rhs, sub), Operator::Minus => return apply(&lhs, &rhs, sub_wrapping), Operator::Multiply if self.fail_on_overflow => return apply(&lhs, &rhs, mul), diff --git a/datafusion/sqllogictest/test_files/datetime/arith_date_date.slt b/datafusion/sqllogictest/test_files/datetime/arith_date_date.slt index f6e4aad78b27c..8eb5cc176f365 100644 --- a/datafusion/sqllogictest/test_files/datetime/arith_date_date.slt +++ b/datafusion/sqllogictest/test_files/datetime/arith_date_date.slt @@ -1,16 +1,15 @@ # date - date → integer # Subtract dates, producing the number of days elapsed # date '2001-10-01' - date '2001-09-28' → 3 +# This aligns with PostgreSQL, DuckDB, and MySQL behavior +# Resolved by: https://github.com/apache/datafusion/issues/19528 -# note that datafusion returns Duration whereas postgres returns an int -# Tracking issue: https://github.com/apache/datafusion/issues/19528 - -query ? +query I SELECT '2001-10-01'::date - '2001-09-28'::date ---- -3 days 0 hours 0 mins 0 secs +3 query T SELECT arrow_typeof('2001-10-01'::date - '2001-09-28'::date) ---- -Duration(s) +Int64 diff --git a/datafusion/sqllogictest/test_files/datetime/dates.slt b/datafusion/sqllogictest/test_files/datetime/dates.slt index 6ba34cfcac03f..6dff68ec09db0 100644 --- a/datafusion/sqllogictest/test_files/datetime/dates.slt +++ b/datafusion/sqllogictest/test_files/datetime/dates.slt @@ -94,12 +94,20 @@ caused by Error during planning: Cannot coerce arithmetic expression Timestamp(ns) + Utf8 to valid types -# DATE minus DATE -# https://github.com/apache/arrow-rs/issues/4383 -query ? +# DATE minus DATE returns Int64 (days difference) +# This aligns with PostgreSQL, DuckDB, and MySQL behavior +# See: https://www.postgresql.org/docs/current/functions-datetime.html +query I SELECT DATE '2023-04-09' - DATE '2023-04-02'; ---- -7 days 0 hours 0 mins 0 secs +7 + +# Verify Date - Date returns Int64 type +query T +SELECT arrow_typeof(DATE '2023-04-09' - DATE '2023-04-02'); +---- +Int64 + # DATE minus Timestamp query ? @@ -113,17 +121,18 @@ SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- 730 days 0 hours 0 mins 0.000000000 secs -# NULL with DATE arithmetic should yield NULL -query ? +# NULL with DATE arithmetic should yield NULL (but Int64 type) +query I SELECT NULL - DATE '1984-02-28'; ---- NULL -query ? +query I SELECT DATE '1984-02-28' - NULL ---- NULL + # to_date_test statement ok create table to_date_t1(ts bigint) as VALUES From 4bbaac897e6f4065f33a69e705e95f29c0f1f6ec Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Fri, 2 Jan 2026 11:23:48 +0530 Subject: [PATCH 2/5] fix fmt --- datafusion/physical-expr/src/expressions/binary.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 36185c6918f73..72eae396e68a6 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -247,14 +247,10 @@ fn duration_to_days(array: &ArrayRef) -> Result { .collect(); Ok(Arc::new(result)) } - other => internal_err!( - "duration_to_days expected Duration type, got: {}", - other - ), + other => internal_err!("duration_to_days expected Duration type, got: {}", other), } } - impl PhysicalExpr for BinaryExpr { /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { From 4ff8058197872b863a66552285d863386b5cb7b9 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Sat, 3 Jan 2026 18:44:04 +0530 Subject: [PATCH 3/5] suggestion from Jefferey Co-authored-by: Jeffrey Vo --- datafusion/expr-common/src/type_coercion/binary.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 032e507fb94a0..dd81704a5bbd9 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -260,6 +260,13 @@ impl<'a> BinaryTypeCoercer<'a> { ) }) } + Minus if is_date_minus_date(lhs, rhs) => { + return Ok(Signature { + lhs: lhs.clone(), + rhs: rhs.clone(), + ret: Int64, + }); + } Plus | Minus | Multiply | Divide | Modulo => { // Special case: Date - Date should return Int64 (days difference) // This aligns with PostgreSQL, DuckDB, and MySQL behavior From 50c88800e28f423d6f2785e7c3d26197aa5edc13 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Sat, 3 Jan 2026 18:58:53 +0530 Subject: [PATCH 4/5] removed duplicate test --- .../sqllogictest/test_files/datetime/dates.slt | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/datafusion/sqllogictest/test_files/datetime/dates.slt b/datafusion/sqllogictest/test_files/datetime/dates.slt index 6dff68ec09db0..d2a7360b120c6 100644 --- a/datafusion/sqllogictest/test_files/datetime/dates.slt +++ b/datafusion/sqllogictest/test_files/datetime/dates.slt @@ -94,21 +94,6 @@ caused by Error during planning: Cannot coerce arithmetic expression Timestamp(ns) + Utf8 to valid types -# DATE minus DATE returns Int64 (days difference) -# This aligns with PostgreSQL, DuckDB, and MySQL behavior -# See: https://www.postgresql.org/docs/current/functions-datetime.html -query I -SELECT DATE '2023-04-09' - DATE '2023-04-02'; ----- -7 - -# Verify Date - Date returns Int64 type -query T -SELECT arrow_typeof(DATE '2023-04-09' - DATE '2023-04-02'); ----- -Int64 - - # DATE minus Timestamp query ? SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp; From db6df3dfa424dbb909cd89cb21ded09b1cfe0921 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Sat, 3 Jan 2026 21:22:46 +0530 Subject: [PATCH 5/5] remove unused codes --- datafusion/expr-common/src/type_coercion/binary.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index dd81704a5bbd9..c9b39eacefc6a 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -268,18 +268,8 @@ impl<'a> BinaryTypeCoercer<'a> { }); } Plus | Minus | Multiply | Divide | Modulo => { - // Special case: Date - Date should return Int64 (days difference) - // This aligns with PostgreSQL, DuckDB, and MySQL behavior - // See: https://www.postgresql.org/docs/current/functions-datetime.html - if matches!(self.op, Minus) && is_date_minus_date(lhs, rhs) { - return Ok(Signature { - lhs: lhs.clone(), - rhs: rhs.clone(), - ret: Int64, - }); - } - if let Ok(ret) = self.get_result(lhs, rhs) { + // Temporal arithmetic, e.g. Date32 + Interval Ok(Signature{ lhs: lhs.clone(),