From 04f13be6b99d20ef5b438834ce0c6a46ed5633b0 Mon Sep 17 00:00:00 2001 From: arash andishgar Date: Mon, 16 Feb 2026 09:28:40 +0330 Subject: [PATCH 1/2] enable ulp based comparison --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/compare.cc | 245 ++++++++++++++--------------- cpp/src/arrow/compare.h | 38 ++++- cpp/src/arrow/meson.build | 1 + cpp/src/arrow/scalar_test.cc | 143 +++++++++++++++++ cpp/src/arrow/testing/math.cc | 73 ++------- cpp/src/arrow/testing/math.h | 12 +- cpp/src/arrow/util/ulp_distance.cc | 103 ++++++++++++ cpp/src/arrow/util/ulp_distance.h | 33 ++++ 9 files changed, 449 insertions(+), 200 deletions(-) create mode 100644 cpp/src/arrow/util/ulp_distance.cc create mode 100644 cpp/src/arrow/util/ulp_distance.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 6e9d76a61e0..6e0e3d5d225 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -535,6 +535,7 @@ set(ARROW_UTIL_SRCS util/time.cc util/tracing.cc util/trie.cc + util/ulp_distance.cc util/union_util.cc util/unreachable.cc util/uri.cc diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 26f56d9b588..559d1e63216 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -53,6 +53,7 @@ #include "arrow/util/macros.h" #include "arrow/util/memory_internal.h" #include "arrow/util/ree_util.h" +#include "arrow/util/ulp_distance.h" #include "arrow/util/unreachable.h" #include "arrow/visit_scalar_inline.h" #include "arrow/visit_type_inline.h" @@ -71,39 +72,52 @@ using util::Float16; namespace { -template +template struct FloatingEqualityFlags { - static constexpr bool approximate = Approximate; + static constexpr bool absolute_tolerance = AbsoluteTolerance; static constexpr bool nans_equal = NansEqual; static constexpr bool signed_zeros_equal = SignedZerosEqual; + static constexpr bool ulp_distance_equal = UlpDistanceEqual; }; template struct FloatingEquality { explicit FloatingEquality(const EqualOptions& options) - : epsilon(static_cast(options.atol())) {} + : epsilon(static_cast(options.atol())), ulp_distance(options.ulp_distance()) {} bool operator()(T x, T y) const { if (x == y) { return Flags::signed_zeros_equal || (std::signbit(x) == std::signbit(y)); } - if (Flags::nans_equal && std::isnan(x) && std::isnan(y)) { + + if constexpr (Flags::nans_equal) { + if (std::isnan(x) && std::isnan(y)) { + return true; + } + } else if (std::isnan(x) || std::isnan(y)) { + return false; + } + + if (Flags::absolute_tolerance && (std::fabs(x - y) <= epsilon)) { return true; } - if (Flags::approximate && (fabs(x - y) <= epsilon)) { + if (Flags::ulp_distance_equal && UlpDistance(x, y, ulp_distance)) { return true; } return false; } const T epsilon; + const uint16_t ulp_distance; }; // For half-float equality. template struct FloatingEquality { explicit FloatingEquality(const EqualOptions& options) - : epsilon(static_cast(options.atol())) {} + : epsilon(static_cast(options.atol())), + ulp_distance(options.ulp_distance()) {} bool operator()(uint16_t x, uint16_t y) const { Float16 f_x = Float16::FromBits(x); @@ -111,46 +125,65 @@ struct FloatingEquality { if (f_x == f_y) { return Flags::signed_zeros_equal || (f_x.signbit() == f_y.signbit()); } - if (Flags::nans_equal && f_x.is_nan() && f_y.is_nan()) { + if constexpr (Flags::nans_equal) { + if (f_x.is_nan() && f_y.is_nan()) { + return true; + } + } else if (f_x.is_nan() || f_y.is_nan()) { + return false; + } + if (Flags::absolute_tolerance && + (std::fabs(f_x.ToFloat() - f_y.ToFloat()) <= epsilon)) { return true; } - if (Flags::approximate && (fabs(f_x.ToFloat() - f_y.ToFloat()) <= epsilon)) { + if (Flags::ulp_distance_equal && UlpDistance(f_x, f_y, ulp_distance)) { return true; } return false; } const float epsilon; + const uint16_t ulp_distance; }; template struct FloatingEqualityDispatcher { const EqualOptions& options; - bool floating_approximate; Visitor&& visit; - template - void DispatchL3() { - if (options.signed_zeros_equal()) { - visit(FloatingEquality>{ + template + void DispatchL4() { + if (options.use_ulp_distance()) { + visit(FloatingEquality< + T, FloatingEqualityFlags>{ options}); } else { - visit(FloatingEquality>{ + visit(FloatingEquality< + T, FloatingEqualityFlags>{ options}); } } - template + template + void DispatchL3() { + if (options.signed_zeros_equal()) { + DispatchL4(); + } else { + DispatchL4(); + } + } + + template void DispatchL2() { if (options.nans_equal()) { - DispatchL3(); + DispatchL3(); } else { - DispatchL3(); + DispatchL3(); } } void Dispatch() { - if (floating_approximate) { + if (options.use_atol()) { DispatchL2(); } else { DispatchL2(); @@ -161,10 +194,8 @@ struct FloatingEqualityDispatcher { // Call `visit(equality_func)` where `equality_func` has the signature `bool(T, T)` // and returns true if the two values compare equal. template -void VisitFloatingEquality(const EqualOptions& options, bool floating_approximate, - Visitor&& visit) { - FloatingEqualityDispatcher{options, floating_approximate, - std::forward(visit)} +void VisitFloatingEquality(const EqualOptions& options, Visitor&& visit) { + FloatingEqualityDispatcher{options, std::forward(visit)} .Dispatch(); } @@ -190,20 +221,17 @@ inline bool IdentityImpliesEquality(const DataType& type, const EqualOptions& op bool CompareArrayRanges(const ArrayData& left, const ArrayData& right, int64_t left_start_idx, int64_t left_end_idx, - int64_t right_start_idx, const EqualOptions& options, - bool floating_approximate); + int64_t right_start_idx, const EqualOptions& options); class RangeDataEqualsImpl { public: // PRE-CONDITIONS: // - the types are equal // - the ranges are in bounds - RangeDataEqualsImpl(const EqualOptions& options, bool floating_approximate, - const ArrayData& left, const ArrayData& right, - int64_t left_start_idx, int64_t right_start_idx, - int64_t range_length) + RangeDataEqualsImpl(const EqualOptions& options, const ArrayData& left, + const ArrayData& right, int64_t left_start_idx, + int64_t right_start_idx, int64_t range_length) : options_(options), - floating_approximate_(floating_approximate), left_(left), right_(right), left_start_idx_(left_start_idx), @@ -349,7 +377,7 @@ class RangeDataEqualsImpl { const ArrayData& right_data = *right_.child_data[0]; auto compare_runs = [&](int64_t i, int64_t length) -> bool { - RangeDataEqualsImpl impl(options_, floating_approximate_, left_data, right_data, + RangeDataEqualsImpl impl(options_, left_data, right_data, (left_start_idx_ + left_.offset + i) * list_size, (right_start_idx_ + right_.offset + i) * list_size, length * list_size); @@ -364,8 +392,7 @@ class RangeDataEqualsImpl { auto compare_runs = [&](int64_t i, int64_t length) -> bool { for (int32_t f = 0; f < num_fields; ++f) { - RangeDataEqualsImpl impl(options_, floating_approximate_, *left_.child_data[f], - *right_.child_data[f], + RangeDataEqualsImpl impl(options_, *left_.child_data[f], *right_.child_data[f], left_start_idx_ + left_.offset + i, right_start_idx_ + right_.offset + i, length); if (!impl.Compare()) { @@ -399,11 +426,11 @@ class RangeDataEqualsImpl { const auto previous_child_num = child_ids[left_codes[left_start_idx_ + i - 1]]; int64_t run_length = i - run_start; - RangeDataEqualsImpl impl( - options_, floating_approximate_, *left_.child_data[previous_child_num], - *right_.child_data[previous_child_num], - left_start_idx_ + left_.offset + run_start, - right_start_idx_ + right_.offset + run_start, run_length); + RangeDataEqualsImpl impl(options_, *left_.child_data[previous_child_num], + *right_.child_data[previous_child_num], + left_start_idx_ + left_.offset + run_start, + right_start_idx_ + right_.offset + run_start, + run_length); if (!impl.Compare()) { result_ = false; @@ -421,7 +448,7 @@ class RangeDataEqualsImpl { int64_t final_run_length = range_length_ - run_start; RangeDataEqualsImpl impl( - options_, floating_approximate_, *left_.child_data[final_child_num], + options_, *left_.child_data[final_child_num], *right_.child_data[final_child_num], left_start_idx_ + left_.offset + run_start, right_start_idx_ + right_.offset + run_start, final_run_length); @@ -447,9 +474,8 @@ class RangeDataEqualsImpl { } const auto child_num = child_ids[type_id]; RangeDataEqualsImpl impl( - options_, floating_approximate_, *left_.child_data[child_num], - *right_.child_data[child_num], left_offsets[left_start_idx_ + i], - right_offsets[right_start_idx_ + i], 1); + options_, *left_.child_data[child_num], *right_.child_data[child_num], + left_offsets[left_start_idx_ + i], right_offsets[right_start_idx_ + i], 1); if (!impl.Compare()) { result_ = false; break; @@ -464,7 +490,7 @@ class RangeDataEqualsImpl { *left_.dictionary, *right_.dictionary, /*left_start_idx=*/0, /*left_end_idx=*/std::max(left_.dictionary->length, right_.dictionary->length), - /*right_start_idx=*/0, options_, floating_approximate_); + /*right_start_idx=*/0, options_); if (result_) { // Compare indices result_ &= CompareWithType(*type.index_type()); @@ -516,7 +542,7 @@ class RangeDataEqualsImpl { return compare_func(x, y); }); }; - VisitFloatingEquality(options_, floating_approximate_, std::move(visitor)); + VisitFloatingEquality(options_, std::move(visitor)); return Status::OK(); } @@ -547,8 +573,8 @@ class RangeDataEqualsImpl { const auto compare_ranges = [&](int64_t left_offset, int64_t right_offset, int64_t length) -> bool { - RangeDataEqualsImpl impl(options_, floating_approximate_, left_data, right_data, - left_offset, right_offset, length); + RangeDataEqualsImpl impl(options_, left_data, right_data, left_offset, right_offset, + length); return impl.Compare(); }; @@ -576,8 +602,8 @@ class RangeDataEqualsImpl { if (size == 0) { continue; } - RangeDataEqualsImpl impl(options_, floating_approximate_, left_values, - right_values, left_offsets[j], right_offsets[j], size); + RangeDataEqualsImpl impl(options_, left_values, right_values, left_offsets[j], + right_offsets[j], size); if (!impl.Compare()) { return false; } @@ -602,7 +628,7 @@ class RangeDataEqualsImpl { auto it = ree_util::MergedRunsIterator(left, right); for (; !it.is_end(); ++it) { - RangeDataEqualsImpl impl(options_, floating_approximate_, left_values, right_values, + RangeDataEqualsImpl impl(options_, left_values, right_values, it.index_into_left_array(), it.index_into_right_array(), /*range_length=*/1); if (!impl.Compare()) { @@ -670,7 +696,6 @@ class RangeDataEqualsImpl { } const EqualOptions& options_; - const bool floating_approximate_; const ArrayData& left_; const ArrayData& right_; const int64_t left_start_idx_; @@ -682,8 +707,7 @@ class RangeDataEqualsImpl { bool CompareArrayRanges(const ArrayData& left, const ArrayData& right, int64_t left_start_idx, int64_t left_end_idx, - int64_t right_start_idx, const EqualOptions& options, - bool floating_approximate) { + int64_t right_start_idx, const EqualOptions& options) { if (left.type->id() != right.type->id() || !TypeEquals(*left.type, *right.type, false /* check_metadata */)) { return false; @@ -704,8 +728,8 @@ bool CompareArrayRanges(const ArrayData& left, const ArrayData& right, return true; } // Compare values - RangeDataEqualsImpl impl(options, floating_approximate, left, right, left_start_idx, - right_start_idx, range_length); + RangeDataEqualsImpl impl(options, left, right, left_start_idx, right_start_idx, + range_length); return impl.Compare(); } @@ -875,22 +899,13 @@ class TypeEqualsVisitor { bool result_; }; -bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts, - bool floating_approximate); -bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options, - bool floating_approximate); - class ScalarEqualsVisitor { public: // PRE-CONDITIONS: // - the types are equal // - the scalars are non-null - explicit ScalarEqualsVisitor(const Scalar& right, const EqualOptions& opts, - bool floating_approximate) - : right_(right), - options_(opts), - floating_approximate_(floating_approximate), - result_(false) {} + explicit ScalarEqualsVisitor(const Scalar& right, const EqualOptions& opts) + : right_(right), options_(opts), result_(false) {} Status Visit(const NullScalar& left) { result_ = true; @@ -952,37 +967,37 @@ class ScalarEqualsVisitor { Status Visit(const ListScalar& left) { const auto& right = checked_cast(right_); - result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ArrayEquals(*left.value, *right.value, options_); return Status::OK(); } Status Visit(const LargeListScalar& left) { const auto& right = checked_cast(right_); - result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ArrayEquals(*left.value, *right.value, options_); return Status::OK(); } Status Visit(const ListViewScalar& left) { const auto& right = checked_cast(right_); - result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ArrayEquals(*left.value, *right.value, options_); return Status::OK(); } Status Visit(const LargeListViewScalar& left) { const auto& right = checked_cast(right_); - result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ArrayEquals(*left.value, *right.value, options_); return Status::OK(); } Status Visit(const MapScalar& left) { const auto& right = checked_cast(right_); - result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ArrayEquals(*left.value, *right.value, options_); return Status::OK(); } Status Visit(const FixedSizeListScalar& left) { const auto& right = checked_cast(right_); - result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ArrayEquals(*left.value, *right.value, options_); return Status::OK(); } @@ -994,8 +1009,7 @@ class ScalarEqualsVisitor { } else { bool all_equals = true; for (size_t i = 0; i < left.value.size() && all_equals; i++) { - all_equals &= ScalarEquals(*left.value[i], *right.value[i], options_, - floating_approximate_); + all_equals &= ScalarEquals(*left.value[i], *right.value[i], options_); } result_ = all_equals; } @@ -1005,35 +1019,33 @@ class ScalarEqualsVisitor { Status Visit(const DenseUnionScalar& left) { const auto& right = checked_cast(right_); - result_ = ScalarEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ScalarEquals(*left.value, *right.value, options_); return Status::OK(); } Status Visit(const SparseUnionScalar& left) { const auto& right = checked_cast(right_); - result_ = ScalarEquals(*left.value[left.child_id], *right.value[right.child_id], - options_, floating_approximate_); + result_ = + ScalarEquals(*left.value[left.child_id], *right.value[right.child_id], options_); return Status::OK(); } Status Visit(const DictionaryScalar& left) { const auto& right = checked_cast(right_); - result_ = ScalarEquals(*left.value.index, *right.value.index, options_, - floating_approximate_) && - ArrayEquals(*left.value.dictionary, *right.value.dictionary, options_, - floating_approximate_); + result_ = ScalarEquals(*left.value.index, *right.value.index, options_) && + ArrayEquals(*left.value.dictionary, *right.value.dictionary, options_); return Status::OK(); } Status Visit(const RunEndEncodedScalar& left) { const auto& right = checked_cast(right_); - result_ = ScalarEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ScalarEquals(*left.value, *right.value, options_); return Status::OK(); } Status Visit(const ExtensionScalar& left) { const auto& right = checked_cast(right_); - result_ = ScalarEquals(*left.value, *right.value, options_, floating_approximate_); + result_ = ScalarEquals(*left.value, *right.value, options_); return Status::OK(); } @@ -1048,13 +1060,12 @@ class ScalarEqualsVisitor { auto visitor = [&](auto&& compare_func) { result_ = compare_func(left.value, right.value); }; - VisitFloatingEquality(options_, floating_approximate_, std::move(visitor)); + VisitFloatingEquality(options_, std::move(visitor)); return Status::OK(); } const Scalar& right_; const EqualOptions options_; - const bool floating_approximate_; bool result_; }; @@ -1107,12 +1118,13 @@ Status PrintDiff(const Array& left, const Array& right, std::ostream* os) { return PrintDiff(left, right, 0, left.length(), 0, right.length(), os); } +} // namespace + bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx, int64_t left_end_idx, int64_t right_start_idx, - const EqualOptions& options, bool floating_approximate) { - bool are_equal = - CompareArrayRanges(*left.data(), *right.data(), left_start_idx, left_end_idx, - right_start_idx, options, floating_approximate); + const EqualOptions& options) { + bool are_equal = CompareArrayRanges(*left.data(), *right.data(), left_start_idx, + left_end_idx, right_start_idx, options); if (!are_equal) { ARROW_IGNORE_EXPR(PrintDiff( left, right, left_start_idx, left_end_idx, right_start_idx, @@ -1121,17 +1133,26 @@ bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_ return are_equal; } -bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts, - bool floating_approximate) { +bool ArrayRangeApproxEquals(const Array& left, const Array& right, int64_t left_start_idx, + int64_t left_end_idx, int64_t right_start_idx, + const EqualOptions& options) { + return ArrayRangeEquals(left, right, left_start_idx, left_end_idx, right_start_idx, + options.use_atol(true).use_ulp_distance(false)); +} + +bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts) { if (left.length() != right.length()) { ARROW_IGNORE_EXPR(PrintDiff(left, right, opts.diff_sink())); return false; } - return ArrayRangeEquals(left, right, 0, left.length(), 0, opts, floating_approximate); + return ArrayRangeEquals(left, right, 0, left.length(), 0, opts); +} + +bool ArrayApproxEquals(const Array& left, const Array& right, const EqualOptions& opts) { + return ArrayEquals(left, right, opts.use_atol(true).use_ulp_distance(false)); } -bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options, - bool floating_approximate) { +bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options) { if (&left == &right && IdentityImpliesEquality(*left.type, options)) { return true; } @@ -1144,46 +1165,15 @@ bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& o if (!left.is_valid) { return true; } - ScalarEqualsVisitor visitor(right, options, floating_approximate); + ScalarEqualsVisitor visitor(right, options); auto error = VisitScalarInline(left, &visitor); DCHECK_OK(error); return visitor.result(); } -} // namespace - -bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx, - int64_t left_end_idx, int64_t right_start_idx, - const EqualOptions& options) { - return ArrayRangeEquals(left, right, left_start_idx, left_end_idx, right_start_idx, - options, options.use_atol()); -} - -bool ArrayRangeApproxEquals(const Array& left, const Array& right, int64_t left_start_idx, - int64_t left_end_idx, int64_t right_start_idx, - const EqualOptions& options) { - const bool floating_approximate = true; - return ArrayRangeEquals(left, right, left_start_idx, left_end_idx, right_start_idx, - options, floating_approximate); -} - -bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts) { - return ArrayEquals(left, right, opts, opts.use_atol()); -} - -bool ArrayApproxEquals(const Array& left, const Array& right, const EqualOptions& opts) { - const bool floating_approximate = true; - return ArrayEquals(left, right, opts, floating_approximate); -} - -bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options) { - return ScalarEquals(left, right, options, options.use_atol()); -} - bool ScalarApproxEquals(const Scalar& left, const Scalar& right, const EqualOptions& options) { - const bool floating_approximate = true; - return ScalarEquals(left, right, options, floating_approximate); + return ScalarEquals(left, right, options.use_atol(true).use_ulp_distance(false)); } namespace { @@ -1274,8 +1264,7 @@ bool StridedFloatTensorContentEquals(const int dim_index, int64_t left_offset, } }; - VisitFloatingEquality(opts, /*floating_approximate=*/false, - std::move(visitor)); + VisitFloatingEquality(opts, std::move(visitor)); return result; } @@ -1528,7 +1517,7 @@ namespace { bool DoubleEquals(const double& left, const double& right, const EqualOptions& options) { bool result; auto visitor = [&](auto&& compare_func) { result = compare_func(left, right); }; - VisitFloatingEquality(options, options.use_atol(), std::move(visitor)); + VisitFloatingEquality(options, std::move(visitor)); return result; } diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h index 2198495d7d2..c57fe347f2f 100644 --- a/cpp/src/arrow/compare.h +++ b/cpp/src/arrow/compare.h @@ -35,6 +35,7 @@ class SparseTensor; struct Scalar; static constexpr double kDefaultAbsoluteTolerance = 1E-5; +static constexpr uint16_t kDefaultUlpDistance = 4; /// A container of options for equality comparisons class EqualOptions { @@ -66,6 +67,8 @@ class EqualOptions { bool use_atol() const { return use_atol_; } /// Return a new EqualOptions object with the "use_atol" property changed. + /// If both "ulp_distance" and "atol" are specified, the comparison + /// succeeds when either condition is satisfied. EqualOptions use_atol(bool v) const { auto res = EqualOptions(*this); res.use_atol_ = v; @@ -115,6 +118,31 @@ class EqualOptions { return res; } + /// Whether the "ulp_distance" property is used in the comparison. + /// + /// This option only affects the Equals methods + /// and has no effect on ApproxEquals methods. + /// If both "ulp_distance" and "atol" are specified, the comparison + /// succeeds when either condition is satisfied. + bool use_ulp_distance() const { return use_ulp_distance_; } + + /// Return a new EqualOptions object with the "use_ulp_distance" property changed. + EqualOptions use_ulp_distance(bool v) const { + auto res = EqualOptions(*this); + res.use_ulp_distance_ = v; + return res; + } + + /// The ulp distance for approximate comparisons of floating-point values. + /// Note that this option is ignored if "use_ulp_distance" is set to false. + uint16_t ulp_distance() const { return ulp_distance_; } + + /// Return a new EqualOptions object with the "ulp_distance" property changed. + EqualOptions ulp_distance(uint16_t v) { + auto res = EqualOptions(*this); + res.ulp_distance_ = v; + return res; + } /// The ostream to which a diff will be formatted if arrays disagree. /// If this is null (the default) no diff will be formatted. std::ostream* diff_sink() const { return diff_sink_; } @@ -132,11 +160,13 @@ class EqualOptions { protected: double atol_ = kDefaultAbsoluteTolerance; + uint16_t ulp_distance_ = kDefaultUlpDistance; bool nans_equal_ = false; bool signed_zeros_equal_ = true; bool use_atol_ = false; bool use_schema_ = true; bool use_metadata_ = false; + bool use_ulp_distance_ = false; std::ostream* diff_sink_ = NULLPTR; }; @@ -147,8 +177,8 @@ class EqualOptions { ARROW_EXPORT bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& = EqualOptions::Defaults()); -/// Returns true if the arrays are approximately equal. For non-floating point -/// types, this is equivalent to ArrayEquals(left, right) +/// Returns true if the arrays are approximately equal according to the absolute tolerance +/// method. For non-floating point types, this is equivalent to ArrayEquals(left, right) /// /// Note that arrow::ArrayStatistics is not included in the comparison. ARROW_EXPORT bool ArrayApproxEquals(const Array& left, const Array& right, @@ -163,6 +193,7 @@ ARROW_EXPORT bool ArrayRangeEquals(const Array& left, const Array& right, const EqualOptions& = EqualOptions::Defaults()); /// Returns true if indicated equal-length segment of arrays are approximately equal +/// according to the absolute tolerance method. /// /// Note that arrow::ArrayStatistics is not included in the comparison. ARROW_EXPORT bool ArrayRangeApproxEquals(const Array& left, const Array& right, @@ -202,7 +233,8 @@ ARROW_EXPORT bool ArrayStatisticsEquals( ARROW_EXPORT bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options = EqualOptions::Defaults()); -/// Returns true if scalars are approximately equal +/// Returns true if the scalars are approximately equal according to the absolute +/// tolerance method. /// \param[in] left a Scalar /// \param[in] right a Scalar /// \param[in] options comparison options diff --git a/cpp/src/arrow/meson.build b/cpp/src/arrow/meson.build index cd113311c86..b22d7dbafc7 100644 --- a/cpp/src/arrow/meson.build +++ b/cpp/src/arrow/meson.build @@ -214,6 +214,7 @@ arrow_util_srcs = [ 'util/time.cc', 'util/tracing.cc', 'util/trie.cc', + 'util/ulp_distance.cc', 'util/union_util.cc', 'util/unreachable.cc', 'util/uri.cc', diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index 4a34e5d13c2..bda7d18e758 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -353,6 +353,11 @@ class TestRealScalar : public ::testing::Test { ASSERT_FALSE(scalar_nan_->Equals(*scalar_val_, options)); ASSERT_TRUE(scalar_nan_->Equals(*scalar_nan_, options)); ASSERT_TRUE(scalar_nan_->Equals(*scalar_other_nan_, options)); + + options = options.nans_equal(false).use_ulp_distance(true); + ASSERT_FALSE(scalar_nan_->Equals(*scalar_val_, options)); + ASSERT_FALSE(scalar_nan_->Equals(*scalar_nan_, options)); + ASSERT_FALSE(scalar_nan_->Equals(*scalar_other_nan_, options)); } void TestSignedZeroEquals() { @@ -365,6 +370,11 @@ class TestRealScalar : public ::testing::Test { ASSERT_FALSE(scalar_zero_->Equals(*scalar_val_, options)); ASSERT_TRUE(scalar_zero_->Equals(*scalar_other_zero_, options)); ASSERT_FALSE(scalar_zero_->Equals(*scalar_neg_zero_, options)); + + options = options.signed_zeros_equal(false).use_ulp_distance(true); + ASSERT_FALSE(scalar_zero_->Equals(*scalar_val_, options)); + ASSERT_TRUE(scalar_zero_->Equals(*scalar_other_zero_, options)); + ASSERT_FALSE(scalar_zero_->Equals(*scalar_neg_zero_, options)); } void TestApproxEquals() { @@ -562,6 +572,139 @@ TYPED_TEST(TestRealScalar, ListViewOf) { this->TestListViewOf(); } TYPED_TEST(TestRealScalar, LargeListViewOf) { this->TestLargeListViewOf(); } +namespace { +template +std::shared_ptr CreateScalar(CType value) { + return std::make_shared(value); +} + +template +bool IsScalarEqual(CType left, CType right, const EqualOptions& options) { + std::shared_ptr scalar_left; + std::shared_ptr scalar_right; + if constexpr (std::is_floating_point_v) { + scalar_left = CreateScalar::ScalarType>(left); + scalar_right = CreateScalar::ScalarType>(right); + } else { + scalar_left = CreateScalar(left); + scalar_right = CreateScalar(right); + } + return scalar_left->Equals(*scalar_right, options); +} + +template +void AssertScalarEquals(CType left, CType right, const EqualOptions& options) { + ASSERT_TRUE(IsScalarEqual(left, right, options)); +} + +template +void AssertScalarNotEquals(CType left, CType right, const EqualOptions& options) { + ASSERT_FALSE(IsScalarEqual(left, right, options)); +} + +} // namespace + +TEST(TestRealScalarUlpDistance, Double) { + auto options = EqualOptions::Defaults().use_ulp_distance(true); + + // Check for different value + AssertScalarEquals(1.0, 1.0000000000000002, options.ulp_distance(1)); + AssertScalarEquals(1.0, 1.0000000000000007, options.ulp_distance(3)); + AssertScalarNotEquals(1.0, 1.0000000000000002, options.ulp_distance(0)); + AssertScalarNotEquals(1.0, 1.0000000000000007, options.ulp_distance(2)); + AssertScalarNotEquals(1.0, 1.0000000000000007, options.ulp_distance(1)); + AssertScalarEquals(123.4567, 123.45670000000015, options.ulp_distance(11)); + AssertScalarNotEquals(123.4567, 123.45670000000015, + options.use_ulp_distance(false).ulp_distance(11)); + AssertScalarNotEquals(123.4567, 123.45670000000015, options.ulp_distance(10)); + + // Left and right have a different exponent but are still very close + AssertScalarEquals(1.0, 0.9999999999999999, options.ulp_distance(1)); + AssertScalarEquals(1.0, 0.9999999999999988, options.ulp_distance(11)); + AssertScalarNotEquals(1.0, 0.9999999999999988, options.ulp_distance(10)); + AssertScalarEquals(1.0000000000000002, 0.9999999999999999, options.ulp_distance(2)); + AssertScalarNotEquals(1.0000000000000002, 0.9999999999999999, options.ulp_distance(1)); + AssertScalarEquals(0.9999999999999988, 1.0000000000000007, options.ulp_distance(14)); + AssertScalarNotEquals(0.9999999999999988, 1.0000000000000007, + options.ulp_distance(14).use_ulp_distance(false)); + AssertScalarNotEquals(0.9999999999999988, 1.0000000000000007, options.ulp_distance(13)); + + // Check for infinity + double max = std::numeric_limits::max(); + double positive_infinity = std::numeric_limits::infinity(); + double negative_infinity = -1 * std::numeric_limits::infinity(); + AssertScalarNotEquals(max, positive_infinity, options.ulp_distance(0)); + AssertScalarEquals(max, positive_infinity, options.ulp_distance(1)); + AssertScalarNotEquals(max, positive_infinity, + options.use_ulp_distance(false).ulp_distance(1)); + AssertScalarNotEquals(positive_infinity, negative_infinity, options); +} + +TEST(TestRealScalarUlpDistance, Float) { + auto options = EqualOptions::Defaults().use_ulp_distance(true); + + // Check for different value + AssertScalarEquals(1.0f, 1.0000001f, options.ulp_distance(1)); + AssertScalarEquals(1.0f, 1.0000013f, options.ulp_distance(11)); + AssertScalarNotEquals(1.0f, 1.0000001f, options.ulp_distance(0)); + AssertScalarNotEquals(1.0f, 1.0000013f, options.ulp_distance(10)); + AssertScalarEquals(123.456f, 123.456085f, options.ulp_distance(11)); + AssertScalarNotEquals(123.456f, 123.456085f, options.ulp_distance(10)); + + // Left and right have a different exponent but are still very close + AssertScalarEquals(1.0f, 0.99999994f, options.ulp_distance(1)); + AssertScalarEquals(1.0f, 0.99999934f, options.ulp_distance(11)); + AssertScalarNotEquals(1.0f, 0.99999934f, options.ulp_distance(10)); + AssertScalarEquals(1.0000001f, 0.99999994f, options.ulp_distance(2)); + AssertScalarNotEquals(1.0000001f, 0.99999994f, options.ulp_distance(1)); + AssertScalarEquals(1.0000013f, 0.99999934f, options.ulp_distance(22)); + AssertScalarNotEquals(1.0000013f, 0.99999934f, options.ulp_distance(21)); + + // Check for infinity + float max = std::numeric_limits::max(); + float positive_infinity = std::numeric_limits::infinity(); + float negative_infinity = -1 * std::numeric_limits::infinity(); + AssertScalarNotEquals(max, positive_infinity, options.ulp_distance(0)); + AssertScalarEquals(max, positive_infinity, options.ulp_distance(1)); + AssertScalarNotEquals(max, positive_infinity, + options.use_ulp_distance(false).ulp_distance(1)); + AssertScalarNotEquals(positive_infinity, negative_infinity, options); +} + +TEST(TestRealScalarUlpDistance, HalfFloat) { + auto options = EqualOptions::Defaults().use_ulp_distance(true); + + // Check for different value + AssertScalarEquals(Float16(1.0f), Float16(1.00097656f), options.ulp_distance(1)); + AssertScalarEquals(Float16(1.0f), Float16(1.01074219f), options.ulp_distance(11)); + AssertScalarNotEquals(Float16(1.0f), Float16(1.00097656f), options.ulp_distance(0)); + AssertScalarNotEquals(Float16(1.0f), Float16(1.01074219f), options.ulp_distance(10)); + AssertScalarNotEquals(Float16(123.456f), Float16(124.143501f), + options.ulp_distance(10)); + + // Left and right have a different exponent but are still very close + AssertScalarEquals(Float16(1.0f), Float16(0.999511719f), options.ulp_distance(1)); + AssertScalarEquals(Float16(1.0f), Float16(0.994628906f), options.ulp_distance(11)); + AssertScalarNotEquals(Float16(1.0f), Float16(0.994628906f), options.ulp_distance(10)); + AssertScalarEquals(Float16(1.00097656), Float16(0.999511719f), options.ulp_distance(2)); + AssertScalarNotEquals(Float16(1.00097656), Float16(0.999511719f), + options.ulp_distance(1)); + AssertScalarEquals(Float16(1.01074219f), Float16(0.994628906f), + options.ulp_distance(22)); + AssertScalarNotEquals(Float16(1.01074219f), Float16(0.994628906f), + options.ulp_distance(21)); + + // Check for infinity + Float16 max = std::numeric_limits::max(); + Float16 positive_infinity = std::numeric_limits::infinity(); + Float16 negative_infinity = -std::numeric_limits::infinity(); + AssertScalarNotEquals(max, positive_infinity, options.ulp_distance(0)); + AssertScalarEquals(max, positive_infinity, options.ulp_distance(1)); + AssertScalarNotEquals(max, positive_infinity, + options.use_ulp_distance(false).ulp_distance(1)); + AssertScalarNotEquals(positive_infinity, negative_infinity, options); +} + template class TestDecimalScalar : public ::testing::Test { public: diff --git a/cpp/src/arrow/testing/math.cc b/cpp/src/arrow/testing/math.cc index 79f7ec3033c..b5325fabf7c 100644 --- a/cpp/src/arrow/testing/math.cc +++ b/cpp/src/arrow/testing/math.cc @@ -25,64 +25,13 @@ #include #include "arrow/util/float16.h" -#include "arrow/util/logging_internal.h" -#include "arrow/util/ubsan.h" +#include "arrow/util/ulp_distance.h" namespace arrow { namespace { template -struct FloatToUInt; - -template <> -struct FloatToUInt { - using Type = uint64_t; -}; - -template <> -struct FloatToUInt { - using Type = uint32_t; -}; - -template <> -struct FloatToUInt { - using Type = uint16_t; -}; - -template -struct UlpDistanceUtil { - public: - using UIntType = typename FloatToUInt::Type; - static constexpr UIntType kNumberOfBits = sizeof(Float) * 8; - static constexpr UIntType kSignMask = static_cast(1) << (kNumberOfBits - 1); - - // This implementation is inspired by: - // https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ - static UIntType UlpDistance(Float left, Float right) { - auto unsigned_left = util::SafeCopy(left); - auto unsigned_right = util::SafeCopy(right); - auto biased_left = ConvertSignAndMagnitudeToBiased(unsigned_left); - auto biased_right = ConvertSignAndMagnitudeToBiased(unsigned_right); - if (biased_left > biased_right) { - std::swap(biased_left, biased_right); - } - return biased_right - biased_left; - } - - private: - // Source reference (GoogleTest): - // https://github.com/google/googletest/blob/1b96fa13f549387b7549cc89e1a785cf143a1a50/googletest/include/gtest/internal/gtest-internal.h#L345-L368 - static UIntType ConvertSignAndMagnitudeToBiased(UIntType value) { - if (value & kSignMask) { - return ~value + 1; - } else { - return value | kSignMask; - } - } -}; - -template -bool WithinUlpGeneric(Float left, Float right, int n_ulps) { +bool WithinUlpGeneric(Float left, Float right, uint16_t n_ulps) { if constexpr (std::is_same_v) { if (left.is_nan() || right.is_nan()) { return left.is_nan() == right.is_nan(); @@ -102,13 +51,11 @@ bool WithinUlpGeneric(Float left, Float right, int n_ulps) { return left == right; } - DCHECK_GE(n_ulps, 1); - return UlpDistanceUtil::UlpDistance(left, right) <= - static_cast(n_ulps); + return UlpDistance(left, right, n_ulps); } template -void AssertWithinUlpGeneric(Float left, Float right, int n_ulps) { +void AssertWithinUlpGeneric(Float left, Float right, uint16_t n_ulps) { if (!WithinUlpGeneric(left, right, n_ulps)) { FAIL() << left << " and " << right << " are not within " << n_ulps << " ulps"; } @@ -116,27 +63,27 @@ void AssertWithinUlpGeneric(Float left, Float right, int n_ulps) { } // namespace -bool WithinUlp(util::Float16 left, util::Float16 right, int n_ulps) { +bool WithinUlp(util::Float16 left, util::Float16 right, uint16_t n_ulps) { return WithinUlpGeneric(left, right, n_ulps); } -bool WithinUlp(float left, float right, int n_ulps) { +bool WithinUlp(float left, float right, uint16_t n_ulps) { return WithinUlpGeneric(left, right, n_ulps); } -bool WithinUlp(double left, double right, int n_ulps) { +bool WithinUlp(double left, double right, uint16_t n_ulps) { return WithinUlpGeneric(left, right, n_ulps); } -void AssertWithinUlp(util::Float16 left, util::Float16 right, int n_ulps) { +void AssertWithinUlp(util::Float16 left, util::Float16 right, uint16_t n_ulps) { AssertWithinUlpGeneric(left, right, n_ulps); } -void AssertWithinUlp(float left, float right, int n_ulps) { +void AssertWithinUlp(float left, float right, uint16_t n_ulps) { AssertWithinUlpGeneric(left, right, n_ulps); } -void AssertWithinUlp(double left, double right, int n_ulps) { +void AssertWithinUlp(double left, double right, uint16_t n_ulps) { AssertWithinUlpGeneric(left, right, n_ulps); } diff --git a/cpp/src/arrow/testing/math.h b/cpp/src/arrow/testing/math.h index 1e829e0d616..0fb73a2abbf 100644 --- a/cpp/src/arrow/testing/math.h +++ b/cpp/src/arrow/testing/math.h @@ -23,17 +23,17 @@ namespace arrow { ARROW_TESTING_EXPORT -bool WithinUlp(util::Float16 left, util::Float16 right, int n_ulps); +bool WithinUlp(util::Float16 left, util::Float16 right, uint16_t n_ulps); ARROW_TESTING_EXPORT -bool WithinUlp(float left, float right, int n_ulps); +bool WithinUlp(float left, float right, uint16_t n_ulps); ARROW_TESTING_EXPORT -bool WithinUlp(double left, double right, int n_ulps); +bool WithinUlp(double left, double right, uint16_t n_ulps); ARROW_TESTING_EXPORT -void AssertWithinUlp(util::Float16 left, util::Float16 right, int n_ulps); +void AssertWithinUlp(util::Float16 left, util::Float16 right, uint16_t n_ulps); ARROW_TESTING_EXPORT -void AssertWithinUlp(float left, float right, int n_ulps); +void AssertWithinUlp(float left, float right, uint16_t n_ulps); ARROW_TESTING_EXPORT -void AssertWithinUlp(double left, double right, int n_ulps); +void AssertWithinUlp(double left, double right, uint16_t n_ulps); } // namespace arrow diff --git a/cpp/src/arrow/util/ulp_distance.cc b/cpp/src/arrow/util/ulp_distance.cc new file mode 100644 index 00000000000..04cbd88f3db --- /dev/null +++ b/cpp/src/arrow/util/ulp_distance.cc @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/util/ulp_distance.h" + +#include +#include +#include +#include +#include + +#include "arrow/util/float16.h" + +namespace arrow { +namespace { + +template +struct FloatToUInt; + +template <> +struct FloatToUInt { + using Type = uint64_t; +}; + +template <> +struct FloatToUInt { + using Type = uint32_t; +}; + +template <> +struct FloatToUInt { + using Type = uint16_t; +}; + +template +struct UlpDistanceUtil { + public: + using UIntType = FloatToUInt::Type; + static constexpr UIntType kNumberOfBits = sizeof(Float) * 8; + static constexpr UIntType kSignMask = static_cast(1) << (kNumberOfBits - 1); + + // This implementation is inspired by: + // https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ + static UIntType UlpDistance(Float left, Float right) { + auto unsigned_left = std::bit_cast(left); + auto unsigned_right = std::bit_cast(right); + auto biased_left = ConvertSignAndMagnitudeToBiased(unsigned_left); + auto biased_right = ConvertSignAndMagnitudeToBiased(unsigned_right); + if (biased_left > biased_right) { + std::swap(biased_left, biased_right); + } + + // Handling of NaN should be determined by the comparison policy. + return biased_right - biased_left; + } + + private: + // Source reference (GoogleTest): + // https://github.com/google/googletest/blob/1b96fa13f549387b7549cc89e1a785cf143a1a50/googletest/include/gtest/internal/gtest-internal.h#L345-L368 + static UIntType ConvertSignAndMagnitudeToBiased(UIntType value) { + if (value & kSignMask) { + return ~value + 1; + } else { + return value | kSignMask; + } + } +}; + +template +bool UlpDistanceGeneric(Float left, Float right, uint16_t n_ulps) { + return UlpDistanceUtil::UlpDistance(left, right) <= + static_cast(n_ulps); +} + +} // namespace + +bool UlpDistance(util::Float16 left, util::Float16 right, uint16_t n_ulps) { + return UlpDistanceGeneric(left, right, n_ulps); +} + +bool UlpDistance(float left, float right, uint16_t n_ulps) { + return UlpDistanceGeneric(left, right, n_ulps); +} + +bool UlpDistance(double left, double right, uint16_t n_ulps) { + return UlpDistanceGeneric(left, right, n_ulps); +} + +} // namespace arrow diff --git a/cpp/src/arrow/util/ulp_distance.h b/cpp/src/arrow/util/ulp_distance.h new file mode 100644 index 00000000000..23847189dd0 --- /dev/null +++ b/cpp/src/arrow/util/ulp_distance.h @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/type_fwd.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +ARROW_EXPORT +bool UlpDistance(util::Float16 left, util::Float16 right, uint16_t n_ulps); + +ARROW_EXPORT +bool UlpDistance(float left, float right, uint16_t n_ulps); + +ARROW_EXPORT +bool UlpDistance(double left, double right, uint16_t n_ulps); +} // namespace arrow From 9cd01476f09d86009af409cc8af3bb0dc3bd3630 Mon Sep 17 00:00:00 2001 From: arash andishgar Date: Mon, 16 Feb 2026 11:02:13 +0330 Subject: [PATCH 2/2] Fix a minor issue in test --- cpp/src/arrow/scalar_test.cc | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index bda7d18e758..4af607d0b06 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -580,15 +580,10 @@ std::shared_ptr CreateScalar(CType value) { template bool IsScalarEqual(CType left, CType right, const EqualOptions& options) { - std::shared_ptr scalar_left; - std::shared_ptr scalar_right; - if constexpr (std::is_floating_point_v) { - scalar_left = CreateScalar::ScalarType>(left); - scalar_right = CreateScalar::ScalarType>(right); - } else { - scalar_left = CreateScalar(left); - scalar_right = CreateScalar(right); - } + std::shared_ptr scalar_left = + CreateScalar::ScalarType>(left); + std::shared_ptr scalar_right = + CreateScalar::ScalarType>(right); return scalar_left->Equals(*scalar_right, options); }