Skip to content

Commit e306e2a

Browse files
committed
Try templated khint_t
1 parent 76bed79 commit e306e2a

File tree

2 files changed

+13
-16
lines changed

2 files changed

+13
-16
lines changed

pandas/_libs/new_vector.cpp

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,22 @@ namespace nb = nanobind;
2222
/// and support arbitrary types
2323
///
2424
template <typename T> struct PandasHashFunction {
25-
size_t operator()(const T &value) const { return std::hash<T>()(value); }
25+
auto operator()(const T &value) const { return std::hash<T>()(value); }
2626
};
2727

2828
template <>
29-
size_t PandasHashFunction<float>::operator()(const float &value) const {
29+
auto PandasHashFunction<float>::operator()(const float &value) const {
3030
if (std::isnan(value)) {
31-
return 0;
31+
return static_cast<decltype(std::hash<float>()(value))>(0);
3232
}
3333

3434
return std::hash<float>()(value);
3535
}
3636

3737
template <>
38-
size_t PandasHashFunction<double>::operator()(const double &value) const {
38+
auto PandasHashFunction<double>::operator()(const double &value) const {
3939
if (std::isnan(value)) {
40-
return 0;
40+
return static_cast<decltype(std::hash<double>()(value))>(0);
4141
}
4242

4343
return std::hash<double>()(value);
@@ -110,16 +110,13 @@ template <typename T> class PandasVector {
110110
bool external_view_exists_;
111111
};
112112

113-
using pd_kh_int_t = uint32_t;
114-
115113
template <typename T, bool IsMasked> class PandasHashTable {
116114
public:
115+
using HashValueT = decltype(PandasHashFunction<T>()(T()));
117116
explicit PandasHashTable<T, IsMasked>() = default;
118-
explicit PandasHashTable<T, IsMasked>(pd_kh_int_t new_size) {
119-
// historically pandas would take a size_hint constructor and pass
120-
// it to the hash map. However, klib has no public method on the map
121-
// to resize from a hint (only on sets) so we silently discard
122-
hash_map_.resize(new_size);
117+
explicit PandasHashTable<T, IsMasked>(size_t new_size) {
118+
// TODO: once C++20 is available, use std::in_range to verify that new_size fits in HashValueT before the cast below
119+
hash_map_.resize(static_cast<HashValueT>(new_size));
123120
}
124121

125122
auto __len__() const noexcept { return hash_map_.size(); }
@@ -138,7 +135,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
138135
auto SizeOf() const noexcept {
139136
constexpr auto overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t *);
140137
const auto for_flags =
141-
std::max(1U, hash_map_.n_buckets() >> 5) * sizeof(uint32_t);
138+
std::max(static_cast<HashValueT>(1), hash_map_.n_buckets() >> 5) * sizeof(uint32_t);
142139
const auto for_pairs =
143140
hash_map_.n_buckets() * (sizeof(T) + sizeof(Py_ssize_t));
144141

@@ -661,7 +658,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
661658
return;
662659
}
663660

664-
klib::KHashMap<T, size_t, PandasHashFunction<T>, PandasHashEquality<T>, pd_kh_int_t>
661+
klib::KHashMap<T, size_t, PandasHashFunction<T>, PandasHashEquality<T>, HashValueT>
665662
hash_map_;
666663
Py_ssize_t na_position_ = -1;
667664
};
@@ -682,7 +679,7 @@ using namespace nb::literals;
682679
do { \
683680
nb::class_<PandasHashTable<TYPE, MASKED>>(m, NAME) \
684681
.def(nb::init<>()) \
685-
.def(nb::init<pd_kh_int_t>(), "size_hint"_a) \
682+
.def(nb::init<size_t>(), "size_hint"_a) \
686683
.def("__len__", &PandasHashTable<TYPE, MASKED>::__len__) \
687684
.def("__contains__", &PandasHashTable<TYPE, MASKED>::__contains__) \
688685
.def("sizeof", &PandasHashTable<TYPE, MASKED>::SizeOf) \

pandas/tests/libs/test_hashtable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def test_vector_resize(
311311
)
312312
def test_hashtable_large_sizehint(self, hashtable):
313313
# GH#22729 smoketest for not raising when passing a large size_hint
314-
size_hint = np.iinfo(np.uint32).max
314+
size_hint = np.iinfo(np.uint32).max + 1
315315
hashtable(size_hint=size_hint)
316316

317317

0 commit comments

Comments
 (0)