@@ -76,16 +76,16 @@ template <typename T> class PandasVector {
7676 PandasVector<T>(PandasVector<T> &&) = default ;
7777 PandasVector<T> &operator =(PandasVector<T> &&) = default ;
7878
79- size_t __len__ () const { return vec_.size (); }
79+ auto __len__ () const { return vec_.size (); }
8080
81- void Append (const T &value) {
81+ auto Append (const T &value) -> void {
8282 if (external_view_exists_) {
8383 throw std::domain_error (" external reference but Vector.resize() needed" );
8484 }
8585 vec_.emplace_back (value);
8686 }
8787
88- void Extend (const nb::ndarray<const T, nb::ndim<1 >> &values) {
88+ auto Extend (const nb::ndarray<const T, nb::ndim<1 >> &values) -> void {
8989 if (external_view_exists_) {
9090 throw std::domain_error (" external reference but Vector.resize() needed" );
9191 }
@@ -110,19 +110,21 @@ template <typename T> class PandasVector {
110110 bool external_view_exists_;
111111};
112112
113+ using pd_kh_int_t = uint32_t ;
114+
113115template <typename T, bool IsMasked> class PandasHashTable {
114116public:
115117 explicit PandasHashTable<T, IsMasked>() = default ;
116- explicit PandasHashTable<T, IsMasked>(size_t new_size) {
118+ explicit PandasHashTable<T, IsMasked>(pd_kh_int_t new_size) {
117119 // historically pandas would take a size_hint constructor and pass
118120 // it to the hash map. However, klib has no public method on the map
119121 // to resize from a hint (only on sets) so we silently discard
120122 hash_map_.resize (new_size);
121123 }
122124
123- auto __len__ () const noexcept -> size_t { return hash_map_.size (); }
125+ auto __len__ () const noexcept { return hash_map_.size (); }
124126
125- auto __contains__ (nb::object key) const noexcept -> bool {
127+ auto __contains__ (nb::object key) const noexcept {
126128 if constexpr (IsMasked) {
127129 if (key.is_none ()) {
128130 return -1 != na_position_;
@@ -133,8 +135,8 @@ template <typename T, bool IsMasked> class PandasHashTable {
133135 return hash_map_.get (ckey) != hash_map_.end ();
134136 }
135137
136- auto SizeOf () const noexcept -> size_t {
137- constexpr size_t overhead = 4 * sizeof (uint32_t ) + 3 * sizeof (uint32_t *);
138+ auto SizeOf () const noexcept {
139+ constexpr auto overhead = 4 * sizeof (uint32_t ) + 3 * sizeof (uint32_t *);
138140 const auto for_flags =
139141 std::max (1U , hash_map_.n_buckets () >> 5 ) * sizeof (uint32_t );
140142 const auto for_pairs =
@@ -158,7 +160,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
158160 // / Returns
159161 // / -------
160162 // / The position of the requested integer.
161- auto GetItem (T key) -> int64_t {
163+ auto GetItem (T key) {
162164 const auto k = hash_map_.get (key);
163165 if (k != hash_map_.end ()) {
164166 return hash_map_.value (k);
@@ -173,7 +175,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
173175 // / Returns
174176 // / -------
175177 // / The position of the last na value.
176- auto GetNA () const -> int64_t {
178+ auto GetNA () const {
177179 // TODO: missing NotImplementedError for mask, although this should really
178180 // just be templated out
179181 if (na_position_ == -1 ) {
@@ -191,14 +193,14 @@ template <typename T, bool IsMasked> class PandasHashTable {
191193 }
192194 }
193195
194- auto MapKeysToValues (const nb::ndarray< const T, nb::ndim< 1 >> &keys,
195- const nb::ndarray<const int64_t , nb::ndim<1 >> &values) noexcept
196- -> void {
196+ auto MapKeysToValues (
197+ const nb::ndarray<const T , nb::ndim<1 >> &keys,
198+ const nb::ndarray< const int64_t , nb::ndim< 1 >> &values) noexcept -> void {
197199 nb::call_guard<nb::gil_scoped_release>();
198200 const auto keys_v = keys.view ();
199201 const auto values_v = values.view ();
200202
201- for (size_t i = 0 ; i < values_v.shape (0 ); i++) {
203+ for (decltype (values_v. shape ( 0 )) i = 0 ; i < values_v.shape (0 ); i++) {
202204 hash_map_[keys_v (i)] = values_v (i);
203205 }
204206 }
@@ -218,7 +220,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
218220 nb::cast<nb::ndarray<const uint8_t , nb::ndim<1 >>>(mask);
219221 const auto mask_v = mask_base.view ();
220222 auto na_position = na_position_; // pandas uses int8_t here - why?
221- for (size_t i = 0 ; i < values_v.shape (0 ); i++) {
223+ for (decltype (values_v. shape ( 0 )) i = 0 ; i < values_v.shape (0 ); i++) {
222224 if (mask_v (i)) {
223225 na_position = i;
224226 } else {
@@ -227,7 +229,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
227229 }
228230 na_position_ = na_position;
229231 } else {
230- for (size_t i = 0 ; i < values_v.shape (0 ); i++) {
232+ for (decltype (values_v. shape ( 0 )) i = 0 ; i < values_v.shape (0 ); i++) {
231233 const auto key = values_v (i);
232234 hash_map_[key] = i;
233235 }
@@ -243,7 +245,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
243245 }
244246
245247 nb::call_guard<nb::gil_scoped_release>();
246- const size_t n = values.shape (0 );
248+ const auto n = values.shape (0 );
247249 auto *locs = new Py_ssize_t[n];
248250 const auto values_v = values.view ();
249251
@@ -253,7 +255,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
253255 const auto mask_base =
254256 nb::cast<nb::ndarray<const uint8_t , nb::ndim<1 >>>(mask);
255257 const auto mask_v = mask_base.view ();
256- for (size_t i = 0 ; i < n; i++) {
258+ for (decltype (values. shape ( 0 )) i = 0 ; i < n; i++) {
257259 if (mask_v (i)) {
258260 locs[i] = na_position_;
259261 } else {
@@ -267,7 +269,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
267269 }
268270 }
269271 } else {
270- for (size_t i = 0 ; i < n; i++) {
272+ for (decltype (values. shape (n)) i = 0 ; i < n; i++) {
271273 const auto val = values_v (i);
272274 const auto position = hash_map_.get (val);
273275 if (position == hash_map_.end ()) {
@@ -283,7 +285,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
283285
284286 const size_t shape[1 ] = {n};
285287 return nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1 >>(locs, 1 , shape,
286- owner);
288+ owner);
287289 }
288290
289291 // / Mutates uniques argument
@@ -327,7 +329,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
327329 auto *labels = new Py_ssize_t[n];
328330 Py_ssize_t count = 0 ;
329331
330- for (size_t i = 0 ; i < n; i++) {
332+ for (decltype (values. shape ( 0 )) i = 0 ; i < n; i++) {
331333 const auto val = values_v (i);
332334
333335 // specific for groupby
@@ -350,22 +352,21 @@ template <typename T, bool IsMasked> class PandasHashTable {
350352 }
351353
352354 nb::gil_scoped_acquire ();
355+
356+ const size_t shape[1 ] = {n};
353357 nb::capsule owner (labels, [](void *p) noexcept { delete[] (size_t *) p; });
354358 const auto labels_arr =
355- nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1 >>(labels,
356- {
357- n,
358- },
359- owner);
359+ nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1 >>(labels, 1 , shape,
360+ owner);
360361 return std::make_tuple (labels_arr, uniques.ToNdArray ());
361362 }
362363
363364private:
364365 auto UniqueInternal (const nb::ndarray<const T, nb::ndim<1 >> &values,
365366 PandasVector<T> &uniques, Py_ssize_t count_prior = 0 ,
366367 [[maybe_unused]] Py_ssize_t na_sentinel = -1 ,
367- [[maybe_unused]] nb::object na_value = nb::none(), bool ignore_na = false,
368- nb::object mask_obj = nb::none(),
368+ [[maybe_unused]] nb::object na_value = nb::none(),
369+ bool ignore_na = false, nb::object mask_obj = nb::none(),
369370 bool return_inverse = false, bool use_result_mask = false)
370371 -> nb::object {
371372 if (use_result_mask && return_inverse) {
@@ -438,7 +439,8 @@ template <typename T, bool IsMasked> class PandasHashTable {
438439 template <bool IgnoreNA, bool UseNAValue>
439440 auto UniqueWithInverse (const nb::ndarray<const T, nb::ndim<1 >> &values,
440441 PandasVector<T> &uniques, Py_ssize_t count_prior,
441- [[maybe_unused]] Py_ssize_t na_sentinel, [[maybe_unused]] T na_value,
442+ [[maybe_unused]] Py_ssize_t na_sentinel,
443+ [[maybe_unused]] T na_value,
442444 [[maybe_unused]] nb::object mask_obj = nb::none())
443445 -> nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1>> {
444446 if constexpr (IsMasked) {
@@ -461,7 +463,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
461463 nb::call_guard<nb::gil_scoped_release>();
462464 const auto mask_v = mask.view ();
463465
464- for (size_t i = 0 ; i < n; i++) {
466+ for (decltype (values_v. shape ( 0 )) i = 0 ; i < n; i++) {
465467 if constexpr (IgnoreNA) {
466468 if (mask_v (i)) {
467469 labels[i] = na_sentinel;
@@ -486,7 +488,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
486488 }
487489 } else {
488490 nb::call_guard<nb::gil_scoped_release>();
489- for (size_t i = 0 ; i < n; i++) {
491+ for (decltype (values_v. shape ( 0 )) i = 0 ; i < n; i++) {
490492 const auto val = values_v (i);
491493
492494 if constexpr (IgnoreNA) {
@@ -519,13 +521,10 @@ template <typename T, bool IsMasked> class PandasHashTable {
519521 }
520522 }
521523
524+ const size_t shape[1 ] = {n};
522525 nb::capsule owner (labels, [](void *p) noexcept { delete[] (size_t *) p; });
523-
524- return nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1 >>(labels,
525- {
526- n,
527- },
528- owner);
526+ return nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1 >>(labels, 1 ,
527+ shape, owner);
529528 }
530529
531530 template <bool IgnoreNA, bool UseNAValue>
@@ -553,7 +552,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
553552 const auto mask_v = mask.view ();
554553
555554 bool seen_na = false ;
556- for (size_t i = 0 ; i < n; i++) {
555+ for (decltype (values. shape ( 0 )) i = 0 ; i < n; i++) {
557556 const auto val = values_v (i);
558557
559558 if constexpr (IgnoreNA) {
@@ -595,7 +594,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
595594 }
596595 } else {
597596 nb::call_guard<nb::gil_scoped_release>();
598- for (size_t i = 0 ; i < n; i++) {
597+ for (decltype (values. shape ( 0 )) i = 0 ; i < n; i++) {
599598 const auto val = values_v (i);
600599 auto k = hash_map_.get (val);
601600 if (k == hash_map_.end ()) {
@@ -631,7 +630,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
631630 }
632631 nb::call_guard<nb::gil_scoped_release>();
633632
634- for (size_t i = 0 ; i < n; i++) {
633+ for (decltype (values. shape ( 0 )) i = 0 ; i < n; i++) {
635634 if constexpr (IgnoreNA) {
636635 // TODO: current pandas code is a bit messy here...
637636 // labels[i] = na_sentinel
@@ -648,7 +647,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
648647 }
649648 } else {
650649 nb::call_guard<nb::gil_scoped_release>();
651- for (size_t i = 0 ; i < n; i++) {
650+ for (decltype (values. shape ( 0 )) i = 0 ; i < n; i++) {
652651 const auto val = values_v (i);
653652 auto k = hash_map_.get (val);
654653 if (k == hash_map_.end ()) {
@@ -683,7 +682,7 @@ using namespace nb::literals;
683682 do { \
684683 nb::class_<PandasHashTable<TYPE, MASKED>>(m, NAME) \
685684 .def (nb::init<>()) \
686- .def (nb::init<size_t >(), " size_hint" _a) \
685+ .def (nb::init<pd_kh_int_t >(), " size_hint" _a) \
687686 .def (" __len__" , &PandasHashTable<TYPE, MASKED>::__len__) \
688687 .def (" __contains__" , &PandasHashTable<TYPE, MASKED>::__contains__) \
689688 .def (" sizeof" , &PandasHashTable<TYPE, MASKED>::SizeOf) \
0 commit comments