Skip to content

Commit 4235d15

Browse files
committed
use more auto
1 parent 9a0e09b commit 4235d15

File tree

1 file changed

+41
-42
lines changed

1 file changed

+41
-42
lines changed

pandas/_libs/new_vector.cpp

Lines changed: 41 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -76,16 +76,16 @@ template <typename T> class PandasVector {
7676
PandasVector<T>(PandasVector<T> &&) = default;
7777
PandasVector<T> &operator=(PandasVector<T> &&) = default;
7878

79-
size_t __len__() const { return vec_.size(); }
79+
auto __len__() const { return vec_.size(); }
8080

81-
void Append(const T &value) {
81+
auto Append(const T &value) -> void {
8282
if (external_view_exists_) {
8383
throw std::domain_error("external reference but Vector.resize() needed");
8484
}
8585
vec_.emplace_back(value);
8686
}
8787

88-
void Extend(const nb::ndarray<const T, nb::ndim<1>> &values) {
88+
auto Extend(const nb::ndarray<const T, nb::ndim<1>> &values) -> void {
8989
if (external_view_exists_) {
9090
throw std::domain_error("external reference but Vector.resize() needed");
9191
}
@@ -110,19 +110,21 @@ template <typename T> class PandasVector {
110110
bool external_view_exists_;
111111
};
112112

113+
using pd_kh_int_t = uint32_t;
114+
113115
template <typename T, bool IsMasked> class PandasHashTable {
114116
public:
115117
explicit PandasHashTable<T, IsMasked>() = default;
116-
explicit PandasHashTable<T, IsMasked>(size_t new_size) {
118+
explicit PandasHashTable<T, IsMasked>(pd_kh_int_t new_size) {
117119
// historically pandas would take a size_hint constructor and pass
118120
// it to the hash map. However, klib has no public method on the map
119121
// to resize from a hint (only on sets) so we silently discard
120122
hash_map_.resize(new_size);
121123
}
122124

123-
auto __len__() const noexcept -> size_t { return hash_map_.size(); }
125+
auto __len__() const noexcept { return hash_map_.size(); }
124126

125-
auto __contains__(nb::object key) const noexcept -> bool {
127+
auto __contains__(nb::object key) const noexcept {
126128
if constexpr (IsMasked) {
127129
if (key.is_none()) {
128130
return -1 != na_position_;
@@ -133,8 +135,8 @@ template <typename T, bool IsMasked> class PandasHashTable {
133135
return hash_map_.get(ckey) != hash_map_.end();
134136
}
135137

136-
auto SizeOf() const noexcept -> size_t {
137-
constexpr size_t overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t *);
138+
auto SizeOf() const noexcept {
139+
constexpr auto overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t *);
138140
const auto for_flags =
139141
std::max(1U, hash_map_.n_buckets() >> 5) * sizeof(uint32_t);
140142
const auto for_pairs =
@@ -158,7 +160,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
158160
/// Returns
159161
/// -------
160162
/// The position of the requested integer.
161-
auto GetItem(T key) -> int64_t {
163+
auto GetItem(T key) {
162164
const auto k = hash_map_.get(key);
163165
if (k != hash_map_.end()) {
164166
return hash_map_.value(k);
@@ -173,7 +175,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
173175
/// Returns
174176
/// -------
175177
/// The position of the last na value.
176-
auto GetNA() const -> int64_t {
178+
auto GetNA() const {
177179
// TODO: missing NotImplementedError for mask, although this should really
178180
// just be templated out
179181
if (na_position_ == -1) {
@@ -191,14 +193,14 @@ template <typename T, bool IsMasked> class PandasHashTable {
191193
}
192194
}
193195

194-
auto MapKeysToValues(const nb::ndarray<const T, nb::ndim<1>> &keys,
195-
const nb::ndarray<const int64_t, nb::ndim<1>> &values) noexcept
196-
-> void {
196+
auto MapKeysToValues(
197+
const nb::ndarray<const T, nb::ndim<1>> &keys,
198+
const nb::ndarray<const int64_t, nb::ndim<1>> &values) noexcept -> void {
197199
nb::call_guard<nb::gil_scoped_release>();
198200
const auto keys_v = keys.view();
199201
const auto values_v = values.view();
200202

201-
for (size_t i = 0; i < values_v.shape(0); i++) {
203+
for (decltype(values_v.shape(0)) i = 0; i < values_v.shape(0); i++) {
202204
hash_map_[keys_v(i)] = values_v(i);
203205
}
204206
}
@@ -218,7 +220,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
218220
nb::cast<nb::ndarray<const uint8_t, nb::ndim<1>>>(mask);
219221
const auto mask_v = mask_base.view();
220222
auto na_position = na_position_; // pandas uses int8_t here - why?
221-
for (size_t i = 0; i < values_v.shape(0); i++) {
223+
for (decltype(values_v.shape(0)) i = 0; i < values_v.shape(0); i++) {
222224
if (mask_v(i)) {
223225
na_position = i;
224226
} else {
@@ -227,7 +229,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
227229
}
228230
na_position_ = na_position;
229231
} else {
230-
for (size_t i = 0; i < values_v.shape(0); i++) {
232+
for (decltype(values_v.shape(0)) i = 0; i < values_v.shape(0); i++) {
231233
const auto key = values_v(i);
232234
hash_map_[key] = i;
233235
}
@@ -243,7 +245,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
243245
}
244246

245247
nb::call_guard<nb::gil_scoped_release>();
246-
const size_t n = values.shape(0);
248+
const auto n = values.shape(0);
247249
auto *locs = new Py_ssize_t[n];
248250
const auto values_v = values.view();
249251

@@ -253,7 +255,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
253255
const auto mask_base =
254256
nb::cast<nb::ndarray<const uint8_t, nb::ndim<1>>>(mask);
255257
const auto mask_v = mask_base.view();
256-
for (size_t i = 0; i < n; i++) {
258+
for (decltype(values.shape(0)) i = 0; i < n; i++) {
257259
if (mask_v(i)) {
258260
locs[i] = na_position_;
259261
} else {
@@ -267,7 +269,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
267269
}
268270
}
269271
} else {
270-
for (size_t i = 0; i < n; i++) {
272+
for (decltype(values.shape(n)) i = 0; i < n; i++) {
271273
const auto val = values_v(i);
272274
const auto position = hash_map_.get(val);
273275
if (position == hash_map_.end()) {
@@ -283,7 +285,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
283285

284286
const size_t shape[1] = {n};
285287
return nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1>>(locs, 1, shape,
286-
owner);
288+
owner);
287289
}
288290

289291
/// Mutates uniques argument
@@ -327,7 +329,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
327329
auto *labels = new Py_ssize_t[n];
328330
Py_ssize_t count = 0;
329331

330-
for (size_t i = 0; i < n; i++) {
332+
for (decltype(values.shape(0)) i = 0; i < n; i++) {
331333
const auto val = values_v(i);
332334

333335
// specific for groupby
@@ -350,22 +352,21 @@ template <typename T, bool IsMasked> class PandasHashTable {
350352
}
351353

352354
nb::gil_scoped_acquire();
355+
356+
const size_t shape[1] = {n};
353357
nb::capsule owner(labels, [](void *p) noexcept { delete[](size_t *) p; });
354358
const auto labels_arr =
355-
nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1>>(labels,
356-
{
357-
n,
358-
},
359-
owner);
359+
nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1>>(labels, 1, shape,
360+
owner);
360361
return std::make_tuple(labels_arr, uniques.ToNdArray());
361362
}
362363

363364
private:
364365
auto UniqueInternal(const nb::ndarray<const T, nb::ndim<1>> &values,
365366
PandasVector<T> &uniques, Py_ssize_t count_prior = 0,
366367
[[maybe_unused]] Py_ssize_t na_sentinel = -1,
367-
[[maybe_unused]] nb::object na_value = nb::none(), bool ignore_na = false,
368-
nb::object mask_obj = nb::none(),
368+
[[maybe_unused]] nb::object na_value = nb::none(),
369+
bool ignore_na = false, nb::object mask_obj = nb::none(),
369370
bool return_inverse = false, bool use_result_mask = false)
370371
-> nb::object {
371372
if (use_result_mask && return_inverse) {
@@ -438,7 +439,8 @@ template <typename T, bool IsMasked> class PandasHashTable {
438439
template <bool IgnoreNA, bool UseNAValue>
439440
auto UniqueWithInverse(const nb::ndarray<const T, nb::ndim<1>> &values,
440441
PandasVector<T> &uniques, Py_ssize_t count_prior,
441-
[[maybe_unused]] Py_ssize_t na_sentinel, [[maybe_unused]] T na_value,
442+
[[maybe_unused]] Py_ssize_t na_sentinel,
443+
[[maybe_unused]] T na_value,
442444
[[maybe_unused]] nb::object mask_obj = nb::none())
443445
-> nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1>> {
444446
if constexpr (IsMasked) {
@@ -461,7 +463,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
461463
nb::call_guard<nb::gil_scoped_release>();
462464
const auto mask_v = mask.view();
463465

464-
for (size_t i = 0; i < n; i++) {
466+
for (decltype(values_v.shape(0)) i = 0; i < n; i++) {
465467
if constexpr (IgnoreNA) {
466468
if (mask_v(i)) {
467469
labels[i] = na_sentinel;
@@ -486,7 +488,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
486488
}
487489
} else {
488490
nb::call_guard<nb::gil_scoped_release>();
489-
for (size_t i = 0; i < n; i++) {
491+
for (decltype(values_v.shape(0)) i = 0; i < n; i++) {
490492
const auto val = values_v(i);
491493

492494
if constexpr (IgnoreNA) {
@@ -519,13 +521,10 @@ template <typename T, bool IsMasked> class PandasHashTable {
519521
}
520522
}
521523

524+
const size_t shape[1] = {n};
522525
nb::capsule owner(labels, [](void *p) noexcept { delete[](size_t *) p; });
523-
524-
return nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1>>(labels,
525-
{
526-
n,
527-
},
528-
owner);
526+
return nb::ndarray<nb::numpy, const Py_ssize_t, nb::ndim<1>>(labels, 1,
527+
shape, owner);
529528
}
530529

531530
template <bool IgnoreNA, bool UseNAValue>
@@ -553,7 +552,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
553552
const auto mask_v = mask.view();
554553

555554
bool seen_na = false;
556-
for (size_t i = 0; i < n; i++) {
555+
for (decltype(values.shape(0)) i = 0; i < n; i++) {
557556
const auto val = values_v(i);
558557

559558
if constexpr (IgnoreNA) {
@@ -595,7 +594,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
595594
}
596595
} else {
597596
nb::call_guard<nb::gil_scoped_release>();
598-
for (size_t i = 0; i < n; i++) {
597+
for (decltype(values.shape(0)) i = 0; i < n; i++) {
599598
const auto val = values_v(i);
600599
auto k = hash_map_.get(val);
601600
if (k == hash_map_.end()) {
@@ -631,7 +630,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
631630
}
632631
nb::call_guard<nb::gil_scoped_release>();
633632

634-
for (size_t i = 0; i < n; i++) {
633+
for (decltype(values.shape(0)) i = 0; i < n; i++) {
635634
if constexpr (IgnoreNA) {
636635
// TODO: current pandas code is a bit messy here...
637636
// labels[i] = na_sentinel
@@ -648,7 +647,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
648647
}
649648
} else {
650649
nb::call_guard<nb::gil_scoped_release>();
651-
for (size_t i = 0; i < n; i++) {
650+
for (decltype(values.shape(0)) i = 0; i < n; i++) {
652651
const auto val = values_v(i);
653652
auto k = hash_map_.get(val);
654653
if (k == hash_map_.end()) {
@@ -683,7 +682,7 @@ using namespace nb::literals;
683682
do { \
684683
nb::class_<PandasHashTable<TYPE, MASKED>>(m, NAME) \
685684
.def(nb::init<>()) \
686-
.def(nb::init<size_t>(), "size_hint"_a) \
685+
.def(nb::init<pd_kh_int_t>(), "size_hint"_a) \
687686
.def("__len__", &PandasHashTable<TYPE, MASKED>::__len__) \
688687
.def("__contains__", &PandasHashTable<TYPE, MASKED>::__contains__) \
689688
.def("sizeof", &PandasHashTable<TYPE, MASKED>::SizeOf) \

0 commit comments

Comments
 (0)