Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ hicpp-*,\
misc-*,\
-misc-const-correctness,\
-misc-include-cleaner,\
-misc-use-internal-linkage,\
modernize-*,\
-modernize-avoid-c-arrays,\
-modernize-use-auto,\
Expand Down
2 changes: 1 addition & 1 deletion examples/createAndDestroyDeviceObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class Image
STDGPU_HOST_DEVICE std::uint8_t&
operator()(const stdgpu::index_t x, const stdgpu::index_t y)
{
return _values[y * _width + x];
return _values[(y * _width) + x];
}

STDGPU_HOST_DEVICE stdgpu::index_t
Expand Down
9 changes: 5 additions & 4 deletions src/stdgpu/impl/algorithm_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ template <class T>
constexpr STDGPU_HOST_DEVICE const T&
min(const T& a, const T& b)
{
return (b < a) ? b : a;
return (b < a) ? b : a; // NOLINT(bugprone-return-const-ref-from-parameter)
}

template <class T>
constexpr STDGPU_HOST_DEVICE const T&
max(const T& a, const T& b)
{
return (a < b) ? b : a;
return (a < b) ? b : a; // NOLINT(bugprone-return-const-ref-from-parameter)
}

template <class T>
Expand All @@ -45,7 +45,8 @@ clamp(const T& v, const T& lower, const T& upper)
{
STDGPU_EXPECTS(!(upper < lower));

return v < lower ? lower : upper < v ? upper : v; // NOLINT(readability-avoid-nested-conditional-operator)
// NOLINTNEXTLINE(bugprone-return-const-ref-from-parameter,readability-avoid-nested-conditional-operator)
return v < lower ? lower : upper < v ? upper : v;
}

template <typename IndexType,
Expand All @@ -58,7 +59,7 @@ for_each_index(ExecutionPolicy&& policy, IndexType size, UnaryFunction f)
thrust::for_each(std::forward<ExecutionPolicy>(policy),
thrust::counting_iterator<IndexType>(0),
thrust::counting_iterator<IndexType>(size),
f);
std::move(f));
}

namespace detail
Expand Down
9 changes: 5 additions & 4 deletions src/stdgpu/impl/bitset_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#define STDGPU_BITSET_DETAIL_H

#include <limits>
#include <type_traits>

#include <stdgpu/algorithm.h>
#include <stdgpu/atomic.cuh>
Expand Down Expand Up @@ -119,7 +120,7 @@ div_up(const index_t a, const index_t b) noexcept
STDGPU_EXPECTS(a >= 0);
STDGPU_EXPECTS(b > 0);

index_t result = (a % b != 0) ? (a / b + 1) : (a / b);
index_t result = (a % b != 0) ? ((a / b) + 1) : (a / b);

STDGPU_ENSURES(result * b >= a);

Expand All @@ -146,7 +147,7 @@ private:
STDGPU_HOST_DEVICE Block
block_mask(const index_t i) const
{
index_t remaining_bits = _size - i * _bits_per_block;
index_t remaining_bits = _size - (i * _bits_per_block);
return (remaining_bits >= _bits_per_block)
? ~static_cast<Block>(0)
: (static_cast<Block>(1) << static_cast<Block>(remaining_bits)) - static_cast<Block>(1);
Expand Down Expand Up @@ -256,7 +257,7 @@ template <typename ExecutionPolicy,
inline void
bitset<Block, Allocator>::set(ExecutionPolicy&& policy)
{
fill(std::forward<ExecutionPolicy>(policy), device_begin(_bit_blocks), device_end(_bit_blocks), ~block_type(0));
fill(std::decay_t<ExecutionPolicy>{ policy }, device_begin(_bit_blocks), device_end(_bit_blocks), ~block_type(0));

STDGPU_ENSURES(count(std::forward<ExecutionPolicy>(policy)) == size());
}
Expand Down Expand Up @@ -284,7 +285,7 @@ template <typename ExecutionPolicy,
inline void
bitset<Block, Allocator>::reset(ExecutionPolicy&& policy)
{
fill(std::forward<ExecutionPolicy>(policy), device_begin(_bit_blocks), device_end(_bit_blocks), block_type(0));
fill(std::decay_t<ExecutionPolicy>{ policy }, device_begin(_bit_blocks), device_end(_bit_blocks), block_type(0));

STDGPU_ENSURES(count(std::forward<ExecutionPolicy>(policy)) == 0);
}
Expand Down
92 changes: 49 additions & 43 deletions src/stdgpu/impl/deque_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#ifndef STDGPU_DEQUE_DETAIL_H
#define STDGPU_DEQUE_DETAIL_H

#include <type_traits>

#include <stdgpu/contract.h>
#include <stdgpu/iterator.h>
#include <stdgpu/memory.h>
Expand All @@ -42,19 +44,21 @@ deque<T, Allocator>::createDeviceObject(ExecutionPolicy&& policy, const index_t&

deque<T, Allocator> result(
mutex_array<mutex_default_type, mutex_array_allocator_type>::createDeviceObject(
std::forward<ExecutionPolicy>(policy),
std::decay_t<ExecutionPolicy>{ policy },
capacity,
mutex_array_allocator_type(allocator)),
bitset<bitset_default_type, bitset_allocator_type>::createDeviceObject(
std::forward<ExecutionPolicy>(policy),
std::decay_t<ExecutionPolicy>{ policy },
capacity,
bitset_allocator_type(allocator)),
atomic<int, atomic_int_allocator_type>::createDeviceObject(std::forward<ExecutionPolicy>(policy),
atomic<int, atomic_int_allocator_type>::createDeviceObject(std::decay_t<ExecutionPolicy>{ policy },
atomic_int_allocator_type(allocator)),
atomic<unsigned int, atomic_uint_allocator_type>::createDeviceObject(std::forward<ExecutionPolicy>(policy),
atomic_uint_allocator_type(allocator)),
atomic<unsigned int, atomic_uint_allocator_type>::createDeviceObject(std::forward<ExecutionPolicy>(policy),
atomic_uint_allocator_type(allocator)),
atomic<unsigned int, atomic_uint_allocator_type>::createDeviceObject(
std::decay_t<ExecutionPolicy>{ policy },
atomic_uint_allocator_type(allocator)),
atomic<unsigned int, atomic_uint_allocator_type>::createDeviceObject(
std::decay_t<ExecutionPolicy>{ policy },
atomic_uint_allocator_type(allocator)),
allocator);
result._data = allocator_traits<allocator_type>::allocate(result._allocator, capacity);
result._range_indices = allocator_traits<index_allocator_type>::allocate(result._index_allocator, capacity);
Expand All @@ -77,7 +81,7 @@ deque<T, Allocator>::destroyDeviceObject(ExecutionPolicy&& policy, deque<T, Allo
{
if (!detail::is_destroy_optimizable<value_type>())
{
device_object.clear(std::forward<ExecutionPolicy>(policy));
device_object.clear(std::decay_t<ExecutionPolicy>{ policy });
}

allocator_traits<allocator_type>::deallocate(device_object._allocator,
Expand All @@ -89,13 +93,13 @@ deque<T, Allocator>::destroyDeviceObject(ExecutionPolicy&& policy, deque<T, Allo
device_object._data = nullptr;
device_object._range_indices = nullptr;
mutex_array<mutex_default_type, mutex_array_allocator_type>::destroyDeviceObject(
std::forward<ExecutionPolicy>(policy),
std::decay_t<ExecutionPolicy>{ policy },
device_object._locks);
bitset<bitset_default_type, bitset_allocator_type>::destroyDeviceObject(std::forward<ExecutionPolicy>(policy),
bitset<bitset_default_type, bitset_allocator_type>::destroyDeviceObject(std::decay_t<ExecutionPolicy>{ policy },
device_object._occupied);
atomic<int, atomic_int_allocator_type>::destroyDeviceObject(std::forward<ExecutionPolicy>(policy),
atomic<int, atomic_int_allocator_type>::destroyDeviceObject(std::decay_t<ExecutionPolicy>{ policy },
device_object._size);
atomic<unsigned int, atomic_uint_allocator_type>::destroyDeviceObject(std::forward<ExecutionPolicy>(policy),
atomic<unsigned int, atomic_uint_allocator_type>::destroyDeviceObject(std::decay_t<ExecutionPolicy>{ policy },
device_object._begin);
atomic<unsigned int, atomic_uint_allocator_type>::destroyDeviceObject(std::forward<ExecutionPolicy>(policy),
device_object._end);
Expand Down Expand Up @@ -550,48 +554,50 @@ template <typename ExecutionPolicy,
inline void
deque<T, Allocator>::clear(ExecutionPolicy&& policy)
{
if (empty(std::forward<ExecutionPolicy>(policy)))
if (empty(std::decay_t<ExecutionPolicy>{ policy }))
{
return;
}

if (!detail::is_destroy_optimizable<value_type>())
{
const index_t begin = static_cast<index_t>(_begin.load(std::forward<ExecutionPolicy>(policy)));
const index_t end = static_cast<index_t>(_end.load(std::forward<ExecutionPolicy>(policy)));
const index_t begin = static_cast<index_t>(_begin.load(std::decay_t<ExecutionPolicy>{ policy }));
const index_t end = static_cast<index_t>(_end.load(std::decay_t<ExecutionPolicy>{ policy }));

// Full, i.e. one large block and begin == end
if (full(std::forward<ExecutionPolicy>(policy)))
if (full(std::decay_t<ExecutionPolicy>{ policy }))
{
detail::unoptimized_destroy(std::forward<ExecutionPolicy>(policy), device_begin(_data), device_end(_data));
detail::unoptimized_destroy(std::decay_t<ExecutionPolicy>{ policy },
device_begin(_data),
device_end(_data));
}
// One large block
else if (begin <= end)
{
detail::unoptimized_destroy(std::forward<ExecutionPolicy>(policy),
detail::unoptimized_destroy(std::decay_t<ExecutionPolicy>{ policy },
make_device(_data + begin),
make_device(_data + end));
}
// Two disconnected blocks
else
{
detail::unoptimized_destroy(std::forward<ExecutionPolicy>(policy),
detail::unoptimized_destroy(std::decay_t<ExecutionPolicy>{ policy },
device_begin(_data),
make_device(_data + end));
detail::unoptimized_destroy(std::forward<ExecutionPolicy>(policy),
detail::unoptimized_destroy(std::decay_t<ExecutionPolicy>{ policy },
make_device(_data + begin),
device_end(_data));
}
}

_occupied.reset(std::forward<ExecutionPolicy>(policy));
_occupied.reset(std::decay_t<ExecutionPolicy>{ policy });

_size.store(std::forward<ExecutionPolicy>(policy), 0);
_size.store(std::decay_t<ExecutionPolicy>{ policy }, 0);

_begin.store(std::forward<ExecutionPolicy>(policy), 0);
_end.store(std::forward<ExecutionPolicy>(policy), 0);
_begin.store(std::decay_t<ExecutionPolicy>{ policy }, 0);
_end.store(std::decay_t<ExecutionPolicy>{ policy }, 0);

STDGPU_ENSURES(empty(std::forward<ExecutionPolicy>(policy)));
STDGPU_ENSURES(empty(std::decay_t<ExecutionPolicy>{ policy }));
STDGPU_ENSURES(valid(std::forward<ExecutionPolicy>(policy)));
}

Expand All @@ -614,9 +620,9 @@ deque<T, Allocator>::valid(ExecutionPolicy&& policy) const
return true;
}

return (size_valid(std::forward<ExecutionPolicy>(policy)) &&
occupied_count_valid(std::forward<ExecutionPolicy>(policy)) &&
_locks.valid(std::forward<ExecutionPolicy>(policy)));
return (size_valid(std::decay_t<ExecutionPolicy>{ policy }) &&
occupied_count_valid(std::decay_t<ExecutionPolicy>{ policy }) &&
_locks.valid(std::decay_t<ExecutionPolicy>{ policy }));
}

template <typename T, typename Allocator>
Expand All @@ -632,30 +638,30 @@ template <typename ExecutionPolicy,
stdgpu::device_indexed_range<T>
deque<T, Allocator>::device_range(ExecutionPolicy&& policy)
{
const index_t begin = static_cast<index_t>(_begin.load(std::forward<ExecutionPolicy>(policy)));
const index_t end = static_cast<index_t>(_end.load(std::forward<ExecutionPolicy>(policy)));
const index_t begin = static_cast<index_t>(_begin.load(std::decay_t<ExecutionPolicy>{ policy }));
const index_t end = static_cast<index_t>(_end.load(std::decay_t<ExecutionPolicy>{ policy }));

// Full, i.e. one large block and begin == end
if (full(std::forward<ExecutionPolicy>(policy)))
if (full(std::decay_t<ExecutionPolicy>{ policy }))
{
iota(std::forward<ExecutionPolicy>(policy), device_begin(_range_indices), device_end(_range_indices), 0);
iota(std::decay_t<ExecutionPolicy>{ policy }, device_begin(_range_indices), device_end(_range_indices), 0);
}
// One large block, including empty block
else if (begin <= end)
{
iota(std::forward<ExecutionPolicy>(policy),
iota(std::decay_t<ExecutionPolicy>{ policy },
device_begin(_range_indices),
device_begin(_range_indices) + (end - begin),
begin);
}
// Two disconnected blocks
else
{
iota(std::forward<ExecutionPolicy>(policy),
iota(std::decay_t<ExecutionPolicy>{ policy },
device_begin(_range_indices),
device_begin(_range_indices) + end,
0);
iota(std::forward<ExecutionPolicy>(policy),
iota(std::decay_t<ExecutionPolicy>{ policy },
device_begin(_range_indices) + end,
device_begin(_range_indices) + (end + capacity() - begin),
begin);
Expand All @@ -679,30 +685,30 @@ template <typename ExecutionPolicy,
stdgpu::device_indexed_range<const T>
deque<T, Allocator>::device_range(ExecutionPolicy&& policy) const
{
const index_t begin = static_cast<index_t>(_begin.load(std::forward<ExecutionPolicy>(policy)));
const index_t end = static_cast<index_t>(_end.load(std::forward<ExecutionPolicy>(policy)));
const index_t begin = static_cast<index_t>(_begin.load(std::decay_t<ExecutionPolicy>{ policy }));
const index_t end = static_cast<index_t>(_end.load(std::decay_t<ExecutionPolicy>{ policy }));

// Full, i.e. one large block and begin == end
if (full(std::forward<ExecutionPolicy>(policy)))
if (full(std::decay_t<ExecutionPolicy>{ policy }))
{
iota(std::forward<ExecutionPolicy>(policy), device_begin(_range_indices), device_end(_range_indices), 0);
iota(std::decay_t<ExecutionPolicy>{ policy }, device_begin(_range_indices), device_end(_range_indices), 0);
}
// One large block, including empty block
else if (begin <= end)
{
iota(std::forward<ExecutionPolicy>(policy),
iota(std::decay_t<ExecutionPolicy>{ policy },
device_begin(_range_indices),
device_begin(_range_indices) + (end - begin),
begin);
}
// Two disconnected blocks
else
{
iota(std::forward<ExecutionPolicy>(policy),
iota(std::decay_t<ExecutionPolicy>{ policy },
device_begin(_range_indices),
device_begin(_range_indices) + end,
0);
iota(std::forward<ExecutionPolicy>(policy),
iota(std::decay_t<ExecutionPolicy>{ policy },
device_begin(_range_indices) + end,
device_begin(_range_indices) + (end + capacity() - begin),
begin);
Expand All @@ -726,7 +732,7 @@ template <typename ExecutionPolicy,
bool
deque<T, Allocator>::occupied_count_valid(ExecutionPolicy&& policy) const
{
index_t size_count = size(std::forward<ExecutionPolicy>(policy));
index_t size_count = size(std::decay_t<ExecutionPolicy>{ policy });
index_t size_sum = _occupied.count(std::forward<ExecutionPolicy>(policy));

return (size_count == size_sum);
Expand Down
6 changes: 3 additions & 3 deletions src/stdgpu/impl/memory_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ unoptimized_destroy(ExecutionPolicy&& policy, Iterator first, Iterator last)

template <typename T>
T*
createDeviceArray(const stdgpu::index64_t count, const T default_value)
createDeviceArray(const stdgpu::index64_t count, const T default_value) // NOLINT(performance-unnecessary-value-param)
{
T* device_array = nullptr;

Expand Down Expand Up @@ -441,8 +441,8 @@ template <typename ExecutionPolicy,
typename... Args,
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
device_unique_object<T>::device_unique_object(ExecutionPolicy&& policy, Args&&... args)
: _object(new T(T::createDeviceObject(std::forward<ExecutionPolicy>(policy), std::forward<Args>(args)...)),
[_policy = std::forward<ExecutionPolicy>(policy)](T* ptr)
: _object(new T(T::createDeviceObject(std::decay_t<ExecutionPolicy>{ policy }, std::forward<Args>(args)...)),
[_policy = std::decay_t<ExecutionPolicy>{ policy }](T* ptr)
{
T::destroyDeviceObject(_policy, *ptr);
delete ptr;
Expand Down
Loading