Skip to content

Commit 5be538e

Browse files
authored
Merge pull request #40 from jpetso/master
Further improve performance by fixing specialized data accessors
2 parents 34b4f92 + 840ac79 commit 5be538e

File tree

1 file changed

+159
-31
lines changed

1 file changed

+159
-31
lines changed

cppcodec/data/access.hpp

Lines changed: 159 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,11 @@
2525
#define CPPCODEC_DETAIL_DATA_ACCESS
2626

2727
#include <stdint.h> // for size_t
28+
#include <string> // for static_assert() checking that string will be optimized
29+
#include <type_traits> // for std::enable_if and such
30+
#include <vector> // for static_assert() checking that vector will be optimized
31+
32+
#include "../detail/config.hpp" // for CPPCODEC_ALWAYS_INLINE
2833

2934
namespace cppcodec {
3035
namespace data {
@@ -37,36 +42,43 @@ namespace data {
3742
// For const (read-only) types: char_data(const T&)
3843
// For both const and result types: size(const T&)
3944

40-
template <typename T> inline size_t size(const T& t) { return t.size(); }
41-
template <typename T, size_t N> inline constexpr size_t size(const T (&t)[N]) noexcept {
45+
template <typename T>
46+
CPPCODEC_ALWAYS_INLINE size_t size(const T& t) { return t.size(); }
47+
48+
template <typename T, size_t N>
49+
CPPCODEC_ALWAYS_INLINE constexpr size_t size(const T (&t)[N]) noexcept {
4250
return N * sizeof(t[0]);
4351
}
4452

4553
class general_t {};
4654
class specific_t : public general_t {};
4755

4856
class empty_result_state {
49-
template <typename Result> inline void size(const Result& result) { return size(result); }
57+
template <typename Result>
58+
CPPCODEC_ALWAYS_INLINE void size(const Result& result) { return size(result); }
5059
};
5160

5261
// SFINAE: Generic fallback in case no specific state function applies.
5362
template <typename Result>
54-
inline empty_result_state create_state(Result&, general_t) { return empty_result_state(); }
63+
CPPCODEC_ALWAYS_INLINE empty_result_state create_state(Result&, general_t)
64+
{
65+
return empty_result_state();
66+
}
5567

5668
//
5769
// Generic templates for containers: Use these init()/put()/finish()
5870
// implementations if no specialization was found.
5971
//
6072

6173
template <typename Result>
62-
inline void init(Result& result, empty_result_state&, size_t capacity)
74+
CPPCODEC_ALWAYS_INLINE void init(Result& result, empty_result_state&, size_t capacity)
6375
{
6476
result.resize(0);
6577
result.reserve(capacity);
6678
}
6779

6880
template <typename Result>
69-
inline void finish(Result&, empty_result_state&)
81+
CPPCODEC_ALWAYS_INLINE void finish(Result&, empty_result_state&)
7082
{
7183
// Default is to push_back(), which already increases the size.
7284
}
@@ -86,52 +98,68 @@ template <typename Result> inline void put_uint8(Result& result, uint8_t c) { re
8698

8799
template <bool> struct put_impl;
88100
template <> struct put_impl<true> { // put_uint8() available
89-
template<typename Result> static void put(Result& result, uint8_t c) { put_uint8(result, c); }
101+
template<typename Result>
102+
static CPPCODEC_ALWAYS_INLINE void put(Result& result, uint8_t c)
103+
{
104+
put_uint8(result, c);
105+
}
90106
};
91107
template <> struct put_impl<false> { // put_uint8() not available
92-
template<typename Result> static void put(Result& result, uint8_t c) {
108+
template<typename Result>
109+
static CPPCODEC_ALWAYS_INLINE void put(Result& result, uint8_t c)
110+
{
93111
result.push_back(static_cast<char>(c));
94112
}
95113
};
96114

97-
template <typename Result> inline void put(Result& result, empty_result_state&, uint8_t c)
115+
template <typename Result>
116+
CPPCODEC_ALWAYS_INLINE void put(Result& result, empty_result_state&, uint8_t c)
98117
{
99118
using namespace fallback;
100119
put_impl<sizeof(fallback::flag(), put_uint8(result, c), fallback::flag()) != 1>::put(result, c);
101120
}
102121

103122
//
104-
// Specialization for container types with direct mutable data access.
105-
// The expected way to specialize is to subclass empty_result_state and
123+
// Specialization for container types with direct mutable data access,
124+
// e.g. std::vector<uint8_t>.
125+
//
126+
// The expected way to specialize is to draft a new xyz_result_state type and
106127
// return an instance of it from a create_state() template specialization.
107128
// You can then create overloads for init(), put() and finish()
108-
// that are more specific than the empty_result_state ones above.
109-
// See the example below for direct access to a mutable data() method.
129+
// for the new result state type.
110130
//
111131
// If desired, a non-templated overload for both specific types
112132
// (result & state) can be added to tailor it to that particular result type.
113133
//
114134

115-
template <typename Result> class direct_data_access_result_state : empty_result_state
135+
template <typename T>
136+
constexpr auto data_is_mutable(T* t) -> decltype(t->data()[size_t(0)] = 'x', bool())
116137
{
117-
public:
118-
using result_type = Result;
138+
return true;
139+
}
140+
constexpr bool data_is_mutable(...) { return false; }
119141

120-
inline void init(Result& result, size_t capacity)
142+
template <typename Result>
143+
class direct_data_access_result_state
144+
{
145+
public:
146+
CPPCODEC_ALWAYS_INLINE void init(Result& result, size_t capacity)
121147
{
122-
// resize(0) is not called here since we don't rely on it
123-
result.reserve(capacity);
148+
// reserve() may not actually allocate the storage right away,
149+
// and it isn't guaranteed that it will be untouched upon the
150+
// next resize(). In that light, resize from the start and
151+
// slightly reduce the size at the end if necessary.
152+
result.resize(capacity);
124153
}
125-
inline void put(Result& result, char c)
154+
CPPCODEC_ALWAYS_INLINE void put(Result& result, char c)
126155
{
127-
// This only compiles if decltype(data) == char*
128-
result.data()[m_offset++] = static_cast<char>(c);
156+
result.data()[m_offset++] = c;
129157
}
130-
inline void finish(Result& result)
158+
CPPCODEC_ALWAYS_INLINE void finish(Result& result)
131159
{
132160
result.resize(m_offset);
133161
}
134-
inline size_t size(const Result&)
162+
CPPCODEC_ALWAYS_INLINE size_t size(const Result&)
135163
{
136164
return m_offset;
137165
}
@@ -142,23 +170,123 @@ template <typename Result> class direct_data_access_result_state : empty_result_
142170
// SFINAE: Select a specific state based on the result type and possible result state type.
143171
// Implement this if direct data access (`result.data()[0] = 'x'`) isn't already possible
144172
// and you want to specialize it for your own result type.
145-
template <typename Result, typename ResultState =
146-
typename direct_data_access_result_state<Result>::result_type::value>
147-
inline ResultState create_state(Result&, specific_t) { return ResultState(); }
173+
// Note: The enable_if should ideally be part of the class declaration,
174+
// but Visual Studio C++ will not compile it that way.
175+
// Have it here in the factory function instead.
176+
template <typename Result,
177+
typename = typename std::enable_if<
178+
data_is_mutable((Result*)nullptr)>::type>
179+
CPPCODEC_ALWAYS_INLINE direct_data_access_result_state<Result> create_state(Result&, specific_t)
180+
{
181+
return direct_data_access_result_state<Result>();
182+
}
148183

184+
static_assert(std::is_same<
185+
decltype(create_state(*(std::vector<uint8_t>*)nullptr, specific_t())),
186+
direct_data_access_result_state<std::vector<uint8_t>>>::value,
187+
"std::vector<uint8_t> must be handled by direct_data_access_result_state");
188+
189+
// Specialized init(), put() and finish() functions for direct_data_access_result_state.
149190
template <typename Result>
150-
inline void init(Result& result, direct_data_access_result_state<Result>& state, size_t capacity)
191+
CPPCODEC_ALWAYS_INLINE void init(Result& result, direct_data_access_result_state<Result>& state, size_t capacity)
151192
{
152-
state.init(result);
193+
state.init(result, capacity);
153194
}
154195

155-
// Specialized put function for direct_data_access_result_state.
156196
template <typename Result>
157-
inline void put(Result& result, direct_data_access_result_state<Result>& state, char c)
197+
CPPCODEC_ALWAYS_INLINE void put(Result& result, direct_data_access_result_state<Result>& state, char c)
158198
{
159199
state.put(result, c);
160200
}
161201

202+
template <typename Result>
203+
CPPCODEC_ALWAYS_INLINE void finish(Result& result, direct_data_access_result_state<Result>& state)
204+
{
205+
state.finish(result);
206+
}
207+
208+
//
209+
// Specialization for container types with direct mutable array access,
210+
// e.g. std::string. This is generally faster because bound checks are
211+
// minimal and operator[] is more likely noexcept. In addition,
212+
// std::string::push_back() needs to write a null character on every
213+
// expansion, which should be more efficient when done in bulk by resize().
214+
//
215+
// Compared to the above, tracking an extra offset variable is cheap.
216+
//
217+
218+
template <typename T>
219+
constexpr auto array_access_is_mutable(T* t) -> decltype((*t)[size_t(0)] = 'x', bool())
220+
{
221+
return true;
222+
}
223+
constexpr bool array_access_is_mutable(...) { return false; }
224+
225+
template <typename Result>
226+
class array_access_result_state
227+
{
228+
public:
229+
CPPCODEC_ALWAYS_INLINE void init(Result& result, size_t capacity)
230+
{
231+
// reserve() may not actually allocate the storage right away,
232+
// and it isn't guaranteed that it will be untouched upon the
233+
// next resize(). In that light, resize from the start and
234+
// slightly reduce the size at the end if necessary.
235+
result.resize(capacity);
236+
}
237+
CPPCODEC_ALWAYS_INLINE void put(Result& result, char c)
238+
{
239+
result[m_offset++] = c;
240+
}
241+
CPPCODEC_ALWAYS_INLINE void finish(Result& result)
242+
{
243+
result.resize(m_offset);
244+
}
245+
CPPCODEC_ALWAYS_INLINE size_t size(const Result&)
246+
{
247+
return m_offset;
248+
}
249+
private:
250+
size_t m_offset = 0;
251+
};
252+
253+
// SFINAE: Select a specific state based on the result type and possible result state type.
254+
// Note: The enable_if should ideally be part of the class declaration,
255+
// but Visual Studio C++ will not compile it that way.
256+
// Have it here in the factory function instead.
257+
template <typename Result,
258+
typename = typename std::enable_if<
259+
!data_is_mutable((Result*)nullptr) // no more than one template option
260+
&& array_access_is_mutable((Result*)nullptr)>::type>
261+
CPPCODEC_ALWAYS_INLINE array_access_result_state<Result> create_state(Result&, specific_t)
262+
{
263+
return array_access_result_state<Result>();
264+
}
265+
266+
static_assert(std::is_same<
267+
decltype(create_state(*(std::string*)nullptr, specific_t())),
268+
array_access_result_state<std::string>>::value,
269+
"std::string must be handled by array_access_result_state");
270+
271+
// Specialized init(), put() and finish() functions for array_access_result_state.
272+
template <typename Result>
273+
CPPCODEC_ALWAYS_INLINE void init(Result& result, array_access_result_state<Result>& state, size_t capacity)
274+
{
275+
state.init(result, capacity);
276+
}
277+
278+
template <typename Result>
279+
CPPCODEC_ALWAYS_INLINE void put(Result& result, array_access_result_state<Result>& state, char c)
280+
{
281+
state.put(result, c);
282+
}
283+
284+
template <typename Result>
285+
CPPCODEC_ALWAYS_INLINE void finish(Result& result, array_access_result_state<Result>& state)
286+
{
287+
state.finish(result);
288+
}
289+
162290
// char_data() is only used to read, not for result buffers.
163291
template <typename T> inline const char* char_data(const T& t)
164292
{

0 commit comments

Comments
 (0)