Skip to content

Commit 01d3b7a

Browse files
authored
Merge pull request #45 from jpetso/master
Make cppcodec runtime performance competitive
2 parents 374bb3b + 5a0bdaa commit 01d3b7a

19 files changed

+465
-217
lines changed

LICENSE.txt renamed to LICENSE

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
The MIT License (MIT)
2-
3-
(See individual files for copyright holders.)
1+
Copyright (c) 2015 Topology Inc.
2+
Copyright (c) 2018 Jakob Petsovits
3+
Copyright (c) various other contributors, see individual files
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ and hex (a.k.a. base16) as specified in RFC 4648, plus Crockford's base32.
77

88
MIT licensed with consistent, flexible API. Supports raw pointers,
99
`std::string` and (templated) character vectors without unnecessary allocations.
10+
Cross-platform, with decent measured performance and no compiler warnings.
1011

1112

1213

cppcodec/base32_crockford.hpp

Lines changed: 21 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -39,46 +39,36 @@ static constexpr const char base32_crockford_alphabet[] = {
3939
'P', 'Q', 'R', 'S', 'T', // 27 - no U
4040
'V', 'W', 'X', 'Y', 'Z' // 32
4141
};
42-
static_assert(sizeof(base32_crockford_alphabet) == 32, "base32 alphabet must have 32 values");
4342

4443
class base32_crockford_base
4544
{
4645
public:
47-
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return false; }
48-
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return false; }
49-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(char /*c*/) { return false; }
50-
51-
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(uint8_t index)
46+
static CPPCODEC_ALWAYS_INLINE constexpr size_t alphabet_size() {
47+
static_assert(sizeof(base32_crockford_alphabet) == 32, "base32 alphabet must have 32 values");
48+
return sizeof(base32_crockford_alphabet);
49+
}
50+
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(alphabet_index_t idx)
5251
{
53-
return base32_crockford_alphabet[index];
52+
return base32_crockford_alphabet[idx];
5453
}
55-
56-
static CPPCODEC_ALWAYS_INLINE constexpr uint8_t index_of(char c)
54+
static CPPCODEC_ALWAYS_INLINE constexpr char normalized_symbol(char c)
5755
{
58-
return (c >= '0' && c <= '9') ? (c - '0')
59-
// upper-case letters
60-
: (c >= 'A' && c <= 'H') ? (c - 'A' + 10) // no I
61-
: (c >= 'J' && c <= 'K') ? (c - 'J' + 18) // no L
62-
: (c >= 'M' && c <= 'N') ? (c - 'M' + 20) // no O
63-
: (c >= 'P' && c <= 'T') ? (c - 'P' + 22) // no U
64-
: (c >= 'V' && c <= 'Z') ? (c - 'V' + 27)
65-
// lower-case letters
66-
: (c >= 'a' && c <= 'h') ? (c - 'a' + 10) // no I
67-
: (c >= 'j' && c <= 'k') ? (c - 'j' + 18) // no L
68-
: (c >= 'm' && c <= 'n') ? (c - 'm' + 20) // no O
69-
: (c >= 'p' && c <= 't') ? (c - 'p' + 22) // no U
70-
: (c >= 'v' && c <= 'z') ? (c - 'v' + 27)
71-
: (c == '-') ? 253 // "Hyphens (-) can be inserted into strings [for readability]."
72-
: (c == '\0') ? 255 // stop at end of string
73-
// special cases
74-
: (c == 'O' || c == 'o') ? 0
75-
: (c == 'I' || c == 'i' || c == 'L' || c == 'l') ? 1
76-
: throw symbol_error(c);
56+
// Crockford base32 decoding is case-insensitive and additionally maps the look-alike
57+
// letters O/o to '0' and I/i/L/l to '1'; all other characters pass through unchanged.
58+
return (c == 'O' || c == 'o') ? '0'
59+
: (c == 'I' || c == 'i' || c == 'L' || c == 'l') ? '1'
60+
: (c >= 'a' && c <= 'z') ? (c - 'a' + 'A')
61+
: c;
7762
}
7863

79-
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(uint8_t index) { return index == 253; }
80-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_special_character(uint8_t index) { return index > 32; }
81-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof(uint8_t index) { return index == 255; }
64+
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return false; }
65+
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return false; }
66+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(char c) { return false; }
67+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof_symbol(char c) { return c == '\0'; }
68+
69+
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(char c) {
70+
return c == '-'; // "Hyphens (-) can be inserted into strings [for readability]."
71+
}
8272
};
8373

8474
// base32_crockford is a concatenative iterative (i.e. streaming) interpretation of Crockford base32.

cppcodec/base32_hex.hpp

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,37 +37,34 @@ static constexpr const char base32_hex_alphabet[] = {
3737
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
3838
'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V'
3939
};
40-
static_assert(sizeof(base32_hex_alphabet) == 32, "base32 alphabet must have 32 values");
4140

4241
class base32_hex
4342
{
4443
public:
4544
template <typename Codec> using codec_impl = stream_codec<Codec, base32_hex>;
4645

47-
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return true; }
48-
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return true; }
49-
static CPPCODEC_ALWAYS_INLINE constexpr char padding_symbol() { return '='; }
50-
51-
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(uint8_t index)
46+
static CPPCODEC_ALWAYS_INLINE constexpr size_t alphabet_size() {
47+
static_assert(sizeof(base32_hex_alphabet) == 32, "base32 alphabet must have 32 values");
48+
return sizeof(base32_hex_alphabet);
49+
}
50+
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(alphabet_index_t idx)
5251
{
53-
return base32_hex_alphabet[index];
52+
return base32_hex_alphabet[idx];
5453
}
55-
56-
static CPPCODEC_ALWAYS_INLINE constexpr uint8_t index_of(char c)
54+
static CPPCODEC_ALWAYS_INLINE constexpr char normalized_symbol(char c)
5755
{
58-
return (c >= '0' && c <= '9') ? (c - '0')
59-
: (c >= 'A' && c <= 'V') ? (c - 'A' + 10)
60-
: (c == padding_symbol()) ? 254
61-
: (c == '\0') ? 255 // stop at end of string
62-
: (c >= 'a' && c <= 'v') ? (c - 'a' + 10) // lower-case: not expected, but accepted
63-
: throw symbol_error(c);
56+
// Lower-case letters are accepted, though not generally expected.
57+
return (c >= 'a' && c <= 'v') ? (c - 'a' + 'A') : c;
6458
}
6559

60+
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return true; }
61+
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return true; }
62+
static CPPCODEC_ALWAYS_INLINE constexpr char padding_symbol() { return '='; }
63+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(char c) { return c == '='; }
64+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof_symbol(char c) { return c == '\0'; }
65+
6666
// RFC4648 does not specify any whitespace being allowed in base32 encodings.
67-
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(uint8_t /*index*/) { return false; }
68-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_special_character(uint8_t index) { return index > 32; }
69-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(uint8_t index) { return index == 254; }
70-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof(uint8_t index) { return index == 255; }
67+
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(char) { return false; }
7168
};
7269

7370
} // namespace detail

cppcodec/base32_rfc4648.hpp

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,37 +37,34 @@ static constexpr const char base32_rfc4648_alphabet[] = {
3737
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', // at index 26
3838
'2', '3', '4', '5', '6', '7'
3939
};
40-
static_assert(sizeof(base32_rfc4648_alphabet) == 32, "base32 alphabet must have 32 values");
4140

4241
class base32_rfc4648
4342
{
4443
public:
4544
template <typename Codec> using codec_impl = stream_codec<Codec, base32_rfc4648>;
4645

47-
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return true; }
48-
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return true; }
49-
static CPPCODEC_ALWAYS_INLINE constexpr char padding_symbol() { return '='; }
50-
51-
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(uint8_t index)
46+
static CPPCODEC_ALWAYS_INLINE constexpr size_t alphabet_size() {
47+
static_assert(sizeof(base32_rfc4648_alphabet) == 32, "base32 alphabet must have 32 values");
48+
return sizeof(base32_rfc4648_alphabet);
49+
}
50+
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(alphabet_index_t idx)
5251
{
53-
return base32_rfc4648_alphabet[index];
52+
return base32_rfc4648_alphabet[idx];
5453
}
55-
56-
static CPPCODEC_ALWAYS_INLINE constexpr uint8_t index_of(char c)
54+
static CPPCODEC_ALWAYS_INLINE constexpr char normalized_symbol(char c)
5755
{
58-
return (c >= 'A' && c <= 'Z') ? (c - 'A')
59-
: (c >= '2' && c <= '7') ? (c - '2' + 26)
60-
: (c == padding_symbol()) ? 254
61-
: (c == '\0') ? 255 // stop at end of string
62-
: (c >= 'a' && c <= 'z') ? (c - 'a') // lower-case: not expected, but accepted
63-
: throw symbol_error(c);
56+
// Lower-case letters are accepted, though not generally expected.
57+
return (c >= 'a' && c <= 'z') ? (c - 'a' + 'A') : c;
6458
}
6559

60+
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return true; }
61+
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return true; }
62+
static CPPCODEC_ALWAYS_INLINE constexpr char padding_symbol() { return '='; }
63+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(char c) { return c == '='; }
64+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof_symbol(char c) { return c == '\0'; }
65+
6666
// RFC4648 does not specify any whitespace being allowed in base32 encodings.
67-
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(uint8_t /*index*/) { return false; }
68-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_special_character(uint8_t index) { return index > 32; }
69-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(uint8_t index) { return index == 254; }
70-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof(uint8_t index) { return index == 255; }
67+
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(char) { return false; }
7168
};
7269

7370
} // namespace detail

cppcodec/base64_rfc4648.hpp

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,39 +38,30 @@ static constexpr const char base64_rfc4648_alphabet[] = {
3838
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
3939
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
4040
};
41-
static_assert(sizeof(base64_rfc4648_alphabet) == 64, "base64 alphabet must have 64 values");
4241

4342
class base64_rfc4648
4443
{
4544
public:
4645
template <typename Codec> using codec_impl = stream_codec<Codec, base64_rfc4648>;
4746

48-
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return true; }
49-
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return true; }
50-
static CPPCODEC_ALWAYS_INLINE constexpr char padding_symbol() { return '='; }
51-
52-
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(uint8_t index)
53-
{
54-
return base64_rfc4648_alphabet[index];
47+
static CPPCODEC_ALWAYS_INLINE constexpr size_t alphabet_size() {
48+
static_assert(sizeof(base64_rfc4648_alphabet) == 64, "base64 alphabet must have 64 values");
49+
return sizeof(base64_rfc4648_alphabet);
5550
}
56-
57-
static CPPCODEC_ALWAYS_INLINE constexpr uint8_t index_of(char c)
51+
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(alphabet_index_t idx)
5852
{
59-
return (c >= 'A' && c <= 'Z') ? (c - 'A')
60-
: (c >= 'a' && c <= 'z') ? (c - 'a' + 26)
61-
: (c >= '0' && c <= '9') ? (c - '0' + 52)
62-
: (c == '+') ? (c - '+' + 62)
63-
: (c == '/') ? (c - '/' + 63)
64-
: (c == padding_symbol()) ? 254
65-
: (c == '\0') ? 255 // stop at end of string
66-
: throw symbol_error(c);
53+
return base64_rfc4648_alphabet[idx];
6754
}
55+
static CPPCODEC_ALWAYS_INLINE constexpr char normalized_symbol(char c) { return c; }
56+
57+
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return true; }
58+
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return true; }
59+
static CPPCODEC_ALWAYS_INLINE constexpr char padding_symbol() { return '='; }
60+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(char c) { return c == '='; }
61+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof_symbol(char c) { return c == '\0'; }
6862

6963
// RFC4648 does not specify any whitespace being allowed in base64 encodings.
70-
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(uint8_t /*index*/) { return false; }
71-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_special_character(uint8_t index) { return index > 64; }
72-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(uint8_t index) { return index == 254; }
73-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof(uint8_t index) { return index == 255; }
64+
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(char) { return false; }
7465
};
7566

7667
} // namespace detail

cppcodec/base64_url.hpp

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -40,39 +40,30 @@ static constexpr const char base64_url_alphabet[] = {
4040
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
4141
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
4242
};
43-
static_assert(sizeof(base64_url_alphabet) == 64, "base64 alphabet must have 64 values");
4443

4544
class base64_url
4645
{
4746
public:
4847
template <typename Codec> using codec_impl = stream_codec<Codec, base64_url>;
4948

50-
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return true; }
51-
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return true; }
52-
static CPPCODEC_ALWAYS_INLINE constexpr char padding_symbol() { return '='; }
53-
54-
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(uint8_t index)
55-
{
56-
return base64_url_alphabet[index];
49+
static CPPCODEC_ALWAYS_INLINE constexpr size_t alphabet_size() {
50+
static_assert(sizeof(base64_url_alphabet) == 64, "base64 alphabet must have 64 values");
51+
return sizeof(base64_url_alphabet);
5752
}
58-
59-
static CPPCODEC_ALWAYS_INLINE constexpr uint8_t index_of(char c)
53+
static CPPCODEC_ALWAYS_INLINE constexpr char symbol(alphabet_index_t idx)
6054
{
61-
return (c >= 'A' && c <= 'Z') ? (c - 'A')
62-
: (c >= 'a' && c <= 'z') ? (c - 'a' + 26)
63-
: (c >= '0' && c <= '9') ? (c - '0' + 52)
64-
: (c == '-') ? (c - '-' + 62)
65-
: (c == '_') ? (c - '_' + 63)
66-
: (c == padding_symbol()) ? 254
67-
: (c == '\0') ? 255 // stop at end of string
68-
: throw symbol_error(c);
55+
return base64_url_alphabet[idx];
6956
}
57+
static CPPCODEC_ALWAYS_INLINE constexpr char normalized_symbol(char c) { return c; }
58+
59+
static CPPCODEC_ALWAYS_INLINE constexpr bool generates_padding() { return true; }
60+
static CPPCODEC_ALWAYS_INLINE constexpr bool requires_padding() { return true; }
61+
static CPPCODEC_ALWAYS_INLINE constexpr char padding_symbol() { return '='; }
62+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(char c) { return c == '='; }
63+
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof_symbol(char c) { return c == '\0'; }
7064

7165
// RFC4648 does not specify any whitespace being allowed in base64 encodings.
72-
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(uint8_t /*index*/) { return false; }
73-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_special_character(uint8_t index) { return index > 64; }
74-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_padding_symbol(uint8_t index) { return index == 254; }
75-
static CPPCODEC_ALWAYS_INLINE constexpr bool is_eof(uint8_t index) { return index == 255; }
66+
static CPPCODEC_ALWAYS_INLINE constexpr bool should_ignore(char) { return false; }
7667
};
7768

7869
} // namespace detail

cppcodec/data/access.hpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727

2828
#include <stdint.h> // for size_t
2929
#include <string> // for static_assert() checking that string will be optimized
30-
#include <type_traits> // for std::enable_if and such
30+
#include <type_traits> // for std::enable_if, std::remove_reference, and such
31+
#include <utility> // for std::declval
3132
#include <vector> // for static_assert() checking that vector will be optimized
3233

3334
#include "../detail/config.hpp" // for CPPCODEC_ALWAYS_INLINE
@@ -151,10 +152,18 @@ class direct_data_access_result_state
151152
// next resize(). In that light, resize from the start and
152153
// slightly reduce the size at the end if necessary.
153154
result.resize(capacity);
155+
156+
// result.data() may perform a calculation to retrieve the address.
157+
// E.g. std::string (since C++11) will use small string optimization,
158+
// so it needs to check if it's using allocated data or (ab)using
159+
// its own member variables interpreted as char array.
160+
// (This result_state is used for std::string starting with C++17.)
161+
// Conditional code paths are slow so we only do it once, at the start.
162+
m_buffer = result.data();
154163
}
155164
CPPCODEC_ALWAYS_INLINE void put(Result& result, char c)
156165
{
157-
result.data()[m_offset++] = c;
166+
m_buffer[m_offset++] = c;
158167
}
159168
CPPCODEC_ALWAYS_INLINE void finish(Result& result)
160169
{
@@ -165,6 +174,9 @@ class direct_data_access_result_state
165174
return m_offset;
166175
}
167176
private:
177+
// Make sure to get the mutable buffer decltype by using assignment.
178+
typename std::remove_reference<
179+
decltype(std::declval<Result>().data()[size_t(0)] = 'x')>::type* m_buffer;
168180
size_t m_offset = 0;
169181
};
170182

@@ -264,16 +276,16 @@ CPPCODEC_ALWAYS_INLINE array_access_result_state<Result> create_state(Result&, s
264276
return array_access_result_state<Result>();
265277
}
266278

267-
#if __cplusplus < 201703L
279+
#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG > 201703L)
280+
static_assert(std::is_same<
281+
decltype(create_state(*(std::string*)nullptr, specific_t())),
282+
direct_data_access_result_state<std::string>>::value,
283+
"std::string (C++17 and later) must be handled by direct_data_access_result_state");
284+
#elif __cplusplus < 201703 && !defined(_MSVC_LANG) // we can't trust MSVC to set this right
268285
static_assert(std::is_same<
269286
decltype(create_state(*(std::string*)nullptr, specific_t())),
270287
array_access_result_state<std::string>>::value,
271288
"std::string (pre-C++17) must be handled by array_access_result_state");
272-
#else
273-
static_assert(std::is_same<
274-
decltype(create_state(*(std::string*)nullptr, specific_t())),
275-
direct_data_access_result_state<std::string>>::value,
276-
"std::string (C++17 and later) must be handled by direct_data_access_result_state");
277289
#endif
278290

279291
// Specialized init(), put() and finish() functions for array_access_result_state.

0 commit comments

Comments
 (0)