Skip to content

Commit dc67bbc

Browse files
committed
ADD: Add DBNv2 decoding to C++ client
1 parent bf3895b commit dc67bbc

File tree

77 files changed

+1038
-290
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+1038
-290
lines changed

CHANGELOG.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,33 @@
11
# Changelog
22

3-
## 0.13.2 - TBD
3+
## 0.14.0 - TBD
44
### Enhancements
5+
- Added support for DBN encoding version 2 (DBNv2), affecting `SymbolMappingMsg`,
6+
`InstrumentDefMsg`, and `Metadata`
7+
- Version 1 structs can be converted to version 2 structs with the `ToV2()` method
8+
- Added `symbol_cstr_len` field to `Metadata` to indicate the length of fixed symbol
9+
strings
10+
- Added `stype_in` and `stype_out` fields to `SymbolMappingMsg` to provide more context
11+
with live symbology updates
12+
- Added `IndexTs` method to every record type, which returns the primary timestamp
13+
- Added `VersionUpgradePolicy` enum to allow specifying how to handle decoding records
14+
from prior DBN versions
15+
- Added `InstrumentDefMsgV2` and `SymbolMappingMsgV2` type aliases
16+
- Added `kDbnVersion` constant for current DBN version
17+
- Added `kSymbolCstrLen`, `kSymbolCstrLenV1`, and `kSymbolCstrLenV2` constants for the
18+
length of fixed-length symbol strings in different DBN versions
519
- Added new publisher values in preparation for IFEU.IMPACT and NDEX.IMPACT datasets
20+
- Added `kMaxRecordLen` constant for the length of the largest record type
21+
22+
### Breaking changes
23+
- The old `InstrumentDefMsg` is now `InstrumentDefMsgV1` in `compat.hpp`
24+
- The old `SymbolMappingMsg` is now `SymbolMappingMsgV1` in `compat.hpp`
25+
- Converted the following enums to enum classes to allow safely adding new variants:
26+
`SecurityUpdateAction` and `SType`
27+
- Renamed `dummy` to `reserved` in `InstrumentDefMsg`
28+
- Removed `reserved2`, `reserved3`, `reserved4`, and `reserved5` from `InstrumentDefMsg`
29+
- Moved position of `strike_price` within `InstrumentDefMsg`
30+
- Removed deprecated `SecurityUpdateAction::Invalid` variant
631

732
## 0.13.1 - 2023-10-23
833
### Enhancements

cmake/SourcesAndHeaders.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
set(headers
22
include/databento/batch.hpp
3+
include/databento/compat.hpp
34
include/databento/constants.hpp
45
include/databento/datetime.hpp
56
include/databento/dbn.hpp
@@ -34,6 +35,7 @@ set(headers
3435

3536
set(sources
3637
src/batch.cpp
38+
src/compat.cpp
3739
src/datetime.cpp
3840
src/dbn.cpp
3941
src/dbn_decoder.cpp

include/databento/compat.hpp

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
// Record definitions from previous DBN versions and helper functions.
2+
#pragma once
3+
4+
#include <cstddef> // size_t
5+
#include <cstdint>
6+
7+
#include "databento/constants.hpp" // kSymbolCstrLen
8+
#include "databento/datetime.hpp" // UnixNanos
9+
#include "databento/enums.hpp"
10+
#include "databento/record.hpp"
11+
12+
namespace databento {
13+
static constexpr std::size_t kSymbolCstrLenV1 = 22;
14+
static constexpr std::size_t kSymbolCstrLenV2 = kSymbolCstrLen;
15+
16+
constexpr std::size_t VersionSymbolCstrLen(std::uint8_t version) {
17+
if (version < 2) {
18+
return kSymbolCstrLenV1;
19+
}
20+
return kSymbolCstrLenV2;
21+
}
22+
23+
using InstrumentDefMsgV2 = InstrumentDefMsg;
24+
using SymbolMappingMsgV2 = SymbolMappingMsg;
25+
26+
// DBN version 1 instrument definition.
27+
struct InstrumentDefMsgV1 {
28+
static bool HasRType(RType rtype) { return rtype == RType::InstrumentDef; }
29+
30+
InstrumentDefMsgV2 ToV2() const;
31+
const char* Currency() const { return currency.data(); }
32+
const char* SettlCurrency() const { return settl_currency.data(); }
33+
const char* SecSubType() const { return secsubtype.data(); }
34+
const char* RawSymbol() const { return raw_symbol.data(); }
35+
const char* Group() const { return group.data(); }
36+
const char* Exchange() const { return exchange.data(); }
37+
const char* Asset() const { return asset.data(); }
38+
const char* Cfi() const { return cfi.data(); }
39+
const char* SecurityType() const { return security_type.data(); }
40+
const char* UnitOfMeasure() const { return unit_of_measure.data(); }
41+
const char* Underlying() const { return underlying.data(); }
42+
const char* StrikePriceCurrency() const {
43+
return strike_price_currency.data();
44+
}
45+
46+
RecordHeader hd;
47+
UnixNanos ts_recv;
48+
std::int64_t min_price_increment;
49+
std::int64_t display_factor;
50+
UnixNanos expiration;
51+
UnixNanos activation;
52+
std::int64_t high_limit_price;
53+
std::int64_t low_limit_price;
54+
std::int64_t max_price_variation;
55+
std::int64_t trading_reference_price;
56+
std::int64_t unit_of_measure_qty;
57+
std::int64_t min_price_increment_amount;
58+
std::int64_t price_ratio;
59+
std::int32_t inst_attrib_value;
60+
std::uint32_t underlying_id;
61+
std::uint32_t raw_instrument_id;
62+
std::int32_t market_depth_implied;
63+
std::int32_t market_depth;
64+
std::uint32_t market_segment_id;
65+
std::uint32_t max_trade_vol;
66+
std::int32_t min_lot_size;
67+
std::int32_t min_lot_size_block;
68+
std::int32_t min_lot_size_round_lot;
69+
std::uint32_t min_trade_vol;
70+
std::array<char, 4> _reserved2;
71+
std::int32_t contract_multiplier;
72+
std::int32_t decay_quantity;
73+
std::int32_t original_contract_size;
74+
std::array<char, 4> _reserved3;
75+
std::uint16_t trading_reference_date;
76+
std::int16_t appl_id;
77+
std::uint16_t maturity_year;
78+
std::uint16_t decay_start_date;
79+
std::uint16_t channel_id;
80+
std::array<char, 4> currency;
81+
std::array<char, 4> settl_currency;
82+
std::array<char, 6> secsubtype;
83+
std::array<char, kSymbolCstrLenV1> raw_symbol;
84+
std::array<char, 21> group;
85+
std::array<char, 5> exchange;
86+
std::array<char, 7> asset;
87+
std::array<char, 7> cfi;
88+
std::array<char, 7> security_type;
89+
std::array<char, 31> unit_of_measure;
90+
std::array<char, 21> underlying;
91+
std::array<char, 4> strike_price_currency;
92+
InstrumentClass instrument_class;
93+
std::array<char, 2> _reserved4;
94+
std::int64_t strike_price;
95+
std::array<char, 6> _reserved5;
96+
MatchAlgorithm match_algorithm;
97+
std::uint8_t md_security_trading_status;
98+
std::uint8_t main_fraction;
99+
std::uint8_t price_display_format;
100+
std::uint8_t settl_price_type;
101+
std::uint8_t sub_fraction;
102+
std::uint8_t underlying_product;
103+
SecurityUpdateAction security_update_action;
104+
std::uint8_t maturity_month;
105+
std::uint8_t maturity_day;
106+
std::uint8_t maturity_week;
107+
UserDefinedInstrument user_defined_instrument;
108+
std::int8_t contract_multiplier_unit;
109+
std::int8_t flow_schedule_type;
110+
std::uint8_t tick_rule;
111+
// padding for alignment
112+
std::array<char, 3> dummy;
113+
};
114+
static_assert(sizeof(InstrumentDefMsgV1) == 360);
115+
116+
/// A DBN version 1 symbol mapping message.
117+
struct SymbolMappingMsgV1 {
118+
static bool HasRType(RType rtype) { return rtype == RType::SymbolMapping; }
119+
120+
SymbolMappingMsgV2 ToV2() const;
121+
const char* STypeInSymbol() const { return stype_in_symbol.data(); }
122+
const char* STypeOutSymbol() const { return stype_out_symbol.data(); }
123+
124+
RecordHeader hd;
125+
std::array<char, kSymbolCstrLenV1> stype_in_symbol;
126+
std::array<char, kSymbolCstrLenV1> stype_out_symbol;
127+
// padding for alignment
128+
std::array<char, 4> dummy;
129+
UnixNanos start_ts;
130+
UnixNanos end_ts;
131+
};
132+
static_assert(sizeof(SymbolMappingMsgV1) == 80);
133+
134+
bool operator==(const InstrumentDefMsgV1& lhs, const InstrumentDefMsgV1& rhs);
135+
inline bool operator!=(const InstrumentDefMsgV1& lhs,
136+
const InstrumentDefMsgV1& rhs) {
137+
return !(lhs == rhs);
138+
}
139+
inline bool operator==(const SymbolMappingMsgV1& lhs,
140+
const SymbolMappingMsgV1& rhs) {
141+
return std::tie(lhs.hd, lhs.stype_in_symbol, lhs.stype_out_symbol,
142+
lhs.start_ts, lhs.end_ts) ==
143+
std::tie(rhs.hd, rhs.stype_in_symbol, rhs.stype_out_symbol,
144+
rhs.start_ts, rhs.end_ts);
145+
}
146+
inline bool operator!=(const SymbolMappingMsgV1& lhs,
147+
const SymbolMappingMsgV1& rhs) {
148+
return !(lhs == rhs);
149+
}
150+
std::string ToString(const InstrumentDefMsgV1& instr_def_msg);
151+
std::ostream& operator<<(std::ostream& stream,
152+
const InstrumentDefMsgV1& instr_def_msg);
153+
std::string ToString(const SymbolMappingMsgV1& symbol_mapping_msg);
154+
std::ostream& operator<<(std::ostream& stream,
155+
const SymbolMappingMsgV1& symbol_mapping_msg);
156+
} // namespace databento

include/databento/constants.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ static constexpr auto kUndefStatQuantity =
2020
// The sentinel value for an unset or null timestamp.
2121
static constexpr auto kUndefTimestamp =
2222
std::numeric_limits<std::uint64_t>::max();
23+
// The current version of the DBN encoding.
24+
static constexpr auto kDbnVersion = 2;
25+
// The length of fixed-length symbol strings.
26+
static constexpr auto kSymbolCstrLen = 71;
2327

2428
// This is not necessarily a comprehensive list of available datasets. Please
2529
// use `Historical.MetadataListDatasets` to retrieve an up-to-date list.

include/databento/dbn.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ struct Metadata {
5656
SType stype_out;
5757
// Whether the records contain an appended send timestamp.
5858
bool ts_out;
59+
// The length in bytes of fixed-length symbol strings, including a null
60+
// terminator byte.
61+
std::size_t symbol_cstr_len;
5962
// The original query input symbols from the request.
6063
std::vector<std::string> symbols;
6164
// Symbols that did not resolve for _at least one day_ in the query time
@@ -91,6 +94,7 @@ inline bool operator==(const Metadata& lhs, const Metadata& rhs) {
9194
(lhs.has_mixed_stype_in ? rhs.has_mixed_stype_in
9295
: lhs.stype_in == rhs.stype_in) &&
9396
lhs.stype_out == rhs.stype_out && lhs.ts_out == rhs.ts_out &&
97+
lhs.symbol_cstr_len == rhs.symbol_cstr_len &&
9498
lhs.symbols == rhs.symbols && lhs.partial == rhs.partial &&
9599
lhs.not_found == rhs.not_found && lhs.mappings == rhs.mappings;
96100
}

include/databento/dbn_decoder.hpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,50 +8,67 @@
88
#include "databento/dbn.hpp"
99
#include "databento/detail/file_stream.hpp"
1010
#include "databento/detail/shared_channel.hpp"
11+
#include "databento/enums.hpp" // Upgrade Policy
1112
#include "databento/ireadable.hpp"
1213
#include "databento/record.hpp"
1314

1415
namespace databento {
15-
// DBN decoder. Use either the DbnChannelDecoder or DbnFileDecoder
16-
// specialization.
16+
// DBN decoder. Set upgrade_policy to control how DBN version 1 data should be
17+
// handled. Currently it defaults to returning this data as-is, but this default
18+
// will change in a future version.
1719
class DbnDecoder {
1820
public:
1921
explicit DbnDecoder(detail::SharedChannel channel);
2022
explicit DbnDecoder(detail::FileStream file_stream);
2123
explicit DbnDecoder(std::unique_ptr<IReadable> input);
24+
DbnDecoder(std::unique_ptr<IReadable> input,
25+
VersionUpgradePolicy upgrade_policy);
2226

2327
// Decode metadata from the given buffer.
2428
static Metadata DecodeMetadata(const std::vector<std::uint8_t>& buffer);
2529
static std::pair<std::uint8_t, std::size_t> DecodeMetadataVersionAndSize(
2630
const std::uint8_t* buffer, std::size_t size);
2731
static Metadata DecodeMetadataFields(std::uint8_t version,
2832
const std::vector<std::uint8_t>& buffer);
33+
// Decodes a record, possibly upgrading the data according to the
34+
// given version and upgrade policy. If an upgrade is applied,
35+
// compat_buffer is modified.
36+
static Record DecodeRecordCompat(
37+
std::uint8_t version, VersionUpgradePolicy upgrade_policy,
38+
std::array<std::uint8_t, kMaxRecordLen>* compat_buffer, Record rec);
2939

30-
// Should only be called once
40+
// Should be called exactly once.
3141
Metadata DecodeMetadata();
3242
// Lifetime of returned Record is until next call to DecodeRecord. Returns
3343
// nullptr once the end of the input has been reached.
3444
const Record* DecodeRecord();
3545

3646
private:
3747
static std::string DecodeSymbol(
48+
std::size_t symbol_cstr_len,
3849
std::vector<std::uint8_t>::const_iterator& buffer_it);
3950
static std::vector<std::string> DecodeRepeatedSymbol(
51+
std::size_t symbol_cstr_len,
4052
std::vector<std::uint8_t>::const_iterator& buffer_it,
4153
std::vector<std::uint8_t>::const_iterator buffer_end_it);
4254
static std::vector<SymbolMapping> DecodeSymbolMappings(
55+
std::size_t symbol_cstr_len,
4356
std::vector<std::uint8_t>::const_iterator& buffer_it,
4457
std::vector<std::uint8_t>::const_iterator buffer_end_it);
4558
static SymbolMapping DecodeSymbolMapping(
59+
std::size_t symbol_cstr_len,
4660
std::vector<std::uint8_t>::const_iterator& buffer_it,
4761
std::vector<std::uint8_t>::const_iterator buffer_end_it);
4862
bool DetectCompression();
4963
std::size_t FillBuffer();
5064
RecordHeader* BufferRecordHeader();
5165

66+
std::uint8_t version_{};
67+
VersionUpgradePolicy upgrade_policy_;
5268
std::unique_ptr<IReadable> input_;
53-
std::vector<std::uint8_t> buffer_;
69+
std::vector<std::uint8_t> read_buffer_;
5470
std::size_t buffer_idx_{};
71+
std::array<std::uint8_t, kMaxRecordLen> compat_buffer_{};
5572
Record current_record_{nullptr};
5673
};
5774
} // namespace databento

include/databento/dbn_file_store.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
#pragma once
22

3-
#include <memory> // unique_ptr
43
#include <string>
54

65
#include "databento/dbn.hpp" // Metadata
76
#include "databento/dbn_decoder.hpp" // DbnDecoder
7+
#include "databento/enums.hpp" // VersionUpgradePolicy
88
#include "databento/timeseries.hpp" // MetadataCallback, RecordCallback
99

1010
namespace databento {
1111
// A reader for DBN files.
1212
class DbnFileStore {
1313
public:
1414
explicit DbnFileStore(const std::string& file_path);
15+
DbnFileStore(const std::string& file_path,
16+
VersionUpgradePolicy upgrade_policy);
1517

1618
void Replay(const MetadataCallback& metadata_callback,
1719
const RecordCallback& record_callback);

0 commit comments

Comments
 (0)