Skip to content

Commit 4657dcd

Browse files
authored
fix: make Avro reader buffer size configurable (#447)
Add a ReaderProperties::kAvroBufferSize property so that users can configure the Avro input stream buffer size instead of relying on a hardcoded 1MB value.

Changes:
- Add the kAvroBufferSize property to ReaderProperties (default: 1MB).
- Update AvroReader to use the configurable buffer size.
- Remove the TODO comment about making this configurable.
1 parent 239e255 commit 4657dcd

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

src/iceberg/avro/avro_reader.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,10 @@ class AvroReader::Impl {
8989
read_schema_ = options.projection;
9090

9191
// Open the input stream and adapt to the avro interface.
92-
// TODO(gangwu): make this configurable
93-
constexpr int64_t kDefaultBufferSize = 1024 * 1024;
94-
ICEBERG_ASSIGN_OR_RAISE(auto input_stream,
95-
CreateInputStream(options, kDefaultBufferSize));
92+
ICEBERG_ASSIGN_OR_RAISE(
93+
auto input_stream,
94+
CreateInputStream(options,
95+
options.properties->Get(ReaderProperties::kAvroBufferSize)));
9696

9797
::avro::ValidSchema file_schema;
9898

src/iceberg/file_reader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ class ReaderProperties : public ConfigBase<ReaderProperties> {
8181
/// Default: true (skip GenericDatum for better performance).
8282
inline static Entry<bool> kAvroSkipDatum{"read.avro.skip-datum", true};
8383

84+
/// \brief The buffer size, in bytes, used by the Avro input stream (default: 1MB).
85+
inline static Entry<int64_t> kAvroBufferSize{"read.avro.buffer-size", 1024 * 1024};
86+
8487
/// \brief Create a default ReaderProperties instance.
8588
static std::unique_ptr<ReaderProperties> default_properties();
8689

src/iceberg/test/avro_test.cc

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,4 +503,27 @@ INSTANTIATE_TEST_SUITE_P(DirectDecoderModes, AvroReaderParameterizedTest,
503503
return info.param ? "DirectDecoder" : "GenericDatum";
504504
});
505505

506+
TEST_F(AvroReaderTest, BufferSizeConfiguration) {
507+
// Test default buffer size
508+
auto properties1 = ReaderProperties::default_properties();
509+
ASSERT_EQ(properties1->Get(ReaderProperties::kAvroBufferSize), 1024 * 1024);
510+
511+
// Test setting custom buffer size
512+
auto properties2 = ReaderProperties::default_properties();
513+
constexpr int64_t kCustomBufferSize = 2 * 1024 * 1024; // 2MB
514+
properties2->Set(ReaderProperties::kAvroBufferSize, kCustomBufferSize);
515+
ASSERT_EQ(properties2->Get(ReaderProperties::kAvroBufferSize), kCustomBufferSize);
516+
517+
// Test setting via FromMap
518+
std::unordered_map<std::string, std::string> config_map = {
519+
{"read.avro.buffer-size", "4194304"} // 4MB
520+
};
521+
auto properties3 = ReaderProperties::FromMap(config_map);
522+
ASSERT_EQ(properties3->Get(ReaderProperties::kAvroBufferSize), 4194304);
523+
524+
// Test that Unset() restores the default buffer size
525+
properties2->Unset(ReaderProperties::kAvroBufferSize);
526+
ASSERT_EQ(properties2->Get(ReaderProperties::kAvroBufferSize), 1024 * 1024);
527+
}
528+
506529
} // namespace iceberg::avro

0 commit comments

Comments
 (0)