Skip to content

Commit c0f1dce

Browse files
authored
fix: dangling span capture in StructLikeAccessor (#423)
1 parent da91626 commit c0f1dce

File tree

8 files changed

+53
-31
lines changed

8 files changed

+53
-31
lines changed

src/iceberg/expression/manifest_evaluator.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -363,12 +363,10 @@ Result<std::unique_ptr<ManifestEvaluator>> ManifestEvaluator::MakePartitionFilte
363363
std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& spec,
364364
const Schema& schema, bool case_sensitive) {
365365
ICEBERG_ASSIGN_OR_RAISE(auto partition_type, spec->PartitionType(schema));
366-
auto field_span = partition_type->fields();
367-
std::vector<SchemaField> fields(field_span.begin(), field_span.end());
368-
auto partition_schema = std::make_shared<Schema>(fields);
369366
ICEBERG_ASSIGN_OR_RAISE(auto rewrite_expr, RewriteNot::Visit(std::move(expr)));
370-
ICEBERG_ASSIGN_OR_RAISE(auto partition_expr,
371-
Binder::Bind(*partition_schema, rewrite_expr, case_sensitive));
367+
ICEBERG_ASSIGN_OR_RAISE(
368+
auto partition_expr,
369+
Binder::Bind(*partition_type->ToSchema(), rewrite_expr, case_sensitive));
372370
return std::unique_ptr<ManifestEvaluator>(
373371
new ManifestEvaluator(std::move(partition_expr)));
374372
}

src/iceberg/expression/residual_evaluator.cc

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "iceberg/schema.h"
2828
#include "iceberg/schema_internal.h"
2929
#include "iceberg/transform.h"
30+
#include "iceberg/type.h"
3031
#include "iceberg/util/macros.h"
3132

3233
namespace iceberg {
@@ -42,8 +43,7 @@ class ResidualVisitor : public BoundVisitor<std::shared_ptr<Expression>> {
4243
const StructLike& partition_data,
4344
bool case_sensitive) {
4445
ICEBERG_ASSIGN_OR_RAISE(auto partition_type, spec.PartitionType(schema));
45-
auto partition_schema = FromStructType(std::move(*partition_type), std::nullopt);
46-
return ResidualVisitor(spec, schema, std::move(partition_schema), partition_data,
46+
return ResidualVisitor(spec, schema, std::move(partition_type), partition_data,
4747
case_sensitive);
4848
}
4949

@@ -202,17 +202,17 @@ class ResidualVisitor : public BoundVisitor<std::shared_ptr<Expression>> {
202202

203203
private:
204204
ResidualVisitor(const PartitionSpec& spec, const Schema& schema,
205-
std::unique_ptr<Schema> partition_schema,
205+
std::unique_ptr<StructType> partition_type,
206206
const StructLike& partition_data, bool case_sensitive)
207207
: spec_(spec),
208208
schema_(schema),
209-
partition_schema_(std::move(partition_schema)),
209+
partition_type_(std::move(partition_type)),
210210
partition_data_(partition_data),
211211
case_sensitive_(case_sensitive) {}
212212

213213
const PartitionSpec& spec_;
214214
const Schema& schema_;
215-
std::unique_ptr<Schema> partition_schema_;
215+
std::unique_ptr<StructType> partition_type_;
216216
const StructLike& partition_data_;
217217
bool case_sensitive_;
218218
};
@@ -235,6 +235,7 @@ Result<std::shared_ptr<Expression>> ResidualVisitor::Predicate(
235235
// Not associated with a partition field, can't be evaluated
236236
return pred;
237237
}
238+
auto schema = partition_type_->ToSchema();
238239

239240
for (const auto& part : parts) {
240241
// Check the strict projection
@@ -243,9 +244,8 @@ Result<std::shared_ptr<Expression>> ResidualVisitor::Predicate(
243244
std::shared_ptr<Expression> strict_result = nullptr;
244245

245246
if (strict_projection != nullptr) {
246-
ICEBERG_ASSIGN_OR_RAISE(
247-
auto bound_strict,
248-
strict_projection->Bind(*partition_schema_, case_sensitive_));
247+
ICEBERG_ASSIGN_OR_RAISE(auto bound_strict,
248+
strict_projection->Bind(*schema, case_sensitive_));
249249
if (bound_strict->is_bound_predicate()) {
250250
ICEBERG_ASSIGN_OR_RAISE(
251251
strict_result, BoundVisitor::Predicate(
@@ -268,9 +268,8 @@ Result<std::shared_ptr<Expression>> ResidualVisitor::Predicate(
268268
std::shared_ptr<Expression> inclusive_result = nullptr;
269269

270270
if (inclusive_projection != nullptr) {
271-
ICEBERG_ASSIGN_OR_RAISE(
272-
auto bound_inclusive,
273-
inclusive_projection->Bind(*partition_schema_, case_sensitive_));
271+
ICEBERG_ASSIGN_OR_RAISE(auto bound_inclusive,
272+
inclusive_projection->Bind(*schema, case_sensitive_));
274273

275274
if (bound_inclusive->is_bound_predicate()) {
276275
ICEBERG_ASSIGN_OR_RAISE(

src/iceberg/row/struct_like.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,20 +87,20 @@ StructLikeAccessor::StructLikeAccessor(std::shared_ptr<Type> type,
8787
return std::get<std::shared_ptr<StructLike>>(first_level_field)->GetField(pos1);
8888
};
8989
} else if (!position_path.empty()) {
90-
accessor_ = [position_path](const StructLike& struct_like) -> Result<Scalar> {
90+
accessor_ = [this](const StructLike& struct_like) -> Result<Scalar> {
9191
std::vector<std::shared_ptr<StructLike>> backups;
9292
const StructLike* current_struct_like = &struct_like;
93-
for (size_t i = 0; i < position_path.size() - 1; ++i) {
93+
for (size_t i = 0; i < position_path_.size() - 1; ++i) {
9494
ICEBERG_ASSIGN_OR_RAISE(auto field,
95-
current_struct_like->GetField(position_path[i]));
95+
current_struct_like->GetField(position_path_[i]));
9696
if (!std::holds_alternative<std::shared_ptr<StructLike>>(field)) {
9797
return InvalidSchema("Encountered non-struct in the position path [{}]",
98-
position_path);
98+
position_path_);
9999
}
100100
backups.push_back(std::get<std::shared_ptr<StructLike>>(field));
101101
current_struct_like = backups.back().get();
102102
}
103-
return current_struct_like->GetField(position_path.back());
103+
return current_struct_like->GetField(position_path_.back());
104104
};
105105
} else {
106106
accessor_ = [](const StructLike&) -> Result<Scalar> {

src/iceberg/schema.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,19 +59,22 @@ class ICEBERG_EXPORT Schema : public StructType {
5959

6060
std::string ToString() const override;
6161

62-
/// \brief Find the SchemaField by field name.
62+
/// \brief Recursively find the SchemaField by field name.
6363
///
6464
/// Short names for maps and lists are included for any name that does not conflict with
6565
/// a canonical name. For example, a list, 'l', of structs with field 'x' will produce
66-
/// short name 'l.x' in addition to canonical name 'l.element.x'. a map 'm', if its
66+
/// short name 'l.x' in addition to canonical name 'l.element.x'. A map 'm', if its
6767
/// value include a structs with field 'x' wil produce short name 'm.x' in addition to
68-
/// canonical name 'm.value.x'
68+
/// canonical name 'm.value.x'.
6969
/// FIXME: Currently only handles ASCII lowercase conversion; extend to support
7070
/// non-ASCII characters (e.g., using std::towlower or ICU)
7171
Result<std::optional<std::reference_wrapper<const SchemaField>>> FindFieldByName(
7272
std::string_view name, bool case_sensitive = true) const;
7373

74-
/// \brief Find the SchemaField by field id.
74+
/// \brief Recursively find the SchemaField by field id.
75+
///
76+
/// \param field_id The id of the field to get the accessor for.
77+
/// \return The field with the given id, or std::nullopt if not found.
7578
Result<std::optional<std::reference_wrapper<const SchemaField>>> FindFieldById(
7679
int32_t field_id) const;
7780

src/iceberg/table_scan.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,13 @@ Result<std::vector<std::shared_ptr<FileScanTask>>> DataTableScan::PlanFiles() co
273273

274274
// Get the table schema and partition type
275275
ICEBERG_ASSIGN_OR_RAISE(auto current_schema, context_.table_metadata->Schema());
276-
ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<StructType> partition_schema,
276+
ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<StructType> partition_type,
277277
partition_spec->PartitionType(*current_schema));
278278

279279
for (const auto& manifest_file : manifest_files) {
280280
ICEBERG_ASSIGN_OR_RAISE(
281281
auto manifest_reader,
282-
ManifestReader::Make(manifest_file, file_io_, partition_schema));
282+
ManifestReader::Make(manifest_file, file_io_, partition_type));
283283
ICEBERG_ASSIGN_OR_RAISE(auto manifests, manifest_reader->Entries());
284284

285285
// TODO(gty404): filter manifests using partition spec and filter expression

src/iceberg/test/manifest_writer_versions_test.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,9 @@ class ManifestWriterVersionsTest : public ::testing::Test {
241241
}
242242

243243
std::vector<ManifestEntry> ReadManifest(const ManifestFile& manifest_file) {
244-
auto partition_schema_result = spec_->PartitionType(*schema_);
245-
EXPECT_THAT(partition_schema_result, IsOk());
246-
std::shared_ptr<StructType> partition_type =
247-
std::move(partition_schema_result.value());
244+
auto partition_type_result = spec_->PartitionType(*schema_);
245+
EXPECT_THAT(partition_type_result, IsOk());
246+
std::shared_ptr<StructType> partition_type = std::move(partition_type_result.value());
248247
auto reader_result = ManifestReader::Make(manifest_file, file_io_, partition_type);
249248
EXPECT_THAT(reader_result, IsOk());
250249

src/iceberg/type.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <utility>
2626

2727
#include "iceberg/exception.h"
28+
#include "iceberg/schema.h"
2829
#include "iceberg/util/formatter.h" // IWYU pragma: keep
2930
#include "iceberg/util/macros.h"
3031
#include "iceberg/util/string_util.h"
@@ -48,6 +49,7 @@ std::string StructType::ToString() const {
4849
repr += ">";
4950
return repr;
5051
}
52+
5153
std::span<const SchemaField> StructType::fields() const { return fields_; }
5254
Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldById(
5355
int32_t field_id) const {
@@ -56,13 +58,15 @@ Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldById(
5658
if (it == field_by_id.get().end()) return std::nullopt;
5759
return it->second;
5860
}
61+
5962
Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldByIndex(
6063
int32_t index) const {
6164
if (index < 0 || static_cast<size_t>(index) >= fields_.size()) {
6265
return InvalidArgument("Invalid index {} to get field from struct", index);
6366
}
6467
return fields_[index];
6568
}
69+
6670
Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldByName(
6771
std::string_view name, bool case_sensitive) const {
6872
if (case_sensitive) {
@@ -81,13 +85,19 @@ Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldByNam
8185
}
8286
return std::nullopt;
8387
}
88+
89+
std::unique_ptr<Schema> StructType::ToSchema() const {
90+
return std::make_unique<Schema>(fields_);
91+
}
92+
8493
bool StructType::Equals(const Type& other) const {
8594
if (other.type_id() != TypeId::kStruct) {
8695
return false;
8796
}
8897
const auto& struct_ = static_cast<const StructType&>(other);
8998
return fields_ == struct_.fields_;
9099
}
100+
91101
Result<std::unordered_map<int32_t, StructType::SchemaFieldConstRef>>
92102
StructType::InitFieldById(const StructType& self) {
93103
std::unordered_map<int32_t, SchemaFieldConstRef> field_by_id;
@@ -100,6 +110,7 @@ StructType::InitFieldById(const StructType& self) {
100110
}
101111
return field_by_id;
102112
}
113+
103114
Result<std::unordered_map<std::string_view, StructType::SchemaFieldConstRef>>
104115
StructType::InitFieldByName(const StructType& self) {
105116
std::unordered_map<std::string_view, StructType::SchemaFieldConstRef> field_by_name;
@@ -113,6 +124,7 @@ StructType::InitFieldByName(const StructType& self) {
113124
}
114125
return field_by_name;
115126
}
127+
116128
Result<std::unordered_map<std::string, StructType::SchemaFieldConstRef>>
117129
StructType::InitFieldByLowerCaseName(const StructType& self) {
118130
std::unordered_map<std::string, SchemaFieldConstRef> field_by_lowercase_name;
@@ -146,6 +158,7 @@ std::string ListType::ToString() const {
146158
repr += ">";
147159
return repr;
148160
}
161+
149162
std::span<const SchemaField> ListType::fields() const { return {&element_, 1}; }
150163
Result<std::optional<NestedType::SchemaFieldConstRef>> ListType::GetFieldById(
151164
int32_t field_id) const {
@@ -154,13 +167,15 @@ Result<std::optional<NestedType::SchemaFieldConstRef>> ListType::GetFieldById(
154167
}
155168
return std::nullopt;
156169
}
170+
157171
Result<std::optional<NestedType::SchemaFieldConstRef>> ListType::GetFieldByIndex(
158172
int index) const {
159173
if (index == 0) {
160174
return std::cref(element_);
161175
}
162176
return InvalidArgument("Invalid index {} to get field from list", index);
163177
}
178+
164179
Result<std::optional<NestedType::SchemaFieldConstRef>> ListType::GetFieldByName(
165180
std::string_view name, bool case_sensitive) const {
166181
if (case_sensitive) {
@@ -174,6 +189,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>> ListType::GetFieldByName(
174189
}
175190
return std::nullopt;
176191
}
192+
177193
bool ListType::Equals(const Type& other) const {
178194
if (other.type_id() != TypeId::kList) {
179195
return false;
@@ -195,6 +211,7 @@ MapType::MapType(SchemaField key, SchemaField value)
195211
const SchemaField& MapType::key() const { return fields_[0]; }
196212
const SchemaField& MapType::value() const { return fields_[1]; }
197213
TypeId MapType::type_id() const { return kTypeId; }
214+
198215
std::string MapType::ToString() const {
199216
// XXX: work around Clang/libc++: "<{}>" in a format string appears to get
200217
// parsed as {<>} or something; split up the format string to avoid that
@@ -204,6 +221,7 @@ std::string MapType::ToString() const {
204221
repr += ">";
205222
return repr;
206223
}
224+
207225
std::span<const SchemaField> MapType::fields() const { return fields_; }
208226
Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldById(
209227
int32_t field_id) const {
@@ -214,6 +232,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldById(
214232
}
215233
return std::nullopt;
216234
}
235+
217236
Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldByIndex(
218237
int32_t index) const {
219238
if (index == 0) {
@@ -223,6 +242,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldByIndex(
223242
}
224243
return InvalidArgument("Invalid index {} to get field from map", index);
225244
}
245+
226246
Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldByName(
227247
std::string_view name, bool case_sensitive) const {
228248
if (case_sensitive) {
@@ -241,6 +261,7 @@ Result<std::optional<NestedType::SchemaFieldConstRef>> MapType::GetFieldByName(
241261
}
242262
return std::nullopt;
243263
}
264+
244265
bool MapType::Equals(const Type& other) const {
245266
if (other.type_id() != TypeId::kMap) {
246267
return false;

src/iceberg/type.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ class ICEBERG_EXPORT StructType : public NestedType {
123123
std::string_view name, bool case_sensitive) const override;
124124
using NestedType::GetFieldByName;
125125

126+
std::unique_ptr<Schema> ToSchema() const;
127+
126128
protected:
127129
bool Equals(const Type& other) const override;
128130

0 commit comments

Comments
 (0)