diff --git a/src/iceberg/expression/residual_evaluator.cc b/src/iceberg/expression/residual_evaluator.cc index cf8f98120..3cdf2914a 100644 --- a/src/iceberg/expression/residual_evaluator.cc +++ b/src/iceberg/expression/residual_evaluator.cc @@ -308,8 +308,7 @@ class UnpartitionedResidualEvaluator : public ResidualEvaluator { private: // Store an empty schema to avoid dangling reference when passing to base class - inline static const std::shared_ptr kEmptySchema_ = - std::make_shared(std::vector{}, std::nullopt); + inline static const std::shared_ptr kEmptySchema_ = Schema::EmptySchema(); }; } // namespace diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc index 9d955d46b..d175bb6dd 100644 --- a/src/iceberg/json_internal.cc +++ b/src/iceberg/json_internal.cc @@ -308,7 +308,7 @@ nlohmann::json ToJson(const Type& type) { nlohmann::json ToJson(const Schema& schema) { nlohmann::json json = ToJson(static_cast(schema)); - SetOptionalField(json, kSchemaId, schema.schema_id()); + json[kSchemaId] = schema.schema_id(); // TODO(gangwu): add identifier-field-ids. return json; } @@ -466,7 +466,8 @@ Result> FieldFromJson(const nlohmann::json& json) { } Result> SchemaFromJson(const nlohmann::json& json) { - ICEBERG_ASSIGN_OR_RAISE(auto schema_id, GetJsonValueOptional(json, kSchemaId)); + ICEBERG_ASSIGN_OR_RAISE(auto schema_id_opt, + GetJsonValueOptional(json, kSchemaId)); ICEBERG_ASSIGN_OR_RAISE(auto type, TypeFromJson(json)); if (type->type_id() != TypeId::kStruct) [[unlikely]] { @@ -474,6 +475,7 @@ Result> SchemaFromJson(const nlohmann::json& json) { } auto& struct_type = static_cast(*type); + auto schema_id = schema_id_opt.value_or(Schema::kInitialSchemaId); return FromStructType(std::move(struct_type), schema_id); } @@ -762,7 +764,7 @@ nlohmann::json ToJson(const TableMetadata& table_metadata) { } // write the current schema ID and schema list - SetOptionalField(json, kCurrentSchemaId, table_metadata.current_schema_id); + json[kCurrentSchemaId] = table_metadata.current_schema_id; json[kSchemas] = ToJsonList(table_metadata.schemas); // for older readers, continue writing the default spec as "partition-spec" @@ -824,8 +826,7 @@ namespace { /// /// \return The current schema or parse error. Result> ParseSchemas( - const nlohmann::json& json, int8_t format_version, - std::optional& current_schema_id, + const nlohmann::json& json, int8_t format_version, int32_t& current_schema_id, std::vector>& schemas) { std::shared_ptr current_schema; if (json.contains(kSchemas)) { @@ -848,7 +849,7 @@ Result> ParseSchemas( } if (!current_schema) { return JsonParseError("Cannot find schema with {}={} from {}", kCurrentSchemaId, - current_schema_id.value(), SafeDumpJson(schema_array)); + current_schema_id, SafeDumpJson(schema_array)); } } else { if (format_version != 1) { diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc index afe6be1e7..bc325251d 100644 --- a/src/iceberg/schema.cc +++ b/src/iceberg/schema.cc @@ -34,14 +34,14 @@ namespace iceberg { -Schema::Schema(std::vector fields, std::optional schema_id, +Schema::Schema(std::vector fields, int32_t schema_id, std::vector identifier_field_ids) : StructType(std::move(fields)), schema_id_(schema_id), identifier_field_ids_(std::move(identifier_field_ids)) {} Result> Schema::Make( - std::vector fields, std::optional schema_id, + std::vector fields, int32_t schema_id, const std::vector& identifier_field_names) { auto schema = std::make_unique(std::move(fields), schema_id); @@ -57,7 +57,13 @@ Result> Schema::Make( return schema; } -std::optional Schema::schema_id() const { return schema_id_; } +const std::shared_ptr& Schema::EmptySchema() { + static const auto empty_schema = + std::make_shared(std::vector{}, kInitialSchemaId); + return empty_schema; +} + +int32_t Schema::schema_id() const { return schema_id_; } std::string Schema::ToString() const { std::string repr = "schema<"; @@ -196,7 +202,7 @@ Result> Schema::Select(std::span name auto pruned_type, visitor.Visit(std::shared_ptr(ToStructType(*this)))); if (!pruned_type) { - return std::make_unique(std::vector{}, std::nullopt); + return std::make_unique(std::vector{}, kInitialSchemaId); } if (pruned_type->type_id() != TypeId::kStruct) { @@ -214,7 +220,7 @@ Result> Schema::Project( auto project_type, visitor.Visit(std::shared_ptr(ToStructType(*this)))); if (!project_type) { - return std::make_unique(std::vector{}, std::nullopt); + return std::make_unique(std::vector{}, kInitialSchemaId); } if (project_type->type_id() != TypeId::kStruct) { diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h index a2686c9f7..fc0825695 100644 --- a/src/iceberg/schema.h +++ b/src/iceberg/schema.h @@ -52,8 +52,7 @@ class ICEBERG_EXPORT Schema : public StructType { /// \brief Special value to select all columns from manifest files. static constexpr std::string_view kAllColumns = "*"; - explicit Schema(std::vector fields, - std::optional schema_id = std::nullopt, + explicit Schema(std::vector fields, int32_t schema_id = kInitialSchemaId, std::vector identifier_field_ids = {}); /// \brief Create a schema. @@ -63,14 +62,19 @@ class ICEBERG_EXPORT Schema : public StructType { /// \param identifier_field_names Canonical names of fields that uniquely identify rows /// in the table (default: empty). \return A new Schema instance or Status if failed. static Result> Make( - std::vector fields, std::optional schema_id = std::nullopt, + std::vector fields, int32_t schema_id = kInitialSchemaId, const std::vector& identifier_field_names = {}); + /// \brief Get an empty schema. + /// + /// An empty schema has no fields and a schema ID of 0. + static const std::shared_ptr& EmptySchema(); + /// \brief Get the schema ID. /// /// A schema is identified by a unique ID for the purposes of schema /// evolution. - std::optional schema_id() const; + int32_t schema_id() const; std::string ToString() const override; @@ -178,7 +182,7 @@ class ICEBERG_EXPORT Schema : public StructType { const Schema&); static Result InitHighestFieldId(const Schema&); - const std::optional schema_id_; + const int32_t schema_id_; /// Field IDs that uniquely identify rows in the table. std::vector identifier_field_ids_; /// Mapping from field id to field. diff --git a/src/iceberg/schema_internal.cc b/src/iceberg/schema_internal.cc index e020a9b7b..dedb603e2 100644 --- a/src/iceberg/schema_internal.cc +++ b/src/iceberg/schema_internal.cc @@ -304,12 +304,13 @@ Result> FromArrowSchema(const ArrowSchema& schema) { } // namespace std::unique_ptr FromStructType(StructType&& struct_type, - std::optional schema_id) { + std::optional schema_id_opt) { std::vector fields; fields.reserve(struct_type.fields().size()); for (auto& field : struct_type.fields()) { fields.emplace_back(std::move(field)); } + auto schema_id = schema_id_opt.value_or(Schema::kInitialSchemaId); return std::make_unique(std::move(fields), schema_id); } diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 095f91d14..114f3d0b1 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -233,13 +233,12 @@ Result> TableMetadata::Schema() const { return SchemaById(current_schema_id); } -Result> TableMetadata::SchemaById( - std::optional schema_id) const { +Result> TableMetadata::SchemaById(int32_t schema_id) const { auto iter = std::ranges::find_if(schemas, [schema_id](const auto& schema) { return schema != nullptr && schema->schema_id() == schema_id; }); if (iter == schemas.end()) { - return NotFound("Schema with ID {} is not found", schema_id.value_or(-1)); + return NotFound("Schema with ID {} is not found", schema_id); } return *iter; } @@ -359,11 +358,8 @@ Result TableMetadataCache::GetSnapshotsById Result TableMetadataCache::InitSchemasMap( const TableMetadata* metadata) { - return metadata->schemas | std::views::filter([](const auto& schema) { - return schema->schema_id().has_value(); - }) | - std::views::transform([](const auto& schema) { - return std::make_pair(schema->schema_id().value(), schema); + return metadata->schemas | std::views::transform([](const auto& schema) { + return std::make_pair(schema->schema_id(), schema); }) | std::ranges::to(); } @@ -548,9 +544,7 @@ class TableMetadataBuilder::Impl { : base_(base_metadata), metadata_(*base_metadata) { // Initialize index maps from base metadata for (const auto& schema : metadata_.schemas) { - if (schema->schema_id().has_value()) { - schemas_by_id_.emplace(schema->schema_id().value(), schema); - } + schemas_by_id_.emplace(schema->schema_id(), schema); } for (const auto& spec : metadata_.partition_specs) { @@ -920,14 +914,13 @@ Status TableMetadataBuilder::Impl::SetCurrentSchema(int32_t schema_id) { Status TableMetadataBuilder::Impl::RemoveSchemas( const std::unordered_set& schema_ids) { - auto current_schema_id = metadata_.current_schema_id.value_or(Schema::kInitialSchemaId); + auto current_schema_id = metadata_.current_schema_id; ICEBERG_PRECHECK(!schema_ids.contains(current_schema_id), "Cannot remove current schema: {}", current_schema_id); if (!schema_ids.empty()) { metadata_.schemas = metadata_.schemas | std::views::filter([&](const auto& schema) { - return schema->schema_id().has_value() && - !schema_ids.contains(schema->schema_id().value()); + return !schema_ids.contains(schema->schema_id()); }) | std::ranges::to>>(); changes_.push_back(std::make_unique(schema_ids)); @@ -999,7 +992,7 @@ Result> TableMetadataBuilder::Impl::Build() { std::chrono::system_clock::now().time_since_epoch())}; } - auto current_schema_id = metadata_.current_schema_id.value_or(Schema::kInitialSchemaId); + auto current_schema_id = metadata_.current_schema_id; auto schema_it = schemas_by_id_.find(current_schema_id); ICEBERG_PRECHECK(schema_it != schemas_by_id_.end(), "Current schema ID {} not found in schemas", current_schema_id); @@ -1072,9 +1065,9 @@ int32_t TableMetadataBuilder::Impl::ReuseOrCreateNewPartitionSpecId( int32_t TableMetadataBuilder::Impl::ReuseOrCreateNewSchemaId( const Schema& new_schema) const { // if the schema already exists, use its id; otherwise use the highest id + 1 - auto new_schema_id = metadata_.current_schema_id.value_or(Schema::kInitialSchemaId); + auto new_schema_id = metadata_.current_schema_id; for (auto& schema : metadata_.schemas) { - auto schema_id = schema->schema_id().value_or(Schema::kInitialSchemaId); + auto schema_id = schema->schema_id(); if (schema->SameSchema(new_schema)) { return schema_id; } else if (new_schema_id <= schema_id) { diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index 068ab3197..3e3eb9c70 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -95,7 +95,7 @@ struct ICEBERG_EXPORT TableMetadata { /// A list of schemas std::vector> schemas; /// ID of the table's current schema - std::optional current_schema_id; + int32_t current_schema_id; /// A list of partition specs std::vector> partition_specs; /// ID of the current partition spec that writers should use by default @@ -136,8 +136,7 @@ struct ICEBERG_EXPORT TableMetadata { /// \brief Get the current schema, return NotFoundError if not found Result> Schema() const; /// \brief Get the current schema by ID, return NotFoundError if not found - Result> SchemaById( - std::optional schema_id) const; + Result> SchemaById(int32_t schema_id) const; /// \brief Get the current partition spec, return NotFoundError if not found Result> PartitionSpec() const; /// \brief Get the current sort order, return NotFoundError if not found diff --git a/src/iceberg/table_requirement.cc b/src/iceberg/table_requirement.cc index 3c0a01e48..496289280 100644 --- a/src/iceberg/table_requirement.cc +++ b/src/iceberg/table_requirement.cc @@ -90,15 +90,10 @@ Status AssertCurrentSchemaID::Validate(const TableMetadata* base) const { return CommitFailed("Requirement failed: current table metadata is missing"); } - if (!base->current_schema_id.has_value()) { - return CommitFailed( - "Requirement failed: current schema ID is not set in table metadata"); - } - - if (base->current_schema_id.value() != schema_id_) { + if (base->current_schema_id != schema_id_) { return CommitFailed( "Requirement failed: current schema ID does not match, expected {} != {}", - schema_id_, base->current_schema_id.value()); + schema_id_, base->current_schema_id); } return {}; diff --git a/src/iceberg/table_requirements.cc b/src/iceberg/table_requirements.cc index 8222ded31..3268980e5 100644 --- a/src/iceberg/table_requirements.cc +++ b/src/iceberg/table_requirements.cc @@ -51,8 +51,8 @@ void TableUpdateContext::RequireLastAssignedFieldIdUnchanged() { void TableUpdateContext::RequireCurrentSchemaIdUnchanged() { if (!added_current_schema_id_) { if (base_ != nullptr && !is_replace_) { - AddRequirement(std::make_unique( - base_->current_schema_id.value())); + AddRequirement( + std::make_unique(base_->current_schema_id)); } added_current_schema_id_ = true; } diff --git a/src/iceberg/table_scan.cc b/src/iceberg/table_scan.cc index 6c1828454..6918de825 100644 --- a/src/iceberg/table_scan.cc +++ b/src/iceberg/table_scan.cc @@ -216,8 +216,7 @@ Result> TableScanBuilder::Build() { if (!context_.projected_schema) { const auto& snapshot = context_.snapshot; - auto schema_id = - snapshot->schema_id ? snapshot->schema_id : table_metadata->current_schema_id; + auto schema_id = table_metadata->current_schema_id; ICEBERG_ASSIGN_OR_RAISE(auto schema, table_metadata->SchemaById(schema_id)); if (column_names_.empty()) { @@ -231,7 +230,7 @@ Result> TableScanBuilder::Build() { auto field_opt = schema->GetFieldByName(column_name); if (!field_opt) { return InvalidArgument("Column {} not found in schema '{}'", column_name, - *schema_id); + schema_id); } projected_fields.emplace_back(field_opt.value()->get()); } diff --git a/src/iceberg/test/metadata_serde_test.cc b/src/iceberg/test/metadata_serde_test.cc index 51126b27c..62682b801 100644 --- a/src/iceberg/test/metadata_serde_test.cc +++ b/src/iceberg/test/metadata_serde_test.cc @@ -96,8 +96,7 @@ TEST(MetadataSerdeTest, DeserializeV1Valid) { auto expected_schema = std::make_shared( std::vector{SchemaField::MakeRequired(1, "x", int64()), SchemaField::MakeRequired(2, "y", int64()), - SchemaField::MakeRequired(3, "z", int64())}, - /*schema_id=*/std::nullopt); + SchemaField::MakeRequired(3, "z", int64())}); auto expected_spec_result = PartitionSpec::Make( /*spec_id=*/0, @@ -115,7 +114,7 @@ TEST(MetadataSerdeTest, DeserializeV1Valid) { .last_updated_ms = TimePointMsFromUnixMs(1602638573874).value(), .last_column_id = 3, .schemas = {expected_schema}, - .current_schema_id = std::nullopt, + .current_schema_id = Schema::kInitialSchemaId, .partition_specs = {expected_spec}, .default_spec_id = 0, .last_partition_id = 1000, diff --git a/src/iceberg/test/residual_evaluator_test.cc b/src/iceberg/test/residual_evaluator_test.cc index 04c82e25c..d2aa2aeb0 100644 --- a/src/iceberg/test/residual_evaluator_test.cc +++ b/src/iceberg/test/residual_evaluator_test.cc @@ -90,8 +90,7 @@ class ResidualEvaluatorTest : public ::testing::Test { TEST_F(ResidualEvaluatorTest, IdentityTransformResiduals) { auto schema = std::make_shared( std::vector{SchemaField::MakeOptional(50, "dateint", int32()), - SchemaField::MakeOptional(51, "hour", int32())}, - std::nullopt); + SchemaField::MakeOptional(51, "hour", int32())}); auto identity_transform = Transform::Identity(); PartitionField pt_field(50, 1000, "dateint", identity_transform); @@ -158,8 +157,7 @@ TEST_F(ResidualEvaluatorTest, IdentityTransformResiduals) { TEST_F(ResidualEvaluatorTest, CaseInsensitiveIdentityTransformResiduals) { auto schema = std::make_shared( std::vector{SchemaField::MakeOptional(50, "dateint", int32()), - SchemaField::MakeOptional(51, "hour", int32())}, - std::nullopt); + SchemaField::MakeOptional(51, "hour", int32())}); auto identity_transform = Transform::Identity(); PartitionField pt_field(50, 1000, "dateint", identity_transform); @@ -249,8 +247,7 @@ TEST_F(ResidualEvaluatorTest, UnpartitionedResiduals) { TEST_F(ResidualEvaluatorTest, In) { auto schema = std::make_shared( std::vector{SchemaField::MakeOptional(50, "dateint", int32()), - SchemaField::MakeOptional(51, "hour", int32())}, - std::nullopt); + SchemaField::MakeOptional(51, "hour", int32())}); auto identity_transform = Transform::Identity(); PartitionField pt_field(50, 1000, "dateint", identity_transform); @@ -276,8 +273,7 @@ TEST_F(ResidualEvaluatorTest, In) { TEST_F(ResidualEvaluatorTest, NotIn) { auto schema = std::make_shared( std::vector{SchemaField::MakeOptional(50, "dateint", int32()), - SchemaField::MakeOptional(51, "hour", int32())}, - std::nullopt); + SchemaField::MakeOptional(51, "hour", int32())}); auto identity_transform = Transform::Identity(); PartitionField pt_field(50, 1000, "dateint", identity_transform); @@ -304,8 +300,7 @@ TEST_F(ResidualEvaluatorTest, NotIn) { TEST_F(ResidualEvaluatorTest, IsNaN) { auto schema = std::make_shared( std::vector{SchemaField::MakeOptional(50, "double", float64()), - SchemaField::MakeOptional(51, "float", float32())}, - std::nullopt); + SchemaField::MakeOptional(51, "float", float32())}); // Test double field auto identity_transform = Transform::Identity(); @@ -353,8 +348,7 @@ TEST_F(ResidualEvaluatorTest, IsNaN) { TEST_F(ResidualEvaluatorTest, NotNaN) { auto schema = std::make_shared( std::vector{SchemaField::MakeOptional(50, "double", float64()), - SchemaField::MakeOptional(51, "float", float32())}, - std::nullopt); + SchemaField::MakeOptional(51, "float", float32())}); // Test double field auto identity_transform = Transform::Identity(); @@ -401,8 +395,7 @@ TEST_F(ResidualEvaluatorTest, NotNaN) { TEST_F(ResidualEvaluatorTest, IntegerTruncateTransformResiduals) { auto schema = std::make_shared( - std::vector{SchemaField::MakeOptional(50, "value", int32())}, - std::nullopt); + std::vector{SchemaField::MakeOptional(50, "value", int32())}); // Valid partitions would be 0, 10, 20...90, 100 etc. auto truncate_transform = Transform::Truncate(10); @@ -518,8 +511,7 @@ TEST_F(ResidualEvaluatorTest, IntegerTruncateTransformResiduals) { TEST_F(ResidualEvaluatorTest, StringTruncateTransformResiduals) { auto schema = std::make_shared( - std::vector{SchemaField::MakeOptional(50, "value", string())}, - std::nullopt); + std::vector{SchemaField::MakeOptional(50, "value", string())}); // Valid partitions would be two letter strings for eg: ab, bc etc auto truncate_transform = Transform::Truncate(2); diff --git a/src/iceberg/test/rest_json_internal_test.cc b/src/iceberg/test/rest_json_internal_test.cc index c4b7fb552..60facf43c 100644 --- a/src/iceberg/test/rest_json_internal_test.cc +++ b/src/iceberg/test/rest_json_internal_test.cc @@ -37,9 +37,8 @@ namespace iceberg::rest { // Helper function to create a simple schema for testing static std::shared_ptr MakeSimpleSchema() { return std::make_shared( - std::vector{SchemaField(1, "id", int32(), false), // required - SchemaField(2, "data", string(), true)}, // optional - std::nullopt); + std::vector{SchemaField(1, "id", int32(), false), // required + SchemaField(2, "data", string(), true)}); } // Helper function to create a simple TableMetadata for testing @@ -956,13 +955,13 @@ INSTANTIATE_TEST_SUITE_P( CreateTableRequestParam{ .test_name = "MinimalRequest", .expected_json_str = - R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]}})", + R"({"name":"my_table","schema":{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]}})", .model = {.name = "my_table", .schema = MakeSimpleSchema()}}, // Request with location CreateTableRequestParam{ .test_name = "WithLocation", .expected_json_str = - R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"location":"s3://bucket/warehouse/my_table"})", + R"({"name":"my_table","schema":{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"location":"s3://bucket/warehouse/my_table"})", .model = {.name = "my_table", .location = "s3://bucket/warehouse/my_table", .schema = MakeSimpleSchema()}}, @@ -970,7 +969,7 @@ INSTANTIATE_TEST_SUITE_P( CreateTableRequestParam{ .test_name = "WithProperties", .expected_json_str = - R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"properties":{"owner":"alice","version":"1.0"}})", + R"({"name":"my_table","schema":{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"properties":{"owner":"alice","version":"1.0"}})", .model = {.name = "my_table", .schema = MakeSimpleSchema(), .properties = {{"owner", "alice"}, {"version", "1.0"}}}}, @@ -978,7 +977,7 @@ INSTANTIATE_TEST_SUITE_P( CreateTableRequestParam{ .test_name = "WithStageCreate", .expected_json_str = - R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"stage-create":true})", + R"({"name":"my_table","schema":{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"stage-create":true})", .model = {.name = "my_table", .schema = MakeSimpleSchema(), .stage_create = true}}, @@ -986,7 +985,7 @@ INSTANTIATE_TEST_SUITE_P( CreateTableRequestParam{ .test_name = "WithUnpartitionedSpec", .expected_json_str = - R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"partition-spec":{"spec-id":0,"fields":[]}})", + R"({"name":"my_table","schema":{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"partition-spec":{"spec-id":0,"fields":[]}})", .model = {.name = "my_table", .schema = MakeSimpleSchema(), .partition_spec = PartitionSpec::Unpartitioned()}}, @@ -994,7 +993,7 @@ INSTANTIATE_TEST_SUITE_P( CreateTableRequestParam{ .test_name = "WithUnsortedOrder", .expected_json_str = - R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"write-order":{"order-id":0,"fields":[]}})", + R"({"name":"my_table","schema":{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","type":"int","required":true},{"id":2,"name":"data","type":"string","required":false}]},"write-order":{"order-id":0,"fields":[]}})", .model = {.name = "my_table", .schema = MakeSimpleSchema(), .write_order = SortOrder::Unsorted()}}), @@ -1013,20 +1012,18 @@ INSTANTIATE_TEST_SUITE_P( .json_str = R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true}]}})", .expected_model = {.name = "my_table", - .schema = std::make_shared( - std::vector{ - SchemaField(1, "id", int32(), false)}, // required - std::nullopt)}}, + .schema = + std::make_shared(std::vector{ + SchemaField(1, "id", int32(), false)})}}, // stage-create field is missing (should default to false) CreateTableRequestDeserializeParam{ .test_name = "MissingStageCreate", .json_str = R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true}]}})", .expected_model = {.name = "my_table", - .schema = std::make_shared( - std::vector{ - SchemaField(1, "id", int32(), false)}, // required - std::nullopt), + .schema = + std::make_shared(std::vector{ + SchemaField(1, "id", int32(), false)}), .stage_create = false}}, // Properties field is missing (should deserialize to empty map) CreateTableRequestDeserializeParam{ @@ -1034,10 +1031,9 @@ INSTANTIATE_TEST_SUITE_P( .json_str = R"({"name":"my_table","schema":{"type":"struct","fields":[{"id":1,"name":"id","type":"int","required":true}]}})", .expected_model = {.name = "my_table", - .schema = std::make_shared( - std::vector{ - SchemaField(1, "id", int32(), false)}, // required - std::nullopt)}}), + .schema = + std::make_shared(std::vector{ + SchemaField(1, "id", int32(), false)})}}), [](const ::testing::TestParamInfo& info) { return info.param.test_name; }); diff --git a/src/iceberg/test/schema_test.cc b/src/iceberg/test/schema_test.cc index ff6bf060a..38fef36c2 100644 --- a/src/iceberg/test/schema_test.cc +++ b/src/iceberg/test/schema_test.cc @@ -39,7 +39,7 @@ std::shared_ptr MakeStructType(Args&&... args) { template std::unique_ptr MakeSchema(Args&&... args) { return std::make_unique( - std::vector{std::move(args)...}, std::nullopt); + std::vector{std::move(args)...}); } TEST(SchemaTest, Basics) { diff --git a/src/iceberg/test/table_metadata_builder_test.cc b/src/iceberg/test/table_metadata_builder_test.cc index 77380e89e..84a272c8e 100644 --- a/src/iceberg/test/table_metadata_builder_test.cc +++ b/src/iceberg/test/table_metadata_builder_test.cc @@ -55,7 +55,8 @@ std::shared_ptr CreateInvalidSchema() { auto field2 = SchemaField::MakeRequired(5, "part_col", string()); auto field3 = SchemaField::MakeRequired(8, "sort_col", timestamp()); return std::make_shared(std::vector{field1, field2, field3}, - std::make_optional(1), std::vector{1}); + /*schema_id=*/1, + /*identifier_field_ids=*/std::vector{10}); } // Helper function to create a simple schema with disordered field_ids @@ -64,7 +65,8 @@ std::shared_ptr CreateDisorderedSchema() { auto field2 = SchemaField::MakeRequired(5, "part_col", string()); auto field3 = SchemaField::MakeRequired(8, "sort_col", timestamp()); return std::make_shared(std::vector{field1, field2, field3}, - std::make_optional(1), std::vector{2}); + /*schema_id=*/1, + /*identifier_field_ids=*/std::vector{2}); } // Helper function to create base metadata for tests @@ -559,7 +561,7 @@ TEST(TableMetadataBuilderTest, AddSchemaBasic) { builder->AddSchema(new_schema); ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); EXPECT_EQ(metadata->last_column_id, 5); // 2. Add duplicate schema - should be idempotent @@ -580,8 +582,8 @@ TEST(TableMetadataBuilderTest, AddSchemaBasic) { builder->AddSchema(schema4); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 3); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); - EXPECT_EQ(metadata->schemas[2]->schema_id().value(), 2); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); + EXPECT_EQ(metadata->schemas[2]->schema_id(), 2); EXPECT_EQ(metadata->last_column_id, 6); } @@ -643,7 +645,7 @@ TEST(TableMetadataBuilderTest, AddSchemaIdempotent) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); // Only one new schema should be added - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); } TEST(TableMetadataBuilderTest, AddSchemaMultipleDifferent) { @@ -665,9 +667,9 @@ TEST(TableMetadataBuilderTest, AddSchemaMultipleDifferent) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 4); // Original + 3 new - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); - EXPECT_EQ(metadata->schemas[2]->schema_id().value(), 2); - EXPECT_EQ(metadata->schemas[3]->schema_id().value(), 3); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); + EXPECT_EQ(metadata->schemas[2]->schema_id(), 2); + EXPECT_EQ(metadata->schemas[3]->schema_id(), 3); EXPECT_EQ(metadata->last_column_id, 6); } @@ -682,8 +684,8 @@ TEST(TableMetadataBuilderTest, SetCurrentSchemaBasic) { builder->SetCurrentSchema(new_schema, 4); ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); - EXPECT_EQ(metadata->current_schema_id.value(), 1); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); EXPECT_EQ(metadata->last_column_id, 4); // 2. Set current schema by schema ID @@ -692,7 +694,7 @@ TEST(TableMetadataBuilderTest, SetCurrentSchemaBasic) { builder->AddSchema(schema1); builder->SetCurrentSchema(1); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); - EXPECT_EQ(metadata->current_schema_id.value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); // 3. Set current schema using -1 (last added) builder = TableMetadataBuilder::BuildFrom(base.get()); @@ -701,13 +703,13 @@ TEST(TableMetadataBuilderTest, SetCurrentSchemaBasic) { builder->AddSchema(schema2); builder->SetCurrentSchema(-1); // Use last added ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); - EXPECT_EQ(metadata->current_schema_id.value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); // 4. Setting same schema is no-op builder = TableMetadataBuilder::BuildFrom(base.get()); builder->SetCurrentSchema(0); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); - EXPECT_EQ(metadata->current_schema_id.value(), 0); + EXPECT_EQ(metadata->current_schema_id, 0); } TEST(TableMetadataBuilderTest, SetCurrentSchemaWithInvalidLastColumnId) { @@ -732,7 +734,7 @@ TEST(TableMetadataBuilderTest, SetCurrentSchemaUpdatesLastColumnId) { auto new_schema = std::make_shared(std::vector{field1, field2}, 1); builder->SetCurrentSchema(new_schema, 10); // Higher than field IDs ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); - EXPECT_EQ(metadata->current_schema_id.value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); EXPECT_EQ(metadata->last_column_id, 10); } @@ -753,7 +755,7 @@ TEST(TableMetadataBuilderTest, AddSchemaWithNestedTypes) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); EXPECT_EQ(metadata->last_column_id, 11); // Should be highest nested field ID } @@ -777,7 +779,7 @@ TEST(TableMetadataBuilderTest, AddSchemaWithListAndMapTypes) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); EXPECT_EQ(metadata->last_column_id, 12); // Should be highest field ID including nested } @@ -803,10 +805,10 @@ TEST(TableMetadataBuilderTest, AddSchemaSequentialIds) { ASSERT_EQ(metadata->schemas.size(), 4); // Original + 3 new // Verify sequential schema IDs - EXPECT_EQ(metadata->schemas[0]->schema_id().value(), 0); // Original - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); - EXPECT_EQ(metadata->schemas[2]->schema_id().value(), 2); - EXPECT_EQ(metadata->schemas[3]->schema_id().value(), 3); + EXPECT_EQ(metadata->schemas[0]->schema_id(), 0); // Original + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); + EXPECT_EQ(metadata->schemas[2]->schema_id(), 2); + EXPECT_EQ(metadata->schemas[3]->schema_id(), 3); } TEST(TableMetadataBuilderTest, AddSchemaWithOptionalFields) { @@ -823,7 +825,7 @@ TEST(TableMetadataBuilderTest, AddSchemaWithOptionalFields) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); // Verify field properties const auto& fields = metadata->schemas[1]->fields(); @@ -872,7 +874,7 @@ TEST(TableMetadataBuilderTest, SetCurrentSchemaWithPartitionSpecRebuild) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); // Verify schema was set - EXPECT_EQ(metadata->current_schema_id.value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); EXPECT_EQ(metadata->last_column_id, 5); // Verify partition specs were rebuilt (they should still exist) @@ -896,19 +898,19 @@ TEST(TableMetadataBuilderTest, SetCurrentSchemaMultipleOperations) { // Set current to first added schema builder->SetCurrentSchema(1); ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); - EXPECT_EQ(metadata->current_schema_id.value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); // Change current to second schema builder = TableMetadataBuilder::BuildFrom(metadata.get()); builder->SetCurrentSchema(2); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); - EXPECT_EQ(metadata->current_schema_id.value(), 2); + EXPECT_EQ(metadata->current_schema_id, 2); // Change back to original schema builder = TableMetadataBuilder::BuildFrom(metadata.get()); builder->SetCurrentSchema(0); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); - EXPECT_EQ(metadata->current_schema_id.value(), 0); + EXPECT_EQ(metadata->current_schema_id, 0); } TEST(TableMetadataBuilderTest, SetCurrentSchemaLastAddedTracking) { @@ -928,7 +930,7 @@ TEST(TableMetadataBuilderTest, SetCurrentSchemaLastAddedTracking) { // Use -1 to set current to last added schema builder->SetCurrentSchema(-1); ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); - EXPECT_EQ(metadata->current_schema_id.value(), 2); // Should be schema2 + EXPECT_EQ(metadata->current_schema_id, 2); // Should be schema2 } TEST(TableMetadataBuilderTest, AddSchemaAndSetCurrentCombined) { @@ -944,8 +946,8 @@ TEST(TableMetadataBuilderTest, AddSchemaAndSetCurrentCombined) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); - EXPECT_EQ(metadata->current_schema_id.value(), 1); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 1); EXPECT_EQ(metadata->last_column_id, 4); } @@ -998,7 +1000,7 @@ TEST(TableMetadataBuilderTest, SetCurrentSchemaRebuildsSpecsAndOrders) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); // Verify schema was set - EXPECT_EQ(metadata->current_schema_id.value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); // Verify partition specs were rebuilt (they should still exist) ASSERT_EQ(metadata->partition_specs.size(), 2); @@ -1032,16 +1034,16 @@ TEST(TableMetadataBuilderTest, RemoveSchemasBasic) { builder->RemoveSchemas({1}); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 3); - EXPECT_EQ(metadata->schemas[0]->schema_id().value(), 0); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 2); - EXPECT_EQ(metadata->schemas[2]->schema_id().value(), 3); + EXPECT_EQ(metadata->schemas[0]->schema_id(), 0); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 2); + EXPECT_EQ(metadata->schemas[2]->schema_id(), 3); // Remove multiple schemas builder = TableMetadataBuilder::BuildFrom(metadata.get()); builder->RemoveSchemas({2, 3}); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 1); - EXPECT_EQ(metadata->schemas[0]->schema_id().value(), Schema::kInitialSchemaId); + EXPECT_EQ(metadata->schemas[0]->schema_id(), Schema::kInitialSchemaId); } TEST(TableMetadataBuilderTest, RemoveSchemasCannotRemoveCurrent) { @@ -1055,7 +1057,7 @@ TEST(TableMetadataBuilderTest, RemoveSchemasCannotRemoveCurrent) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); - EXPECT_EQ(metadata->current_schema_id.value(), 0); + EXPECT_EQ(metadata->current_schema_id, 0); // Try to remove current schema (ID 0) builder = TableMetadataBuilder::BuildFrom(metadata.get()); @@ -1093,7 +1095,7 @@ TEST(TableMetadataBuilderTest, RemoveSchemasNonExistent) { builder->RemoveSchemas({1, 999, 888}); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 1); - EXPECT_EQ(metadata->schemas[0]->schema_id().value(), Schema::kInitialSchemaId); + EXPECT_EQ(metadata->schemas[0]->schema_id(), Schema::kInitialSchemaId); } TEST(TableMetadataBuilderTest, RemoveSchemasEmptySet) { @@ -1131,16 +1133,16 @@ TEST(TableMetadataBuilderTest, RemoveSchemasAfterSchemaChange) { ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 3); - EXPECT_EQ(metadata->current_schema_id.value(), 1); + EXPECT_EQ(metadata->current_schema_id, 1); // Now remove the old current schema (ID 0) builder = TableMetadataBuilder::BuildFrom(metadata.get()); builder->RemoveSchemas({0}); ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build()); ASSERT_EQ(metadata->schemas.size(), 2); - EXPECT_EQ(metadata->current_schema_id.value(), 1); - EXPECT_EQ(metadata->schemas[0]->schema_id().value(), 1); - EXPECT_EQ(metadata->schemas[1]->schema_id().value(), 2); + EXPECT_EQ(metadata->current_schema_id, 1); + EXPECT_EQ(metadata->schemas[0]->schema_id(), 1); + EXPECT_EQ(metadata->schemas[1]->schema_id(), 2); // Cannot remove the new current schema builder = TableMetadataBuilder::BuildFrom(metadata.get()); diff --git a/src/iceberg/test/table_requirement_test.cc b/src/iceberg/test/table_requirement_test.cc index 07055f18c..d50d9f413 100644 --- a/src/iceberg/test/table_requirement_test.cc +++ b/src/iceberg/test/table_requirement_test.cc @@ -77,13 +77,6 @@ TEST(TableRequirementTest, AssertCurrentSchemaID) { status = req_for_null.Validate(nullptr); EXPECT_THAT(status, IsError(ErrorKind::kCommitFailed)); EXPECT_THAT(status, HasErrorMessage("metadata is missing")); - - // Schema ID not set - base->current_schema_id = std::nullopt; - table::AssertCurrentSchemaID req_for_nullopt(5); - status = req_for_nullopt.Validate(base.get()); - EXPECT_THAT(status, IsError(ErrorKind::kCommitFailed)); - EXPECT_THAT(status, HasErrorMessage("schema ID is not set")); } TEST(TableRequirementTest, AssertDoesNotExist) { diff --git a/src/iceberg/util/snapshot_util.cc b/src/iceberg/util/snapshot_util.cc index d2d083ea7..e76426f3c 100644 --- a/src/iceberg/util/snapshot_util.cc +++ b/src/iceberg/util/snapshot_util.cc @@ -298,7 +298,7 @@ Result> SnapshotUtil::SchemaFor(const TableMetadata& met return metadata.Schema(); } - return metadata.SchemaById(snapshot->schema_id); + return metadata.SchemaById(snapshot->schema_id.value()); } Result> SnapshotUtil::LatestSnapshot(