diff --git a/src/iceberg/partition_spec.cc b/src/iceberg/partition_spec.cc
index 3bfd0ffbb..9c38d0c53 100644
--- a/src/iceberg/partition_spec.cc
+++ b/src/iceberg/partition_spec.cc
@@ -26,16 +26,19 @@
 #include
 #include
 #include
+#include <sstream>
 #include
 
 #include "iceberg/result.h"
+#include "iceberg/row/partition_values.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/transform.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
 #include "iceberg/util/macros.h"
 #include "iceberg/util/type_util.h"
+#include "iceberg/util/url_encoder.h"
 
 namespace iceberg {
 
@@ -98,6 +101,27 @@ Result> PartitionSpec::PartitionType(
   return std::make_unique(std::move(partition_fields));
 }
 
+Result<std::string> PartitionSpec::PartitionPath(const PartitionValues& data) const {
+  ICEBERG_PRECHECK(fields_.size() == data.num_fields(),
+                   "Partition spec and data mismatch, expected field num {}, got {}",
+                   fields_.size(), data.num_fields());
+  // Emit "<name>=<value>" per partition field, joined by '/'. Only the names and
+  // values pass through UrlEncoder; the '=' and '/' separators are written verbatim.
+  std::stringstream ss;
+  for (size_t i = 0; i < fields_.size(); ++i) {
+    ICEBERG_ASSIGN_OR_RAISE(auto value, data.ValueAt(i));
+    if (i > 0) {
+      ss << "/";
+    }
+    // TODO(zhuo.wang): transform for partition value, will be fixed after transform util
+    // is ready
+    std::string partition_value = value.get().ToString();
+    ss << UrlEncoder::Encode(fields_[i].name()) << "="
+       << UrlEncoder::Encode(partition_value);
+  }
+  return ss.str();
+}
+
 bool PartitionSpec::CompatibleWith(const PartitionSpec& other) const {
   if (Equals(other)) {
     return true;
diff --git a/src/iceberg/partition_spec.h b/src/iceberg/partition_spec.h
index ae10dfccf..0fb8814b8 100644
--- a/src/iceberg/partition_spec.h
+++ b/src/iceberg/partition_spec.h
@@ -64,6 +64,9 @@ class ICEBERG_EXPORT PartitionSpec : public util::Formattable {
   /// \brief Get the partition type binding to the input schema.
   Result> PartitionType(const Schema& schema) const;
 
+  /// \brief Get the partition path for the given partition data.
+  Result<std::string> PartitionPath(const PartitionValues& data) const;
+
   /// \brief Returns true if this spec is equivalent to the other, with partition field
   /// ids ignored. That is, if both specs have the same number of fields, field order,
   /// field name, source columns, and transforms.
diff --git a/src/iceberg/test/partition_spec_test.cc b/src/iceberg/test/partition_spec_test.cc
index e20245047..ea3ea6e12 100644
--- a/src/iceberg/test/partition_spec_test.cc
+++ b/src/iceberg/test/partition_spec_test.cc
@@ -28,6 +28,7 @@
 
 #include "iceberg/json_internal.h"
 #include "iceberg/partition_field.h"
+#include "iceberg/row/partition_values.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/test/matchers.h"
@@ -425,4 +426,52 @@ TEST(PartitionSpecTest, ValidateRedundantPartitionsIdentityTransforms) {
   }
 }
 
+TEST(PartitionSpecTest, PartitionPath) {
+  // Three-column schema (int64, string, timestamp) backing the partition spec below
+  auto id_field = SchemaField::MakeRequired(1, "id", int64());
+  auto name_field = SchemaField::MakeRequired(2, "name", string());
+  auto ts_field = SchemaField::MakeRequired(3, "ts", timestamp());
+  Schema schema({id_field, name_field, ts_field}, Schema::kInitialSchemaId);
+
+  // One partition field per column: identity on id and name, day on ts
+  PartitionField id_field_partition(1, 1000, "id_partition", Transform::Identity());
+  PartitionField name_field_partition(2, 1001, "name_partition", Transform::Identity());
+  PartitionField ts_field_partition(3, 1002, "ts_partition", Transform::Day());
+
+  // Spec with three partition fields, so PartitionPath expects exactly three values
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto spec,
+      PartitionSpec::Make(schema, 1,
+                          {id_field_partition, name_field_partition, ts_field_partition},
+                          false));
+
+  {
+    // One value for a three-field spec: the size precheck must reject the input
+    PartitionValues part_data({Literal::Int(123)});
+    auto result = spec->PartitionPath(part_data);
+    EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+    EXPECT_THAT(result, HasErrorMessage("Partition spec and data mismatch"));
+  }
+
+  {
+    // Well-formed values; the string value comes out wrapped in %22 (an encoded '"')
+    PartitionValues part_data(
+        {Literal::Int(123), Literal::String("val2"), Literal::Date(19489)});
+    ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
+    std::string expected =
+        "id_partition=123/name_partition=%22val2%22/ts_partition=19489";
+    EXPECT_EQ(expected, path);
+  }
+
+  {
+    // A '#' inside a value must be percent-encoded as %23 in the path
+    PartitionValues part_data(
+        {Literal::Int(123), Literal::String("val#2"), Literal::Date(19489)});
+    ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
+    std::string expected =
+        "id_partition=123/name_partition=%22val%232%22/ts_partition=19489";
+    EXPECT_EQ(expected, path);
+  }
+}
+
 }  // namespace iceberg