diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py
index f248700c02..9c2ae29cdd 100644
--- a/pyiceberg/table/metadata.py
+++ b/pyiceberg/table/metadata.py
@@ -36,7 +36,7 @@
     SortOrder,
     assign_fresh_sort_order_ids,
 )
-from pyiceberg.table.statistics import StatisticsFile
+from pyiceberg.table.statistics import PartitionStatisticsFile, StatisticsFile
 from pyiceberg.typedef import (
     EMPTY_DICT,
     IcebergBaseModel,
@@ -222,6 +222,14 @@ class TableMetadataCommonFields(IcebergBaseModel):
     table correctly. A table can contain many statistics files
     associated with different table snapshots."""
 
+    partition_statistics: List[PartitionStatisticsFile] = Field(alias="partition-statistics", default_factory=list)
+    """An optional list of partition statistics files.
+    Partition statistics are not required for reading or planning
+    and readers may ignore them. Each table snapshot may be associated
+    with at most one partition statistics file. A writer can optionally
+    write the partition statistics file during each write operation,
+    or it can also be computed on demand."""
+
     # validators
     @field_validator("properties", mode="before")
     def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:
diff --git a/pyiceberg/table/statistics.py b/pyiceberg/table/statistics.py
index 151f5e961c..702a262092 100644
--- a/pyiceberg/table/statistics.py
+++ b/pyiceberg/table/statistics.py
@@ -29,15 +29,26 @@ class BlobMetadata(IcebergBaseModel):
     properties: Optional[Dict[str, str]] = None
 
 
-class StatisticsFile(IcebergBaseModel):
+class StatisticsCommonFields(IcebergBaseModel):
+    """Common fields between table and partition statistics structs found on metadata."""
+
     snapshot_id: int = Field(alias="snapshot-id")
     statistics_path: str = Field(alias="statistics-path")
     file_size_in_bytes: int = Field(alias="file-size-in-bytes")
+
+
+class StatisticsFile(StatisticsCommonFields):
+    """Table statistics file entry: the common fields plus footer size, key metadata and blob metadata."""
+
     file_footer_size_in_bytes: int = Field(alias="file-footer-size-in-bytes")
     key_metadata: Optional[str] = Field(alias="key-metadata", default=None)
     blob_metadata: List[BlobMetadata] = Field(alias="blob-metadata")
 
 
+class PartitionStatisticsFile(StatisticsCommonFields):
+    """Partition statistics file entry: carries only the common statistics fields."""
+
+
 def filter_statistics_by_snapshot_id(
     statistics: List[StatisticsFile],
     reject_snapshot_id: int,