diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index 8d1a24c420..60ad7219e1 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -58,6 +58,7 @@ TOTAL_FILE_SIZE = "total-files-size" CHANGED_PARTITION_COUNT_PROP = "changed-partition-count" CHANGED_PARTITION_PREFIX = "partitions." +PARTITION_SUMMARY_PROP = "partition-summaries-included" OPERATION = "operation" INITIAL_SEQUENCE_NUMBER = 0 @@ -306,6 +307,8 @@ def build(self) -> Dict[str, str]: changed_partitions_size = len(self.partition_metrics) set_when_positive(properties, changed_partitions_size, CHANGED_PARTITION_COUNT_PROP) if changed_partitions_size <= self.max_changed_partitions_for_summaries: + if changed_partitions_size > 0: + properties[PARTITION_SUMMARY_PROP] = "true" for partition_path, update_metrics_partition in self.partition_metrics.items(): if (summary := self._partition_summary(update_metrics_partition)) and len(summary) != 0: properties[CHANGED_PARTITION_PREFIX + partition_path] = summary diff --git a/tests/table/test_snapshots.py b/tests/table/test_snapshots.py index b71d92aa55..d26562ad8f 100644 --- a/tests/table/test_snapshots.py +++ b/tests/table/test_snapshots.py @@ -224,6 +224,7 @@ def test_snapshot_summary_collector_with_partition() -> None: "added-records": "100", "deleted-records": "300", "changed-partition-count": "2", + "partition-summaries-included": "true", "partitions.int_field=1": "added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100", "partitions.int_field=2": "removed-files-size=4321,deleted-data-files=1,deleted-records=200", } @@ -259,11 +260,32 @@ def test_snapshot_summary_collector_with_partition_limit_in_constructor() -> Non "added-records": "100", "deleted-records": "300", "changed-partition-count": "2", + "partition-summaries-included": "true", "partitions.int_field=1": "added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100", "partitions.int_field=2": "removed-files-size=4321,deleted-data-files=1,deleted-records=200", } +@pytest.mark.integration +def test_partition_summaries_included_not_set_when_no_change() -> None: + ssc = SnapshotSummaryCollector() + # No files added, so no partition_metrics + ssc.set_partition_summary_limit(10) + result = ssc.build() + assert "partition-summaries-included" not in result + assert result == {} # Should be empty dict + + +@pytest.mark.integration +def test_partition_summaries_included_not_set_when_unpartitioned_files(table_schema_simple: Schema) -> None: + ssc = SnapshotSummaryCollector() + data_file = DataFile.from_args(content=DataFileContent.DATA, record_count=100, file_size_in_bytes=1234, partition=Record()) + ssc.add_file(data_file, schema=table_schema_simple) + ssc.set_partition_summary_limit(10) + result = ssc.build() + assert "partition-summaries-included" not in result + + def test_merge_snapshot_summaries_empty() -> None: assert update_snapshot_summaries(Summary(Operation.APPEND)) == Summary( operation=Operation.APPEND,