Skip to content

Commit 0bf0e8b

Browse files
Yingjian Wu (author) committed:
refactor partition_summary_limit into SnapshotSummaryCollector constructor
1 parent be528ae commit 0bf0e8b

File tree

3 files changed: 23 additions (+23) and 8 deletions (-8)

pyiceberg/table/snapshots.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -272,13 +272,10 @@ class SnapshotSummaryCollector:
272272
partition_metrics: DefaultDict[str, UpdateMetrics]
273273
max_changed_partitions_for_summaries: int
274274

275-
def __init__(self) -> None:
275+
def __init__(self, partition_summary_limit: int = 0) -> None:
276276
self.metrics = UpdateMetrics()
277277
self.partition_metrics = defaultdict(UpdateMetrics)
278-
self.max_changed_partitions_for_summaries = 0
279-
280-
def set_partition_summary_limit(self, limit: int) -> None:
281-
self.max_changed_partitions_for_summaries = limit
278+
self.max_changed_partitions_for_summaries = partition_summary_limit
282279

283280
def add_file(self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC) -> None:
284281
self.metrics.add_file(data_file)

pyiceberg/table/update/snapshot.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,13 +203,12 @@ def _write_delete_manifest() -> List[ManifestFile]:
203203
def _summary(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> Summary:
204204
from pyiceberg.table import TableProperties
205205

206-
ssc = SnapshotSummaryCollector()
207206
partition_summary_limit = int(
208207
self._transaction.table_metadata.properties.get(
209208
TableProperties.WRITE_PARTITION_SUMMARY_LIMIT, TableProperties.WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT
210209
)
211210
)
212-
ssc.set_partition_summary_limit(partition_summary_limit)
211+
ssc = SnapshotSummaryCollector(partition_summary_limit=partition_summary_limit)
213212

214213
for data_file in self._added_data_files:
215214
ssc.add_file(

tests/table/test_snapshots.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,27 @@ def test_snapshot_summary_collector_with_partition() -> None:
201201
"changed-partition-count": "2",
202202
}
203203

204+
205+
@pytest.mark.integration
206+
def test_snapshot_summary_collector_with_partition_limit_in_constructor() -> None:
207+
# Given
208+
partition_summary_limit = 10
209+
ssc = SnapshotSummaryCollector(partition_summary_limit=partition_summary_limit)
210+
211+
assert ssc.build() == {}
212+
schema = Schema(
213+
NestedField(field_id=1, name="bool_field", field_type=BooleanType(), required=False),
214+
NestedField(field_id=2, name="string_field", field_type=StringType(), required=False),
215+
NestedField(field_id=3, name="int_field", field_type=IntegerType(), required=False),
216+
)
217+
spec = PartitionSpec(PartitionField(source_id=3, field_id=1001, transform=IdentityTransform(), name="int_field"))
218+
data_file_1 = DataFile(content=DataFileContent.DATA, record_count=100, file_size_in_bytes=1234, partition=Record(int_field=1))
219+
data_file_2 = DataFile(content=DataFileContent.DATA, record_count=200, file_size_in_bytes=4321, partition=Record(int_field=2))
220+
204221
# When
205-
ssc.set_partition_summary_limit(10)
222+
ssc.add_file(data_file=data_file_1, schema=schema, partition_spec=spec)
223+
ssc.remove_file(data_file=data_file_1, schema=schema, partition_spec=spec)
224+
ssc.remove_file(data_file=data_file_2, schema=schema, partition_spec=spec)
206225

207226
# Then
208227
assert ssc.build() == {

0 commit comments

Comments
 (0)