From 1d1a68d174c67e2b701375a5700cb46e2b2f9c57 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 1 Sep 2025 12:37:20 +0200 Subject: [PATCH 1/5] Set the ManifestEntryStatus A faulty refactor: https://github.com/apache/iceberg-python/commit/59742e01e8d190aa759e0154b03300bd81a28d02#diff-e9df836e263024ba54e2706853fb25c00269fbfe3726b440ba57f4a929c995dcR927 This produces incorrect metadata, but was hidden by the writer. --- pyiceberg/manifest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index a92d944811..bfc4338412 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -1090,7 +1090,10 @@ def add(self, entry: ManifestEntry) -> ManifestWriter: if entry.sequence_number is not None and entry.sequence_number >= 0: self.add_entry( ManifestEntry.from_args( - snapshot_id=self._snapshot_id, sequence_number=entry.sequence_number, data_file=entry.data_file + status=ManifestEntryStatus.ADDED, + snapshot_id=self._snapshot_id, + sequence_number=entry.sequence_number, + data_file=entry.data_file, ) ) else: From 5214de50564b771378fba2a7267e0d84cde27be6 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 1 Sep 2025 21:02:05 +0200 Subject: [PATCH 2/5] Cleanup --- pyiceberg/manifest.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index bfc4338412..21d7f42fbd 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -1087,21 +1087,16 @@ def add_entry(self, entry: ManifestEntry) -> ManifestWriter: return self def add(self, entry: ManifestEntry) -> ManifestWriter: - if entry.sequence_number is not None and entry.sequence_number >= 0: - self.add_entry( - ManifestEntry.from_args( - status=ManifestEntryStatus.ADDED, - snapshot_id=self._snapshot_id, - sequence_number=entry.sequence_number, - data_file=entry.data_file, - ) - ) - else: - self.add_entry( - ManifestEntry.from_args( - status=ManifestEntryStatus.ADDED, snapshot_id=self._snapshot_id, data_file=entry.data_file - ) + self.add_entry( + ManifestEntry.from_args( + status=ManifestEntryStatus.ADDED, + snapshot_id=self._snapshot_id, + sequence_number=entry.sequence_number if entry.file_sequence_number != UNASSIGNED_SEQ else None, + file_sequence_number=entry.file_sequence_number if entry.file_sequence_number != UNASSIGNED_SEQ else None, + data_file=entry.data_file, ) + ) + return self def delete(self, entry: ManifestEntry) -> ManifestWriter: From 37b53aa50d93f3526ed168fe9330fb0976e5a63c Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 1 Sep 2025 21:42:39 +0200 Subject: [PATCH 3/5] Extend test --- pyiceberg/manifest.py | 1 - tests/catalog/test_sql.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 21d7f42fbd..713d23f933 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -1092,7 +1092,6 @@ def add(self, entry: ManifestEntry) -> ManifestWriter: status=ManifestEntryStatus.ADDED, snapshot_id=self._snapshot_id, sequence_number=entry.sequence_number if entry.file_sequence_number != UNASSIGNED_SEQ else None, - file_sequence_number=entry.file_sequence_number if entry.file_sequence_number != UNASSIGNED_SEQ else None, data_file=entry.data_file, ) ) diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 27105e8004..83b7338c4a 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -1657,6 +1657,8 @@ def test_merge_manifests_local_file_system(catalog: SqlCatalog, arrow_table_with tbl.append(arrow_table_with_null) assert len(tbl.scan().to_arrow()) == 5 * len(arrow_table_with_null) + manifests = tbl.current_snapshot().manifests(tbl.io) + assert len(manifests) == 1 @pytest.mark.parametrize( From d171ec493f74c66c886aa7c7d4b4eceeaa09f64d Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 1 Sep 2025 21:43:51 +0200 Subject: [PATCH 4/5] Cleanup --- tests/catalog/test_sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 83b7338c4a..00868a5739 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -1657,7 +1657,9 @@ def test_merge_manifests_local_file_system(catalog: SqlCatalog, arrow_table_with tbl.append(arrow_table_with_null) assert len(tbl.scan().to_arrow()) == 5 * len(arrow_table_with_null) - manifests = tbl.current_snapshot().manifests(tbl.io) + current_snapshot = tbl.current_snapshot() + assert current_snapshot + manifests = current_snapshot.manifests(tbl.io) assert len(manifests) == 1 From aa0ef95ef1e26f20ae1bf545a143f94934283ff7 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 1 Sep 2025 23:50:54 +0200 Subject: [PATCH 5/5] Update manifest.py Co-authored-by: Kevin Liu --- pyiceberg/manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 713d23f933..470c99d602 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -1091,7 +1091,7 @@ def add(self, entry: ManifestEntry) -> ManifestWriter: ManifestEntry.from_args( status=ManifestEntryStatus.ADDED, snapshot_id=self._snapshot_id, - sequence_number=entry.sequence_number if entry.file_sequence_number != UNASSIGNED_SEQ else None, + sequence_number=entry.sequence_number if entry.sequence_number != UNASSIGNED_SEQ else None, data_file=entry.data_file, ) )