Skip to content

Commit 87fa187

Browse files
Fokko, gabeiglio
authored and committed
Upsert: Don't produce empty snapshots (apache#1810)
<!-- Thanks for opening a pull request! -->
<!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->

# Rationale for this change

Yikes! This makes sure to only produce a snapshot when there is something to update or append.

# Are these changes tested?

Yes, by checking the snapshots that are being produced.

# Are there any user-facing changes?

Smaller metadata and faster commits when there is nothing to append/update :)

<!-- In the case of user-facing changes, please add the changelog label. -->
1 parent c9b714f commit 87fa187

File tree

2 files changed

+19
-4
lines changed

2 files changed

+19
-4
lines changed

pyiceberg/table/__init__.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1198,10 +1198,11 @@ def upsert(
11981198

11991199
update_row_cnt = len(rows_to_update)
12001200

1201-
# build the match predicate filter
1202-
overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols)
1201+
if len(rows_to_update) > 0:
1202+
# build the match predicate filter
1203+
overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols)
12031204

1204-
tx.overwrite(rows_to_update, overwrite_filter=overwrite_mask_predicate)
1205+
tx.overwrite(rows_to_update, overwrite_filter=overwrite_mask_predicate)
12051206

12061207
if when_not_matched_insert_all:
12071208
expr_match = upsert_util.create_match_filter(matched_iceberg_table, join_cols)
@@ -1211,7 +1212,8 @@ def upsert(
12111212

12121213
insert_row_cnt = len(rows_to_insert)
12131214

1214-
tx.append(rows_to_insert)
1215+
if insert_row_cnt > 0:
1216+
tx.append(rows_to_insert)
12151217

12161218
return UpsertResult(rows_updated=update_row_cnt, rows_inserted=insert_row_cnt)
12171219

tests/table/test_upsert.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
from pyiceberg.io.pyarrow import schema_to_pyarrow
2929
from pyiceberg.schema import Schema
3030
from pyiceberg.table import UpsertResult
31+
from pyiceberg.table.snapshots import Operation
3132
from pyiceberg.table.upsert_util import create_match_filter
3233
from pyiceberg.types import IntegerType, NestedField, StringType
3334
from tests.catalog.test_base import InMemoryCatalog, Table
@@ -368,9 +369,21 @@ def test_upsert_with_identifier_fields(catalog: Catalog) -> None:
368369
)
369370
upd = tbl.upsert(df)
370371

372+
expected_operations = [Operation.APPEND, Operation.OVERWRITE, Operation.APPEND, Operation.APPEND]
373+
371374
assert upd.rows_updated == 1
372375
assert upd.rows_inserted == 1
373376

377+
assert [snap.summary.operation for snap in tbl.snapshots() if snap.summary is not None] == expected_operations
378+
379+
# This should be a no-op
380+
upd = tbl.upsert(df)
381+
382+
assert upd.rows_updated == 0
383+
assert upd.rows_inserted == 0
384+
385+
assert [snap.summary.operation for snap in tbl.snapshots() if snap.summary is not None] == expected_operations
386+
374387

375388
def test_upsert_into_empty_table(catalog: Catalog) -> None:
376389
identifier = "default.test_upsert_into_empty_table"

0 commit comments

Comments
 (0)