diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index a189b07c1e..cab5d73d27 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1198,10 +1198,11 @@ def upsert( update_row_cnt = len(rows_to_update) - # build the match predicate filter - overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols) + if len(rows_to_update) > 0: + # build the match predicate filter + overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols) - tx.overwrite(rows_to_update, overwrite_filter=overwrite_mask_predicate) + tx.overwrite(rows_to_update, overwrite_filter=overwrite_mask_predicate) if when_not_matched_insert_all: expr_match = upsert_util.create_match_filter(matched_iceberg_table, join_cols) @@ -1211,7 +1212,8 @@ def upsert( insert_row_cnt = len(rows_to_insert) - tx.append(rows_to_insert) + if insert_row_cnt > 0: + tx.append(rows_to_insert) return UpsertResult(rows_updated=update_row_cnt, rows_inserted=insert_row_cnt) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index 7f9e13b5a1..19bfbc01de 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -28,6 +28,7 @@ from pyiceberg.io.pyarrow import schema_to_pyarrow from pyiceberg.schema import Schema from pyiceberg.table import UpsertResult +from pyiceberg.table.snapshots import Operation from pyiceberg.table.upsert_util import create_match_filter from pyiceberg.types import IntegerType, NestedField, StringType from tests.catalog.test_base import InMemoryCatalog, Table @@ -368,9 +369,21 @@ def test_upsert_with_identifier_fields(catalog: Catalog) -> None: ) upd = tbl.upsert(df) + expected_operations = [Operation.APPEND, Operation.OVERWRITE, Operation.APPEND, Operation.APPEND] + assert upd.rows_updated == 1 assert upd.rows_inserted == 1 + assert [snap.summary.operation for snap in tbl.snapshots() if snap.summary is not None] == expected_operations + + # This should be a no-op + upd = tbl.upsert(df) + + assert upd.rows_updated == 0 + assert upd.rows_inserted == 0 + + assert [snap.summary.operation for snap in tbl.snapshots() if snap.summary is not None] == expected_operations + def test_upsert_into_empty_table(catalog: Catalog) -> None: identifier = "default.test_upsert_into_empty_table"