From 915b85f405c600c4d039dcdbc7383cee1e09a5d1 Mon Sep 17 00:00:00 2001 From: Fokko Date: Fri, 2 May 2025 14:44:13 +0200 Subject: [PATCH 1/5] Clear updates/requirements after commit Resolves #1946 --- pyiceberg/table/__init__.py | 4 +++- tests/integration/test_writes/test_writes.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 8f7b45f532..85c1aae1c2 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -773,13 +773,15 @@ def commit_transaction(self) -> Table: updates=self._updates, requirements=self._requirements, ) + self._updates = () + self._requirements = () return self._table else: return self._table class CreateTableTransaction(Transaction): - """A transaction that involves the creation of a a new table.""" + """A transaction that involves the creation of a new table.""" def _initial_changes(self, table_metadata: TableMetadata) -> None: """Set the initial changes that can reconstruct the initial table metadata when creating the CreateTableTransaction.""" diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 8ea2e93cb1..28d42dd2f6 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -1684,3 +1684,13 @@ def test_write_optional_list(session_catalog: Catalog) -> None: session_catalog.load_table(identifier).append(df_2) assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 4 + + +@pytest.mark.integration +def test_double_commit_transaction(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: + identifier = "default.arrow_data_files" + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, []) + + with tbl.transaction() as tx: + tx.append(arrow_table_with_null) + tx.commit_transaction() From 4d24d0b2d427c30594b1c3e74d44e93fb9d376b3 Mon Sep 17 00:00:00 2001 From: Fokko Date: Fri, 2 May 2025 14:54:11 +0200 Subject: [PATCH 2/5] Add an `assert` to the test --- tests/integration/test_writes/test_writes.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index db0ded3933..a5292762d9 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -1786,10 +1786,14 @@ def test_double_commit_transaction( identifier = "default.arrow_data_files" tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, []) + assert len(tbl.metadata.metadata_log) == 0 + with tbl.transaction() as tx: tx.append(arrow_table_with_null) tx.commit_transaction() + assert len(tbl.metadata.metadata_log) == 1 + @pytest.mark.integration @pytest.mark.parametrize("format_version", [1, 2]) From f39a2f7270a4b55e2a4e23c5dd0ab3c4f15dc46e Mon Sep 17 00:00:00 2001 From: Fokko Date: Sat, 3 May 2025 20:02:50 +0200 Subject: [PATCH 3/5] Simplify logic, thanks! --- pyiceberg/table/__init__.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index d517b0a79b..cf0c572295 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -291,8 +291,6 @@ def _apply(self, updates: Tuple[TableUpdate, ...], requirements: Tuple[TableRequ if self._autocommit: self.commit_transaction() - self._updates = () - self._requirements = () return self @@ -774,11 +772,11 @@ def commit_transaction(self) -> Table: updates=self._updates, requirements=self._requirements, ) - self._updates = () - self._requirements = () - return self._table - else: - return self._table + + self._updates = () + self._requirements = () + + return self._table class CreateTableTransaction(Transaction): From 54d980d08d014e06a8c80fbfbfb1964c338bd03a Mon Sep 17 00:00:00 2001 From: Fokko Date: Thu, 15 May 2025 12:22:36 +0200 Subject: [PATCH 4/5] Great catch Kevin! --- pyiceberg/table/__init__.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index cf0c572295..e78029b768 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -825,11 +825,16 @@ def commit_transaction(self) -> Table: Returns: The table with the updates applied. """ - self._requirements = (AssertCreate(),) - self._table._do_commit( # pylint: disable=W0212 - updates=self._updates, - requirements=self._requirements, - ) + if len(self._updates) > 0: + self._requirements += (AssertCreate(),) + self._table._do_commit( # pylint: disable=W0212 + updates=self._updates, + requirements=self._requirements, + ) + + self._updates = () + self._requirements = () + return self._table From a01708ca70a54e6f242b6dbea5bc345812e9eefc Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 4 Aug 2025 21:17:04 +0200 Subject: [PATCH 5/5] Fix CI --- pyiceberg/table/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 1434e4c3d3..21898e9c4d 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -989,10 +989,9 @@ def commit_transaction(self) -> Table: The table with the updates applied. """ if len(self._updates) > 0: - self._requirements += (AssertCreate(),) self._table._do_commit( # pylint: disable=W0212 updates=self._updates, - requirements=self._requirements, + requirements=(AssertCreate(),), ) self._updates = ()