Skip to content
26 changes: 15 additions & 11 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,6 @@ def _apply(self, updates: Tuple[TableUpdate, ...], requirements: Tuple[TableRequ

if self._autocommit:
self.commit_transaction()
self._updates = ()
self._requirements = ()

return self

Expand Down Expand Up @@ -937,13 +935,15 @@ def commit_transaction(self) -> Table:
updates=self._updates,
requirements=self._requirements,
)
return self._table
else:
return self._table

self._updates = ()
self._requirements = ()

return self._table


class CreateTableTransaction(Transaction):
"""A transaction that involves the creation of a a new table."""
"""A transaction that involves the creation of a new table."""

def _initial_changes(self, table_metadata: TableMetadata) -> None:
"""Set the initial changes that can reconstruct the initial table metadata when creating the CreateTableTransaction."""
Expand Down Expand Up @@ -988,11 +988,15 @@ def commit_transaction(self) -> Table:
Returns:
The table with the updates applied.
"""
self._requirements = (AssertCreate(),)
self._table._do_commit( # pylint: disable=W0212
updates=self._updates,
requirements=self._requirements,
)
if len(self._updates) > 0:
self._table._do_commit( # pylint: disable=W0212
updates=self._updates,
requirements=(AssertCreate(),),
)

self._updates = ()
self._requirements = ()
Comment on lines +991 to +998
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we use super().commit_transaction here?

I want to make sure there's a way for future subclass of Transaction to also clear the updates and requirements

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment on lines +997 to +998
Copy link
Contributor Author

@Fokko Fokko Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could refactor it like:

Suggested change
self._updates = ()
self._requirements = ()
super().commit_transaction()

But it doesn't actually commit, but just clear the self_requirements, which is also a bit confusing to me. For me, it makes less sense to commit a CreateTableTransaction twice in the first place. With the original version, it would just error since the table is already created.

Comment on lines +991 to +998
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah we cant use super().commit_transaction() here because the base Transaction class' commit_transaction() method adds a AssertTableUUID(uuid=self.table_metadata.table_uuid), as a requirement.

Since requirements are checked before updates, this wont work.

👍 im fine with leaving it as is.
We just need to remember to clear _updates and _requirements for future subclasses of Transaction. And hopefully we'll follow the same pattern

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, exactly. The AssertTableUUID is orthogonal to the CreateTable operation, thanks for checking 👍


return self._table


Expand Down
17 changes: 17 additions & 0 deletions tests/integration/test_writes/test_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1802,6 +1802,23 @@ def test_write_optional_list(session_catalog: Catalog) -> None:
assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 4


@pytest.mark.integration
@pytest.mark.parametrize("format_version", [1, 2])
def test_double_commit_transaction(
spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int
) -> None:
identifier = "default.arrow_data_files"
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, [])

assert len(tbl.metadata.metadata_log) == 0

with tbl.transaction() as tx:
tx.append(arrow_table_with_null)
tx.commit_transaction()

assert len(tbl.metadata.metadata_log) == 1


@pytest.mark.integration
@pytest.mark.parametrize("format_version", [1, 2])
def test_evolve_and_write(
Expand Down