From ccfcbcd3a36b7687339807c1bc3dc7977ae60d83 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 5 Jan 2025 20:20:51 +0100 Subject: [PATCH 01/44] chore: scaffolding --- pyiceberg/table/__init__.py | 13 +++++++++++ pyiceberg/table/update/sort_order.py | 35 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 pyiceberg/table/update/sort_order.py diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 2469a9ed7b..36b3d45520 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -117,6 +117,7 @@ UpdateSnapshot, _FastAppendFiles, ) +from pyiceberg.table.update.sort_order import UpdateSortOrder from pyiceberg.table.update.spec import UpdateSpec from pyiceberg.transforms import IdentityTransform from pyiceberg.typedef import ( @@ -403,6 +404,10 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive case_sensitive=case_sensitive, name_mapping=self.table_metadata.name_mapping(), ) + + + def replace_sort_order(self) -> None: + ... def update_snapshot(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> UpdateSnapshot: """Create a new UpdateSnapshot to produce a new snapshot for the table. @@ -1050,6 +1055,14 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive name_mapping=self.name_mapping(), ) + def replace_sort_order(self) -> UpdateSortOrder: + """Create a new UpdateSortOrder to replace the sort order of this table. + + Returns: + A new UpdateSortOrder. + """ + return UpdateSortOrder(self) + def name_mapping(self) -> Optional[NameMapping]: """Return the table's field-id NameMapping.""" return self.metadata.name_mapping() diff --git a/pyiceberg/table/update/sort_order.py b/pyiceberg/table/update/sort_order.py new file mode 100644 index 0000000000..a08bc3784b --- /dev/null +++ b/pyiceberg/table/update/sort_order.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from typing import ( + TYPE_CHECKING, +) + +from pyiceberg.table.update import ( + UpdateTableMetadata, +) + +if TYPE_CHECKING: + from pyiceberg.table import Transaction + + +class UpdateSpec(UpdateTableMetadata["UpdateSpec"]): + _transaction: Transaction + + def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: + super().__init__(transaction) From 95da8f344434bc2aff6d4cf492488f161a6f1c6b Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 5 Jan 2025 20:24:30 +0100 Subject: [PATCH 02/44] chore: scaffolding --- pyiceberg/table/__init__.py | 8 +++----- pyiceberg/table/update/sort_order.py | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 36b3d45520..e16255603c 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -404,10 +404,8 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive case_sensitive=case_sensitive, name_mapping=self.table_metadata.name_mapping(), ) - - - def replace_sort_order(self) -> None: - ... + + def replace_sort_order(self) -> None: ... def update_snapshot(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> UpdateSnapshot: """Create a new UpdateSnapshot to produce a new snapshot for the table. @@ -1061,7 +1059,7 @@ def replace_sort_order(self) -> UpdateSortOrder: Returns: A new UpdateSortOrder. """ - return UpdateSortOrder(self) + return UpdateSortOrder(transaction=Transaction(self, autocommit=True)) def name_mapping(self) -> Optional[NameMapping]: """Return the table's field-id NameMapping.""" diff --git a/pyiceberg/table/update/sort_order.py b/pyiceberg/table/update/sort_order.py index a08bc3784b..bba50e7645 100644 --- a/pyiceberg/table/update/sort_order.py +++ b/pyiceberg/table/update/sort_order.py @@ -16,11 +16,12 @@ # under the License. from __future__ import annotations -from typing import ( - TYPE_CHECKING, -) +from typing import TYPE_CHECKING, Tuple from pyiceberg.table.update import ( + TableRequirement, + TableUpdate, + UpdatesAndRequirements, UpdateTableMetadata, ) @@ -28,8 +29,15 @@ from pyiceberg.table import Transaction -class UpdateSpec(UpdateTableMetadata["UpdateSpec"]): +class UpdateSortOrder(UpdateTableMetadata["UpdateSortOrder"]): _transaction: Transaction def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: super().__init__(transaction) + + def _commit(self) -> UpdatesAndRequirements: + """Apply the pending changes and commit.""" + requirements: Tuple[TableRequirement, ...] = () + updates: Tuple[TableUpdate, ...] = () + + return updates, requirements From 253967d0617fd6708104419092c787ff051f0cd3 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 5 Jan 2025 20:32:27 +0100 Subject: [PATCH 03/44] chore: add skeleton for asc/desc methods --- pyiceberg/table/update/sort_order.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyiceberg/table/update/sort_order.py b/pyiceberg/table/update/sort_order.py index bba50e7645..b194537422 100644 --- a/pyiceberg/table/update/sort_order.py +++ b/pyiceberg/table/update/sort_order.py @@ -16,8 +16,9 @@ # under the License. 
from __future__ import annotations -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING, Tuple, Any +from pyiceberg.transforms import Transform, IdentityTransform from pyiceberg.table.update import ( TableRequirement, TableUpdate, @@ -34,6 +35,12 @@ class UpdateSortOrder(UpdateTableMetadata["UpdateSortOrder"]): def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: super().__init__(transaction) + + def asc(self, source_column_name: str, transform: Transform[Any, Any] = IdentityTransform()) -> UpdateSortOrder: + ... + + def desc(self, source_column_name: str, transform: Transform[Any, Any] = IdentityTransform()) -> UpdateSortOrder: + ... def _commit(self) -> UpdatesAndRequirements: """Apply the pending changes and commit.""" From 7b5a98e8e9a39c039bd3f0139b9b17bace4e9720 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 5 Jan 2025 20:56:17 +0100 Subject: [PATCH 04/44] chore: scaffolding --- pyiceberg/table/update/sort_order.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pyiceberg/table/update/sort_order.py b/pyiceberg/table/update/sort_order.py index b194537422..6629ae3447 100644 --- a/pyiceberg/table/update/sort_order.py +++ b/pyiceberg/table/update/sort_order.py @@ -16,15 +16,16 @@ # under the License. from __future__ import annotations -from typing import TYPE_CHECKING, Tuple, Any +from typing import TYPE_CHECKING, Any, Tuple -from pyiceberg.transforms import Transform, IdentityTransform from pyiceberg.table.update import ( TableRequirement, TableUpdate, UpdatesAndRequirements, UpdateTableMetadata, ) +from pyiceberg.transforms import Transform +from pyiceberg.table.sorting import NullOrder if TYPE_CHECKING: from pyiceberg.table import Transaction @@ -35,12 +36,12 @@ class UpdateSortOrder(UpdateTableMetadata["UpdateSortOrder"]): def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: super().__init__(transaction) - - def asc(self, source_column_name: str, transform: Transform[Any, Any] = IdentityTransform()) -> UpdateSortOrder: - ... - - def desc(self, source_column_name: str, transform: Transform[Any, Any] = IdentityTransform()) -> UpdateSortOrder: - ... 
+ + def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: + return self + + def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: + return self def _commit(self) -> UpdatesAndRequirements: """Apply the pending changes and commit.""" From 304a806bbe4f7c562a37b51a41c96ebe7d33c2c9 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 5 Jan 2025 21:01:40 +0100 Subject: [PATCH 05/44] chore: change method names --- pyiceberg/table/__init__.py | 10 +++++----- pyiceberg/table/update/sort_order.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index e16255603c..817184f381 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -117,7 +117,7 @@ UpdateSnapshot, _FastAppendFiles, ) -from pyiceberg.table.update.sort_order import UpdateSortOrder +from pyiceberg.table.update.sort_order import SortOrderBuilder from pyiceberg.table.update.spec import UpdateSpec from pyiceberg.transforms import IdentityTransform from pyiceberg.typedef import ( @@ -1053,13 +1053,13 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive name_mapping=self.name_mapping(), ) - def replace_sort_order(self) -> UpdateSortOrder: - """Create a new UpdateSortOrder to replace the sort order of this table. + def replace_sort_order(self) -> SortOrderBuilder: + """Create a new SortOrderBuilder to replace the sort order of this table. Returns: - A new UpdateSortOrder. + A new SortOrderBuilder. """ - return UpdateSortOrder(transaction=Transaction(self, autocommit=True)) + return SortOrderBuilder(transaction=Transaction(self, autocommit=True)) def name_mapping(self) -> Optional[NameMapping]: """Return the table's field-id NameMapping.""" diff --git a/pyiceberg/table/update/sort_order.py b/pyiceberg/table/update/sort_order.py index 6629ae3447..2512a0af03 100644 --- a/pyiceberg/table/update/sort_order.py +++ b/pyiceberg/table/update/sort_order.py @@ -31,7 +31,7 @@ from pyiceberg.table import Transaction -class UpdateSortOrder(UpdateTableMetadata["UpdateSortOrder"]): +class SortOrderBuilder(UpdateTableMetadata["SortOrderBuilder"]): _transaction: Transaction def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: From 48ac5c079f7bf2f3f3daf5aa1af8993960ddb86d Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Tue, 7 Jan 2025 21:40:58 +0100 Subject: [PATCH 06/44] chore: update methods --- pyiceberg/table/update/sorting.py | 103 ++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 pyiceberg/table/update/sorting.py diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py new file mode 100644 index 0000000000..dd2ded25cf --- /dev/null +++ b/pyiceberg/table/update/sorting.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Tuple, Dict, List + +from pyiceberg.table.update import ( + TableRequirement, + TableUpdate, + UpdatesAndRequirements, + UpdateTableMetadata, +) +from pyiceberg.transforms import Transform +from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder + +if TYPE_CHECKING: + from pyiceberg.table import Transaction + + +class SortOrderBuilder: + + def __init__(self, case_sensitive: bool = True) -> None: + self._fields: List[SortField] = [] + self._case_sensitive = case_sensitive + + def add_sort_field( + self, + source_id: int, + transform: Transform[Any, Any], + direction: SortDirection, + null_order: NullOrder, + ) -> SortOrderBuilder: + self._fields.append( + SortField( + source_id=source_id, + transform=transform, + direction=direction, + null_order=null_order, + ) + ) + return self + + @property + def sort_order(self) -> SortOrder: # todo: add sort order id? + return SortOrder(*self._fields) + + +class ReplaceSortOrder(UpdateTableMetadata["ReplaceSortOrder"]): + _transaction: Transaction + _builder: SortOrderBuilder + _last_assigned_order_id: int + _case_sensitive: bool + + def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: + super().__init__(transaction) + self._builder = SortOrderBuilder(case_sensitive) + self._case_sensitive = case_sensitive + self._last_sort_order_id = transaction.table_metadata.default_sort_order_id + + def _column_name_to_id(self, column_name: str) -> int: + return self._transaction.table_metadata.schema().find_field( + name_or_id=column_name, + case_sensitive=self._case_sensitive, + ).field_id + + def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> ReplaceSortOrder: + self._builder.add_sort_field( + source_id=self._column_name_to_id(source_column_name), + transform=transform, + direction=SortDirection.ASC, + null_order=null_order, + ) + return self + + def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> ReplaceSortOrder: + self._builder.add_sort_field( + source_id=self._column_name_to_id(source_column_name), + transform=transform, + direction=SortDirection.DESC, + null_order=null_order, + ) + return self + + def _commit(self) -> UpdatesAndRequirements: + """Apply the pending changes and commit.""" + requirements: Tuple[TableRequirement, ...] = () + updates: Tuple[TableUpdate, ...] 
= () + + return updates, requirements From a47067c38ed52eba908d5b4bfc8f5dd6af1ca992 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Tue, 7 Jan 2025 21:41:14 +0100 Subject: [PATCH 07/44] chore: update methods --- pyiceberg/table/update/sort_order.py | 51 ---------------------------- 1 file changed, 51 deletions(-) delete mode 100644 pyiceberg/table/update/sort_order.py diff --git a/pyiceberg/table/update/sort_order.py b/pyiceberg/table/update/sort_order.py deleted file mode 100644 index 2512a0af03..0000000000 --- a/pyiceberg/table/update/sort_order.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Tuple - -from pyiceberg.table.update import ( - TableRequirement, - TableUpdate, - UpdatesAndRequirements, - UpdateTableMetadata, -) -from pyiceberg.transforms import Transform -from pyiceberg.table.sorting import NullOrder - -if TYPE_CHECKING: - from pyiceberg.table import Transaction - - -class SortOrderBuilder(UpdateTableMetadata["SortOrderBuilder"]): - _transaction: Transaction - - def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: - super().__init__(transaction) - - def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: - return self - - def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: - return self - - def _commit(self) -> UpdatesAndRequirements: - """Apply the pending changes and commit.""" - requirements: Tuple[TableRequirement, ...] = () - updates: Tuple[TableUpdate, ...] 
= () - - return updates, requirements From c1ab2ecde26f91e4cdfc55e120b669122c3dbc30 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Tue, 7 Jan 2025 21:41:26 +0100 Subject: [PATCH 08/44] chore: update imports --- pyiceberg/table/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 817184f381..6fca6d112f 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -117,7 +117,7 @@ UpdateSnapshot, _FastAppendFiles, ) -from pyiceberg.table.update.sort_order import SortOrderBuilder +from pyiceberg.table.update.sorting import SortOrderBuilder from pyiceberg.table.update.spec import UpdateSpec from pyiceberg.transforms import IdentityTransform from pyiceberg.typedef import ( From 8f27d149719797eb52af11c07cd91423a55bf1d6 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Tue, 7 Jan 2025 21:42:54 +0100 Subject: [PATCH 09/44] chore: stupid renames --- pyiceberg/table/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 6fca6d112f..32df27da54 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -117,7 +117,7 @@ UpdateSnapshot, _FastAppendFiles, ) -from pyiceberg.table.update.sorting import SortOrderBuilder +from pyiceberg.table.update.sorting import ReplaceSortOrder from pyiceberg.table.update.spec import UpdateSpec from pyiceberg.transforms import IdentityTransform from pyiceberg.typedef import ( @@ -1053,13 +1053,13 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive name_mapping=self.name_mapping(), ) - def replace_sort_order(self) -> SortOrderBuilder: - """Create a new SortOrderBuilder to replace the sort order of this table. + def replace_sort_order(self) -> ReplaceSortOrder: + """Create a new ReplaceSortOrder to replace the sort order of this table. Returns: - A new SortOrderBuilder. + A new ReplaceSortOrder. """ - return SortOrderBuilder(transaction=Transaction(self, autocommit=True)) + return ReplaceSortOrder(transaction=Transaction(self, autocommit=True), case_sensitive=True) def name_mapping(self) -> Optional[NameMapping]: """Return the table's field-id NameMapping.""" From d8720f2edfb13e188c4915ac6480316c9f6387d1 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Tue, 7 Jan 2025 21:51:02 +0100 Subject: [PATCH 10/44] chore: lint --- pyiceberg/table/update/sorting.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index dd2ded25cf..a2ee561fe6 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -16,8 +16,9 @@ # under the License. 
from __future__ import annotations -from typing import TYPE_CHECKING, Any, Tuple, Dict, List +from typing import TYPE_CHECKING, Any, List, Tuple +from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder from pyiceberg.table.update import ( TableRequirement, TableUpdate, @@ -25,18 +26,16 @@ UpdateTableMetadata, ) from pyiceberg.transforms import Transform -from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder if TYPE_CHECKING: from pyiceberg.table import Transaction class SortOrderBuilder: - def __init__(self, case_sensitive: bool = True) -> None: self._fields: List[SortField] = [] self._case_sensitive = case_sensitive - + def add_sort_field( self, source_id: int, @@ -53,9 +52,9 @@ def add_sort_field( ) ) return self - + @property - def sort_order(self) -> SortOrder: # todo: add sort order id? + def sort_order(self) -> SortOrder: # todo: add sort order id? return SortOrder(*self._fields) @@ -72,10 +71,14 @@ def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> Non self._last_sort_order_id = transaction.table_metadata.default_sort_order_id def _column_name_to_id(self, column_name: str) -> int: - return self._transaction.table_metadata.schema().find_field( - name_or_id=column_name, - case_sensitive=self._case_sensitive, - ).field_id + return ( + self._transaction.table_metadata.schema() + .find_field( + name_or_id=column_name, + case_sensitive=self._case_sensitive, + ) + .field_id + ) def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> ReplaceSortOrder: self._builder.add_sort_field( From 90db60a1f175cc5e2ce90b22490f3cb7c6bcb41e Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Tue, 7 Jan 2025 21:53:01 +0100 Subject: [PATCH 11/44] chore: docstrings --- pyiceberg/table/update/sorting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index a2ee561fe6..56ff6e6f76 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -43,6 +43,7 @@ def add_sort_field( direction: SortDirection, null_order: NullOrder, ) -> SortOrderBuilder: + """Add a sort field to the sort order list.""" self._fields.append( SortField( source_id=source_id, @@ -55,6 +56,7 @@ def add_sort_field( @property def sort_order(self) -> SortOrder: # todo: add sort order id? 
+ """Return the sort order.""" return SortOrder(*self._fields) @@ -66,7 +68,7 @@ class ReplaceSortOrder(UpdateTableMetadata["ReplaceSortOrder"]): def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: super().__init__(transaction) - self._builder = SortOrderBuilder(case_sensitive) + self._builder = SortOrderBuilder(case_sensitive=case_sensitive) self._case_sensitive = case_sensitive self._last_sort_order_id = transaction.table_metadata.default_sort_order_id From 1cd27293a45b25c7a76db3e7c4115d30dbd31e0e Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Wed, 8 Jan 2025 21:35:24 +0100 Subject: [PATCH 12/44] test: add integration test for replace sort order --- tests/integration/test_sort_order_update.py | 553 ++++++++++++++++++++ 1 file changed, 553 insertions(+) create mode 100644 tests/integration/test_sort_order_update.py diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py new file mode 100644 index 0000000000..98804b1c35 --- /dev/null +++ b/tests/integration/test_sort_order_update.py @@ -0,0 +1,553 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint:disable=redefined-outer-name + +import pytest + +from pyiceberg.catalog import Catalog +from pyiceberg.exceptions import NoSuchTableError +from pyiceberg.partitioning import PartitionField, PartitionSpec +from pyiceberg.schema import Schema +from pyiceberg.table import Table +from pyiceberg.transforms import ( + BucketTransform, + DayTransform, + HourTransform, + IdentityTransform, + MonthTransform, + TruncateTransform, + VoidTransform, + YearTransform, +) +from pyiceberg.types import ( + LongType, + NestedField, + StringType, + TimestampType, +) + + +def _simple_table(catalog: Catalog, table_schema_simple: Schema) -> Table: + return _create_table_with_schema(catalog, table_schema_simple, "1") + + +def _table(catalog: Catalog) -> Table: + schema_with_timestamp = Schema( + NestedField(1, "id", LongType(), required=False), + NestedField(2, "event_ts", TimestampType(), required=False), + NestedField(3, "str", StringType(), required=False), + ) + return _create_table_with_schema(catalog, schema_with_timestamp, "1") + + +def _table_v2(catalog: Catalog) -> Table: + schema_with_timestamp = Schema( + NestedField(1, "id", LongType(), required=False), + NestedField(2, "event_ts", TimestampType(), required=False), + NestedField(3, "str", StringType(), required=False), + ) + return _create_table_with_schema(catalog, schema_with_timestamp, "2") + + +def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: str) -> Table: + tbl_name = "default.test_schema_evolution" + try: + catalog.drop_table(tbl_name) + except NoSuchTableError: + pass + return catalog.create_table(identifier=tbl_name, schema=schema, properties={"format-version": format_version}) + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")]) # pytest.lazy_fixture("session_catalog_hive"), +def test_sort_order_builder(catalog: Catalog, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple) + r = simple_table.replace_sort_order() + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_year(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", YearTransform(), "year_transform").commit() +# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, YearTransform(), "year_transform")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_year_generates_default_name(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", YearTransform()).commit() +# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, YearTransform(), "event_ts_year")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_month(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", MonthTransform(), "month_transform").commit() +# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, MonthTransform(), "month_transform")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def 
test_add_month_generates_default_name(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", MonthTransform()).commit() +# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, MonthTransform(), "event_ts_month")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_day(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", DayTransform(), "day_transform").commit() +# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, DayTransform(), "day_transform")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_day_generates_default_name(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", DayTransform()).commit() +# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, DayTransform(), "event_ts_day")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_hour(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", HourTransform(), "hour_transform").commit() +# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, HourTransform(), "hour_transform")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_hour_generates_default_name(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", HourTransform()).commit() +# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, HourTransform(), "event_ts_hour")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None: +# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") +# simple_table.update_spec().add_field("foo", BucketTransform(12), "bucket_transform").commit() +# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, BucketTransform(12), "bucket_transform")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_bucket_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None: +# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") +# simple_table.update_spec().add_field("foo", BucketTransform(12)).commit() +# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, BucketTransform(12), "foo_bucket_12")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None: +# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") +# simple_table.update_spec().add_field("foo", TruncateTransform(1), "truncate_transform").commit() +# 
_validate_new_partition_fields( +# simple_table, 1000, 1, 1000, PartitionField(1, 1000, TruncateTransform(1), "truncate_transform") +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_truncate_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None: +# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") +# simple_table.update_spec().add_field("foo", TruncateTransform(1)).commit() +# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, TruncateTransform(1), "foo_trunc_1")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_multiple_adds(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_identity("id").add_field("event_ts", HourTransform(), "hourly_partitioned").add_field( +# "str", TruncateTransform(2), "truncate_str" +# ).commit() +# _validate_new_partition_fields( +# table, +# 1002, +# 1, +# 1002, +# PartitionField(1, 1000, IdentityTransform(), "id"), +# PartitionField(2, 1001, HourTransform(), "hourly_partitioned"), +# PartitionField(3, 1002, TruncateTransform(2), "truncate_str"), +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_void(catalog: Catalog, table_schema_simple: Schema) -> None: +# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") +# simple_table.update_spec().add_field("foo", VoidTransform(), "void_transform").commit() +# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, VoidTransform(), "void_transform")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_void_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None: +# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") +# simple_table.update_spec().add_field("foo", VoidTransform()).commit() +# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, VoidTransform(), "foo_null")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_hour_to_day(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("event_ts", DayTransform(), "daily_partitioned").commit() +# table.update_spec().add_field("event_ts", HourTransform(), "hourly_partitioned").commit() +# _validate_new_partition_fields( +# table, +# 1001, +# 2, +# 1001, +# PartitionField(2, 1000, DayTransform(), "daily_partitioned"), +# PartitionField(2, 1001, HourTransform(), "hourly_partitioned"), +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_add_multiple_buckets(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_field("id", BucketTransform(16)).add_field("id", BucketTransform(4)).commit() +# _validate_new_partition_fields( +# table, +# 1001, +# 1, +# 1001, +# PartitionField(1, 1000, BucketTransform(16), "id_bucket_16"), +# PartitionField(1, 1001, 
BucketTransform(4), "id_bucket_4"), +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_remove_identity(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_identity("id").commit() +# table.update_spec().remove_field("id").commit() +# assert len(table.specs()) == 3 +# assert table.spec().spec_id == 2 +# assert table.spec() == PartitionSpec( +# PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="id"), spec_id=2 +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_remove_identity_v2(catalog: Catalog) -> None: +# table_v2 = _table_v2(catalog) +# table_v2.update_spec().add_identity("id").commit() +# table_v2.update_spec().remove_field("id").commit() +# assert len(table_v2.specs()) == 2 +# assert table_v2.spec().spec_id == 0 +# assert table_v2.spec() == PartitionSpec(spec_id=0) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_remove_and_add_identity(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_identity("id").commit() +# table.update_spec().remove_field("id").commit() +# table.update_spec().add_identity("id").commit() + +# assert len(table.specs()) == 4 +# assert table.spec().spec_id == 3 +# assert table.spec() == PartitionSpec( +# PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="id_1000"), +# PartitionField(source_id=1, field_id=1001, transform=IdentityTransform(), name="id"), +# spec_id=3, +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_remove_and_add_identity_v2(catalog: Catalog) -> None: +# table_v2 = _table_v2(catalog) +# table_v2.update_spec().add_identity("id").commit() +# table_v2.update_spec().remove_field("id").commit() +# table_v2.update_spec().add_identity("id").commit() + +# assert len(table_v2.specs()) == 2 +# assert table_v2.spec().spec_id == 1 +# assert table_v2.spec() == PartitionSpec( +# PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="id"), spec_id=1 +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_remove_bucket(catalog: Catalog) -> None: +# table = _table(catalog) +# with table.update_spec() as update: +# update.add_field("id", BucketTransform(16), "bucketed_id") +# update.add_field("event_ts", DayTransform(), "day_ts") +# with table.update_spec() as remove: +# remove.remove_field("bucketed_id") + +# assert len(table.specs()) == 3 +# _validate_new_partition_fields( +# table, +# 1001, +# 2, +# 1001, +# PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="bucketed_id"), +# PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name="day_ts"), +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_remove_bucket_v2(catalog: Catalog) -> None: +# table_v2 = _table_v2(catalog) +# with table_v2.update_spec() as update: +# update.add_field("id", BucketTransform(16), "bucketed_id") 
+# update.add_field("event_ts", DayTransform(), "day_ts") +# with table_v2.update_spec() as remove: +# remove.remove_field("bucketed_id") +# assert len(table_v2.specs()) == 3 +# _validate_new_partition_fields( +# table_v2, 1001, 2, 1001, PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name="day_ts") +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_remove_day(catalog: Catalog) -> None: +# table = _table(catalog) +# with table.update_spec() as update: +# update.add_field("id", BucketTransform(16), "bucketed_id") +# update.add_field("event_ts", DayTransform(), "day_ts") +# with table.update_spec() as remove: +# remove.remove_field("day_ts") + +# assert len(table.specs()) == 3 +# _validate_new_partition_fields( +# table, +# 1001, +# 2, +# 1001, +# PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="bucketed_id"), +# PartitionField(source_id=2, field_id=1001, transform=VoidTransform(), name="day_ts"), +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_remove_day_v2(catalog: Catalog) -> None: +# table_v2 = _table_v2(catalog) +# with table_v2.update_spec() as update: +# update.add_field("id", BucketTransform(16), "bucketed_id") +# update.add_field("event_ts", DayTransform(), "day_ts") +# with table_v2.update_spec() as remove: +# remove.remove_field("day_ts") +# assert len(table_v2.specs()) == 3 +# _validate_new_partition_fields( +# table_v2, 1000, 2, 1001, PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="bucketed_id") +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_rename(catalog: Catalog) -> None: +# table = _table(catalog) +# table.update_spec().add_identity("id").commit() +# table.update_spec().rename_field("id", "sharded_id").commit() +# assert len(table.specs()) == 3 +# assert table.spec().spec_id == 2 +# _validate_new_partition_fields(table, 1000, 2, 1000, PartitionField(1, 1000, IdentityTransform(), "sharded_id")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_cannot_add_and_remove(catalog: Catalog) -> None: +# table = _table(catalog) +# with pytest.raises(ValueError) as exc_info: +# table.update_spec().add_identity("id").remove_field("id").commit() +# assert "Cannot delete newly added field id" in str(exc_info.value) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_cannot_add_redundant_time_partition(catalog: Catalog) -> None: +# table = _table(catalog) +# with pytest.raises(ValueError) as exc_info: +# table.update_spec().add_field("event_ts", YearTransform(), "year_transform").add_field( +# "event_ts", HourTransform(), "hour_transform" +# ).commit() +# assert "Cannot add time partition field: hour_transform conflicts with year_transform" in str(exc_info.value) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_cannot_delete_and_rename(catalog: Catalog) -> None: +# table = 
_table(catalog) +# with pytest.raises(ValueError) as exc_info: +# table.update_spec().add_identity("id").commit() +# table.update_spec().remove_field("id").rename_field("id", "sharded_id").commit() +# assert "Cannot delete and rename partition field id" in str(exc_info.value) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_cannot_rename_and_delete(catalog: Catalog) -> None: +# table = _table(catalog) +# with pytest.raises(ValueError) as exc_info: +# table.update_spec().add_identity("id").commit() +# table.update_spec().rename_field("id", "sharded_id").remove_field("id").commit() +# assert "Cannot rename and delete field id" in str(exc_info.value) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_cannot_add_same_tranform_for_same_field(catalog: Catalog) -> None: +# table = _table(catalog) +# with pytest.raises(ValueError) as exc_info: +# table.update_spec().add_field("str", TruncateTransform(4), "truncated_str").add_field( +# "str", TruncateTransform(4) +# ).commit() +# assert "Already added partition" in str(exc_info.value) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_cannot_add_same_field_multiple_times(catalog: Catalog) -> None: +# table = _table(catalog) +# with pytest.raises(ValueError) as exc_info: +# table.update_spec().add_field("id", IdentityTransform(), "duplicate").add_field( +# "id", IdentityTransform(), "duplicate" +# ).commit() +# assert "Already added partition" in str(exc_info.value) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_cannot_add_multiple_specs_same_name(catalog: Catalog) -> None: +# table = _table(catalog) +# with pytest.raises(ValueError) as exc_info: +# table.update_spec().add_field("id", IdentityTransform(), "duplicate").add_field( +# "event_ts", IdentityTransform(), "duplicate" +# ).commit() +# assert "Already added partition" in str(exc_info.value) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_change_specs_and_schema_transaction(catalog: Catalog) -> None: +# table = _table(catalog) +# with table.transaction() as transaction: +# with transaction.update_spec() as update_spec: +# update_spec.add_identity("id").add_field("event_ts", HourTransform(), "hourly_partitioned").add_field( +# "str", TruncateTransform(2), "truncate_str" +# ) + +# with transaction.update_schema() as update_schema: +# update_schema.add_column("col_string", StringType()) + +# _validate_new_partition_fields( +# table, +# 1002, +# 1, +# 1002, +# PartitionField(1, 1000, IdentityTransform(), "id"), +# PartitionField(2, 1001, HourTransform(), "hourly_partitioned"), +# PartitionField(3, 1002, TruncateTransform(2), "truncate_str"), +# ) + +# assert table.schema() == Schema( +# NestedField(field_id=1, name="id", field_type=LongType(), required=False), +# NestedField(field_id=2, name="event_ts", field_type=TimestampType(), required=False), +# NestedField(field_id=3, name="str", field_type=StringType(), required=False), +# NestedField(field_id=4, name="col_string", field_type=StringType(), 
required=False), +# identifier_field_ids=[], +# ) +# assert table.schema().schema_id == 1 + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_multiple_adds_and_remove_v1(catalog: Catalog) -> None: +# table = _table(catalog) +# with table.update_spec() as update: +# update.add_field("id", BucketTransform(16), "bucketed_id") +# update.add_field("event_ts", DayTransform(), "day_ts") +# with table.update_spec() as update: +# update.remove_field("day_ts").remove_field("bucketed_id") +# with table.update_spec() as update: +# update.add_field("str", TruncateTransform(2), "truncated_str") +# _validate_new_partition_fields( +# table, +# 1002, +# 3, +# 1002, +# PartitionField(1, 1000, VoidTransform(), "bucketed_id"), +# PartitionField(2, 1001, VoidTransform(), "day_ts"), +# PartitionField(3, 1002, TruncateTransform(2), "truncated_str"), +# ) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_multiple_adds_and_remove_v2(catalog: Catalog) -> None: +# table_v2 = _table_v2(catalog) +# with table_v2.update_spec() as update: +# update.add_field("id", BucketTransform(16), "bucketed_id") +# update.add_field("event_ts", DayTransform(), "day_ts") +# with table_v2.update_spec() as update: +# update.remove_field("day_ts").remove_field("bucketed_id") +# with table_v2.update_spec() as update: +# update.add_field("str", TruncateTransform(2), "truncated_str") +# _validate_new_partition_fields(table_v2, 1002, 2, 1002, PartitionField(3, 1002, TruncateTransform(2), "truncated_str")) + + +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +# def test_multiple_remove_and_add_reuses_v2(catalog: Catalog) -> None: +# table_v2 = _table_v2(catalog) +# with table_v2.update_spec() as update: +# update.add_field("id", BucketTransform(16), "bucketed_id") +# update.add_field("event_ts", DayTransform(), "day_ts") +# with table_v2.update_spec() as update: +# update.remove_field("day_ts").remove_field("bucketed_id") +# with table_v2.update_spec() as update: +# update.add_field("id", BucketTransform(16), "bucketed_id") +# _validate_new_partition_fields(table_v2, 1000, 2, 1001, PartitionField(1, 1000, BucketTransform(16), "bucketed_id")) + + +# def _validate_new_partition_fields( +# table: Table, +# expected_spec_last_assigned_field_id: int, +# expected_spec_id: int, +# expected_metadata_last_assigned_field_id: int, +# *expected_partition_fields: PartitionField, +# ) -> None: +# spec = table.spec() +# assert spec.spec_id == expected_spec_id +# assert spec.last_assigned_field_id == expected_spec_last_assigned_field_id +# assert table.last_partition_id() == expected_metadata_last_assigned_field_id +# assert len(spec.fields) == len(expected_partition_fields) +# for i in range(len(spec.fields)): +# assert spec.fields[i] == expected_partition_fields[i] From 0a1e781c2c642554bdaa926109bc3e6a1bded9f0 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Wed, 8 Jan 2025 21:39:39 +0100 Subject: [PATCH 13/44] test: add test for lookup --- tests/integration/test_sort_order_update.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index 98804b1c35..bdfd7e3b2e 100644 --- a/tests/integration/test_sort_order_update.py +++ 
b/tests/integration/test_sort_order_update.py @@ -72,6 +72,14 @@ def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: return catalog.create_table(identifier=tbl_name, schema=schema, properties={"format-version": format_version}) +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) +def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple) + for col_name, col_id in {"foo": 1, "bar": 2, "baz": 3}.items(): + assert col_id == simple_table.replace_sort_order()._column_name_to_id(col_name) + + @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")]) # pytest.lazy_fixture("session_catalog_hive"), def test_sort_order_builder(catalog: Catalog, table_schema_simple: Schema) -> None: From a550ccbc8bd3348ac40f30ecaab452ebcf91d27b Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Wed, 8 Jan 2025 21:48:45 +0100 Subject: [PATCH 14/44] refactor: add last sort order id --- tests/integration/test_sort_order_update.py | 23 ++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index bdfd7e3b2e..1c8cc0a1fd 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -23,6 +23,8 @@ from pyiceberg.partitioning import PartitionField, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.table import Table +from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder +from pyiceberg.table.update.sorting import SortOrderBuilder from pyiceberg.transforms import ( BucketTransform, DayTransform, @@ -72,6 +74,17 @@ def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: return catalog.create_table(identifier=tbl_name, schema=schema, properties={"format-version": format_version}) +@pytest.mark.integration +def test_sort_order_builder() -> None: + builder = SortOrderBuilder(last_sort_order_id=0) + builder.add_sort_field(1, IdentityTransform(), SortDirection.ASC, NullOrder.NULLS_FIRST) + builder.add_sort_field(2, IdentityTransform(), SortDirection.DESC, NullOrder.NULLS_LAST) + assert builder.sort_order == SortOrder( + SortField(1, IdentityTransform(), SortDirection.ASC, NullOrder.NULLS_FIRST), + SortField(2, IdentityTransform(), SortDirection.DESC, NullOrder.NULLS_LAST), + ) + + @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> None: @@ -80,11 +93,11 @@ def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> assert col_id == simple_table.replace_sort_order()._column_name_to_id(col_name) -@pytest.mark.integration -@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")]) # pytest.lazy_fixture("session_catalog_hive"), -def test_sort_order_builder(catalog: Catalog, table_schema_simple: Schema) -> None: - simple_table = _simple_table(catalog, table_schema_simple) - r = simple_table.replace_sort_order() +# @pytest.mark.integration +# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")]) # pytest.lazy_fixture("session_catalog_hive"), +# def test_sort_order_builder(catalog: Catalog, table_schema_simple: Schema) 
-> None: +# simple_table = _simple_table(catalog, table_schema_simple) +# r = simple_table.replace_sort_order() # @pytest.mark.integration From 8b0925589f180a1edaa51d652124ec2ee4ae461c Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Wed, 8 Jan 2025 21:54:28 +0100 Subject: [PATCH 15/44] refactor: add last sort order id and increment --- pyiceberg/table/update/sorting.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 56ff6e6f76..ccfa3c5915 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -32,9 +32,10 @@ class SortOrderBuilder: - def __init__(self, case_sensitive: bool = True) -> None: + def __init__(self, last_sort_order_id: int, case_sensitive: bool = True) -> None: self._fields: List[SortField] = [] self._case_sensitive = case_sensitive + self._last_sort_order_id = last_sort_order_id def add_sort_field( self, @@ -57,7 +58,7 @@ def add_sort_field( @property def sort_order(self) -> SortOrder: # todo: add sort order id? """Return the sort order.""" - return SortOrder(*self._fields) + return SortOrder(*self._fields, order_id=self._last_sort_order_id + 1) class ReplaceSortOrder(UpdateTableMetadata["ReplaceSortOrder"]): @@ -68,9 +69,11 @@ class ReplaceSortOrder(UpdateTableMetadata["ReplaceSortOrder"]): def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: super().__init__(transaction) - self._builder = SortOrderBuilder(case_sensitive=case_sensitive) + self._builder = SortOrderBuilder( + case_sensitive=case_sensitive, + last_sort_order_id=transaction.table_metadata.default_sort_order_id, + ) self._case_sensitive = case_sensitive - self._last_sort_order_id = transaction.table_metadata.default_sort_order_id def _column_name_to_id(self, column_name: str) -> int: return ( From 67b9e527e52f6f42620ff048911ab18917952102 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Wed, 8 Jan 2025 22:02:52 +0100 Subject: [PATCH 16/44] chore: add imports --- pyiceberg/table/update/sorting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index ccfa3c5915..ec1f31dbf7 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -18,12 +18,14 @@ from typing import TYPE_CHECKING, Any, List, Tuple +from pyiceberg.table import AddSortOrderUpdate, SetDefaultSortOrderUpdate from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder from pyiceberg.table.update import ( TableRequirement, TableUpdate, UpdatesAndRequirements, UpdateTableMetadata, + AssertDefaultSortOrderId ) from pyiceberg.transforms import Transform @@ -56,7 +58,7 @@ def add_sort_field( return self @property - def sort_order(self) -> SortOrder: # todo: add sort order id? 
+ def sort_order(self) -> SortOrder: """Return the sort order.""" return SortOrder(*self._fields, order_id=self._last_sort_order_id + 1) From dcaa63f05b56781cf12148c7ea679812e76cc2df Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Wed, 8 Jan 2025 22:11:39 +0100 Subject: [PATCH 17/44] feat: add apply and commit methods --- pyiceberg/table/update/sorting.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index ec1f31dbf7..7bb0c67f74 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -105,9 +105,24 @@ def desc(self, source_column_name: str, transform: Transform[Any, Any], null_ord ) return self + def _apply(self) -> SortOrder: + return self._builder.sort_order + def _commit(self) -> UpdatesAndRequirements: """Apply the pending changes and commit.""" + new_sort_order = self._apply() requirements: Tuple[TableRequirement, ...] = () updates: Tuple[TableUpdate, ...] = () + if self._transaction.table_metadata.default_sort_order_id != new_sort_order.order_id: + updates = ( + AddSortOrderUpdate(sort_order=new_sort_order), + SetDefaultSortOrderUpdate(sort_order_id=-1) + ) + else: + updates = (SetDefaultSortOrderUpdate(sort_order_id=new_sort_order.order_id),) + + required_last_assigned_sort_order_id = self._transaction.table_metadata.default_sort_order_id + requirements = (AssertDefaultSortOrderId(default_sort_order_id=required_last_assigned_sort_order_id),) + return updates, requirements From ced6a4b3e8bd603e75eca830d8b61b6c5d6f93b9 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Wed, 8 Jan 2025 22:19:16 +0100 Subject: [PATCH 18/44] test: remove spec stuff --- tests/integration/test_sort_order_update.py | 501 +------------------- 1 file changed, 22 insertions(+), 479 deletions(-) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index 1c8cc0a1fd..39a223c054 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -93,482 +93,25 @@ def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> assert col_id == simple_table.replace_sort_order()._column_name_to_id(col_name) -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")]) # pytest.lazy_fixture("session_catalog_hive"), -# def test_sort_order_builder(catalog: Catalog, table_schema_simple: Schema) -> None: -# simple_table = _simple_table(catalog, table_schema_simple) -# r = simple_table.replace_sort_order() - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_year(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", YearTransform(), "year_transform").commit() -# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, YearTransform(), "year_transform")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_year_generates_default_name(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", YearTransform()).commit() -# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, YearTransform(), "event_ts_year")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_month(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", MonthTransform(), "month_transform").commit() -# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, MonthTransform(), "month_transform")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_month_generates_default_name(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", MonthTransform()).commit() -# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, MonthTransform(), "event_ts_month")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_day(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", DayTransform(), "day_transform").commit() -# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, DayTransform(), "day_transform")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_day_generates_default_name(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", DayTransform()).commit() -# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, DayTransform(), "event_ts_day")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_hour(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", HourTransform(), "hour_transform").commit() -# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, HourTransform(), "hour_transform")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_hour_generates_default_name(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", HourTransform()).commit() -# _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, HourTransform(), "event_ts_hour")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None: -# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") -# simple_table.update_spec().add_field("foo", BucketTransform(12), "bucket_transform").commit() -# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, BucketTransform(12), "bucket_transform")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_bucket_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None: -# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") -# simple_table.update_spec().add_field("foo", BucketTransform(12)).commit() -# 
_validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, BucketTransform(12), "foo_bucket_12")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None: -# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") -# simple_table.update_spec().add_field("foo", TruncateTransform(1), "truncate_transform").commit() -# _validate_new_partition_fields( -# simple_table, 1000, 1, 1000, PartitionField(1, 1000, TruncateTransform(1), "truncate_transform") -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_truncate_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None: -# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") -# simple_table.update_spec().add_field("foo", TruncateTransform(1)).commit() -# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, TruncateTransform(1), "foo_trunc_1")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_multiple_adds(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_identity("id").add_field("event_ts", HourTransform(), "hourly_partitioned").add_field( -# "str", TruncateTransform(2), "truncate_str" -# ).commit() -# _validate_new_partition_fields( -# table, -# 1002, -# 1, -# 1002, -# PartitionField(1, 1000, IdentityTransform(), "id"), -# PartitionField(2, 1001, HourTransform(), "hourly_partitioned"), -# PartitionField(3, 1002, TruncateTransform(2), "truncate_str"), -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_void(catalog: Catalog, table_schema_simple: Schema) -> None: -# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") -# simple_table.update_spec().add_field("foo", VoidTransform(), "void_transform").commit() -# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, VoidTransform(), "void_transform")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_void_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None: -# simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") -# simple_table.update_spec().add_field("foo", VoidTransform()).commit() -# _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, VoidTransform(), "foo_null")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_hour_to_day(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("event_ts", DayTransform(), "daily_partitioned").commit() -# table.update_spec().add_field("event_ts", HourTransform(), "hourly_partitioned").commit() -# _validate_new_partition_fields( -# table, -# 1001, -# 2, -# 1001, -# PartitionField(2, 1000, DayTransform(), "daily_partitioned"), -# PartitionField(2, 1001, 
HourTransform(), "hourly_partitioned"), -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_add_multiple_buckets(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_field("id", BucketTransform(16)).add_field("id", BucketTransform(4)).commit() -# _validate_new_partition_fields( -# table, -# 1001, -# 1, -# 1001, -# PartitionField(1, 1000, BucketTransform(16), "id_bucket_16"), -# PartitionField(1, 1001, BucketTransform(4), "id_bucket_4"), -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_remove_identity(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_identity("id").commit() -# table.update_spec().remove_field("id").commit() -# assert len(table.specs()) == 3 -# assert table.spec().spec_id == 2 -# assert table.spec() == PartitionSpec( -# PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="id"), spec_id=2 -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_remove_identity_v2(catalog: Catalog) -> None: -# table_v2 = _table_v2(catalog) -# table_v2.update_spec().add_identity("id").commit() -# table_v2.update_spec().remove_field("id").commit() -# assert len(table_v2.specs()) == 2 -# assert table_v2.spec().spec_id == 0 -# assert table_v2.spec() == PartitionSpec(spec_id=0) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_remove_and_add_identity(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_identity("id").commit() -# table.update_spec().remove_field("id").commit() -# table.update_spec().add_identity("id").commit() - -# assert len(table.specs()) == 4 -# assert table.spec().spec_id == 3 -# assert table.spec() == PartitionSpec( -# PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="id_1000"), -# PartitionField(source_id=1, field_id=1001, transform=IdentityTransform(), name="id"), -# spec_id=3, -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_remove_and_add_identity_v2(catalog: Catalog) -> None: -# table_v2 = _table_v2(catalog) -# table_v2.update_spec().add_identity("id").commit() -# table_v2.update_spec().remove_field("id").commit() -# table_v2.update_spec().add_identity("id").commit() - -# assert len(table_v2.specs()) == 2 -# assert table_v2.spec().spec_id == 1 -# assert table_v2.spec() == PartitionSpec( -# PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="id"), spec_id=1 -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_remove_bucket(catalog: Catalog) -> None: -# table = _table(catalog) -# with table.update_spec() as update: -# update.add_field("id", BucketTransform(16), "bucketed_id") -# update.add_field("event_ts", DayTransform(), "day_ts") -# with table.update_spec() as remove: -# remove.remove_field("bucketed_id") - -# assert len(table.specs()) == 3 -# _validate_new_partition_fields( -# table, -# 
1001, -# 2, -# 1001, -# PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="bucketed_id"), -# PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name="day_ts"), -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_remove_bucket_v2(catalog: Catalog) -> None: -# table_v2 = _table_v2(catalog) -# with table_v2.update_spec() as update: -# update.add_field("id", BucketTransform(16), "bucketed_id") -# update.add_field("event_ts", DayTransform(), "day_ts") -# with table_v2.update_spec() as remove: -# remove.remove_field("bucketed_id") -# assert len(table_v2.specs()) == 3 -# _validate_new_partition_fields( -# table_v2, 1001, 2, 1001, PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name="day_ts") -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_remove_day(catalog: Catalog) -> None: -# table = _table(catalog) -# with table.update_spec() as update: -# update.add_field("id", BucketTransform(16), "bucketed_id") -# update.add_field("event_ts", DayTransform(), "day_ts") -# with table.update_spec() as remove: -# remove.remove_field("day_ts") - -# assert len(table.specs()) == 3 -# _validate_new_partition_fields( -# table, -# 1001, -# 2, -# 1001, -# PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="bucketed_id"), -# PartitionField(source_id=2, field_id=1001, transform=VoidTransform(), name="day_ts"), -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_remove_day_v2(catalog: Catalog) -> None: -# table_v2 = _table_v2(catalog) -# with table_v2.update_spec() as update: -# update.add_field("id", BucketTransform(16), "bucketed_id") -# update.add_field("event_ts", DayTransform(), "day_ts") -# with table_v2.update_spec() as remove: -# remove.remove_field("day_ts") -# assert len(table_v2.specs()) == 3 -# _validate_new_partition_fields( -# table_v2, 1000, 2, 1001, PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="bucketed_id") -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_rename(catalog: Catalog) -> None: -# table = _table(catalog) -# table.update_spec().add_identity("id").commit() -# table.update_spec().rename_field("id", "sharded_id").commit() -# assert len(table.specs()) == 3 -# assert table.spec().spec_id == 2 -# _validate_new_partition_fields(table, 1000, 2, 1000, PartitionField(1, 1000, IdentityTransform(), "sharded_id")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_cannot_add_and_remove(catalog: Catalog) -> None: -# table = _table(catalog) -# with pytest.raises(ValueError) as exc_info: -# table.update_spec().add_identity("id").remove_field("id").commit() -# assert "Cannot delete newly added field id" in str(exc_info.value) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_cannot_add_redundant_time_partition(catalog: Catalog) -> None: -# table = _table(catalog) 
-# with pytest.raises(ValueError) as exc_info: -# table.update_spec().add_field("event_ts", YearTransform(), "year_transform").add_field( -# "event_ts", HourTransform(), "hour_transform" -# ).commit() -# assert "Cannot add time partition field: hour_transform conflicts with year_transform" in str(exc_info.value) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_cannot_delete_and_rename(catalog: Catalog) -> None: -# table = _table(catalog) -# with pytest.raises(ValueError) as exc_info: -# table.update_spec().add_identity("id").commit() -# table.update_spec().remove_field("id").rename_field("id", "sharded_id").commit() -# assert "Cannot delete and rename partition field id" in str(exc_info.value) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_cannot_rename_and_delete(catalog: Catalog) -> None: -# table = _table(catalog) -# with pytest.raises(ValueError) as exc_info: -# table.update_spec().add_identity("id").commit() -# table.update_spec().rename_field("id", "sharded_id").remove_field("id").commit() -# assert "Cannot rename and delete field id" in str(exc_info.value) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_cannot_add_same_tranform_for_same_field(catalog: Catalog) -> None: -# table = _table(catalog) -# with pytest.raises(ValueError) as exc_info: -# table.update_spec().add_field("str", TruncateTransform(4), "truncated_str").add_field( -# "str", TruncateTransform(4) -# ).commit() -# assert "Already added partition" in str(exc_info.value) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_cannot_add_same_field_multiple_times(catalog: Catalog) -> None: -# table = _table(catalog) -# with pytest.raises(ValueError) as exc_info: -# table.update_spec().add_field("id", IdentityTransform(), "duplicate").add_field( -# "id", IdentityTransform(), "duplicate" -# ).commit() -# assert "Already added partition" in str(exc_info.value) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_cannot_add_multiple_specs_same_name(catalog: Catalog) -> None: -# table = _table(catalog) -# with pytest.raises(ValueError) as exc_info: -# table.update_spec().add_field("id", IdentityTransform(), "duplicate").add_field( -# "event_ts", IdentityTransform(), "duplicate" -# ).commit() -# assert "Already added partition" in str(exc_info.value) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_change_specs_and_schema_transaction(catalog: Catalog) -> None: -# table = _table(catalog) -# with table.transaction() as transaction: -# with transaction.update_spec() as update_spec: -# update_spec.add_identity("id").add_field("event_ts", HourTransform(), "hourly_partitioned").add_field( -# "str", TruncateTransform(2), "truncate_str" -# ) - -# with transaction.update_schema() as update_schema: -# update_schema.add_column("col_string", StringType()) - -# _validate_new_partition_fields( -# table, -# 1002, -# 1, -# 1002, -# 
PartitionField(1, 1000, IdentityTransform(), "id"), -# PartitionField(2, 1001, HourTransform(), "hourly_partitioned"), -# PartitionField(3, 1002, TruncateTransform(2), "truncate_str"), -# ) - -# assert table.schema() == Schema( -# NestedField(field_id=1, name="id", field_type=LongType(), required=False), -# NestedField(field_id=2, name="event_ts", field_type=TimestampType(), required=False), -# NestedField(field_id=3, name="str", field_type=StringType(), required=False), -# NestedField(field_id=4, name="col_string", field_type=StringType(), required=False), -# identifier_field_ids=[], -# ) -# assert table.schema().schema_id == 1 - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_multiple_adds_and_remove_v1(catalog: Catalog) -> None: -# table = _table(catalog) -# with table.update_spec() as update: -# update.add_field("id", BucketTransform(16), "bucketed_id") -# update.add_field("event_ts", DayTransform(), "day_ts") -# with table.update_spec() as update: -# update.remove_field("day_ts").remove_field("bucketed_id") -# with table.update_spec() as update: -# update.add_field("str", TruncateTransform(2), "truncated_str") -# _validate_new_partition_fields( -# table, -# 1002, -# 3, -# 1002, -# PartitionField(1, 1000, VoidTransform(), "bucketed_id"), -# PartitionField(2, 1001, VoidTransform(), "day_ts"), -# PartitionField(3, 1002, TruncateTransform(2), "truncated_str"), -# ) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_multiple_adds_and_remove_v2(catalog: Catalog) -> None: -# table_v2 = _table_v2(catalog) -# with table_v2.update_spec() as update: -# update.add_field("id", BucketTransform(16), "bucketed_id") -# update.add_field("event_ts", DayTransform(), "day_ts") -# with table_v2.update_spec() as update: -# update.remove_field("day_ts").remove_field("bucketed_id") -# with table_v2.update_spec() as update: -# update.add_field("str", TruncateTransform(2), "truncated_str") -# _validate_new_partition_fields(table_v2, 1002, 2, 1002, PartitionField(3, 1002, TruncateTransform(2), "truncated_str")) - - -# @pytest.mark.integration -# @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) -# def test_multiple_remove_and_add_reuses_v2(catalog: Catalog) -> None: -# table_v2 = _table_v2(catalog) -# with table_v2.update_spec() as update: -# update.add_field("id", BucketTransform(16), "bucketed_id") -# update.add_field("event_ts", DayTransform(), "day_ts") -# with table_v2.update_spec() as update: -# update.remove_field("day_ts").remove_field("bucketed_id") -# with table_v2.update_spec() as update: -# update.add_field("id", BucketTransform(16), "bucketed_id") -# _validate_new_partition_fields(table_v2, 1000, 2, 1001, PartitionField(1, 1000, BucketTransform(16), "bucketed_id")) - - -# def _validate_new_partition_fields( -# table: Table, -# expected_spec_last_assigned_field_id: int, -# expected_spec_id: int, -# expected_metadata_last_assigned_field_id: int, -# *expected_partition_fields: PartitionField, -# ) -> None: -# spec = table.spec() -# assert spec.spec_id == expected_spec_id -# assert spec.last_assigned_field_id == expected_spec_last_assigned_field_id -# assert table.last_partition_id() == expected_metadata_last_assigned_field_id -# assert len(spec.fields) == len(expected_partition_fields) -# for i 
in range(len(spec.fields)): -# assert spec.fields[i] == expected_partition_fields[i] +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) +def test_replace_sort_order(catalog: Catalog, table_schema_simple: Schema): + simple_table = _simple_table(catalog, table_schema_simple) + simple_table.replace_sort_order().asc( + "foo", IdentityTransform(), NullOrder.NULLS_FIRST + ).desc("bar", IdentityTransform(), NullOrder.NULLS_LAST).commit() + assert simple_table.sort_order() == SortOrder( + SortField( + source_id=1, + transform=IdentityTransform(), + direction=SortDirection.ASC, + null_order=NullOrder.NULLS_FIRST + ), + SortField( + source_id=2, + transform=IdentityTransform(), + direction=SortDirection.DESC, + null_order=NullOrder.NULLS_LAST + ), + order_id=1 + ) From 43e09a3bafab4440c19cc4f44ad9ecdfc6b4198b Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Wed, 8 Jan 2025 22:19:57 +0100 Subject: [PATCH 19/44] chore: remove unused import --- tests/integration/test_sort_order_update.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index 39a223c054..001f3e42c0 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -20,20 +20,12 @@ from pyiceberg.catalog import Catalog from pyiceberg.exceptions import NoSuchTableError -from pyiceberg.partitioning import PartitionField, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.table import Table from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder from pyiceberg.table.update.sorting import SortOrderBuilder from pyiceberg.transforms import ( - BucketTransform, - DayTransform, - HourTransform, IdentityTransform, - MonthTransform, - TruncateTransform, - VoidTransform, - YearTransform, ) from pyiceberg.types import ( LongType, From b460c34df5774e11cf374555338393a4b8a6948b Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 10 Jan 2025 17:18:25 +0100 Subject: [PATCH 20/44] chore: add ReplaceSortOrder to the Transaction class --- pyiceberg/table/__init__.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 32df27da54..9f9efcea6b 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -405,7 +405,17 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive name_mapping=self.table_metadata.name_mapping(), ) - def replace_sort_order(self) -> None: ... + def replace_sort_order(self, case_sensitive: bool = True) -> ReplaceSortOrder: + """Create a new ReplaceSortOrder to replace the sort order of this table. + + Returns: + A new ReplaceSortOrder. + """ + + return ReplaceSortOrder( + self, + case_sensitive=case_sensitive, + ) def update_snapshot(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> UpdateSnapshot: """Create a new UpdateSnapshot to produce a new snapshot for the table. 
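
At this point in the series the sort-order API is exercisable end to end: `replace_sort_order()` returns a builder whose `asc`/`desc` calls append `SortField`s and whose `commit()` applies them as a single metadata update. A minimal sketch of the call pattern, assuming a configured `catalog` and the `table_schema_simple` fixture used by the integration tests (both are stand-ins, not defined here):

```python
from pyiceberg.table.sorting import NullOrder
from pyiceberg.transforms import IdentityTransform

# Assumed to exist: `catalog` and `table_schema_simple`, mirroring the
# integration-test fixtures; `foo` and `bar` are columns of that schema.
table = catalog.create_table("default.sort_order_demo", schema=table_schema_simple)

# Each asc()/desc() call returns the same builder, so sort fields chain;
# commit() folds all accumulated fields into one sort-order replacement.
table.replace_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).desc(
    "bar", IdentityTransform(), NullOrder.NULLS_LAST
).commit()
```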
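On commit, the `_commit` implementation from patch 17 turns the accumulated fields into an `(updates, requirements)` pair. A sketch of the objects it produces for a single ascending field, with illustrative ids (the `-1` sentinel mirrors the code above; the update machinery resolves it to the sort order added in the same commit, analogous to schema updates):

```python
from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder
from pyiceberg.table.update import (
    AddSortOrderUpdate,
    AssertDefaultSortOrderId,
    SetDefaultSortOrderUpdate,
)
from pyiceberg.transforms import IdentityTransform

new_sort_order = SortOrder(
    SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST),
    order_id=1,
)

updates = (
    # Register the new order, then point the table's default at it; -1 is
    # the "order added in this commit" sentinel passed by _commit above.
    AddSortOrderUpdate(sort_order=new_sort_order),
    SetDefaultSortOrderUpdate(sort_order_id=-1),
)
requirements = (
    # Fails the commit if a concurrent writer changed the default sort
    # order after this transaction read the table metadata.
    AssertDefaultSortOrderId(default_sort_order_id=0),
)
```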
From 190071fb3cdd70708bd5990f74d0e6957b7ecf5e Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 10 Jan 2025 17:22:52 +0100 Subject: [PATCH 21/44] chore: lint --- pyiceberg/table/__init__.py | 8 ++++--- pyiceberg/table/update/sorting.py | 14 +++++------ tests/integration/test_sort_order_update.py | 26 +++++++-------------- 3 files changed, 19 insertions(+), 29 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 9f9efcea6b..ab2c7570f2 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -407,11 +407,13 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive def replace_sort_order(self, case_sensitive: bool = True) -> ReplaceSortOrder: """Create a new ReplaceSortOrder to replace the sort order of this table. - + + Args: + case_sensitive: If field names are case-sensitive. + Returns: - A new ReplaceSortOrder. + A new ReplaceSortOrder. """ - return ReplaceSortOrder( self, case_sensitive=case_sensitive, diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 7bb0c67f74..4179b3e1d9 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -18,14 +18,15 @@ from typing import TYPE_CHECKING, Any, List, Tuple -from pyiceberg.table import AddSortOrderUpdate, SetDefaultSortOrderUpdate from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder from pyiceberg.table.update import ( + AddSortOrderUpdate, + AssertDefaultSortOrderId, + SetDefaultSortOrderUpdate, TableRequirement, TableUpdate, UpdatesAndRequirements, UpdateTableMetadata, - AssertDefaultSortOrderId ) from pyiceberg.transforms import Transform @@ -34,7 +35,7 @@ class SortOrderBuilder: - def __init__(self, last_sort_order_id: int, case_sensitive: bool = True) -> None: + def __init__(self, last_sort_order_id: int, case_sensitive: bool = True) -> None: self._fields: List[SortField] = [] self._case_sensitive = case_sensitive self._last_sort_order_id = last_sort_order_id @@ -115,13 +116,10 @@ def _commit(self) -> UpdatesAndRequirements: updates: Tuple[TableUpdate, ...] 
= () if self._transaction.table_metadata.default_sort_order_id != new_sort_order.order_id: - updates = ( - AddSortOrderUpdate(sort_order=new_sort_order), - SetDefaultSortOrderUpdate(sort_order_id=-1) - ) + updates = (AddSortOrderUpdate(sort_order=new_sort_order), SetDefaultSortOrderUpdate(sort_order_id=-1)) else: updates = (SetDefaultSortOrderUpdate(sort_order_id=new_sort_order.order_id),) - + required_last_assigned_sort_order_id = self._transaction.table_metadata.default_sort_order_id requirements = (AssertDefaultSortOrderId(default_sort_order_id=required_last_assigned_sort_order_id),) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index 001f3e42c0..4db7f89c46 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -78,7 +78,7 @@ def test_sort_order_builder() -> None: @pytest.mark.integration -@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple) for col_name, col_id in {"foo": 1, "bar": 2, "baz": 3}.items(): @@ -87,23 +87,13 @@ def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) -def test_replace_sort_order(catalog: Catalog, table_schema_simple: Schema): +def test_replace_sort_order(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple) - simple_table.replace_sort_order().asc( - "foo", IdentityTransform(), NullOrder.NULLS_FIRST - ).desc("bar", IdentityTransform(), NullOrder.NULLS_LAST).commit() + simple_table.replace_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).desc( + "bar", IdentityTransform(), NullOrder.NULLS_LAST + ).commit() assert simple_table.sort_order() == SortOrder( - SortField( - source_id=1, - transform=IdentityTransform(), - direction=SortDirection.ASC, - null_order=NullOrder.NULLS_FIRST - ), - SortField( - source_id=2, - transform=IdentityTransform(), - direction=SortDirection.DESC, - null_order=NullOrder.NULLS_LAST - ), - order_id=1 + SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), + SortField(source_id=2, transform=IdentityTransform(), direction=SortDirection.DESC, null_order=NullOrder.NULLS_LAST), + order_id=1, ) From ec5f711591241aaec0a3f80d7a76c9b1b25edac2 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 24 Jan 2025 10:16:17 +0100 Subject: [PATCH 22/44] chore: renames (replace to update) --- pyiceberg/table/__init__.py | 14 +++++++------- pyiceberg/table/update/sorting.py | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index f9d214b72d..effa1c06e6 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -117,7 +117,7 @@ UpdateSnapshot, _FastAppendFiles, ) -from pyiceberg.table.update.sorting import ReplaceSortOrder +from pyiceberg.table.update.sorting import UpdateSortOrder from pyiceberg.table.update.spec import UpdateSpec from pyiceberg.table.update.statistics import UpdateStatistics from 
pyiceberg.transforms import IdentityTransform @@ -414,16 +414,16 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive name_mapping=self.table_metadata.name_mapping(), ) - def replace_sort_order(self, case_sensitive: bool = True) -> ReplaceSortOrder: - """Create a new ReplaceSortOrder to replace the sort order of this table. + def update_sort_order(self, case_sensitive: bool = True) -> UpdateSortOrder: + """Create a new UpdateSortOrder to update the sort order of this table. Args: case_sensitive: If field names are case-sensitive. Returns: - A new ReplaceSortOrder. + A new UpdateSortOrder. """ - return ReplaceSortOrder( + return UpdateSortOrder( self, case_sensitive=case_sensitive, ) @@ -1095,13 +1095,13 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive name_mapping=self.name_mapping(), ) - def replace_sort_order(self) -> ReplaceSortOrder: + def update_sort_order(self) -> UpdateSortOrder: """Create a new ReplaceSortOrder to replace the sort order of this table. Returns: A new ReplaceSortOrder. """ - return ReplaceSortOrder(transaction=Transaction(self, autocommit=True), case_sensitive=True) + return UpdateSortOrder(transaction=Transaction(self, autocommit=True), case_sensitive=True) def name_mapping(self) -> Optional[NameMapping]: """Return the table's field-id NameMapping.""" diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 4179b3e1d9..0e8641a715 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -64,7 +64,7 @@ def sort_order(self) -> SortOrder: return SortOrder(*self._fields, order_id=self._last_sort_order_id + 1) -class ReplaceSortOrder(UpdateTableMetadata["ReplaceSortOrder"]): +class UpdateSortOrder(UpdateTableMetadata["UpdateSortOrder"]): _transaction: Transaction _builder: SortOrderBuilder _last_assigned_order_id: int @@ -88,7 +88,7 @@ def _column_name_to_id(self, column_name: str) -> int: .field_id ) - def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> ReplaceSortOrder: + def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: self._builder.add_sort_field( source_id=self._column_name_to_id(source_column_name), transform=transform, @@ -97,7 +97,7 @@ def asc(self, source_column_name: str, transform: Transform[Any, Any], null_orde ) return self - def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> ReplaceSortOrder: + def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: self._builder.add_sort_field( source_id=self._column_name_to_id(source_column_name), transform=transform, From d69a07171b9608a787c4aece355577b688c1f26f Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 24 Jan 2025 10:16:58 +0100 Subject: [PATCH 23/44] chore: renames (replace to update) --- tests/integration/test_sort_order_update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index 4db7f89c46..fe4e550c10 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -82,14 +82,14 @@ def test_sort_order_builder() -> None: def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple) for col_name, col_id in {"foo": 1, "bar": 2, 
"baz": 3}.items(): - assert col_id == simple_table.replace_sort_order()._column_name_to_id(col_name) + assert col_id == simple_table.update_sort_order()._column_name_to_id(col_name) @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) def test_replace_sort_order(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple) - simple_table.replace_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).desc( + simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).desc( "bar", IdentityTransform(), NullOrder.NULLS_LAST ).commit() assert simple_table.sort_order() == SortOrder( From b5a5bd83903df232d60ee34619db0f5073cfe825 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 24 Jan 2025 10:27:47 +0100 Subject: [PATCH 24/44] test: add test updating sort order --- tests/integration/test_sort_order_update.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index fe4e550c10..c41edd5966 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -97,3 +97,23 @@ def test_replace_sort_order(catalog: Catalog, table_schema_simple: Schema) -> No SortField(source_id=2, transform=IdentityTransform(), direction=SortDirection.DESC, null_order=NullOrder.NULLS_LAST), order_id=1, ) + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) +def test_replace_existing_sort_order(catalog: Catalog, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple) + simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() + assert simple_table.sort_order() == SortOrder( + SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), + order_id=1, + ) + simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_LAST).desc( + "bar", IdentityTransform(), NullOrder.NULLS_FIRST + ).commit() + assert len(simple_table.sort_orders()) == 3 # 0: empty sort order from creating tables, 1: first sort order, 2: second sort order + assert simple_table.sort_order() == SortOrder( + SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST), + SortField(source_id=2, transform=IdentityTransform(), direction=SortDirection.DESC, null_order=NullOrder.NULLS_FIRST), + order_id=2, + ) From 8080fa526f25a240548312ec4e44518cf8798f10 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 24 Jan 2025 10:36:01 +0100 Subject: [PATCH 25/44] refactor: remove the sort order builder --- pyiceberg/table/update/sorting.py | 71 +++++++++++++------------------ 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 0e8641a715..87c2cd9098 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -34,51 +34,21 @@ from pyiceberg.table import Transaction -class SortOrderBuilder: - def __init__(self, last_sort_order_id: int, case_sensitive: bool = True) -> None: - self._fields: List[SortField] = [] - self._case_sensitive = case_sensitive - self._last_sort_order_id = last_sort_order_id - - def 
add_sort_field( - self, - source_id: int, - transform: Transform[Any, Any], - direction: SortDirection, - null_order: NullOrder, - ) -> SortOrderBuilder: - """Add a sort field to the sort order list.""" - self._fields.append( - SortField( - source_id=source_id, - transform=transform, - direction=direction, - null_order=null_order, - ) - ) - return self - - @property - def sort_order(self) -> SortOrder: - """Return the sort order.""" - return SortOrder(*self._fields, order_id=self._last_sort_order_id + 1) - - class UpdateSortOrder(UpdateTableMetadata["UpdateSortOrder"]): _transaction: Transaction - _builder: SortOrderBuilder _last_assigned_order_id: int _case_sensitive: bool + _fields: List[SortField] + _last_sort_order_id: int def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: super().__init__(transaction) - self._builder = SortOrderBuilder( - case_sensitive=case_sensitive, - last_sort_order_id=transaction.table_metadata.default_sort_order_id, - ) - self._case_sensitive = case_sensitive + self._fields: List[SortField] = [] + self._case_sensitive: bool = case_sensitive + self._last_sort_order_id: int = transaction.table_metadata.default_sort_order_id def _column_name_to_id(self, column_name: str) -> int: + """Maps the column name to the column field id.""" return ( self._transaction.table_metadata.schema() .find_field( @@ -87,27 +57,46 @@ def _column_name_to_id(self, column_name: str) -> int: ) .field_id ) + + def _add_sort_field( + self, + source_id: int, + transform: Transform[Any, Any], + direction: SortDirection, + null_order: NullOrder, + ) -> UpdateSortOrder: + """Add a sort field to the sort order list.""" + self._fields.append( + SortField( + source_id=source_id, + transform=transform, + direction=direction, + null_order=null_order, + ) + ) + return self def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: - self._builder.add_sort_field( + """Adds a sort field with ascending order.""" + return self._add_sort_field( source_id=self._column_name_to_id(source_column_name), transform=transform, direction=SortDirection.ASC, null_order=null_order, ) - return self def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: - self._builder.add_sort_field( + """Adds a sort field with descending order.""" + return self._add_sort_field( source_id=self._column_name_to_id(source_column_name), transform=transform, direction=SortDirection.DESC, null_order=null_order, ) - return self def _apply(self) -> SortOrder: - return self._builder.sort_order + """Returns the sort order""" + return SortOrder(*self._fields, order_id=self._last_sort_order_id + 1) def _commit(self) -> UpdatesAndRequirements: """Apply the pending changes and commit.""" From e77a2c1b113e915fd14bf37d076f177875609666 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 24 Jan 2025 10:36:34 +0100 Subject: [PATCH 26/44] chore: remove sort order builder --- tests/integration/test_sort_order_update.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index c41edd5966..1272958711 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -23,7 +23,6 @@ from pyiceberg.schema import Schema from pyiceberg.table import Table from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder -from pyiceberg.table.update.sorting 
import SortOrderBuilder from pyiceberg.transforms import ( IdentityTransform, ) @@ -66,17 +65,6 @@ def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: return catalog.create_table(identifier=tbl_name, schema=schema, properties={"format-version": format_version}) -@pytest.mark.integration -def test_sort_order_builder() -> None: - builder = SortOrderBuilder(last_sort_order_id=0) - builder.add_sort_field(1, IdentityTransform(), SortDirection.ASC, NullOrder.NULLS_FIRST) - builder.add_sort_field(2, IdentityTransform(), SortDirection.DESC, NullOrder.NULLS_LAST) - assert builder.sort_order == SortOrder( - SortField(1, IdentityTransform(), SortDirection.ASC, NullOrder.NULLS_FIRST), - SortField(2, IdentityTransform(), SortDirection.DESC, NullOrder.NULLS_LAST), - ) - - @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> None: From fc32b2832c997de0dbbdb84d6225332fbef1186b Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 24 Jan 2025 10:39:50 +0100 Subject: [PATCH 27/44] chore: lint --- pyiceberg/table/update/sorting.py | 10 +++++----- tests/integration/test_sort_order_update.py | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 87c2cd9098..19d1b4affa 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -48,7 +48,7 @@ def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> Non self._last_sort_order_id: int = transaction.table_metadata.default_sort_order_id def _column_name_to_id(self, column_name: str) -> int: - """Maps the column name to the column field id.""" + """Map the column name to the column field id.""" return ( self._transaction.table_metadata.schema() .find_field( @@ -57,7 +57,7 @@ def _column_name_to_id(self, column_name: str) -> int: ) .field_id ) - + def _add_sort_field( self, source_id: int, @@ -77,7 +77,7 @@ def _add_sort_field( return self def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: - """Adds a sort field with ascending order.""" + """Add a sort field with ascending order.""" return self._add_sort_field( source_id=self._column_name_to_id(source_column_name), transform=transform, @@ -86,7 +86,7 @@ def asc(self, source_column_name: str, transform: Transform[Any, Any], null_orde ) def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: - """Adds a sort field with descending order.""" + """Add a sort field with descending order.""" return self._add_sort_field( source_id=self._column_name_to_id(source_column_name), transform=transform, @@ -95,7 +95,7 @@ def desc(self, source_column_name: str, transform: Transform[Any, Any], null_ord ) def _apply(self) -> SortOrder: - """Returns the sort order""" + """Return the sort order.""" return SortOrder(*self._fields, order_id=self._last_sort_order_id + 1) def _commit(self) -> UpdatesAndRequirements: diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index 1272958711..857d5b3841 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -99,7 +99,9 @@ def test_replace_existing_sort_order(catalog: Catalog, table_schema_simple: Sche 
    simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_LAST).desc(
         "bar", IdentityTransform(), NullOrder.NULLS_FIRST
     ).commit()
-    assert len(simple_table.sort_orders()) == 3  # 0: empty sort order from creating tables, 1: first sort order, 2: second sort order
+    assert (
+        len(simple_table.sort_orders()) == 3
+    )  # 0: empty sort order from creating tables, 1: first sort order, 2: second sort order
     assert simple_table.sort_order() == SortOrder(
         SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST),
         SortField(source_id=2, transform=IdentityTransform(), direction=SortDirection.DESC, null_order=NullOrder.NULLS_FIRST),
         order_id=2,
     )

From fa1aa50c5c7c54976c3993b1c8c643700530fc7c Mon Sep 17 00:00:00 2001
From: Jasper Ginn
Date: Sat, 25 Jan 2025 10:40:29 +0100
Subject: [PATCH 28/44] chore: update comment

---
 pyiceberg/table/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index effa1c06e6..ce3c8d72eb 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -1096,10 +1096,10 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive
         )

     def update_sort_order(self) -> UpdateSortOrder:
-        """Create a new ReplaceSortOrder to replace the sort order of this table.
+        """Create a new UpdateSortOrder to update the sort order of this table.

         Returns:
-            A new ReplaceSortOrder.
+            A new UpdateSortOrder.
         """
         return UpdateSortOrder(transaction=Transaction(self, autocommit=True), case_sensitive=True)

From 2e9cd3f1191175caf391c0a025aaa7d77f51fa15 Mon Sep 17 00:00:00 2001
From: Jasper Ginn
Date: Sun, 16 Feb 2025 15:17:07 +0100
Subject: [PATCH 29/44] test: parametrize over iceberg format versions and
 remove unused code

---
 tests/integration/test_sort_order_update.py | 70 ++++++++++-----------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py
index 857d5b3841..0dbab8c9d3 100644
--- a/tests/integration/test_sort_order_update.py
+++ b/tests/integration/test_sort_order_update.py
@@ -26,34 +26,10 @@
 from pyiceberg.transforms import (
     IdentityTransform,
 )
-from pyiceberg.types import (
-    LongType,
-    NestedField,
-    StringType,
-    TimestampType,
-)
-
-
-def _simple_table(catalog: Catalog, table_schema_simple: Schema) -> Table:
-    return _create_table_with_schema(catalog, table_schema_simple, "1")
-
-
-def _table(catalog: Catalog) -> Table:
-    schema_with_timestamp = Schema(
-        NestedField(1, "id", LongType(), required=False),
-        NestedField(2, "event_ts", TimestampType(), required=False),
-        NestedField(3, "str", StringType(), required=False),
-    )
-    return _create_table_with_schema(catalog, schema_with_timestamp, "1")
+def _simple_table(catalog: Catalog, table_schema_simple: Schema, format_version: str) -> Table:
+    return _create_table_with_schema(catalog, table_schema_simple, format_version)

-def _table_v2(catalog: Catalog) -> Table:
-    schema_with_timestamp = Schema(
-        NestedField(1, "id", LongType(), required=False),
-        NestedField(2, "event_ts", TimestampType(), required=False),
-        NestedField(3, "str", StringType(), required=False),
-    )
-    return _create_table_with_schema(catalog, schema_with_timestamp, "2")

 def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: str) -> Table:

 @pytest.mark.integration
-@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) -def test_map_column_name_to_id(catalog: Catalog, table_schema_simple: Schema) -> None: - simple_table = _simple_table(catalog, table_schema_simple) +@pytest.mark.parametrize( + "catalog, format_version", + [ + (pytest.lazy_fixture("session_catalog"), "1"), + (pytest.lazy_fixture("session_catalog_hive"), "1"), + (pytest.lazy_fixture("session_catalog"), "2"), + (pytest.lazy_fixture("session_catalog_hive"), "2"), + ] +) +def test_map_column_name_to_id(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple, format_version) for col_name, col_id in {"foo": 1, "bar": 2, "baz": 3}.items(): assert col_id == simple_table.update_sort_order()._column_name_to_id(col_name) @pytest.mark.integration -@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) -def test_replace_sort_order(catalog: Catalog, table_schema_simple: Schema) -> None: - simple_table = _simple_table(catalog, table_schema_simple) +@pytest.mark.parametrize( + "catalog, format_version", + [ + (pytest.lazy_fixture("session_catalog"), "1"), + (pytest.lazy_fixture("session_catalog_hive"), "1"), + (pytest.lazy_fixture("session_catalog"), "2"), + (pytest.lazy_fixture("session_catalog_hive"), "2"), + ] +) +def test_replace_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple, format_version) simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).desc( "bar", IdentityTransform(), NullOrder.NULLS_LAST ).commit() @@ -88,9 +80,17 @@ def test_replace_sort_order(catalog: Catalog, table_schema_simple: Schema) -> No @pytest.mark.integration -@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog"), pytest.lazy_fixture("session_catalog_hive")]) -def test_replace_existing_sort_order(catalog: Catalog, table_schema_simple: Schema) -> None: - simple_table = _simple_table(catalog, table_schema_simple) +@pytest.mark.parametrize( + "catalog, format_version", + [ + (pytest.lazy_fixture("session_catalog"), "1"), + (pytest.lazy_fixture("session_catalog_hive"), "1"), + (pytest.lazy_fixture("session_catalog"), "2"), + (pytest.lazy_fixture("session_catalog_hive"), "2"), + ] +) +def test_replace_existing_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple, format_version) simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() assert simple_table.sort_order() == SortOrder( SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), From 137dbd9551b2172ef5c2d146d4a3d8de5dc9724f Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 16 Feb 2025 15:17:57 +0100 Subject: [PATCH 30/44] chore: fmt --- tests/integration/test_sort_order_update.py | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index 0dbab8c9d3..bfac783e9e 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -43,13 +43,13 @@ def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: 
@pytest.mark.integration @pytest.mark.parametrize( - "catalog, format_version", + "catalog, format_version", [ - (pytest.lazy_fixture("session_catalog"), "1"), + (pytest.lazy_fixture("session_catalog"), "1"), (pytest.lazy_fixture("session_catalog_hive"), "1"), - (pytest.lazy_fixture("session_catalog"), "2"), + (pytest.lazy_fixture("session_catalog"), "2"), (pytest.lazy_fixture("session_catalog_hive"), "2"), - ] + ], ) def test_map_column_name_to_id(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple, format_version) @@ -59,13 +59,13 @@ def test_map_column_name_to_id(catalog: Catalog, format_version: str, table_sche @pytest.mark.integration @pytest.mark.parametrize( - "catalog, format_version", + "catalog, format_version", [ - (pytest.lazy_fixture("session_catalog"), "1"), + (pytest.lazy_fixture("session_catalog"), "1"), (pytest.lazy_fixture("session_catalog_hive"), "1"), - (pytest.lazy_fixture("session_catalog"), "2"), + (pytest.lazy_fixture("session_catalog"), "2"), (pytest.lazy_fixture("session_catalog_hive"), "2"), - ] + ], ) def test_replace_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple, format_version) @@ -81,13 +81,13 @@ def test_replace_sort_order(catalog: Catalog, format_version: str, table_schema_ @pytest.mark.integration @pytest.mark.parametrize( - "catalog, format_version", + "catalog, format_version", [ - (pytest.lazy_fixture("session_catalog"), "1"), + (pytest.lazy_fixture("session_catalog"), "1"), (pytest.lazy_fixture("session_catalog_hive"), "1"), - (pytest.lazy_fixture("session_catalog"), "2"), + (pytest.lazy_fixture("session_catalog"), "2"), (pytest.lazy_fixture("session_catalog_hive"), "2"), - ] + ], ) def test_replace_existing_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple, format_version) From d8b90012d3fcbde0b68b9f9e208f5730a76dd159 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 16 Feb 2025 15:19:56 +0100 Subject: [PATCH 31/44] Update pyiceberg/table/update/sorting.py fix: set default Co-authored-by: Fokko Driesprong --- pyiceberg/table/update/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 19d1b4affa..43256b35fc 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -85,7 +85,7 @@ def asc(self, source_column_name: str, transform: Transform[Any, Any], null_orde null_order=null_order, ) - def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: + def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder = NullOrder.NULLS_LAST) -> UpdateSortOrder: """Add a sort field with descending order.""" return self._add_sort_field( source_id=self._column_name_to_id(source_column_name), From 58d302de58e75da4a7420e128831521b8b0ec772 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 16 Feb 2025 15:24:16 +0100 Subject: [PATCH 32/44] Update pyiceberg/table/__init__.py chore: update signature Co-authored-by: Fokko Driesprong --- pyiceberg/table/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index c195b1e684..8f9d76ed29 100644 --- a/pyiceberg/table/__init__.py +++ 
b/pyiceberg/table/__init__.py
@@ -1123,7 +1123,7 @@ def update_sort_order(self) -> UpdateSortOrder:

         Returns:
             A new UpdateSortOrder.
         """
-        return UpdateSortOrder(transaction=Transaction(self, autocommit=True), case_sensitive=True)
+        return UpdateSortOrder(transaction=Transaction(self, autocommit=True), case_sensitive=case_sensitive)

     def name_mapping(self) -> Optional[NameMapping]:
         """Return the table's field-id NameMapping."""

From fd0e287331097d8a82e71bb884ca46ab5e9154f3 Mon Sep 17 00:00:00 2001
From: Jasper Ginn
Date: Sun, 16 Feb 2025 15:25:41 +0100
Subject: [PATCH 33/44] chore: add arg

---
 pyiceberg/table/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 8f9d76ed29..59abcf3a43 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -1117,7 +1117,7 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive
         name_mapping=self.name_mapping(),
     )

-    def update_sort_order(self) -> UpdateSortOrder:
+    def update_sort_order(self, case_sensitive: bool = True) -> UpdateSortOrder:
         """Create a new UpdateSortOrder to update the sort order of this table.

         Returns:

From 5e57697a9eafa5d76800d5b9f9cddb9a011a1d84 Mon Sep 17 00:00:00 2001
From: Jasper Ginn
Date: Sun, 16 Feb 2025 15:26:20 +0100
Subject: [PATCH 34/44] chore: fmt

---
 pyiceberg/table/update/sorting.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py
index 43256b35fc..05657c84e4 100644
--- a/pyiceberg/table/update/sorting.py
+++ b/pyiceberg/table/update/sorting.py
@@ -85,7 +85,9 @@ def asc(self, source_column_name: str, transform: Transform[Any, Any], null_orde
         null_order=null_order,
     )

-    def desc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder = NullOrder.NULLS_LAST) -> UpdateSortOrder:
+    def desc(
+        self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder = NullOrder.NULLS_LAST
+    ) -> UpdateSortOrder:
         """Add a sort field with descending order."""
         return self._add_sort_field(
             source_id=self._column_name_to_id(source_column_name),

From 51319032cb3d4ed2fc6a1a6525caed645ed5b9a3 Mon Sep 17 00:00:00 2001
From: Jasper Ginn
Date: Sun, 16 Feb 2025 15:47:23 +0100
Subject: [PATCH 35/44] docs: update docs

---
 mkdocs/docs/api.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md
index e904662871..e72f9e0a78 100644
--- a/mkdocs/docs/api.md
+++ b/mkdocs/docs/api.md
@@ -1194,6 +1194,24 @@ with table.update_spec() as update:
     update.rename_field("bucketed_id", "sharded_id")
 ```

+## Sort order updates
+
+Users can update the sort order on existing tables; the new order applies to data written after the change. See [sorting](https://iceberg.apache.org/spec/#sorting) for more details.
+
+The API to use when updating a sort order is the `update_sort_order` API on the table.
+
+Sort orders can only be updated by adding a new sort order. They cannot be deleted or modified.
+
+### Updating a sort order on a table
+
+To create a new sort order, you can use either the `asc` or `desc` API depending on whether you want your data sorted in ascending or descending order. Both take the name of the field, the sort order transform, and a null order that describes the order of null values when sorted.
+ +```python +with table.update_sort_order() as update: + update.desc("event_ts", DayTransform(), NullOrder.NULLS_FIRST) + update.asc("some_field", IdentityTransform(), NullOrder.NULLS_LAST) +``` + ## Table properties Set and remove properties through the `Transaction` API: From cc1ae1c2e17f96f4ef62a58fb491cca1ab3202b6 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 16 Feb 2025 15:47:43 +0100 Subject: [PATCH 36/44] chore: set default --- pyiceberg/table/update/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 05657c84e4..4df17d700c 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -76,7 +76,7 @@ def _add_sort_field( ) return self - def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder) -> UpdateSortOrder: + def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder = NullOrder.NULLS_LAST) -> UpdateSortOrder: """Add a sort field with ascending order.""" return self._add_sort_field( source_id=self._column_name_to_id(source_column_name), From d99dfdd6e383b6e08c4b2cf6d917580095be517f Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Sun, 23 Mar 2025 20:57:23 +0100 Subject: [PATCH 37/44] chore: lint and update names --- pyiceberg/table/update/sorting.py | 4 +++- tests/integration/test_sort_order_update.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 4df17d700c..5062afd352 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -76,7 +76,9 @@ def _add_sort_field( ) return self - def asc(self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder = NullOrder.NULLS_LAST) -> UpdateSortOrder: + def asc( + self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder = NullOrder.NULLS_LAST + ) -> UpdateSortOrder: """Add a sort field with ascending order.""" return self._add_sort_field( source_id=self._column_name_to_id(source_column_name), diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index bfac783e9e..a47a67584c 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -67,7 +67,7 @@ def test_map_column_name_to_id(catalog: Catalog, format_version: str, table_sche (pytest.lazy_fixture("session_catalog_hive"), "2"), ], ) -def test_replace_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: +def test_update_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple, format_version) simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).desc( "bar", IdentityTransform(), NullOrder.NULLS_LAST @@ -89,7 +89,7 @@ def test_replace_sort_order(catalog: Catalog, format_version: str, table_schema_ (pytest.lazy_fixture("session_catalog_hive"), "2"), ], ) -def test_replace_existing_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: +def test_update_existing_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple, format_version) simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() assert 
simple_table.sort_order() == SortOrder( From 9d77f3f862ed182537593b512dfe61049cad221d Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Mon, 24 Mar 2025 22:48:14 +0100 Subject: [PATCH 38/44] chore: determine if a sort order is newly added. If so, set the last assigned id, else set a previous sort order as the default --- pyiceberg/table/update/sorting.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 5062afd352..525b4baa98 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -16,9 +16,9 @@ # under the License. from __future__ import annotations -from typing import TYPE_CHECKING, Any, List, Tuple +from typing import TYPE_CHECKING, Any, List, Tuple, Optional -from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder +from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder, INITIAL_SORT_ORDER_ID, UNSORTED_SORT_ORDER from pyiceberg.table.update import ( AddSortOrderUpdate, AssertDefaultSortOrderId, @@ -36,16 +36,15 @@ class UpdateSortOrder(UpdateTableMetadata["UpdateSortOrder"]): _transaction: Transaction - _last_assigned_order_id: int + _last_assigned_order_id: Optional[int] _case_sensitive: bool _fields: List[SortField] - _last_sort_order_id: int def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: super().__init__(transaction) self._fields: List[SortField] = [] self._case_sensitive: bool = case_sensitive - self._last_sort_order_id: int = transaction.table_metadata.default_sort_order_id + self._last_assigned_order_id: Optional[int] = None def _column_name_to_id(self, column_name: str) -> int: """Map the column name to the column field id.""" @@ -75,6 +74,17 @@ def _add_sort_field( ) ) return self + + def _reuse_or_create_sort_order_id(self) -> int: + """Return the last assigned sort order id or create a new one.""" + new_sort_order_id = INITIAL_SORT_ORDER_ID + for sort_order in self._transaction.table_metadata.sort_orders: + new_sort_order_id = max(new_sort_order_id, sort_order.order_id) + if sort_order.fields == self._fields: + return sort_order.order_id + elif new_sort_order_id <= sort_order.order_id: + new_sort_order_id = sort_order.order_id + 1 + return new_sort_order_id def asc( self, source_column_name: str, transform: Transform[Any, Any], null_order: NullOrder = NullOrder.NULLS_LAST @@ -100,7 +110,12 @@ def desc( def _apply(self) -> SortOrder: """Return the sort order.""" - return SortOrder(*self._fields, order_id=self._last_sort_order_id + 1) + if next(iter(self._fields), None) is None: + return UNSORTED_SORT_ORDER + else: + _sort_order_id = self._reuse_or_create_sort_order_id() + self._last_assigned_order_id = _sort_order_id + return SortOrder(*self._fields, order_id=_sort_order_id) def _commit(self) -> UpdatesAndRequirements: """Apply the pending changes and commit.""" @@ -108,7 +123,7 @@ def _commit(self) -> UpdatesAndRequirements: requirements: Tuple[TableRequirement, ...] = () updates: Tuple[TableUpdate, ...] 
= () - if self._transaction.table_metadata.default_sort_order_id != new_sort_order.order_id: + if self._transaction.table_metadata.default_sort_order_id != new_sort_order.order_id and self._transaction.table_metadata.sort_order_by_id(new_sort_order.order_id) is None: updates = (AddSortOrderUpdate(sort_order=new_sort_order), SetDefaultSortOrderUpdate(sort_order_id=-1)) else: updates = (SetDefaultSortOrderUpdate(sort_order_id=new_sort_order.order_id),) From 3f6a953bef39d069638934c69f947e93db102df5 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Mon, 24 Mar 2025 22:48:36 +0100 Subject: [PATCH 39/44] test: add test for re-using previously defined sort order --- tests/integration/test_sort_order_update.py | 33 ++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index a47a67584c..31abba868b 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -89,7 +89,7 @@ def test_update_sort_order(catalog: Catalog, format_version: str, table_schema_s (pytest.lazy_fixture("session_catalog_hive"), "2"), ], ) -def test_update_existing_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: +def test_increment_existing_sort_order_id(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple, format_version) simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() assert simple_table.sort_order() == SortOrder( @@ -107,3 +107,34 @@ def test_update_existing_sort_order(catalog: Catalog, format_version: str, table SortField(source_id=2, transform=IdentityTransform(), direction=SortDirection.DESC, null_order=NullOrder.NULLS_FIRST), order_id=2, ) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "catalog, format_version", + [ + (pytest.lazy_fixture("session_catalog"), "1"), + (pytest.lazy_fixture("session_catalog_hive"), "1"), + (pytest.lazy_fixture("session_catalog"), "2"), + (pytest.lazy_fixture("session_catalog_hive"), "2"), + ], +) +def test_update_existing_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple, format_version) + simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() + assert simple_table.sort_order() == SortOrder( + SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), + order_id=1, + ) + simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_LAST).desc( + "bar", IdentityTransform(), NullOrder.NULLS_FIRST + ).commit() + # Go back to the first sort order + simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() + assert ( + len(simple_table.sort_orders()) == 3 + ) # line 133 should not create a new sort order since it is the same as the first one + assert simple_table.sort_order() == SortOrder( + SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), + order_id=1, + ) From 7223fdd323f6721abb4c67fbb68c131f916a5700 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Mon, 24 Mar 2025 22:49:31 +0100 Subject: [PATCH 40/44] chore: fmt --- pyiceberg/table/update/sorting.py | 4 +++- tests/integration/test_sort_order_update.py | 4 ++-- 2 files
changed, 9 insertions(+), 6 deletions(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 525b4baa98..80fc8273bf 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -16,9 +16,9 @@ # under the License. from __future__ import annotations -from typing import TYPE_CHECKING, Any, List, Tuple, Optional +from typing import TYPE_CHECKING, Any, List, Optional, Tuple -from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder, INITIAL_SORT_ORDER_ID, UNSORTED_SORT_ORDER +from pyiceberg.table.sorting import INITIAL_SORT_ORDER_ID, UNSORTED_SORT_ORDER, NullOrder, SortDirection, SortField, SortOrder from pyiceberg.table.update import ( AddSortOrderUpdate, AssertDefaultSortOrderId, @@ -74,7 +74,7 @@ def _add_sort_field( ) ) return self - + def _reuse_or_create_sort_order_id(self) -> int: """Return the last assigned sort order id or create a new one.""" new_sort_order_id = INITIAL_SORT_ORDER_ID @@ -123,7 +123,10 @@ def _commit(self) -> UpdatesAndRequirements: requirements: Tuple[TableRequirement, ...] = () updates: Tuple[TableUpdate, ...] = () - if self._transaction.table_metadata.default_sort_order_id != new_sort_order.order_id and self._transaction.table_metadata.sort_order_by_id(new_sort_order.order_id) is None: + if ( + self._transaction.table_metadata.default_sort_order_id != new_sort_order.order_id + and self._transaction.table_metadata.sort_order_by_id(new_sort_order.order_id) is None + ): updates = (AddSortOrderUpdate(sort_order=new_sort_order), SetDefaultSortOrderUpdate(sort_order_id=-1)) else: updates = (SetDefaultSortOrderUpdate(sort_order_id=new_sort_order.order_id),) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index 31abba868b..c431dccd4f 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -130,10 +130,10 @@ def test_update_existing_sort_order(catalog: Catalog, format_version: str, table "bar", IdentityTransform(), NullOrder.NULLS_FIRST ).commit() # Go back to the first sort order - simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() + simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() assert ( len(simple_table.sort_orders()) == 3 - ) # line 133 should not create a new sort order since it is the same as the first one + ) # line 133 should not create a new sort order since it is the same as the first one assert simple_table.sort_order() == SortOrder( SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), order_id=1, From 18ced3f41bdd731ff3cd627cedcc83d19d21e4fb Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Mon, 24 Mar 2025 22:56:51 +0100 Subject: [PATCH 41/44] chore: only set last assigned order id when sure that a new sort order has been added --- pyiceberg/table/update/sorting.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyiceberg/table/update/sorting.py b/pyiceberg/table/update/sorting.py index 80fc8273bf..a356229f91 100644 --- a/pyiceberg/table/update/sorting.py +++ b/pyiceberg/table/update/sorting.py @@ -113,9 +113,7 @@ def _apply(self) -> SortOrder: if next(iter(self._fields), None) is None: return UNSORTED_SORT_ORDER else: - _sort_order_id = self._reuse_or_create_sort_order_id() - self._last_assigned_order_id = _sort_order_id - return SortOrder(*self._fields, order_id=_sort_order_id) + 
return SortOrder(*self._fields, order_id=self._reuse_or_create_sort_order_id()) def _commit(self) -> UpdatesAndRequirements: """Apply the pending changes and commit.""" @@ -127,6 +125,7 @@ def _commit(self) -> UpdatesAndRequirements: self._transaction.table_metadata.default_sort_order_id != new_sort_order.order_id and self._transaction.table_metadata.sort_order_by_id(new_sort_order.order_id) is None ): + self._last_assigned_order_id = new_sort_order.order_id updates = (AddSortOrderUpdate(sort_order=new_sort_order), SetDefaultSortOrderUpdate(sort_order_id=-1)) else: updates = (SetDefaultSortOrderUpdate(sort_order_id=new_sort_order.order_id),) From 8f425fb5c3242be3c01b441859c9f815ccb65fd7 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Mon, 24 Mar 2025 22:57:08 +0100 Subject: [PATCH 42/44] test: add test for reverting back to unsorted sort order --- tests/integration/test_sort_order_update.py | 26 +++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/integration/test_sort_order_update.py b/tests/integration/test_sort_order_update.py index c431dccd4f..10f4dafc0f 100644 --- a/tests/integration/test_sort_order_update.py +++ b/tests/integration/test_sort_order_update.py @@ -138,3 +138,29 @@ def test_update_existing_sort_order(catalog: Catalog, format_version: str, table SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), order_id=1, ) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "catalog, format_version", + [ + (pytest.lazy_fixture("session_catalog"), "1"), + (pytest.lazy_fixture("session_catalog_hive"), "1"), + (pytest.lazy_fixture("session_catalog"), "2"), + (pytest.lazy_fixture("session_catalog_hive"), "2"), + ], +) +def test_update_existing_sort_order_with_unsorted_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple, format_version) + simple_table.update_sort_order().asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST).commit() + assert simple_table.sort_order() == SortOrder( + SortField(source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), + order_id=1, + ) + # Committing an empty sort order update reverts the table to unsorted + simple_table.update_sort_order().commit() + # The unsorted order (order_id=0) is reused, so no new sort order is added + assert ( + len(simple_table.sort_orders()) == 2 + ) + assert simple_table.sort_order() == SortOrder(order_id=0)
IdentityTransform(), NullOrder.NULLS_FIRST).commit() assert simple_table.sort_order() == SortOrder( @@ -160,7 +162,5 @@ def test_update_existing_sort_order_with_unsorted_sort_order(catalog: Catalog, f # Committing an empty sort order update reverts the table to unsorted simple_table.update_sort_order().commit() # The unsorted order (order_id=0) is reused, so no new sort order is added - assert ( - len(simple_table.sort_orders()) == 2 - ) + assert len(simple_table.sort_orders()) == 2 assert simple_table.sort_order() == SortOrder(order_id=0) From 353178fc19ae62e869c6f02e7d6a75eb15c8f613 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 30 Sep 2025 21:16:05 +0200 Subject: [PATCH 44/44] Make the CI happy --- pyiceberg/table/__init__.py | 485 +++++++++++++++++++++++++----------- 1 file changed, 346 insertions(+), 139 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index a52f60967c..972efc8c47 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -80,6 +80,7 @@ from pyiceberg.schema import Schema from pyiceberg.table.inspect import InspectTable from pyiceberg.table.locations import LocationProvider, load_location_provider +from pyiceberg.table.maintenance import MaintenanceTable from pyiceberg.table.metadata import ( INITIAL_SEQUENCE_NUMBER, TableMetadata, @@ -87,7 +88,7 @@ from pyiceberg.table.name_mapping import ( NameMapping, ) -from pyiceberg.table.refs import SnapshotRef +from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef from pyiceberg.table.snapshots import ( Snapshot, SnapshotLogEntry, @@ -142,12 +143,14 @@ from pyiceberg.utils.properties import property_as_bool if TYPE_CHECKING: + import bodo.pandas as bd import daft import pandas as pd import polars as pl import pyarrow as pa import ray from duckdb import DuckDBPyConnection + from pyiceberg_core.datafusion import IcebergDataFusionTable from pyiceberg.catalog import Catalog @@ -193,6 +196,9 @@ class TableProperties: WRITE_TARGET_FILE_SIZE_BYTES = "write.target-file-size-bytes" WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT = 512 * 1024 * 1024 # 512 MB + WRITE_AVRO_COMPRESSION = "write.avro.compression-codec" + WRITE_AVRO_COMPRESSION_DEFAULT = "gzip" + DEFAULT_WRITE_METRICS_MODE = "write.metadata.metrics.default" DEFAULT_WRITE_METRICS_MODE_DEFAULT = "truncate(16)" @@ -210,6 +216,9 @@ class TableProperties: WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True WRITE_DATA_PATH = "write.data.path" + + WRITE_FILE_FORMAT = "write.format.default" + WRITE_FILE_FORMAT_DEFAULT = "parquet" WRITE_METADATA_PATH = "write.metadata.path" DELETE_MODE = "write.delete.mode" @@ -219,7 +228,7 @@ class TableProperties: DEFAULT_NAME_MAPPING = "schema.name-mapping.default" FORMAT_VERSION = "format-version" - DEFAULT_FORMAT_VERSION = 2 + DEFAULT_FORMAT_VERSION: TableVersion = 2 MANIFEST_TARGET_SIZE_BYTES = "commit.manifest.target-size-bytes" MANIFEST_TARGET_SIZE_BYTES_DEFAULT = 8 * 1024 * 1024 # 8 MB @@ -292,8 +301,6 @@ def _apply(self, updates: Tuple[TableUpdate, ...], requirements: Tuple[TableRequ if self._autocommit: self.commit_transaction() - self._updates = () - self._requirements = () return self @@ -399,7 +406,9 @@ def _build_partition_predicate(self, partition_records: Set[Record]) -> BooleanE expr = Or(expr, match_partition_expression) return expr - def _append_snapshot_producer(self, snapshot_properties: Dict[str, str]) -> _FastAppendFiles: + def _append_snapshot_producer( + self, snapshot_properties: Dict[str, str], branch: Optional[str] = MAIN_BRANCH + ) -> _FastAppendFiles: """Determine the append type based on table properties.
Args: @@ -412,7 +421,7 @@ def _append_snapshot_producer(self, snapshot_properties: Dict[str, str]) -> _Fas TableProperties.MANIFEST_MERGE_ENABLED, TableProperties.MANIFEST_MERGE_ENABLED_DEFAULT, ) - update_snapshot = self.update_snapshot(snapshot_properties=snapshot_properties) + update_snapshot = self.update_snapshot(snapshot_properties=snapshot_properties, branch=branch) return update_snapshot.merge_append() if manifest_merge_enabled else update_snapshot.fast_append() def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive: bool = True) -> UpdateSchema: @@ -446,13 +455,15 @@ def update_sort_order(self, case_sensitive: bool = True) -> UpdateSortOrder: case_sensitive=case_sensitive, ) - def update_snapshot(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> UpdateSnapshot: + def update_snapshot( + self, snapshot_properties: Dict[str, str] = EMPTY_DICT, branch: Optional[str] = MAIN_BRANCH + ) -> UpdateSnapshot: """Create a new UpdateSnapshot to produce a new snapshot for the table. Returns: A new UpdateSnapshot """ - return UpdateSnapshot(self, io=self._table.io, snapshot_properties=snapshot_properties) + return UpdateSnapshot(self, io=self._table.io, branch=branch, snapshot_properties=snapshot_properties) def update_statistics(self) -> UpdateStatistics: """ @@ -463,13 +474,14 @@ def update_statistics(self) -> UpdateStatistics: """ return UpdateStatistics(transaction=self) - def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None: + def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT, branch: Optional[str] = MAIN_BRANCH) -> None: """ Shorthand API for appending a PyArrow table to a table transaction. Args: df: The Arrow dataframe that will be appended to overwrite the table snapshot_properties: Custom properties to be added to the snapshot summary + branch: Branch Reference to run the append operation """ try: import pyarrow as pa @@ -481,18 +493,15 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) if not isinstance(df, pa.Table): raise ValueError(f"Expected PyArrow table, got: {df}") - if unsupported_partitions := [ - field for field in self.table_metadata.spec().fields if not field.transform.supports_pyarrow_transform - ]: - raise ValueError( - f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}." 
- ) downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False _check_pyarrow_schema_compatible( - self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us + self.table_metadata.schema(), + provided_schema=df.schema, + downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, + format_version=self.table_metadata.format_version, ) - with self._append_snapshot_producer(snapshot_properties) as append_files: + with self._append_snapshot_producer(snapshot_properties, branch=branch) as append_files: # skip writing data files if the dataframe is empty if df.shape[0] > 0: data_files = list( @@ -503,7 +512,9 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) for data_file in data_files: append_files.append_data_file(data_file) - def dynamic_partition_overwrite(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None: + def dynamic_partition_overwrite( + self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT, branch: Optional[str] = MAIN_BRANCH + ) -> None: """ Shorthand for overwriting existing partitions with a PyArrow table. @@ -514,6 +525,7 @@ def dynamic_partition_overwrite(self, df: pa.Table, snapshot_properties: Dict[st Args: df: The Arrow dataframe that will be used to overwrite the table snapshot_properties: Custom properties to be added to the snapshot summary + branch: Branch Reference to run the dynamic partition overwrite operation """ try: import pyarrow as pa @@ -536,7 +548,10 @@ def dynamic_partition_overwrite(self, df: pa.Table, snapshot_properties: Dict[st downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False _check_pyarrow_schema_compatible( - self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us + self.table_metadata.schema(), + provided_schema=df.schema, + downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, + format_version=self.table_metadata.format_version, ) # If dataframe does not have data, there is no need to overwrite @@ -552,9 +567,9 @@ def dynamic_partition_overwrite(self, df: pa.Table, snapshot_properties: Dict[st partitions_to_overwrite = {data_file.partition for data_file in data_files} delete_filter = self._build_partition_predicate(partition_records=partitions_to_overwrite) - self.delete(delete_filter=delete_filter, snapshot_properties=snapshot_properties) + self.delete(delete_filter=delete_filter, snapshot_properties=snapshot_properties, branch=branch) - with self._append_snapshot_producer(snapshot_properties) as append_files: + with self._append_snapshot_producer(snapshot_properties, branch=branch) as append_files: append_files.commit_uuid = append_snapshot_commit_uuid for data_file in data_files: append_files.append_data_file(data_file) @@ -565,6 +580,7 @@ def overwrite( overwrite_filter: Union[BooleanExpression, str] = ALWAYS_TRUE, snapshot_properties: Dict[str, str] = EMPTY_DICT, case_sensitive: bool = True, + branch: Optional[str] = MAIN_BRANCH, ) -> None: """ Shorthand for adding a table overwrite with a PyArrow table to the transaction. @@ -572,15 +588,16 @@ def overwrite( An overwrite may produce zero or more snapshots based on the operation: - DELETE: In case existing Parquet files can be dropped completely. - - REPLACE: In case existing Parquet files need to be rewritten. + - OVERWRITE: In case existing Parquet files need to be rewritten to drop rows that match the overwrite filter. 
- APPEND: In case new data is being inserted into the table. Args: df: The Arrow dataframe that will be used to overwrite the table overwrite_filter: ALWAYS_TRUE when you overwrite all the data, or a boolean expression in case of a partial overwrite - case_sensitive: A bool determine if the provided `overwrite_filter` is case-sensitive snapshot_properties: Custom properties to be added to the snapshot summary + snapshot_properties: Custom properties to be added to the snapshot summary + case_sensitive: A bool to determine if the provided `overwrite_filter` is case-sensitive + branch: Branch Reference to run the overwrite operation """ try: import pyarrow as pa except ModuleNotFoundError as e: raise ModuleNotFoundError("For writes PyArrow needs to be installed") from e if not isinstance(df, pa.Table): raise ValueError(f"Expected PyArrow table, got: {df}") - if unsupported_partitions := [ - field for field in self.table_metadata.spec().fields if not field.transform.supports_pyarrow_transform - ]: - raise ValueError( - f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}." - ) downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False _check_pyarrow_schema_compatible( - self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us + self.table_metadata.schema(), + provided_schema=df.schema, + downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, + format_version=self.table_metadata.format_version, ) if overwrite_filter != AlwaysFalse(): # Only delete when the filter is != AlwaysFalse - self.delete(delete_filter=overwrite_filter, case_sensitive=case_sensitive, snapshot_properties=snapshot_properties) + self.delete( + delete_filter=overwrite_filter, + case_sensitive=case_sensitive, + snapshot_properties=snapshot_properties, + branch=branch, + ) - with self._append_snapshot_producer(snapshot_properties) as append_files: + with self._append_snapshot_producer(snapshot_properties, branch=branch) as append_files: # skip writing data files if the dataframe is empty if df.shape[0] > 0: data_files = _dataframe_to_data_files( @@ -621,6 +640,7 @@ def delete( self, delete_filter: Union[str, BooleanExpression], snapshot_properties: Dict[str, str] = EMPTY_DICT, case_sensitive: bool = True, + branch: Optional[str] = MAIN_BRANCH, ) -> None: """ Shorthand for deleting record from a table. A delete may produce zero or more snapshots based on the operation: - DELETE: In case existing Parquet files can be dropped completely. - - REPLACE: In case existing Parquet files need to be rewritten + - OVERWRITE: In case existing Parquet files need to be rewritten to drop rows that match the delete filter.
Args: delete_filter: A boolean expression to delete rows from a table snapshot_properties: Custom properties to be added to the snapshot summary case_sensitive: A bool determine if the provided `delete_filter` is case-sensitive + branch: Branch Reference to run the delete operation """ from pyiceberg.io.pyarrow import ( ArrowScan, @@ -650,7 +671,7 @@ def delete( if isinstance(delete_filter, str): delete_filter = _parse_row_filter(delete_filter) - with self.update_snapshot(snapshot_properties=snapshot_properties).delete() as delete_snapshot: + with self.update_snapshot(snapshot_properties=snapshot_properties, branch=branch).delete() as delete_snapshot: delete_snapshot.delete_by_predicate(delete_filter, case_sensitive) # Check if there are any files that require an actual rewrite of a data file @@ -658,7 +679,10 @@ def delete( bound_delete_filter = bind(self.table_metadata.schema(), delete_filter, case_sensitive) preserve_row_filter = _expression_to_complementary_pyarrow(bound_delete_filter) - files = self._scan(row_filter=delete_filter, case_sensitive=case_sensitive).plan_files() + file_scan = self._scan(row_filter=delete_filter, case_sensitive=case_sensitive) + if branch is not None: + file_scan = file_scan.use_ref(branch) + files = file_scan.plan_files() commit_uuid = uuid.uuid4() counter = itertools.count(0) @@ -700,7 +724,9 @@ def delete( ) if len(replaced_files) > 0: - with self.update_snapshot(snapshot_properties=snapshot_properties).overwrite() as overwrite_snapshot: + with self.update_snapshot( + snapshot_properties=snapshot_properties, branch=branch + ).overwrite() as overwrite_snapshot: overwrite_snapshot.commit_uuid = commit_uuid for original_data_file, replaced_data_files in replaced_files: overwrite_snapshot.delete_data_file(original_data_file) @@ -710,8 +736,156 @@ def delete( if not delete_snapshot.files_affected and not delete_snapshot.rewrites_needed: warnings.warn("Delete operation did not match any records") + def upsert( + self, + df: pa.Table, + join_cols: Optional[List[str]] = None, + when_matched_update_all: bool = True, + when_not_matched_insert_all: bool = True, + case_sensitive: bool = True, + branch: Optional[str] = MAIN_BRANCH, + ) -> UpsertResult: + """Shorthand API for performing an upsert to an iceberg table. + + Args: + + df: The input dataframe to upsert with the table's data. + join_cols: Columns to join on, if not provided, it will use the identifier-field-ids. 
+ when_matched_update_all: Bool indicating to update rows that are matched but require an update due to a value in a non-key column changing + when_not_matched_insert_all: Bool indicating new rows to be inserted that do not match any existing rows in the table + case_sensitive: Bool indicating if the match should be case-sensitive + branch: Branch Reference to run the upsert operation + + To learn more about the identifier-field-ids: https://iceberg.apache.org/spec/#identifier-field-ids + + Example Use Cases: + Case 1: Both Parameters = True (Full Upsert) + Existing row found → Update it + New row found → Insert it + + Case 2: when_matched_update_all = False, when_not_matched_insert_all = True + Existing row found → Do nothing (no updates) + New row found → Insert it + + Case 3: when_matched_update_all = True, when_not_matched_insert_all = False + Existing row found → Update it + New row found → Do nothing (no inserts) + + Case 4: Both Parameters = False (No Merge Effect) + Existing row found → Do nothing + New row found → Do nothing + (Function effectively does nothing) + + + Returns: + An UpsertResult class (contains details of rows updated and inserted) + """ + try: + import pyarrow as pa # noqa: F401 + except ModuleNotFoundError as e: + raise ModuleNotFoundError("For writes PyArrow needs to be installed") from e + + from pyiceberg.io.pyarrow import expression_to_pyarrow + from pyiceberg.table import upsert_util + + if join_cols is None: + join_cols = [] + for field_id in self.table_metadata.schema().identifier_field_ids: + col = self.table_metadata.schema().find_column_name(field_id) + if col is not None: + join_cols.append(col) + else: + raise ValueError(f"Field-ID could not be found: {join_cols}") + + if len(join_cols) == 0: + raise ValueError("Join columns could not be found, please set identifier-field-ids or pass in explicitly.") + + if not when_matched_update_all and not when_not_matched_insert_all: + raise ValueError("no upsert options selected...exiting") + + if upsert_util.has_duplicate_rows(df, join_cols): + raise ValueError("Duplicate rows found in source dataset based on the key columns. No upsert executed") + + from pyiceberg.io.pyarrow import _check_pyarrow_schema_compatible + + downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False + _check_pyarrow_schema_compatible( + self.table_metadata.schema(), + provided_schema=df.schema, + downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, + format_version=self.table_metadata.format_version, + ) + + # get list of rows that exist so we don't have to load the entire target table + matched_predicate = upsert_util.create_match_filter(df, join_cols) + + # We must use Transaction.table_metadata for the scan. This includes all uncommitted - but relevant - changes. 
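+        # The scan constructed below therefore sees those staged changes; when a branch ref is present it reads from that branch, and matches are streamed back as record batches to keep memory use bounded.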
+ + matched_iceberg_record_batches_scan = DataScan( + table_metadata=self.table_metadata, + io=self._table.io, + row_filter=matched_predicate, + case_sensitive=case_sensitive, + ) + + if branch in self.table_metadata.refs: + matched_iceberg_record_batches_scan = matched_iceberg_record_batches_scan.use_ref(branch) + + matched_iceberg_record_batches = matched_iceberg_record_batches_scan.to_arrow_batch_reader() + + batches_to_overwrite = [] + overwrite_predicates = [] + rows_to_insert = df + + for batch in matched_iceberg_record_batches: + rows = pa.Table.from_batches([batch]) + + if when_matched_update_all: + # function get_rows_to_update is doing a check on non-key columns to see if any of the values have actually changed + # we don't want to do just a blanket overwrite for matched rows if the actual non-key column data hasn't changed + # this extra step avoids unnecessary IO and writes + rows_to_update = upsert_util.get_rows_to_update(df, rows, join_cols) + + if len(rows_to_update) > 0: + # build the match predicate filter + overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols) + + batches_to_overwrite.append(rows_to_update) + overwrite_predicates.append(overwrite_mask_predicate) + + if when_not_matched_insert_all: + expr_match = upsert_util.create_match_filter(rows, join_cols) + expr_match_bound = bind(self.table_metadata.schema(), expr_match, case_sensitive=case_sensitive) + expr_match_arrow = expression_to_pyarrow(expr_match_bound) + + # Filter rows per batch. + rows_to_insert = rows_to_insert.filter(~expr_match_arrow) + + update_row_cnt = 0 + insert_row_cnt = 0 + + if batches_to_overwrite: + rows_to_update = pa.concat_tables(batches_to_overwrite) + update_row_cnt = len(rows_to_update) + self.overwrite( + rows_to_update, + overwrite_filter=Or(*overwrite_predicates) if len(overwrite_predicates) > 1 else overwrite_predicates[0], + branch=branch, + ) + + if when_not_matched_insert_all: + insert_row_cnt = len(rows_to_insert) + if rows_to_insert: + self.append(rows_to_insert, branch=branch) + + return UpsertResult(rows_updated=update_row_cnt, rows_inserted=insert_row_cnt) + def add_files( - self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT, check_duplicate_files: bool = True + self, + file_paths: List[str], + snapshot_properties: Dict[str, str] = EMPTY_DICT, + check_duplicate_files: bool = True, + branch: Optional[str] = MAIN_BRANCH, ) -> None: """ Shorthand API for adding files as data files to the table transaction. 
@@ -731,7 +905,7 @@ def add_files( import pyarrow.compute as pc expr = pc.field("file_path").isin(file_paths) - referenced_files = [file["file_path"] for file in self._table.inspect.files().filter(expr).to_pylist()] + referenced_files = [file["file_path"] for file in self._table.inspect.data_files().filter(expr).to_pylist()] if referenced_files: raise ValueError(f"Cannot add files that are already referenced by table, files: {', '.join(referenced_files)}") @@ -740,12 +914,12 @@ def add_files( self.set_properties( **{TableProperties.DEFAULT_NAME_MAPPING: self.table_metadata.schema().name_mapping.model_dump_json()} ) - with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot: + with self._append_snapshot_producer(snapshot_properties, branch=branch) as append_files: data_files = _parquet_files_to_data_files( table_metadata=self.table_metadata, file_paths=file_paths, io=self._table.io ) for data_file in data_files: - update_snapshot.append_data_file(data_file) + append_files.append_data_file(data_file) def update_spec(self) -> UpdateSpec: """Create a new UpdateSpec to update the partitioning of the table. @@ -789,13 +963,15 @@ def commit_transaction(self) -> Table: updates=self._updates, requirements=self._requirements, ) - return self._table - else: - return self._table + + self._updates = () + self._requirements = () + + return self._table class CreateTableTransaction(Transaction): - """A transaction that involves the creation of a a new table.""" + """A transaction that involves the creation of a new table.""" def _initial_changes(self, table_metadata: TableMetadata) -> None: """Set the initial changes that can reconstruct the initial table metadata when creating the CreateTableTransaction.""" @@ -806,7 +982,7 @@ def _initial_changes(self, table_metadata: TableMetadata) -> None: schema: Schema = table_metadata.schema() self._updates += ( - AddSchemaUpdate(schema_=schema, last_column_id=schema.highest_field_id), + AddSchemaUpdate(schema_=schema), SetCurrentSchemaUpdate(schema_id=-1), ) @@ -840,11 +1016,15 @@ def commit_transaction(self) -> Table: Returns: The table with the updates applied. """ - self._requirements = (AssertCreate(),) - self._table._do_commit( # pylint: disable=W0212 - updates=self._updates, - requirements=self._requirements, - ) + if len(self._updates) > 0: + self._table._do_commit( # pylint: disable=W0212 + updates=self._updates, + requirements=(AssertCreate(),), + ) + + self._updates = () + self._requirements = () + return self._table @@ -922,6 +1102,15 @@ def inspect(self) -> InspectTable: """ return InspectTable(self) + @property + def maintenance(self) -> MaintenanceTable: + """Return the MaintenanceTable object for maintenance. + + Returns: + MaintenanceTable object based on this Table. + """ + return MaintenanceTable(self) + def refresh(self) -> Table: """Refresh the current table metadata. @@ -1147,6 +1336,7 @@ def upsert( when_matched_update_all: bool = True, when_not_matched_insert_all: bool = True, case_sensitive: bool = True, + branch: Optional[str] = MAIN_BRANCH, ) -> UpsertResult: """Shorthand API for performing an upsert to an iceberg table. 
@@ -1157,6 +1347,7 @@ def upsert( when_matched_update_all: Bool indicating to update rows that are matched but require an update due to a value in a non-key column changing when_not_matched_insert_all: Bool indicating new rows to be inserted that do not match any existing rows in the table case_sensitive: Bool indicating if the match should be case-sensitive + branch: Branch Reference to run the upsert operation To learn more about the identifier-field-ids: https://iceberg.apache.org/spec/#identifier-field-ids @@ -1182,95 +1373,41 @@ def upsert( Returns: An UpsertResult class (contains details of rows updated and inserted) """ - try: - import pyarrow as pa # noqa: F401 - except ModuleNotFoundError as e: - raise ModuleNotFoundError("For writes PyArrow needs to be installed") from e - - from pyiceberg.io.pyarrow import expression_to_pyarrow - from pyiceberg.table import upsert_util - - if join_cols is None: - join_cols = [] - for field_id in self.schema().identifier_field_ids: - col = self.schema().find_column_name(field_id) - if col is not None: - join_cols.append(col) - else: - raise ValueError(f"Field-ID could not be found: {join_cols}") - - if len(join_cols) == 0: - raise ValueError("Join columns could not be found, please set identifier-field-ids or pass in explicitly.") - - if not when_matched_update_all and not when_not_matched_insert_all: - raise ValueError("no upsert options selected...exiting") - - if upsert_util.has_duplicate_rows(df, join_cols): - raise ValueError("Duplicate rows found in source dataset based on the key columns. No upsert executed") - - from pyiceberg.io.pyarrow import _check_pyarrow_schema_compatible - - downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False - _check_pyarrow_schema_compatible( - self.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us - ) - - # get list of rows that exist so we don't have to load the entire target table - matched_predicate = upsert_util.create_match_filter(df, join_cols) - matched_iceberg_table = self.scan(row_filter=matched_predicate, case_sensitive=case_sensitive).to_arrow() - - update_row_cnt = 0 - insert_row_cnt = 0 - with self.transaction() as tx: - if when_matched_update_all: - # function get_rows_to_update is doing a check on non-key columns to see if any of the values have actually changed - # we don't want to do just a blanket overwrite for matched rows if the actual non-key column data hasn't changed - # this extra step avoids unnecessary IO and writes - rows_to_update = upsert_util.get_rows_to_update(df, matched_iceberg_table, join_cols) - - update_row_cnt = len(rows_to_update) - - if len(rows_to_update) > 0: - # build the match predicate filter - overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols) - - tx.overwrite(rows_to_update, overwrite_filter=overwrite_mask_predicate) - - if when_not_matched_insert_all: - expr_match = upsert_util.create_match_filter(matched_iceberg_table, join_cols) - expr_match_bound = bind(self.schema(), expr_match, case_sensitive=case_sensitive) - expr_match_arrow = expression_to_pyarrow(expr_match_bound) - rows_to_insert = df.filter(~expr_match_arrow) - - insert_row_cnt = len(rows_to_insert) - - if insert_row_cnt > 0: - tx.append(rows_to_insert) - - return UpsertResult(rows_updated=update_row_cnt, rows_inserted=insert_row_cnt) + return tx.upsert( + df=df, + join_cols=join_cols, + when_matched_update_all=when_matched_update_all, + 
when_not_matched_insert_all=when_not_matched_insert_all, + case_sensitive=case_sensitive, + branch=branch, + ) - def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None: + def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT, branch: Optional[str] = MAIN_BRANCH) -> None: """ Shorthand API for appending a PyArrow table to the table. Args: df: The Arrow dataframe that will be appended to overwrite the table snapshot_properties: Custom properties to be added to the snapshot summary + branch: Branch Reference to run the append operation """ with self.transaction() as tx: - tx.append(df=df, snapshot_properties=snapshot_properties) + tx.append(df=df, snapshot_properties=snapshot_properties, branch=branch) - def dynamic_partition_overwrite(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None: + def dynamic_partition_overwrite( + self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT, branch: Optional[str] = MAIN_BRANCH + ) -> None: """Shorthand for dynamic overwriting the table with a PyArrow table. Old partitions are auto detected and replaced with data files created for input arrow table. Args: df: The Arrow dataframe that will be used to overwrite the table snapshot_properties: Custom properties to be added to the snapshot summary + branch: Branch Reference to run the dynamic partition overwrite operation """ with self.transaction() as tx: - tx.dynamic_partition_overwrite(df=df, snapshot_properties=snapshot_properties) + tx.dynamic_partition_overwrite(df=df, snapshot_properties=snapshot_properties, branch=branch) def overwrite( self, @@ -1278,6 +1415,7 @@ def overwrite( overwrite_filter: Union[BooleanExpression, str] = ALWAYS_TRUE, snapshot_properties: Dict[str, str] = EMPTY_DICT, case_sensitive: bool = True, + branch: Optional[str] = MAIN_BRANCH, ) -> None: """ Shorthand for overwriting the table with a PyArrow table. @@ -1285,7 +1423,7 @@ def overwrite( An overwrite may produce zero or more snapshots based on the operation: - DELETE: In case existing Parquet files can be dropped completely. - - REPLACE: In case existing Parquet files need to be rewritten. + - OVERWRITE: In case existing Parquet files need to be rewritten to drop rows that match the overwrite filter.. - APPEND: In case new data is being inserted into the table. Args: @@ -1294,10 +1432,15 @@ def overwrite( or a boolean expression in case of a partial overwrite snapshot_properties: Custom properties to be added to the snapshot summary case_sensitive: A bool determine if the provided `overwrite_filter` is case-sensitive + branch: Branch Reference to run the overwrite operation """ with self.transaction() as tx: tx.overwrite( - df=df, overwrite_filter=overwrite_filter, case_sensitive=case_sensitive, snapshot_properties=snapshot_properties + df=df, + overwrite_filter=overwrite_filter, + case_sensitive=case_sensitive, + snapshot_properties=snapshot_properties, + branch=branch, ) def delete( @@ -1305,6 +1448,7 @@ def delete( delete_filter: Union[BooleanExpression, str] = ALWAYS_TRUE, snapshot_properties: Dict[str, str] = EMPTY_DICT, case_sensitive: bool = True, + branch: Optional[str] = MAIN_BRANCH, ) -> None: """ Shorthand for deleting rows from the table. 
@@ -1313,12 +1457,19 @@ def delete( delete_filter: The predicate that used to remove rows snapshot_properties: Custom properties to be added to the snapshot summary case_sensitive: A bool determine if the provided `delete_filter` is case-sensitive + branch: Branch Reference to run the delete operation """ with self.transaction() as tx: - tx.delete(delete_filter=delete_filter, case_sensitive=case_sensitive, snapshot_properties=snapshot_properties) + tx.delete( + delete_filter=delete_filter, case_sensitive=case_sensitive, snapshot_properties=snapshot_properties, branch=branch + ) def add_files( - self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT, check_duplicate_files: bool = True + self, + file_paths: List[str], + snapshot_properties: Dict[str, str] = EMPTY_DICT, + check_duplicate_files: bool = True, + branch: Optional[str] = MAIN_BRANCH, ) -> None: """ Shorthand API for adding files as data files to the table. @@ -1331,7 +1482,10 @@ def add_files( """ with self.transaction() as tx: tx.add_files( - file_paths=file_paths, snapshot_properties=snapshot_properties, check_duplicate_files=check_duplicate_files + file_paths=file_paths, + snapshot_properties=snapshot_properties, + check_duplicate_files=check_duplicate_files, + branch=branch, ) def update_spec(self, case_sensitive: bool = True) -> UpdateSpec: @@ -1384,6 +1538,16 @@ def to_daft(self) -> daft.DataFrame: return daft.read_iceberg(self) + def to_bodo(self) -> bd.DataFrame: + """Read a bodo DataFrame lazily from this Iceberg table. + + Returns: + bd.DataFrame: Unmaterialized Bodo Dataframe created from the Iceberg table + """ + import bodo.pandas as bd + + return bd.read_iceberg_table(self) + def to_polars(self) -> pl.LazyFrame: """Lazily read from this Apache Iceberg table. @@ -1394,6 +1558,51 @@ def to_polars(self) -> pl.LazyFrame: return pl.scan_iceberg(self) + def __datafusion_table_provider__(self) -> "IcebergDataFusionTable": + """Return the DataFusion table provider PyCapsule interface. + + To support DataFusion features such as push down filtering, this function will return a PyCapsule + interface that conforms to the FFI Table Provider required by DataFusion. From an end user perspective + you should not need to call this function directly. Instead you can use ``register_table_provider`` in + the DataFusion SessionContext. + + Returns: + A PyCapsule DataFusion TableProvider interface. + + Example: + ```python + from datafusion import SessionContext + from pyiceberg.catalog import load_catalog + import pyarrow as pa + catalog = load_catalog("catalog", type="in-memory") + catalog.create_namespace_if_not_exists("default") + data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) + iceberg_table = catalog.create_table("default.test", schema=data.schema) + iceberg_table.append(data) + ctx = SessionContext() + ctx.register_table_provider("test", iceberg_table) + ctx.table("test").show() + ``` + Results in + ``` + DataFrame() + +---+---+ + | x | y | + +---+---+ + | 1 | 4 | + | 2 | 5 | + | 3 | 6 | + +---+---+ + ``` + """ + from pyiceberg_core.datafusion import IcebergDataFusionTable + + return IcebergDataFusionTable( + identifier=self.name(), + metadata_location=self.metadata_location, + file_io_properties=self.io.properties, + ).__datafusion_table_provider__() + class StaticTable(Table): """Load a table directly from a metadata file (i.e., without using a catalog).""" @@ -1545,7 +1754,14 @@ def to_polars(self) -> pl.DataFrame: ... 
def update(self: S, **overrides: Any) -> S: """Create a copy of this table scan with updated fields.""" - return type(self)(**{**self.__dict__, **overrides}) + from inspect import signature + + # Extract those attributes that are constructor parameters. We don't use self.__dict__ as the kwargs to the + # constructors because it may contain additional attributes that are not part of the constructor signature. + params = signature(type(self).__init__).parameters.keys() - {"self"} # Skip "self" parameter + kwargs = {param: getattr(self, param) for param in params} # Assume parameters are attributes + + return type(self)(**{**kwargs, **overrides}) def use_ref(self: S, name: str) -> S: if self.snapshot_id: @@ -1695,13 +1911,11 @@ def _build_metrics_evaluator(self) -> Callable[[DataFile], bool]: def _build_residual_evaluator(self, spec_id: int) -> Callable[[DataFile], ResidualEvaluator]: spec = self.table_metadata.specs()[spec_id] + from pyiceberg.expressions.visitors import residual_evaluator_of + # The lambda created here is run in multiple threads. # So we avoid creating _EvaluatorExpression methods bound to a single # shared instance across multiple threads. - # return lambda data_file: (partition_schema, partition_expr, self.case_sensitive)(data_file.partition) - from pyiceberg.expressions.visitors import residual_evaluator_of - - # assert self.row_filter == False return lambda datafile: ( residual_evaluator_of( spec=spec, @@ -1711,7 +1925,8 @@ def _build_residual_evaluator(self, spec_id: int) -> Callable[[DataFile], Residu ) ) - def _check_sequence_number(self, min_sequence_number: int, manifest: ManifestFile) -> bool: + @staticmethod + def _check_sequence_number(min_sequence_number: int, manifest: ManifestFile) -> bool: """Ensure that no manifests are loaded that contain deletes that are older than the data. Args: @@ -1929,14 +2144,6 @@ def generate_data_file_filename(self, extension: str) -> str: return f"00000-{self.task_id}-{self.write_uuid}.{extension}" -@dataclass(frozen=True) -class AddFileTask: - """Task with the parameters for adding a Parquet file as a DataFile.""" - - file_path: str - partition_field_value: Record - - def _parquet_files_to_data_files(table_metadata: TableMetadata, file_paths: List[str], io: FileIO) -> Iterable[DataFile]: """Convert a list files into DataFiles.