Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
)
from pyiceberg.table.update.schema import UpdateSchema
from pyiceberg.table.update.snapshot import ManageSnapshots, UpdateSnapshot, _FastAppendFiles
from pyiceberg.table.update.sort_order import UpdateSortOrder
from pyiceberg.table.update.spec import UpdateSpec
from pyiceberg.table.update.statistics import UpdateStatistics
from pyiceberg.transforms import IdentityTransform
Expand Down Expand Up @@ -433,6 +434,14 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive
name_mapping=self.table_metadata.name_mapping(),
)

def update_sort_order(self) -> UpdateSortOrder:
"""Create a new UpdateSortOrder transaction to alter the sort order of this table.

Returns:
A new UpdateSortOrder.
"""
return UpdateSortOrder(self)

def update_snapshot(
self, snapshot_properties: Dict[str, str] = EMPTY_DICT, branch: Optional[str] = MAIN_BRANCH
) -> UpdateSnapshot:
Expand Down Expand Up @@ -1291,6 +1300,14 @@ def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive
name_mapping=self.name_mapping(),
)

def update_sort_order(self) -> UpdateSortOrder:
"""Create a new UpdateSortOrder transaction to alter the sort order of this table.

Returns:
A new UpdateSortOrder.
"""
return UpdateSortOrder(Transaction(self, autocommit=True))

def name_mapping(self) -> Optional[NameMapping]:
"""Return the table's field-id NameMapping."""
return self.metadata.name_mapping()
Expand Down
15 changes: 15 additions & 0 deletions pyiceberg/table/update/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ class AddSortOrderUpdate(IcebergBaseModel):
sort_order: SortOrder = Field(alias="sort-order")


class SetSortOrderUpdate(IcebergBaseModel):
action: Literal["set-sort-order"] = Field(default="set-sort-order")
sort_order: SortOrder = Field(alias="sort-order")


class SetDefaultSortOrderUpdate(IcebergBaseModel):
action: Literal["set-default-sort-order"] = Field(default="set-default-sort-order")
sort_order_id: int = Field(
Expand Down Expand Up @@ -546,6 +551,16 @@ def _(update: AddSortOrderUpdate, base_metadata: TableMetadata, context: _TableM
)


@_apply_table_update.register(SetSortOrderUpdate)
def _(update: AddSortOrderUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
context.add_update(update)
return base_metadata.model_copy(
update={
"sort_orders": [update.sort_order],
}
)


@_apply_table_update.register(SetDefaultSortOrderUpdate)
def _(
update: SetDefaultSortOrderUpdate,
Expand Down
67 changes: 67 additions & 0 deletions pyiceberg/table/update/sort_order.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import TYPE_CHECKING, Tuple

from pyiceberg.table.sorting import SortOrder
from pyiceberg.table.update import (
AddSortOrderUpdate,
SetSortOrderUpdate,
TableUpdate,
UpdatesAndRequirements,
UpdateTableMetadata,
)

if TYPE_CHECKING:
from pyiceberg.table import Transaction


class UpdateSortOrder(UpdateTableMetadata["UpdateSortOrder"]):
"""
Run sort order management operations using APIs.

APIs include set_sort_order the operation.

Use table.update_sort_order().<operation>().commit() to run a specific operation.
Use table.update_sort_order().<operation-one>().<operation-two>().commit() to run multiple operations.

Pending changes are applied on commit.

We can also use context managers to make more changes. For example:

new_sort_order = SortOrder(SortField(source_id=2, transform='identity'))

with table.update_sort_order() as update:
update.set_sort_order(sort_order=new_sort_order)
"""

_updates: Tuple[TableUpdate, ...] = ()

def __init__(self, transaction: "Transaction") -> None:
super().__init__(transaction)

def add_sort_order(self, sort_order: SortOrder) -> "UpdateSortOrder":
self._updates += (AddSortOrderUpdate(sort_order=sort_order),)

return self

def set_sort_order(self, sort_order: SortOrder) -> "UpdateSortOrder":
self._updates += (SetSortOrderUpdate(sort_order=sort_order),)

return self

def _commit(self) -> UpdatesAndRequirements:
return self._updates, ()
24 changes: 24 additions & 0 deletions tests/integration/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
)
from pyiceberg.io import WAREHOUSE
from pyiceberg.schema import Schema
from pyiceberg.table.sorting import SortOrder
from tests.conftest import clean_up


Expand Down Expand Up @@ -343,3 +344,26 @@ def test_update_namespace_properties(test_catalog: Catalog, database_name: str)
else:
assert k in update_report.removed
assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"]


@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_sort_order(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
new_database_name = f"{database_name}_new"
test_catalog.create_namespace(database_name)
test_catalog.create_namespace(new_database_name)

identifier = (database_name, table_name)
table = test_catalog.create_table(identifier, table_schema_nested)
new_sort_order = SortOrder(order_id=table.sort_order().order_id + 1)

print("before")
print(table.metadata.sort_orders)
table.update_sort_order().set_sort_order(new_sort_order).commit()

table = table.refresh()
print("after")
print(table.metadata.sort_orders)
print(table.sort_order(), "I error :(")

assert table.sort_order() == new_sort_order