Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions pyiceberg/catalog/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from hive_metastore.ttypes import (
AlreadyExistsException,
CheckLockRequest,
EnvironmentContext,
FieldSchema,
InvalidOperationException,
LockComponent,
Expand Down Expand Up @@ -135,6 +136,8 @@
DEFAULT_LOCK_CHECK_MIN_WAIT_TIME = 0.1 # 100 milliseconds
DEFAULT_LOCK_CHECK_MAX_WAIT_TIME = 60 # 1 min
DEFAULT_LOCK_CHECK_RETRIES = 4
DO_NOT_UPDATE_STATS = "DO_NOT_UPDATE_STATS"
DO_NOT_UPDATE_STATS_DEFAULT = "true"

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -539,7 +542,12 @@ def commit_table(
metadata_location=updated_staged_table.metadata_location,
previous_metadata_location=current_table.metadata_location,
)
open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table)
open_client.alter_table_with_environment_context(
dbname=database_name,
tbl_name=table_name,
new_tbl=hive_table,
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
)
else:
# Table does not exist, create it.
hive_table = self._convert_iceberg_into_hive(
Expand Down Expand Up @@ -626,7 +634,12 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
tbl = open_client.get_table(dbname=from_database_name, tbl_name=from_table_name)
tbl.dbName = to_database_name
tbl.tableName = to_table_name
open_client.alter_table(dbname=from_database_name, tbl_name=from_table_name, new_tbl=tbl)
open_client.alter_table_with_environment_context(
dbname=from_database_name,
tbl_name=from_table_name,
new_tbl=tbl,
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
)
except NoSuchObjectException as e:
raise NoSuchTableError(f"Table does not exist: {from_table_name}") from e
except InvalidOperationException as e:
Expand Down
25 changes: 19 additions & 6 deletions tests/catalog/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import thrift.transport.TSocket
from hive_metastore.ttypes import (
AlreadyExistsException,
EnvironmentContext,
FieldSchema,
InvalidOperationException,
LockResponse,
Expand All @@ -44,6 +45,8 @@

from pyiceberg.catalog import PropertiesUpdateSummary
from pyiceberg.catalog.hive import (
DO_NOT_UPDATE_STATS,
DO_NOT_UPDATE_STATS_DEFAULT,
HIVE_KERBEROS_AUTH,
LOCK_CHECK_MAX_WAIT_TIME,
LOCK_CHECK_MIN_WAIT_TIME,
Expand Down Expand Up @@ -874,7 +877,7 @@ def test_rename_table(hive_table: HiveTable) -> None:

catalog._client = MagicMock()
catalog._client.__enter__().get_table.side_effect = [hive_table, renamed_table]
catalog._client.__enter__().alter_table.return_value = None
catalog._client.__enter__().alter_table_with_environment_context.return_value = None

from_identifier = ("default", "new_tabl2e")
to_identifier = ("default", "new_tabl3e")
Expand All @@ -884,7 +887,12 @@ def test_rename_table(hive_table: HiveTable) -> None:

calls = [call(dbname="default", tbl_name="new_tabl2e"), call(dbname="default", tbl_name="new_tabl3e")]
catalog._client.__enter__().get_table.assert_has_calls(calls)
catalog._client.__enter__().alter_table.assert_called_with(dbname="default", tbl_name="new_tabl2e", new_tbl=renamed_table)
catalog._client.__enter__().alter_table_with_environment_context.assert_called_with(
dbname="default",
tbl_name="new_tabl2e",
new_tbl=renamed_table,
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
)


def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None:
Expand All @@ -902,22 +910,27 @@ def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None:
renamed_table.tableName = "new_tabl3e"

catalog._client.__enter__().get_table.side_effect = [hive_table, renamed_table]
catalog._client.__enter__().alter_table.return_value = None
catalog._client.__enter__().alter_table_with_environment_context.return_value = None
to_identifier = ("default", "new_tabl3e")
table = catalog.rename_table(from_table.name(), to_identifier)

assert table.name() == to_identifier

calls = [call(dbname="default", tbl_name="new_tabl2e"), call(dbname="default", tbl_name="new_tabl3e")]
catalog._client.__enter__().get_table.assert_has_calls(calls)
catalog._client.__enter__().alter_table.assert_called_with(dbname="default", tbl_name="new_tabl2e", new_tbl=renamed_table)
catalog._client.__enter__().alter_table_with_environment_context.assert_called_with(
dbname="default",
tbl_name="new_tabl2e",
new_tbl=renamed_table,
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
)


def test_rename_table_from_does_not_exists() -> None:
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)

catalog._client = MagicMock()
catalog._client.__enter__().alter_table.side_effect = NoSuchObjectException(
catalog._client.__enter__().alter_table_with_environment_context.side_effect = NoSuchObjectException(
message="hive.default.does_not_exists table not found"
)

Expand All @@ -931,7 +944,7 @@ def test_rename_table_to_namespace_does_not_exists() -> None:
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)

catalog._client = MagicMock()
catalog._client.__enter__().alter_table.side_effect = InvalidOperationException(
catalog._client.__enter__().alter_table_with_environment_context.side_effect = InvalidOperationException(
message="Unable to change partition or table. Database default does not exist Check metastore logs for detailed stack.does_not_exists"
)

Expand Down