Skip to content

Commit b034b83

Browse files
committed
Hive: Add DO_NOT_UPDATE_STATS in alter_table
1 parent 34c8949 commit b034b83

File tree

2 files changed: 33 additions & 8 deletions

2 files changed: 33 additions & 8 deletions

pyiceberg/catalog/hive.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
from hive_metastore.ttypes import (
3737
AlreadyExistsException,
3838
CheckLockRequest,
39+
EnvironmentContext,
3940
FieldSchema,
4041
InvalidOperationException,
4142
LockComponent,
@@ -135,6 +136,8 @@
135136
DEFAULT_LOCK_CHECK_MIN_WAIT_TIME = 0.1 # 100 milliseconds
136137
DEFAULT_LOCK_CHECK_MAX_WAIT_TIME = 60 # 1 min
137138
DEFAULT_LOCK_CHECK_RETRIES = 4
139+
DO_NOT_UPDATE_STATS = "DO_NOT_UPDATE_STATS"
140+
TRUE = "true"
138141

139142
logger = logging.getLogger(__name__)
140143

@@ -539,7 +542,12 @@ def commit_table(
539542
metadata_location=updated_staged_table.metadata_location,
540543
previous_metadata_location=current_table.metadata_location,
541544
)
542-
open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table)
545+
open_client.alter_table_with_environment_context(
546+
dbname=database_name,
547+
tbl_name=table_name,
548+
new_tbl=hive_table,
549+
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: TRUE}),
550+
)
543551
else:
544552
# Table does not exist, create it.
545553
hive_table = self._convert_iceberg_into_hive(
@@ -626,7 +634,12 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
626634
tbl = open_client.get_table(dbname=from_database_name, tbl_name=from_table_name)
627635
tbl.dbName = to_database_name
628636
tbl.tableName = to_table_name
629-
open_client.alter_table(dbname=from_database_name, tbl_name=from_table_name, new_tbl=tbl)
637+
open_client.alter_table_with_environment_context(
638+
dbname=from_database_name,
639+
tbl_name=from_table_name,
640+
new_tbl=tbl,
641+
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: TRUE}),
642+
)
630643
except NoSuchObjectException as e:
631644
raise NoSuchTableError(f"Table does not exist: {from_table_name}") from e
632645
except InvalidOperationException as e:

tests/catalog/test_hive.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import thrift.transport.TSocket
3030
from hive_metastore.ttypes import (
3131
AlreadyExistsException,
32+
EnvironmentContext,
3233
FieldSchema,
3334
InvalidOperationException,
3435
LockResponse,
@@ -44,10 +45,12 @@
4445

4546
from pyiceberg.catalog import PropertiesUpdateSummary
4647
from pyiceberg.catalog.hive import (
48+
DO_NOT_UPDATE_STATS,
4749
HIVE_KERBEROS_AUTH,
4850
LOCK_CHECK_MAX_WAIT_TIME,
4951
LOCK_CHECK_MIN_WAIT_TIME,
5052
LOCK_CHECK_RETRIES,
53+
TRUE,
5154
HiveCatalog,
5255
_construct_hive_storage_descriptor,
5356
_HiveClient,
@@ -874,7 +877,7 @@ def test_rename_table(hive_table: HiveTable) -> None:
874877

875878
catalog._client = MagicMock()
876879
catalog._client.__enter__().get_table.side_effect = [hive_table, renamed_table]
877-
catalog._client.__enter__().alter_table.return_value = None
880+
catalog._client.__enter__().alter_table_with_environment_context.return_value = None
878881

879882
from_identifier = ("default", "new_tabl2e")
880883
to_identifier = ("default", "new_tabl3e")
@@ -884,7 +887,12 @@ def test_rename_table(hive_table: HiveTable) -> None:
884887

885888
calls = [call(dbname="default", tbl_name="new_tabl2e"), call(dbname="default", tbl_name="new_tabl3e")]
886889
catalog._client.__enter__().get_table.assert_has_calls(calls)
887-
catalog._client.__enter__().alter_table.assert_called_with(dbname="default", tbl_name="new_tabl2e", new_tbl=renamed_table)
890+
catalog._client.__enter__().alter_table_with_environment_context.assert_called_with(
891+
dbname="default",
892+
tbl_name="new_tabl2e",
893+
new_tbl=renamed_table,
894+
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: TRUE}),
895+
)
888896

889897

890898
def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None:
@@ -902,22 +910,26 @@ def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None:
902910
renamed_table.tableName = "new_tabl3e"
903911

904912
catalog._client.__enter__().get_table.side_effect = [hive_table, renamed_table]
905-
catalog._client.__enter__().alter_table.return_value = None
913+
catalog._client.__enter__().alter_table_with_environment_context.return_value = None
906914
to_identifier = ("default", "new_tabl3e")
907915
table = catalog.rename_table(from_table.name(), to_identifier)
908916

909917
assert table.name() == to_identifier
910918

911919
calls = [call(dbname="default", tbl_name="new_tabl2e"), call(dbname="default", tbl_name="new_tabl3e")]
912920
catalog._client.__enter__().get_table.assert_has_calls(calls)
913-
catalog._client.__enter__().alter_table.assert_called_with(dbname="default", tbl_name="new_tabl2e", new_tbl=renamed_table)
914-
921+
catalog._client.__enter__().alter_table_with_environment_context.assert_called_with(
922+
dbname="default",
923+
tbl_name="new_tabl2e",
924+
new_tbl=renamed_table,
925+
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: TRUE}),
926+
)
915927

916928
def test_rename_table_from_does_not_exists() -> None:
917929
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
918930

919931
catalog._client = MagicMock()
920-
catalog._client.__enter__().alter_table.side_effect = NoSuchObjectException(
932+
catalog._client.__enter__().alter_table_with_environment_context.side_effect = NoSuchObjectException(
921933
message="hive.default.does_not_exists table not found"
922934
)
923935

0 commit comments

Comments
 (0)