From 1c9330b220f154d38ff682593cbba6fadfc381ae Mon Sep 17 00:00:00 2001 From: Gabriel Igliozzi Date: Sun, 17 Aug 2025 02:53:43 -0700 Subject: [PATCH 1/4] Catalog: Fix bug for renaming table that already exists --- mkdocs/docs/api.md | 2 ++ pyiceberg/catalog/hive.py | 5 +++++ tests/catalog/test_hive.py | 13 +++++++++++++ tests/integration/test_catalog.py | 31 ++++++++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 89d5692d0b..0e0dc375de 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1031,6 +1031,7 @@ Expert Iceberg users may choose to commit existing parquet files to the Iceberg ### Example Add files to Iceberg table: + ```python # Given that these parquet files have schema consistent with the Iceberg table @@ -1047,6 +1048,7 @@ tbl.add_files(file_paths=file_paths) ``` Add files to Iceberg table with custom snapshot properties: + ```python # Assume an existing Iceberg table object `tbl` diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index eef6bbad18..c3fe10c489 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -651,9 +651,14 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U ValueError: When from table identifier is invalid. NoSuchTableError: When a table with the name does not exist. NoSuchNamespaceError: When the destination namespace doesn't exist. + TableAlreadyExistsError: When the destination table already exists. """ from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError) to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier) + + if self.table_exists(to_identifier): + raise TableAlreadyExistsError(f"Table already exist: {to_table_name}") + try: with self._client as open_client: tbl = open_client.get_table(dbname=from_database_name, tbl_name=from_table_name) diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index 1edb4f7295..d45ddfda52 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -61,6 +61,7 @@ NamespaceNotEmptyError, NoSuchNamespaceError, NoSuchTableError, + TableAlreadyExistsError, WaitingForLockException, ) from pyiceberg.partitioning import PartitionField, PartitionSpec @@ -883,6 +884,7 @@ def test_load_table_from_self_identifier(hive_table: HiveTable) -> None: def test_rename_table(hive_table: HiveTable) -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign] renamed_table = copy.deepcopy(hive_table) renamed_table.dbName = "default" @@ -910,6 +912,7 @@ def test_rename_table(hive_table: HiveTable) -> None: def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign] catalog._client = MagicMock() catalog._client.__enter__().get_table.return_value = hive_table @@ -941,6 +944,7 @@ def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None: def test_rename_table_from_does_not_exists() -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign] catalog._client = MagicMock() catalog._client.__enter__().alter_table_with_environment_context.side_effect = NoSuchObjectException( @@ -955,6 +959,7 @@ def test_rename_table_from_does_not_exists() -> None: def test_rename_table_to_namespace_does_not_exists() -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign] catalog._client = MagicMock() catalog._client.__enter__().alter_table_with_environment_context.side_effect = InvalidOperationException( @@ -967,6 +972,14 @@ def test_rename_table_to_namespace_does_not_exists() -> None: assert "Database does not exists: default_does_not_exists" in str(exc_info.value) +def test_rename_table_to_table_already_exists(hive_table: HiveTable) -> None: + catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.load_table = MagicMock(return_value=hive_table) # type: ignore[method-assign] + + with pytest.raises(TableAlreadyExistsError): + catalog.rename_table(("default", "some_table"), ("default", "new_tabl2e")) + + def test_drop_database_does_not_empty() -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 123aca1bef..be93382daa 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -169,19 +169,48 @@ def test_rename_table(test_catalog: Catalog, table_schema_nested: Schema, table_ new_database_name = f"{database_name}_new" test_catalog.create_namespace(database_name) test_catalog.create_namespace(new_database_name) - new_table_name = f"rename-{table_name}" + identifier = (database_name, table_name) table = test_catalog.create_table(identifier, table_schema_nested) assert table.name() == identifier + + new_table_name = f"rename-{table_name}" new_identifier = (new_database_name, new_table_name) test_catalog.rename_table(identifier, new_identifier) new_table = test_catalog.load_table(new_identifier) + assert new_table.name() == new_identifier assert new_table.metadata_location == table.metadata_location + with pytest.raises(NoSuchTableError): test_catalog.load_table(identifier) +@pytest.mark.integration +@pytest.mark.parametrize("test_catalog", CATALOGS) +def test_rename_table_already_exists( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +) -> None: + new_database_name = f"{database_name}_new" + test_catalog.create_namespace(database_name) + test_catalog.create_namespace(new_database_name) + + identifier = (database_name, table_name) + table = test_catalog.create_table(identifier, table_schema_nested) + assert table.name() == identifier + + new_table_name = f"rename-{table_name}" + new_identifier = (new_database_name, new_table_name) + new_table = test_catalog.create_table(new_identifier, table_schema_nested) + assert new_table.name() == new_identifier + + with pytest.raises(TableAlreadyExistsError): + test_catalog.rename_table(identifier, new_identifier) + + assert test_catalog.table_exists(identifier) + assert test_catalog.table_exists(new_identifier) + + @pytest.mark.integration @pytest.mark.parametrize("test_catalog", CATALOGS) def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: From 072123f40fd73ca6e8dd617ab3111a8ae3d8ea58 Mon Sep 17 00:00:00 2001 From: Gabriel Igliozzi Date: Sun, 17 Aug 2025 02:57:37 -0700 Subject: [PATCH 2/4] typo --- pyiceberg/catalog/hive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index c3fe10c489..9f85087061 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -657,7 +657,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier) if self.table_exists(to_identifier): - raise TableAlreadyExistsError(f"Table already exist: {to_table_name}") + raise TableAlreadyExistsError(f"Table already exists: {to_table_name}") try: with self._client as open_client: From dff6a489035499cb5d6794a1e0dc9ec18068f5fe Mon Sep 17 00:00:00 2001 From: Gabriel Igliozzi Date: Sun, 17 Aug 2025 03:05:22 -0700 Subject: [PATCH 3/4] add assertion for error message --- tests/catalog/test_hive.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index d45ddfda52..b111965bf0 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -976,9 +976,10 @@ def test_rename_table_to_table_already_exists(hive_table: HiveTable) -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) catalog.load_table = MagicMock(return_value=hive_table) # type: ignore[method-assign] - with pytest.raises(TableAlreadyExistsError): + with pytest.raises(TableAlreadyExistsError) as exc_info: catalog.rename_table(("default", "some_table"), ("default", "new_tabl2e")) + assert "Table already exists: new_tabl2e" in str(exc_info.value) def test_drop_database_does_not_empty() -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) From e7f1f92edb1ae3176cbdf362ad2341ab1b0265f5 Mon Sep 17 00:00:00 2001 From: Gabriel Igliozzi Date: Mon, 18 Aug 2025 09:52:25 -0700 Subject: [PATCH 4/4] lint --- tests/catalog/test_hive.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index b111965bf0..2da3f99f3e 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -981,6 +981,7 @@ def test_rename_table_to_table_already_exists(hive_table: HiveTable) -> None: assert "Table already exists: new_tabl2e" in str(exc_info.value) + def test_drop_database_does_not_empty() -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)