diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 89d5692d0b..0e0dc375de 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1031,6 +1031,7 @@ Expert Iceberg users may choose to commit existing parquet files to the Iceberg ### Example Add files to Iceberg table: + ```python # Given that these parquet files have schema consistent with the Iceberg table @@ -1047,6 +1048,7 @@ tbl.add_files(file_paths=file_paths) ``` Add files to Iceberg table with custom snapshot properties: + ```python # Assume an existing Iceberg table object `tbl` diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index eef6bbad18..9f85087061 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -651,9 +651,14 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U ValueError: When from table identifier is invalid. NoSuchTableError: When a table with the name does not exist. NoSuchNamespaceError: When the destination namespace doesn't exist. + TableAlreadyExistsError: When the destination table already exists. """ from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError) to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier) + + if self.table_exists(to_identifier): + raise TableAlreadyExistsError(f"Table already exists: {to_table_name}") + try: with self._client as open_client: tbl = open_client.get_table(dbname=from_database_name, tbl_name=from_table_name) diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index 1edb4f7295..2da3f99f3e 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -61,6 +61,7 @@ NamespaceNotEmptyError, NoSuchNamespaceError, NoSuchTableError, + TableAlreadyExistsError, WaitingForLockException, ) from pyiceberg.partitioning import PartitionField, PartitionSpec @@ -883,6 +884,7 @@ def test_load_table_from_self_identifier(hive_table: HiveTable) -> None: def test_rename_table(hive_table: HiveTable) -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign] renamed_table = copy.deepcopy(hive_table) renamed_table.dbName = "default" @@ -910,6 +912,7 @@ def test_rename_table(hive_table: HiveTable) -> None: def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign] catalog._client = MagicMock() catalog._client.__enter__().get_table.return_value = hive_table @@ -941,6 +944,7 @@ def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None: def test_rename_table_from_does_not_exists() -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign] catalog._client = MagicMock() catalog._client.__enter__().alter_table_with_environment_context.side_effect = NoSuchObjectException( @@ -955,6 +959,7 @@ def test_rename_table_from_does_not_exists() -> None: def test_rename_table_to_namespace_does_not_exists() -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.table_exists = MagicMock(return_value=False) # type: ignore[method-assign] catalog._client = MagicMock() catalog._client.__enter__().alter_table_with_environment_context.side_effect = InvalidOperationException( @@ -967,6 +972,16 @@ def test_rename_table_to_namespace_does_not_exists() -> None: assert "Database does not exists: default_does_not_exists" in str(exc_info.value) +def test_rename_table_to_table_already_exists(hive_table: HiveTable) -> None: + catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + catalog.load_table = MagicMock(return_value=hive_table) # type: ignore[method-assign] + + with pytest.raises(TableAlreadyExistsError) as exc_info: + catalog.rename_table(("default", "some_table"), ("default", "new_tabl2e")) + + assert "Table already exists: new_tabl2e" in str(exc_info.value) + + def test_drop_database_does_not_empty() -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 123aca1bef..be93382daa 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -169,19 +169,48 @@ def test_rename_table(test_catalog: Catalog, table_schema_nested: Schema, table_ new_database_name = f"{database_name}_new" test_catalog.create_namespace(database_name) test_catalog.create_namespace(new_database_name) - new_table_name = f"rename-{table_name}" + identifier = (database_name, table_name) table = test_catalog.create_table(identifier, table_schema_nested) assert table.name() == identifier + + new_table_name = f"rename-{table_name}" new_identifier = (new_database_name, new_table_name) test_catalog.rename_table(identifier, new_identifier) new_table = test_catalog.load_table(new_identifier) + assert new_table.name() == new_identifier assert new_table.metadata_location == table.metadata_location + with pytest.raises(NoSuchTableError): test_catalog.load_table(identifier) +@pytest.mark.integration +@pytest.mark.parametrize("test_catalog", CATALOGS) +def test_rename_table_already_exists( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +) -> None: + new_database_name = f"{database_name}_new" + test_catalog.create_namespace(database_name) + test_catalog.create_namespace(new_database_name) + + identifier = (database_name, table_name) + table = test_catalog.create_table(identifier, table_schema_nested) + assert table.name() == identifier + + new_table_name = f"rename-{table_name}" + new_identifier = (new_database_name, new_table_name) + new_table = test_catalog.create_table(new_identifier, table_schema_nested) + assert new_table.name() == new_identifier + + with pytest.raises(TableAlreadyExistsError): + test_catalog.rename_table(identifier, new_identifier) + + assert test_catalog.table_exists(identifier) + assert test_catalog.table_exists(new_identifier) + + @pytest.mark.integration @pytest.mark.parametrize("test_catalog", CATALOGS) def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: