From 72c28702335365b7d8aa588fc09762e1bb286cff Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 14 Jun 2025 14:11:46 -0400 Subject: [PATCH 01/12] maint: catalog implementation roundtripping tests --- tests/catalog/test_common.py | 95 ++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 tests/catalog/test_common.py diff --git a/tests/catalog/test_common.py b/tests/catalog/test_common.py new file mode 100644 index 0000000000..55f6950f75 --- /dev/null +++ b/tests/catalog/test_common.py @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from pathlib import Path, PosixPath + +import pytest +from moto import mock_aws + +from pyiceberg.catalog import Catalog +from pyiceberg.io import WAREHOUSE +from tests.conftest import BUCKET_NAME + + +@pytest.fixture(scope="function") +@mock_aws +def glue(_bucket_initialize: None, moto_endpoint_url: str) -> Catalog: + from pyiceberg.catalog.glue import GlueCatalog + + catalog = GlueCatalog(name="glue", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"}) + + return catalog + + +@pytest.fixture(scope="function") +@mock_aws +def dynamodb(_bucket_initialize: None, moto_endpoint_url: str) -> Catalog: + from pyiceberg.catalog.dynamodb import DynamoDbCatalog + + catalog = DynamoDbCatalog(name="dynamodb", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"}) + + return catalog + + +@pytest.fixture(scope="function") +def memory_catalog(tmp_path: PosixPath) -> Catalog: + from pyiceberg.catalog.memory import InMemoryCatalog + + return InMemoryCatalog("test.in_memory.catalog", **{WAREHOUSE: tmp_path.absolute().as_posix(), "test.key": "test.value"}) + + +@pytest.fixture(scope="function") +def sqllite_catalog_memory(warehouse: Path) -> Catalog: + from pyiceberg.catalog.sql import SqlCatalog + + catalog = SqlCatalog("sqlitememory", uri="sqlite:///:memory:", warehouse=f"file://{warehouse}") + + return catalog + + +@pytest.fixture(scope="function") +def sqllite_catalog_file(warehouse: Path) -> Catalog: + from pyiceberg.catalog.sql import SqlCatalog + + catalog = SqlCatalog("sqlitefile", uri=f"sqlite:////{warehouse}/sql-catalog.db", warehouse=f"file://{warehouse}") + + return catalog + + +@pytest.mark.parametrize( + "catalog", + [ + pytest.lazy_fixture("glue"), + pytest.lazy_fixture("dynamodb"), + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqllite_catalog_memory"), + pytest.lazy_fixture("sqllite_catalog_file"), + ], +) +def test_create_namespace_no_properties( + catalog: Catalog, + database_name: str, +) -> None: + catalog.create_namespace(namespace=database_name) + loaded_database_list = catalog.list_namespaces() + assert len(loaded_database_list) == 1 + assert (database_name,) in loaded_database_list + properties = catalog.load_namespace_properties(database_name) + assert properties == {} + catalog.drop_namespace(database_name) + assert len(catalog.list_namespaces()) == 0 From c4af086fd8be6057a1f738d5ded03ee484508f0a Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sun, 15 Jun 2025 10:38:09 -0400 Subject: [PATCH 02/12] change up to integration tests --- pyiceberg/catalog/hive.py | 5 +- tests/catalog/test_common.py | 95 ------------------------ tests/integration/test_catalog.py | 119 ++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 97 deletions(-) delete mode 100644 tests/catalog/test_common.py create mode 100644 tests/integration/test_catalog.py diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 5a9387577b..13495baf79 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -63,6 +63,7 @@ LOCATION, METADATA_LOCATION, TABLE_TYPE, + URI, MetastoreCatalog, PropertiesUpdateSummary, ) @@ -300,7 +301,7 @@ def __init__(self, name: str, **properties: str): @staticmethod def _create_hive_client(properties: Dict[str, str]) -> _HiveClient: last_exception = None - for uri in properties["uri"].split(","): + for uri in properties[URI].split(","): try: return _HiveClient( uri, @@ -312,7 +313,7 @@ def _create_hive_client(properties: Dict[str, str]) -> _HiveClient: if last_exception is not None: raise last_exception else: - raise ValueError(f"Unable to connect to hive using uri: {properties['uri']}") + raise ValueError(f"Unable to connect to hive using uri: {properties[URI]}") def _convert_hive_into_iceberg(self, table: HiveTable) -> Table: properties: Dict[str, str] = table.parameters diff --git a/tests/catalog/test_common.py b/tests/catalog/test_common.py deleted file mode 100644 index 55f6950f75..0000000000 --- a/tests/catalog/test_common.py +++ /dev/null @@ -1,95 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -from pathlib import Path, PosixPath - -import pytest -from moto import mock_aws - -from pyiceberg.catalog import Catalog -from pyiceberg.io import WAREHOUSE -from tests.conftest import BUCKET_NAME - - -@pytest.fixture(scope="function") -@mock_aws -def glue(_bucket_initialize: None, moto_endpoint_url: str) -> Catalog: - from pyiceberg.catalog.glue import GlueCatalog - - catalog = GlueCatalog(name="glue", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"}) - - return catalog - - -@pytest.fixture(scope="function") -@mock_aws -def dynamodb(_bucket_initialize: None, moto_endpoint_url: str) -> Catalog: - from pyiceberg.catalog.dynamodb import DynamoDbCatalog - - catalog = DynamoDbCatalog(name="dynamodb", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"}) - - return catalog - - -@pytest.fixture(scope="function") -def memory_catalog(tmp_path: PosixPath) -> Catalog: - from pyiceberg.catalog.memory import InMemoryCatalog - - return InMemoryCatalog("test.in_memory.catalog", **{WAREHOUSE: tmp_path.absolute().as_posix(), "test.key": "test.value"}) - - -@pytest.fixture(scope="function") -def sqllite_catalog_memory(warehouse: Path) -> Catalog: - from pyiceberg.catalog.sql import SqlCatalog - - catalog = SqlCatalog("sqlitememory", uri="sqlite:///:memory:", warehouse=f"file://{warehouse}") - - return catalog - - -@pytest.fixture(scope="function") -def sqllite_catalog_file(warehouse: Path) -> Catalog: - from pyiceberg.catalog.sql import SqlCatalog - - catalog = SqlCatalog("sqlitefile", uri=f"sqlite:////{warehouse}/sql-catalog.db", warehouse=f"file://{warehouse}") - - return catalog - - -@pytest.mark.parametrize( - "catalog", - [ - pytest.lazy_fixture("glue"), - pytest.lazy_fixture("dynamodb"), - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqllite_catalog_memory"), - pytest.lazy_fixture("sqllite_catalog_file"), - ], -) -def test_create_namespace_no_properties( - catalog: Catalog, - database_name: str, -) -> None: - catalog.create_namespace(namespace=database_name) - loaded_database_list = catalog.list_namespaces() - assert len(loaded_database_list) == 1 - assert (database_name,) in loaded_database_list - properties = catalog.load_namespace_properties(database_name) - assert properties == {} - catalog.drop_namespace(database_name) - assert len(catalog.list_namespaces()) == 0 diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py new file mode 100644 index 0000000000..33a02ff1e4 --- /dev/null +++ b/tests/integration/test_catalog.py @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pathlib import Path, PosixPath +from typing import Generator + +import pytest + +from pyiceberg.catalog import Catalog +from pyiceberg.catalog.dynamodb import DynamoDbCatalog +from pyiceberg.catalog.glue import GLUE_CATALOG_ENDPOINT, GlueCatalog +from pyiceberg.catalog.hive import HiveCatalog +from pyiceberg.catalog.memory import InMemoryCatalog +from pyiceberg.catalog.rest import RestCatalog +from pyiceberg.catalog.sql import SqlCatalog +from pyiceberg.io import WAREHOUSE +from tests.conftest import clean_up, get_bucket_name, get_glue_endpoint, get_s3_path + +# The number of tables/databases used in list_table/namespace test +LIST_TEST_NUMBER = 2 + + +@pytest.fixture(scope="function") +def dynamodb() -> Generator[Catalog, None, None]: + test_catalog = DynamoDbCatalog("test_dynamodb_catalog", warehouse=get_s3_path(get_bucket_name())) + yield test_catalog + clean_up(test_catalog) + + +@pytest.fixture(scope="function") +def glue() -> Generator[Catalog, None, None]: + test_catalog = GlueCatalog( + "test_glue_catalog", **{"warehouse": get_s3_path(get_bucket_name()), GLUE_CATALOG_ENDPOINT: get_glue_endpoint()} + ) + yield test_catalog + clean_up(test_catalog) + + +@pytest.fixture(scope="function") +def memory_catalog(tmp_path: PosixPath) -> Generator[Catalog, None, None]: + test_catalog = InMemoryCatalog( + "test.in_memory.catalog", **{WAREHOUSE: tmp_path.absolute().as_posix(), "test.key": "test.value"} + ) + yield test_catalog + + clean_up(test_catalog) + + +@pytest.fixture(scope="function") +def sqlite_catalog_memory(warehouse: Path) -> Generator[Catalog, None, None]: + test_catalog = SqlCatalog("sqlitememory", uri="sqlite:///:memory:", warehouse=f"file://{warehouse}") + + yield test_catalog + + clean_up(test_catalog) + + +@pytest.fixture(scope="function") +def sqlite_catalog_file(warehouse: Path) -> Generator[Catalog, None, None]: + test_catalog = SqlCatalog("sqlitefile", uri=f"sqlite:////{warehouse}/sql-catalog.db", warehouse=f"file://{warehouse}") + + yield test_catalog + + clean_up(test_catalog) + + +@pytest.fixture(scope="function") +def rest_catalog() -> Generator[Catalog, None, None]: + test_catalog = RestCatalog("rest", uri="http://localhost:8181") + + yield test_catalog + + clean_up(test_catalog) + + +@pytest.fixture(scope="function") +def hive_catalog() -> Generator[Catalog, None, None]: + test_catalog = HiveCatalog( + "test_hive_catalog", + uri="thrift://localhost:9083", + ) + yield test_catalog + clean_up(test_catalog) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("glue"), + pytest.lazy_fixture("dynamodb"), + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_namespace( + test_catalog: Catalog, + database_name: str, +) -> None: + test_catalog.create_namespace(database_name) + # note the use of `in` because some catalogs have a "default" namespace + assert (database_name,) in test_catalog.list_namespaces() From e613750a61c5123de921900b98b9d8fdbca68451 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 5 Jul 2025 09:11:29 -0400 Subject: [PATCH 03/12] steal some more integration tests --- tests/integration/test_catalog.py | 52 ++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 33a02ff1e4..17e82be576 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -20,7 +20,7 @@ import pytest -from pyiceberg.catalog import Catalog +from pyiceberg.catalog import Catalog, MetastoreCatalog from pyiceberg.catalog.dynamodb import DynamoDbCatalog from pyiceberg.catalog.glue import GLUE_CATALOG_ENDPOINT, GlueCatalog from pyiceberg.catalog.hive import HiveCatalog @@ -28,11 +28,9 @@ from pyiceberg.catalog.rest import RestCatalog from pyiceberg.catalog.sql import SqlCatalog from pyiceberg.io import WAREHOUSE +from pyiceberg.schema import Schema from tests.conftest import clean_up, get_bucket_name, get_glue_endpoint, get_s3_path -# The number of tables/databases used in list_table/namespace test -LIST_TEST_NUMBER = 2 - @pytest.fixture(scope="function") def dynamodb() -> Generator[Catalog, None, None]: @@ -117,3 +115,49 @@ def test_create_namespace( test_catalog.create_namespace(database_name) # note the use of `in` because some catalogs have a "default" namespace assert (database_name,) in test_catalog.list_namespaces() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("glue"), + pytest.lazy_fixture("dynamodb"), + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_table_with_default_location( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +) -> None: + identifier = (database_name, table_name) + test_catalog.create_namespace(database_name) + test_catalog.create_table(identifier, table_schema_nested) + table = test_catalog.load_table(identifier) + assert table.name() == identifier + assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0 + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("glue"), + pytest.lazy_fixture("dynamodb"), + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_table_with_invalid_location( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +) -> None: + identifier = (database_name, table_name) + test_catalog.create_namespace(database_name) + with pytest.raises(ValueError): + test_catalog.create_table(identifier, table_schema_nested) From 76f0342d799f4069cdf6ff339713754a41c46e86 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 19 Jul 2025 12:31:31 -0400 Subject: [PATCH 04/12] remove glue and dynamodb --- tests/integration/test_catalog.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 17e82be576..1431486f53 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -21,31 +21,13 @@ import pytest from pyiceberg.catalog import Catalog, MetastoreCatalog -from pyiceberg.catalog.dynamodb import DynamoDbCatalog -from pyiceberg.catalog.glue import GLUE_CATALOG_ENDPOINT, GlueCatalog from pyiceberg.catalog.hive import HiveCatalog from pyiceberg.catalog.memory import InMemoryCatalog from pyiceberg.catalog.rest import RestCatalog from pyiceberg.catalog.sql import SqlCatalog from pyiceberg.io import WAREHOUSE from pyiceberg.schema import Schema -from tests.conftest import clean_up, get_bucket_name, get_glue_endpoint, get_s3_path - - -@pytest.fixture(scope="function") -def dynamodb() -> Generator[Catalog, None, None]: - test_catalog = DynamoDbCatalog("test_dynamodb_catalog", warehouse=get_s3_path(get_bucket_name())) - yield test_catalog - clean_up(test_catalog) - - -@pytest.fixture(scope="function") -def glue() -> Generator[Catalog, None, None]: - test_catalog = GlueCatalog( - "test_glue_catalog", **{"warehouse": get_s3_path(get_bucket_name()), GLUE_CATALOG_ENDPOINT: get_glue_endpoint()} - ) - yield test_catalog - clean_up(test_catalog) +from tests.conftest import clean_up @pytest.fixture(scope="function") From 2bb2229a0fe71ae33ae30b29d8fc271a2f70ff26 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 19 Jul 2025 13:17:12 -0400 Subject: [PATCH 05/12] implement more tests --- tests/integration/test_catalog.py | 361 ++++++++++++++++++++++++++++-- 1 file changed, 340 insertions(+), 21 deletions(-) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 1431486f53..cdb610d284 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -16,7 +16,7 @@ # under the License. from pathlib import Path, PosixPath -from typing import Generator +from typing import Generator, List import pytest @@ -25,10 +25,20 @@ from pyiceberg.catalog.memory import InMemoryCatalog from pyiceberg.catalog.rest import RestCatalog from pyiceberg.catalog.sql import SqlCatalog +from pyiceberg.exceptions import ( + NamespaceAlreadyExistsError, + NamespaceNotEmptyError, + NoSuchNamespaceError, + NoSuchTableError, + TableAlreadyExistsError, +) from pyiceberg.io import WAREHOUSE from pyiceberg.schema import Schema from tests.conftest import clean_up +# The number of tables/databases used in list_table/namespace test +LIST_TEST_NUMBER = 2 + @pytest.fixture(scope="function") def memory_catalog(tmp_path: PosixPath) -> Generator[Catalog, None, None]: @@ -81,8 +91,6 @@ def hive_catalog() -> Generator[Catalog, None, None]: @pytest.mark.parametrize( "test_catalog", [ - pytest.lazy_fixture("glue"), - pytest.lazy_fixture("dynamodb"), pytest.lazy_fixture("memory_catalog"), pytest.lazy_fixture("sqlite_catalog_memory"), pytest.lazy_fixture("sqlite_catalog_file"), @@ -90,21 +98,21 @@ def hive_catalog() -> Generator[Catalog, None, None]: pytest.lazy_fixture("hive_catalog"), ], ) -def test_create_namespace( - test_catalog: Catalog, - database_name: str, +def test_create_table_with_default_location( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str ) -> None: + identifier = (database_name, table_name) test_catalog.create_namespace(database_name) - # note the use of `in` because some catalogs have a "default" namespace - assert (database_name,) in test_catalog.list_namespaces() + test_catalog.create_table(identifier, table_schema_nested) + table = test_catalog.load_table(identifier) + assert table.name() == identifier + assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0 @pytest.mark.integration @pytest.mark.parametrize( "test_catalog", [ - pytest.lazy_fixture("glue"), - pytest.lazy_fixture("dynamodb"), pytest.lazy_fixture("memory_catalog"), pytest.lazy_fixture("sqlite_catalog_memory"), pytest.lazy_fixture("sqlite_catalog_file"), @@ -112,23 +120,167 @@ def test_create_namespace( pytest.lazy_fixture("hive_catalog"), ], ) -def test_create_table_with_default_location( - test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +def test_create_table_with_invalid_database(test_catalog: Catalog, table_schema_nested: Schema, table_name: str) -> None: + identifier = ("invalid", table_name) + with pytest.raises(NoSuchNamespaceError): + test_catalog.create_table(identifier, table_schema_nested) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_duplicated_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: + test_catalog.create_namespace(database_name) + test_catalog.create_table((database_name, table_name), table_schema_nested) + with pytest.raises(TableAlreadyExistsError): + test_catalog.create_table((database_name, table_name), table_schema_nested) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_table_if_not_exists_duplicated_table( + test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: + test_catalog.create_namespace(database_name) + table1 = test_catalog.create_table((database_name, table_name), table_schema_nested) + table2 = test_catalog.create_table_if_not_exists((database_name, table_name), table_schema_nested) + assert table1.name() == table2.name() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: + identifier = (database_name, table_name) + test_catalog.create_namespace(database_name) + table = test_catalog.create_table(identifier, table_schema_nested) + loaded_table = test_catalog.load_table(identifier) + assert table.name() == loaded_table.name() + assert table.metadata_location == loaded_table.metadata_location + assert table.metadata == loaded_table.metadata + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_list: List[str]) -> None: + test_catalog.create_namespace(database_name) + for table_name in table_list: + test_catalog.create_table((database_name, table_name), table_schema_nested) + identifier_list = test_catalog.list_tables(database_name) + assert len(identifier_list) == LIST_TEST_NUMBER + for table_name in table_list: + assert (database_name, table_name) in identifier_list + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_rename_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: + new_database_name = f"{database_name}_new" + test_catalog.create_namespace(database_name) + test_catalog.create_namespace(new_database_name) + new_table_name = f"rename-{table_name}" + identifier = (database_name, table_name) + table = test_catalog.create_table(identifier, table_schema_nested) + assert table.name() == identifier + new_identifier = (new_database_name, new_table_name) + test_catalog.rename_table(identifier, new_identifier) + new_table = test_catalog.load_table(new_identifier) + assert new_table.name() == new_identifier + assert new_table.metadata_location == table.metadata_location + with pytest.raises(NoSuchTableError): + test_catalog.load_table(identifier) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: + identifier = (database_name, table_name) + test_catalog.create_namespace(database_name) + table = test_catalog.create_table(identifier, table_schema_nested) + assert table.name() == identifier + test_catalog.drop_table(identifier) + with pytest.raises(NoSuchTableError): + test_catalog.load_table(identifier) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_purge_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: identifier = (database_name, table_name) test_catalog.create_namespace(database_name) test_catalog.create_table(identifier, table_schema_nested) table = test_catalog.load_table(identifier) assert table.name() == identifier - assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0 + test_catalog.purge_table(identifier) + with pytest.raises(NoSuchTableError): + test_catalog.load_table(identifier) @pytest.mark.integration @pytest.mark.parametrize( "test_catalog", [ - pytest.lazy_fixture("glue"), - pytest.lazy_fixture("dynamodb"), pytest.lazy_fixture("memory_catalog"), pytest.lazy_fixture("sqlite_catalog_memory"), pytest.lazy_fixture("sqlite_catalog_file"), @@ -136,10 +288,177 @@ def test_create_table_with_default_location( pytest.lazy_fixture("hive_catalog"), ], ) -def test_create_table_with_invalid_location( - test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str -) -> None: - identifier = (database_name, table_name) +def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: test_catalog.create_namespace(database_name) - with pytest.raises(ValueError): - test_catalog.create_table(identifier, table_schema_nested) + test_catalog.create_table((database_name, table_name), table_schema_nested) + assert test_catalog.table_exists((database_name, table_name)) is True + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_namespace(test_catalog: Catalog, database_name: str) -> None: + test_catalog.create_namespace(database_name) + assert (database_name,) in test_catalog.list_namespaces() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_duplicate_namespace(test_catalog: Catalog, database_name: str) -> None: + test_catalog.create_namespace(database_name) + with pytest.raises(NamespaceAlreadyExistsError): + test_catalog.create_namespace(database_name) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_namepsace_if_not_exists(test_catalog: Catalog, database_name: str) -> None: + test_catalog.create_namespace(database_name) + test_catalog.create_namespace_if_not_exists(database_name) + assert (database_name,) in test_catalog.list_namespaces() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_create_namespace_with_comment(test_catalog: Catalog, database_name: str) -> None: + test_properties = { + "comment": "this is a test description", + } + test_catalog.create_namespace(namespace=database_name, properties=test_properties) + loaded_database_list = test_catalog.list_namespaces() + assert (database_name,) in loaded_database_list + properties = test_catalog.load_namespace_properties(database_name) + assert properties["comment"] == "this is a test description" + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_list_namespaces(test_catalog: Catalog, database_list: List[str]) -> None: + for database_name in database_list: + test_catalog.create_namespace(database_name) + db_list = test_catalog.list_namespaces() + for database_name in database_list: + assert (database_name,) in db_list + assert len(test_catalog.list_namespaces(list(database_list)[0])) == 0 + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_drop_namespace(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: + test_catalog.create_namespace(database_name) + assert (database_name,) in test_catalog.list_namespaces() + test_catalog.create_table((database_name, table_name), table_schema_nested) + with pytest.raises(NamespaceNotEmptyError): + test_catalog.drop_namespace(database_name) + test_catalog.drop_table((database_name, table_name)) + test_catalog.drop_namespace(database_name) + assert (database_name,) not in test_catalog.list_namespaces() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_load_namespace_properties(test_catalog: Catalog, database_name: str) -> None: + test_properties = { + "comment": "this is a test description", + "test_property1": "1", + "test_property2": "2", + "test_property3": "3", + } + test_catalog.create_namespace(database_name, test_properties) + listed_properties = test_catalog.load_namespace_properties(database_name) + for k, v in test_properties.items(): + assert v == listed_properties[k] + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_catalog", + [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), + ], +) +def test_update_namespace_properties(test_catalog: Catalog, database_name: str) -> None: + test_properties = { + "comment": "this is a test description", + "test_property1": "1", + "test_property2": "2", + "test_property3": "3", + } + removals = {"test_property1", "test_property2", "test_property3", "should_not_removed"} + updates = {"test_property4": "4", "test_property5": "5", "comment": "updated test description"} + test_catalog.create_namespace(database_name, test_properties) + update_report = test_catalog.update_namespace_properties(database_name, removals, updates) + for k in updates.keys(): + assert k in update_report.updated + for k in removals: + if k == "should_not_removed": + assert k in update_report.missing + else: + assert k in update_report.removed + assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"] From 769536b800af9638822ade6811480b4d92836c31 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 19 Jul 2025 14:01:12 -0400 Subject: [PATCH 06/12] mostly fix hive? --- tests/conftest.py | 10 ++++++++-- tests/integration/test_catalog.py | 16 +++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7b5256f340..f3d551edae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2318,13 +2318,19 @@ def fixture_s3_client() -> boto3.client: yield boto3.client("s3") -def clean_up(test_catalog: Catalog) -> None: +def clean_up(test_catalog: Catalog, drop_if_cannot_purge: bool = False) -> None: """Clean all databases and tables created during the integration test.""" for database_tuple in test_catalog.list_namespaces(): database_name = database_tuple[0] if "my_iceberg_database-" in database_name: for identifier in test_catalog.list_tables(database_name): - test_catalog.purge_table(identifier) + try: + test_catalog.purge_table(identifier) + except Exception as e: + if drop_if_cannot_purge: + test_catalog.drop_table(identifier) + else: + raise e test_catalog.drop_namespace(database_name) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index cdb610d284..beb298a829 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -36,9 +36,6 @@ from pyiceberg.schema import Schema from tests.conftest import clean_up -# The number of tables/databases used in list_table/namespace test -LIST_TEST_NUMBER = 2 - @pytest.fixture(scope="function") def memory_catalog(tmp_path: PosixPath) -> Generator[Catalog, None, None]: @@ -81,10 +78,15 @@ def rest_catalog() -> Generator[Catalog, None, None]: def hive_catalog() -> Generator[Catalog, None, None]: test_catalog = HiveCatalog( "test_hive_catalog", - uri="thrift://localhost:9083", + **{ + "uri": "http://localhost:9083", + "s3.endpoint": "http://localhost:9000", + "s3.access-key-id": "admin", + "s3.secret-access-key": "password", + }, ) yield test_catalog - clean_up(test_catalog) + clean_up(test_catalog, drop_if_cannot_purge=True) @pytest.mark.integration @@ -201,7 +203,7 @@ def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, databas for table_name in table_list: test_catalog.create_table((database_name, table_name), table_schema_nested) identifier_list = test_catalog.list_tables(database_name) - assert len(identifier_list) == LIST_TEST_NUMBER + assert len(identifier_list) == len(table_list) for table_name in table_list: assert (database_name, table_name) in identifier_list @@ -263,7 +265,7 @@ def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_na pytest.lazy_fixture("sqlite_catalog_memory"), pytest.lazy_fixture("sqlite_catalog_file"), pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), + # NOTE: HiveCatalog does not support purge_table ], ) def test_purge_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: From 00140585222c03517735f8d1d6f2f8824c6a28ae Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 19 Jul 2025 14:28:57 -0400 Subject: [PATCH 07/12] fix `update_namespace_properties` in hive catalog --- pyiceberg/catalog/hive.py | 2 +- tests/catalog/test_hive.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index cc9cd028c4..eef6bbad18 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -800,7 +800,7 @@ def update_namespace_properties( if removals: for key in removals: if key in parameters: - parameters[key] = None + parameters.pop(key) removed.add(key) if updates: for key, value in updates.items(): diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index a36425ebea..1edb4f7295 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -1156,7 +1156,7 @@ def test_update_namespace_properties(hive_database: HiveDatabase) -> None: name="default", description=None, locationUri=hive_database.locationUri, - parameters={"test": None, "label": "core"}, + parameters={"label": "core"}, privileges=None, ownerName=None, ownerType=1, From c90f484dad90ad7294626c869e388c6a36ed8b21 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 19 Jul 2025 14:41:23 -0400 Subject: [PATCH 08/12] add missing mapper for `NoSuchNamespaceError` in rest catalog create table --- pyiceberg/catalog/rest/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 6215d17a4f..0972d7792f 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -505,7 +505,7 @@ def _create_table( try: response.raise_for_status() except HTTPError as exc: - _handle_non_200_response(exc, {409: TableAlreadyExistsError}) + _handle_non_200_response(exc, {409: TableAlreadyExistsError, 404: NoSuchNamespaceError}) return TableResponse.model_validate_json(response.text) @retry(**_RETRY_ARGS) From efb536dc15fb75027d652aa5015ed94021b4091d Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 19 Jul 2025 15:23:37 -0400 Subject: [PATCH 09/12] drop_table in clean_up instead of purge --- tests/conftest.py | 10 ++-------- tests/integration/test_catalog.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index f3d551edae..584b6c633a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2318,19 +2318,13 @@ def fixture_s3_client() -> boto3.client: yield boto3.client("s3") -def clean_up(test_catalog: Catalog, drop_if_cannot_purge: bool = False) -> None: +def clean_up(test_catalog: Catalog) -> None: """Clean all databases and tables created during the integration test.""" for database_tuple in test_catalog.list_namespaces(): database_name = database_tuple[0] if "my_iceberg_database-" in database_name: for identifier in test_catalog.list_tables(database_name): - try: - test_catalog.purge_table(identifier) - except Exception as e: - if drop_if_cannot_purge: - test_catalog.drop_table(identifier) - else: - raise e + test_catalog.drop_table(identifier) test_catalog.drop_namespace(database_name) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index beb298a829..e78295069f 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -86,7 +86,7 @@ def hive_catalog() -> Generator[Catalog, None, None]: }, ) yield test_catalog - clean_up(test_catalog, drop_if_cannot_purge=True) + clean_up(test_catalog) @pytest.mark.integration From 98109e8449350e4e24813270fae0ec551e256d3b Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sat, 19 Jul 2025 20:58:36 -0400 Subject: [PATCH 10/12] enable strict behavior in rest fixture for jdbc catalog --- dev/docker-compose-integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/docker-compose-integration.yml b/dev/docker-compose-integration.yml index 500a042e16..c901b2ee23 100644 --- a/dev/docker-compose-integration.yml +++ b/dev/docker-compose-integration.yml @@ -53,6 +53,7 @@ services: - CATALOG_WAREHOUSE=s3://warehouse/ - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO - CATALOG_S3_ENDPOINT=http://minio:9000 + - CATALOG_JDBC_STRICT__MODE=true minio: image: minio/minio container_name: pyiceberg-minio From ef60e3bfe3d2e6924c6667519918b833c997a4a6 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Sun, 20 Jul 2025 07:26:51 -0400 Subject: [PATCH 11/12] DRY --- tests/integration/test_catalog.py | 210 +++++------------------------- 1 file changed, 30 insertions(+), 180 deletions(-) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index e78295069f..123aca1bef 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -89,17 +89,17 @@ def hive_catalog() -> Generator[Catalog, None, None]: clean_up(test_catalog) +CATALOGS = [ + pytest.lazy_fixture("memory_catalog"), + pytest.lazy_fixture("sqlite_catalog_memory"), + pytest.lazy_fixture("sqlite_catalog_file"), + pytest.lazy_fixture("rest_catalog"), + pytest.lazy_fixture("hive_catalog"), +] + + @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_create_table_with_default_location( test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str ) -> None: @@ -112,16 +112,7 @@ def test_create_table_with_default_location( @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_create_table_with_invalid_database(test_catalog: Catalog, table_schema_nested: Schema, table_name: str) -> None: identifier = ("invalid", table_name) with pytest.raises(NoSuchNamespaceError): @@ -129,16 +120,7 @@ def test_create_table_with_invalid_database(test_catalog: Catalog, table_schema_ @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_create_duplicated_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: test_catalog.create_namespace(database_name) test_catalog.create_table((database_name, table_name), table_schema_nested) @@ -147,16 +129,7 @@ def test_create_duplicated_table(test_catalog: Catalog, table_schema_nested: Sch @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_create_table_if_not_exists_duplicated_table( test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -167,16 +140,7 @@ def test_create_table_if_not_exists_duplicated_table( @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: identifier = (database_name, table_name) test_catalog.create_namespace(database_name) @@ -188,16 +152,7 @@ def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, database @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_list: List[str]) -> None: test_catalog.create_namespace(database_name) for table_name in table_list: @@ -209,16 +164,7 @@ def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, databas @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_rename_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: new_database_name = f"{database_name}_new" test_catalog.create_namespace(database_name) @@ -237,16 +183,7 @@ def test_rename_table(test_catalog: Catalog, table_schema_nested: Schema, table_ @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: identifier = (database_name, table_name) test_catalog.create_namespace(database_name) @@ -258,17 +195,11 @@ def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_na @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - # NOTE: HiveCatalog does not support purge_table - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_purge_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: + if isinstance(test_catalog, HiveCatalog): + pytest.skip("HiveCatalog does not support purge_table operation yet") + identifier = (database_name, table_name) test_catalog.create_namespace(database_name) test_catalog.create_table(identifier, table_schema_nested) @@ -280,16 +211,7 @@ def test_purge_table(test_catalog: Catalog, table_schema_nested: Schema, table_n @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: test_catalog.create_namespace(database_name) test_catalog.create_table((database_name, table_name), table_schema_nested) @@ -297,32 +219,14 @@ def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, databa @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_create_namespace(test_catalog: Catalog, database_name: str) -> None: test_catalog.create_namespace(database_name) assert (database_name,) in test_catalog.list_namespaces() @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_create_duplicate_namespace(test_catalog: Catalog, database_name: str) -> None: test_catalog.create_namespace(database_name) with pytest.raises(NamespaceAlreadyExistsError): @@ -330,16 +234,7 @@ def test_create_duplicate_namespace(test_catalog: Catalog, database_name: str) - @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_create_namepsace_if_not_exists(test_catalog: Catalog, database_name: str) -> None: test_catalog.create_namespace(database_name) test_catalog.create_namespace_if_not_exists(database_name) @@ -347,16 +242,7 @@ def test_create_namepsace_if_not_exists(test_catalog: Catalog, database_name: st @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_create_namespace_with_comment(test_catalog: Catalog, database_name: str) -> None: test_properties = { "comment": "this is a test description", @@ -369,16 +255,7 @@ def test_create_namespace_with_comment(test_catalog: Catalog, database_name: str @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_list_namespaces(test_catalog: Catalog, database_list: List[str]) -> None: for database_name in database_list: test_catalog.create_namespace(database_name) @@ -389,16 +266,7 @@ def test_list_namespaces(test_catalog: Catalog, database_list: List[str]) -> Non @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_drop_namespace(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: test_catalog.create_namespace(database_name) assert (database_name,) in test_catalog.list_namespaces() @@ -411,16 +279,7 @@ def test_drop_namespace(test_catalog: Catalog, table_schema_nested: Schema, tabl @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_load_namespace_properties(test_catalog: Catalog, database_name: str) -> None: test_properties = { "comment": "this is a test description", @@ -435,16 +294,7 @@ def test_load_namespace_properties(test_catalog: Catalog, database_name: str) -> @pytest.mark.integration -@pytest.mark.parametrize( - "test_catalog", - [ - pytest.lazy_fixture("memory_catalog"), - pytest.lazy_fixture("sqlite_catalog_memory"), - pytest.lazy_fixture("sqlite_catalog_file"), - pytest.lazy_fixture("rest_catalog"), - pytest.lazy_fixture("hive_catalog"), - ], -) +@pytest.mark.parametrize("test_catalog", CATALOGS) def test_update_namespace_properties(test_catalog: Catalog, database_name: str) -> None: test_properties = { "comment": "this is a test description", From bbd9ae0c113e93a25727aeb64344370fd1ebd261 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sun, 20 Jul 2025 12:34:36 -0700 Subject: [PATCH 12/12] fix namespace for test --- tests/integration/test_writes/test_writes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 624bf0d8b2..e63883c1db 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -1540,7 +1540,7 @@ def test_rest_catalog_with_empty_catalog_name_append_data(session_catalog: Catal @pytest.mark.integration def test_table_v1_with_null_nested_namespace(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: - identifier = "default.lower.table_v1_with_null_nested_namespace" + identifier = "default.table_v1_with_null_nested_namespace" tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_with_null]) assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}"