From ff149e8e9d8e0b8dd9e74158b1fb89724833b5b4 Mon Sep 17 00:00:00 2001 From: kadai0308 Date: Sun, 25 May 2025 21:17:06 +0800 Subject: [PATCH 1/5] fix: add metadata_properties to _construct_parameters when update hive table --- pyiceberg/catalog/hive.py | 12 ++++++++++-- tests/integration/test_reads.py | 20 +++++++++++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 5a9387577b..0fd6b9d79e 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -211,11 +211,18 @@ def _construct_hive_storage_descriptor( DEFAULT_PROPERTIES = {TableProperties.PARQUET_COMPRESSION: TableProperties.PARQUET_COMPRESSION_DEFAULT} -def _construct_parameters(metadata_location: str, previous_metadata_location: Optional[str] = None) -> Dict[str, Any]: +def _construct_parameters( + metadata_location: str, previous_metadata_location: Optional[str] = None, metadata_properties: Optional[Properties] = None +) -> Dict[str, Any]: properties = {PROP_EXTERNAL: "TRUE", PROP_TABLE_TYPE: "ICEBERG", PROP_METADATA_LOCATION: metadata_location} if previous_metadata_location: properties[PROP_PREVIOUS_METADATA_LOCATION] = previous_metadata_location + if metadata_properties: + for key, value in metadata_properties.items(): + if key not in properties: + properties[key] = str(value) + return properties @@ -360,7 +367,7 @@ def _convert_iceberg_into_hive(self, table: Table) -> HiveTable: property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT), ), tableType=EXTERNAL_TABLE, - parameters=_construct_parameters(table.metadata_location), + parameters=_construct_parameters(metadata_location=table.metadata_location, metadata_properties=table.properties), ) def _create_hive_table(self, open_client: Client, hive_table: HiveTable) -> None: @@ -541,6 +548,7 @@ def commit_table( hive_table.parameters = _construct_parameters( metadata_location=updated_staged_table.metadata_location, previous_metadata_location=current_table.metadata_location, + metadata_properties=updated_staged_table.properties, ) open_client.alter_table_with_environment_context( dbname=database_name, diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 5ac5162f8e..caf28652d9 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -48,7 +48,7 @@ pyarrow_to_schema, ) from pyiceberg.schema import Schema -from pyiceberg.table import Table +from pyiceberg.table import Table, update_table_metadata from pyiceberg.types import ( BinaryType, BooleanType, @@ -59,6 +59,7 @@ TimestampType, ) from pyiceberg.utils.concurrent import ExecutorFactory +from pyiceberg.table.update import SetPropertiesUpdate, RemovePropertiesUpdate DEFAULT_PROPERTIES = {"write.parquet.compression-codec": "zstd"} @@ -111,6 +112,23 @@ def test_table_properties(catalog: Catalog) -> None: table.transaction().set_properties(property_name=None).commit_transaction() assert "None type is not a supported value in properties: property_name" in str(exc_info.value) + if isinstance(catalog, HiveCatalog): + table.transaction().set_properties({"abc": "def", "p1": "123"}).commit_transaction() + + hive_client: _HiveClient = _HiveClient(catalog.properties["uri"]) + + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("abc") == "def" + assert hive_table.parameters.get("p1") == "123" + assert hive_table.parameters.get("not_exist_parameter") is None + + table.transaction().remove_properties("abc").commit_transaction() + + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("abc") is None + @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) From 230d08832750d5ffa1ef7358b75aaf93ab0dfb31 Mon Sep 17 00:00:00 2001 From: kadai0308 Date: Tue, 3 Jun 2025 17:49:11 +0800 Subject: [PATCH 2/5] fix: separate the test_hive_properties test --- tests/integration/test_reads.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index caf28652d9..8f6d273c22 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -112,22 +112,26 @@ def test_table_properties(catalog: Catalog) -> None: table.transaction().set_properties(property_name=None).commit_transaction() assert "None type is not a supported value in properties: property_name" in str(exc_info.value) - if isinstance(catalog, HiveCatalog): - table.transaction().set_properties({"abc": "def", "p1": "123"}).commit_transaction() - hive_client: _HiveClient = _HiveClient(catalog.properties["uri"]) +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")]) +def test_hive_properties(catalog: Catalog) -> None: + table = create_table(catalog) + table.transaction().set_properties({"abc": "def", "p1": "123"}).commit_transaction() - with hive_client as open_client: - hive_table = open_client.get_table(*TABLE_NAME) - assert hive_table.parameters.get("abc") == "def" - assert hive_table.parameters.get("p1") == "123" - assert hive_table.parameters.get("not_exist_parameter") is None + hive_client: _HiveClient = _HiveClient(catalog.properties["uri"]) - table.transaction().remove_properties("abc").commit_transaction() + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("abc") == "def" + assert hive_table.parameters.get("p1") == "123" + assert hive_table.parameters.get("not_exist_parameter") is None - with hive_client as open_client: - hive_table = open_client.get_table(*TABLE_NAME) - assert hive_table.parameters.get("abc") is None + table.transaction().remove_properties("abc").commit_transaction() + + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("abc") is None @pytest.mark.integration From 7e754d5d2a3cf96c51ae282e1899de74335dc52f Mon Sep 17 00:00:00 2001 From: kadai0308 Date: Fri, 13 Jun 2025 20:12:08 +0800 Subject: [PATCH 3/5] fix lint --- tests/integration/test_reads.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 8f6d273c22..b417a43616 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -48,7 +48,7 @@ pyarrow_to_schema, ) from pyiceberg.schema import Schema -from pyiceberg.table import Table, update_table_metadata +from pyiceberg.table import Table from pyiceberg.types import ( BinaryType, BooleanType, @@ -59,7 +59,6 @@ TimestampType, ) from pyiceberg.utils.concurrent import ExecutorFactory -from pyiceberg.table.update import SetPropertiesUpdate, RemovePropertiesUpdate DEFAULT_PROPERTIES = {"write.parquet.compression-codec": "zstd"} From d5a26e0dea037af2586babca96f0bcb5082e4afe Mon Sep 17 00:00:00 2001 From: kadai0308 Date: Fri, 13 Jun 2025 22:52:27 +0800 Subject: [PATCH 4/5] fix: test_hive missing default parameters --- tests/catalog/test_hive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index 497ff99924..59f9d56926 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -342,7 +342,7 @@ def test_create_table( storedAsSubDirectories=None, ), partitionKeys=None, - parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location}, + parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location, "write.parquet.compression-codec": "zstd", "owner": "javaberg"}, viewOriginalText=None, viewExpandedText=None, tableType="EXTERNAL_TABLE", @@ -517,7 +517,7 @@ def test_create_table_with_given_location_removes_trailing_slash( storedAsSubDirectories=None, ), partitionKeys=None, - parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location}, + parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location, "write.parquet.compression-codec": "zstd", "owner": "javaberg"}, viewOriginalText=None, viewExpandedText=None, tableType="EXTERNAL_TABLE", From 16f562edac5ec9eee25b3d51f86cbde2e40b8c1e Mon Sep 17 00:00:00 2001 From: kadai0308 Date: Sat, 14 Jun 2025 00:44:59 +0800 Subject: [PATCH 5/5] fix lint --- tests/catalog/test_hive.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index 59f9d56926..fef0d6acc6 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -342,7 +342,13 @@ def test_create_table( storedAsSubDirectories=None, ), partitionKeys=None, - parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location, "write.parquet.compression-codec": "zstd", "owner": "javaberg"}, + parameters={ + "EXTERNAL": "TRUE", + "table_type": "ICEBERG", + "metadata_location": metadata_location, + "write.parquet.compression-codec": "zstd", + "owner": "javaberg", + }, viewOriginalText=None, viewExpandedText=None, tableType="EXTERNAL_TABLE", @@ -517,7 +523,13 @@ def test_create_table_with_given_location_removes_trailing_slash( storedAsSubDirectories=None, ), partitionKeys=None, - parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location, "write.parquet.compression-codec": "zstd", "owner": "javaberg"}, + parameters={ + "EXTERNAL": "TRUE", + "table_type": "ICEBERG", + "metadata_location": metadata_location, + "write.parquet.compression-codec": "zstd", + "owner": "javaberg", + }, viewOriginalText=None, viewExpandedText=None, tableType="EXTERNAL_TABLE",