diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 5a9387577b..23b148e9b9 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -542,6 +542,12 @@ def commit_table( metadata_location=updated_staged_table.metadata_location, previous_metadata_location=current_table.metadata_location, ) + # Update hive's schema and properties + hive_table.sd = _construct_hive_storage_descriptor( + updated_staged_table.schema(), + updated_staged_table.location(), + property_as_bool(updated_staged_table.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT), + ) open_client.alter_table_with_environment_context( dbname=database_name, tbl_name=table_name, diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 150d2b750c..94a3d3775f 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -1157,6 +1157,30 @@ def test_hive_catalog_storage_descriptor( assert spark.sql("SELECT * FROM hive.default.test_storage_descriptor").count() == 3 +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_hive_catalog_storage_descriptor_has_changed( + session_catalog_hive: HiveCatalog, + pa_schema: pa.Schema, + arrow_table_with_null: pa.Table, + spark: SparkSession, + format_version: int, +) -> None: + tbl = _create_table( + session_catalog_hive, "default.test_storage_descriptor", {"format-version": format_version}, [arrow_table_with_null] + ) + + with tbl.transaction() as tx: + with tx.update_schema() as schema: + schema.update_column("string_long", doc="this is string_long") + schema.update_column("binary", doc="this is binary") + + with session_catalog_hive._client as open_client: + hive_table = session_catalog_hive._get_hive_table(open_client, "default", "test_storage_descriptor") + assert "this is string_long" in str(hive_table.sd) + assert "this is binary" in str(hive_table.sd) + + @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_sanitize_character_partitioned(catalog: Catalog) -> None: