Skip to content

Commit b2a4127

Browse files
committed
Update hive storage descriptor after commit schema change
1 parent f47513b commit b2a4127

File tree

3 files changed

+35
-0
lines changed

3 files changed

+35
-0
lines changed

dev/hive/core-site.xml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,4 +50,9 @@
5050
<name>fs.s3a.path.style.access</name>
5151
<value>true</value>
5252
</property>
53+
<property>
54+
<name>hive.metastore.disallow.incompatible.col.type.changes</name>
55+
<value>false</value>
56+
</property>
57+
5358
</configuration>

pyiceberg/catalog/hive.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -542,6 +542,12 @@ def commit_table(
542542
metadata_location=updated_staged_table.metadata_location,
543543
previous_metadata_location=current_table.metadata_location,
544544
)
545+
# Update hive's schema and properties
546+
hive_table.sd = _construct_hive_storage_descriptor(
547+
updated_staged_table.schema(),
548+
updated_staged_table.location(),
549+
property_as_bool(updated_staged_table.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
550+
)
545551
open_client.alter_table_with_environment_context(
546552
dbname=database_name,
547553
tbl_name=table_name,

tests/integration/test_writes/test_writes.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1157,6 +1157,30 @@ def test_hive_catalog_storage_descriptor(
11571157
assert spark.sql("SELECT * FROM hive.default.test_storage_descriptor").count() == 3
11581158

11591159

1160+
@pytest.mark.integration
1161+
@pytest.mark.parametrize("format_version", [1, 2])
1162+
def test_hive_catalog_storage_descriptor_has_changed(
1163+
session_catalog_hive: HiveCatalog,
1164+
pa_schema: pa.Schema,
1165+
arrow_table_with_null: pa.Table,
1166+
spark: SparkSession,
1167+
format_version: int,
1168+
) -> None:
1169+
tbl = _create_table(
1170+
session_catalog_hive, "default.test_storage_descriptor", {"format-version": format_version}, [arrow_table_with_null]
1171+
)
1172+
1173+
with tbl.transaction() as tx:
1174+
with tx.update_schema() as schema:
1175+
schema.update_column("string_long", doc="this is string_long")
1176+
schema.update_column("binary", doc="this is binary")
1177+
1178+
with session_catalog_hive._client as open_client:
1179+
hive_table = session_catalog_hive._get_hive_table(open_client, "default", "test_storage_descriptor")
1180+
assert "this is string_long" in str(hive_table.sd)
1181+
assert "this is binary" in str(hive_table.sd)
1182+
1183+
11601184
@pytest.mark.integration
11611185
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
11621186
def test_sanitize_character_partitioned(catalog: Catalog) -> None:

0 commit comments

Comments
 (0)