Add an integration test for a table with an emoji column / partition name.

dev/provision.py creates `test_table_metadata_sanitized_character` (the
provisioning comment calls this the "avro sanitization test"), partitioned by
a column named with an emoji, and inserts two rows. The new test in
tests/integration/test_reads.py loads that table, scans it to Arrow, and
checks the schema field names, the partition spec field names, and the number
of entries returned by `inspect.files()`.

The Makefile is intentionally not modified: `test-integration-exec` must keep
running the whole `tests/` tree with `-m integration`; narrowing it to a
single test node ID is a local debugging aid and must not be committed.

diff --git a/dev/provision.py b/dev/provision.py
index 231f5123ce..a3da1a111d 100644
--- a/dev/provision.py
+++ b/dev/provision.py
@@ -315,6 +315,27 @@
     """
     )
 
+    # avro sanitization test
+    spark.sql(
+        f"""
+    CREATE TABLE {catalog_name}.default.test_table_metadata_sanitized_character (
+        name STRING NOT NULL,
+        `😎` STRING
+    )
+    USING ICEBERG
+    PARTITIONED BY (`😎`);
+    """
+    )
+
+    spark.sql(
+        f"""
+    INSERT INTO {catalog_name}.default.test_table_metadata_sanitized_character
+    VALUES
+        ('Foo', 'Cool Foo'),
+        ('Bar', 'Cool Bar')
+    """
+    )
+
     spark.sql(
         f"""
     CREATE TABLE {catalog_name}.default.test_table_add_column (
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index a33b1a36bc..d56b7efdea 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -760,6 +760,17 @@ def test_sanitize_character(catalog: Catalog) -> None:
     assert arrow_table.schema.names[0] == table_test_table_sanitized_character.schema().fields[0].name
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
+def test_metadata_sanitize_character(catalog: Catalog) -> None:
+    avro_sanitized_character_table = catalog.load_table("default.test_table_metadata_sanitized_character")
+    arrow_table = avro_sanitized_character_table.scan().to_arrow()
+    assert len(arrow_table.schema.names) == 2
+    assert len(avro_sanitized_character_table.inspect.files()) == 2
+    assert [field.name for field in avro_sanitized_character_table.schema().fields] == ["name", "😎"]
+    assert [field.name for field in avro_sanitized_character_table.spec().fields] == ["😎"]
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
 def test_null_list_and_map(catalog: Catalog) -> None: