Skip to content

Commit 1d24e71

Browse files
authored
Turn ObjectStoreLocationProvider off by default (#1722)
Closes #1721. Without this change, the new 0.9.0 release would introduce a change in default behavior: previous versions write to `data/`, whereas the new version would write to `data/<hash>/`.
1 parent 71129ee commit 1d24e71

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

pyiceberg/table/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ class TableProperties:
202202
WRITE_PY_LOCATION_PROVIDER_IMPL = "write.py-location-provider.impl"
203203

204204
OBJECT_STORE_ENABLED = "write.object-storage.enabled"
205-
OBJECT_STORE_ENABLED_DEFAULT = True
205+
OBJECT_STORE_ENABLED_DEFAULT = False
206206

207207
WRITE_OBJECT_STORE_PARTITIONED_PATHS = "write.object-storage.partitioned-paths"
208208
WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True

tests/integration/test_writes/test_partitioned_writes.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -294,13 +294,14 @@ def test_object_storage_location_provider_excludes_partition_path(
294294
PartitionField(source_id=nested_field.field_id, field_id=1001, transform=IdentityTransform(), name=part_col)
295295
)
296296

297-
# write.object-storage.enabled and write.object-storage.partitioned-paths don't need to be specified as they're on by default
298-
assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT
299-
assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT
297+
# Enable `write.object-storage.enabled` which is False by default
298+
# `write.object-storage.partitioned-paths` is True by default
299+
assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT is False
300+
assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT is True
300301
tbl = _create_table(
301302
session_catalog=session_catalog,
302303
identifier=f"default.arrow_table_v{format_version}_with_null_partitioned_on_col_{part_col}",
303-
properties={"format-version": str(format_version)},
304+
properties={"format-version": str(format_version), TableProperties.OBJECT_STORE_ENABLED: True},
304305
data=[arrow_table_with_null],
305306
partition_spec=partition_spec,
306307
)

tests/table/test_locations.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def test_custom_location_provider_not_found(caplog: Any) -> None:
7474

7575

7676
def test_object_storage_no_partition() -> None:
77-
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
77+
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"})
7878

7979
location = provider.new_data_location("test.parquet")
8080
parts = location.split("/")
@@ -111,6 +111,7 @@ def test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti
111111
provider = load_location_provider(
112112
table_location="table_location",
113113
table_properties={
114+
"write.object-storage.enabled": "true",
114115
"write.object-storage.partitioned-paths": "false",
115116
},
116117
)
@@ -131,15 +132,18 @@ def test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti
131132
],
132133
)
133134
def test_hash_injection(data_file_name: str, expected_hash: str) -> None:
134-
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
135+
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"})
135136

136137
assert provider.new_data_location(data_file_name) == f"table_location/data/{expected_hash}/{data_file_name}"
137138

138139

139140
def test_object_location_provider_write_data_path() -> None:
140141
provider = load_location_provider(
141142
table_location="s3://table-location/table",
142-
table_properties={TableProperties.WRITE_DATA_PATH: "s3://table-location/custom/data/path"},
143+
table_properties={
144+
"write.object-storage.enabled": "true",
145+
TableProperties.WRITE_DATA_PATH: "s3://table-location/custom/data/path",
146+
},
143147
)
144148

145149
assert (

0 commit comments

Comments
 (0)