Skip to content

Commit 6cf4a51

Browse files
committed
validate_partition_name function update
1 parent 284250b commit 6cf4a51

File tree

4 files changed

+25
-30
lines changed

4 files changed

+25
-30
lines changed

pyiceberg/partitioning.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from dataclasses import dataclass
2222
from datetime import date, datetime, time
2323
from functools import cached_property, singledispatch
24-
from typing import Annotated, Any, Dict, Generic, List, Optional, Tuple, TypeVar, Union
24+
from typing import Annotated, Any, Dict, Generic, List, Optional, Set, Tuple, TypeVar, Union
2525
from urllib.parse import quote_plus
2626

2727
from pydantic import (
@@ -254,6 +254,7 @@ def validate_partition_name(
254254
partition_transform: Transform[Any, Any],
255255
source_id: int,
256256
schema: Schema,
257+
partition_names: Set[str],
257258
) -> None:
258259
"""Validate that a partition field name doesn't conflict with schema field names."""
259260
try:
@@ -262,11 +263,15 @@ def validate_partition_name(
262263
return # No conflict if field doesn't exist in schema
263264

264265
if isinstance(partition_transform, (IdentityTransform, VoidTransform)):
265-
# For identity transforms, allow conflict only if sourced from the same schema field
266+
# For identity and void transforms, allow conflict only if sourced from the same schema field
266267
if schema_field.field_id != source_id:
267-
raise ValueError(f"Cannot create identity partition from a different source field in the schema: {field_name}")
268+
raise ValueError(f"Cannot create identity partition sourced from different field in schema: {field_name}")
268269
else:
269270
raise ValueError(f"Cannot create partition from name that exists in schema: {field_name}")
271+
if not field_name:
272+
raise ValueError("Undefined name")
273+
if field_name in partition_names:
274+
raise ValueError(f"Partition name has to be unique: {field_name}")
270275

271276

272277
def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fresh_schema: Schema) -> PartitionSpec:
@@ -279,7 +284,7 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre
279284
if fresh_field is None:
280285
raise ValueError(f"Could not find field in fresh schema: {original_column_name}")
281286

282-
validate_partition_name(field.name, field.transform, fresh_field.field_id, fresh_schema)
287+
validate_partition_name(field.name, field.transform, fresh_field.field_id, fresh_schema, set())
283288

284289
partition_fields.append(
285290
PartitionField(

pyiceberg/table/update/schema.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -658,14 +658,13 @@ def _apply(self) -> Schema:
658658

659659
# Check the field-ids
660660
new_schema = Schema(*struct.fields)
661-
if self._transaction is not None:
662-
from pyiceberg.partitioning import validate_partition_name
661+
from pyiceberg.partitioning import validate_partition_name
663662

664-
for spec in self._transaction.table_metadata.partition_specs:
665-
for partition_field in spec.fields:
666-
validate_partition_name(
667-
partition_field.name, partition_field.transform, partition_field.source_id, new_schema
668-
)
663+
for spec in self._transaction.table_metadata.partition_specs:
664+
for partition_field in spec.fields:
665+
validate_partition_name(
666+
partition_field.name, partition_field.transform, partition_field.source_id, new_schema, set()
667+
)
669668
field_ids = set()
670669
for name in self._identifier_field_names:
671670
try:

pyiceberg/table/update/spec.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,7 @@ def _check_and_add_partition_name(
179179
) -> None:
180180
from pyiceberg.partitioning import validate_partition_name
181181

182-
validate_partition_name(name, transform, source_id, schema)
183-
if not name:
184-
raise ValueError("Undefined name")
185-
if name in partition_names:
186-
raise ValueError(f"Partition name has to be unique: {name}")
182+
validate_partition_name(name, transform, source_id, schema, partition_names)
187183
partition_names.add(name)
188184

189185
def _add_new_field(

tests/integration/test_partition_evolution.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,12 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
# pylint:disable=redefined-outer-name
18-
from typing import Optional
1918

2019
import pytest
2120

2221
from pyiceberg.catalog import Catalog
2322
from pyiceberg.exceptions import NoSuchTableError
24-
from pyiceberg.partitioning import PartitionField, PartitionSpec
23+
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec
2524
from pyiceberg.schema import Schema
2625
from pyiceberg.table import Table
2726
from pyiceberg.transforms import (
@@ -65,7 +64,7 @@ def _table_v2(catalog: Catalog) -> Table:
6564

6665

6766
def _create_table_with_schema(
68-
catalog: Catalog, schema: Schema, format_version: str, partition_spec: Optional[PartitionSpec] = None
67+
catalog: Catalog, schema: Schema, format_version: str, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC
6968
) -> Table:
7069
tbl_name = "default.test_schema_evolution"
7170
try:
@@ -77,7 +76,9 @@ def _create_table_with_schema(
7776
return catalog.create_table(
7877
identifier=tbl_name, schema=schema, partition_spec=partition_spec, properties={"format-version": format_version}
7978
)
80-
return catalog.create_table(identifier=tbl_name, schema=schema, properties={"format-version": format_version})
79+
return catalog.create_table(
80+
identifier=tbl_name, schema=schema, partition_spec=partition_spec, properties={"format-version": format_version}
81+
)
8182

8283

8384
@pytest.mark.integration
@@ -590,11 +591,9 @@ def test_partition_schema_field_name_conflict(catalog: Catalog) -> None:
590591
with pytest.raises(ValueError, match="Cannot create partition from name that exists in schema: id"):
591592
table.update_spec().add_field("event_ts", DayTransform(), "id").commit()
592593

593-
with pytest.raises(
594-
ValueError, match="Cannot create identity partition from a different source field in the schema: another_ts"
595-
):
594+
with pytest.raises(ValueError, match="Cannot create identity partition sourced from different field in schema: another_ts"):
596595
table.update_spec().add_field("event_ts", IdentityTransform(), "another_ts").commit()
597-
with pytest.raises(ValueError, match="Cannot create identity partition from a different source field in the schema: str"):
596+
with pytest.raises(ValueError, match="Cannot create identity partition sourced from different field in schema: str"):
598597
table.update_spec().add_field("id", IdentityTransform(), "str").commit()
599598

600599
table.update_spec().add_field("id", IdentityTransform(), "id").commit()
@@ -640,18 +639,14 @@ def test_schema_evolution_partition_conflict(catalog: Catalog) -> None:
640639

641640
with pytest.raises(ValueError, match="Cannot create partition from name that exists in schema: event_year"):
642641
table.update_schema().add_column("event_year", StringType()).commit()
643-
with pytest.raises(
644-
ValueError, match="Cannot create identity partition from a different source field in the schema: first_name"
645-
):
642+
with pytest.raises(ValueError, match="Cannot create identity partition sourced from different field in schema: first_name"):
646643
table.update_schema().add_column("first_name", StringType()).commit()
647644

648645
table.update_schema().add_column("other_field", StringType()).commit()
649646

650647
with pytest.raises(ValueError, match="Cannot create partition from name that exists in schema: event_year"):
651648
table.update_schema().rename_column("other_field", "event_year").commit()
652-
with pytest.raises(
653-
ValueError, match="Cannot create identity partition from a different source field in the schema: first_name"
654-
):
649+
with pytest.raises(ValueError, match="Cannot create identity partition sourced from different field in schema: first_name"):
655650
table.update_schema().rename_column("other_field", "first_name").commit()
656651

657652
table.update_schema().rename_column("other_field", "valid_name").commit()

0 commit comments

Comments
 (0)