Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 5 additions & 11 deletions pyiceberg/table/update/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,7 @@
# under the License.
from __future__ import annotations

from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Optional,
Set,
Tuple,
)
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union

from pyiceberg.expressions import (
Reference,
Expand All @@ -47,7 +39,7 @@
UpdatesAndRequirements,
UpdateTableMetadata,
)
from pyiceberg.transforms import IdentityTransform, TimeTransform, Transform, VoidTransform
from pyiceberg.transforms import IdentityTransform, TimeTransform, Transform, VoidTransform, parse_transform

if TYPE_CHECKING:
from pyiceberg.table import Transaction
Expand Down Expand Up @@ -85,11 +77,13 @@ def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> Non
def add_field(
self,
source_column_name: str,
transform: Transform[Any, Any],
transform: Union[str, Transform[Any, Any]],
partition_field_name: Optional[str] = None,
) -> UpdateSpec:
ref = Reference(source_column_name)
bound_ref = ref.bind(self._transaction.table_metadata.schema(), self._case_sensitive)
if isinstance(transform, str):
transform = parse_transform(transform)
# verify transform can actually bind it
output_type = bound_ref.field.field_type
if not transform.can_transform(output_type):
Expand Down
46 changes: 23 additions & 23 deletions pyiceberg/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,29 +111,6 @@ def _transform_literal(func: Callable[[L], L], lit: Literal[L]) -> Literal[L]:
return literal(func(lit.value))


def parse_transform(v: Any) -> Any:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this intentional? seems like the function got moved lower

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kevinjqliu yes this is intentional.

I had to re-position the parse_transform method to change the return type of the method from Any to Transform[Any, Any] because mypy wasn't happy after we made Transform type as Union[str, Transform] in add_field. Without moving it down, Transform wasn't being recognized in the code.

if isinstance(v, str):
if v == IDENTITY:
return IdentityTransform()
elif v == VOID:
return VoidTransform()
elif v.startswith(BUCKET):
return BucketTransform(num_buckets=BUCKET_PARSER.match(v))
elif v.startswith(TRUNCATE):
return TruncateTransform(width=TRUNCATE_PARSER.match(v))
elif v == YEAR:
return YearTransform()
elif v == MONTH:
return MonthTransform()
elif v == DAY:
return DayTransform()
elif v == HOUR:
return HourTransform()
else:
return UnknownTransform(transform=v)
return v


class Transform(IcebergRootModel[str], ABC, Generic[S, T]):
"""Transform base class for concrete transforms.

Expand Down Expand Up @@ -220,6 +197,29 @@ def _transform(array: "ArrayLike") -> "ArrayLike":
return _transform


def parse_transform(v: Any) -> Transform[Any, Any]:
if isinstance(v, str):
if v == IDENTITY:
return IdentityTransform()
elif v == VOID:
return VoidTransform()
elif v.startswith(BUCKET):
return BucketTransform(num_buckets=BUCKET_PARSER.match(v))
elif v.startswith(TRUNCATE):
return TruncateTransform(width=TRUNCATE_PARSER.match(v))
elif v == YEAR:
return YearTransform()
elif v == MONTH:
return MonthTransform()
elif v == DAY:
return DayTransform()
elif v == HOUR:
return HourTransform()
else:
return UnknownTransform(transform=v)
return v


class BucketTransform(Transform[S, int]):
"""Base Transform class to transform a value into a bucket partition value.

Expand Down
8 changes: 8 additions & 0 deletions tests/integration/test_partition_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,14 @@ def test_add_hour(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, HourTransform(), "hour_transform"))


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_hour_string_transform(catalog: Catalog) -> None:
table = _table(catalog)
table.update_spec().add_field("event_ts", "hour", "str_hour_transform").commit()
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, HourTransform(), "str_hour_transform"))


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_hour_generates_default_name(catalog: Catalog) -> None:
Expand Down