From 16b261d466d44ff86b88f9a47b67baeb58cfe602 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 10 Oct 2025 10:59:19 +0530 Subject: [PATCH 1/5] Make UnaryPredicate JSON serializable and add unit test --- pyiceberg/expressions/__init__.py | 25 +++++++++++++++++++++++-- tests/expressions/test_expressions.py | 14 ++++++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 2adf898fea..bd6903f6be 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -39,7 +39,7 @@ literal, ) from pyiceberg.schema import Accessor, Schema -from pyiceberg.typedef import L, StructProtocol +from pyiceberg.typedef import IcebergBaseModel, L, StructProtocol from pyiceberg.types import DoubleType, FloatType, NestedField from pyiceberg.utils.singleton import Singleton @@ -429,7 +429,20 @@ def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression def as_bound(self) -> Type[BoundPredicate[L]]: ... -class UnaryPredicate(UnboundPredicate[Any], ABC): +class UnaryPredicate(UnboundPredicate[Any], IcebergBaseModel, ABC): + type: str + column: str + + def __init__(self, term: Union[str, UnboundTerm[Any]]): + if isinstance(term, Reference): + term_name = term.name + elif isinstance(term, str): + term_name = term + else: + raise ValueError("term must be a string or Reference") + super().__init__(term=Reference(term_name)) + self.column = term_name + def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundUnaryPredicate[Any]: bound_term = self.term.bind(schema, case_sensitive) return self.as_bound(bound_term) @@ -488,6 +501,8 @@ def as_unbound(self) -> Type[NotNull]: class IsNull(UnaryPredicate): + type: str = "is-null" + def __invert__(self) -> NotNull: """Transform the Expression into its negated version.""" return NotNull(self.term) @@ -498,6 +513,8 @@ def as_bound(self) -> Type[BoundIsNull[L]]: class NotNull(UnaryPredicate): + type: str = "not-null" + def __invert__(self) -> IsNull: """Transform the Expression into its negated version.""" return IsNull(self.term) @@ -540,6 +557,8 @@ def as_unbound(self) -> Type[NotNaN]: class IsNaN(UnaryPredicate): + type: str = "is-nan" + def __invert__(self) -> NotNaN: """Transform the Expression into its negated version.""" return NotNaN(self.term) @@ -550,6 +569,8 @@ def as_bound(self) -> Type[BoundIsNaN[L]]: class NotNaN(UnaryPredicate): + type: str = "not-nan" + def __invert__(self) -> IsNaN: """Transform the Expression into its negated version.""" return IsNaN(self.term) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 828d32704a..9b9ccb4694 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -707,7 +707,7 @@ def test_and() -> None: assert and_ == pickle.loads(pickle.dumps(and_)) with pytest.raises(ValueError, match="Expected BooleanExpression, got: abc"): - null & "abc" # type: ignore + null & "abc" def test_or() -> None: @@ -724,7 +724,7 @@ def test_or() -> None: assert or_ == pickle.loads(pickle.dumps(or_)) with pytest.raises(ValueError, match="Expected BooleanExpression, got: abc"): - null | "abc" # type: ignore + null | "abc" def test_not() -> None: @@ -791,6 +791,16 @@ def test_not_null() -> None: assert non_null == pickle.loads(pickle.dumps(non_null)) +def test_serialize_is_null() -> None: + pred = IsNull(term="foo") + assert pred.model_dump_json() == '{"type":"is-null","term":"foo"}' + + +def test_serialize_not_null() -> None: + pred = NotNull(term="foo") + assert pred.model_dump_json() == '{"type":"not-null","term":"foo"}' + + def test_bound_is_nan(accessor: Accessor) -> None: # We need a FloatType here term = BoundReference[float]( From 9d333ee5bb976a70a00aed802698985b733d1cf9 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 18 Oct 2025 13:57:37 +0530 Subject: [PATCH 2/5] Make UnaryPredicate JSON serializable and add unit test --- pyiceberg/expressions/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index bd6903f6be..fe8d8eabd3 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -429,10 +429,12 @@ def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression def as_bound(self) -> Type[BoundPredicate[L]]: ... -class UnaryPredicate(UnboundPredicate[Any], IcebergBaseModel, ABC): +class UnaryPredicate(IcebergBaseModel, UnboundPredicate[Any], ABC): type: str column: str + model_config = {"arbitrary_types_allowed": True} + def __init__(self, term: Union[str, UnboundTerm[Any]]): if isinstance(term, Reference): term_name = term.name From 9c9b3c4cf1c4b5f713d542d908761e5d42399cca Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Mon, 20 Oct 2025 14:20:09 +0530 Subject: [PATCH 3/5] Make UnaryPredicate JSON serializable and add unit test --- pyiceberg/expressions/__init__.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index fe8d8eabd3..671421724c 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -431,19 +431,12 @@ def as_bound(self) -> Type[BoundPredicate[L]]: ... class UnaryPredicate(IcebergBaseModel, UnboundPredicate[Any], ABC): type: str - column: str model_config = {"arbitrary_types_allowed": True} def __init__(self, term: Union[str, UnboundTerm[Any]]): - if isinstance(term, Reference): - term_name = term.name - elif isinstance(term, str): - term_name = term - else: - raise ValueError("term must be a string or Reference") - super().__init__(term=Reference(term_name)) - self.column = term_name + unbound = _to_unbound_term(term) + super().__init__(term=unbound) def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundUnaryPredicate[Any]: bound_term = self.term.bind(schema, case_sensitive) From e04716769ad09aa146b97270fffcac11ab0badfe Mon Sep 17 00:00:00 2001 From: Aniket <148300120+Aniketsy@users.noreply.github.com> Date: Mon, 20 Oct 2025 19:44:55 +0530 Subject: [PATCH 4/5] Update pyiceberg/expressions/__init__.py Co-authored-by: Fokko Driesprong --- pyiceberg/expressions/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index a3514f8e7a..4b173bcbc2 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -456,6 +456,11 @@ def __init__(self, term: Union[str, UnboundTerm[Any]]): unbound = _to_unbound_term(term) super().__init__(term=unbound) + def __str__(self) -> str: + """Return the string representation of the UnaryPredicate class.""" + # Sort to make it deterministic + return f"{str(self.__class__.__name__)}(term={str(self.term)})" + def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundUnaryPredicate[Any]: bound_term = self.term.bind(schema, case_sensitive) return self.as_bound(bound_term) From 0d7f25b5da326dd561b5d253a3a56054726f6768 Mon Sep 17 00:00:00 2001 From: Aniket <148300120+Aniketsy@users.noreply.github.com> Date: Wed, 22 Oct 2025 00:00:25 +0530 Subject: [PATCH 5/5] Update tests/expressions/test_expressions.py Co-authored-by: Fokko Driesprong --- tests/expressions/test_expressions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 48ad50d129..bfed8ec2b0 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -782,12 +782,12 @@ def test_not_null() -> None: def test_serialize_is_null() -> None: pred = IsNull(term="foo") - assert pred.model_dump_json() == '{"type":"is-null","term":"foo"}' + assert pred.model_dump_json() == '{"term":"foo","type":"is-null"}' def test_serialize_not_null() -> None: pred = NotNull(term="foo") - assert pred.model_dump_json() == '{"type":"not-null","term":"foo"}' + assert pred.model_dump_json() == '{"term":"foo","type":"not-null"}' def test_bound_is_nan(accessor: Accessor) -> None: