From c961dc3150000c93e7d0e8c7f0c349357451611e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Fri, 3 Oct 2025 22:17:51 +0200 Subject: [PATCH 1/8] feat: make Or json serializable via IcebergBaseModel --- pyiceberg/expressions/__init__.py | 29 ++++++++++++++++++++++++--- tests/expressions/test_expressions.py | 4 ++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 16210bae68..2560bb00b4 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -17,6 +17,7 @@ from __future__ import annotations +import typing from abc import ABC, abstractmethod from functools import cached_property from typing import ( @@ -35,6 +36,8 @@ from pydantic import Field +from pydantic import ConfigDict, Field, field_serializer + from pyiceberg.expressions.literals import ( AboveMax, BelowMin, @@ -302,12 +305,18 @@ def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]: return (self.left, self.right) -class Or(BooleanExpression): +class Or(IcebergBaseModel, BooleanExpression): """OR operation expression - logical disjunction.""" + model_config = ConfigDict(arbitrary_types_allowed=True) + + type: str = Field(default="or", repr=False) left: BooleanExpression right: BooleanExpression + def __init__(self, left: typing.Union[BooleanExpression, Or], right: typing.Union[BooleanExpression, Or], *rest: Any): + return super().__init__(left=left, right=right) + def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> BooleanExpression: # type: ignore if rest: return _build_balanced_tree(Or, (left, right, *rest)) @@ -319,10 +328,24 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole return left else: obj = super().__new__(cls) - obj.left = left - obj.right = right return obj + @field_serializer("left") + def ser_left(self, left: BooleanExpression) -> str: + if isinstance(left, IcebergRootModel): + return left.root + return str(left) + + @field_serializer("right") + def ser_right(self, right: BooleanExpression) -> str: + if isinstance(right, IcebergRootModel): + return right.root + return str(right) + + def __str__(self) -> str: + """Return the string representation of the Or class.""" + return f"{str(self.__class__.__name__)}(left={repr(self.left)}, right={repr(self.right)})" + def __eq__(self, other: Any) -> bool: """Return the equality of two instances of the Or class.""" return self.left == other.left and self.right == other.right if isinstance(other, Or) else False diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 63673fdaeb..0e8859c541 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -705,6 +705,10 @@ def test_or() -> None: # Some syntactic sugar assert or_ == null | nan + assert ( + or_.model_dump_json() + == '{"type":"or","left":"IsNull(term=Reference(name=\'a\'))","right":"IsNaN(term=Reference(name=\'b\'))"}' + ) assert str(or_) == f"Or(left={str(null)}, right={str(nan)})" assert repr(or_) == f"Or(left={repr(null)}, right={repr(nan)})" assert or_ == eval(repr(or_)) From 873c022dcc0279e967f608b1438302bb07085e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Fri, 3 Oct 2025 22:56:50 +0200 Subject: [PATCH 2/8] fix: invalid creation of Or instance with multiple BooleanExpressions --- pyiceberg/expressions/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 2560bb00b4..991f7d9cec 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -328,6 +328,7 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole return left else: obj = super().__new__(cls) + super(Or, obj).__init__(left=left, right=right) return obj @field_serializer("left") From 4f6a09e97cbad9f92ea9108548a53bd76140be4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Mon, 20 Oct 2025 00:18:11 +0200 Subject: [PATCH 3/8] feat: fix or instance creation for _build_balanced_tree --- pyiceberg/expressions/__init__.py | 11 ++++------- tests/expressions/test_visitors.py | 24 +++++++++++------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 991f7d9cec..cfc532a0e5 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -17,7 +17,6 @@ from __future__ import annotations -import typing from abc import ABC, abstractmethod from functools import cached_property from typing import ( @@ -34,8 +33,6 @@ ) from typing import Literal as TypingLiteral -from pydantic import Field - from pydantic import ConfigDict, Field, field_serializer from pyiceberg.expressions.literals import ( @@ -310,12 +307,13 @@ class Or(IcebergBaseModel, BooleanExpression): model_config = ConfigDict(arbitrary_types_allowed=True) - type: str = Field(default="or", repr=False) + type: TypingLiteral["str"] = Field(default="or", alias="type") left: BooleanExpression right: BooleanExpression - def __init__(self, left: typing.Union[BooleanExpression, Or], right: typing.Union[BooleanExpression, Or], *rest: Any): - return super().__init__(left=left, right=right) + def __init__(self, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> None: + if isinstance(self, Or) and not hasattr(self, "left") and not hasattr(self, "right"): + super().__init__(left=left, right=right) def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> BooleanExpression: # type: ignore if rest: @@ -328,7 +326,6 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole return left else: obj = super().__new__(cls) - super(Or, obj).__init__(left=left, right=right) return obj @field_serializer("left") diff --git a/tests/expressions/test_visitors.py b/tests/expressions/test_visitors.py index d0b6ab5ab4..fdf694881b 100644 --- a/tests/expressions/test_visitors.py +++ b/tests/expressions/test_visitors.py @@ -417,21 +417,19 @@ def test_and_expression_binding( ), {literal("bar"), literal("baz")}, ), - Or( - BoundIn( - BoundReference( - field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False), - accessor=Accessor(position=0, inner=None), - ), - {literal("bar")}, + BoundIn( + BoundReference( + field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + accessor=Accessor(position=0, inner=None), ), - BoundIn( - BoundReference( - field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False), - accessor=Accessor(position=0, inner=None), - ), - {literal("baz")}, + {literal("bar")}, + ), + BoundIn( + BoundReference( + field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + accessor=Accessor(position=0, inner=None), ), + {literal("baz")}, ), ), ), From 2638c8d18a711660b0e2d3c89a01b8c834b398a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Mon, 20 Oct 2025 00:27:08 +0200 Subject: [PATCH 4/8] revert: test_visitors --- tests/expressions/test_visitors.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/expressions/test_visitors.py b/tests/expressions/test_visitors.py index fdf694881b..d0b6ab5ab4 100644 --- a/tests/expressions/test_visitors.py +++ b/tests/expressions/test_visitors.py @@ -417,19 +417,21 @@ def test_and_expression_binding( ), {literal("bar"), literal("baz")}, ), - BoundIn( - BoundReference( - field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False), - accessor=Accessor(position=0, inner=None), + Or( + BoundIn( + BoundReference( + field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + accessor=Accessor(position=0, inner=None), + ), + {literal("bar")}, ), - {literal("bar")}, - ), - BoundIn( - BoundReference( - field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False), - accessor=Accessor(position=0, inner=None), + BoundIn( + BoundReference( + field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + accessor=Accessor(position=0, inner=None), + ), + {literal("baz")}, ), - {literal("baz")}, ), ), ), From 41975366af2901719083f920e6483c106ae92253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Wed, 22 Oct 2025 00:06:28 +0200 Subject: [PATCH 5/8] fix: typo in typingliteral --- pyiceberg/expressions/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index cfc532a0e5..17378287c0 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -307,7 +307,7 @@ class Or(IcebergBaseModel, BooleanExpression): model_config = ConfigDict(arbitrary_types_allowed=True) - type: TypingLiteral["str"] = Field(default="or", alias="type") + type: TypingLiteral["or"] = Field(default="or", alias="type") left: BooleanExpression right: BooleanExpression From dc7a2b4d99a3526f5dcd3a6008541181067dae84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Wed, 22 Oct 2025 00:19:52 +0200 Subject: [PATCH 6/8] feat: add test for Or serialization alone --- tests/expressions/test_expressions.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 0e8859c541..2a57fb6693 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -705,10 +705,6 @@ def test_or() -> None: # Some syntactic sugar assert or_ == null | nan - assert ( - or_.model_dump_json() - == '{"type":"or","left":"IsNull(term=Reference(name=\'a\'))","right":"IsNaN(term=Reference(name=\'b\'))"}' - ) assert str(or_) == f"Or(left={str(null)}, right={str(nan)})" assert repr(or_) == f"Or(left={repr(null)}, right={repr(nan)})" assert or_ == eval(repr(or_)) @@ -718,6 +714,17 @@ def test_or() -> None: null | "abc" # type: ignore +def test_or_serialization() -> None: + left = EqualTo("a", 10) + right = EqualTo("b", 20) + or_ = Or(left, right) + + assert ( + or_.model_dump_json() + == '{"type":"or","left":{"term":"a","type":"eq","value":10},"right":{"term":"b","type":"eq","value":20}}' + ) + + def test_not() -> None: null = IsNull(Reference("a")) not_ = Not(null) From 2a55dfb9bf5916cb39b13f17e135512a4f1ba3fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Wed, 22 Oct 2025 00:20:08 +0200 Subject: [PATCH 7/8] fix: test Or serialization --- pyiceberg/expressions/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 17378287c0..f3b599d60a 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -329,15 +329,19 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole return obj @field_serializer("left") - def ser_left(self, left: BooleanExpression) -> str: + def ser_left(self, left: BooleanExpression) -> Any: if isinstance(left, IcebergRootModel): return left.root + if isinstance(left, IcebergBaseModel): + return left.model_dump() return str(left) @field_serializer("right") - def ser_right(self, right: BooleanExpression) -> str: + def ser_right(self, right: BooleanExpression) -> Any: if isinstance(right, IcebergRootModel): return right.root + if isinstance(right, IcebergBaseModel): + return right.model_dump() return str(right) def __str__(self) -> str: From 1b18573edf61b3e938f38572d0cc6ea287a846cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Wed, 22 Oct 2025 00:22:40 +0200 Subject: [PATCH 8/8] fix: remove unnecessary serializer --- pyiceberg/expressions/__init__.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index f3b599d60a..339fd9cd6e 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -33,7 +33,7 @@ ) from typing import Literal as TypingLiteral -from pydantic import ConfigDict, Field, field_serializer +from pydantic import ConfigDict, Field from pyiceberg.expressions.literals import ( AboveMax, @@ -328,22 +328,6 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole obj = super().__new__(cls) return obj - @field_serializer("left") - def ser_left(self, left: BooleanExpression) -> Any: - if isinstance(left, IcebergRootModel): - return left.root - if isinstance(left, IcebergBaseModel): - return left.model_dump() - return str(left) - - @field_serializer("right") - def ser_right(self, right: BooleanExpression) -> Any: - if isinstance(right, IcebergRootModel): - return right.root - if isinstance(right, IcebergBaseModel): - return right.model_dump() - return str(right) - def __str__(self) -> str: """Return the string representation of the Or class.""" return f"{str(self.__class__.__name__)}(left={repr(self.left)}, right={repr(self.right)})"