Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions pyiceberg/expressions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
)
from typing import Literal as TypingLiteral

from pydantic import Field
from pydantic import ConfigDict, Field

from pyiceberg.expressions.literals import (
AboveMax,
Expand Down Expand Up @@ -302,12 +302,19 @@ def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]:
return (self.left, self.right)


class Or(BooleanExpression):
class Or(IcebergBaseModel, BooleanExpression):
"""OR operation expression - logical disjunction."""

model_config = ConfigDict(arbitrary_types_allowed=True)

type: TypingLiteral["or"] = Field(default="or", alias="type")
left: BooleanExpression
right: BooleanExpression

def __init__(self, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> None:
if isinstance(self, Or) and not hasattr(self, "left") and not hasattr(self, "right"):
super().__init__(left=left, right=right)

Comment on lines +314 to +317
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This weird __init__ is to make sure that if the class has been already initialized in __new__, it is not initialized again.
If this is not made, multiple things can happen:

  • If *rest is given, although _build_balanced_tree build an Or instance correctly, Python would call again Or initializer with the original parameters left and right resulting in Or(left, right), ignoring the created Or instance from _build_balanced_tree
  • If left or right is AlwaysTrue it will get overriden and the instance will be initialized as Or(left, right)
  • If left and right is AlwaysFalse it will get overriden and the instance will be initialized as Or(left, right)

I do not know why exactly but it gotta be something from Pydantic

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I spent several hours debugging this, and it is pretty weird. Pydantic seems not to work well with __new__. I came up with the following:

 class Or(IcebergBaseModel, BooleanExpression):
     """OR operation expression - logical disjunction."""
 
-    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=False)
 
-    type: TypingLiteral["str"] = Field(default="or", alias="type")
-    left: BooleanExpression
-    right: BooleanExpression
+    type: TypingLiteral["or"] = Field(default="or")
+    left: BooleanExpression = Field()
+    right: BooleanExpression = Field()
 
     def __init__(self, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> None:
-        if isinstance(self, Or) and not hasattr(self, "left") and not hasattr(self, "right"):
-            super().__init__(left=left, right=right)
+        super().__init__(left=self.left, right=self.right)
 
     def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> BooleanExpression:  # type: ignore
         if rest:
@@ -326,20 +325,11 @@ class Or(IcebergBaseModel, BooleanExpression):
             return left
         else:
             obj = super().__new__(cls)
+            obj.__pydantic_fields_set__ = set()
+            obj.left = left
+            obj.right = right
             return obj

I see what direction you were going, but if we leave out the obj.left = left then we don't have the attributes and we have an empty Or object. The test is still failing, but I think that's because the UnaryPredicates are not yet serializable. If you update the tests to use LiteralPredicates, then it should work 👍

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot test it now, but I am pretty sure that I left out obj.left = left because of how Python works with __new__. If you return a class of the same type that happens:

If new() is invoked during object construction and it returns an instance of cls, then the new instance’s init() method will be invoked like init(self[, ...]), where self is the new instance and the remaining arguments are the same as were passed to the object constructor.
Ref: https://docs.python.org/3.12/reference/datamodel.html#object.__new__

So that is why I purposely left the object uninitialized, so that when the raw obj is returned it is initialized by __init__.

If the object has already being initialized by _build_balanced_tree I just leave it as is.

Copy link
Contributor Author

@jaimeferj jaimeferj Oct 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another thing, the tests are passing in my implementation, as far as I know, so you know that Or is being instantiated correctly, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Earlier today I had failing tests, but I was still at an older commit of the branch. Let me check again

def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> BooleanExpression: # type: ignore
if rest:
return _build_balanced_tree(Or, (left, right, *rest))
Expand All @@ -319,10 +326,12 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole
return left
else:
obj = super().__new__(cls)
obj.left = left
obj.right = right
return obj

def __str__(self) -> str:
"""Return the string representation of the Or class."""
return f"{str(self.__class__.__name__)}(left={repr(self.left)}, right={repr(self.right)})"

def __eq__(self, other: Any) -> bool:
"""Return the equality of two instances of the Or class."""
return self.left == other.left and self.right == other.right if isinstance(other, Or) else False
Expand Down
11 changes: 11 additions & 0 deletions tests/expressions/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,17 @@ def test_or() -> None:
null | "abc" # type: ignore


def test_or_serialization() -> None:
left = EqualTo("a", 10)
right = EqualTo("b", 20)
or_ = Or(left, right)

assert (
or_.model_dump_json()
== '{"type":"or","left":{"term":"a","type":"eq","value":10},"right":{"term":"b","type":"eq","value":20}}'
)


def test_not() -> None:
null = IsNull(Reference("a"))
not_ = Not(null)
Expand Down