From 6d2e99bd1266cdfc62888a8e806a76b0119e5bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Fri, 3 Oct 2025 01:30:18 +0200 Subject: [PATCH 1/9] feat: make LiteralPredicate serializable via internal IcebergBaseModel --- pyiceberg/expressions/__init__.py | 35 +++++++++++++++++++++++++++ tests/expressions/test_expressions.py | 18 ++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index d0824cc315..6834d2d80d 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -35,6 +35,8 @@ from pydantic import Field +from pydantic import Field + from pyiceberg.expressions.literals import ( AboveMax, BelowMin, @@ -750,6 +752,39 @@ def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, Literal super().__init__(term) self.literal = _to_literal(literal) # pylint: disable=W0621 + # ---- JSON (Pydantic) serialization helpers ---- + + class _LiteralPredicateModel(IcebergBaseModel): + type: str = Field(alias="type") + term: str + value: Any + + def _json_op(self) -> str: + mapping = { + EqualTo: "eq", + NotEqualTo: "not-eq", + LessThan: "lt", + LessThanOrEqual: "lt-eq", + GreaterThan: "gt", + GreaterThanOrEqual: "gt-eq", + StartsWith: "starts-with", + NotStartsWith: "not-starts-with", + } + for cls, op in mapping.items(): + if isinstance(self, cls): + return op + raise ValueError(f"Unknown LiteralPredicate: {type(self).__name__}") + + def model_dump(self, **kwargs: Any) -> dict: + term_name = getattr(self.term, "name", str(self.term)) + return self._LiteralPredicateModel(type=self._json_op(), term=term_name, value=self.literal.value).model_dump(**kwargs) + + def model_dump_json(self, **kwargs: Any) -> str: + term_name = getattr(self.term, "name", str(self.term)) + return self._LiteralPredicateModel(type=self._json_op(), term=term_name, value=self.literal.value).model_dump_json( + **kwargs + ) + def bind(self, schema: Schema, 
case_sensitive: bool = True) -> BoundLiteralPredicate[L]: bound_term = self.term.bind(schema, case_sensitive) lit = self.literal.to(bound_term.ref().field.field_type) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 115cc4025f..0fbd31b22b 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -55,8 +55,10 @@ NotIn, NotNaN, NotNull, + NotStartsWith, Or, Reference, + StartsWith, UnboundPredicate, ) from pyiceberg.expressions.literals import Literal, literal @@ -915,6 +917,7 @@ def test_bound_less_than_or_equal(term: BoundReference[Any]) -> None: def test_equal_to() -> None: equal_to = EqualTo(Reference("a"), literal("a")) + assert equal_to.model_dump_json() == '{"type":"eq","term":"a","value":"a"}' assert str(equal_to) == "EqualTo(term=Reference(name='a'), literal=literal('a'))" assert repr(equal_to) == "EqualTo(term=Reference(name='a'), literal=literal('a'))" assert equal_to == eval(repr(equal_to)) @@ -923,6 +926,7 @@ def test_equal_to() -> None: def test_not_equal_to() -> None: not_equal_to = NotEqualTo(Reference("a"), literal("a")) + assert not_equal_to.model_dump_json() == '{"type":"not-eq","term":"a","value":"a"}' assert str(not_equal_to) == "NotEqualTo(term=Reference(name='a'), literal=literal('a'))" assert repr(not_equal_to) == "NotEqualTo(term=Reference(name='a'), literal=literal('a'))" assert not_equal_to == eval(repr(not_equal_to)) @@ -931,6 +935,7 @@ def test_not_equal_to() -> None: def test_greater_than_or_equal_to() -> None: greater_than_or_equal_to = GreaterThanOrEqual(Reference("a"), literal("a")) + assert greater_than_or_equal_to.model_dump_json() == '{"type":"gt-eq","term":"a","value":"a"}' assert str(greater_than_or_equal_to) == "GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert repr(greater_than_or_equal_to) == "GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert greater_than_or_equal_to == 
eval(repr(greater_than_or_equal_to)) @@ -939,6 +944,7 @@ def test_greater_than_or_equal_to() -> None: def test_greater_than() -> None: greater_than = GreaterThan(Reference("a"), literal("a")) + assert greater_than.model_dump_json() == '{"type":"gt","term":"a","value":"a"}' assert str(greater_than) == "GreaterThan(term=Reference(name='a'), literal=literal('a'))" assert repr(greater_than) == "GreaterThan(term=Reference(name='a'), literal=literal('a'))" assert greater_than == eval(repr(greater_than)) @@ -947,14 +953,26 @@ def test_greater_than() -> None: def test_less_than() -> None: less_than = LessThan(Reference("a"), literal("a")) + assert less_than.model_dump_json() == '{"type":"lt","term":"a","value":"a"}' assert str(less_than) == "LessThan(term=Reference(name='a'), literal=literal('a'))" assert repr(less_than) == "LessThan(term=Reference(name='a'), literal=literal('a'))" assert less_than == eval(repr(less_than)) assert less_than == pickle.loads(pickle.dumps(less_than)) +def test_starts_with() -> None: + starts_with = StartsWith(Reference("a"), literal("a")) + assert starts_with.model_dump_json() == '{"type":"starts-with","term":"a","value":"a"}' + + +def test_not_starts_with() -> None: + not_starts_with = NotStartsWith(Reference("a"), literal("a")) + assert not_starts_with.model_dump_json() == '{"type":"not-starts-with","term":"a","value":"a"}' + + def test_less_than_or_equal() -> None: less_than_or_equal = LessThanOrEqual(Reference("a"), literal("a")) + assert less_than_or_equal.model_dump_json() == '{"type":"lt-eq","term":"a","value":"a"}' assert str(less_than_or_equal) == "LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert repr(less_than_or_equal) == "LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert less_than_or_equal == eval(repr(less_than_or_equal)) From 07014e530a1ecc555ad6440eded354ce913043df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Fri, 3 Oct 2025 18:55:15 +0200 Subject: [PATCH 
2/9] feat: subclass LiteralPredicate instead of using internal class --- pyiceberg/expressions/__init__.py | 90 ++++++++++++++++----------- tests/expressions/test_expressions.py | 32 +++++----- 2 files changed, 68 insertions(+), 54 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 6834d2d80d..02c102df4c 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -17,11 +17,13 @@ from __future__ import annotations +import typing from abc import ABC, abstractmethod from functools import cached_property from typing import ( Any, Callable, + ClassVar, Generic, Iterable, Sequence, @@ -35,7 +37,7 @@ from pydantic import Field -from pydantic import Field +from pydantic import ConfigDict, Field, field_serializer, field_validator from pyiceberg.expressions.literals import ( AboveMax, @@ -745,45 +747,37 @@ def as_bound(self) -> Type[BoundNotIn[L]]: return BoundNotIn[L] -class LiteralPredicate(UnboundPredicate[L], ABC): - literal: Literal[L] +class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC): + op: str = Field( + default="", + alias="type", + validation_alias="type", + serialization_alias="type", + repr=False, + ) + term: Term[L] + literal: Literal[L] = Field(serialization_alias="value") + + __op__: ClassVar[str] = "" + + model_config = ConfigDict(arbitrary_types_allowed=True) def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, Literal[L]]): # pylint: disable=W0621 - super().__init__(term) - self.literal = _to_literal(literal) # pylint: disable=W0621 - - # ---- JSON (Pydantic) serialization helpers ---- - - class _LiteralPredicateModel(IcebergBaseModel): - type: str = Field(alias="type") - term: str - value: Any - - def _json_op(self) -> str: - mapping = { - EqualTo: "eq", - NotEqualTo: "not-eq", - LessThan: "lt", - LessThanOrEqual: "lt-eq", - GreaterThan: "gt", - GreaterThanOrEqual: "gt-eq", - StartsWith: "starts-with", - NotStartsWith: "not-starts-with", - 
} - for cls, op in mapping.items(): - if isinstance(self, cls): - return op - raise ValueError(f"Unknown LiteralPredicate: {type(self).__name__}") - - def model_dump(self, **kwargs: Any) -> dict: - term_name = getattr(self.term, "name", str(self.term)) - return self._LiteralPredicateModel(type=self._json_op(), term=term_name, value=self.literal.value).model_dump(**kwargs) - - def model_dump_json(self, **kwargs: Any) -> str: - term_name = getattr(self.term, "name", str(self.term)) - return self._LiteralPredicateModel(type=self._json_op(), term=term_name, value=self.literal.value).model_dump_json( - **kwargs - ) + super().__init__(term=_to_unbound_term(term), literal=_to_literal(literal)) + + def model_post_init(self, __context: Any) -> None: + if not self.op: + object.__setattr__(self, "op", self.__op__) + elif self.op != self.__op__: + raise ValueError(f"Invalid type {self.op!r}; expected {self.__op__!r}") + + @field_serializer("term") + def ser_term(self, v: Term[L]) -> str: + return v.name + + @field_serializer("literal") + def ser_literal(self, literal: Literal[L]) -> str: + return "Any" def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundLiteralPredicate[L]: bound_term = self.term.bind(schema, case_sensitive) @@ -808,6 +802,10 @@ def __eq__(self, other: Any) -> bool: return self.term == other.term and self.literal == other.literal return False + def __str__(self) -> str: + """Return the string representation of the LiteralPredicate class.""" + return f"{str(self.__class__.__name__)}(term={repr(self.term)}, literal={repr(self.literal)})" + def __repr__(self) -> str: """Return the string representation of the LiteralPredicate class.""" return f"{str(self.__class__.__name__)}(term={repr(self.term)}, literal={repr(self.literal)})" @@ -921,6 +919,8 @@ def as_unbound(self) -> Type[NotStartsWith[L]]: class EqualTo(LiteralPredicate[L]): + __op__ = "eq" + def __invert__(self) -> NotEqualTo[L]: """Transform the Expression into its negated version.""" 
return NotEqualTo[L](self.term, self.literal) @@ -931,6 +931,8 @@ def as_bound(self) -> Type[BoundEqualTo[L]]: class NotEqualTo(LiteralPredicate[L]): + __op__ = "not-eq" + def __invert__(self) -> EqualTo[L]: """Transform the Expression into its negated version.""" return EqualTo[L](self.term, self.literal) @@ -941,6 +943,8 @@ def as_bound(self) -> Type[BoundNotEqualTo[L]]: class LessThan(LiteralPredicate[L]): + __op__ = "lt" + def __invert__(self) -> GreaterThanOrEqual[L]: """Transform the Expression into its negated version.""" return GreaterThanOrEqual[L](self.term, self.literal) @@ -951,6 +955,8 @@ def as_bound(self) -> Type[BoundLessThan[L]]: class GreaterThanOrEqual(LiteralPredicate[L]): + __op__ = "gt-eq" + def __invert__(self) -> LessThan[L]: """Transform the Expression into its negated version.""" return LessThan[L](self.term, self.literal) @@ -961,6 +967,8 @@ def as_bound(self) -> Type[BoundGreaterThanOrEqual[L]]: class GreaterThan(LiteralPredicate[L]): + __op__ = "gt" + def __invert__(self) -> LessThanOrEqual[L]: """Transform the Expression into its negated version.""" return LessThanOrEqual[L](self.term, self.literal) @@ -971,6 +979,8 @@ def as_bound(self) -> Type[BoundGreaterThan[L]]: class LessThanOrEqual(LiteralPredicate[L]): + __op__ = "lt-eq" + def __invert__(self) -> GreaterThan[L]: """Transform the Expression into its negated version.""" return GreaterThan[L](self.term, self.literal) @@ -981,6 +991,8 @@ def as_bound(self) -> Type[BoundLessThanOrEqual[L]]: class StartsWith(LiteralPredicate[L]): + __op__ = "starts-with" + def __invert__(self) -> NotStartsWith[L]: """Transform the Expression into its negated version.""" return NotStartsWith[L](self.term, self.literal) @@ -991,6 +1003,8 @@ def as_bound(self) -> Type[BoundStartsWith[L]]: class NotStartsWith(LiteralPredicate[L]): + __op__ = "not-starts-with" + def __invert__(self) -> StartsWith[L]: """Transform the Expression into its negated version.""" return StartsWith[L](self.term, self.literal) 
diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 0fbd31b22b..b66f665b05 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -917,7 +917,7 @@ def test_bound_less_than_or_equal(term: BoundReference[Any]) -> None: def test_equal_to() -> None: equal_to = EqualTo(Reference("a"), literal("a")) - assert equal_to.model_dump_json() == '{"type":"eq","term":"a","value":"a"}' + assert equal_to.model_dump_json() == '{"term":"a","type":"eq","value":"Any"}' assert str(equal_to) == "EqualTo(term=Reference(name='a'), literal=literal('a'))" assert repr(equal_to) == "EqualTo(term=Reference(name='a'), literal=literal('a'))" assert equal_to == eval(repr(equal_to)) @@ -926,7 +926,7 @@ def test_equal_to() -> None: def test_not_equal_to() -> None: not_equal_to = NotEqualTo(Reference("a"), literal("a")) - assert not_equal_to.model_dump_json() == '{"type":"not-eq","term":"a","value":"a"}' + assert not_equal_to.model_dump_json() == '{"term":"a","type":"not-eq","value":"Any"}' assert str(not_equal_to) == "NotEqualTo(term=Reference(name='a'), literal=literal('a'))" assert repr(not_equal_to) == "NotEqualTo(term=Reference(name='a'), literal=literal('a'))" assert not_equal_to == eval(repr(not_equal_to)) @@ -935,7 +935,7 @@ def test_not_equal_to() -> None: def test_greater_than_or_equal_to() -> None: greater_than_or_equal_to = GreaterThanOrEqual(Reference("a"), literal("a")) - assert greater_than_or_equal_to.model_dump_json() == '{"type":"gt-eq","term":"a","value":"a"}' + assert greater_than_or_equal_to.model_dump_json() == '{"term":"a","type":"gt-eq","value":"Any"}' assert str(greater_than_or_equal_to) == "GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert repr(greater_than_or_equal_to) == "GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert greater_than_or_equal_to == eval(repr(greater_than_or_equal_to)) @@ -944,7 +944,7 @@ def 
test_greater_than_or_equal_to() -> None: def test_greater_than() -> None: greater_than = GreaterThan(Reference("a"), literal("a")) - assert greater_than.model_dump_json() == '{"type":"gt","term":"a","value":"a"}' + assert greater_than.model_dump_json() == '{"term":"a","type":"gt","value":"Any"}' assert str(greater_than) == "GreaterThan(term=Reference(name='a'), literal=literal('a'))" assert repr(greater_than) == "GreaterThan(term=Reference(name='a'), literal=literal('a'))" assert greater_than == eval(repr(greater_than)) @@ -953,32 +953,32 @@ def test_greater_than() -> None: def test_less_than() -> None: less_than = LessThan(Reference("a"), literal("a")) - assert less_than.model_dump_json() == '{"type":"lt","term":"a","value":"a"}' + assert less_than.model_dump_json() == '{"term":"a","type":"lt","value":"Any"}' assert str(less_than) == "LessThan(term=Reference(name='a'), literal=literal('a'))" assert repr(less_than) == "LessThan(term=Reference(name='a'), literal=literal('a'))" assert less_than == eval(repr(less_than)) assert less_than == pickle.loads(pickle.dumps(less_than)) -def test_starts_with() -> None: - starts_with = StartsWith(Reference("a"), literal("a")) - assert starts_with.model_dump_json() == '{"type":"starts-with","term":"a","value":"a"}' - - -def test_not_starts_with() -> None: - not_starts_with = NotStartsWith(Reference("a"), literal("a")) - assert not_starts_with.model_dump_json() == '{"type":"not-starts-with","term":"a","value":"a"}' - - def test_less_than_or_equal() -> None: less_than_or_equal = LessThanOrEqual(Reference("a"), literal("a")) - assert less_than_or_equal.model_dump_json() == '{"type":"lt-eq","term":"a","value":"a"}' + assert less_than_or_equal.model_dump_json() == '{"term":"a","type":"lt-eq","value":"Any"}' assert str(less_than_or_equal) == "LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert repr(less_than_or_equal) == "LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert less_than_or_equal == 
eval(repr(less_than_or_equal)) assert less_than_or_equal == pickle.loads(pickle.dumps(less_than_or_equal)) +def test_starts_with() -> None: + starts_with = StartsWith(Reference("a"), literal("a")) + assert starts_with.model_dump_json() == '{"term":"a","type":"starts-with","value":"Any"}' + + +def test_not_starts_with() -> None: + not_starts_with = NotStartsWith(Reference("a"), literal("a")) + assert not_starts_with.model_dump_json() == '{"term":"a","type":"not-starts-with","value":"Any"}' + + def test_bound_reference_eval(table_schema_simple: Schema) -> None: """Test creating a BoundReference and evaluating it on a StructProtocol""" struct = Record("foovalue", 123, True) From f985dc29013409d31ae0b612d03f45f7e9e5ffed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Fri, 10 Oct 2025 19:43:30 +0200 Subject: [PATCH 3/9] fix: use type in main class only and remove __op__ --- pyiceberg/expressions/__init__.py | 38 ++++++++----------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 02c102df4c..923e462b45 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -17,13 +17,11 @@ from __future__ import annotations -import typing from abc import ABC, abstractmethod from functools import cached_property from typing import ( Any, Callable, - ClassVar, Generic, Iterable, Sequence, @@ -35,9 +33,7 @@ ) from typing import Literal as TypingLiteral -from pydantic import Field - -from pydantic import ConfigDict, Field, field_serializer, field_validator +from pydantic import ConfigDict, Field, field_serializer from pyiceberg.expressions.literals import ( AboveMax, @@ -748,29 +744,15 @@ def as_bound(self) -> Type[BoundNotIn[L]]: class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC): - op: str = Field( - default="", - alias="type", - validation_alias="type", - serialization_alias="type", - repr=False, - ) + type: 
TypingLiteral["lt-eq", "gt", "gt-eq", "eq", "not-eq", "starts-with", "not-starts-with"] = Field(alias="type") term: Term[L] literal: Literal[L] = Field(serialization_alias="value") - __op__: ClassVar[str] = "" - model_config = ConfigDict(arbitrary_types_allowed=True) def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, Literal[L]]): # pylint: disable=W0621 super().__init__(term=_to_unbound_term(term), literal=_to_literal(literal)) - def model_post_init(self, __context: Any) -> None: - if not self.op: - object.__setattr__(self, "op", self.__op__) - elif self.op != self.__op__: - raise ValueError(f"Invalid type {self.op!r}; expected {self.__op__!r}") - @field_serializer("term") def ser_term(self, v: Term[L]) -> str: return v.name @@ -919,7 +901,7 @@ def as_unbound(self) -> Type[NotStartsWith[L]]: class EqualTo(LiteralPredicate[L]): - __op__ = "eq" + type: TypingLiteral["eq"] = Field(default="eq", alias="type") def __invert__(self) -> NotEqualTo[L]: """Transform the Expression into its negated version.""" @@ -931,7 +913,7 @@ def as_bound(self) -> Type[BoundEqualTo[L]]: class NotEqualTo(LiteralPredicate[L]): - __op__ = "not-eq" + type: TypingLiteral["not-eq"] = Field(default="not-eq", alias="type") def __invert__(self) -> EqualTo[L]: """Transform the Expression into its negated version.""" @@ -943,7 +925,7 @@ def as_bound(self) -> Type[BoundNotEqualTo[L]]: class LessThan(LiteralPredicate[L]): - __op__ = "lt" + type: TypingLiteral["lt"] = Field(default="lt", alias="type") def __invert__(self) -> GreaterThanOrEqual[L]: """Transform the Expression into its negated version.""" @@ -955,7 +937,7 @@ def as_bound(self) -> Type[BoundLessThan[L]]: class GreaterThanOrEqual(LiteralPredicate[L]): - __op__ = "gt-eq" + type: TypingLiteral["gt-eq"] = Field(default="gt-eq", alias="type") def __invert__(self) -> LessThan[L]: """Transform the Expression into its negated version.""" @@ -967,7 +949,7 @@ def as_bound(self) -> Type[BoundGreaterThanOrEqual[L]]: class 
GreaterThan(LiteralPredicate[L]): - __op__ = "gt" + type: TypingLiteral["gt"] = Field(default="gt", alias="type") def __invert__(self) -> LessThanOrEqual[L]: """Transform the Expression into its negated version.""" @@ -979,7 +961,7 @@ def as_bound(self) -> Type[BoundGreaterThan[L]]: class LessThanOrEqual(LiteralPredicate[L]): - __op__ = "lt-eq" + type: TypingLiteral["lt-eq"] = Field(default="lt-eq", alias="type") def __invert__(self) -> GreaterThan[L]: """Transform the Expression into its negated version.""" @@ -991,7 +973,7 @@ def as_bound(self) -> Type[BoundLessThanOrEqual[L]]: class StartsWith(LiteralPredicate[L]): - __op__ = "starts-with" + type: TypingLiteral["starts-with"] = Field(default="starts-with", alias="type") def __invert__(self) -> NotStartsWith[L]: """Transform the Expression into its negated version.""" @@ -1003,7 +985,7 @@ def as_bound(self) -> Type[BoundStartsWith[L]]: class NotStartsWith(LiteralPredicate[L]): - __op__ = "not-starts-with" + type: TypingLiteral["not-starts-with"] = Field(default="not-starts-with", alias="type") def __invert__(self) -> StartsWith[L]: """Transform the Expression into its negated version.""" From 2092a0b0dc718ec043ad9544da0fde4f8c194ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Sat, 11 Oct 2025 00:32:54 +0200 Subject: [PATCH 4/9] fix adding lt literal and allow boundreference in _to_unbound_term --- pyiceberg/expressions/__init__.py | 38 ++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 923e462b45..883a042d54 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -33,7 +33,7 @@ ) from typing import Literal as TypingLiteral -from pydantic import ConfigDict, Field, field_serializer +from pydantic import ConfigDict, Field, field_serializer, field_validator from pyiceberg.expressions.literals import ( AboveMax, @@ -52,8 +52,14 @@ 
ConfigDict = dict -def _to_unbound_term(term: Union[str, UnboundTerm[Any]]) -> UnboundTerm[Any]: - return Reference(term) if isinstance(term, str) else term +def _to_unbound_term(term: Union[str, UnboundTerm[Any], BoundReference[Any]]) -> UnboundTerm[Any]: + if isinstance(term, str): + return Reference(term) + if isinstance(term, UnboundTerm): + return term + if isinstance(term, BoundReference): + return Reference(term.field.name) + raise ValueError(f"Expected UnboundTerm | BoundReference | str, got {type(term).__name__}") def _to_literal_set(values: Union[Iterable[L], Iterable[Literal[L]]]) -> Set[Literal[L]]: @@ -744,18 +750,28 @@ def as_bound(self) -> Type[BoundNotIn[L]]: class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC): - type: TypingLiteral["lt-eq", "gt", "gt-eq", "eq", "not-eq", "starts-with", "not-starts-with"] = Field(alias="type") - term: Term[L] + type: TypingLiteral["lt", "lt-eq", "gt", "gt-eq", "eq", "not-eq", "starts-with", "not-starts-with"] = Field(alias="type") + term: UnboundTerm[L] literal: Literal[L] = Field(serialization_alias="value") model_config = ConfigDict(arbitrary_types_allowed=True) - def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, Literal[L]]): # pylint: disable=W0621 - super().__init__(term=_to_unbound_term(term), literal=_to_literal(literal)) - - @field_serializer("term") - def ser_term(self, v: Term[L]) -> str: - return v.name + def __init__(self, *args: Any, **kwargs: Any) -> None: + if args: + if len(args) != 2: + raise TypeError("Expected (term, literal)") + kwargs = {"term": args[0], "literal": args[1], **kwargs} + super().__init__(**kwargs) + + @field_validator("term", mode="before") + @classmethod + def _coerce_term(cls, v: Any) -> UnboundTerm[Any]: + return _to_unbound_term(v) + + @field_validator("literal", mode="before") + @classmethod + def _coerce_literal(cls, v: Union[L, Literal[L]]) -> Literal[L]: + return _to_literal(v) @field_serializer("literal") def ser_literal(self, 
literal: Literal[L]) -> str: From bad9859df2e45d3002da6a60956304bd7bcfe4f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Sat, 11 Oct 2025 00:35:46 +0200 Subject: [PATCH 5/9] fix: remove type hinting errors --- tests/expressions/test_evaluator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py index cfc32d9b6b..7b15099105 100644 --- a/tests/expressions/test_evaluator.py +++ b/tests/expressions/test_evaluator.py @@ -683,7 +683,7 @@ def data_file_nan() -> DataFile: def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan: Schema, data_file_nan: DataFile) -> None: - for operator in [LessThan, LessThanOrEqual]: # type: ignore + for operator in [LessThan, LessThanOrEqual]: should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: all nan column doesn't contain number" @@ -711,7 +711,7 @@ def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_f def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal( schema_data_file_nan: Schema, data_file_nan: DataFile ) -> None: - for operator in [GreaterThan, GreaterThanOrEqual]: # type: ignore + for operator in [GreaterThan, GreaterThanOrEqual]: should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: all nan column doesn't contain number" From b2ff87747c265014f53065658c593db5b7a009d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Thu, 16 Oct 2025 00:38:03 +0200 Subject: [PATCH 6/9] fix: mypy errors and tests --- pyiceberg/expressions/__init__.py | 56 ++++++++++++++++++--------- pyiceberg/transforms.py | 2 +- tests/expressions/test_evaluator.py | 46 ++++++++++++---------- 
tests/expressions/test_expressions.py | 26 ++++++++----- 4 files changed, 80 insertions(+), 50 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 883a042d54..62e7e71ef3 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -30,10 +30,11 @@ Type, TypeVar, Union, + cast, ) from typing import Literal as TypingLiteral -from pydantic import ConfigDict, Field, field_serializer, field_validator +from pydantic import ConfigDict, Field, field_validator from pyiceberg.expressions.literals import ( AboveMax, @@ -751,31 +752,50 @@ def as_bound(self) -> Type[BoundNotIn[L]]: class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC): type: TypingLiteral["lt", "lt-eq", "gt", "gt-eq", "eq", "not-eq", "starts-with", "not-starts-with"] = Field(alias="type") - term: UnboundTerm[L] - literal: Literal[L] = Field(serialization_alias="value") + term: UnboundTerm[Any] + value: Literal[L] = Field(alias="literal", serialization_alias="value") - model_config = ConfigDict(arbitrary_types_allowed=True) + model_config = ConfigDict(populate_by_name=True, frozen=True, arbitrary_types_allowed=True) + + def __init__( + self, + term: Union[str, UnboundTerm[Any], BoundReference[Any]], + literal: Union[L, Literal[L], None] = None, + **data: Any, + ) -> None: # pylint: disable=W0621 + extra = dict(data) + + literal_candidates = [] + if literal is not None: + literal_candidates.append(literal) + if "literal" in extra: + literal_candidates.append(extra.pop("literal")) + if "value" in extra: + literal_candidates.append(extra.pop("value")) - def __init__(self, *args: Any, **kwargs: Any) -> None: - if args: - if len(args) != 2: - raise TypeError("Expected (term, literal)") - kwargs = {"term": args[0], "literal": args[1], **kwargs} - super().__init__(**kwargs) + literal_candidates = [candidate for candidate in literal_candidates if candidate is not None] + + if not literal_candidates: + raise 
TypeError("LiteralPredicate requires a literal or value argument") + if len(literal_candidates) > 1: + raise TypeError("literal/value provided multiple times") + + init = cast("Callable[..., None]", IcebergBaseModel.__init__) + init(self, term=_to_unbound_term(term), literal=_to_literal(literal_candidates[0]), **extra) @field_validator("term", mode="before") @classmethod - def _coerce_term(cls, v: Any) -> UnboundTerm[Any]: - return _to_unbound_term(v) + def _convert_term(cls, value: Any) -> UnboundTerm[Any]: + return _to_unbound_term(value) - @field_validator("literal", mode="before") + @field_validator("value", mode="before") @classmethod - def _coerce_literal(cls, v: Union[L, Literal[L]]) -> Literal[L]: - return _to_literal(v) + def _convert_value(cls, value: Any) -> Literal[Any]: + return _to_literal(value) - @field_serializer("literal") - def ser_literal(self, literal: Literal[L]) -> str: - return "Any" + @property + def literal(self) -> Literal[L]: + return self.value def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundLiteralPredicate[L]: bound_term = self.term.bind(schema, case_sensitive) diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 30b3929329..4069a95330 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -120,7 +120,7 @@ def _try_import(module_name: str, extras_name: Optional[str] = None) -> types.Mo raise NotInstalledError(msg) from None -def _transform_literal(func: Callable[[L], L], lit: Literal[L]) -> Literal[L]: +def _transform_literal(func: Callable[[Any], Any], lit: Literal[L]) -> Literal[L]: """Small helper to upwrap the value from the literal, and wrap it again.""" return literal(func(lit.value)) diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py index 7b15099105..07888dd41e 100644 --- a/tests/expressions/test_evaluator.py +++ b/tests/expressions/test_evaluator.py @@ -22,6 +22,7 @@ from pyiceberg.conversions import to_bytes from pyiceberg.expressions 
import ( And, + BooleanExpression, EqualTo, GreaterThan, GreaterThanOrEqual, @@ -30,6 +31,7 @@ IsNull, LessThan, LessThanOrEqual, + LiteralPredicate, Not, NotEqualTo, NotIn, @@ -301,7 +303,7 @@ def test_missing_stats() -> None: upper_bounds=None, ) - expressions = [ + expressions: list[BooleanExpression] = [ LessThan("no_stats", 5), LessThanOrEqual("no_stats", 30), EqualTo("no_stats", 70), @@ -324,7 +326,7 @@ def test_zero_record_file_stats(schema_data_file: Schema) -> None: file_path="file_1.parquet", file_format=FileFormat.PARQUET, partition=Record(), record_count=0 ) - expressions = [ + expressions: list[BooleanExpression] = [ LessThan("no_stats", 5), LessThanOrEqual("no_stats", 30), EqualTo("no_stats", 70), @@ -683,26 +685,27 @@ def data_file_nan() -> DataFile: def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan: Schema, data_file_nan: DataFile) -> None: - for operator in [LessThan, LessThanOrEqual]: - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] + operators: tuple[type[LiteralPredicate[Any]], ...] 
= (LessThan, LessThanOrEqual) + for operator in operators: + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) assert not should_read, "Should not match: all nan column doesn't contain number" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) assert not should_read, "Should not match: 1 is smaller than lower bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) assert should_read, "Should match: 10 is larger than lower bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) assert should_read, "Should match: no visibility" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) assert not should_read, "Should not match: all nan column doesn't contain number" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) assert not should_read, "Should not match: 1 is smaller than lower bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( # type: 
ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( data_file_nan ) assert should_read, "Should match: 10 larger than lower bound" @@ -711,31 +714,32 @@ def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_f def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal( schema_data_file_nan: Schema, data_file_nan: DataFile ) -> None: - for operator in [GreaterThan, GreaterThanOrEqual]: - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] + operators: tuple[type[LiteralPredicate[Any]], ...] = (GreaterThan, GreaterThanOrEqual) + for operator in operators: + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) assert not should_read, "Should not match: all nan column doesn't contain number" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) assert should_read, "Should match: upper bound is larger than 1" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) assert should_read, "Should match: upper bound is larger than 10" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) assert should_read, "Should match: no visibility" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) # type: 
ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) assert not should_read, "Should not match: all nan column doesn't contain number" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) assert should_read, "Should match: 1 is smaller than upper bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( data_file_nan ) assert should_read, "Should match: 10 is smaller than upper bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 30)).eval(data_file_nan) # type: ignore[arg-type] + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 30)).eval(data_file_nan) assert not should_read, "Should not match: 30 is greater than upper bound" @@ -1162,7 +1166,7 @@ def test_strict_missing_stats(strict_data_file_schema: Schema, strict_data_file_ upper_bounds=None, ) - expressions = [ + expressions: list[BooleanExpression] = [ LessThan("no_stats", 5), LessThanOrEqual("no_stats", 30), EqualTo("no_stats", 70), @@ -1185,7 +1189,7 @@ def test_strict_zero_record_file_stats(strict_data_file_schema: Schema) -> None: file_path="file_1.parquet", file_format=FileFormat.PARQUET, partition=Record(), record_count=0 ) - expressions = [ + expressions: list[BooleanExpression] = [ LessThan("no_stats", 5), LessThanOrEqual("no_stats", 30), EqualTo("no_stats", 70), diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index b66f665b05..af4cc70037 100644 --- a/tests/expressions/test_expressions.py +++ 
b/tests/expressions/test_expressions.py @@ -50,6 +50,7 @@ IsNull, LessThan, LessThanOrEqual, + LiteralPredicate, Not, NotEqualTo, NotIn, @@ -64,7 +65,7 @@ from pyiceberg.expressions.literals import Literal, literal from pyiceberg.expressions.visitors import _from_byte_buffer from pyiceberg.schema import Accessor, Schema -from pyiceberg.typedef import Record +from pyiceberg.typedef import L, Record from pyiceberg.types import ( DecimalType, DoubleType, @@ -917,7 +918,7 @@ def test_bound_less_than_or_equal(term: BoundReference[Any]) -> None: def test_equal_to() -> None: equal_to = EqualTo(Reference("a"), literal("a")) - assert equal_to.model_dump_json() == '{"term":"a","type":"eq","value":"Any"}' + assert equal_to.model_dump_json() == '{"term":"a","type":"eq","value":"a"}' assert str(equal_to) == "EqualTo(term=Reference(name='a'), literal=literal('a'))" assert repr(equal_to) == "EqualTo(term=Reference(name='a'), literal=literal('a'))" assert equal_to == eval(repr(equal_to)) @@ -926,7 +927,7 @@ def test_equal_to() -> None: def test_not_equal_to() -> None: not_equal_to = NotEqualTo(Reference("a"), literal("a")) - assert not_equal_to.model_dump_json() == '{"term":"a","type":"not-eq","value":"Any"}' + assert not_equal_to.model_dump_json() == '{"term":"a","type":"not-eq","value":"a"}' assert str(not_equal_to) == "NotEqualTo(term=Reference(name='a'), literal=literal('a'))" assert repr(not_equal_to) == "NotEqualTo(term=Reference(name='a'), literal=literal('a'))" assert not_equal_to == eval(repr(not_equal_to)) @@ -935,7 +936,7 @@ def test_not_equal_to() -> None: def test_greater_than_or_equal_to() -> None: greater_than_or_equal_to = GreaterThanOrEqual(Reference("a"), literal("a")) - assert greater_than_or_equal_to.model_dump_json() == '{"term":"a","type":"gt-eq","value":"Any"}' + assert greater_than_or_equal_to.model_dump_json() == '{"term":"a","type":"gt-eq","value":"a"}' assert str(greater_than_or_equal_to) == "GreaterThanOrEqual(term=Reference(name='a'), 
literal=literal('a'))" assert repr(greater_than_or_equal_to) == "GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert greater_than_or_equal_to == eval(repr(greater_than_or_equal_to)) @@ -944,7 +945,7 @@ def test_greater_than_or_equal_to() -> None: def test_greater_than() -> None: greater_than = GreaterThan(Reference("a"), literal("a")) - assert greater_than.model_dump_json() == '{"term":"a","type":"gt","value":"Any"}' + assert greater_than.model_dump_json() == '{"term":"a","type":"gt","value":"a"}' assert str(greater_than) == "GreaterThan(term=Reference(name='a'), literal=literal('a'))" assert repr(greater_than) == "GreaterThan(term=Reference(name='a'), literal=literal('a'))" assert greater_than == eval(repr(greater_than)) @@ -953,7 +954,7 @@ def test_greater_than() -> None: def test_less_than() -> None: less_than = LessThan(Reference("a"), literal("a")) - assert less_than.model_dump_json() == '{"term":"a","type":"lt","value":"Any"}' + assert less_than.model_dump_json() == '{"term":"a","type":"lt","value":"a"}' assert str(less_than) == "LessThan(term=Reference(name='a'), literal=literal('a'))" assert repr(less_than) == "LessThan(term=Reference(name='a'), literal=literal('a'))" assert less_than == eval(repr(less_than)) @@ -962,7 +963,7 @@ def test_less_than() -> None: def test_less_than_or_equal() -> None: less_than_or_equal = LessThanOrEqual(Reference("a"), literal("a")) - assert less_than_or_equal.model_dump_json() == '{"term":"a","type":"lt-eq","value":"Any"}' + assert less_than_or_equal.model_dump_json() == '{"term":"a","type":"lt-eq","value":"a"}' assert str(less_than_or_equal) == "LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert repr(less_than_or_equal) == "LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))" assert less_than_or_equal == eval(repr(less_than_or_equal)) @@ -971,12 +972,12 @@ def test_less_than_or_equal() -> None: def test_starts_with() -> None: starts_with = StartsWith(Reference("a"), 
literal("a")) - assert starts_with.model_dump_json() == '{"term":"a","type":"starts-with","value":"Any"}' + assert starts_with.model_dump_json() == '{"term":"a","type":"starts-with","value":"a"}' def test_not_starts_with() -> None: not_starts_with = NotStartsWith(Reference("a"), literal("a")) - assert not_starts_with.model_dump_json() == '{"term":"a","type":"not-starts-with","value":"Any"}' + assert not_starts_with.model_dump_json() == '{"term":"a","type":"not-starts-with","value":"a"}' def test_bound_reference_eval(table_schema_simple: Schema) -> None: @@ -1217,7 +1218,12 @@ def test_bind_ambiguous_name() -> None: # |_| |_|\_, |_| \_, | # |__/ |__/ -assert_type(EqualTo("a", "b"), EqualTo[str]) + +def _assert_literal_predicate_type(expr: LiteralPredicate[L]) -> None: + assert_type(expr, LiteralPredicate[L]) + + +_assert_literal_predicate_type(EqualTo("a", "b")) assert_type(In("a", ("a", "b", "c")), In[str]) assert_type(In("a", (1, 2, 3)), In[int]) assert_type(NotIn("a", ("a", "b", "c")), NotIn[str]) From f1bf81aebb19af04d5e21b0ffa2e76fd44486e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Fri, 17 Oct 2025 17:26:07 +0200 Subject: [PATCH 7/9] update: reduce code and suppress errors --- pyiceberg/expressions/__init__.py | 43 +++---------------------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index 62e7e71ef3..e951716bbd 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -30,11 +30,10 @@ Type, TypeVar, Union, - cast, ) from typing import Literal as TypingLiteral -from pydantic import ConfigDict, Field, field_validator +from pydantic import Field from pyiceberg.expressions.literals import ( AboveMax, @@ -753,45 +752,11 @@ def as_bound(self) -> Type[BoundNotIn[L]]: class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC): type: TypingLiteral["lt", "lt-eq", "gt", "gt-eq", "eq", "not-eq", "starts-with", 
"not-starts-with"] = Field(alias="type") term: UnboundTerm[Any] - value: Literal[L] = Field(alias="literal", serialization_alias="value") - + value: Literal[L] = Field() model_config = ConfigDict(populate_by_name=True, frozen=True, arbitrary_types_allowed=True) - def __init__( - self, - term: Union[str, UnboundTerm[Any], BoundReference[Any]], - literal: Union[L, Literal[L], None] = None, - **data: Any, - ) -> None: # pylint: disable=W0621 - extra = dict(data) - - literal_candidates = [] - if literal is not None: - literal_candidates.append(literal) - if "literal" in extra: - literal_candidates.append(extra.pop("literal")) - if "value" in extra: - literal_candidates.append(extra.pop("value")) - - literal_candidates = [candidate for candidate in literal_candidates if candidate is not None] - - if not literal_candidates: - raise TypeError("LiteralPredicate requires a literal or value argument") - if len(literal_candidates) > 1: - raise TypeError("literal/value provided multiple times") - - init = cast("Callable[..., None]", IcebergBaseModel.__init__) - init(self, term=_to_unbound_term(term), literal=_to_literal(literal_candidates[0]), **extra) - - @field_validator("term", mode="before") - @classmethod - def _convert_term(cls, value: Any) -> UnboundTerm[Any]: - return _to_unbound_term(value) - - @field_validator("value", mode="before") - @classmethod - def _convert_value(cls, value: Any) -> Literal[Any]: - return _to_literal(value) + def __init__(self, term: Union[str, UnboundTerm[Any], BoundReference[Any]], literal: Union[L, Literal[L]]): + super().__init__(term=_to_unbound_term(term), value=_to_literal(literal)) # type: ignore[call-arg] @property def literal(self) -> Literal[L]: From 1a3e702ea6a9b3cef5327d692badf9f62c9883ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Sun, 19 Oct 2025 18:44:02 +0200 Subject: [PATCH 8/9] fix: literalpredicate and _to_unbound only accept str or unbound term --- pyiceberg/expressions/__init__.py | 12 
+++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index e951716bbd..2d7333838c 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -52,14 +52,8 @@ ConfigDict = dict -def _to_unbound_term(term: Union[str, UnboundTerm[Any], BoundReference[Any]]) -> UnboundTerm[Any]: - if isinstance(term, str): - return Reference(term) - if isinstance(term, UnboundTerm): - return term - if isinstance(term, BoundReference): - return Reference(term.field.name) - raise ValueError(f"Expected UnboundTerm | BoundReference | str, got {type(term).__name__}") +def _to_unbound_term(term: Union[str, UnboundTerm[Any]]) -> UnboundTerm[Any]: + return Reference(term) if isinstance(term, str) else term def _to_literal_set(values: Union[Iterable[L], Iterable[Literal[L]]]) -> Set[Literal[L]]: @@ -755,7 +749,7 @@ class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC): value: Literal[L] = Field() model_config = ConfigDict(populate_by_name=True, frozen=True, arbitrary_types_allowed=True) - def __init__(self, term: Union[str, UnboundTerm[Any], BoundReference[Any]], literal: Union[L, Literal[L]]): + def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, Literal[L]]): super().__init__(term=_to_unbound_term(term), value=_to_literal(literal)) # type: ignore[call-arg] @property From 1b23faa274c49af43caab10d347f9cd6e4648f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Fern=C3=A1ndez?= Date: Sun, 19 Oct 2025 19:51:46 +0200 Subject: [PATCH 9/9] feat: add _assert_literal_predicate_type for In and NotIn --- tests/expressions/test_expressions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index af4cc70037..63673fdaeb 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -1224,6 +1224,9 @@ def 
_assert_literal_predicate_type(expr: LiteralPredicate[L]) -> None: _assert_literal_predicate_type(EqualTo("a", "b")) +_assert_literal_predicate_type(In("a", ("a", "b", "c"))) +_assert_literal_predicate_type(In("a", (1, 2, 3))) +_assert_literal_predicate_type(NotIn("a", ("a", "b", "c"))) assert_type(In("a", ("a", "b", "c")), In[str]) assert_type(In("a", (1, 2, 3)), In[int]) assert_type(NotIn("a", ("a", "b", "c")), NotIn[str])