From fab8e7f8a30685716b73d3a56ea6247e9d00198d Mon Sep 17 00:00:00 2001 From: Jesse Tuglu Date: Fri, 3 Oct 2025 14:33:16 -0700 Subject: [PATCH 1/2] Support arbitrary literal comparisons in BETWEEN operator --- mkdocs/docs/row-filter-syntax.md | 4 +++- pyiceberg/expressions/parser.py | 3 +-- tests/expressions/test_parser.py | 26 ++++++++++++++------------ 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/mkdocs/docs/row-filter-syntax.md b/mkdocs/docs/row-filter-syntax.md index ce3b46c092..5cc549a4a6 100644 --- a/mkdocs/docs/row-filter-syntax.md +++ b/mkdocs/docs/row-filter-syntax.md @@ -102,11 +102,13 @@ column NOT LIKE 'prefix%' ## BETWEEN -The BETWEEN operator filters a numeric value against an inclusive range, e.g. `a between 1 and 2` is equivalent to `a >= 1 and a <= 2`. +The BETWEEN operator filters a column against an inclusive range of two comparable literals, e.g. `a between 1 and 2` is equivalent to `a >= 1 and a <= 2`. ```sql column BETWEEN 1 AND 2 column BETWEEN 1.0 AND 2.0 +column BETWEEN '2025-01-01' AND '2025-01-02' +column BETWEEN '2025-01-01T00:00:00.000000' AND '2025-01-01T00:00:00.000000' ``` ## Logical Operations diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py index 1966363829..608467918f 100644 --- a/pyiceberg/expressions/parser.py +++ b/pyiceberg/expressions/parser.py @@ -107,7 +107,6 @@ def _(result: ParseResults) -> Reference: string = sgl_quoted_string.set_results_name("raw_quoted_string") decimal = common.real().set_results_name("decimal") integer = common.signed_integer().set_results_name("integer") -number = common.number().set_results_name("number") literal = Group(string | decimal | integer | boolean).set_results_name("literal") literal_set = Group( DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | DelimitedList(boolean) @@ -151,7 +150,7 @@ def _(result: ParseResults) -> Literal[L]: left_ref = column + comparison_op + literal right_ref = literal + comparison_op + column comparison = left_ref | right_ref -between = column + BETWEEN + number + AND + number +between = column + BETWEEN + literal + AND + literal @between.set_parse_action diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py index 152ac03e2a..28d7cf110f 100644 --- a/tests/expressions/test_parser.py +++ b/tests/expressions/test_parser.py @@ -42,7 +42,7 @@ Reference, StartsWith, ) -from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral +from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral, literal def test_always_true() -> None: @@ -241,7 +241,8 @@ def test_quoted_column_with_spaces() -> None: assert EqualTo("Foo Bar", "data") == parser.parse("\"Foo Bar\" = 'data'") -def test_valid_between() -> None: +def test_valid_between_with_numerics() -> None: + # numerics assert And( left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)), right=LessThanOrEqual(Reference(name="foo"), LongLiteral(3)), @@ -254,16 +255,17 @@ def test_valid_between() -> None: left=GreaterThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(1.0))), right=LessThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(4.0))), ) == parser.parse("foo between 1.0 and 4.0") - assert parser.parse("foo between 1 and 3") == parser.parse("1 <= foo and foo <= 3") + # dates + assert And( + left=GreaterThanOrEqual(Reference(name="foo"), literal("2025-05-10")), + right=LessThanOrEqual(Reference(name="foo"), literal("2025-05-12")), + ) == parser.parse("foo between '2025-05-10' and '2025-05-12'") -def test_invalid_between() -> None: - # boolean - with pytest.raises(ParseException) as exc_info: - parser.parse("foo between true and false") - assert "Expected number, found 'true'" in str(exc_info) + # timestamps + assert And( + left=GreaterThanOrEqual(Reference(name="foo"), literal("2025-01-01T00:00:00.000000")), + right=LessThanOrEqual(Reference(name="foo"), literal("2025-01-10T12:00:00.000000")), + ) == parser.parse("foo between '2025-01-01T00:00:00.000000' and '2025-01-10T12:00:00.000000'") - # string - with pytest.raises(ParseException) as exc_info: - parser.parse("foo between 'a' and 'b'") - assert 'Expected number, found "\'"' in str(exc_info) + assert parser.parse("foo between 1 and 3") == parser.parse("1 <= foo and foo <= 3") From 5727e075d6ba22ac0b5458154e05aae76a88c388 Mon Sep 17 00:00:00 2001 From: Jesse Tuglu Date: Fri, 3 Oct 2025 18:34:44 -0700 Subject: [PATCH 2/2] Update doc --- mkdocs/docs/row-filter-syntax.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/row-filter-syntax.md b/mkdocs/docs/row-filter-syntax.md index 5cc549a4a6..bffb97c225 100644 --- a/mkdocs/docs/row-filter-syntax.md +++ b/mkdocs/docs/row-filter-syntax.md @@ -108,7 +108,7 @@ The BETWEEN operator filters a column against an inclusive range of two comparab column BETWEEN 1 AND 2 column BETWEEN 1.0 AND 2.0 column BETWEEN '2025-01-01' AND '2025-01-02' -column BETWEEN '2025-01-01T00:00:00.000000' AND '2025-01-01T00:00:00.000000' +column BETWEEN '2025-01-01T00:00:00.000000' AND '2025-01-02T12:00:00.000000' ``` ## Logical Operations