Add between keyword

jtuglu1 · jtuglu1 · commit c6b65a62d8b1 · 2025-08-15T19:19:34.000-07:00
diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
@@ -79,6 +79,7 @@
 NULL = CaselessKeyword("null")
 NAN = CaselessKeyword("nan")
 LIKE = CaselessKeyword("like")
+BETWEEN = CaselessKeyword("between")
 
 unquoted_identifier = Word(alphas + "_", alphanums + "_$")
 quoted_identifier = QuotedString('"', escChar="\\", unquoteResults=True)
@@ -106,6 +107,7 @@ def _(result: ParseResults) -> Reference:
 string = sgl_quoted_string.set_results_name("raw_quoted_string")
 decimal = common.real().set_results_name("decimal")
 integer = common.signed_integer().set_results_name("integer")
+number = common.number().set_results_name("number")
 literal = Group(string | decimal | integer | boolean).set_results_name("literal")
 literal_set = Group(
     DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | DelimitedList(boolean)
@@ -149,8 +151,17 @@ def _(result: ParseResults) -> Literal[L]:
 left_ref = column + comparison_op + literal
 right_ref = literal + comparison_op + column
 comparison = left_ref | right_ref
+between = column + BETWEEN + number + AND + number
 
 
+@between.set_parse_action
+def _(result: ParseResults) -> BooleanExpression:
+    """Rewrite ``<column> BETWEEN <low> AND <high>`` as ``column >= low AND column <= high``."""
+    # Tokens follow the grammar order: column, BETWEEN, low, AND, high.
+    low, high = result[2], result[4]
+    return And(GreaterThanOrEqual(result.column, low), LessThanOrEqual(result.column, high))
+
+
 @left_ref.set_parse_action
 def _(result: ParseResults) -> BooleanExpression:
     if result.op == "<":
@@ -258,7 +269,7 @@ def _evaluate_like_statement(result: ParseResults) -> BooleanExpression:
         return EqualTo(result.column, StringLiteral(literal_like.value.replace("\\%", "%")))
 
 
-predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate")
+predicate = (between | comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate")
 
 
 def handle_not(result: ParseResults) -> Not:
@@ -297,7 +308,6 @@ def handle_always_expression(result: ParseResults) -> BooleanExpression:
     .add_parse_action(handle_always_expression)
 )
 
-
 def parse(expr: str) -> BooleanExpression:
     """Parse a boolean expression."""
     return boolean_expression.parse_string(expr, parse_all=True)[0]
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
@@ -39,9 +39,9 @@
     NotNull,
     NotStartsWith,
     Or,
-    StartsWith,
+    StartsWith, Reference,
 )
-from pyiceberg.expressions.literals import DecimalLiteral
+from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral
 
 
 def test_always_true() -> None:
@@ -238,3 +238,35 @@ def test_quoted_column_with_dots() -> None:
 
 def test_quoted_column_with_spaces() -> None:
     assert EqualTo("Foo Bar", "data") == parser.parse("\"Foo Bar\" = 'data'")
+
+
+def test_valid_between() -> None:
+    """BETWEEN parses to a ``>=`` / ``<=`` pair joined by ``And``."""
+    # integer bounds
+    assert And(
+        left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)),
+        right=LessThanOrEqual(Reference(name="foo"), LongLiteral(3)),
+    ) == parser.parse("foo between 1 and 3")
+    # identical lower and upper bound
+    assert And(
+        left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)),
+        right=LessThanOrEqual(Reference(name="foo"), LongLiteral(1)),
+    ) == parser.parse("foo between 1 and 1")
+    # decimal bounds
+    assert And(
+        left=GreaterThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(1.0))),
+        right=LessThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(4.0))),
+    ) == parser.parse("foo between 1.0 and 4.0")
+
+
+def test_invalid_between() -> None:
+    """Boolean and string bounds make the parser raise ``ParseException``."""
+    # boolean
+    with pytest.raises(ParseException) as exc_info:
+        parser.parse("foo between true and false")
+    assert "Expected number, found 'true'" in str(exc_info.value)
+
+    # string
+    with pytest.raises(ParseException) as exc_info:
+        parser.parse("foo between 'a' and 'b'")
+    assert "Expected number, found \"\'\"" in str(exc_info.value)