From b2174e3d9f5dead7303cedfe8744994771a109c7 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 13:52:43 +0000 Subject: [PATCH 01/17] feat: Changes in upsert_utils.py --- pyiceberg/table/upsert_util.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index 723a89aa20..4cd1bdd2ca 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -30,13 +30,16 @@ ) -def create_match_filter(df: pyarrow_table, join_cols: list[str]) -> BooleanExpression: +def create_match_filter(df: pa.Table, join_cols: list[str]) -> BooleanExpression: unique_keys = df.select(join_cols).group_by(join_cols).aggregate([]) if len(join_cols) == 1: return In(join_cols[0], unique_keys[0].to_pylist()) else: - return Or(*[And(*[EqualTo(col, row[col]) for col in join_cols]) for row in unique_keys.to_pylist()]) + filters = [And(*[EqualTo(col, row[col]) for col in join_cols]) for row in unique_keys.to_pylist()] + if len(filters) == 1: + return filters[0] + return Or(*filters) def has_duplicate_rows(df: pyarrow_table, join_cols: list[str]) -> bool: From b8dbc9d89ffcaae604eaacd5a7811d36e80d8658 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 14:01:41 +0000 Subject: [PATCH 02/17] feat: rename --- pyiceberg/table/upsert_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index 4cd1bdd2ca..ba69bb8f58 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -30,7 +30,7 @@ ) -def create_match_filter(df: pa.Table, join_cols: list[str]) -> BooleanExpression: +def create_match_filter(df: pyarrow_table, join_cols: list[str]) -> BooleanExpression: unique_keys = df.select(join_cols).group_by(join_cols).aggregate([]) if len(join_cols) == 1: From cb7d073ebd75a8e1c47d763faa86f3b6007b00b4 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 16:21:00 +0000 Subject: [PATCH 03/17] feat: Add test cases --- tests/table/test_upsert.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index 0cfb0ba609..2deca99901 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -366,3 +366,26 @@ def test_upsert_with_identifier_fields(catalog: Catalog) -> None: assert upd.rows_updated == 1 assert upd.rows_inserted == 1 + + +def test_merge_scenario_composite_key_with_duplicate(catalog: Catalog) -> None: + """ + Tests merging 200 rows with a composite key when the source contains duplicate rows. + This verifies that the upsert logic correctly handles the case where grouping by + the composite key yields only one unique condition, thereby avoiding a TypeError. 
+ """ + identifier = "default.test_merge_scenario_composite_key_with_duplicate" + _drop_table(catalog, identifier) + + ctx = SessionContext() + + table = gen_target_iceberg_table(1, 200, True, ctx, catalog, identifier) + + source_df = gen_source_dataset(101, 300, True, True, ctx) + + res = table.upsert(df=source_df, join_cols=["order_id", "order_line_id"]) + + expected_updated = 100 + expected_inserted = 100 + + assert_upsert_result(res, expected_updated, expected_inserted) From 54fe2799b4c48716a4c3ac54c9a0ede82f287f15 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 16:45:51 +0000 Subject: [PATCH 04/17] feat: remove test case --- tests/table/test_upsert.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index 2deca99901..0cfb0ba609 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -366,26 +366,3 @@ def test_upsert_with_identifier_fields(catalog: Catalog) -> None: assert upd.rows_updated == 1 assert upd.rows_inserted == 1 - - -def test_merge_scenario_composite_key_with_duplicate(catalog: Catalog) -> None: - """ - Tests merging 200 rows with a composite key when the source contains duplicate rows. - This verifies that the upsert logic correctly handles the case where grouping by - the composite key yields only one unique condition, thereby avoiding a TypeError. - """ - identifier = "default.test_merge_scenario_composite_key_with_duplicate" - _drop_table(catalog, identifier) - - ctx = SessionContext() - - table = gen_target_iceberg_table(1, 200, True, ctx, catalog, identifier) - - source_df = gen_source_dataset(101, 300, True, True, ctx) - - res = table.upsert(df=source_df, join_cols=["order_id", "order_line_id"]) - - expected_updated = 100 - expected_inserted = 100 - - assert_upsert_result(res, expected_updated, expected_inserted) From 391b5ea621f5dbc7260084600a5d9a1cc6a4b8e0 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 20:09:52 +0000 Subject: [PATCH 05/17] feat: Add condition to solve only insert issue --- pyiceberg/table/upsert_util.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index ba69bb8f58..b61d795398 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -30,6 +30,10 @@ ) +def combine_or(a: BooleanExpression, b: BooleanExpression) -> BooleanExpression: + return Or(a, b) + + def create_match_filter(df: pyarrow_table, join_cols: list[str]) -> BooleanExpression: unique_keys = df.select(join_cols).group_by(join_cols).aggregate([]) @@ -37,9 +41,14 @@ def create_match_filter(df: pyarrow_table, join_cols: list[str]) -> BooleanExpre return In(join_cols[0], unique_keys[0].to_pylist()) else: filters = [And(*[EqualTo(col, row[col]) for col in join_cols]) for row in unique_keys.to_pylist()] + + if not filters: + return In(join_cols[0], []) + if len(filters) == 1: return filters[0] - return Or(*filters) + + return functools.reduce(combine_or, filters) def has_duplicate_rows(df: pyarrow_table, join_cols: list[str]) -> bool: @@ -89,8 +98,8 @@ def get_rows_to_update(source_table: pa.Table, target_table: pa.Table, join_cols if rows_to_update: rows_to_update_table = pa.concat_tables(rows_to_update) else: - rows_to_update_table = pa.Table.from_arrays([], names=source_table.column_names) - + empty_arrays = [pa.array([], type=field.type) for field 
in source_table.schema] + rows_to_update_table = pa.Table.from_arrays(empty_arrays, schema=source_table.schema) common_columns = set(source_table.column_names).intersection(set(target_table.column_names)) rows_to_update_table = rows_to_update_table.select(list(common_columns)) From 5023d184245e16cb359a3dad5db7af9655348d15 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 20:21:13 +0000 Subject: [PATCH 06/17] feat: Add condition to solve insert operation --- pyiceberg/table/upsert_util.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index b61d795398..693f42416b 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -16,6 +16,8 @@ # under the License. import functools import operator +from functools import reduce +from typing import List, cast import pyarrow as pa from pyarrow import Table as pyarrow_table @@ -30,25 +32,25 @@ ) -def combine_or(a: BooleanExpression, b: BooleanExpression) -> BooleanExpression: - return Or(a, b) - - -def create_match_filter(df: pyarrow_table, join_cols: list[str]) -> BooleanExpression: +def create_match_filter(df: pa.Table, join_cols: list[str]) -> BooleanExpression: unique_keys = df.select(join_cols).group_by(join_cols).aggregate([]) if len(join_cols) == 1: + # Single join column: Use the In expression return In(join_cols[0], unique_keys[0].to_pylist()) else: - filters = [And(*[EqualTo(col, row[col]) for col in join_cols]) for row in unique_keys.to_pylist()] - + # Build a list of AND expressions for each unique key, cast to BooleanExpression. + filters: List[BooleanExpression] = [ + cast(BooleanExpression, And(*[EqualTo(col, row[col]) for col in join_cols])) for row in unique_keys.to_pylist() + ] + # If no filters were produced, return an expression that always evaluates to False. if not filters: return In(join_cols[0], []) - + # If there's exactly one condition, return it directly. if len(filters) == 1: return filters[0] - - return functools.reduce(combine_or, filters) + # Otherwise, combine conditions pairwise using reduce. 
+ return reduce(lambda a, b: Or(a, b), filters) def has_duplicate_rows(df: pyarrow_table, join_cols: list[str]) -> bool: @@ -100,6 +102,7 @@ def get_rows_to_update(source_table: pa.Table, target_table: pa.Table, join_cols else: empty_arrays = [pa.array([], type=field.type) for field in source_table.schema] rows_to_update_table = pa.Table.from_arrays(empty_arrays, schema=source_table.schema) + common_columns = set(source_table.column_names).intersection(set(target_table.column_names)) rows_to_update_table = rows_to_update_table.select(list(common_columns)) From 50ab0aa942e50479d7a9a84d3bbcdcf8d12a6911 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 20:43:15 +0000 Subject: [PATCH 07/17] feat: add test cases --- tests/table/test_upsert.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index 0cfb0ba609..bda04f96a8 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -25,6 +25,7 @@ from pyiceberg.exceptions import NoSuchTableError from pyiceberg.schema import Schema from pyiceberg.table import UpsertResult +from pyiceberg.table.upsert_util import create_match_filter, get_rows_to_update from pyiceberg.types import IntegerType, NestedField, StringType from tests.catalog.test_base import InMemoryCatalog, Table @@ -366,3 +367,40 @@ def test_upsert_with_identifier_fields(catalog: Catalog) -> None: assert upd.rows_updated == 1 assert upd.rows_inserted == 1 + + +def test_create_match_filter_single_condition() -> None: + """ + Test create_match_filter with a composite key where the source yields exactly one unique key. + Expected: The function returns the single And condition directly. + """ + + data = [ + {"order_id": 101, "order_line_id": 1, "extra": "x"}, + {"order_id": 101, "order_line_id": 1, "extra": "x"}, # duplicate + ] + schema = pa.schema([pa.field("order_id", pa.int32()), pa.field("order_line_id", pa.int32()), pa.field("extra", pa.string())]) + table = pa.Table.from_pylist(data, schema=schema) + expr = create_match_filter(table, ["order_id", "order_line_id"]) + expr_str = str(expr) + + assert "And(" in expr_str, f"Expected And condition but got: {expr_str}" + assert "Or(" not in expr_str, "Did not expect an Or condition when only one unique key exists" + + +def test_get_rows_to_update_empty_source() -> None: + """ + Test get_rows_to_update when there are no rows to update. + Expected: It returns an empty table that matches the source schema. + """ + + schema = pa.schema([pa.field("order_id", pa.int32()), pa.field("order_line_id", pa.int32()), pa.field("extra", pa.string())]) + source_table = pa.Table.from_pydict({"order_id": [101], "order_line_id": [1], "extra": ["x"]}, schema=schema) + + target_table = pa.Table.from_pydict({"order_id": [102], "order_line_id": [2], "extra": ["y"]}, schema=schema) + + updated_table = get_rows_to_update(source_table, target_table, ["order_id", "order_line_id"]) + + assert updated_table.num_rows == 0, "Expected empty table when there are no rows to update." + + assert updated_table.schema == source_table.schema, "Schema mismatch on empty table creation." 
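
For context on the TypeError that the docstring in [PATCH 03/17] and the create_match_filter changes above guard against: pyiceberg's Or (like And) appears to require at least two operands, so when de-duplicating a composite key leaves exactly one unique row, the old Or(*filters) call receives a single argument and fails. A minimal sketch of that failure mode, assuming only the pyiceberg.expressions and pyarrow APIs already used in the diffs:

import pyarrow as pa

from pyiceberg.expressions import And, EqualTo, Or

# A composite key whose de-duplication leaves a single unique value, mirroring
# the else branch of create_match_filter before these patches.
df = pa.Table.from_pylist(
    [
        {"order_id": 101, "order_line_id": 1},
        {"order_id": 101, "order_line_id": 1},  # duplicate source row
    ]
)
join_cols = ["order_id", "order_line_id"]
unique_keys = df.select(join_cols).group_by(join_cols).aggregate([])
filters = [
    And(*[EqualTo(col, row[col]) for col in join_cols])
    for row in unique_keys.to_pylist()
]

# Pre-fix behaviour: with exactly one filter, Or(*filters) hands Or a single
# operand and is expected to raise the TypeError described in PATCH 03's
# docstring; the patched code returns filters[0] directly instead.
try:
    Or(*filters)
except TypeError as exc:
    print(f"single unique composite key: {exc}")
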
From 9b31c00c7984eb5cc917c3e19a492aad366a44fc Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 20:48:45 +0000 Subject: [PATCH 08/17] feat: Remove one test case --- tests/table/test_upsert.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index bda04f96a8..2901ee7ddc 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -386,21 +386,3 @@ def test_create_match_filter_single_condition() -> None: assert "And(" in expr_str, f"Expected And condition but got: {expr_str}" assert "Or(" not in expr_str, "Did not expect an Or condition when only one unique key exists" - - -def test_get_rows_to_update_empty_source() -> None: - """ - Test get_rows_to_update when there are no rows to update. - Expected: It returns an empty table that matches the source schema. - """ - - schema = pa.schema([pa.field("order_id", pa.int32()), pa.field("order_line_id", pa.int32()), pa.field("extra", pa.string())]) - source_table = pa.Table.from_pydict({"order_id": [101], "order_line_id": [1], "extra": ["x"]}, schema=schema) - - target_table = pa.Table.from_pydict({"order_id": [102], "order_line_id": [2], "extra": ["y"]}, schema=schema) - - updated_table = get_rows_to_update(source_table, target_table, ["order_id", "order_line_id"]) - - assert updated_table.num_rows == 0, "Expected empty table when there are no rows to update." - - assert updated_table.schema == source_table.schema, "Schema mismatch on empty table creation." From 0bdb9c39aafc40ba97b5be2f960cce659eb4317e Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 20:51:35 +0000 Subject: [PATCH 09/17] feat: Remove unwanted imports --- pyiceberg/table/upsert_util.py | 11 ++++------- tests/table/test_upsert.py | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index 693f42416b..1a2ffbc06a 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -16,7 +16,6 @@ # under the License. import functools import operator -from functools import reduce from typing import List, cast import pyarrow as pa @@ -36,21 +35,19 @@ def create_match_filter(df: pa.Table, join_cols: list[str]) -> BooleanExpression unique_keys = df.select(join_cols).group_by(join_cols).aggregate([]) if len(join_cols) == 1: - # Single join column: Use the In expression return In(join_cols[0], unique_keys[0].to_pylist()) else: - # Build a list of AND expressions for each unique key, cast to BooleanExpression. filters: List[BooleanExpression] = [ cast(BooleanExpression, And(*[EqualTo(col, row[col]) for col in join_cols])) for row in unique_keys.to_pylist() ] - # If no filters were produced, return an expression that always evaluates to False. + if not filters: return In(join_cols[0], []) - # If there's exactly one condition, return it directly. + if len(filters) == 1: return filters[0] - # Otherwise, combine conditions pairwise using reduce. 
- return reduce(lambda a, b: Or(a, b), filters) + + return functools.reduce(lambda a, b: Or(a, b), filters) def has_duplicate_rows(df: pyarrow_table, join_cols: list[str]) -> bool: diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index 2901ee7ddc..e68166dbac 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -25,7 +25,7 @@ from pyiceberg.exceptions import NoSuchTableError from pyiceberg.schema import Schema from pyiceberg.table import UpsertResult -from pyiceberg.table.upsert_util import create_match_filter, get_rows_to_update +from pyiceberg.table.upsert_util import create_match_filter from pyiceberg.types import IntegerType, NestedField, StringType from tests.catalog.test_base import InMemoryCatalog, Table From 923f0654ad7234e88bd957335f542f44d485fcdb Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Thu, 20 Feb 2025 20:52:05 +0000 Subject: [PATCH 10/17] feat: rename --- pyiceberg/table/upsert_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index 1a2ffbc06a..b557813ce7 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -31,7 +31,7 @@ ) -def create_match_filter(df: pa.Table, join_cols: list[str]) -> BooleanExpression: +def create_match_filter(df: pyarrow_table, join_cols: list[str]) -> BooleanExpression: unique_keys = df.select(join_cols).group_by(join_cols).aggregate([]) if len(join_cols) == 1: From 4cbdaddf9f617b782ca06671986216868b595e92 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Fri, 21 Feb 2025 14:43:15 +0530 Subject: [PATCH 11/17] Update pyiceberg/table/upsert_util.py Co-authored-by: Fokko Driesprong --- pyiceberg/table/upsert_util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index b557813ce7..7351ae7840 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -97,8 +97,7 @@ def get_rows_to_update(source_table: pa.Table, target_table: pa.Table, join_cols if rows_to_update: rows_to_update_table = pa.concat_tables(rows_to_update) else: - empty_arrays = [pa.array([], type=field.type) for field in source_table.schema] - rows_to_update_table = pa.Table.from_arrays(empty_arrays, schema=source_table.schema) + rows_to_update_table = source_table.schema.empty_table() common_columns = set(source_table.column_names).intersection(set(target_table.column_names)) rows_to_update_table = rows_to_update_table.select(list(common_columns)) From d14aad345adb2d161ec178618ddddb998b3e1239 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Fri, 21 Feb 2025 14:52:17 +0530 Subject: [PATCH 12/17] Update pyiceberg/table/upsert_util.py Co-authored-by: Fokko Driesprong --- pyiceberg/table/upsert_util.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index 7351ae7840..aef2f9df36 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -41,13 +41,12 @@ def create_match_filter(df: pyarrow_table, join_cols: list[str]) -> BooleanExpre cast(BooleanExpression, And(*[EqualTo(col, row[col]) for col in join_cols])) for row in unique_keys.to_pylist() ] - if not filters: - return In(join_cols[0], []) - - if len(filters) == 1: + if len(filters) == 0: + return AlwaysFalse() + elif len(filters) == 1: 
return filters[0] - - return functools.reduce(lambda a, b: Or(a, b), filters) + else: + return functools.reduce(lambda a, b: Or(a, b), filters) def has_duplicate_rows(df: pyarrow_table, join_cols: list[str]) -> bool: From 7571893386118400edb96488beffd3e217cf8a27 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Fri, 21 Feb 2025 15:15:41 +0530 Subject: [PATCH 13/17] Update tests/table/test_upsert.py Co-authored-by: Fokko Driesprong --- tests/table/test_upsert.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index e68166dbac..1a2406de3e 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -382,7 +382,7 @@ def test_create_match_filter_single_condition() -> None: schema = pa.schema([pa.field("order_id", pa.int32()), pa.field("order_line_id", pa.int32()), pa.field("extra", pa.string())]) table = pa.Table.from_pylist(data, schema=schema) expr = create_match_filter(table, ["order_id", "order_line_id"]) - expr_str = str(expr) - - assert "And(" in expr_str, f"Expected And condition but got: {expr_str}" - assert "Or(" not in expr_str, "Did not expect an Or condition when only one unique key exists" + assert expr == And( + EqualTo(term=Reference(name="order_id"), literal=LongLiteral(101)), + EqualTo(term=Reference(name="order_line_id"), literal=LongLiteral(1)), + ) From 3e14cae8ccb9d2c53f15094c14cb2a54a8e06b43 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Fri, 21 Feb 2025 09:49:01 +0000 Subject: [PATCH 14/17] feat: Add import --- tests/table/test_upsert.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index e68166dbac..c1bd3c1558 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -26,6 +26,8 @@ from pyiceberg.schema import Schema from pyiceberg.table import UpsertResult from pyiceberg.table.upsert_util import create_match_filter +from pyiceberg.expressions import Reference, EqualTo +from pyiceberg.expressions.literals import LongLiteral from pyiceberg.types import IntegerType, NestedField, StringType from tests.catalog.test_base import InMemoryCatalog, Table From 2c326fabb435e202be8a3ba31b0b4bcb65cb9647 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Fri, 21 Feb 2025 09:53:48 +0000 Subject: [PATCH 15/17] feat:add import --- pyiceberg/table/upsert_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiceberg/table/upsert_util.py b/pyiceberg/table/upsert_util.py index aef2f9df36..c12351d45c 100644 --- a/pyiceberg/table/upsert_util.py +++ b/pyiceberg/table/upsert_util.py @@ -23,6 +23,7 @@ from pyarrow import compute as pc from pyiceberg.expressions import ( + AlwaysFalse, And, BooleanExpression, EqualTo, From fded98427bbadf32de61f32ac36f853715e96091 Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Fri, 21 Feb 2025 15:30:58 +0530 Subject: [PATCH 16/17] Update tests/table/test_upsert.py Co-authored-by: Fokko Driesprong --- tests/table/test_upsert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index 476acf4cf0..ffb233105a 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -26,7 +26,7 @@ from pyiceberg.schema import Schema from pyiceberg.table import UpsertResult from pyiceberg.table.upsert_util import create_match_filter -from 
pyiceberg.expressions import Reference, EqualTo +from pyiceberg.expressions import And, Reference, EqualTo from pyiceberg.expressions.literals import LongLiteral from pyiceberg.types import IntegerType, NestedField, StringType from tests.catalog.test_base import InMemoryCatalog, Table From 894810a2199da331b1493ad64b9e2093c2a1128d Mon Sep 17 00:00:00 2001 From: Om Kenge <88768848+omkenge@users.noreply.github.com> Date: Fri, 21 Feb 2025 15:49:20 +0530 Subject: [PATCH 17/17] Update tests/table/test_upsert.py Co-authored-by: Fokko Driesprong --- tests/table/test_upsert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py index ffb233105a..c97015e650 100644 --- a/tests/table/test_upsert.py +++ b/tests/table/test_upsert.py @@ -23,11 +23,11 @@ from pyiceberg.catalog import Catalog from pyiceberg.exceptions import NoSuchTableError +from pyiceberg.expressions import And, EqualTo, Reference +from pyiceberg.expressions.literals import LongLiteral from pyiceberg.schema import Schema from pyiceberg.table import UpsertResult from pyiceberg.table.upsert_util import create_match_filter -from pyiceberg.expressions import And, Reference, EqualTo -from pyiceberg.expressions.literals import LongLiteral from pyiceberg.types import IntegerType, NestedField, StringType from tests.catalog.test_base import InMemoryCatalog, Table
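
Taken together, the series leaves create_match_filter handling the empty, single-key, and multi-key cases explicitly. The function below is reconstructed from the cumulative diff as a reference sketch of that end state, not an additional patch; the companion change in [PATCH 11/17] likewise makes the no-update path of get_rows_to_update return source_table.schema.empty_table(), so an empty result still carries the source schema.

import functools
from typing import List, cast

from pyarrow import Table as pyarrow_table

from pyiceberg.expressions import (
    AlwaysFalse,
    And,
    BooleanExpression,
    EqualTo,
    In,
    Or,
)


def create_match_filter(df: pyarrow_table, join_cols: list[str]) -> BooleanExpression:
    unique_keys = df.select(join_cols).group_by(join_cols).aggregate([])

    if len(join_cols) == 1:
        return In(join_cols[0], unique_keys[0].to_pylist())
    else:
        filters: List[BooleanExpression] = [
            cast(BooleanExpression, And(*[EqualTo(col, row[col]) for col in join_cols]))
            for row in unique_keys.to_pylist()
        ]

        if len(filters) == 0:
            # No unique keys: match nothing rather than build an empty Or.
            return AlwaysFalse()
        elif len(filters) == 1:
            # One unique composite key: return the single And directly instead
            # of handing Or a lone operand.
            return filters[0]
        else:
            return functools.reduce(lambda a, b: Or(a, b), filters)
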