Skip to content

Commit c6b0f5f

Browse files
committed
add case for column projection
1 parent bc89cfa commit c6b0f5f

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

pyiceberg/expressions/visitors.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,12 @@ def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpressi
897897
file_column_name = self.file_schema.find_column_name(field.field_id)
898898

899899
if file_column_name is None:
900+
# In the case of column projection, the field might not be present in the file schema
901+
# If the field has no initial_default, return AlwaysTrue to include all rows
902+
# for further evaluation
903+
if field.initial_default is None:
904+
return AlwaysTrue()
905+
900906
# In the case of schema evolution, the column might not be present
901907
# we can use the default value as a constant and evaluate it against
902908
# the predicate

tests/expressions/test_visitors.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1799,3 +1799,29 @@ def test_translate_column_names_always_true_false() -> None:
17991799
# Test AlwaysFalse
18001800
translated_false = translate_column_names(AlwaysFalse(), file_schema, case_sensitive=True)
18011801
assert translated_false == AlwaysFalse()
1802+
1803+
1804+
def test_translate_column_names_column_projection_missing_field_no_initial_default() -> None:
    """Check that a predicate on a column absent from the file schema translates to AlwaysTrue.

    The predicate is bound against a table schema containing ``missing_col`` (declared
    without an ``initial_default``), then translated against a file schema that lacks
    that field id. The expected translation result is ``AlwaysTrue()``.
    """
    # The data file only carries field_id=1; field_id=2 is not present in it.
    data_file_schema = Schema(
        NestedField(field_id=1, name="existing_col", field_type=StringType(), required=False),
        schema_id=1,
    )

    # The table schema has both fields; "missing_col" is declared without an initial_default.
    table_schema = Schema(
        NestedField(field_id=1, name="existing_col", field_type=StringType(), required=False),
        NestedField(field_id=2, name="missing_col", field_type=IntegerType(), required=False),
        schema_id=1,
    )

    # Bind an equality predicate on the column that the file does not contain.
    binder = BindVisitor(schema=table_schema, case_sensitive=True)
    bound_predicate = visit(EqualTo("missing_col", 42), visitor=binder)

    # Translate the bound predicate into the file's column names.
    result = translate_column_names(bound_predicate, data_file_schema, case_sensitive=True)

    # With no initial_default to substitute, translation must keep all rows for later evaluation.
    assert result == AlwaysTrue()

0 commit comments

Comments
 (0)