Skip to content

Commit bc8d5c9

Browse files
committed
fix logic
1 parent f9b53e0 commit bc8d5c9

File tree

2 files changed

+69
-3
lines changed

2 files changed

+69
-3
lines changed

pyiceberg/expressions/visitors.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -915,11 +915,15 @@ def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpressi
915915
else:
916916
raise ValueError(f"Unsupported predicate: {predicate}")
917917

918+
# Evaluate column projection first if it exists
919+
if projected_field_value := self.projected_field_values.get(field.name):
920+
if expression_evaluator(Schema(field), pred, case_sensitive=self.case_sensitive)(Record(projected_field_value)):
921+
return AlwaysTrue()
922+
923+
# Evaluate initial_default value
918924
return (
919925
AlwaysTrue()
920-
if expression_evaluator(Schema(field), pred, case_sensitive=self.case_sensitive)(
921-
Record(self.projected_field_values.get(field.name, None) or field.initial_default)
922-
)
926+
if expression_evaluator(Schema(field), pred, case_sensitive=self.case_sensitive)(Record(field.initial_default))
923927
else AlwaysFalse()
924928
)
925929

tests/expressions/test_visitors.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1842,3 +1842,65 @@ def test_translate_column_names_missing_column_with_projected_field_mismatch() -
18421842

18431843
# Should evaluate to AlwaysFalse since projected field value does not match the expression literal
18441844
assert translated_expr == AlwaysFalse()
1845+
1846+
1847+
def test_translate_column_names_missing_column_projected_field_fallbacks_to_initial_default() -> None:
1848+
"""Test translate_column_names when projected field value doesn't match but initial_default does."""
1849+
# Original schema with a field that has an initial_default
1850+
original_schema = Schema(
1851+
NestedField(field_id=1, name="existing_col", field_type=StringType(), required=False),
1852+
NestedField(field_id=2, name="missing_col", field_type=IntegerType(), required=False, initial_default=42),
1853+
schema_id=1,
1854+
)
1855+
1856+
# Create bound expression for the missing column that would match initial_default
1857+
unbound_expr = EqualTo("missing_col", 42)
1858+
bound_expr = visit(unbound_expr, visitor=BindVisitor(schema=original_schema, case_sensitive=True))
1859+
1860+
# File schema only has the existing column (field_id=1), missing field_id=2
1861+
file_schema = Schema(
1862+
NestedField(field_id=1, name="existing_col", field_type=StringType(), required=False),
1863+
schema_id=1,
1864+
)
1865+
1866+
# Projected field value that differs from both the expression literal and initial_default
1867+
projected_field_values = {"missing_col_1": 10} # This doesn't match expression literal (42)
1868+
1869+
# Translate column names
1870+
translated_expr = translate_column_names(
1871+
bound_expr, file_schema, case_sensitive=True, projected_field_values=projected_field_values
1872+
)
1873+
1874+
# Should evaluate to AlwaysTrue since projected field value doesn't match but initial_default does
1875+
assert translated_expr == AlwaysTrue()
1876+
1877+
1878+
def test_translate_column_names_missing_column_projected_field_matches_initial_default_mismatch() -> None:
1879+
"""Test translate_column_names when neither the projected field value nor initial_default matches."""
1880+
# Original schema with a field that has an initial_default that doesn't match the expression
1881+
original_schema = Schema(
1882+
NestedField(field_id=1, name="existing_col", field_type=StringType(), required=False),
1883+
NestedField(field_id=2, name="missing_col", field_type=IntegerType(), required=False, initial_default=10),
1884+
schema_id=1,
1885+
)
1886+
1887+
# Create bound expression for the missing column
1888+
unbound_expr = EqualTo("missing_col", 42)
1889+
bound_expr = visit(unbound_expr, visitor=BindVisitor(schema=original_schema, case_sensitive=True))
1890+
1891+
# File schema only has the existing column (field_id=1), missing field_id=2
1892+
file_schema = Schema(
1893+
NestedField(field_id=1, name="existing_col", field_type=StringType(), required=False),
1894+
schema_id=1,
1895+
)
1896+
1897+
# Projected field value that equals initial_default (10) but does not match the expression literal
1898+
projected_field_values = {"missing_col_1": 10} # This doesn't match expression literal (42)
1899+
1900+
# Translate column names
1901+
translated_expr = translate_column_names(
1902+
bound_expr, file_schema, case_sensitive=True, projected_field_values=projected_field_values
1903+
)
1904+
1905+
# Should evaluate to AlwaysFalse since neither the projected field value nor initial_default matches
1906+
assert translated_expr == AlwaysFalse()

0 commit comments

Comments
 (0)