Skip to content

Commit 1ce4889

Browse files
committed
project value in _ColumnNameTranslator
1 parent 10afbb8 commit 1ce4889

File tree

2 files changed

+13
-15
lines changed

2 files changed

+13
-15
lines changed

pyiceberg/expressions/visitors.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
Dict,
2424
Generic,
2525
List,
26+
Optional,
2627
Set,
2728
SupportsFloat,
2829
Tuple,
@@ -861,6 +862,7 @@ class _ColumnNameTranslator(BooleanExpressionVisitor[BooleanExpression]):
861862
Args:
862863
file_schema (Schema): The schema of the file.
863864
case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema, defaults to True.
865+
projected_fields (Dict[str, Any]): Partition field values for missing fields from projection.
864866
865867
Raises:
866868
TypeError: In the case of an UnboundPredicate.
@@ -869,10 +871,12 @@ class _ColumnNameTranslator(BooleanExpressionVisitor[BooleanExpression]):
869871

870872
file_schema: Schema
871873
case_sensitive: bool
874+
projected_fields: Dict[str, Any]
872875

873-
def __init__(self, file_schema: Schema, case_sensitive: bool) -> None:
876+
def __init__(self, file_schema: Schema, case_sensitive: bool, projected_fields: Optional[Dict[str, Any]] = None) -> None:
874877
self.file_schema = file_schema
875878
self.case_sensitive = case_sensitive
879+
self.projected_fields = projected_fields or {}
876880

877881
def visit_true(self) -> BooleanExpression:
878882
return AlwaysTrue()
@@ -912,7 +916,9 @@ def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpressi
912916

913917
return (
914918
AlwaysTrue()
915-
if expression_evaluator(Schema(field), pred, case_sensitive=self.case_sensitive)(Record(field.initial_default))
919+
if expression_evaluator(Schema(field), pred, case_sensitive=self.case_sensitive)(
920+
Record(field.initial_default or self.projected_fields.get(field.name, None))
921+
)
916922
else AlwaysFalse()
917923
)
918924

@@ -926,8 +932,10 @@ def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpressi
926932
raise ValueError(f"Unsupported predicate: {predicate}")
927933

928934

929-
def translate_column_names(expr: BooleanExpression, file_schema: Schema, case_sensitive: bool) -> BooleanExpression:
930-
return visit(expr, _ColumnNameTranslator(file_schema, case_sensitive))
935+
def translate_column_names(
936+
expr: BooleanExpression, file_schema: Schema, case_sensitive: bool, projected_fields: Optional[Dict[str, Any]] = None
937+
) -> BooleanExpression:
938+
return visit(expr, _ColumnNameTranslator(file_schema, case_sensitive, projected_fields))
931939

932940

933941
class _ProjectedColumnsEvaluator(BooleanExpressionVisitor[BooleanExpression]):

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@
7878
from pyiceberg.expressions.visitors import (
7979
BoundBooleanExpressionVisitor,
8080
bind,
81-
evaluate_projected_columns,
8281
extract_field_ids,
8382
translate_column_names,
8483
)
@@ -1469,17 +1468,8 @@ def _task_to_record_batches(
14691468

14701469
pyarrow_filter = None
14711470
if bound_row_filter is not AlwaysTrue():
1472-
evaluated_projected_columns_filter = evaluate_projected_columns(
1473-
bound_row_filter,
1474-
file_schema,
1475-
projected_schema,
1476-
case_sensitive=case_sensitive,
1477-
projected_missing_fields=projected_missing_fields,
1478-
)
14791471
translated_row_filter = translate_column_names(
1480-
evaluated_projected_columns_filter,
1481-
file_schema,
1482-
case_sensitive=case_sensitive,
1472+
bound_row_filter, file_schema, case_sensitive=case_sensitive, projected_fields=projected_missing_fields
14831473
)
14841474
bound_file_filter = bind(file_schema, translated_row_filter, case_sensitive=case_sensitive)
14851475
pyarrow_filter = expression_to_pyarrow(bound_file_filter)

0 commit comments

Comments
 (0)