@@ -861,6 +861,7 @@ class _ColumnNameTranslator(BooleanExpressionVisitor[BooleanExpression]):
861861 Args:
862862 file_schema (Schema): The schema of the file.
863863 case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema, defaults to True.
864+ projected_field_values (Dict[str, Any]): Values for projected fields not present in the data file.
864865
865866 Raises:
866867 TypeError: In the case of an UnboundPredicate.
@@ -869,10 +870,12 @@ class _ColumnNameTranslator(BooleanExpressionVisitor[BooleanExpression]):
869870
870871 file_schema : Schema
871872 case_sensitive : bool
873+ projected_field_values : Dict [str , Any ]
872874
def __init__(self, file_schema: Schema, case_sensitive: bool, projected_field_values: Dict[str, Any] = EMPTY_DICT) -> None:
    """Store the inputs this visitor needs to translate bound predicates.

    Args:
        file_schema (Schema): The schema of the file.
        case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema.
        projected_field_values (Dict[str, Any]): Values for projected fields not present in the data file.
            Any falsy argument (such as an empty mapping) is stored as a new empty dict.
    """
    self.file_schema = file_schema
    self.case_sensitive = case_sensitive
    if projected_field_values:
        self.projected_field_values = projected_field_values
    else:
        # Normalise a falsy argument to a fresh dict so later lookups can call .get() on it.
        self.projected_field_values = {}
876879
877880 def visit_true (self ) -> BooleanExpression :
878881 return AlwaysTrue ()
@@ -897,9 +900,8 @@ def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpressi
897900 file_column_name = self .file_schema .find_column_name (field .field_id )
898901
899902 if file_column_name is None :
900- # In the case of schema evolution, the column might not be present
901- # we can use the default value as a constant and evaluate it against
902- # the predicate
903+ # In the case of schema evolution or column projection, the field might not be present in the file schema.
904+ # we can use the projected value or the field's default value as a constant and evaluate it against the predicate
903905 pred : BooleanExpression
904906 if isinstance (predicate , BoundUnaryPredicate ):
905907 pred = predicate .as_unbound (field .name )
@@ -910,6 +912,14 @@ def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpressi
910912 else :
911913 raise ValueError (f"Unsupported predicate: { predicate } " )
912914
915+ # In the order described by the "Column Projection" section of the Iceberg spec:
916+ # https://iceberg.apache.org/spec/#column-projection
917+ # Evaluate column projection first if it exists
918+ if projected_field_value := self .projected_field_values .get (field .name ):
919+ if expression_evaluator (Schema (field ), pred , case_sensitive = self .case_sensitive )(Record (projected_field_value )):
920+ return AlwaysTrue ()
921+
922+ # Evaluate initial_default value
913923 return (
914924 AlwaysTrue ()
915925 if expression_evaluator (Schema (field ), pred , case_sensitive = self .case_sensitive )(Record (field .initial_default ))
@@ -926,8 +936,10 @@ def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpressi
926936 raise ValueError (f"Unsupported predicate: { predicate } " )
927937
928938
def translate_column_names(
    expr: BooleanExpression, file_schema: Schema, case_sensitive: bool, projected_field_values: Dict[str, Any] = EMPTY_DICT
) -> BooleanExpression:
    """Visit ``expr`` with a ``_ColumnNameTranslator`` built from the given arguments.

    Args:
        expr (BooleanExpression): The expression to visit.
        file_schema (Schema): The schema of the file.
        case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema.
        projected_field_values (Dict[str, Any]): Values for projected fields not present in the data file.

    Returns:
        BooleanExpression: The result of visiting ``expr`` with the translator.
    """
    translator = _ColumnNameTranslator(file_schema, case_sensitive, projected_field_values)
    return visit(expr, translator)
931943
932944
933945class _ExpressionFieldIDs (BooleanExpressionVisitor [Set [int ]]):
0 commit comments