Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pyiceberg/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1783,7 +1783,11 @@ def struct(self, struct: StructType, field_results: builtins.list[Callable[[], b
return all(results)

def field(self, field: NestedField, field_result: Callable[[], bool]) -> bool:
    """Check one field and, only when it exists in the provided schema, its children.

    Args:
        field: The field being checked.
        field_result: Deferred check of the field's nested type. It is not
            invoked when the field's id is absent from the provided schema.

    Returns:
        The result of ``self._is_field_compatible(field)`` alone when the
        field id is missing from the provided schema; otherwise that result
        combined (short-circuit ``and``) with ``field_result()``.
    """
    # Skip child validation for missing optional fields (#2797)
    is_compatible = self._is_field_compatible(field)
    # With no counterpart in the provided schema there is nothing to compare
    # the children against, so the field-level verdict is final.
    if field.field_id not in self.provided_schema._lazy_id_to_field:
        return is_compatible
    return is_compatible and field_result()

def list(self, list_type: ListType, element_result: Callable[[], bool]) -> bool:
    """Check a list type: its element field first, then the element's own type.

    Args:
        list_type: List type whose ``element_field`` is checked.
        element_result: Deferred check of the element type; invoked only
            when the element-field check is truthy.

    Returns:
        The element-field check result when it is falsy, otherwise the value
        produced by ``element_result()``.
    """
    element_ok = self._is_field_compatible(list_type.element_field)
    # Same short-circuit semantics as ``element_ok and element_result()``.
    return element_result() if element_ok else element_ok
Expand Down
105 changes: 105 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pyiceberg.schema import (
Accessor,
Schema,
_check_schema_compatible,
build_position_accessors,
index_by_id,
index_by_name,
Expand Down Expand Up @@ -1687,3 +1688,107 @@ def test_arrow_schema() -> None:
)

assert base_schema.as_arrow() == expected_schema


def test_check_schema_compatible_optional_map_field_missing() -> None:
    """An optional map field absent from the provided schema is accepted (issue #2684)."""
    id_column = NestedField(field_id=1, name="id", field_type=LongType(), required=True)
    string_map = MapType(key_id=3, key_type=StringType(), value_id=4, value_type=StringType())
    # The map column is optional in the requested schema.
    optional_map_column = NestedField(field_id=2, name="data", field_type=string_map, required=False)
    requested_schema = Schema(id_column, optional_map_column)

    # The provided schema carries only the id column; the map column is left out.
    provided_schema = Schema(
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
    )

    # Passing means no exception was raised for the missing optional column.
    _check_schema_compatible(requested_schema, provided_schema)


def test_check_schema_compatible_required_map_field_missing() -> None:
    """A required map field absent from the provided schema is rejected."""
    id_column = NestedField(field_id=1, name="id", field_type=LongType(), required=True)
    string_map = MapType(key_id=3, key_type=StringType(), value_id=4, value_type=StringType())
    # Unlike the optional variant, this map column is marked required.
    required_map_column = NestedField(field_id=2, name="data", field_type=string_map, required=True)
    requested_schema = Schema(id_column, required_map_column)

    # The provided schema carries only the id column; the map column is left out.
    provided_schema = Schema(
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
    )

    # The missing required column must surface as a field-mismatch error.
    with pytest.raises(ValueError, match="Mismatch in fields"):
        _check_schema_compatible(requested_schema, provided_schema)


def test_check_schema_compatible_optional_list_field_missing() -> None:
    """An optional list field absent from the provided schema is accepted."""
    id_column = NestedField(field_id=1, name="id", field_type=LongType(), required=True)
    string_list = ListType(element_id=3, element_type=StringType(), element_required=True)
    # The list column itself is optional even though its elements are required.
    optional_list_column = NestedField(field_id=2, name="items", field_type=string_list, required=False)
    requested_schema = Schema(id_column, optional_list_column)

    # The provided schema carries only the id column; the list column is left out.
    provided_schema = Schema(
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
    )

    # Passing means no exception was raised for the missing optional column.
    _check_schema_compatible(requested_schema, provided_schema)


def test_check_schema_compatible_optional_struct_field_missing() -> None:
    """An optional struct field absent from the provided schema is accepted."""
    id_column = NestedField(field_id=1, name="id", field_type=LongType(), required=True)
    # The struct's members are required, but the struct column itself is optional.
    details_struct = StructType(
        NestedField(field_id=3, name="name", field_type=StringType(), required=True),
        NestedField(field_id=4, name="count", field_type=IntegerType(), required=True),
    )
    optional_struct_column = NestedField(field_id=2, name="details", field_type=details_struct, required=False)
    requested_schema = Schema(id_column, optional_struct_column)

    # The provided schema carries only the id column; the struct column is left out.
    provided_schema = Schema(
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
    )

    # Passing means no exception was raised for the missing optional column.
    _check_schema_compatible(requested_schema, provided_schema)


def test_check_schema_compatible_optional_map_field_present() -> None:
    """An optional map field that is present in both schemas is accepted."""
    requested_map = MapType(key_id=3, key_type=StringType(), value_id=4, value_type=StringType())
    requested_columns = [
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
        NestedField(field_id=2, name="data", field_type=requested_map, required=False),
    ]
    requested_schema = Schema(*requested_columns)

    # The provided schema declares the same two columns with the same ids and types.
    provided_map = MapType(key_id=3, key_type=StringType(), value_id=4, value_type=StringType())
    provided_columns = [
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
        NestedField(field_id=2, name="data", field_type=provided_map, required=False),
    ]
    provided_schema = Schema(*provided_columns)

    # Passing means no exception was raised for matching schemas.
    _check_schema_compatible(requested_schema, provided_schema)