Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,7 @@ def visit_uuid(self, _: UUIDType) -> pa.DataType:
return pa.uuid()

def visit_unknown(self, _: UnknownType) -> pa.DataType:
    """Map an Iceberg `UnknownType` to the PyArrow null data type.

    The visited type instance carries no information needed for the
    conversion, so it is ignored.

    Note: type `UnknownType` can be promoted to any primitive type in V3+
    tables per the Iceberg spec.

    Returns:
        The PyArrow data type produced by `pa.null()`.
    """
    arrow_null_type = pa.null()
    return arrow_null_type

def visit_binary(self, _: BinaryType) -> pa.DataType:
Expand Down Expand Up @@ -1358,6 +1359,8 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
primitive = cast(pa.FixedSizeBinaryType, primitive)
return FixedType(primitive.byte_width)
elif pa.types.is_null(primitive):
# PyArrow null type (pa.null()) is converted to Iceberg UnknownType
# UnknownType can be promoted to any primitive type in V3+ tables per the Iceberg spec
return UnknownType()
elif isinstance(primitive, pa.UuidType):
return UUIDType()
Expand Down
19 changes: 18 additions & 1 deletion pyiceberg/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1692,6 +1692,15 @@ def _(file_type: FixedType, read_type: IcebergType) -> IcebergType:
raise ResolveError(f"Cannot promote {file_type} to {read_type}")


@promote.register(UnknownType)
def _(file_type: UnknownType, read_type: IcebergType) -> IcebergType:
    """Promote an `UnknownType` file type to the requested read type.

    Per the V3 spec, "unknown" can be promoted to any primitive type, so the
    requested type is accepted as-is whenever it is a `PrimitiveType`.

    Args:
        file_type: The unknown type found in the file schema.
        read_type: The type requested by the read schema.

    Returns:
        `read_type`, unchanged, when it is a primitive type.

    Raises:
        ResolveError: If `read_type` is not a `PrimitiveType`.
    """
    # Guard clause: anything that is not a primitive is rejected up front.
    if not isinstance(read_type, PrimitiveType):
        raise ResolveError(f"Cannot promote {file_type} to {read_type}")
    return read_type


def _check_schema_compatible(requested_schema: Schema, provided_schema: Schema) -> None:
"""
Check if the `provided_schema` is compatible with `requested_schema`.
Expand Down Expand Up @@ -1761,7 +1770,15 @@ def _is_field_compatible(self, lhs: NestedField) -> bool:
self.rich_table.add_row("✅", str(lhs), str(rhs))
return True
except ResolveError:
self.rich_table.add_row("❌", str(lhs), str(rhs))
# UnknownType can only be promoted to Primitive types
if isinstance(rhs.field_type, UnknownType):
if not isinstance(lhs.field_type, PrimitiveType):
error_msg = f"Null type (UnknownType) cannot be promoted to non-primitive type {lhs.field_type}. UnknownType can only be promoted to primitive types (string, int, boolean, etc.) in V3+ tables."
else:
error_msg = f"Null type (UnknownType) cannot be promoted to {lhs.field_type}. This may be due to table format version limitations (V1/V2 tables don't support UnknownType promotion)."
self.rich_table.add_row("❌", str(lhs), f"{str(rhs)} - {error_msg}")
else:
self.rich_table.add_row("❌", str(lhs), str(rhs))
return False

def schema(self, schema: Schema, struct_result: Callable[[], bool]) -> bool:
Expand Down
31 changes: 31 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
TimestampType,
TimestamptzType,
TimeType,
UnknownType,
UUIDType,
)

Expand Down Expand Up @@ -919,6 +920,36 @@ def test_promotion(file_type: IcebergType, read_type: IcebergType) -> None:
promote(file_type, read_type)


def test_unknown_type_promotion_to_primitive() -> None:
    """Check that promoting UnknownType to a primitive type yields that primitive type (V3+ behavior)."""
    source_type = UnknownType()

    # Build a fresh instance on each side of the comparison for every target type.
    for make_target in (StringType, IntegerType, BooleanType, FloatType):
        assert promote(source_type, make_target()) == make_target()


def test_unknown_type_promotion_to_non_primitive_raises_resolve_error() -> None:
    """Check that promoting UnknownType to a list, map, or struct raises ResolveError with the expected message."""
    source_type = UnknownType()

    # Each entry pairs a non-primitive target with the message fragment the error must contain.
    rejected_targets = [
        (
            ListType(element_id=1, element_type=StringType(), element_required=False),
            "Cannot promote unknown to list<string>",
        ),
        (
            MapType(key_id=1, key_type=StringType(), value_id=2, value_type=StringType(), value_required=False),
            "Cannot promote unknown to map<string, string>",
        ),
        (
            StructType(NestedField(field_id=1, name="field", field_type=StringType(), required=False)),
            "Cannot promote unknown to struct<1: field: optional string>",
        ),
    ]

    for target_type, expected_fragment in rejected_targets:
        with pytest.raises(ResolveError) as raised:
            promote(source_type, target_type)

        assert expected_fragment in str(raised.value)


@pytest.fixture()
def primitive_fields() -> List[NestedField]:
return [
Expand Down