Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pyiceberg/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,11 @@ def promote(file_type: IcebergType, read_type: IcebergType) -> IcebergType:
raise ResolveError(f"Cannot promote {file_type} to {read_type}")


@promote.register(UnknownType)
def _(file_type: UnknownType, read_type: IcebergType) -> IcebergType:
return read_type # Per V3 Spec, "Unknown" can be promoted to any type
Copy link
Contributor

@Fokko Fokko Aug 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I read the spec carefully, I think it only allows for promoting to primitive types:

image

So, it cannot be promoted to a {List,Map,Struct}Type.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I interpreted this as promotions available for a primitive type rather than promotions to a primitive type. i.e. UnknownType is a primitive type and these are the types it can be promoted to. It should be easy enough to change, so I'm fine either way. But, to me it would make sense that if a {List,Map,Struct}Type field is nullable and a null PyArrow field is being evaluated for compatibility, that evaluation should succeed. What are your thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to extend this logic a bit:

Suggested change
return read_type # Per V3 Spec, "Unknown" can be promoted to any type
# Per V3 Spec, "Unknown" can be promoted to any Primitive type
if isinstance(read_type, PrimitiveType):
return file_type
else:
raise ResolveError(f"Cannot promote {file_type} to {read_type}")



@promote.register(IntegerType)
def _(file_type: IntegerType, read_type: IcebergType) -> IcebergType:
if isinstance(read_type, LongType):
Expand Down
23 changes: 23 additions & 0 deletions tests/io/test_pyarrow_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pyiceberg.expressions.literals import literal
from pyiceberg.io.pyarrow import (
UnsupportedPyArrowTypeException,
_check_pyarrow_schema_compatible,
_ConvertToArrowSchema,
_ConvertToIceberg,
_ConvertToIcebergWithoutIDs,
Expand Down Expand Up @@ -313,6 +314,28 @@ def test_pyarrow_dictionary_encoded_type_to_iceberg(value_type: pa.DataType, exp
assert visit_pyarrow(pyarrow_dict, _ConvertToIceberg()) == expected_result


def test_schema_check_null_column(table_schema_simple: Schema) -> None:
pyarrow_schema: pa.Schema = schema_to_pyarrow(table_schema_simple)
new_field = pyarrow_schema.field(0).with_type(pa.null()) # Make the optional string field null for testing
pyarrow_schema = pyarrow_schema.set(0, new_field)
assert pyarrow_schema.field(0).type == pa.null()
_check_pyarrow_schema_compatible(table_schema_simple, pyarrow_schema)


def test_schema_conversion_null_column(table_schema_simple: Schema) -> None:
pyarrow_schema: pa.Schema = schema_to_pyarrow(table_schema_simple)
new_field = pyarrow_schema.field(2).with_type(pa.null()) # Make the optional boolean field null for testing
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about creating a new schema with just a field with a NestedType instead? I don't think we want to re-assign fields as that makes the test harder te read.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure I can make a new schema. I think I was just trying to get it working and forgot to clean this up 😀

pyarrow_schema = pyarrow_schema.set(2, new_field)
assert pyarrow_schema.field(2).type == pa.null()
actual = str(pyarrow_to_schema(pyarrow_schema))
expected = """table {
1: foo: optional string
2: bar: required int
3: baz: optional unknown
}"""
assert actual == expected


def test_round_schema_conversion_simple(table_schema_simple: Schema) -> None:
actual = str(pyarrow_to_schema(schema_to_pyarrow(table_schema_simple)))
expected = """table {
Expand Down
4 changes: 4 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
TimestampType,
TimestamptzType,
TimeType,
UnknownType,
UUIDType,
)

Expand All @@ -74,6 +75,7 @@
FixedType(16),
FixedType(20),
UUIDType(),
UnknownType(),
]


Expand Down Expand Up @@ -855,6 +857,8 @@ def should_promote(file_type: IcebergType, read_type: IcebergType) -> bool:
return file_type.precision <= read_type.precision and file_type.scale == file_type.scale
if isinstance(file_type, FixedType) and isinstance(read_type, UUIDType) and len(file_type) == 16:
return True
if isinstance(file_type, UnknownType):
return True
return False


Expand Down