Skip to content

Commit 35cf34a

Browse files
committed
Adding promotion for UnknownType per V3+ spec
1 parent 4cb9041 commit 35cf34a

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

pyiceberg/schema.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,9 @@ def promote(file_type: IcebergType, read_type: IcebergType) -> IcebergType:
16171617
else:
16181618
raise ResolveError(f"Cannot promote {file_type} to {read_type}")
16191619

1620+
@promote.register(UnknownType)
def _(file_type: UnknownType, read_type: IcebergType) -> IcebergType:
    """Promote a column whose file type is UnknownType to the requested read type.

    Args:
        file_type: The UnknownType this overload is registered for; its value
            is not inspected.
        read_type: The type requested by the read schema.

    Returns:
        ``read_type``, unchanged.
    """
    # Per V3 Spec, "Unknown" can be promoted to any type
    return read_type
16201623

16211624
@promote.register(IntegerType)
16221625
def _(file_type: IntegerType, read_type: IcebergType) -> IcebergType:

tests/io/test_pyarrow_visitor.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from pyiceberg.expressions.literals import literal
3535
from pyiceberg.io.pyarrow import (
3636
UnsupportedPyArrowTypeException,
37+
_check_pyarrow_schema_compatible,
3738
_ConvertToArrowSchema,
3839
_ConvertToIceberg,
3940
_ConvertToIcebergWithoutIDs,
@@ -313,6 +314,28 @@ def test_pyarrow_dictionary_encoded_type_to_iceberg(value_type: pa.DataType, exp
313314
assert visit_pyarrow(pyarrow_dict, _ConvertToIceberg()) == expected_result
314315

315316

317+
def test_schema_check_null_column(table_schema_simple: Schema) -> None:
    """Check that an Arrow schema with a null-typed column is accepted as compatible.

    Field 0 of the Arrow schema derived from ``table_schema_simple`` is
    retyped to ``pa.null()`` before the compatibility check runs.
    """
    arrow_schema: pa.Schema = schema_to_pyarrow(table_schema_simple)

    # Make the optional string field null for testing
    null_string_field = arrow_schema.field(0).with_type(pa.null())
    arrow_schema = arrow_schema.set(0, null_string_field)
    assert arrow_schema.field(0).type == pa.null()

    # No return value is asserted: the test passes as long as this call does not raise.
    _check_pyarrow_schema_compatible(table_schema_simple, arrow_schema)
323+
324+
325+
def test_schema_conversion_null_column(table_schema_simple: Schema) -> None:
    """Check that a null-typed Arrow column converts to an ``unknown`` Iceberg field.

    Field 2 of the Arrow schema derived from ``table_schema_simple`` is
    retyped to ``pa.null()``; the converted schema's string form is then
    compared against the expected rendering.
    """
    arrow_schema: pa.Schema = schema_to_pyarrow(table_schema_simple)

    # Make the optional boolean field null for testing
    null_bool_field = arrow_schema.field(2).with_type(pa.null())
    arrow_schema = arrow_schema.set(2, null_bool_field)
    assert arrow_schema.field(2).type == pa.null()

    # NOTE(review): the field lines below are reproduced exactly as they appear
    # in this view; confirm their leading whitespace against the real
    # ``str(Schema)`` output before relying on this literal.
    expected = """table {
1: foo: optional string
2: bar: required int
3: baz: optional unknown
}"""
    actual = str(pyarrow_to_schema(arrow_schema))
    assert actual == expected
337+
338+
316339
def test_round_schema_conversion_simple(table_schema_simple: Schema) -> None:
317340
actual = str(pyarrow_to_schema(schema_to_pyarrow(table_schema_simple)))
318341
expected = """table {

tests/test_schema.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
TimestamptzType,
5656
TimeType,
5757
UUIDType,
58+
UnknownType
5859
)
5960

6061
TEST_PRIMITIVE_TYPES = [
@@ -74,6 +75,7 @@
7475
FixedType(16),
7576
FixedType(20),
7677
UUIDType(),
78+
UnknownType()
7779
]
7880

7981

@@ -855,6 +857,8 @@ def should_promote(file_type: IcebergType, read_type: IcebergType) -> bool:
855857
return file_type.precision <= read_type.precision and file_type.scale == file_type.scale
856858
if isinstance(file_type, FixedType) and isinstance(read_type, UUIDType) and len(file_type) == 16:
857859
return True
860+
if isinstance(file_type, UnknownType):
861+
return True
858862
return False
859863

860864

0 commit comments

Comments
 (0)