Allow UnknownType to be promoted to any read type.

Summary of this patch:
  * pyiceberg/schema.py: registers a `promote` overload for UnknownType that
    returns the requested read type unchanged.
  * tests/io/test_pyarrow_visitor.py: adds tests that swap a field's type for
    pa.null() and then (a) run _check_pyarrow_schema_compatible against the
    table schema and (b) assert that pyarrow_to_schema renders the field as
    "optional unknown".
  * tests/test_schema.py: adds UnknownType() to the primitive type list and
    makes the should_promote test helper return True for an UnknownType
    file type.

NOTE(review): the unchanged context line in should_promote compares
`file_type.scale == file_type.scale`, which is always true; it presumably was
meant to compare against `read_type.scale`. It is pre-existing and left as-is
here so the hunk still matches its target -- confirm and fix separately.

diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py
index 6333ace6e2..0da9677b50 100644
--- a/pyiceberg/schema.py
+++ b/pyiceberg/schema.py
@@ -1618,6 +1618,11 @@ def promote(file_type: IcebergType, read_type: IcebergType) -> IcebergType:
     raise ResolveError(f"Cannot promote {file_type} to {read_type}")
 
 
+@promote.register(UnknownType)
+def _(file_type: UnknownType, read_type: IcebergType) -> IcebergType:
+    return read_type  # Per V3 Spec, "Unknown" can be promoted to any type
+
+
 @promote.register(IntegerType)
 def _(file_type: IntegerType, read_type: IcebergType) -> IcebergType:
     if isinstance(read_type, LongType):
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index 9d5772d01c..0611c1ef21 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -34,6 +34,7 @@
 from pyiceberg.expressions.literals import literal
 from pyiceberg.io.pyarrow import (
     UnsupportedPyArrowTypeException,
+    _check_pyarrow_schema_compatible,
     _ConvertToArrowSchema,
     _ConvertToIceberg,
     _ConvertToIcebergWithoutIDs,
@@ -313,6 +314,28 @@ def test_pyarrow_dictionary_encoded_type_to_iceberg(value_type: pa.DataType, exp
     assert visit_pyarrow(pyarrow_dict, _ConvertToIceberg()) == expected_result
 
 
+def test_schema_check_null_column(table_schema_simple: Schema) -> None:
+    pyarrow_schema: pa.Schema = schema_to_pyarrow(table_schema_simple)
+    new_field = pyarrow_schema.field(0).with_type(pa.null())  # Make the optional string field null for testing
+    pyarrow_schema = pyarrow_schema.set(0, new_field)
+    assert pyarrow_schema.field(0).type == pa.null()
+    _check_pyarrow_schema_compatible(table_schema_simple, pyarrow_schema)
+
+
+def test_schema_conversion_null_column(table_schema_simple: Schema) -> None:
+    pyarrow_schema: pa.Schema = schema_to_pyarrow(table_schema_simple)
+    new_field = pyarrow_schema.field(2).with_type(pa.null())  # Make the optional boolean field null for testing
+    pyarrow_schema = pyarrow_schema.set(2, new_field)
+    assert pyarrow_schema.field(2).type == pa.null()
+    actual = str(pyarrow_to_schema(pyarrow_schema))
+    expected = """table {
+  1: foo: optional string
+  2: bar: required int
+  3: baz: optional unknown
+}"""
+    assert actual == expected
+
+
 def test_round_schema_conversion_simple(table_schema_simple: Schema) -> None:
     actual = str(pyarrow_to_schema(schema_to_pyarrow(table_schema_simple)))
     expected = """table {
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 3ca74c4027..248ae9c97a 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -54,6 +54,7 @@
     TimestampType,
     TimestamptzType,
     TimeType,
+    UnknownType,
     UUIDType,
 )
 
@@ -74,6 +75,7 @@
     FixedType(16),
     FixedType(20),
     UUIDType(),
+    UnknownType(),
 ]
 
 
@@ -855,6 +857,8 @@ def should_promote(file_type: IcebergType, read_type: IcebergType) -> bool:
         return file_type.precision <= read_type.precision and file_type.scale == file_type.scale
     if isinstance(file_type, FixedType) and isinstance(read_type, UUIDType) and len(file_type) == 16:
         return True
+    if isinstance(file_type, UnknownType):
+        return True
     return False
 
 