From a12bf107593cb15aa46f94579fbedeb4794158df Mon Sep 17 00:00:00 2001
From: Matthias Queitsch
Date: Sat, 5 Jul 2025 17:01:53 +0200
Subject: [PATCH 1/3] feat(schema conversion): add schema conversion from avro timestamp-millis

---
Review note: timestamp-millis is keyed on the physical type "long" because
the Avro specification defines timestamp-millis (and timestamp-micros) as
annotations on long, not int. The tests in this series use "long" for the
same reason. The blob hashes on the "index" lines were computed before this
correction and no longer describe the post-image exactly.

 pyiceberg/utils/schema_conversion.py  |  1 +
 tests/utils/test_schema_conversion.py | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/pyiceberg/utils/schema_conversion.py b/pyiceberg/utils/schema_conversion.py
index ec2fccd509..501e85a7ec 100644
--- a/pyiceberg/utils/schema_conversion.py
+++ b/pyiceberg/utils/schema_conversion.py
@@ -69,6 +69,7 @@
 LOGICAL_FIELD_TYPE_MAPPING: Dict[Tuple[str, str], PrimitiveType] = {
     ("date", "int"): DateType(),
     ("time-micros", "long"): TimeType(),
+    ("timestamp-millis", "long"): TimestampType(),
     ("timestamp-micros", "long"): TimestampType(),
     ("uuid", "fixed"): UUIDType(),
 }
diff --git a/tests/utils/test_schema_conversion.py b/tests/utils/test_schema_conversion.py
index e60a89563f..e48592fbde 100644
--- a/tests/utils/test_schema_conversion.py
+++ b/tests/utils/test_schema_conversion.py
@@ -33,6 +33,7 @@
     NestedField,
     StringType,
     StructType,
+    TimestampType,
     UnknownType,
 )
 from pyiceberg.utils.schema_conversion import AvroSchemaConversion
@@ -327,6 +328,18 @@ def test_convert_date_type() -> None:
     assert actual == DateType()
 
 
+def test_convert_timestamp_millis_type() -> None:
+    avro_logical_type = {"type": "long", "logicalType": "timestamp-millis"}
+    actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
+    assert actual == TimestampType()
+
+
+def test_convert_timestamp_micros_type() -> None:
+    avro_logical_type = {"type": "long", "logicalType": "timestamp-micros"}
+    actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
+    assert actual == TimestampType()
+
+
 def test_unknown_logical_type() -> None:
     """Test raising a ValueError when converting an unknown logical type as part of an Avro schema conversion"""
     avro_logical_type = {"type": "bytes", "logicalType": "date"}

From 299fb30877234780e5dcc48d4d52d92c3e73a076 Mon Sep 17 00:00:00 2001
From: Matthias Queitsch
Date: Fri, 11 Jul 2025 21:15:26 +0200
Subject: [PATCH 2/3] feat(schema_conversion): add schema conversion for uuid type

---
 pyiceberg/utils/schema_conversion.py  |  1 +
 tests/utils/test_schema_conversion.py | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/utils/schema_conversion.py b/pyiceberg/utils/schema_conversion.py
index 501e85a7ec..c73b3ada7d 100644
--- a/pyiceberg/utils/schema_conversion.py
+++ b/pyiceberg/utils/schema_conversion.py
@@ -72,6 +72,7 @@
     ("timestamp-millis", "long"): TimestampType(),
     ("timestamp-micros", "long"): TimestampType(),
     ("uuid", "fixed"): UUIDType(),
+    ("uuid", "string"): UUIDType(),
 }
 
 AvroType = Union[str, Any]
diff --git a/tests/utils/test_schema_conversion.py b/tests/utils/test_schema_conversion.py
index e48592fbde..48c6857d43 100644
--- a/tests/utils/test_schema_conversion.py
+++ b/tests/utils/test_schema_conversion.py
@@ -34,6 +34,7 @@
     StringType,
     StructType,
     TimestampType,
+    UUIDType,
     UnknownType,
 )
 from pyiceberg.utils.schema_conversion import AvroSchemaConversion
@@ -327,13 +328,21 @@ def test_convert_date_type() -> None:
     actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
     assert actual == DateType()
 
+def test_convert_uuid_str_type() -> None:
+    avro_logical_type = {"type": "string", "logicalType": "uuid"}
+    actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
+    assert actual == UUIDType()
+
+def test_convert_uuid_fixed_type() -> None:
+    avro_logical_type = {"type": "fixed", "logicalType": "uuid"}
+    actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
+    assert actual == UUIDType()
 
 def test_convert_timestamp_millis_type() -> None:
     avro_logical_type = {"type": "long", "logicalType": "timestamp-millis"}
     actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
     assert actual == TimestampType()
 
-
 def test_convert_timestamp_micros_type() -> None:
     avro_logical_type = {"type": "long", "logicalType": "timestamp-micros"}
     actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)

From 0f1969d3aefadb54b8aaa5e880912281e4547e78 Mon Sep 17 00:00:00 2001
From: Kevin Liu
Date: Mon, 14 Jul 2025 19:16:06 -0700
Subject: [PATCH 3/3] make lint

---
 tests/utils/test_schema_conversion.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/utils/test_schema_conversion.py b/tests/utils/test_schema_conversion.py
index 48c6857d43..a5c45447ff 100644
--- a/tests/utils/test_schema_conversion.py
+++ b/tests/utils/test_schema_conversion.py
@@ -34,8 +34,8 @@
     StringType,
     StructType,
     TimestampType,
-    UUIDType,
     UnknownType,
+    UUIDType,
 )
 from pyiceberg.utils.schema_conversion import AvroSchemaConversion
 
@@ -328,21 +328,25 @@ def test_convert_date_type() -> None:
     actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
     assert actual == DateType()
 
+
 def test_convert_uuid_str_type() -> None:
     avro_logical_type = {"type": "string", "logicalType": "uuid"}
     actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
     assert actual == UUIDType()
 
+
 def test_convert_uuid_fixed_type() -> None:
     avro_logical_type = {"type": "fixed", "logicalType": "uuid"}
     actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
     assert actual == UUIDType()
 
+
 def test_convert_timestamp_millis_type() -> None:
     avro_logical_type = {"type": "long", "logicalType": "timestamp-millis"}
     actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)
     assert actual == TimestampType()
 
+
 def test_convert_timestamp_micros_type() -> None:
     avro_logical_type = {"type": "long", "logicalType": "timestamp-micros"}
     actual = AvroSchemaConversion()._convert_logical_type(avro_logical_type)