From 87f2f2dc8137ce6795d90e3fb37a36760dd4125c Mon Sep 17 00:00:00 2001 From: sunxiaojian Date: Fri, 28 Mar 2025 12:11:55 +0800 Subject: [PATCH 1/3] fixed --- pyiceberg/types.py | 8 +++++++- tests/test_types.py | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pyiceberg/types.py b/pyiceberg/types.py index 8e83b011bf..0ea764e4f6 100644 --- a/pyiceberg/types.py +++ b/pyiceberg/types.py @@ -324,7 +324,7 @@ def __init__( self, field_id: Optional[int] = None, name: Optional[str] = None, - field_type: Optional[IcebergType] = None, + field_type: Optional[IcebergType | str] = None, required: bool = False, doc: Optional[str] = None, initial_default: Optional[Any] = None, @@ -340,6 +340,12 @@ def __init__( data["doc"] = doc data["initial-default"] = data["initial-default"] if "initial-default" in data else initial_default data["write-default"] = data["write-default"] if "write-default" in data else write_default + if isinstance(data["type"], str): + try: + data["type"] = IcebergType.handle_primitive_type(data["type"], None) + except ValueError as e: + raise ValueError(f"Unsupported field type: {data['type']}.") from e + super().__init__(**data) def __str__(self) -> str: diff --git a/tests/test_types.py b/tests/test_types.py index b19df17e08..6caaf80d34 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -231,6 +231,22 @@ def test_nested_field() -> None: assert "validation errors for NestedField" in str(exc_info.value) +def test_nested_field_type_as_str_unsupported() -> None: + with pytest.raises(ValueError) as exc_info: + _ = (NestedField(1, "field", "list", required=True),) + assert "Unsupported field type: list" in str(exc_info.value) + + +def test_nested_field_type_as_str_struct() -> None: + field_var = NestedField( + 1, + "field", + "string", + required=True, + ) + assert isinstance(field_var.field_type, StringType) + + @pytest.mark.parametrize("input_index,input_type", non_parameterized_types) @pytest.mark.parametrize("check_index,check_type", non_parameterized_types) def test_non_parameterized_type_equality( From fe5719f8d13b2e1ee16510d26f5e77562e640bd3 Mon Sep 17 00:00:00 2001 From: sunxiaojian Date: Sun, 30 Mar 2025 01:00:24 +0800 Subject: [PATCH 2/3] fixed --- pyiceberg/types.py | 25 +++++++++++++++++++------ tests/test_types.py | 32 ++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/pyiceberg/types.py b/pyiceberg/types.py index 0ea764e4f6..a33e56581a 100644 --- a/pyiceberg/types.py +++ b/pyiceberg/types.py @@ -47,6 +47,7 @@ Field, PrivateAttr, SerializeAsAny, + field_validator, model_serializer, model_validator, ) @@ -310,6 +311,14 @@ class NestedField(IcebergType): ... doc="Just a long" ... )) '2: bar: required long (Just a long)' + >>> str(NestedField( + ... field_id=3, + ... name='baz', + ... field_type="string", + ... required=True, + ... doc="A string field" + ... )) + '3: baz: required string (A string field)' """ field_id: int = Field(alias="id") @@ -320,6 +329,16 @@ class NestedField(IcebergType): initial_default: Optional[Any] = Field(alias="initial-default", default=None, repr=False) write_default: Optional[L] = Field(alias="write-default", default=None, repr=False) # type: ignore + @field_validator("field_type", mode="before") + def convert_field_type(cls, v: Any) -> IcebergType: + """Convert string values into IcebergType instances.""" + if isinstance(v, str): + try: + return IcebergType.handle_primitive_type(v, None) + except ValueError as e: + raise ValueError(f"Unsupported field type: '{v}'") from e + return v + def __init__( self, field_id: Optional[int] = None, @@ -340,12 +359,6 @@ def __init__( data["doc"] = doc data["initial-default"] = data["initial-default"] if "initial-default" in data else initial_default data["write-default"] = data["write-default"] if "write-default" in data else write_default - if isinstance(data["type"], str): - try: - data["type"] = IcebergType.handle_primitive_type(data["type"], None) - except ValueError as e: - raise ValueError(f"Unsupported field type: {data['type']}.") from e - super().__init__(**data) def __str__(self) -> str: diff --git a/tests/test_types.py b/tests/test_types.py index 6caaf80d34..9f84299da8 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -62,6 +62,21 @@ (12, BinaryType), ] +primitive_types = { + "boolean": BooleanType, + "int": IntegerType, + "long": LongType, + "float": FloatType, + "double": DoubleType, + "date": DateType, + "time": TimeType, + "timestamp": TimestampType, + "timestamptz": TimestamptzType, + "string": StringType, + "uuid": UUIDType, + "binary": BinaryType, +} + @pytest.mark.parametrize("input_index, input_type", non_parameterized_types) def test_repr_primitive_types(input_index: int, input_type: Type[PrimitiveType]) -> None: @@ -232,19 +247,24 @@ def test_nested_field() -> None: def test_nested_field_type_as_str_unsupported() -> None: - with pytest.raises(ValueError) as exc_info: - _ = (NestedField(1, "field", "list", required=True),) - assert "Unsupported field type: list" in str(exc_info.value) + unsupported_types = ["list", "map", "struct"] + for type_str in unsupported_types: + with pytest.raises(ValueError) as exc_info: + _ = NestedField(1, "field", type_str, required=True) + assert f"Unsupported field type: '{type_str}'" in str(exc_info.value) -def test_nested_field_type_as_str_struct() -> None: +@pytest.mark.parametrize("type_str, type_class", primitive_types.items()) +def test_nested_field_type_as_str(type_str: str, type_class: type) -> None: field_var = NestedField( 1, "field", - "string", + type_str, required=True, ) - assert isinstance(field_var.field_type, StringType) + assert isinstance( + field_var.field_type, type_class + ), f"Expected {type_class.__name__}, got {field_var.field_type.__class__.__name__}" @pytest.mark.parametrize("input_index,input_type", non_parameterized_types) From a73dda5f5ffa752742ec1387bdb162a471327ee1 Mon Sep 17 00:00:00 2001 From: sunxiaojian Date: Sun, 30 Mar 2025 23:05:59 +0800 Subject: [PATCH 3/3] fixed --- tests/test_types.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/tests/test_types.py b/tests/test_types.py index 9f84299da8..e14ec9dd6c 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -246,7 +246,7 @@ def test_nested_field() -> None: assert "validation errors for NestedField" in str(exc_info.value) -def test_nested_field_type_as_str_unsupported() -> None: +def test_nested_field_complex_type_as_str_unsupported() -> None: unsupported_types = ["list", "map", "struct"] for type_str in unsupported_types: with pytest.raises(ValueError) as exc_info: @@ -254,17 +254,22 @@ def test_nested_field_type_as_str_unsupported() -> None: assert f"Unsupported field type: '{type_str}'" in str(exc_info.value) -@pytest.mark.parametrize("type_str, type_class", primitive_types.items()) -def test_nested_field_type_as_str(type_str: str, type_class: type) -> None: - field_var = NestedField( - 1, - "field", - type_str, - required=True, - ) - assert isinstance( - field_var.field_type, type_class - ), f"Expected {type_class.__name__}, got {field_var.field_type.__class__.__name__}" +def test_nested_field_primitive_type_as_str() -> None: + for type_str, type_class in primitive_types.items(): + field_var = NestedField( + 1, + "field", + type_str, + required=True, + ) + assert isinstance( + field_var.field_type, type_class + ), f"Expected {type_class.__name__}, got {field_var.field_type.__class__.__name__}" + + # Test that passing 'bool' raises a ValueError, as it should be 'boolean' + with pytest.raises(ValueError) as exc_info: + _ = NestedField(1, "field", "bool", required=True) + assert "Unsupported field type: 'bool'" in str(exc_info.value) @pytest.mark.parametrize("input_index,input_type", non_parameterized_types)