From 7d1c4619b1e9d47d0d9a98a7a15184ccdcf045f6 Mon Sep 17 00:00:00 2001 From: Artem Titoulenko Date: Fri, 22 Aug 2025 15:59:56 -0400 Subject: [PATCH 1/8] Add serializer for AssertRefSnapshotId allowing null json value --- pyiceberg/table/update/__init__.py | 23 +++++++++++++---------- tests/test_serializers.py | 14 +++++++++++++- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index 2f0e6c13d2..4748314d7e 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -21,9 +21,9 @@ from abc import ABC, abstractmethod from datetime import datetime from functools import singledispatch -from typing import TYPE_CHECKING, Annotated, Any, Dict, Generic, List, Literal, Optional, Set, Tuple, TypeVar, Union, cast +from typing import TYPE_CHECKING, Annotated, Any, Dict, Generic, List, Literal, Optional, Tuple, TypeVar, Union, cast -from pydantic import Field, field_validator, model_validator +from pydantic import Field, field_serializer, field_validator, model_validator from pyiceberg.exceptions import CommitFailedException from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec @@ -725,7 +725,17 @@ class AssertRefSnapshotId(ValidatableTableRequirement): type: Literal["assert-ref-snapshot-id"] = Field(default="assert-ref-snapshot-id") ref: str = Field(...) - snapshot_id: Optional[int] = Field(default=None, alias="snapshot-id") + snapshot_id: int = Field(default=-1, alias="snapshot-id") + + # serialize -1 to null when serializing to json + # TODO: make more generic Field so this can be used on other models that need + # an explicit null. + @field_serializer("snapshot_id", when_used="json") + def snapshot_id_can_be_null(self, snapshot_id: int) -> Optional[int]: + if snapshot_id == -1: + return None + else: + return snapshot_id def validate(self, base_metadata: Optional[TableMetadata]) -> None: if base_metadata is None: @@ -745,13 +755,6 @@ def validate(self, base_metadata: Optional[TableMetadata]) -> None: elif self.snapshot_id is not None: raise CommitFailedException(f"Requirement failed: branch or tag {self.ref} is missing, expected {self.snapshot_id}") - # override the override method, allowing None to serialize to `null` instead of being omitted. - def model_dump_json( - self, exclude_none: bool = False, exclude: Optional[Set[str]] = None, by_alias: bool = True, **kwargs: Any - ) -> str: - # `snapshot-id` is required in json response, even if null - return super().model_dump_json(exclude_none=False) - class AssertLastAssignedFieldId(ValidatableTableRequirement): """The table's last assigned column id must match the requirement's `last-assigned-field-id`.""" diff --git a/tests/test_serializers.py b/tests/test_serializers.py index ad40ea08e0..34dcf1b352 100644 --- a/tests/test_serializers.py +++ b/tests/test_serializers.py @@ -18,7 +18,7 @@ import json import os import uuid -from typing import Any, Dict +from typing import Any, Dict, Tuple import pytest from pytest_mock import MockFixture @@ -26,6 +26,8 @@ from pyiceberg.serializers import ToOutputFile from pyiceberg.table import StaticTable from pyiceberg.table.metadata import TableMetadataV1 +from pyiceberg.table.update import AssertRefSnapshotId, TableRequirement +from pyiceberg.typedef import IcebergBaseModel def test_legacy_current_snapshot_id( @@ -48,3 +50,13 @@ def test_legacy_current_snapshot_id( backwards_compatible_static_table = StaticTable.from_metadata(metadata_location) assert backwards_compatible_static_table.metadata.current_snapshot_id is None assert backwards_compatible_static_table.metadata == static_table.metadata + + +def test_null_serializer_field() -> None: + class ExampleRequest(IcebergBaseModel): + requirements: Tuple[TableRequirement, ...] + + request = ExampleRequest(requirements=(AssertRefSnapshotId(ref="main"),)) + dumped_json = request.model_dump_json() + expected_json = """{"type":"assert-ref-snapshot-id","ref":"main","snapshot-id":null}""" + assert expected_json in dumped_json From fcdd773b2d8912cc5591b2245c27d926c98e1e5b Mon Sep 17 00:00:00 2001 From: Artem Titoulenko Date: Fri, 22 Aug 2025 16:50:06 -0400 Subject: [PATCH 2/8] use model_serializer instead --- pyiceberg/table/update/__init__.py | 22 ++++++++++------------ tests/test_serializers.py | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index 4748314d7e..dc8f93f98a 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -23,7 +23,7 @@ from functools import singledispatch from typing import TYPE_CHECKING, Annotated, Any, Dict, Generic, List, Literal, Optional, Tuple, TypeVar, Union, cast -from pydantic import Field, field_serializer, field_validator, model_validator +from pydantic import Field, field_validator, model_validator, model_serializer from pyiceberg.exceptions import CommitFailedException from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec @@ -725,17 +725,15 @@ class AssertRefSnapshotId(ValidatableTableRequirement): type: Literal["assert-ref-snapshot-id"] = Field(default="assert-ref-snapshot-id") ref: str = Field(...) - snapshot_id: int = Field(default=-1, alias="snapshot-id") - - # serialize -1 to null when serializing to json - # TODO: make more generic Field so this can be used on other models that need - # an explicit null. - @field_serializer("snapshot_id", when_used="json") - def snapshot_id_can_be_null(self, snapshot_id: int) -> Optional[int]: - if snapshot_id == -1: - return None - else: - return snapshot_id + snapshot_id: Optional[int] = Field(default=None, alias="snapshot-id") + + @model_serializer + def ser_model(self) -> dict[str, Any]: + return { + "type": self.type, + "ref": self.ref, + "snapshot_id": self.snapshot_id, + } def validate(self, base_metadata: Optional[TableMetadata]) -> None: if base_metadata is None: diff --git a/tests/test_serializers.py b/tests/test_serializers.py index 34dcf1b352..3f2bd73e48 100644 --- a/tests/test_serializers.py +++ b/tests/test_serializers.py @@ -56,7 +56,7 @@ def test_null_serializer_field() -> None: class ExampleRequest(IcebergBaseModel): requirements: Tuple[TableRequirement, ...] - request = ExampleRequest(requirements=(AssertRefSnapshotId(ref="main"),)) + request = ExampleRequest(requirements=(AssertRefSnapshotId(ref="main", snapshot_id=None),)) dumped_json = request.model_dump_json() expected_json = """{"type":"assert-ref-snapshot-id","ref":"main","snapshot-id":null}""" assert expected_json in dumped_json From 14000dc5349f4fd3353f0205a2e8faea4854baed Mon Sep 17 00:00:00 2001 From: Artem Titoulenko Date: Fri, 22 Aug 2025 17:32:52 -0400 Subject: [PATCH 3/8] typo --- pyiceberg/table/update/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index dc8f93f98a..d3060952e3 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -732,7 +732,7 @@ def ser_model(self) -> dict[str, Any]: return { "type": self.type, "ref": self.ref, - "snapshot_id": self.snapshot_id, + "snapshot-id": self.snapshot_id, } def validate(self, base_metadata: Optional[TableMetadata]) -> None: From c799fc907735a9bbb77fb9280075381ae3a7b518 Mon Sep 17 00:00:00 2001 From: Artem Titoulenko Date: Mon, 25 Aug 2025 13:25:13 -0400 Subject: [PATCH 4/8] use default formatter first --- pyiceberg/table/update/__init__.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index d3060952e3..4b8d76dc1d 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -24,6 +24,7 @@ from typing import TYPE_CHECKING, Annotated, Any, Dict, Generic, List, Literal, Optional, Tuple, TypeVar, Union, cast from pydantic import Field, field_validator, model_validator, model_serializer +from pydantic_core.core_schema import SerializerFunctionWrapHandler, SerializationInfo from pyiceberg.exceptions import CommitFailedException from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec @@ -727,13 +728,10 @@ class AssertRefSnapshotId(ValidatableTableRequirement): ref: str = Field(...) snapshot_id: Optional[int] = Field(default=None, alias="snapshot-id") - @model_serializer - def ser_model(self) -> dict[str, Any]: - return { - "type": self.type, - "ref": self.ref, - "snapshot-id": self.snapshot_id, - } + @model_serializer(mode="wrap") + def serialize_model(self, handler) -> dict[str, Any]: + partial_result = handler(self) + return {**partial_result, "snapshot-id": self.snapshot_id} def validate(self, base_metadata: Optional[TableMetadata]) -> None: if base_metadata is None: From caac6c5319d5a0847ff02ae2b855949764b5991e Mon Sep 17 00:00:00 2001 From: Artem Titoulenko Date: Mon, 25 Aug 2025 14:05:09 -0400 Subject: [PATCH 5/8] lint --- pyiceberg/table/update/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index 4b8d76dc1d..3980a602ba 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -24,7 +24,6 @@ from typing import TYPE_CHECKING, Annotated, Any, Dict, Generic, List, Literal, Optional, Tuple, TypeVar, Union, cast from pydantic import Field, field_validator, model_validator, model_serializer -from pydantic_core.core_schema import SerializerFunctionWrapHandler, SerializationInfo from pyiceberg.exceptions import CommitFailedException from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec @@ -731,6 +730,7 @@ class AssertRefSnapshotId(ValidatableTableRequirement): @model_serializer(mode="wrap") def serialize_model(self, handler) -> dict[str, Any]: partial_result = handler(self) + # Ensure "snapshot-id" is always present, even if value is None return {**partial_result, "snapshot-id": self.snapshot_id} def validate(self, base_metadata: Optional[TableMetadata]) -> None: From 84bf133d55bbe953ea5cafd9103dad032502750f Mon Sep 17 00:00:00 2001 From: Artem Titoulenko Date: Mon, 25 Aug 2025 15:04:49 -0400 Subject: [PATCH 6/8] more lint --- pyiceberg/table/update/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index 3980a602ba..961a5af69d 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -23,7 +23,8 @@ from functools import singledispatch from typing import TYPE_CHECKING, Annotated, Any, Dict, Generic, List, Literal, Optional, Tuple, TypeVar, Union, cast -from pydantic import Field, field_validator, model_validator, model_serializer +from pydantic import Field, field_validator, model_serializer, model_validator +from pydantic.functional_serializers import ModelWrapSerializerWithoutInfo from pyiceberg.exceptions import CommitFailedException from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec @@ -728,7 +729,7 @@ class AssertRefSnapshotId(ValidatableTableRequirement): snapshot_id: Optional[int] = Field(default=None, alias="snapshot-id") @model_serializer(mode="wrap") - def serialize_model(self, handler) -> dict[str, Any]: + def serialize_model(self, handler: ModelWrapSerializerWithoutInfo) -> dict[str, Any]: partial_result = handler(self) # Ensure "snapshot-id" is always present, even if value is None return {**partial_result, "snapshot-id": self.snapshot_id} From 8343358cbffaf789a9f6001510129ea21c265211 Mon Sep 17 00:00:00 2001 From: Artem Titoulenko Date: Mon, 25 Aug 2025 16:21:15 -0400 Subject: [PATCH 7/8] lint and type even better --- pyiceberg/table/update/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index 961a5af69d..98a5368552 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -24,7 +24,9 @@ from typing import TYPE_CHECKING, Annotated, Any, Dict, Generic, List, Literal, Optional, Tuple, TypeVar, Union, cast from pydantic import Field, field_validator, model_serializer, model_validator -from pydantic.functional_serializers import ModelWrapSerializerWithoutInfo + +if TYPE_CHECKING: + from pydantic.functional_serializers import ModelWrapSerializerWithoutInfo from pyiceberg.exceptions import CommitFailedException from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec From 7cd78c1b9f29accb13db1f375be78c51292c3e79 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 25 Aug 2025 13:56:38 -0700 Subject: [PATCH 8/8] combine TYPE_CHECKING --- pyiceberg/table/update/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index 98a5368552..c30d960d38 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -25,9 +25,6 @@ from pydantic import Field, field_validator, model_serializer, model_validator -if TYPE_CHECKING: - from pydantic.functional_serializers import ModelWrapSerializerWithoutInfo - from pyiceberg.exceptions import CommitFailedException from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec from pyiceberg.schema import Schema @@ -55,6 +52,8 @@ from pyiceberg.utils.properties import property_as_int if TYPE_CHECKING: + from pydantic.functional_serializers import ModelWrapSerializerWithoutInfo + from pyiceberg.table import Transaction U = TypeVar("U")