Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions pyiceberg/table/update/__init__.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a test, something like:

def test_set_statistics_update_without_snapshot_id(table_v2_with_statistics: Table) -> None:
    """Test that SetStatisticsUpdate auto-populates snapshot_id from statistics."""
    snapshot_id = table_v2_with_statistics.metadata.current_snapshot_id

    blob_metadata = BlobMetadata(
        type="apache-datasketches-theta-v1",
        snapshot_id=snapshot_id,
        sequence_number=2,
        fields=[1],
        properties={"prop-key": "prop-value"},
    )

    statistics_file = StatisticsFile(
        snapshot_id=snapshot_id,
        statistics_path="s3://bucket/warehouse/stats.puffin",
        file_size_in_bytes=124,
        file_footer_size_in_bytes=27,
        blob_metadata=[blob_metadata],
    )

    # Create update without explicitly providing snapshot_id
    update = SetStatisticsUpdate(statistics=statistics_file)

    # Validator should auto-populate snapshot_id from statistics
    assert update.snapshot_id == snapshot_id

    new_metadata = update_table_metadata(
        table_v2_with_statistics.metadata,
        (update,),
    )

    assert len(new_metadata.statistics) == 2
    updated_statistics = [stat for stat in new_metadata.statistics if stat.snapshot_id == snapshot_id]
    assert len(updated_statistics) == 1

Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from abc import ABC, abstractmethod
from datetime import datetime
from functools import singledispatch
from typing import TYPE_CHECKING, Annotated, Any, Generic, Literal, TypeVar, cast
from typing import TYPE_CHECKING, Annotated, Any, Generic, Literal, TypeVar, cast, Self

from pydantic import Field, field_validator, model_serializer, model_validator

Expand Down Expand Up @@ -179,13 +179,9 @@ class SetStatisticsUpdate(IcebergBaseModel):
description="snapshot-id is **DEPRECATED for REMOVAL** since it contains redundant information. Use `statistics.snapshot-id` field instead.",
)

@model_validator(mode="before")
def validate_snapshot_id(cls, data: dict[str, Any]) -> dict[str, Any]:
stats = cast(StatisticsFile, data["statistics"])

data["snapshot_id"] = stats.snapshot_id

return data
@model_validator(mode="after")
def validate_snapshot_id(self) -> Self:
return self.model_copy(update={"snapshot_id": self.statistics.snapshot_id})
Comment on lines +182 to +184
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@model_validator(mode="after")
def validate_snapshot_id(self) -> Self:
return self.model_copy(update={"snapshot_id": self.statistics.snapshot_id})
@model_validator(mode="after")
def validate_snapshot_id(self) -> Self:
self.snapshot_id = self.statistics.snapshot_id
return self

nit: use direct assignment



class RemoveStatisticsUpdate(IcebergBaseModel):
Expand Down
Loading