diff --git a/pyiceberg/types.py b/pyiceberg/types.py index 7e3862b7d3..e92056e082 100644 --- a/pyiceberg/types.py +++ b/pyiceberg/types.py @@ -160,7 +160,7 @@ def is_struct(self) -> bool: return isinstance(self, StructType) -class PrimitiveType(IcebergRootModel[str], IcebergType, Singleton): +class PrimitiveType(Singleton, IcebergRootModel[str], IcebergType): """Base class for all Iceberg Primitive Types.""" root: Any = Field() diff --git a/pyiceberg/utils/singleton.py b/pyiceberg/utils/singleton.py index 90e909ecbc..8a4bbf91ce 100644 --- a/pyiceberg/utils/singleton.py +++ b/pyiceberg/utils/singleton.py @@ -47,3 +47,16 @@ def __new__(cls, *args, **kwargs): # type: ignore if key not in cls._instances: cls._instances[key] = super().__new__(cls) return cls._instances[key] + + def __deepcopy__(self, memo: Dict[int, Any]) -> Any: + """ + Prevent deep copy operations for singletons. + + The IcebergRootModel inherits from Pydantic RootModel, + which has its own implementation of deepcopy. When deepcopy + runs, it calls the RootModel __deepcopy__ method and ignores + that it's a Singleton. To handle this, the order of inheritance + is adjusted and a __deepcopy__ method is implemented for + singletons that simply returns itself. + """ + return self diff --git a/tests/conftest.py b/tests/conftest.py index 2092d93d0e..d200f3ab3c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -865,12 +865,70 @@ def generate_snapshot( "refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", "max-ref-age-ms": 10000000}}, } +TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES = { + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 7, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [1], + "fields": [ + {"id": 1, "name": "x", "required": True, "type": "long"}, + {"id": 4, "name": "a", "required": True, "type": "decimal(16, 2)"}, + {"id": 5, "name": "b", "required": True, "type": "decimal(16, 8)"}, + {"id": 6, "name": "c", "required": True, "type": "fixed[16]"}, + {"id": 7, "name": "d", "required": True, "type": "fixed[18]"}, + ], + } + ], + "default-spec-id": 0, + "partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}], + "last-partition-id": 1000, + "properties": {"read.split.target.size": "134217728"}, + "current-snapshot-id": 3055729675574597004, + "snapshots": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770, + "sequence-number": 0, + "summary": {"operation": "append"}, + "manifest-list": "s3://a/b/1.avro", + }, + { + "snapshot-id": 3055729675574597004, + "parent-snapshot-id": 3051729675574597004, + "timestamp-ms": 1555100955770, + "sequence-number": 1, + "summary": {"operation": "append"}, + "manifest-list": "s3://a/b/2.avro", + "schema-id": 1, + }, + ], + "snapshot-log": [ + {"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770}, + {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770}, + ], + "metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", "timestamp-ms": 1515100}], + "refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", "max-ref-age-ms": 10000000}}, +} + @pytest.fixture def example_table_metadata_v2() -> Dict[str, Any]: return EXAMPLE_TABLE_METADATA_V2 +@pytest.fixture +def table_metadata_v2_with_fixed_and_decimal_types() -> Dict[str, Any]: + return TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES + + @pytest.fixture(scope="session") def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str: from pyiceberg.io.pyarrow import PyArrowFileIO @@ -2064,6 +2122,22 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table: ) +@pytest.fixture +def table_v2_with_fixed_and_decimal_types( + table_metadata_v2_with_fixed_and_decimal_types: Dict[str, Any], +) -> Table: + table_metadata = TableMetadataV2( + **table_metadata_v2_with_fixed_and_decimal_types, + ) + return Table( + identifier=("database", "table"), + metadata=table_metadata, + metadata_location=f"{table_metadata.location}/uuid.metadata.json", + io=load_file_io(), + catalog=NoopCatalog("NoopCatalog"), + ) + + @pytest.fixture def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots: Dict[str, Any]) -> Table: table_metadata = TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 6f8260fa56..d7c4ffeeaf 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -93,7 +93,9 @@ BinaryType, BooleanType, DateType, + DecimalType, DoubleType, + FixedType, FloatType, IntegerType, ListType, @@ -845,6 +847,32 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None: assert new_metadata.properties == {"owner": "test", "test_a": "test_a1"} +def test_update_metadata_schema_immutability( + table_v2_with_fixed_and_decimal_types: TableMetadataV2, +) -> None: + update = SetSnapshotRefUpdate( + ref_name="main", + type="branch", + snapshot_id=3051729675574597004, + max_ref_age_ms=123123123, + max_snapshot_age_ms=12312312312, + min_snapshots_to_keep=1, + ) + + new_metadata = update_table_metadata( + table_v2_with_fixed_and_decimal_types.metadata, + (update,), + ) + + assert new_metadata.schemas[0].fields == ( + NestedField(field_id=1, name="x", field_type=LongType(), required=True), + NestedField(field_id=4, name="a", field_type=DecimalType(precision=16, scale=2), required=True), + NestedField(field_id=5, name="b", field_type=DecimalType(precision=16, scale=8), required=True), + NestedField(field_id=6, name="c", field_type=FixedType(length=16), required=True), + NestedField(field_id=7, name="d", field_type=FixedType(length=18), required=True), + ) + + def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None: base_metadata = table_v1.metadata base_metadata_backup = base_metadata.model_copy(deep=True) diff --git a/tests/test_types.py b/tests/test_types.py index f56f5a0f38..0ffb1d0730 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -619,3 +619,14 @@ def test_types_singleton() -> None: assert id(BooleanType()) == id(BooleanType()) assert id(FixedType(22)) == id(FixedType(22)) assert id(FixedType(19)) != id(FixedType(25)) + + +def test_deepcopy_of_singleton_fixed_type() -> None: + """FixedType is a singleton, so deepcopy should return the same instance""" + from copy import deepcopy + + list_of_fixed_types = [FixedType(22), FixedType(19)] + copied_list = deepcopy(list_of_fixed_types) + + for lhs, rhs in zip(list_of_fixed_types, copied_list): + assert id(lhs) == id(rhs)