|
| 1 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +# or more contributor license agreements. See the NOTICE file |
| 3 | +# distributed with this work for additional information |
| 4 | +# regarding copyright ownership. The ASF licenses this file |
| 5 | +# to you under the Apache License, Version 2.0 (the |
| 6 | +# "License"); you may not use this file except in compliance |
| 7 | +# with the License. You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, |
| 12 | +# software distributed under the License is distributed on an |
| 13 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +# KIND, either express or implied. See the License for the |
| 15 | +# specific language governing permissions and limitations |
| 16 | +# under the License. |
| 17 | +# pylint:disable=redefined-outer-name,eval-used |
| 18 | +from typing import cast |
| 19 | +from unittest.mock import patch |
| 20 | + |
| 21 | +import pytest |
| 22 | + |
| 23 | +from pyiceberg.exceptions import ValidationException |
| 24 | +from pyiceberg.io import FileIO |
| 25 | +from pyiceberg.manifest import ManifestContent, ManifestFile |
| 26 | +from pyiceberg.table import Table |
| 27 | +from pyiceberg.table.snapshots import Operation, Snapshot |
| 28 | +from pyiceberg.table.update.validate import validation_history |
| 29 | + |
| 30 | + |
@pytest.fixture
def table_v2_with_extensive_snapshots_and_manifests(
    table_v2_with_extensive_snapshots: Table,
) -> tuple[Table, dict[int, list[ManifestFile]]]:
    """Pair the extensive-snapshot table with one fake manifest per snapshot.

    Snapshots are visited in the order returned by ``Table.snapshots()``.
    Snapshots at even positions get a DATA manifest and those at odd positions
    a DELETES manifest; every manifest records the snapshot it was built for
    as its ``added_snapshot_id``.

    Returns:
        The table itself, plus a mapping from snapshot id to a single-element
        list holding the fake manifest created for that snapshot.
    """

    def _fake_manifest(position: int, owner: Snapshot) -> ManifestFile:
        # Alternate the content type so content filters have something to reject.
        content_kind = ManifestContent.DELETES if position % 2 else ManifestContent.DATA
        return ManifestFile.from_args(
            manifest_path=f"foo/bar/{position}",
            manifest_length=1,
            partition_spec_id=1,
            content=content_kind,
            sequence_number=1,
            min_sequence_number=1,
            added_snapshot_id=owner.snapshot_id,
        )

    manifests_by_snapshot_id = {
        snap.snapshot_id: [_fake_manifest(idx, snap)]
        for idx, snap in enumerate(table_v2_with_extensive_snapshots.snapshots())
    }

    return table_v2_with_extensive_snapshots, manifests_by_snapshot_id
| 53 | + |
| 54 | + |
def test_validation_history(
    table_v2_with_extensive_snapshots_and_manifests: tuple[Table, dict[int, list[ManifestFile]]],
) -> None:
    """Check how many manifests ``validation_history`` returns for APPEND/DATA.

    ``Snapshot.manifests`` is replaced with a lookup into the fixture's
    per-snapshot fake manifests. The number of manifests returned must equal
    the number of fixture entries whose manifest content is DATA.
    """
    table, manifests_by_snapshot_id = table_v2_with_extensive_snapshots_and_manifests

    first_snapshot = table.snapshots()[0]
    current_snapshot = cast(Snapshot, table.current_snapshot())

    # Each fixture entry is a one-element list, so inspect its only manifest.
    data_manifest_total = sum(
        1 for entries in manifests_by_snapshot_id.values() if entries[0].content == ManifestContent.DATA
    )

    def _manifests_for_snapshot(self: Snapshot, io: FileIO) -> list[ManifestFile]:
        """Stand in for ``Snapshot.manifests``: look up by snapshot id, empty if unknown."""
        return manifests_by_snapshot_id.get(self.snapshot_id, [])

    with patch("pyiceberg.table.snapshots.Snapshot.manifests", new=_manifests_for_snapshot):
        # Only the manifests are checked here; the returned snapshots are unused.
        found_manifests, _found_snapshots = validation_history(
            table,
            current_snapshot,
            first_snapshot,
            {Operation.APPEND},
            ManifestContent.DATA,
        )

        assert len(found_manifests) == data_manifest_total
| 81 | + |
| 82 | + |
def test_validation_history_fails_on_snapshot_with_no_summary(
    table_v2_with_extensive_snapshots_and_manifests: tuple[Table, dict[int, list[ManifestFile]]],
) -> None:
    """Check that ``validation_history`` rejects a snapshot that has no summary.

    ``ancestors_between`` (as looked up from ``pyiceberg.table.update.validate``)
    is patched to return a single hand-built snapshot whose ``summary`` is
    ``None``. The call is expected to raise ``ValidationException``.
    """
    table, _ = table_v2_with_extensive_snapshots_and_manifests
    oldest_snapshot = table.snapshots()[0]
    newest_snapshot = cast(Snapshot, table.current_snapshot())

    # Build a snapshot with no summary. Snapshot ids are integers, so pass ints
    # rather than relying on str -> int coercion, and use the model's own field
    # name for the parent id. No operation is passed: it is carried by the
    # summary, which this snapshot deliberately lacks.
    snapshot_with_no_summary = Snapshot(
        snapshot_id=1234,
        parent_snapshot_id=5678,
        timestamp_ms=0,
        summary=None,
        manifest_list="foo/bar",
    )

    with patch("pyiceberg.table.update.validate.ancestors_between", return_value=[snapshot_with_no_summary]):
        with pytest.raises(ValidationException):
            validation_history(
                table,
                newest_snapshot,
                oldest_snapshot,
                {Operation.APPEND},
                ManifestContent.DATA,
            )
| 109 | + |
| 110 | + |
def test_validation_history_fails_on_from_snapshot_not_matching_last_snapshot(
    table_v2_with_extensive_snapshots_and_manifests: tuple[Table, dict[int, list[ManifestFile]]],
) -> None:
    """Check that ``validation_history`` fails when the ancestry omits the oldest snapshot.

    ``ancestors_between`` is patched to return every table snapshot except the
    first one, while the first snapshot is still passed to
    ``validation_history``. The call is expected to raise
    ``ValidationException``.
    """
    table, manifests_by_snapshot_id = table_v2_with_extensive_snapshots_and_manifests

    all_snapshots = table.snapshots()
    first_snapshot = all_snapshots[0]
    current_snapshot = cast(Snapshot, table.current_snapshot())

    def _manifests_for_snapshot(self: Snapshot, io: FileIO) -> list[ManifestFile]:
        """Stand in for ``Snapshot.manifests``: look up by snapshot id, empty if unknown."""
        return manifests_by_snapshot_id.get(self.snapshot_id, [])

    # Ancestry that skips the first (oldest) snapshot of the table.
    ancestry_without_oldest = table.snapshots()[1:]

    # Creating the patchers does not apply them; they take effect inside `with`.
    manifests_patch = patch("pyiceberg.table.snapshots.Snapshot.manifests", new=_manifests_for_snapshot)
    ancestors_patch = patch(
        "pyiceberg.table.update.validate.ancestors_between",
        return_value=ancestry_without_oldest,
    )

    with manifests_patch, ancestors_patch, pytest.raises(ValidationException):
        validation_history(
            table,
            current_snapshot,
            first_snapshot,
            {Operation.APPEND},
            ManifestContent.DATA,
        )
0 commit comments