Skip to content

Commit 790ebb0

Browse files
committed
encryption key
1 parent e5e7453 commit 790ebb0

File tree

6 files changed

+122
-1
lines changed

6 files changed

+122
-1
lines changed

pyiceberg/table/encryption.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from typing import Optional
19+
from pydantic import Field
20+
from pyiceberg.typedef import IcebergBaseModel
21+
22+
23+
class EncryptedKey(IcebergBaseModel):
24+
key_id: str = Field(alias="key-id", description="ID of the encryption key")
25+
encrypted_key_metadata: bytes = Field(alias="encrypted-key-metadata", description="Encrypted key and metadata, base64 encoded")
26+
encrypted_by_id: Optional[str] = Field(alias="encrypted-by-id", description="Optional ID of the key used to encrypt or wrap `key-metadata`", default=None)
27+
properties: Optional[dict[str, str]] = Field(alias="properties", description="A string to string map of additional metadata used by the table's encryption scheme", default=None)

pyiceberg/table/metadata.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pyiceberg.exceptions import ValidationError
2828
from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec, assign_fresh_partition_spec_ids
2929
from pyiceberg.schema import Schema, assign_fresh_schema_ids
30+
from pyiceberg.table.encryption import EncryptedKey
3031
from pyiceberg.table.name_mapping import NameMapping, parse_mapping_from_json
3132
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
3233
from pyiceberg.table.snapshots import MetadataLogEntry, Snapshot, SnapshotLogEntry
@@ -526,6 +527,7 @@ class TableMetadataV3(TableMetadataCommonFields, IcebergBaseModel):
526527
- Multi-argument transforms for partitioning and sorting
527528
- Row Lineage tracking
528529
- Binary deletion vectors
530+
- Encryption Keys
529531
530532
For more information:
531533
https://iceberg.apache.org/spec/?column-projection#version-3-extended-types-and-capabilities
@@ -562,6 +564,9 @@ def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
562564
next_row_id: Optional[int] = Field(alias="next-row-id", default=None)
563565
"""A long higher than all assigned row IDs; the next snapshot's `first-row-id`."""
564566

567+
encryption_keys: List[EncryptedKey] = Field(alias="encryption-keys", default=[])
568+
"""The list of encryption keys for this table."""
569+
565570
def model_dump_json(
566571
self, exclude_none: bool = True, exclude: Optional[Any] = None, by_alias: bool = True, **kwargs: Any
567572
) -> str:

pyiceberg/table/snapshots.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ class Snapshot(IcebergBaseModel):
244244
manifest_list: str = Field(alias="manifest-list", description="Location of the snapshot's manifest list file")
245245
summary: Optional[Summary] = Field(default=None)
246246
schema_id: Optional[int] = Field(alias="schema-id", default=None)
247+
key_id: Optional[str] = Field(alias="key-id", default=None, description="The id of the encryption key")
247248

248249
def __str__(self) -> str:
249250
"""Return the string representation of the Snapshot class."""

pyiceberg/table/update/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from pyiceberg.exceptions import CommitFailedException
2929
from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec
3030
from pyiceberg.schema import Schema
31+
from pyiceberg.table.encryption import EncryptedKey
3132
from pyiceberg.table.metadata import SUPPORTED_TABLE_FORMAT_VERSION, TableMetadata, TableMetadataUtil
3233
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
3334
from pyiceberg.table.snapshots import (
@@ -89,6 +90,13 @@ class UpgradeFormatVersionUpdate(IcebergBaseModel):
8990
action: Literal["upgrade-format-version"] = Field(default="upgrade-format-version")
9091
format_version: int = Field(alias="format-version")
9192

93+
class AddEncryptedKeyUpdate(IcebergBaseModel):
94+
action: Literal["add-encryption-key"] = Field(default="add-encryption-key")
95+
key: EncryptedKey = Field(alias="key")
96+
97+
class RemoveEncryptedKeyUpdate(IcebergBaseModel):
98+
action: Literal["remove-encryption-key"] = Field(default="remove-encryption-key")
99+
key_id: str = Field(alias="key-id")
92100

93101
class AddSchemaUpdate(IcebergBaseModel):
94102
action: Literal["add-schema"] = Field(default="add-schema")
@@ -230,6 +238,8 @@ class RemovePartitionStatisticsUpdate(IcebergBaseModel):
230238
RemoveSchemasUpdate,
231239
SetPartitionStatisticsUpdate,
232240
RemovePartitionStatisticsUpdate,
241+
AddEncryptedKeyUpdate,
242+
RemoveEncryptedKeyUpdate,
233243
],
234244
Field(discriminator="action"),
235245
]
@@ -594,6 +604,14 @@ def _(update: RemoveStatisticsUpdate, base_metadata: TableMetadata, context: _Ta
594604

595605
return base_metadata.model_copy(update={"statistics": statistics})
596606

607+
@_apply_table_update.register(AddEncryptedKeyUpdate)
608+
def _(update: AddEncryptedKeyUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
609+
context.add_update(update)
610+
611+
if base_metadata.format_version <= 2:
612+
raise ValueError("Cannot add encryption keys to Iceberg v1 or v2 tables")
613+
614+
return base_metadata.model_copy(update={"encryption_keys": base_metadata.encryption_keys + [update.key]})
597615

598616
@_apply_table_update.register(RemoveSchemasUpdate)
599617
def _(update: RemoveSchemasUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:

tests/conftest.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
from pyiceberg.schema import Accessor, Schema
7373
from pyiceberg.serializers import ToOutputFile
7474
from pyiceberg.table import FileScanTask, Table
75-
from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2
75+
from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2, TableMetadataV3
7676
from pyiceberg.transforms import DayTransform, IdentityTransform
7777
from pyiceberg.types import (
7878
BinaryType,
@@ -2467,6 +2467,17 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
24672467
catalog=NoopCatalog("NoopCatalog"),
24682468
)
24692469

2470+
@pytest.fixture
2471+
def table_v3(example_table_metadata_v3: Dict[str, Any]) -> Table:
2472+
table_metadata = TableMetadataV3(**example_table_metadata_v3)
2473+
return Table(
2474+
identifier=("database", "table"),
2475+
metadata=table_metadata,
2476+
metadata_location=f"{table_metadata.location}/uuid.metadata.json",
2477+
io=load_file_io(),
2478+
catalog=NoopCatalog("NoopCatalog"),
2479+
)
2480+
24702481

24712482
@pytest.fixture
24722483
def table_v2_orc(example_table_metadata_v2: Dict[str, Any]) -> Table:

tests/table/test_init.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
# pylint:disable=redefined-outer-name
18+
import base64
1819
import json
1920
import uuid
2021
from copy import copy
@@ -49,6 +50,7 @@
4950
TableIdentifier,
5051
_match_deletes_to_data_file,
5152
)
53+
from pyiceberg.table.encryption import EncryptedKey
5254
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadataUtil, TableMetadataV2, _generate_snapshot_id
5355
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
5456
from pyiceberg.table.snapshots import (
@@ -66,6 +68,7 @@
6668
)
6769
from pyiceberg.table.statistics import BlobMetadata, PartitionStatisticsFile, StatisticsFile
6870
from pyiceberg.table.update import (
71+
AddEncryptedKeyUpdate,
6972
AddSnapshotUpdate,
7073
AddSortOrderUpdate,
7174
AssertCreate,
@@ -1419,6 +1422,13 @@ def test_set_partition_statistics_update(table_v2_with_statistics: Table) -> Non
14191422
new_metadata = update_table_metadata(
14201423
table_v2_with_statistics.metadata,
14211424
(update,),
1425+
1426+
def test_add_encryption_key(table_v3: Table) -> None:
1427+
update = AddEncryptedKeyUpdate(
1428+
key=EncryptedKey(
1429+
key_id="test",
1430+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1431+
)
14221432
)
14231433

14241434
expected = """
@@ -1477,3 +1487,52 @@ def test_remove_partition_statistics_update_with_invalid_snapshot_id(table_v2_wi
14771487
table_v2_with_statistics.metadata,
14781488
(RemovePartitionStatisticsUpdate(snapshot_id=123456789),),
14791489
)
1490+
"key-id": "test",
1491+
"encrypted-key-metadata": "aGVsbG8="
1492+
}"""
1493+
1494+
assert table_v3.metadata.encryption_keys == []
1495+
add_metadata = update_table_metadata(table_v3.metadata, (update,))
1496+
assert len(add_metadata.encryption_keys) == 1
1497+
1498+
assert json.loads(add_metadata.encryption_keys[0].model_dump_json()) == json.loads(expected)
1499+
1500+
def test_remove_encryption_key(table_v3: Table) -> None:
1501+
update_add = AddEncryptedKeyUpdate(
1502+
key=EncryptedKey(
1503+
key_id="test",
1504+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1505+
)
1506+
)
1507+
add_metadata = update_table_metadata(table_v3.metadata, (update_add,))
1508+
assert len(add_metadata.encryption_keys) == 1
1509+
1510+
update_remove = RemoveEncryptedKeyUpdate(key_id="test")
1511+
remove_metadata = update_table_metadata(add_metadata, (update_remove,))
1512+
assert len(remove_metadata.encryption_keys) == 0
1513+
1514+
1515+
def test_remove_non_existent_encryption_key(table_v3: Table) -> None:
1516+
update_add = AddEncryptedKeyUpdate(
1517+
key=EncryptedKey(
1518+
key_id="test",
1519+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1520+
)
1521+
)
1522+
add_metadata = update_table_metadata(table_v3.metadata, (update_add,))
1523+
assert len(add_metadata.encryption_keys) == 1
1524+
1525+
update_remove = RemoveEncryptedKeyUpdate(key_id="non_existent_key")
1526+
remove_metadata = update_table_metadata(add_metadata, (update_remove,))
1527+
assert len(remove_metadata.encryption_keys) == 1 # Should be a no-op
1528+
1529+
1530+
def test_add_remove_encryption_key_v2_table(table_v2: Table) -> None:
1531+
update_add = AddEncryptedKeyUpdate(
1532+
key=EncryptedKey(
1533+
key_id="test_v2",
1534+
encrypted_key_metadata=base64.b64encode("hello_v2".encode('utf-8'))
1535+
)
1536+
)
1537+
with pytest.raises(ValueError, match=r"Cannot add encryption keys from Iceberg v1 or v2 table"):
1538+
update_table_metadata(table_v2.metadata, (update_add,))

0 commit comments

Comments
 (0)