Skip to content

Commit 870c76e

Browse files
committed
encryption key
1 parent 4cac691 commit 870c76e

File tree

6 files changed

+122
-1
lines changed

6 files changed

+122
-1
lines changed

pyiceberg/table/encryption.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from typing import Optional
19+
from pydantic import Field
20+
from pyiceberg.typedef import IcebergBaseModel
21+
22+
23+
class EncryptedKey(IcebergBaseModel):
24+
key_id: str = Field(alias="key-id", description="ID of the encryption key")
25+
encrypted_key_metadata: bytes = Field(alias="encrypted-key-metadata", description="Encrypted key and metadata, base64 encoded")
26+
encrypted_by_id: Optional[str] = Field(alias="encrypted-by-id", description="Optional ID of the key used to encrypt or wrap `key-metadata`", default=None)
27+
properties: Optional[dict[str, str]] = Field(alias="properties", description="A string to string map of additional metadata used by the table's encryption scheme", default=None)

pyiceberg/table/metadata.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pyiceberg.exceptions import ValidationError
2828
from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec, assign_fresh_partition_spec_ids
2929
from pyiceberg.schema import Schema, assign_fresh_schema_ids
30+
from pyiceberg.table.encryption import EncryptedKey
3031
from pyiceberg.table.name_mapping import NameMapping, parse_mapping_from_json
3132
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
3233
from pyiceberg.table.snapshots import MetadataLogEntry, Snapshot, SnapshotLogEntry
@@ -524,6 +525,7 @@ class TableMetadataV3(TableMetadataCommonFields, IcebergBaseModel):
524525
- Multi-argument transforms for partitioning and sorting
525526
- Row Lineage tracking
526527
- Binary deletion vectors
528+
- Encryption Keys
527529
528530
For more information:
529531
https://iceberg.apache.org/spec/?column-projection#version-3-extended-types-and-capabilities
@@ -560,6 +562,9 @@ def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
560562
next_row_id: Optional[int] = Field(alias="next-row-id", default=None)
561563
"""A long higher than all assigned row IDs; the next snapshot's `first-row-id`."""
562564

565+
encryption_keys: List[EncryptedKey] = Field(alias="encryption-keys", default=[])
566+
"""The list of encryption keys for this table."""
567+
563568
def model_dump_json(
564569
self, exclude_none: bool = True, exclude: Optional[Any] = None, by_alias: bool = True, **kwargs: Any
565570
) -> str:

pyiceberg/table/snapshots.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ class Snapshot(IcebergBaseModel):
244244
manifest_list: str = Field(alias="manifest-list", description="Location of the snapshot's manifest list file")
245245
summary: Optional[Summary] = Field(default=None)
246246
schema_id: Optional[int] = Field(alias="schema-id", default=None)
247+
key_id: Optional[str] = Field(alias="key-id", default=None, description="The id of the encryption key")
247248

248249
def __str__(self) -> str:
249250
"""Return the string representation of the Snapshot class."""

pyiceberg/table/update/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from pyiceberg.exceptions import CommitFailedException
2929
from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec
3030
from pyiceberg.schema import Schema
31+
from pyiceberg.table.encryption import EncryptedKey
3132
from pyiceberg.table.metadata import SUPPORTED_TABLE_FORMAT_VERSION, TableMetadata, TableMetadataUtil
3233
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
3334
from pyiceberg.table.snapshots import (
@@ -88,6 +89,13 @@ class UpgradeFormatVersionUpdate(IcebergBaseModel):
8889
action: Literal["upgrade-format-version"] = Field(default="upgrade-format-version")
8990
format_version: int = Field(alias="format-version")
9091

92+
class AddEncryptedKeyUpdate(IcebergBaseModel):
93+
action: Literal["add-encryption-key"] = Field(default="add-encryption-key")
94+
key: EncryptedKey = Field(alias="key")
95+
96+
class RemoveEncryptedKeyUpdate(IcebergBaseModel):
97+
action: Literal["remove-encryption-key"] = Field(default="remove-encryption-key")
98+
key_id: str = Field(alias="key-id")
9199

92100
class AddSchemaUpdate(IcebergBaseModel):
93101
action: Literal["add-schema"] = Field(default="add-schema")
@@ -233,6 +241,8 @@ class RemovePartitionStatisticsUpdate(IcebergBaseModel):
233241
RemoveStatisticsUpdate,
234242
SetPartitionStatisticsUpdate,
235243
RemovePartitionStatisticsUpdate,
244+
AddEncryptedKeyUpdate,
245+
RemoveEncryptedKeyUpdate,
236246
],
237247
Field(discriminator="action"),
238248
]
@@ -597,6 +607,14 @@ def _(update: RemoveStatisticsUpdate, base_metadata: TableMetadata, context: _Ta
597607

598608
return base_metadata.model_copy(update={"statistics": statistics})
599609

610+
@_apply_table_update.register(AddEncryptedKeyUpdate)
611+
def _(update: AddEncryptedKeyUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
612+
context.add_update(update)
613+
614+
if base_metadata.format_version <= 2:
615+
raise ValueError("Cannot add encryption keys to Iceberg v1 or v2 tables")
616+
617+
return base_metadata.model_copy(update={"encryption_keys": base_metadata.encryption_keys + [update.key]})
600618

601619
@_apply_table_update.register(SetPartitionStatisticsUpdate)
602620
def _(update: SetPartitionStatisticsUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:

tests/conftest.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
from pyiceberg.schema import Accessor, Schema
7171
from pyiceberg.serializers import ToOutputFile
7272
from pyiceberg.table import FileScanTask, Table
73-
from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2
73+
from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2, TableMetadataV3
7474
from pyiceberg.types import (
7575
BinaryType,
7676
BooleanType,
@@ -2380,6 +2380,17 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
23802380
catalog=NoopCatalog("NoopCatalog"),
23812381
)
23822382

2383+
@pytest.fixture
2384+
def table_v3(example_table_metadata_v3: Dict[str, Any]) -> Table:
2385+
table_metadata = TableMetadataV3(**example_table_metadata_v3)
2386+
return Table(
2387+
identifier=("database", "table"),
2388+
metadata=table_metadata,
2389+
metadata_location=f"{table_metadata.location}/uuid.metadata.json",
2390+
io=load_file_io(),
2391+
catalog=NoopCatalog("NoopCatalog"),
2392+
)
2393+
23832394

23842395
@pytest.fixture
23852396
def table_v2_with_fixed_and_decimal_types(

tests/table/test_init.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
# pylint:disable=redefined-outer-name
18+
import base64
1819
import json
1920
import uuid
2021
from copy import copy
@@ -49,6 +50,7 @@
4950
TableIdentifier,
5051
_match_deletes_to_data_file,
5152
)
53+
from pyiceberg.table.encryption import EncryptedKey
5254
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadataUtil, TableMetadataV2, _generate_snapshot_id
5355
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
5456
from pyiceberg.table.snapshots import (
@@ -66,6 +68,7 @@
6668
)
6769
from pyiceberg.table.statistics import BlobMetadata, PartitionStatisticsFile, StatisticsFile
6870
from pyiceberg.table.update import (
71+
AddEncryptedKeyUpdate,
6972
AddSnapshotUpdate,
7073
AddSortOrderUpdate,
7174
AssertCreate,
@@ -1379,6 +1382,13 @@ def test_set_partition_statistics_update(table_v2_with_statistics: Table) -> Non
13791382
new_metadata = update_table_metadata(
13801383
table_v2_with_statistics.metadata,
13811384
(update,),
1385+
1386+
def test_add_encryption_key(table_v3: Table) -> None:
1387+
update = AddEncryptedKeyUpdate(
1388+
key=EncryptedKey(
1389+
key_id="test",
1390+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1391+
)
13821392
)
13831393

13841394
expected = """
@@ -1437,3 +1447,52 @@ def test_remove_partition_statistics_update_with_invalid_snapshot_id(table_v2_wi
14371447
table_v2_with_statistics.metadata,
14381448
(RemovePartitionStatisticsUpdate(snapshot_id=123456789),),
14391449
)
1450+
"key-id": "test",
1451+
"encrypted-key-metadata": "aGVsbG8="
1452+
}"""
1453+
1454+
assert table_v3.metadata.encryption_keys == []
1455+
add_metadata = update_table_metadata(table_v3.metadata, (update,))
1456+
assert len(add_metadata.encryption_keys) == 1
1457+
1458+
assert json.loads(add_metadata.encryption_keys[0].model_dump_json()) == json.loads(expected)
1459+
1460+
def test_remove_encryption_key(table_v3: Table) -> None:
1461+
update_add = AddEncryptedKeyUpdate(
1462+
key=EncryptedKey(
1463+
key_id="test",
1464+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1465+
)
1466+
)
1467+
add_metadata = update_table_metadata(table_v3.metadata, (update_add,))
1468+
assert len(add_metadata.encryption_keys) == 1
1469+
1470+
update_remove = RemoveEncryptedKeyUpdate(key_id="test")
1471+
remove_metadata = update_table_metadata(add_metadata, (update_remove,))
1472+
assert len(remove_metadata.encryption_keys) == 0
1473+
1474+
1475+
def test_remove_non_existent_encryption_key(table_v3: Table) -> None:
1476+
update_add = AddEncryptedKeyUpdate(
1477+
key=EncryptedKey(
1478+
key_id="test",
1479+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1480+
)
1481+
)
1482+
add_metadata = update_table_metadata(table_v3.metadata, (update_add,))
1483+
assert len(add_metadata.encryption_keys) == 1
1484+
1485+
update_remove = RemoveEncryptedKeyUpdate(key_id="non_existent_key")
1486+
remove_metadata = update_table_metadata(add_metadata, (update_remove,))
1487+
assert len(remove_metadata.encryption_keys) == 1 # Should be a no-op
1488+
1489+
1490+
def test_add_remove_encryption_key_v2_table(table_v2: Table) -> None:
1491+
update_add = AddEncryptedKeyUpdate(
1492+
key=EncryptedKey(
1493+
key_id="test_v2",
1494+
encrypted_key_metadata=base64.b64encode("hello_v2".encode('utf-8'))
1495+
)
1496+
)
1497+
with pytest.raises(ValueError, match=r"Cannot add encryption keys from Iceberg v1 or v2 table"):
1498+
update_table_metadata(table_v2.metadata, (update_add,))

0 commit comments

Comments
 (0)