diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md
index 7978fdc9b4..d84c82ec2a 100644
--- a/mkdocs/docs/api.md
+++ b/mkdocs/docs/api.md
@@ -215,6 +215,17 @@ static_table = StaticTable.from_metadata(
 
 The static-table is considered read-only.
 
+Alternatively, if your table metadata directory contains a `version-hint.text` file, you can just specify
+the table root path, and the latest metadata file will be picked automatically.
+
+```python
+from pyiceberg.table import StaticTable
+
+static_table = StaticTable.from_metadata(
+    "s3://warehouse/wh/nyc.db/taxis"
+)
+```
+
 ## Check if a table exists
 
 To check whether the `bids` table exists:
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 8f7b45f532..8dd52c4be2 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import itertools
+import os
 import uuid
 import warnings
 from abc import ABC, abstractmethod
@@ -1378,8 +1379,37 @@ def refresh(self) -> Table:
         """Refresh the current table metadata."""
         raise NotImplementedError("To be implemented")
 
+    @classmethod
+    def _metadata_location_from_version_hint(cls, metadata_location: str, properties: Properties = EMPTY_DICT) -> str:
+        """Resolve the metadata file of a table from its `version-hint.text` file.
+
+        Args:
+            metadata_location: Root location of the table, i.e. the parent of the `metadata` directory.
+            properties: Properties used to load the FileIO that reads the version hint.
+
+        Returns:
+            The location of the metadata file that the version hint points to.
+        """
+        version_hint_location = os.path.join(metadata_location, "metadata", "version-hint.text")
+        io = load_file_io(properties=properties, location=version_hint_location)
+        file = io.new_input(version_hint_location)
+
+        with file.open() as stream:
+            # Tolerate surrounding whitespace (e.g. a trailing newline) in the hint file.
+            content = stream.read().decode("utf-8").strip()
+
+        if content.endswith(".metadata.json"):
+            return os.path.join(metadata_location, "metadata", content)
+        elif content.isnumeric():
+            return os.path.join(metadata_location, "metadata", f"v{content}.metadata.json")
+        else:
+            return os.path.join(metadata_location, "metadata", f"{content}.metadata.json")
+
     @classmethod
     def from_metadata(cls, metadata_location: str, properties: Properties = EMPTY_DICT) -> StaticTable:
+        if not metadata_location.endswith(".metadata.json"):
+            metadata_location = StaticTable._metadata_location_from_version_hint(metadata_location, properties)
+
         io = load_file_io(properties=properties, location=metadata_location)
         file = io.new_input(metadata_location)
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a290b5d834..095b139a3e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1121,6 +1121,22 @@ def example_table_metadata_v3() -> Dict[str, Any]:
     return EXAMPLE_TABLE_METADATA_V3
 
 
+@pytest.fixture(scope="session")
+def table_location(tmp_path_factory: pytest.TempPathFactory) -> str:
+    from pyiceberg.io.pyarrow import PyArrowFileIO
+
+    metadata_filename = f"{uuid.uuid4()}.metadata.json"
+    metadata_location = str(tmp_path_factory.getbasetemp() / "metadata" / metadata_filename)
+    version_hint_location = str(tmp_path_factory.getbasetemp() / "metadata" / "version-hint.text")
+    metadata = TableMetadataV2(**EXAMPLE_TABLE_METADATA_V2)
+    ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True)
+
+    with PyArrowFileIO().new_output(location=version_hint_location).create(overwrite=True) as s:
+        s.write(metadata_filename.encode("utf-8"))
+
+    return str(tmp_path_factory.getbasetemp())
+
+
 @pytest.fixture(scope="session")
 def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:
     from pyiceberg.io.pyarrow import PyArrowFileIO
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index 69bbab527e..8de6f4123d 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -383,6 +383,12 @@ def test_static_table_gz_same_as_table(table_v2: Table, metadata_location_gz: st
     assert static_table.metadata == table_v2.metadata
 
 
+def test_static_table_version_hint_same_as_table(table_v2: Table, table_location: str) -> None:
+    static_table = StaticTable.from_metadata(table_location)
+    assert isinstance(static_table, Table)
+    assert static_table.metadata == table_v2.metadata
+
+
 def test_static_table_io_does_not_exist(metadata_location: str) -> None:
     with pytest.raises(ValueError):
         StaticTable.from_metadata(metadata_location, {PY_IO_IMPL: "pyiceberg.does.not.exist.FileIO"})