Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ catalog:
| rest.signing-region | us-east-1 | The region to use when SigV4 signing a request |
| rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request |
| oauth2-server-uri | <https://auth-service/cc> | Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') |
| snapshot-loading-mode | refs | The snapshots to return in the body of the metadata. Setting the value to `all` would return the full set of snapshots currently valid for the table. Setting the value to `refs` would load all snapshots referenced by branches or tags. |

<!-- markdown-link-check-enable-->

Expand Down
12 changes: 11 additions & 1 deletion pyiceberg/catalog/rest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ class IdentifierKind(Enum):
SIGV4_REGION = "rest.signing-region"
SIGV4_SERVICE = "rest.signing-name"
OAUTH2_SERVER_URI = "oauth2-server-uri"
SNAPSHOT_LOADING_MODE = "snapshot-loading-mode"

NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)

Expand Down Expand Up @@ -678,7 +679,16 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]:

@retry(**_RETRY_ARGS)
def load_table(self, identifier: Union[str, Identifier]) -> Table:
response = self._session.get(self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier)))
params = {}
if mode := self.properties.get(SNAPSHOT_LOADING_MODE):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it possible to add this as a parameter to load_table that takes preference over session-level properties? I guess it's not part of the base catalog interface but 🤷🏻‍♂️

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good seeing you here @corleyma, and I share your concern here. In some situations, we want to override this as well:

  • To enable time-travel.
  • In the case of a write, we want to know about all the snapshots to see if there are any conflicts.

I'm open to adding an option to the base interface, however, it does not make a lot of sense in the case of non-REST catalogs where we have to read the JSON anyway.

Copy link

@corleyma corleyma May 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for coming back to this late.

Maybe the base load_table interface should just change to accept kwargs, and we can add here some implementation-specific kwargs for REST catalog?

if mode in {"all", "refs"}:
params["snapshots"] = mode
else:
raise ValueError("Invalid snapshot-loading-mode: {}")
Comment on lines +683 to +687
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


response = self._session.get(
self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier)), params=params
)
try:
response.raise_for_status()
except HTTPError as exc:
Expand Down
25 changes: 24 additions & 1 deletion tests/catalog/test_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import pyiceberg
from pyiceberg.catalog import PropertiesUpdateSummary, load_catalog
from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, RestCatalog
from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, SNAPSHOT_LOADING_MODE, RestCatalog
from pyiceberg.exceptions import (
AuthorizationExpiredError,
NamespaceAlreadyExistsError,
Expand Down Expand Up @@ -853,6 +853,29 @@ def test_load_table_200(rest_mock: Mocker, example_table_metadata_with_snapshot_
assert actual == expected


def test_load_table_200_loading_mode(
rest_mock: Mocker, example_table_metadata_with_snapshot_v1_rest_json: Dict[str, Any]
) -> None:
rest_mock.get(
f"{TEST_URI}v1/namespaces/fokko/tables/table?snapshots=refs",
json=example_table_metadata_with_snapshot_v1_rest_json,
status_code=200,
request_headers=TEST_HEADERS,
)
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{SNAPSHOT_LOADING_MODE: "refs"})
actual = catalog.load_table(("fokko", "table"))
expected = Table(
identifier=("fokko", "table"),
metadata_location=example_table_metadata_with_snapshot_v1_rest_json["metadata-location"],
metadata=TableMetadataV1(**example_table_metadata_with_snapshot_v1_rest_json["metadata"]),
io=load_file_io(),
catalog=catalog,
)
# First compare the dicts
assert actual.metadata.model_dump() == expected.metadata.model_dump()
assert actual == expected


def test_load_table_honor_access_delegation(
rest_mock: Mocker, example_table_metadata_with_snapshot_v1_rest_json: Dict[str, Any]
) -> None:
Expand Down