From 083b45811cf4791373dbc5edb737664131030705 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Wed, 11 Jun 2025 20:40:57 -0400 Subject: [PATCH 1/7] feat: support optional params in rest catalog --- pyiceberg/catalog/rest/__init__.py | 33 ++++++++++++++++++++++++++++-- tests/catalog/test_rest.py | 9 +++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 5ad7faef89..a3b31ff56e 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import re from enum import Enum from typing import ( TYPE_CHECKING, @@ -82,6 +83,9 @@ ICEBERG_REST_SPEC_VERSION = "0.14.1" +CAMEL_TO_SNAKE_CASE_PATTERN = re.compile(r"(? dict[str, Any] | None: + """Get the query parameters for the endpoint.""" + if not kwargs or endpoint not in ENDPOINT_PARAMS_MAP: + return None + + snake_case_query_params = { + param: CAMEL_TO_SNAKE_CASE_PATTERN.sub("_", param).lower() for param in ENDPOINT_PARAMS_MAP[endpoint] + } + + _params = {} + for camel, snake in snake_case_query_params.items(): + if snake in kwargs: + _params[camel] = kwargs[snake] + + return _params + + class IdentifierKind(Enum): TABLE = "table" VIEW = "view" @@ -584,7 +610,9 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: return self._response_to_table(self.identifier_to_tuple(identifier), table_response) @retry(**_RETRY_ARGS) - def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: + def list_tables( + self, namespace: Union[str, Identifier], page_token: str | None = None, page_size: int | None = None + ) -> List[Identifier]: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat)) @@ -616,8 +644,9 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: @retry(**_RETRY_ARGS) def drop_table(self, identifier: Union[str, Identifier], purge_requested: bool = False) -> None: + params = _get_endpoint_params(Endpoints.drop_table, purge_requested=purge_requested) response = self._session.delete( - self.url(Endpoints.drop_table, prefixed=True, purge=purge_requested, **self._split_identifier_for_path(identifier)), + self.url(Endpoints.drop_table, prefixed=True, **self._split_identifier_for_path(identifier)), params=params ) try: response.raise_for_status() diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 1ad6f57d36..9cea0f221a 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -24,7 +24,7 @@ import pyiceberg from pyiceberg.catalog import PropertiesUpdateSummary, load_catalog -from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, SNAPSHOT_LOADING_MODE, RestCatalog +from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, SNAPSHOT_LOADING_MODE, Endpoints, RestCatalog, _get_endpoint_params from pyiceberg.exceptions import ( AuthorizationExpiredError, NamespaceAlreadyExistsError, @@ -1621,3 +1621,10 @@ def test_drop_view_204(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).drop_view(("some_namespace", "some_view")) + + +def test_get_endpoint_params() -> None: + params = _get_endpoint_params(Endpoints.drop_table, purge_requested=True) + assert params == { + "purgeRequested": True, + } From 2ce5e401858c4f56b6428cfa037af7a07ae835eb Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Wed, 11 Jun 2025 21:34:40 -0400 Subject: [PATCH 2/7] add test for the None case --- tests/catalog/test_rest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 9cea0f221a..b659ccb3d1 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1628,3 +1628,8 @@ def test_get_endpoint_params() -> None: assert params == { "purgeRequested": True, } + + +def test_get_endpoint_with_no_params() -> None: + params = _get_endpoint_params(Endpoints.namespace_exists, purge_requested=True) + assert params is None From c0158407be305f415f60fd4950485b9a183baf0f Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Wed, 11 Jun 2025 21:53:19 -0400 Subject: [PATCH 3/7] handle `None` being passed in for a value --- pyiceberg/catalog/rest/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index a3b31ff56e..ae24d059e7 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -125,6 +125,8 @@ def _get_endpoint_params(endpoint: str, **kwargs: Any) -> dict[str, Any] | None: _params = {} for camel, snake in snake_case_query_params.items(): if snake in kwargs: + if kwargs[snake] is None: + continue _params[camel] = kwargs[snake] return _params From f66cd3125ed21cb5b29b8060a2b637d8f97bef18 Mon Sep 17 00:00:00 2001 From: Jayce Slesar <47452474+jayceslesar@users.noreply.github.com> Date: Thu, 12 Jun 2025 10:23:49 -0400 Subject: [PATCH 4/7] Update pyiceberg/catalog/rest/__init__.py --- pyiceberg/catalog/rest/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index ae24d059e7..2570dcfe86 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -613,8 +613,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: @retry(**_RETRY_ARGS) def list_tables( - self, namespace: Union[str, Identifier], page_token: str | None = None, page_size: int | None = None - ) -> List[Identifier]: + self, namespace: Union[str, Identifier]) -> List[Identifier]: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat)) From eee7c0898dc92a99aeacaef8a068c63881d884d1 Mon Sep 17 00:00:00 2001 From: Jayce Slesar <47452474+jayceslesar@users.noreply.github.com> Date: Thu, 12 Jun 2025 10:24:51 -0400 Subject: [PATCH 5/7] Update pyiceberg/catalog/rest/__init__.py --- pyiceberg/catalog/rest/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 2570dcfe86..a73699bb82 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -612,8 +612,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: return self._response_to_table(self.identifier_to_tuple(identifier), table_response) @retry(**_RETRY_ARGS) - def list_tables( - self, namespace: Union[str, Identifier]) -> List[Identifier]: + def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat)) From f1668f72ac71f09db2d1f5c619cec25ac6375dbd Mon Sep 17 00:00:00 2001 From: Jayce Date: Fri, 13 Jun 2025 10:47:32 -0400 Subject: [PATCH 6/7] let requests handle the params --- pyiceberg/catalog/rest/__init__.py | 30 +++--------------------------- tests/catalog/test_rest.py | 14 +------------- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index a73699bb82..6d9ff84ece 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -99,7 +99,7 @@ class Endpoints: register_table = "namespaces/{namespace}/register" load_table: str = "namespaces/{namespace}/tables/{table}" update_table: str = "namespaces/{namespace}/tables/{table}" - drop_table: str = "namespaces/{namespace}/tables/{table}?purgeRequested={purge}" + drop_table: str = "namespaces/{namespace}/tables/{table}" table_exists: str = "namespaces/{namespace}/tables/{table}" get_token: str = "oauth/tokens" rename_table: str = "tables/rename" @@ -108,30 +108,6 @@ class Endpoints: view_exists: str = "namespaces/{namespace}/views/{view}" -ENDPOINT_PARAMS_MAP: dict[str, tuple[str]] = { - Endpoints.drop_table: ("purgeRequested",), -} - - -def _get_endpoint_params(endpoint: str, **kwargs: Any) -> dict[str, Any] | None: - """Get the query parameters for the endpoint.""" - if not kwargs or endpoint not in ENDPOINT_PARAMS_MAP: - return None - - snake_case_query_params = { - param: CAMEL_TO_SNAKE_CASE_PATTERN.sub("_", param).lower() for param in ENDPOINT_PARAMS_MAP[endpoint] - } - - _params = {} - for camel, snake in snake_case_query_params.items(): - if snake in kwargs: - if kwargs[snake] is None: - continue - _params[camel] = kwargs[snake] - - return _params - - class IdentifierKind(Enum): TABLE = "table" VIEW = "view" @@ -644,9 +620,9 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: @retry(**_RETRY_ARGS) def drop_table(self, identifier: Union[str, Identifier], purge_requested: bool = False) -> None: - params = _get_endpoint_params(Endpoints.drop_table, purge_requested=purge_requested) response = self._session.delete( - self.url(Endpoints.drop_table, prefixed=True, **self._split_identifier_for_path(identifier)), params=params + self.url(Endpoints.drop_table, prefixed=True, **self._split_identifier_for_path(identifier)), + params={"purgeRequested": purge_requested}, ) try: response.raise_for_status() diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index b659ccb3d1..1ad6f57d36 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -24,7 +24,7 @@ import pyiceberg from pyiceberg.catalog import PropertiesUpdateSummary, load_catalog -from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, SNAPSHOT_LOADING_MODE, Endpoints, RestCatalog, _get_endpoint_params +from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, SNAPSHOT_LOADING_MODE, RestCatalog from pyiceberg.exceptions import ( AuthorizationExpiredError, NamespaceAlreadyExistsError, @@ -1621,15 +1621,3 @@ def test_drop_view_204(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).drop_view(("some_namespace", "some_view")) - - -def test_get_endpoint_params() -> None: - params = _get_endpoint_params(Endpoints.drop_table, purge_requested=True) - assert params == { - "purgeRequested": True, - } - - -def test_get_endpoint_with_no_params() -> None: - params = _get_endpoint_params(Endpoints.namespace_exists, purge_requested=True) - assert params is None From 9861dfb266d7839452afe21ce64374ee30ecfc5d Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Fri, 13 Jun 2025 18:50:33 -0400 Subject: [PATCH 7/7] remove constant and import --- pyiceberg/catalog/rest/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 6d9ff84ece..3f59a196ea 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import re from enum import Enum from typing import ( TYPE_CHECKING, @@ -83,9 +82,6 @@ ICEBERG_REST_SPEC_VERSION = "0.14.1" -CAMEL_TO_SNAKE_CASE_PATTERN = re.compile(r"(?