From 3ee34b77d9a31682b531bd8ff09fac2f71652dc3 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Sun, 1 Jun 2025 16:26:25 +0000 Subject: [PATCH 1/9] add OAuth2AuthManager --- pyiceberg/catalog/rest/auth.py | 87 ++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index 89395f1158..cc692a7165 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -17,9 +17,12 @@ import base64 import importlib +import threading +import time from abc import ABC, abstractmethod from typing import Any, Dict, Optional, Type +import requests from requests import HTTPError, PreparedRequest, Session from requests.auth import AuthBase @@ -42,11 +45,15 @@ def auth_header(self) -> Optional[str]: class NoopAuthManager(AuthManager): + """Auth Manager implementation with no auth.""" + def auth_header(self) -> Optional[str]: return None class BasicAuthManager(AuthManager): + """AuthManager implementation that supports basic password auth.""" + def __init__(self, username: str, password: str): credentials = f"{username}:{password}" self._token = base64.b64encode(credentials.encode()).decode() @@ -56,6 +63,12 @@ def auth_header(self) -> str: class LegacyOAuth2AuthManager(AuthManager): + """Legacy OAuth2 AuthManager implementation. + + This class exists for backward compatibility, and will be removed in + PyIceberg 1.0.0 in favor of OAuth2AuthManager. 
+ """ + _session: Session _auth_url: Optional[str] _token: Optional[str] @@ -109,6 +122,80 @@ def auth_header(self) -> str: return f"Bearer {self._token}" +class OAuth2TokenProvider: + """Thread-safe OAuth2 token provider with token refresh support.""" + + client_id: str + client_secret: str + token_url: str + scope: Optional[str] + refresh_margin: int + expires_in: Optional[int] + + _token: Optional[str] + _expires_at: int + _lock: threading.Lock + + def __init__( + self, + client_id: str, + client_secret: str, + token_url: str, + scope: Optional[str] = None, + refresh_margin: int = 60, + expires_in: Optional[int] = None, + ): + self.client_id = client_id + self.client_secret = client_secret + self.token_url = token_url + self.scope = scope + self.refresh_margin = refresh_margin + self.expires_in = expires_in + + self._token = None + self._expires_at = 0 + self._lock = threading.Lock() + + def _refresh_token(self) -> None: + data = { + "grant_type": "client_credentials", + "client_id": self.client_id, + "client_secret": self.client_secret, + } + if self.scope: + data["scope"] = self.scope + + response = requests.post(self.token_url, data=data) + response.raise_for_status() + result = response.json() + + self._token = result["access_token"] + expires_in = result.get("expires_in", self.expires_in) + if expires_in is None: + raise ValueError( + "The expiration time of the Token must be provided by the Server in the Access Token Response in `expired_in` field, or by the PyIceberg Client." 
+ ) + self._expires_at = time.time() + expires_in - self.refresh_margin + + def get_token(self) -> str: + with self._lock: + if not self._token or time.time() >= self._expires_at: + self._refresh_token() + if self._token is None: + raise ValueError("Authorization token is None after refresh") + return self._token + + +class OAuth2AuthManager(AuthManager): + """Auth Manager implementation that supports OAuth2 as defined in IETF RFC6749.""" + + def __init__(self, token_provider: OAuth2TokenProvider): + self.token_provider = token_provider + + def auth_header(self) -> str: + return f"Bearer {self.token_provider.get_token()}" + + class AuthManagerAdapter(AuthBase): """A `requests.auth.AuthBase` adapter that integrates an `AuthManager` into a `requests.Session` to automatically attach the appropriate Authorization header to every request. From 5954550e307acc9c8a99a3866c843bfc5655d8de Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 10 Jul 2025 02:42:22 +0000 Subject: [PATCH 2/9] add custom auth implementation parsing --- pyiceberg/catalog/rest/__init__.py | 15 ++++++++++++- tests/catalog/test_rest.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 6215d17a4f..fd283990ee 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -134,6 +134,7 @@ class IdentifierKind(Enum): SIGV4_SERVICE = "rest.signing-name" OAUTH2_SERVER_URI = "oauth2-server-uri" SNAPSHOT_LOADING_MODE = "snapshot-loading-mode" +AUTH = "auth" NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) @@ -247,7 +248,19 @@ def _create_session(self) -> Session: elif ssl_client_cert := ssl_client.get(CERT): session.cert = ssl_client_cert - session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session)) + if auth_config := self.properties.get(AUTH): + # set up auth_manager based on the properties + 
auth_type = auth_config.get("type") + if auth_type is None: + raise ValueError("auth.type must be defined") + auth_type_config = auth_config.get(auth_type, {}) + if auth_impl := auth_config.get("impl"): + session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_impl, auth_type_config)) + else: + session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_type, auth_type_config)) + else: + session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session)) + # Set HTTP headers self._config_headers(session) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index ed91dd15a1..4c02163ed6 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1519,6 +1519,42 @@ def test_request_session_with_ssl_client_cert() -> None: assert "Could not find the TLS certificate file, invalid path: path_to_client_cert" in str(e.value) +def test_rest_catalog_with_basic_auth_type() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "basic", + "basic": { + "username": "one", + }, + }, + } + with pytest.raises(TypeError) as e: + # Missing namespace + RestCatalog("rest", **catalog_properties) # type: ignore + assert "BasicAuthManager.__init__() missing 1 required positional argument: 'password'" in str(e.value) + + +def test_rest_catalog_with_auth_impl() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "custom", + "impl": "dummy.nonexistent.package", + "custom": { + "property1": "one", + "property2": "two", + }, + }, + } + with pytest.raises(ValueError) as e: + # Missing namespace + RestCatalog("rest", **catalog_properties) # type: ignore + assert "Could not load AuthManager class for 'dummy.nonexistent.package'" in str(e.value) + + EXAMPLE_ENV = {"PYICEBERG_CATALOG__PRODUCTION__URI": TEST_URI} From 92ad05185eb2931e9772088ad16ac5f5b951feb7 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 17 Jul 2025 01:26:46 
+0000 Subject: [PATCH 3/9] add tests --- pyiceberg/catalog/rest/__init__.py | 15 +++-- pyiceberg/catalog/rest/auth.py | 20 ++++++- tests/catalog/test_rest.py | 91 +++++++++++++++++++++++++++++- 3 files changed, 118 insertions(+), 8 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index fd283990ee..d292362216 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -135,6 +135,7 @@ class IdentifierKind(Enum): OAUTH2_SERVER_URI = "oauth2-server-uri" SNAPSHOT_LOADING_MODE = "snapshot-loading-mode" AUTH = "auth" +CUSTOM = "custom" NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) @@ -249,15 +250,19 @@ def _create_session(self) -> Session: session.cert = ssl_client_cert if auth_config := self.properties.get(AUTH): - # set up auth_manager based on the properties auth_type = auth_config.get("type") if auth_type is None: raise ValueError("auth.type must be defined") auth_type_config = auth_config.get(auth_type, {}) - if auth_impl := auth_config.get("impl"): - session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_impl, auth_type_config)) - else: - session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_type, auth_type_config)) + auth_impl = auth_config.get("impl") + + if auth_type is CUSTOM and not auth_impl: + raise ValueError("auth.impl must be specified when using custom auth.type") + + if auth_type is not CUSTOM and auth_impl: + raise ValueError("auth.impl can only be specified when using custom auth.type") + + session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_impl or auth_type, auth_type_config)) else: session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session)) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index cc692a7165..26ce35d25b 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -189,8 +189,23 @@ def get_token(self) -> str: class OAuth2AuthManager(AuthManager): 
"""Auth Manager implementation that supports OAuth2 as defined in IETF RFC6749.""" - def __init__(self, token_provider: OAuth2TokenProvider): - self.token_provider = token_provider + def __init__( + self, + client_id: str, + client_secret: str, + token_url: str, + scope: Optional[str] = None, + refresh_margin: int = 60, + expires_in: Optional[int] = None, + ): + self.token_provider = OAuth2TokenProvider( + client_id, + client_secret, + token_url, + scope, + refresh_margin, + expires_in, + ) def auth_header(self) -> str: return f"Bearer {self.token_provider.get_token()}" @@ -274,3 +289,4 @@ def create(cls, class_or_name: str, config: Dict[str, Any]) -> AuthManager: AuthManagerFactory.register("noop", NoopAuthManager) AuthManagerFactory.register("basic", BasicAuthManager) AuthManagerFactory.register("legacyoauth2", LegacyOAuth2AuthManager) +AuthManagerFactory.register("oauth2", OAuth2AuthManager) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 4c02163ed6..2fc1654e39 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1536,7 +1536,7 @@ def test_rest_catalog_with_basic_auth_type() -> None: assert "BasicAuthManager.__init__() missing 1 required positional argument: 'password'" in str(e.value) -def test_rest_catalog_with_auth_impl() -> None: +def test_rest_catalog_with_custom_auth_type() -> None: # Given catalog_properties = { "uri": TEST_URI, @@ -1555,6 +1555,95 @@ def test_rest_catalog_with_auth_impl() -> None: assert "Could not load AuthManager class for 'dummy.nonexistent.package'" in str(e.value) +def test_rest_catalog_with_custom_auth_type_no_impl() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "custom", + "custom": { + "property1": "one", + "property2": "two", + }, + }, + } + with pytest.raises(ValueError) as e: + # Missing namespace + RestCatalog("rest", **catalog_properties) # type: ignore + assert "auth.impl must be specified when using custom auth.type" in str(e.value) + 
+ +def test_rest_catalog_with_non_custom_auth_type_impl() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "oauth2", + "impl": "oauth2.package", + "oauth2": { + "property1": "one", + "property2": "two", + }, + }, + } + with pytest.raises(ValueError) as e: + # Missing namespace + RestCatalog("rest", **catalog_properties) # type: ignore + assert "auth.impl can only be specified when using custom auth.type" in str(e.value) + + +def test_rest_catalog_with_unsupported_auth_type() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "unsupported", + "unsupported": { + "property1": "one", + "property2": "two", + }, + }, + } + with pytest.raises(ValueError) as e: + # Missing namespace + RestCatalog("rest", **catalog_properties) # type: ignore + assert "Could not load AuthManager class for 'unsupported'" in str(e.value) + + +def test_rest_catalog_with_oauth2_auth_type(requests_mock: Mocker) -> None: + requests_mock.post( + f"{TEST_URI}oauth2/token", + json={ + "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", + "token_type": "Bearer", + "expires_in": 3600, + "refresh_token": "IwOGYzYTlmM2YxOTQ5MGE3YmNmMDFkNTVk", + "scope": "read", + }, + status_code=200, + ) + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "oauth2", + "oauth2": { + "client_id": "some_client_id", + "client_secret": "some_client_secret", + "token_url": f"{TEST_URI}oauth2/token", + "scope": "read", + }, + }, + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + EXAMPLE_ENV = {"PYICEBERG_CATALOG__PRODUCTION__URI": TEST_URI} From 38042ca2051d524e9ef80f0ea5f03357c25998a7 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 17 Jul 2025 01:27:04 +0000 Subject: [PATCH 4/9] add docs --- mkdocs/docs/configuration.md 
| 88 ++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index bc514e39af..56c018b821 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -374,6 +374,94 @@ Specific headers defined by the RESTCatalog spec include: | ------------------------------------ | ------------------------------------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `header.X-Iceberg-Access-Delegation` | `{vended-credentials,remote-signing}` | `vended-credentials` | Signal to the server that the client supports delegated access via a comma-separated list of access mechanisms. The server may choose to supply access via any or none of the requested mechanisms | +#### Authentication in RESTCatalog + +The RESTCatalog supports pluggable authentication via the `auth` configuration block. This allows you to specify which how the access token will be fetched and managed for use with the HTTP requests to the RESTCatalog server. The authentication method is selected by setting the `auth.type` property, and additional configuration can be provided as needed for each method. + +##### Supported Authentication Types + +- `noop`: No authentication (no Authorization header sent). +- `basic`: HTTP Basic authentication. +- `oauth2`: OAuth2 client credentials flow. +- `legacyoauth2`: Legacy OAuth2 client credentials flow (Deprecated and will be removed in PyIceberg 1.0.0) +- `custom`: Custom authentication manager (requires `auth.impl`). 
+ +##### Configuration Properties + +The `auth` block is structured as follows: + +```yaml +catalog: + default: + type: rest + uri: http://rest-catalog/ws/ + auth: + type: + : + # Type-specific configuration + impl: # Only for custom auth +``` + +**Property Reference:** + +| Property | Required | Description | +|------------------|----------|-------------------------------------------------------------------------------------------------| +| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, `oauth2`, or `custom`). | +| `auth.impl` | Conditionally | The fully qualified class path for a custom AuthManager. Required if `auth.type` is `custom`. | +| `auth.basic` | If type is `basic` | Block containing `username` and `password` for HTTP Basic authentication. | +| `auth.oauth2` | If type is `oauth2` | Block containing OAuth2 configuration (see below). | +| `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. | + +##### Examples + +**No Authentication:** + +```yaml +auth: + type: noop +``` + +**Basic Authentication:** + +```yaml +auth: + type: basic + basic: + username: myuser + password: mypass +``` + +**OAuth2 Authentication:** + +```yaml +auth: + type: oauth2 + oauth2: + client_id: my-client-id + client_secret: my-client-secret + token_url: https://auth.example.com/oauth/token + scope: read + refresh_margin: 60 # (optional) seconds before expiry to refresh + expires_in: 3600 # (optional) fallback if server does not provide +``` + +**Custom Authentication:** + +```yaml +auth: + type: custom + impl: mypackage.module.MyAuthManager + custom: + property1: value1 + property2: value2 +``` + +##### Notes + +- If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager. +- If `auth.type` is not `custom`, specifying `auth.impl` is not allowed. 
+- The configuration block under each type (e.g., `basic`, `oauth2`, `custom`) is passed as keyword arguments to the corresponding AuthManager. + ### SQL Catalog The SQL catalog requires a database for its backend. PyIceberg supports PostgreSQL and SQLite through psycopg2. The database connection has to be configured using the `uri` property. The init_catalog_tables is optional and defaults to True. If it is set to False, the catalog tables will not be created when the SQLCatalog is initialized. See SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls): From ab75641804a2f2bcd44767f88fbf3e18b51292fe Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 17 Jul 2025 01:39:46 +0000 Subject: [PATCH 5/9] fix test for python3.9 --- tests/catalog/test_rest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 2fc1654e39..04dc78d879 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1533,7 +1533,7 @@ def test_rest_catalog_with_basic_auth_type() -> None: with pytest.raises(TypeError) as e: # Missing namespace RestCatalog("rest", **catalog_properties) # type: ignore - assert "BasicAuthManager.__init__() missing 1 required positional argument: 'password'" in str(e.value) + assert "__init__() missing 1 required positional argument: 'password'" in str(e.value) def test_rest_catalog_with_custom_auth_type() -> None: From 9c59d6730bebebdb75ab2a347c19ab158fe6a712 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Wed, 23 Jul 2025 02:25:29 +0000 Subject: [PATCH 6/9] remove oauth2manager to separate PR --- mkdocs/docs/configuration.md | 20 +------- pyiceberg/catalog/rest/auth.py | 93 ---------------------------------- tests/catalog/test_rest.py | 81 ++++++++++++++--------------- 3 files changed, 39 insertions(+), 155 deletions(-) diff --git 
a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 56c018b821..58345720f6 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -382,7 +382,6 @@ The RESTCatalog supports pluggable authentication via the `auth` configuration b - `noop`: No authentication (no Authorization header sent). - `basic`: HTTP Basic authentication. -- `oauth2`: OAuth2 client credentials flow. - `legacyoauth2`: Legacy OAuth2 client credentials flow (Deprecated and will be removed in PyIceberg 1.0.0) - `custom`: Custom authentication manager (requires `auth.impl`). @@ -406,10 +405,9 @@ catalog: | Property | Required | Description | |------------------|----------|-------------------------------------------------------------------------------------------------| -| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, `oauth2`, or `custom`). | +| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, or `custom`). | | `auth.impl` | Conditionally | The fully qualified class path for a custom AuthManager. Required if `auth.type` is `custom`. | | `auth.basic` | If type is `basic` | Block containing `username` and `password` for HTTP Basic authentication. | -| `auth.oauth2` | If type is `oauth2` | Block containing OAuth2 configuration (see below). | | `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. | ##### Examples @@ -431,20 +429,6 @@ auth: password: mypass ``` -**OAuth2 Authentication:** - -```yaml -auth: - type: oauth2 - oauth2: - client_id: my-client-id - client_secret: my-client-secret - token_url: https://auth.example.com/oauth/token - scope: read - refresh_margin: 60 # (optional) seconds before expiry to refresh - expires_in: 3600 # (optional) fallback if server does not provide -``` - **Custom Authentication:** ```yaml @@ -460,7 +444,7 @@ auth: - If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager. 
- If `auth.type` is not `custom`, specifying `auth.impl` is not allowed. -- The configuration block under each type (e.g., `basic`, `oauth2`, `custom`) is passed as keyword arguments to the corresponding AuthManager. +- The configuration block under each type (e.g., `basic`, `custom`) is passed as keyword arguments to the corresponding AuthManager. ### SQL Catalog diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index 26ce35d25b..c0a0267f54 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -17,12 +17,9 @@ import base64 import importlib -import threading -import time from abc import ABC, abstractmethod from typing import Any, Dict, Optional, Type -import requests from requests import HTTPError, PreparedRequest, Session from requests.auth import AuthBase @@ -122,95 +119,6 @@ def auth_header(self) -> str: return f"Bearer {self._token}" -class OAuth2TokenProvider: - """Thread-safe OAuth2 token provider with token refresh support.""" - - client_id: str - client_secret: str - token_url: str - scope: Optional[str] - refresh_margin: int - expires_in: Optional[int] - - _token: Optional[str] - _expires_at: int - _lock: threading.Lock - - def __init__( - self, - client_id: str, - client_secret: str, - token_url: str, - scope: Optional[str] = None, - refresh_margin: int = 60, - expires_in: Optional[int] = None, - ): - self.client_id = client_id - self.client_secret = client_secret - self.token_url = token_url - self.scope = scope - self.refresh_margin = refresh_margin - self.expires_in = expires_in - - self._token = None - self._expires_at = 0 - self._lock = threading.Lock() - - def _refresh_token(self) -> None: - data = { - "grant_type": "client_credentials", - "client_id": self.client_id, - "client_secret": self.client_secret, - } - if self.scope: - data["scope"] = self.scope - - response = requests.post(self.token_url, data=data) - response.raise_for_status() - result = response.json() - - self._token = 
result["access_token"] - expires_in = result.get("expires_in", self.expires_in) - if expires_in is None: - raise ValueError( - "The expiration time of the Token must be provided by the Server in the Access Token Response in `expired_in` field, or by the PyIceberg Client." - ) - self._expires_at = time.time() + expires_in - self.refresh_margin - - def get_token(self) -> str: - with self._lock: - if not self._token or time.time() >= self._expires_at: - self._refresh_token() - if self._token is None: - raise ValueError("Authorization token is None after refresh") - return self._token - - -class OAuth2AuthManager(AuthManager): - """Auth Manager implementation that supports OAuth2 as defined in IETF RFC6749.""" - - def __init__( - self, - client_id: str, - client_secret: str, - token_url: str, - scope: Optional[str] = None, - refresh_margin: int = 60, - expires_in: Optional[int] = None, - ): - self.token_provider = OAuth2TokenProvider( - client_id, - client_secret, - token_url, - scope, - refresh_margin, - expires_in, - ) - - def auth_header(self) -> str: - return f"Bearer {self.token_provider.get_token()}" - - class AuthManagerAdapter(AuthBase): """A `requests.auth.AuthBase` adapter that integrates an `AuthManager` into a `requests.Session` to automatically attach the appropriate Authorization header to every request. 
@@ -289,4 +197,3 @@ def create(cls, class_or_name: str, config: Dict[str, Any]) -> AuthManager: AuthManagerFactory.register("noop", NoopAuthManager) AuthManagerFactory.register("basic", BasicAuthManager) AuthManagerFactory.register("legacyoauth2", LegacyOAuth2AuthManager) -AuthManagerFactory.register("oauth2", OAuth2AuthManager) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 04dc78d879..055c55a70d 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1519,7 +1519,13 @@ def test_request_session_with_ssl_client_cert() -> None: assert "Could not find the TLS certificate file, invalid path: path_to_client_cert" in str(e.value) -def test_rest_catalog_with_basic_auth_type() -> None: +def test_rest_catalog_with_basic_auth_type(rest_mock: Mocker) -> None: + # Given + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) # Given catalog_properties = { "uri": TEST_URI, @@ -1527,13 +1533,12 @@ def test_rest_catalog_with_basic_auth_type() -> None: "type": "basic", "basic": { "username": "one", + "password": "two", }, }, } - with pytest.raises(TypeError) as e: - # Missing namespace - RestCatalog("rest", **catalog_properties) # type: ignore - assert "__init__() missing 1 required positional argument: 'password'" in str(e.value) + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI def test_rest_catalog_with_custom_auth_type() -> None: @@ -1555,6 +1560,28 @@ def test_rest_catalog_with_custom_auth_type() -> None: assert "Could not load AuthManager class for 'dummy.nonexistent.package'" in str(e.value) +def test_rest_catalog_with_custom_basic_auth_type(rest_mock: Mocker) -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "custom", + "impl": "pyiceberg.catalog.rest.auth.BasicAuthManager", + "custom": { + "username": "one", + "password": "two", + }, + }, + } + rest_mock.get( + f"{TEST_URI}v1/config", + 
json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + def test_rest_catalog_with_custom_auth_type_no_impl() -> None: # Given catalog_properties = { @@ -1578,11 +1605,11 @@ def test_rest_catalog_with_non_custom_auth_type_impl() -> None: catalog_properties = { "uri": TEST_URI, "auth": { - "type": "oauth2", - "impl": "oauth2.package", - "oauth2": { - "property1": "one", - "property2": "two", + "type": "basic", + "impl": "basic.package", + "basic": { + "username": "one", + "password": "two", }, }, } @@ -1610,40 +1637,6 @@ def test_rest_catalog_with_unsupported_auth_type() -> None: assert "Could not load AuthManager class for 'unsupported'" in str(e.value) -def test_rest_catalog_with_oauth2_auth_type(requests_mock: Mocker) -> None: - requests_mock.post( - f"{TEST_URI}oauth2/token", - json={ - "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", - "token_type": "Bearer", - "expires_in": 3600, - "refresh_token": "IwOGYzYTlmM2YxOTQ5MGE3YmNmMDFkNTVk", - "scope": "read", - }, - status_code=200, - ) - requests_mock.get( - f"{TEST_URI}v1/config", - json={"defaults": {}, "overrides": {}}, - status_code=200, - ) - # Given - catalog_properties = { - "uri": TEST_URI, - "auth": { - "type": "oauth2", - "oauth2": { - "client_id": "some_client_id", - "client_secret": "some_client_secret", - "token_url": f"{TEST_URI}oauth2/token", - "scope": "read", - }, - }, - } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore - assert catalog.uri == TEST_URI - - EXAMPLE_ENV = {"PYICEBERG_CATALOG__PRODUCTION__URI": TEST_URI} From 51dddf4c1d95e3857c163408c1045cf12d051e92 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Wed, 23 Jul 2025 02:35:13 +0000 Subject: [PATCH 7/9] verify header in basic auth --- tests/catalog/test_rest.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/catalog/test_rest.py 
b/tests/catalog/test_rest.py index 055c55a70d..f60231ac30 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=redefined-outer-name,unused-argument +import base64 import os from typing import Any, Callable, Dict, cast from unittest import mock @@ -1540,6 +1541,10 @@ def test_rest_catalog_with_basic_auth_type(rest_mock: Mocker) -> None: catalog = RestCatalog("rest", **catalog_properties) # type: ignore assert catalog.uri == TEST_URI + encoded_user_pass = base64.b64encode(b"one:two").decode() + expected_auth_header = f"Basic {encoded_user_pass}" + assert rest_mock.last_request.headers["Authorization"] == expected_auth_header + def test_rest_catalog_with_custom_auth_type() -> None: # Given @@ -1581,6 +1586,10 @@ def test_rest_catalog_with_custom_basic_auth_type(rest_mock: Mocker) -> None: catalog = RestCatalog("rest", **catalog_properties) # type: ignore assert catalog.uri == TEST_URI + encoded_user_pass = base64.b64encode(b"one:two").decode() + expected_auth_header = f"Basic {encoded_user_pass}" + assert rest_mock.last_request.headers["Authorization"] == expected_auth_header + def test_rest_catalog_with_custom_auth_type_no_impl() -> None: # Given From 779b49b8a657ad07eb54d5a81cc3bce0f9db7bbe Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Fri, 25 Jul 2025 01:54:47 +0000 Subject: [PATCH 8/9] combine auth docs --- mkdocs/docs/configuration.md | 147 ++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 73 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 48a44a9c5f..1c9e327fd7 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -359,7 +359,9 @@ catalog: #### Authentication Options -##### OAuth2 +##### Legacy OAuth2 + +Legacy OAuth2 Properties will be removed in PyIceberg 1.0 in place of pluggable AuthManager 
properties below | Key | Example | Description | | ------------------- | -------------------------------- | -------------------------------------------------------------------------------------------------- | @@ -378,6 +380,77 @@ catalog: | rest.signing-region | us-east-1 | The region to use when SigV4 signing a request | | rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request | +##### Pluggable Authentication via AuthManager + +The RESTCatalog supports pluggable authentication via the `auth` configuration block. This allows you to specify how the access token will be fetched and managed for use with the HTTP requests to the RESTCatalog server. The authentication method is selected by setting the `auth.type` property, and additional configuration can be provided as needed for each method. + +###### Supported Authentication Types + +- `noop`: No authentication (no Authorization header sent). +- `basic`: HTTP Basic authentication. +- `custom`: Custom authentication manager (requires `auth.impl`). + +###### Configuration Properties + +The `auth` block is structured as follows: + +```yaml +catalog: + default: + type: rest + uri: http://rest-catalog/ws/ + auth: + type: <auth_type> + <auth_type>: + # Type-specific configuration + impl: <custom_class_path> # Only for custom auth +``` + +###### Property Reference + +| Property | Required | Description | +|------------------|----------|-------------------------------------------------------------------------------------------------| +| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, or `custom`). | +| `auth.impl` | Conditionally | The fully qualified class path for a custom AuthManager. Required if `auth.type` is `custom`. | +| `auth.basic` | If type is `basic` | Block containing `username` and `password` for HTTP Basic authentication. | +| `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. 
| + +###### Examples + +No Authentication: + +```yaml +auth: + type: noop +``` + +Basic Authentication: + +```yaml +auth: + type: basic + basic: + username: myuser + password: mypass +``` + +Custom Authentication: + +```yaml +auth: + type: custom + impl: mypackage.module.MyAuthManager + custom: + property1: value1 + property2: value2 +``` + +###### Notes + +- If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager. +- If `auth.type` is not `custom`, specifying `auth.impl` is not allowed. +- The configuration block under each type (e.g., `basic`, `custom`) is passed as keyword arguments to the corresponding AuthManager. + #### Common Integrations & Examples @@ -445,78 +518,6 @@ catalog: py-io-impl: pyiceberg.io.fsspec.FsspecFileIO ``` -#### Authentication in RESTCatalog - -The RESTCatalog supports pluggable authentication via the `auth` configuration block. This allows you to specify which how the access token will be fetched and managed for use with the HTTP requests to the RESTCatalog server. The authentication method is selected by setting the `auth.type` property, and additional configuration can be provided as needed for each method. - -##### Supported Authentication Types - -- `noop`: No authentication (no Authorization header sent). -- `basic`: HTTP Basic authentication. -- `legacyoauth2`: Legacy OAuth2 client credentials flow (Deprecated and will be removed in PyIceberg 1.0.0) -- `custom`: Custom authentication manager (requires `auth.impl`). 
- -##### Configuration Properties - -The `auth` block is structured as follows: - -```yaml -catalog: - default: - type: rest - uri: http://rest-catalog/ws/ - auth: - type: - : - # Type-specific configuration - impl: # Only for custom auth -``` - -**Property Reference:** - -| Property | Required | Description | -|------------------|----------|-------------------------------------------------------------------------------------------------| -| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, or `custom`). | -| `auth.impl` | Conditionally | The fully qualified class path for a custom AuthManager. Required if `auth.type` is `custom`. | -| `auth.basic` | If type is `basic` | Block containing `username` and `password` for HTTP Basic authentication. | -| `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. | - -##### Examples - -**No Authentication:** - -```yaml -auth: - type: noop -``` - -**Basic Authentication:** - -```yaml -auth: - type: basic - basic: - username: myuser - password: mypass -``` - -**Custom Authentication:** - -```yaml -auth: - type: custom - impl: mypackage.module.MyAuthManager - custom: - property1: value1 - property2: value2 -``` - -##### Notes - -- If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager. -- If `auth.type` is not `custom`, specifying `auth.impl` is not allowed. -- The configuration block under each type (e.g., `basic`, `custom`) is passed as keyword arguments to the corresponding AuthManager. - ### SQL Catalog The SQL catalog requires a database for its backend. PyIceberg supports PostgreSQL and SQLite through psycopg2. The database connection has to be configured using the `uri` property. The init_catalog_tables is optional and defaults to True. If it is set to False, the catalog tables will not be created when the SQLCatalog is initialized. 
See SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls): From 36b34e5d4d139ed142f1499ae5b9e55d68c83e84 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Fri, 25 Jul 2025 13:39:58 -0400 Subject: [PATCH 9/9] thnx copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pyiceberg/catalog/rest/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 5148634087..b39af9fc92 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -256,10 +256,10 @@ def _create_session(self) -> Session: auth_type_config = auth_config.get(auth_type, {}) auth_impl = auth_config.get("impl") - if auth_type is CUSTOM and not auth_impl: + if auth_type == CUSTOM and not auth_impl: raise ValueError("auth.impl must be specified when using custom auth.type") - if auth_type is not CUSTOM and auth_impl: + if auth_type != CUSTOM and auth_impl: raise ValueError("auth.impl can only be specified when using custom auth.type") session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_impl or auth_type, auth_type_config))