From 3ee34b77d9a31682b531bd8ff09fac2f71652dc3 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Sun, 1 Jun 2025 16:26:25 +0000 Subject: [PATCH 01/12] add OAuth2AuthManager --- pyiceberg/catalog/rest/auth.py | 87 ++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index 89395f1158..cc692a7165 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -17,9 +17,12 @@ import base64 import importlib +import threading +import time from abc import ABC, abstractmethod from typing import Any, Dict, Optional, Type +import requests from requests import HTTPError, PreparedRequest, Session from requests.auth import AuthBase @@ -42,11 +45,15 @@ def auth_header(self) -> Optional[str]: class NoopAuthManager(AuthManager): + """Auth Manager implementation with no auth.""" + def auth_header(self) -> Optional[str]: return None class BasicAuthManager(AuthManager): + """AuthManager implementation that supports basic password auth.""" + def __init__(self, username: str, password: str): credentials = f"{username}:{password}" self._token = base64.b64encode(credentials.encode()).decode() @@ -56,6 +63,12 @@ def auth_header(self) -> str: class LegacyOAuth2AuthManager(AuthManager): + """Legacy OAuth2 AuthManager implementation. + + This class exists for backward compatibility, and will be removed in + PyIceberg 1.0.0 in favor of OAuth2AuthManager. 
+ """ + _session: Session _auth_url: Optional[str] _token: Optional[str] @@ -109,6 +122,80 @@ def auth_header(self) -> str: return f"Bearer {self._token}" +class OAuth2TokenProvider: + """Thread-safe OAuth2 token provider with token refresh support.""" + + client_id: str + client_secret: str + token_url: str + scope: Optional[str] + refresh_margin: int + expires_in: Optional[int] + + _token: Optional[str] + _expires_at: int + _lock: threading.Lock + + def __init__( + self, + client_id: str, + client_secret: str, + token_url: str, + scope: Optional[str] = None, + refresh_margin: int = 60, + expires_in: Optional[int] = None, + ): + self.client_id = client_id + self.client_secret = client_secret + self.token_url = token_url + self.scope = scope + self.refresh_margin = refresh_margin + self.expires_in = expires_in + + self._token = None + self._expires_at = 0 + self._lock = threading.Lock() + + def _refresh_token(self) -> None: + data = { + "grant_type": "client_credentials", + "client_id": self.client_id, + "client_secret": self.client_secret, + } + if self.scope: + data["scope"] = self.scope + + response = requests.post(self.token_url, data=data) + response.raise_for_status() + result = response.json() + + self._token = result["access_token"] + expires_in = result.get("expires_in", self.expires_in) + if expires_in is None: + raise ValueError( + "The expiration time of the Token must be provided by the Server in the Access Token Response in `expired_in` field, or by the PyIceberg Client." 
+ ) + self._expires_at = time.time() + expires_in - self.refresh_margin + + def get_token(self) -> str: + with self._lock: + if not self._token or time.time() >= self._expires_at: + self._refresh_token() + if self._token is None: + raise ValueError("Authorization token is None after refresh") + return self._token + + +class OAuth2AuthManager(AuthManager): + """Auth Manager implementation that supports OAuth2 as defined in IETF RFC6749.""" + + def __init__(self, token_provider: OAuth2TokenProvider): + self.token_provider = token_provider + + def auth_header(self) -> str: + return f"Bearer {self.token_provider.get_token()}" + + class AuthManagerAdapter(AuthBase): """A `requests.auth.AuthBase` adapter that integrates an `AuthManager` into a `requests.Session` to automatically attach the appropriate Authorization header to every request. From 5954550e307acc9c8a99a3866c843bfc5655d8de Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 10 Jul 2025 02:42:22 +0000 Subject: [PATCH 02/12] add custom auth implementation parsing --- pyiceberg/catalog/rest/__init__.py | 15 ++++++++++++- tests/catalog/test_rest.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 6215d17a4f..fd283990ee 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -134,6 +134,7 @@ class IdentifierKind(Enum): SIGV4_SERVICE = "rest.signing-name" OAUTH2_SERVER_URI = "oauth2-server-uri" SNAPSHOT_LOADING_MODE = "snapshot-loading-mode" +AUTH = "auth" NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) @@ -247,7 +248,19 @@ def _create_session(self) -> Session: elif ssl_client_cert := ssl_client.get(CERT): session.cert = ssl_client_cert - session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session)) + if auth_config := self.properties.get(AUTH): + # set up auth_manager based on the properties + 
auth_type = auth_config.get("type") + if auth_type is None: + raise ValueError("auth.type must be defined") + auth_type_config = auth_config.get(auth_type, {}) + if auth_impl := auth_config.get("impl"): + session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_impl, auth_type_config)) + else: + session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_type, auth_type_config)) + else: + session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session)) + # Set HTTP headers self._config_headers(session) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index ed91dd15a1..4c02163ed6 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1519,6 +1519,42 @@ def test_request_session_with_ssl_client_cert() -> None: assert "Could not find the TLS certificate file, invalid path: path_to_client_cert" in str(e.value) +def test_rest_catalog_with_basic_auth_type() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "basic", + "basic": { + "username": "one", + }, + }, + } + with pytest.raises(TypeError) as e: + # Missing namespace + RestCatalog("rest", **catalog_properties) # type: ignore + assert "BasicAuthManager.__init__() missing 1 required positional argument: 'password'" in str(e.value) + + +def test_rest_catalog_with_auth_impl() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "custom", + "impl": "dummy.nonexistent.package", + "custom": { + "property1": "one", + "property2": "two", + }, + }, + } + with pytest.raises(ValueError) as e: + # Missing namespace + RestCatalog("rest", **catalog_properties) # type: ignore + assert "Could not load AuthManager class for 'dummy.nonexistent.package'" in str(e.value) + + EXAMPLE_ENV = {"PYICEBERG_CATALOG__PRODUCTION__URI": TEST_URI} From 92ad05185eb2931e9772088ad16ac5f5b951feb7 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 17 Jul 2025 01:26:46 
+0000 Subject: [PATCH 03/12] add tests --- pyiceberg/catalog/rest/__init__.py | 15 +++-- pyiceberg/catalog/rest/auth.py | 20 ++++++- tests/catalog/test_rest.py | 91 +++++++++++++++++++++++++++++- 3 files changed, 118 insertions(+), 8 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index fd283990ee..d292362216 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -135,6 +135,7 @@ class IdentifierKind(Enum): OAUTH2_SERVER_URI = "oauth2-server-uri" SNAPSHOT_LOADING_MODE = "snapshot-loading-mode" AUTH = "auth" +CUSTOM = "custom" NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) @@ -249,15 +250,19 @@ def _create_session(self) -> Session: session.cert = ssl_client_cert if auth_config := self.properties.get(AUTH): - # set up auth_manager based on the properties auth_type = auth_config.get("type") if auth_type is None: raise ValueError("auth.type must be defined") auth_type_config = auth_config.get(auth_type, {}) - if auth_impl := auth_config.get("impl"): - session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_impl, auth_type_config)) - else: - session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_type, auth_type_config)) + auth_impl = auth_config.get("impl") + + if auth_type is CUSTOM and not auth_impl: + raise ValueError("auth.impl must be specified when using custom auth.type") + + if auth_type is not CUSTOM and auth_impl: + raise ValueError("auth.impl can only be specified when using custom auth.type") + + session.auth = AuthManagerAdapter(AuthManagerFactory.create(auth_impl or auth_type, auth_type_config)) else: session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session)) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index cc692a7165..26ce35d25b 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -189,8 +189,23 @@ def get_token(self) -> str: class OAuth2AuthManager(AuthManager): 
"""Auth Manager implementation that supports OAuth2 as defined in IETF RFC6749.""" - def __init__(self, token_provider: OAuth2TokenProvider): - self.token_provider = token_provider + def __init__( + self, + client_id: str, + client_secret: str, + token_url: str, + scope: Optional[str] = None, + refresh_margin: int = 60, + expires_in: Optional[int] = None, + ): + self.token_provider = OAuth2TokenProvider( + client_id, + client_secret, + token_url, + scope, + refresh_margin, + expires_in, + ) def auth_header(self) -> str: return f"Bearer {self.token_provider.get_token()}" @@ -274,3 +289,4 @@ def create(cls, class_or_name: str, config: Dict[str, Any]) -> AuthManager: AuthManagerFactory.register("noop", NoopAuthManager) AuthManagerFactory.register("basic", BasicAuthManager) AuthManagerFactory.register("legacyoauth2", LegacyOAuth2AuthManager) +AuthManagerFactory.register("oauth2", OAuth2AuthManager) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 4c02163ed6..2fc1654e39 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1536,7 +1536,7 @@ def test_rest_catalog_with_basic_auth_type() -> None: assert "BasicAuthManager.__init__() missing 1 required positional argument: 'password'" in str(e.value) -def test_rest_catalog_with_auth_impl() -> None: +def test_rest_catalog_with_custom_auth_type() -> None: # Given catalog_properties = { "uri": TEST_URI, @@ -1555,6 +1555,95 @@ def test_rest_catalog_with_auth_impl() -> None: assert "Could not load AuthManager class for 'dummy.nonexistent.package'" in str(e.value) +def test_rest_catalog_with_custom_auth_type_no_impl() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "custom", + "custom": { + "property1": "one", + "property2": "two", + }, + }, + } + with pytest.raises(ValueError) as e: + # Missing namespace + RestCatalog("rest", **catalog_properties) # type: ignore + assert "auth.impl must be specified when using custom auth.type" in str(e.value) + 
+ +def test_rest_catalog_with_non_custom_auth_type_impl() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "oauth2", + "impl": "oauth2.package", + "oauth2": { + "property1": "one", + "property2": "two", + }, + }, + } + with pytest.raises(ValueError) as e: + # auth.impl is not allowed with a non-custom auth.type + RestCatalog("rest", **catalog_properties) # type: ignore + assert "auth.impl can only be specified when using custom auth.type" in str(e.value) + + +def test_rest_catalog_with_unsupported_auth_type() -> None: + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "unsupported", + "unsupported": { + "property1": "one", + "property2": "two", + }, + }, + } + with pytest.raises(ValueError) as e: + # Unknown auth.type: no AuthManager can be loaded for it + RestCatalog("rest", **catalog_properties) # type: ignore + assert "Could not load AuthManager class for 'unsupported'" in str(e.value) + + +def test_rest_catalog_with_oauth2_auth_type(requests_mock: Mocker) -> None: + requests_mock.post( + f"{TEST_URI}oauth2/token", + json={ + "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", + "token_type": "Bearer", + "expires_in": 3600, + "refresh_token": "IwOGYzYTlmM2YxOTQ5MGE3YmNmMDFkNTVk", + "scope": "read", + }, + status_code=200, + ) + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + # Given + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "oauth2", + "oauth2": { + "client_id": "some_client_id", + "client_secret": "some_client_secret", + "token_url": f"{TEST_URI}oauth2/token", + "scope": "read", + }, + }, + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + EXAMPLE_ENV = {"PYICEBERG_CATALOG__PRODUCTION__URI": TEST_URI} From 38042ca2051d524e9ef80f0ea5f03357c25998a7 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 17 Jul 2025 01:27:04 +0000 Subject: [PATCH 04/12] add docs --- 
mkdocs/docs/configuration.md | 88 ++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index bc514e39af..56c018b821 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -374,6 +374,94 @@ Specific headers defined by the RESTCatalog spec include: | ------------------------------------ | ------------------------------------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `header.X-Iceberg-Access-Delegation` | `{vended-credentials,remote-signing}` | `vended-credentials` | Signal to the server that the client supports delegated access via a comma-separated list of access mechanisms. The server may choose to supply access via any or none of the requested mechanisms | +#### Authentication in RESTCatalog + +The RESTCatalog supports pluggable authentication via the `auth` configuration block. This allows you to specify which how the access token will be fetched and managed for use with the HTTP requests to the RESTCatalog server. The authentication method is selected by setting the `auth.type` property, and additional configuration can be provided as needed for each method. + +##### Supported Authentication Types + +- `noop`: No authentication (no Authorization header sent). +- `basic`: HTTP Basic authentication. +- `oauth2`: OAuth2 client credentials flow. +- `legacyoauth2`: Legacy OAuth2 client credentials flow (Deprecated and will be removed in PyIceberg 1.0.0) +- `custom`: Custom authentication manager (requires `auth.impl`). 
+ +##### Configuration Properties + +The `auth` block is structured as follows: + +```yaml +catalog: + default: + type: rest + uri: http://rest-catalog/ws/ + auth: + type: + : + # Type-specific configuration + impl: # Only for custom auth +``` + +**Property Reference:** + +| Property | Required | Description | +|------------------|----------|-------------------------------------------------------------------------------------------------| +| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, `oauth2`, or `custom`). | +| `auth.impl` | Conditionally | The fully qualified class path for a custom AuthManager. Required if `auth.type` is `custom`. | +| `auth.basic` | If type is `basic` | Block containing `username` and `password` for HTTP Basic authentication. | +| `auth.oauth2` | If type is `oauth2` | Block containing OAuth2 configuration (see below). | +| `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. | + +##### Examples + +**No Authentication:** + +```yaml +auth: + type: noop +``` + +**Basic Authentication:** + +```yaml +auth: + type: basic + basic: + username: myuser + password: mypass +``` + +**OAuth2 Authentication:** + +```yaml +auth: + type: oauth2 + oauth2: + client_id: my-client-id + client_secret: my-client-secret + token_url: https://auth.example.com/oauth/token + scope: read + refresh_margin: 60 # (optional) seconds before expiry to refresh + expires_in: 3600 # (optional) fallback if server does not provide +``` + +**Custom Authentication:** + +```yaml +auth: + type: custom + impl: mypackage.module.MyAuthManager + custom: + property1: value1 + property2: value2 +``` + +##### Notes + +- If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager. +- If `auth.type` is not `custom`, specifying `auth.impl` is not allowed. 
+- The configuration block under each type (e.g., `basic`, `oauth2`, `custom`) is passed as keyword arguments to the corresponding AuthManager. + ### SQL Catalog The SQL catalog requires a database for its backend. PyIceberg supports PostgreSQL and SQLite through psycopg2. The database connection has to be configured using the `uri` property. The init_catalog_tables is optional and defaults to True. If it is set to False, the catalog tables will not be created when the SQLCatalog is initialized. See SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls): From ab75641804a2f2bcd44767f88fbf3e18b51292fe Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 17 Jul 2025 01:39:46 +0000 Subject: [PATCH 05/12] fix test for python3.9 --- tests/catalog/test_rest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 2fc1654e39..04dc78d879 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1533,7 +1533,7 @@ def test_rest_catalog_with_basic_auth_type() -> None: with pytest.raises(TypeError) as e: # Missing namespace RestCatalog("rest", **catalog_properties) # type: ignore - assert "BasicAuthManager.__init__() missing 1 required positional argument: 'password'" in str(e.value) + assert "__init__() missing 1 required positional argument: 'password'" in str(e.value) def test_rest_catalog_with_custom_auth_type() -> None: From f15cc9012248edfc72fa39ff9eed7959ad7882e2 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 25 Jul 2025 16:41:37 -0700 Subject: [PATCH 06/12] merge docs --- mkdocs/docs/configuration.md | 125 ++++++++--------------------------- 1 file changed, 27 insertions(+), 98 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 89e618084b..4601886bbd 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ 
-384,13 +384,15 @@ Legacy OAuth2 Properties will be removed in PyIceberg 1.0 in place of pluggable The RESTCatalog supports pluggable authentication via the `auth` configuration block. This allows you to specify which how the access token will be fetched and managed for use with the HTTP requests to the RESTCatalog server. The authentication method is selected by setting the `auth.type` property, and additional configuration can be provided as needed for each method. -###### Supported Authentication Types +##### Supported Authentication Types - `noop`: No authentication (no Authorization header sent). - `basic`: HTTP Basic authentication. +- `oauth2`: OAuth2 client credentials flow. +- `legacyoauth2`: Legacy OAuth2 client credentials flow (Deprecated and will be removed in PyIceberg 1.0.0) - `custom`: Custom authentication manager (requires `auth.impl`). -###### Configuration Properties +##### Configuration Properties The `auth` block is structured as follows: @@ -406,25 +408,26 @@ catalog: impl: # Only for custom auth ``` -###### Property Reference +**Property Reference:** | Property | Required | Description | |------------------|----------|-------------------------------------------------------------------------------------------------| -| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, or `custom`). | +| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, `oauth2`, or `custom`). | | `auth.impl` | Conditionally | The fully qualified class path for a custom AuthManager. Required if `auth.type` is `custom`. | | `auth.basic` | If type is `basic` | Block containing `username` and `password` for HTTP Basic authentication. | +| `auth.oauth2` | If type is `oauth2` | Block containing OAuth2 configuration (see below). | | `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. 
| -###### Examples +##### Examples -No Authentication: +**No Authentication:** ```yaml auth: type: noop ``` -Basic Authentication: +**Basic Authentication:** ```yaml auth: @@ -434,7 +437,21 @@ auth: password: mypass ``` -Custom Authentication: +**OAuth2 Authentication:** + +```yaml +auth: + type: oauth2 + oauth2: + client_id: my-client-id + client_secret: my-client-secret + token_url: https://auth.example.com/oauth/token + scope: read + refresh_margin: 60 # (optional) seconds before expiry to refresh + expires_in: 3600 # (optional) fallback if server does not provide +``` + +**Custom Authentication:** ```yaml auth: @@ -445,11 +462,11 @@ auth: property2: value2 ``` -###### Notes +##### Notes - If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager. - If `auth.type` is not `custom`, specifying `auth.impl` is not allowed. -- The configuration block under each type (e.g., `basic`, `custom`) is passed as keyword arguments to the corresponding AuthManager. +- The configuration block under each type (e.g., `basic`, `oauth2`, `custom`) is passed as keyword arguments to the corresponding AuthManager. @@ -518,94 +535,6 @@ catalog: py-io-impl: pyiceberg.io.fsspec.FsspecFileIO ``` -#### Authentication in RESTCatalog - -The RESTCatalog supports pluggable authentication via the `auth` configuration block. This allows you to specify which how the access token will be fetched and managed for use with the HTTP requests to the RESTCatalog server. The authentication method is selected by setting the `auth.type` property, and additional configuration can be provided as needed for each method. - -##### Supported Authentication Types - -- `noop`: No authentication (no Authorization header sent). -- `basic`: HTTP Basic authentication. -- `oauth2`: OAuth2 client credentials flow. 
-- `legacyoauth2`: Legacy OAuth2 client credentials flow (Deprecated and will be removed in PyIceberg 1.0.0) -- `custom`: Custom authentication manager (requires `auth.impl`). - -##### Configuration Properties - -The `auth` block is structured as follows: - -```yaml -catalog: - default: - type: rest - uri: http://rest-catalog/ws/ - auth: - type: - : - # Type-specific configuration - impl: # Only for custom auth -``` - -**Property Reference:** - -| Property | Required | Description | -|------------------|----------|-------------------------------------------------------------------------------------------------| -| `auth.type` | Yes | The authentication type to use (`noop`, `basic`, `oauth2`, or `custom`). | -| `auth.impl` | Conditionally | The fully qualified class path for a custom AuthManager. Required if `auth.type` is `custom`. | -| `auth.basic` | If type is `basic` | Block containing `username` and `password` for HTTP Basic authentication. | -| `auth.oauth2` | If type is `oauth2` | Block containing OAuth2 configuration (see below). | -| `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. | - -##### Examples - -**No Authentication:** - -```yaml -auth: - type: noop -``` - -**Basic Authentication:** - -```yaml -auth: - type: basic - basic: - username: myuser - password: mypass -``` - -**OAuth2 Authentication:** - -```yaml -auth: - type: oauth2 - oauth2: - client_id: my-client-id - client_secret: my-client-secret - token_url: https://auth.example.com/oauth/token - scope: read - refresh_margin: 60 # (optional) seconds before expiry to refresh - expires_in: 3600 # (optional) fallback if server does not provide -``` - -**Custom Authentication:** - -```yaml -auth: - type: custom - impl: mypackage.module.MyAuthManager - custom: - property1: value1 - property2: value2 -``` - -##### Notes - -- If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager. 
-- If `auth.type` is not `custom`, specifying `auth.impl` is not allowed. -- The configuration block under each type (e.g., `basic`, `oauth2`, `custom`) is passed as keyword arguments to the corresponding AuthManager. - ### SQL Catalog The SQL catalog requires a database for its backend. PyIceberg supports PostgreSQL and SQLite through psycopg2. The database connection has to be configured using the `uri` property. The init_catalog_tables is optional and defaults to True. If it is set to False, the catalog tables will not be created when the SQLCatalog is initialized. See SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls): From c32bc9cd64923e5c98224c4541775d950da9e79c Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 25 Jul 2025 16:43:22 -0700 Subject: [PATCH 07/12] align docs format --- mkdocs/docs/configuration.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 4601886bbd..987e1423b2 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -384,7 +384,7 @@ Legacy OAuth2 Properties will be removed in PyIceberg 1.0 in place of pluggable The RESTCatalog supports pluggable authentication via the `auth` configuration block. This allows you to specify which how the access token will be fetched and managed for use with the HTTP requests to the RESTCatalog server. The authentication method is selected by setting the `auth.type` property, and additional configuration can be provided as needed for each method. -##### Supported Authentication Types +###### Supported Authentication Types - `noop`: No authentication (no Authorization header sent). - `basic`: HTTP Basic authentication. 
@@ -392,7 +392,7 @@ The RESTCatalog supports pluggable authentication via the `auth` configuration b - `legacyoauth2`: Legacy OAuth2 client credentials flow (Deprecated and will be removed in PyIceberg 1.0.0) - `custom`: Custom authentication manager (requires `auth.impl`). -##### Configuration Properties +###### Configuration Properties The `auth` block is structured as follows: @@ -408,7 +408,7 @@ catalog: impl: # Only for custom auth ``` -**Property Reference:** +###### Property Reference | Property | Required | Description | |------------------|----------|-------------------------------------------------------------------------------------------------| @@ -418,16 +418,16 @@ catalog: | `auth.oauth2` | If type is `oauth2` | Block containing OAuth2 configuration (see below). | | `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. | -##### Examples +###### Examples -**No Authentication:** +No Authentication: ```yaml auth: type: noop ``` -**Basic Authentication:** +Basic Authentication: ```yaml auth: @@ -437,7 +437,7 @@ auth: password: mypass ``` -**OAuth2 Authentication:** +OAuth2 Authentication: ```yaml auth: @@ -451,7 +451,7 @@ auth: expires_in: 3600 # (optional) fallback if server does not provide ``` -**Custom Authentication:** +Custom Authentication: ```yaml auth: @@ -462,7 +462,7 @@ auth: property2: value2 ``` -##### Notes +###### Notes - If `auth.type` is `custom`, you **must** specify `auth.impl` with the full class path to your custom AuthManager. - If `auth.type` is not `custom`, specifying `auth.impl` is not allowed. 
From 9223de00783bbdcb6c22e524afec5724def54dc0 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 25 Jul 2025 17:17:04 -0700 Subject: [PATCH 08/12] Update pyiceberg/catalog/rest/auth.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pyiceberg/catalog/rest/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index 26ce35d25b..ebadaa3199 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -173,7 +173,7 @@ def _refresh_token(self) -> None: expires_in = result.get("expires_in", self.expires_in) if expires_in is None: raise ValueError( - "The expiration time of the Token must be provided by the Server in the Access Token Response in `expired_in` field, or by the PyIceberg Client." + "The expiration time of the Token must be provided by the Server in the Access Token Response in `expires_in` field, or by the PyIceberg Client." ) self._expires_at = time.time() + expires_in - self.refresh_margin From 977842bc42c8f9ace24e2f0e0cbd66222ccc74a9 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 14 Aug 2025 03:08:21 +0000 Subject: [PATCH 09/12] use basic auth instead of post --- pyiceberg/catalog/rest/auth.py | 24 +++++++++++++++--------- tests/catalog/test_rest.py | 24 ++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index 390a355ecb..85df069338 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -17,10 +17,11 @@ import base64 import importlib +import logging import threading import time -import logging from abc import ABC, abstractmethod +from functools import cached_property from typing import Any, Dict, List, Optional, Type import requests @@ -158,16 +159,19 @@ def __init__( self._expires_at = 0 self._lock = threading.Lock() + @cached_property + def 
_client_secret_header(self) -> str: + creds = f"{self.client_id}:{self.client_secret}" + creds_bytes = creds.encode("utf-8") + b64_creds = base64.b64encode(creds_bytes).decode("utf-8") + return f"Basic {b64_creds}" + def _refresh_token(self) -> None: - data = { - "grant_type": "client_credentials", - "client_id": self.client_id, - "client_secret": self.client_secret, - } + data = {} if self.scope: data["scope"] = self.scope - response = requests.post(self.token_url, data=data) + response = requests.post(self.token_url, data=data, headers={"Authorization": self._client_secret_header}) response.raise_for_status() result = response.json() @@ -177,11 +181,11 @@ def _refresh_token(self) -> None: raise ValueError( "The expiration time of the Token must be provided by the Server in the Access Token Response in `expires_in` field, or by the PyIceberg Client." ) - self._expires_at = time.time() + expires_in - self.refresh_margin + self._expires_at = time.monotonic() + expires_in - self.refresh_margin def get_token(self) -> str: with self._lock: - if not self._token or time.time() >= self._expires_at: + if not self._token or time.monotonic() >= self._expires_at: self._refresh_token() if self._token is None: raise ValueError("Authorization token is None after refresh") @@ -211,6 +215,8 @@ def __init__( def auth_header(self) -> str: return f"Bearer {self.token_provider.get_token()}" + + class GoogleAuthManager(AuthManager): """An auth manager that is responsible for handling Google credentials.""" diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index be80843a2a..c78a0344f1 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -21,6 +21,7 @@ from unittest import mock import pytest +from requests.exceptions import HTTPError from requests_mock import Mocker import pyiceberg @@ -1680,6 +1681,29 @@ def test_rest_catalog_with_oauth2_auth_type(requests_mock: Mocker) -> None: assert catalog.uri == TEST_URI +def 
test_rest_catalog_oauth2_non_200_token_response(requests_mock: Mocker) -> None: + requests_mock.post( + f"{TEST_URI}oauth2/token", + json={"error": "invalid_client"}, + status_code=401, + ) + catalog_properties = { + "uri": TEST_URI, + "auth": { + "type": "oauth2", + "oauth2": { + "client_id": "bad_client_id", + "client_secret": "bad_client_secret", + "token_url": f"{TEST_URI}oauth2/token", + "scope": "read", + }, + }, + } + + with pytest.raises(HTTPError): + RestCatalog("rest", **catalog_properties) # type: ignore + + EXAMPLE_ENV = {"PYICEBERG_CATALOG__PRODUCTION__URI": TEST_URI} From c860e63f7e3aad950ac07580c830d2e9d79d71ca Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 14 Aug 2025 21:46:07 -0400 Subject: [PATCH 10/12] thanks Kevin! Co-authored-by: Kevin Liu --- pyiceberg/catalog/rest/auth.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index 85df069338..8cae659298 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -167,7 +167,9 @@ def _client_secret_header(self) -> str: return f"Basic {b64_creds}" def _refresh_token(self) -> None: - data = {} + data = { + "grant_type": "client_credentials" + } if self.scope: data["scope"] = self.scope From 4011015b78bf9cd2f0cc7c87a99bcc58c8c3d2bc Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+sungwy@users.noreply.github.com> Date: Thu, 14 Aug 2025 21:48:31 -0400 Subject: [PATCH 11/12] thanks Kevin! Co-authored-by: Kevin Liu --- mkdocs/docs/configuration.md | 1 - 1 file changed, 1 deletion(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 0bafbef1fc..75720d2c95 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -389,7 +389,6 @@ The RESTCatalog supports pluggable authentication via the `auth` configuration b - `noop`: No authentication (no Authorization header sent). 
- `basic`: HTTP Basic authentication. - `oauth2`: OAuth2 client credentials flow. -- `legacyoauth2`: Legacy OAuth2 client credentials flow (Deprecated and will be removed in PyIceberg 1.0.0) - `custom`: Custom authentication manager (requires `auth.impl`). - `google`: Google Authentication support From d7af39ef2d7e58ced9e5ed428a79a22d603f5b91 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 14 Aug 2025 19:38:11 -0700 Subject: [PATCH 12/12] fix linter --- pyiceberg/catalog/rest/auth.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py index 8cae659298..527e82060e 100644 --- a/pyiceberg/catalog/rest/auth.py +++ b/pyiceberg/catalog/rest/auth.py @@ -167,9 +167,7 @@ def _client_secret_header(self) -> str: return f"Basic {b64_creds}" def _refresh_token(self) -> None: - data = { - "grant_type": "client_credentials" - } + data = {"grant_type": "client_credentials"} if self.scope: data["scope"] = self.scope