diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 40cfc0b8c9..8678719432 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -161,6 +161,7 @@ For the FileIO there are several configuration options available: | adls.dfs-storage-authority | .dfs.core.windows.net | The hostname[:port] of the Data Lake Gen 2 Service. Defaults to `.dfs.core.windows.net`. Useful for connecting to a local emulator, like [azurite](https://github.com/azure/azurite). See [AzureFileSystem](https://arrow.apache.org/docs/python/filesystems.html#azure-storage-file-system) for reference | | adls.blob-storage-scheme | https | Either `http` or `https`. Defaults to `https`. Useful for connecting to a local emulator, like [azurite](https://github.com/azure/azurite). See [AzureFileSystem](https://arrow.apache.org/docs/python/filesystems.html#azure-storage-file-system) for reference | | adls.dfs-storage-scheme | https | Either `http` or `https`. Defaults to `https`. Useful for connecting to a local emulator, like [azurite](https://github.com/azure/azurite). See [AzureFileSystem](https://arrow.apache.org/docs/python/filesystems.html#azure-storage-file-system) for reference | +| adls.token | eyJ0eXAiOiJKV1QiLCJhbGci... | Static access token for authenticating with ADLS. Used for OAuth2 flows. | diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index f89de18f12..9e4bf104c2 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -86,6 +86,7 @@ ADLS_DFS_STORAGE_AUTHORITY = "adls.dfs-storage-authority" ADLS_BLOB_STORAGE_SCHEME = "adls.blob-storage-scheme" ADLS_DFS_STORAGE_SCHEME = "adls.dfs-storage-scheme" +ADLS_TOKEN = "adls.token" GCS_TOKEN = "gcs.oauth2.token" GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at" GCS_PROJECT_ID = "gcs.project-id" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index d075765ed1..eba9fb1944 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -48,6 +48,7 @@ ADLS_CREDENTIAL, ADLS_SAS_TOKEN, ADLS_TENANT_ID, + ADLS_TOKEN, AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, @@ -192,7 +193,11 @@ def _gs(properties: Properties) -> AbstractFileSystem: def _adls(properties: Properties) -> AbstractFileSystem: + # https://fsspec.github.io/adlfs/api/ + from adlfs import AzureBlobFileSystem + from azure.core.credentials import AccessToken + from azure.core.credentials_async import AsyncTokenCredential for key, sas_token in { key.replace(f"{ADLS_SAS_TOKEN}.", ""): value for key, value in properties.items() if key.startswith(ADLS_SAS_TOKEN) @@ -202,9 +207,27 @@ def _adls(properties: Properties) -> AbstractFileSystem: if ADLS_SAS_TOKEN not in properties: properties[ADLS_SAS_TOKEN] = sas_token + class StaticTokenCredential(AsyncTokenCredential): + _DEFAULT_EXPIRY_SECONDS = 3600 + + def __init__(self, token_string: str) -> None: + self._token = token_string + + async def get_token(self, *scopes: str, **kwargs: Any) -> AccessToken: + import time + + # Set expiration 1 hour from now + expires_on = int(time.time()) + self._DEFAULT_EXPIRY_SECONDS + return AccessToken(self._token, expires_on) + + if token := properties.get(ADLS_TOKEN): + credential = StaticTokenCredential(token) + else: + credential = properties.get(ADLS_CREDENTIAL) # type: ignore + return AzureBlobFileSystem( connection_string=properties.get(ADLS_CONNECTION_STRING), - credential=properties.get(ADLS_CREDENTIAL), + credential=credential, account_name=properties.get(ADLS_ACCOUNT_NAME), account_key=properties.get(ADLS_ACCOUNT_KEY), sas_token=properties.get(ADLS_SAS_TOKEN),