From e3e796b3e10dd103edd228a257c659f943e5d22e Mon Sep 17 00:00:00 2001 From: fcfangcc Date: Thu, 25 Sep 2025 10:55:57 +0800 Subject: [PATCH 1/2] support fsspec s3 addressing_style --- pyiceberg/io/__init__.py | 1 + pyiceberg/io/fsspec.py | 4 ++++ tests/io/test_fsspec.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 8836dec79d..b6af1a1b26 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -62,6 +62,7 @@ S3_PROXY_URI = "s3.proxy-uri" S3_CONNECT_TIMEOUT = "s3.connect-timeout" S3_REQUEST_TIMEOUT = "s3.request-timeout" +S3_ADDRESSING_STYLE = "s3.addressing-style" S3_SIGNER = "s3.signer" S3_SIGNER_URI = "s3.signer.uri" S3_SIGNER_ENDPOINT = "s3.signer.endpoint" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 55368e1948..11714a30d7 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -67,6 +67,7 @@ HF_ENDPOINT, HF_TOKEN, S3_ACCESS_KEY_ID, + S3_ADDRESSING_STYLE, S3_ANONYMOUS, S3_CONNECT_TIMEOUT, S3_ENDPOINT, @@ -168,6 +169,9 @@ def _s3(properties: Properties) -> AbstractFileSystem: if request_timeout := properties.get(S3_REQUEST_TIMEOUT): config_kwargs["read_timeout"] = float(request_timeout) + if addressing_style := properties.get(S3_ADDRESSING_STYLE): + config_kwargs["s3"] = {"addressing_style": addressing_style} + if s3_anonymous := properties.get(S3_ANONYMOUS): anon = strtobool(s3_anonymous) else: diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py index 6924d6b1c3..71b4b981cc 100644 --- a/tests/io/test_fsspec.py +++ b/tests/io/test_fsspec.py @@ -306,6 +306,36 @@ def test_fsspec_s3_session_properties() -> None: ) +def test_fsspec_s3_session_properties_with_addressing_style() -> None: + session_properties: Properties = { + "s3.addressing-style": "virtual", + "s3.endpoint": "http://localhost:9000", + "s3.access-key-id": "admin", + "s3.secret-access-key": "password", + "s3.region": "us-east-1", + 
"s3.session-token": "s3.session-token", + **UNIFIED_AWS_SESSION_PROPERTIES, + } + + with mock.patch("s3fs.S3FileSystem") as mock_s3fs: + s3_fileio = FsspecFileIO(properties=session_properties) + filename = str(uuid.uuid4()) + + s3_fileio.new_input(location=f"s3://warehouse/{filename}") + + mock_s3fs.assert_called_with( + anon=False, + client_kwargs={ + "endpoint_url": "http://localhost:9000", + "aws_access_key_id": "admin", + "aws_secret_access_key": "password", + "region_name": "us-east-1", + "aws_session_token": "s3.session-token", + }, + config_kwargs={"s3": {"addressing_style": "virtual"}}, + ) + + def test_fsspec_s3_session_properties_with_anonymous() -> None: session_properties: Properties = { "s3.anonymous": "true", From 6328586ce80364751aebcc4c01a5d8d2f2b1232c Mon Sep 17 00:00:00 2001 From: fcfangcc Date: Fri, 26 Sep 2025 09:11:37 +0800 Subject: [PATCH 2/2] change to force-virtual-addressing --- pyiceberg/io/__init__.py | 1 - pyiceberg/io/fsspec.py | 6 +++--- tests/io/test_fsspec.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index b6af1a1b26..8836dec79d 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -62,7 +62,6 @@ S3_PROXY_URI = "s3.proxy-uri" S3_CONNECT_TIMEOUT = "s3.connect-timeout" S3_REQUEST_TIMEOUT = "s3.request-timeout" -S3_ADDRESSING_STYLE = "s3.addressing-style" S3_SIGNER = "s3.signer" S3_SIGNER_URI = "s3.signer.uri" S3_SIGNER_ENDPOINT = "s3.signer.endpoint" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 11714a30d7..522e660b15 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -67,10 +67,10 @@ HF_ENDPOINT, HF_TOKEN, S3_ACCESS_KEY_ID, - S3_ADDRESSING_STYLE, S3_ANONYMOUS, S3_CONNECT_TIMEOUT, S3_ENDPOINT, + S3_FORCE_VIRTUAL_ADDRESSING, S3_PROXY_URI, S3_REGION, S3_REQUEST_TIMEOUT, @@ -169,8 +169,8 @@ def _s3(properties: Properties) -> AbstractFileSystem: if request_timeout := properties.get(S3_REQUEST_TIMEOUT): 
config_kwargs["read_timeout"] = float(request_timeout) - if addressing_style := properties.get(S3_ADDRESSING_STYLE): - config_kwargs["s3"] = {"addressing_style": addressing_style} + if (force_virtual := properties.get(S3_FORCE_VIRTUAL_ADDRESSING)) and strtobool(str(force_virtual)): + config_kwargs["s3"] = {"addressing_style": "virtual"} if s3_anonymous := properties.get(S3_ANONYMOUS): anon = strtobool(s3_anonymous) diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py index 71b4b981cc..fb22eefbfe 100644 --- a/tests/io/test_fsspec.py +++ b/tests/io/test_fsspec.py @@ -306,9 +306,9 @@ def test_fsspec_s3_session_properties() -> None: ) -def test_fsspec_s3_session_properties_with_addressing_style() -> None: +def test_fsspec_s3_session_properties_force_virtual_addressing() -> None: session_properties: Properties = { - "s3.addressing-style": "virtual", + "s3.force-virtual-addressing": True, "s3.endpoint": "http://localhost:9000", "s3.access-key-id": "admin", "s3.secret-access-key": "password",