From 906b0676c1afc84623ce34a676f6c02fe5e8aec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Lafarge?= Date: Tue, 30 Dec 2025 16:52:06 +0100 Subject: [PATCH 1/6] =?UTF-8?q?=E2=9C=A8=20Support=20alternative=20GCP=20U?= =?UTF-8?q?niverses?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds support for the `GOOGLE_CLOUD_UNIVERSE_DOMAIN` environment variable to support alternative GCP universes. Context ------- In multiple countries, local hosting companies are partnering with GCP to offer EU-sovereign GCP-like environments. Such partnerships include [one with T-Systems in Germany](https://www.t-systems.com/de/en/sovereign-cloud/solutions/sovereign-cloud-powered-by-google-cloud) or [another one with Thalès in France called S3NS](https://www.s3ns.io/en). In order to support such new environments, Google introduced the notion of `universe` in their SDKs, essentially to point them at non-`googleapis.com` endpoints. We ([Pigment](https://pigment.com)) are currently porting our Platform to S3NS, and that includes our time-series forecasting services relying on Dask and - therefore - `fsspec` and `gcsfs`. To make that work, we're currently passing the `endpoint_url` storage parameter as `storage_options` in Dask bag calls requiring GCS access; however, that's far from ideal (these calls are scattered all around our codebase). To connect to other universes, clients are advised to use the `GOOGLE_CLOUD_UNIVERSE_DOMAIN` environment variable as you can see: - on [this pull request on the Google Cloud Python SDK](https://github.com/googleapis/google-api-python-client/pull/2369) - in the documentation of S3NS: https://documentation.s3ns.fr/docs/overview/tpc-key-differences#key_differences_for_developers Support for this environment variable would make it much easier for us (and for anyone else) to connect to alternative GCP universes, without having to patch a single line of our own code. 
NOTE: credentials retrieval on S3NS Virtual Machines was working out-of-the-box, even without the `GOOGLE_CLOUD_UNIVERSE_DOMAIN` env var set, because the underlying SDK supports it. Making sure `gcsfs` targets the correct endpoint was the only missing part. --- docs/source/index.rst | 11 +++++++++++ gcsfs/core.py | 16 +++++++++++++--- gcsfs/tests/test_core.py | 23 +++++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index f1ad2c9e..386702e9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -195,6 +195,17 @@ For further reference check `aiohttp proxy support`_. .. _aiohttp proxy support: https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support +Alternative GCP Universes +------------------------- + +To target an alternative GCP universe, the ``GOOGLE_CLOUD_UNIVERSE_DOMAIN`` +environment variable should be set to your desired unverse domain for ``gcsfs`` +to target the `Google Cloud Storage`_ API in your alternative universe. + +For instance, set ``GOOGLE_CLOUD_UNIVERSE_DOMAIN=s3nsapis.fr`` to target the +S3NS_ universe. + +.. 
_S3NS: https://www.s3ns.io/en/offers/premi3ns-trusted-cloud Contents ======== diff --git a/gcsfs/core.py b/gcsfs/core.py index 766efb42..eb491be7 100644 --- a/gcsfs/core.py +++ b/gcsfs/core.py @@ -108,6 +108,10 @@ def close(self): self.seek(0) +def _gcp_universe_domain(): + return os.getenv("GOOGLE_CLOUD_UNIVERSE_DOMAIN", "googleapis.com") + + def _location(): """ Resolves GCS HTTP location as http[s]://host @@ -125,7 +129,8 @@ def _location(): ): _emulator_location = f"http://{_emulator_location}" return _emulator_location - return "https://storage.googleapis.com" + + return f"https://storage.{_gcp_universe_domain()}" def _chunks(lst, n): @@ -346,6 +351,11 @@ def _location(self): def base(self): return f"{self._location}/storage/v1/" + @property + def batch_url_base(self): + return f"{self._location}/batch/storage/v1" + + @property def project(self): return self.credentials.project @@ -1300,7 +1310,7 @@ async def _rm_files(self, paths): body = "".join(parts) headers, content = await self._call( "POST", - f"{self._location}/batch/storage/v1", + self.batch_url_base, headers={ "Content-Type": 'multipart/mixed; boundary="==========' '=====7330845974216740156=="' @@ -1339,7 +1349,7 @@ async def _rm_files(self, paths): @property def on_google(self): - return "torage.googleapis.com" in self._location + return f"torage.{_gcp_universe_domain()}" in self._location async def _rm(self, path, recursive=False, maxdepth=None, batchsize=20): paths = await self._expand_path(path, recursive=recursive, maxdepth=maxdepth) diff --git a/gcsfs/tests/test_core.py b/gcsfs/tests/test_core.py index 25c14ad1..34b588d0 100644 --- a/gcsfs/tests/test_core.py +++ b/gcsfs/tests/test_core.py @@ -1709,3 +1709,26 @@ def test_ls_with_max_results(gcs): def test_get_error(gcs): with pytest.raises(FileNotFoundError): gcs.get_file(f"{TEST_BUCKET}/doesnotexist", "other") + + +def test_default_gcp_universe(): + fs = fsspec.filesystem("gcs") + assert fs.base == "https://storage.googleapis.com/storage/v1/" + 
assert fs.on_google is True + assert ( + fs.url("/test/path") + == "https://storage.googleapis.com/download/storage/v1/b/test/o/path?alt=media" + ) + assert fs.batch_url_base == "https://storage.googleapis.com/batch/storage/v1" + + +def test_custom_gcp_universe(monkeypatch): + monkeypatch.setenv("GOOGLE_CLOUD_UNIVERSE_DOMAIN", "s3nsapis.fr") + fs = fsspec.filesystem("gcs") + assert fs.base == "https://storage.s3nsapis.fr/storage/v1/" + assert fs.on_google is True + assert ( + fs.url("/test/path") + == "https://storage.s3nsapis.fr/download/storage/v1/b/test/o/path?alt=media" + ) + assert fs.batch_url_base == "https://storage.s3nsapis.fr/batch/storage/v1" From 22df853c47ec6074deb4eadadc93c33caf17becb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Lafarge?= Date: Tue, 30 Dec 2025 23:16:47 +0100 Subject: [PATCH 2/6] Fix typo, fix tests on CI & require a version of google-auth that supports GCSFs --- docs/source/index.rst | 2 +- gcsfs/tests/test_core.py | 26 +++++++++++++++++--------- requirements.txt | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 386702e9..37cd2bcc 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -199,7 +199,7 @@ Alternative GCP Universes ------------------------- To target an alternative GCP universe, the ``GOOGLE_CLOUD_UNIVERSE_DOMAIN`` -environment variable should be set to your desired unverse domain for ``gcsfs`` +environment variable should be set to your desired universe domain for ``gcsfs`` to target the `Google Cloud Storage`_ API in your alternative universe. 
For instance, set ``GOOGLE_CLOUD_UNIVERSE_DOMAIN=s3nsapis.fr`` to target the diff --git a/gcsfs/tests/test_core.py b/gcsfs/tests/test_core.py index 34b588d0..0689d719 100644 --- a/gcsfs/tests/test_core.py +++ b/gcsfs/tests/test_core.py @@ -1711,24 +1711,32 @@ def test_get_error(gcs): gcs.get_file(f"{TEST_BUCKET}/doesnotexist", "other") -def test_default_gcp_universe(): +def test_custom_gcp_universe(monkeypatch): + # Make sure we simulate a mock less connection + monkeypatch.delenv("GOOGLE_APPLICATION_CREDENTIALS", raising=False) + monkeypatch.delenv("STORAGE_EMULATOR_HOST", raising=False) + + monkeypatch.setenv("GOOGLE_CLOUD_UNIVERSE_DOMAIN", "s3nsapis.fr") fs = fsspec.filesystem("gcs") - assert fs.base == "https://storage.googleapis.com/storage/v1/" + assert fs.base == "https://storage.s3nsapis.fr/storage/v1/" assert fs.on_google is True assert ( fs.url("/test/path") - == "https://storage.googleapis.com/download/storage/v1/b/test/o/path?alt=media" + == "https://storage.s3nsapis.fr/download/storage/v1/b/test/o/path?alt=media" ) - assert fs.batch_url_base == "https://storage.googleapis.com/batch/storage/v1" + assert fs.batch_url_base == "https://storage.s3nsapis.fr/batch/storage/v1" -def test_custom_gcp_universe(monkeypatch): - monkeypatch.setenv("GOOGLE_CLOUD_UNIVERSE_DOMAIN", "s3nsapis.fr") +def test_default_gcp_universe(monkeypatch): + # Make sure we simulate a mock less connection + monkeypatch.delenv("STORAGE_EMULATOR_HOST", raising=False) + monkeypatch.delenv("GOOGLE_APPLICATION_CREDENTIALS", raising=False) + fs = fsspec.filesystem("gcs") - assert fs.base == "https://storage.s3nsapis.fr/storage/v1/" + assert fs.base == "https://storage.googleapis.com/storage/v1/" assert fs.on_google is True assert ( fs.url("/test/path") - == "https://storage.s3nsapis.fr/download/storage/v1/b/test/o/path?alt=media" + == "https://storage.googleapis.com/download/storage/v1/b/test/o/path?alt=media" ) - assert fs.batch_url_base == "https://storage.s3nsapis.fr/batch/storage/v1" + 
assert fs.batch_url_base == "https://storage.googleapis.com/batch/storage/v1" diff --git a/requirements.txt b/requirements.txt index 7f40c69d..cd31dfa9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aiohttp!=4.0.0a0, !=4.0.0a1 decorator>4.1.2 fsspec==2025.12.0 -google-auth>=1.2 +google-auth>=2.36.0 google-auth-oauthlib google-cloud-storage>=3.7.0 google-cloud-storage-control From cb7d8caa6d96ea076538a27f75b29c3c35e1197a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Lafarge?= Date: Wed, 31 Dec 2025 12:47:36 +0100 Subject: [PATCH 3/6] Fix tests --- gcsfs/core.py | 1 - gcsfs/tests/test_core.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/gcsfs/core.py b/gcsfs/core.py index eb491be7..43fc70b6 100644 --- a/gcsfs/core.py +++ b/gcsfs/core.py @@ -355,7 +355,6 @@ def base(self): def batch_url_base(self): return f"{self._location}/batch/storage/v1" - @property def project(self): return self.credentials.project diff --git a/gcsfs/tests/test_core.py b/gcsfs/tests/test_core.py index 0689d719..f668df56 100644 --- a/gcsfs/tests/test_core.py +++ b/gcsfs/tests/test_core.py @@ -1717,7 +1717,7 @@ def test_custom_gcp_universe(monkeypatch): monkeypatch.delenv("STORAGE_EMULATOR_HOST", raising=False) monkeypatch.setenv("GOOGLE_CLOUD_UNIVERSE_DOMAIN", "s3nsapis.fr") - fs = fsspec.filesystem("gcs") + fs = fsspec.filesystem("gcs", token="anon") assert fs.base == "https://storage.s3nsapis.fr/storage/v1/" assert fs.on_google is True assert ( @@ -1732,7 +1732,7 @@ def test_default_gcp_universe(monkeypatch): monkeypatch.delenv("STORAGE_EMULATOR_HOST", raising=False) monkeypatch.delenv("GOOGLE_APPLICATION_CREDENTIALS", raising=False) - fs = fsspec.filesystem("gcs") + fs = fsspec.filesystem("gcs", token="anon") assert fs.base == "https://storage.googleapis.com/storage/v1/" assert fs.on_google is True assert ( From 17003409dcdd5c86fcf3efab62533d999c45f069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Lafarge?= Date: Mon, 5 Jan 
2026 14:36:47 +0100 Subject: [PATCH 4/6] update docs --- docs/source/index.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 37cd2bcc..87e6cad1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -195,13 +195,23 @@ For further reference check `aiohttp proxy support`_. .. _aiohttp proxy support: https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support -Alternative GCP Universes -------------------------- +Targeting specific GCP endpoints +-------------------------------- + +There are multiple ways to target non-default Google Cloud storage endpoints: + +- setting the ``FSSPEC_GCS_ENDPOINT_URL`` environment variable to the desired + endpoint URL. +- passing the ``endpoint_url`` parameter to the ``GCSFileSystem`` constructor. +- setting the ``STORAGE_EMULATOR_HOST`` environment variable to the desired endpoint + URL (usage is reserved for testing purposes). To target an alternative GCP universe, the ``GOOGLE_CLOUD_UNIVERSE_DOMAIN`` environment variable should be set to your desired universe domain for ``gcsfs`` to target the `Google Cloud Storage`_ API in your alternative universe. +When doing so, `gcsfs` will target the ``https://storage.{universe_domain}`` endpoint. + For instance, set ``GOOGLE_CLOUD_UNIVERSE_DOMAIN=s3nsapis.fr`` to target the S3NS_ universe. From 82665e0589c39de187e8d9a4c1c5dc5c3edd075e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Lafarge?= Date: Mon, 5 Jan 2026 14:51:15 +0100 Subject: [PATCH 5/6] add order of precedence to docs --- docs/source/index.rst | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 87e6cad1..aded8e90 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -198,24 +198,18 @@ For further reference check `aiohttp proxy support`_. 
Targeting specific GCP endpoints -------------------------------- -There are multiple ways to target non-default Google Cloud storage endpoints: +There are multiple ways to target non-default Google Cloud storage endpoints. Here they are, +in order of precedence: +- passing the ``endpoint_url`` parameter to the ``GCSFileSystem`` constructor. - setting the ``FSSPEC_GCS_ENDPOINT_URL`` environment variable to the desired endpoint URL. -- passing the ``endpoint_url`` parameter to the ``GCSFileSystem`` constructor. - setting the ``STORAGE_EMULATOR_HOST`` environment variable to the desired endpoint URL (usage is reserved for testing purposes). +- setting the ``GOOGLE_CLOUD_UNIVERSE_DOMAIN`` environment variable to target + alternative GCP universes. `gcsfs` will target the ``https://storage.{universe_domain}`` + endpoint instead of the default ``https://storage.googleapis.com``. -To target an alternative GCP universe, the ``GOOGLE_CLOUD_UNIVERSE_DOMAIN`` -environment variable should be set to your desired universe domain for ``gcsfs`` -to target the `Google Cloud Storage`_ API in your alternative universe. - -When doing so, `gcsfs` will target the ``https://storage.{universe_domain}`` endpoint. - -For instance, set ``GOOGLE_CLOUD_UNIVERSE_DOMAIN=s3nsapis.fr`` to target the -S3NS_ universe. - -.. _S3NS: https://www.s3ns.io/en/offers/premi3ns-trusted-cloud Contents ======== From 357dc6aa88fd26846425c0bc6ad59223a9cb9242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Lafarge?= Date: Mon, 5 Jan 2026 22:18:13 +0100 Subject: [PATCH 6/6] relax google auth requirement constraint --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cd31dfa9..7f40c69d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aiohttp!=4.0.0a0, !=4.0.0a1 decorator>4.1.2 fsspec==2025.12.0 -google-auth>=2.36.0 +google-auth>=1.2 google-auth-oauthlib google-cloud-storage>=3.7.0 google-cloud-storage-control