diff --git a/docs/source/index.rst b/docs/source/index.rst
index f1ad2c9e..aded8e90 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -195,6 +195,21 @@ For further reference check `aiohttp proxy support`_.
 
 .. _aiohttp proxy support: https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support
 
+Targeting specific GCP endpoints
+--------------------------------
+
+There are multiple ways to target non-default Google Cloud Storage endpoints. They are listed
+below, in order of precedence:
+
+- passing the ``endpoint_url`` parameter to the ``GCSFileSystem`` constructor.
+- setting the ``FSSPEC_GCS_ENDPOINT_URL`` environment variable to the desired
+  endpoint URL.
+- setting the ``STORAGE_EMULATOR_HOST`` environment variable to the desired endpoint
+  URL (intended for testing purposes only).
+- setting the ``GOOGLE_CLOUD_UNIVERSE_DOMAIN`` environment variable to target
+  alternative GCP universes. ``gcsfs`` will target the ``https://storage.{universe_domain}``
+  endpoint instead of the default ``https://storage.googleapis.com``.
+
 Contents
 ========
 
diff --git a/gcsfs/core.py b/gcsfs/core.py
index 766efb42..43fc70b6 100644
--- a/gcsfs/core.py
+++ b/gcsfs/core.py
@@ -108,6 +108,10 @@ def close(self):
         self.seek(0)
 
 
+def _gcp_universe_domain():
+    return os.getenv("GOOGLE_CLOUD_UNIVERSE_DOMAIN", "googleapis.com")
+
+
 def _location():
     """
     Resolves GCS HTTP location as http[s]://host
@@ -125,7 +129,8 @@ def _location():
         ):
             _emulator_location = f"http://{_emulator_location}"
         return _emulator_location
-    return "https://storage.googleapis.com"
+
+    return f"https://storage.{_gcp_universe_domain()}"
 
 
 def _chunks(lst, n):
@@ -346,6 +351,10 @@ def _location(self):
     def base(self):
         return f"{self._location}/storage/v1/"
 
+    @property
+    def batch_url_base(self):
+        return f"{self._location}/batch/storage/v1"
+
     @property
     def project(self):
         return self.credentials.project
@@ -1300,7 +1309,7 @@ async def _rm_files(self, paths):
             body = "".join(parts)
             headers, content = await self._call(
                 "POST",
-                f"{self._location}/batch/storage/v1",
+                self.batch_url_base,
                 headers={
                     "Content-Type": 'multipart/mixed; boundary="==========='
                     '=====7330845974216740156=="'
@@ -1339,7 +1348,7 @@ async def _rm_files(self, paths):
 
     @property
     def on_google(self):
-        return "torage.googleapis.com" in self._location
+        return f"torage.{_gcp_universe_domain()}" in self._location
 
     async def _rm(self, path, recursive=False, maxdepth=None, batchsize=20):
         paths = await self._expand_path(path, recursive=recursive, maxdepth=maxdepth)
diff --git a/gcsfs/tests/test_core.py b/gcsfs/tests/test_core.py
index 25c14ad1..f668df56 100644
--- a/gcsfs/tests/test_core.py
+++ b/gcsfs/tests/test_core.py
@@ -1709,3 +1709,34 @@ def test_ls_with_max_results(gcs):
 def test_get_error(gcs):
     with pytest.raises(FileNotFoundError):
         gcs.get_file(f"{TEST_BUCKET}/doesnotexist", "other")
+
+
+def test_custom_gcp_universe(monkeypatch):
+    # Make sure we simulate a connection without mocks or an emulator
+    monkeypatch.delenv("GOOGLE_APPLICATION_CREDENTIALS", raising=False)
+    monkeypatch.delenv("STORAGE_EMULATOR_HOST", raising=False)
+
+    monkeypatch.setenv("GOOGLE_CLOUD_UNIVERSE_DOMAIN", "s3nsapis.fr")
+    fs = fsspec.filesystem("gcs", token="anon")
+    assert fs.base == "https://storage.s3nsapis.fr/storage/v1/"
+    assert fs.on_google is True
+    assert (
+        fs.url("/test/path")
+        == "https://storage.s3nsapis.fr/download/storage/v1/b/test/o/path?alt=media"
+    )
+    assert fs.batch_url_base == "https://storage.s3nsapis.fr/batch/storage/v1"
+
+
+def test_default_gcp_universe(monkeypatch):
+    # Make sure we simulate a connection without mocks or an emulator
+    monkeypatch.delenv("STORAGE_EMULATOR_HOST", raising=False)
+    monkeypatch.delenv("GOOGLE_APPLICATION_CREDENTIALS", raising=False)
+
+    fs = fsspec.filesystem("gcs", token="anon")
+    assert fs.base == "https://storage.googleapis.com/storage/v1/"
+    assert fs.on_google is True
+    assert (
+        fs.url("/test/path")
+        == "https://storage.googleapis.com/download/storage/v1/b/test/o/path?alt=media"
+    )
+    assert fs.batch_url_base == "https://storage.googleapis.com/batch/storage/v1"
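
The endpoint precedence documented in the index.rst hunk can be exercised roughly as in the sketch below. It is illustrative only and not part of the patch: the hostnames are placeholders, and it assumes endpoint_url and the environment variables behave as the docs section and the new tests describe.

# Illustrative sketch of the documented endpoint precedence; the hostnames are
# placeholders and nothing in this snippet is part of the patch itself.
import os

import fsspec

# Option 1 (highest precedence): pass endpoint_url to the GCSFileSystem
# constructor (fsspec.filesystem forwards keyword arguments to it).
fs = fsspec.filesystem(
    "gcs", token="anon", endpoint_url="https://storage.example.internal"
)
assert fs.base == "https://storage.example.internal/storage/v1/"

# Options 2 and 3 are environment variables, normally exported before the
# process starts; STORAGE_EMULATOR_HOST is intended for testing only:
#   export FSSPEC_GCS_ENDPOINT_URL=https://storage.example.internal
#   export STORAGE_EMULATOR_HOST=http://localhost:4443

# Option 4 (lowest precedence): a custom universe domain, from which gcsfs
# derives https://storage.<universe_domain> as its base endpoint.
os.environ["GOOGLE_CLOUD_UNIVERSE_DOMAIN"] = "s3nsapis.fr"
fs = fsspec.filesystem("gcs", token="anon")
assert fs.base == "https://storage.s3nsapis.fr/storage/v1/"

The final assertion mirrors the expectation used in test_custom_gcp_universe above.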