From cecab48f24e971599845baa9a408d988742ccb15 Mon Sep 17 00:00:00 2001
From: Jonas Hoersch
Date: Thu, 22 Jan 2026 15:53:21 +0100
Subject: [PATCH 1/5] feat: support data.pypsa.org and its manifests

---
 .../__init__.py    | 205 +++++++++++++-----
 .../monkeypatch.py |  10 +-
 2 files changed, 152 insertions(+), 63 deletions(-)

diff --git a/src/snakemake_storage_plugin_cached_http/__init__.py b/src/snakemake_storage_plugin_cached_http/__init__.py
index 1640c9a..7abf825 100644
--- a/src/snakemake_storage_plugin_cached_http/__init__.py
+++ b/src/snakemake_storage_plugin_cached_http/__init__.py
@@ -5,17 +5,18 @@
 import asyncio
 import hashlib
 import json
-from logging import Logger
 import shutil
 import time
 from contextlib import asynccontextmanager
 from dataclasses import dataclass, field
+from logging import Logger
 from pathlib import Path
-from typing_extensions import override
+from posixpath import basename, dirname, join, normpath, relpath
 from urllib.parse import urlparse
 
 import httpx
 import platformdirs
+import yaml
 from reretry import retry  # pyright: ignore[reportUnknownVariableType]
 from snakemake_interface_common.exceptions import WorkflowError
 from snakemake_interface_common.logging import get_logger
@@ -30,9 +31,10 @@
     StorageQueryValidationResult,
 )
 from tqdm_loggable.auto import tqdm
+from typing_extensions import override
 
 from .cache import Cache
-from .monkeypatch import is_zenodo_url  # noqa: F401 - applies monkeypatch on import
+from .monkeypatch import is_pypsa_or_zenodo_url
 
 logger = get_logger()
@@ -90,11 +92,12 @@ class StorageProviderSettings(SettingsBase):
 
 @dataclass
-class ZenodoFileMetadata:
-    """Metadata for a file in a Zenodo record."""
+class FileMetadata:
+    """Metadata for a file in a Zenodo or data.pypsa.org record."""
 
     checksum: str | None
     size: int
+    redirect: str | None = None  # used to indicate data.pypsa.org redirection
 
 
 class WrongChecksum(Exception):
@@ -140,7 +143,8 @@ def __post_init__(self):
         self._client_refcount: int = 0
 
         # Cache for record metadata to avoid repeated API calls
-        self._record_cache: dict[str, dict[str, ZenodoFileMetadata]] = {}
+        self._zenodo_record_cache: dict[str, dict[str, FileMetadata]] = {}
+        self._pypsa_manifest_cache: dict[str, dict[str, FileMetadata]] = {}
 
     @override
     def use_rate_limiter(self) -> bool:
@@ -162,16 +166,21 @@ def example_queries(cls) -> list[ExampleQuery]:
         return [
             ExampleQuery(
                 query="https://zenodo.org/records/17249457/files/ARDECO-SNPTD.2021.table.csv",
-                description="A Zenodo file URL (currently the only supported HTTP source)",
+                description="A Zenodo file URL",
                 type=QueryType.INPUT,
-            )
+            ),
+            ExampleQuery(
+                query="https://data.pypsa.org/workflows/eur/eez/v12_20231025/World_EEZ_v12_20231025_LR.zip",
+                description="A data.pypsa.org file URL",
+                type=QueryType.INPUT,
+            ),
         ]
 
     @override
     @classmethod
     def is_valid_query(cls, query: str) -> StorageQueryValidationResult:
         """Only handle zenodo.org URLs"""
-        if is_zenodo_url(query):
+        if is_pypsa_or_zenodo_url(query):
             return StorageQueryValidationResult(query=query, valid=True)
 
         return StorageQueryValidationResult(
@@ -265,22 +274,50 @@ async def httpr(self, method: str, url: str):
             raise
 
     @retry_decorator
-    async def get_metadata(
-        self, record_id: str, netloc: str
-    ) -> dict[str, ZenodoFileMetadata]:
+    async def get_metadata(self, path: str, netloc: str) -> FileMetadata | None:
         """
-        Retrieve and cache file metadata for a Zenodo record.
+        Retrieve and cache file metadata for a Zenodo record or a data.pypsa.org file.
 
         Args:
-            record_id: The Zenodo record ID
+            path: Server path
+            netloc: Network location (e.g., "zenodo.org")
+
+        Returns:
+            FileMetadata for the requested file, or None if not found
+        """
+        if netloc in ("zenodo.org", "sandbox.zenodo.org"):
+            return await self.get_zenodo_metadata(path, netloc)
+        elif netloc in "data.pypsa.org":
+            return await self.get_pypsa_metadata(path, netloc)
+
+        raise WorkflowError(
+            "Cached-http storage plugin is only implemented for zenodo.org and data.pypsa.org URLs"
+        )
+
+    async def get_zenodo_metadata(self, path: str, netloc: str) -> FileMetadata | None:
+        """
+        Retrieve and cache file metadata for a Zenodo record.
+
+        Args:
+            path: Server path
             netloc: Network location (e.g., "zenodo.org")
 
         Returns:
-            Dictionary mapping filename to ZenodoFileMetadata
+            FileMetadata for the requested file, or None if not found
         """
+
+        # Zenodo record
+        _records, record_id, _files, filename = path.split("/", maxsplit=3)
+
+        if _records != "records" or _files != "files":
+            raise WorkflowError(
+                f"Invalid Zenodo URL format: http(s)://{netloc}/{path}. "
+                f"Expected format: https://zenodo.org/records/{{record_id}}/files/{{filename}}"
+            )
+
         # Check cache first
-        if record_id in self._record_cache:
-            return self._record_cache[record_id]
+        if record_id in self._zenodo_record_cache:
+            return self._zenodo_record_cache[record_id].get(filename)
 
         # Fetch from API
         api_url = f"https://{netloc}/api/records/{record_id}"
@@ -296,30 +333,87 @@
             data = json.loads(content)
 
         # Parse files array and build metadata dict
-        metadata: dict[str, ZenodoFileMetadata] = {}
+        metadata: dict[str, FileMetadata] = {}
         files = data.get("files", [])
         for file_info in files:
-            filename: str | None = file_info.get("key")
+            fn: str | None = file_info.get("key")
            checksum: str | None = file_info.get("checksum")
             size: int = file_info.get("size", 0)
 
-            if not filename:
+            if not fn:
                 continue
 
-            metadata[filename] = ZenodoFileMetadata(checksum=checksum, size=size)
+            metadata[fn] = FileMetadata(checksum=checksum, size=size)
+
+        # Store in cache
+        self._zenodo_record_cache[record_id] = metadata
+
+        return metadata.get(filename)
+
+    async def get_pypsa_metadata(self, path: str, netloc: str) -> FileMetadata | None:
+        """
+        Retrieve and cache file metadata from data.pypsa.org manifest.
+
+        Args:
+            record_id: The Zenodo record ID
+            netloc: Network location (e.g., "data.pypsa.org")
+
+        Returns:
+            Dictionary mapping filename to FileMetadata
+        """
+
+        # Check cache first
+        base_path = dirname(path)
+        while base_path:
+            if base_path in self._pypsa_manifest_cache:
+                filename = relpath(path, base_path)
+                return self._pypsa_manifest_cache[base_path].get(filename)
+            base_path = dirname(base_path)
+
+        # Fetch manifest
+        base_path = dirname(path)
+        while base_path:
+            manifest_url = f"https://{netloc}/{base_path}/manifest.yaml"
+
+            async with self.httpr("get", manifest_url) as response:
+                if response.status_code == 200:
+                    content = await response.aread()
+                    data = yaml.safe_load(content)
+                    break
+
+            base_path = dirname(base_path)
+        else:
+            raise WorkflowError(
+                f"Failed to fetch data.pypsa.org manifest for https://{netloc}/{path}"
+            )
+
+        # Parse files array and build metadata dict
+        metadata: dict[str, FileMetadata] = {}
+        files = data.get("files", {})
+        for filename, file_info in files.items():
+            redirect: str | None = file_info.get("redirect")
+            checksum: str | None = file_info.get("checksum")
+            size: int = file_info.get("size", 0)
+
+            if redirect is not None:
+                redirect = normpath(join(base_path, redirect))
+
+            metadata[filename] = FileMetadata(
+                checksum=checksum, size=size, redirect=redirect
+            )
 
         # Store in cache
-        self._record_cache[record_id] = metadata
+        self._pypsa_manifest_cache[base_path] = metadata
 
-        return metadata
+        filename = relpath(path, base_path)
+        return metadata.get(filename)
 
 
 # Implementation of storage object
 class StorageObject(StorageObjectRead):
     provider: StorageProvider  # pyright: ignore[reportIncompatibleVariableOverride]
-    record_id: str
-    filename: str
     netloc: str
+    path: str
 
     def __post_init__(self):
         super().__post_init__()
 
         # Parse URL to extract record ID and filename
         # URL format: https://zenodo.org/records/{record_id}/files/{filename}
         parsed = urlparse(str(self.query))
-        _records, record_id, _files, filename = parsed.path.strip("/").split(
-            "/", maxsplit=3
-        )
-
-        if _records != "records" or _files != "files":
-            raise WorkflowError(
-                f"Invalid Zenodo URL format: {self.query}. "
" - f"Expected format: https://zenodo.org/records/{{record_id}}/files/{{filename}}" - ) - - self.record_id = record_id - self.filename = filename self.netloc = parsed.netloc + self.path = parsed.path.strip("/") @override def local_suffix(self) -> str: """Return the local suffix for this object (used by parent class).""" - parsed = urlparse(str(self.query)) - return f"{parsed.netloc}{parsed.path}" + return f"{self.netloc}{self.path}" @override def get_inventory_parent(self) -> str | None: @@ -363,8 +445,8 @@ async def managed_exists(self) -> bool: if cached is not None: return True - metadata = await self.provider.get_metadata(self.record_id, self.netloc) - return self.filename in metadata + metadata = await self.provider.get_metadata(self.path, self.netloc) + return metadata is not None @override async def managed_mtime(self) -> float: @@ -380,8 +462,8 @@ async def managed_size(self) -> int: if cached is not None: return cached.stat().st_size - metadata = await self.provider.get_metadata(self.record_id, self.netloc) - return metadata[self.filename].size if self.filename in metadata else 0 + metadata = await self.provider.get_metadata(self.path, self.netloc) + return metadata.size if metadata is not None else 0 @override async def inventory(self, cache: IOCacheStorageInterface) -> None: @@ -408,11 +490,11 @@ async def inventory(self, cache: IOCacheStorageInterface) -> None: cache.size[key] = cached.stat().st_size return - metadata = await self.provider.get_metadata(self.record_id, self.netloc) - exists = self.filename in metadata + metadata = await self.provider.get_metadata(self.path, self.netloc) + exists = metadata is not None cache.exists_in_storage[key] = exists cache.mtime[key] = Mtime(storage=0) - cache.size[key] = metadata[self.filename].size if exists else 0 + cache.size[key] = metadata.size if exists else 0 @override def cleanup(self): @@ -443,13 +525,11 @@ async def verify_checksum(self, path: Path) -> None: WrongChecksum """ # Get cached or fetch record metadata - metadata = await self.provider.get_metadata(self.record_id, self.netloc) - if self.filename not in metadata: - raise WorkflowError( - f"File {self.filename} not found in Zenodo record {self.record_id}" - ) + metadata = await self.provider.get_metadata(self.path, self.netloc) + if metadata is None: + raise WorkflowError(f"No metadata found for {self.query}") - checksum = metadata[self.filename].checksum + checksum = metadata.checksum if checksum is None: return @@ -472,23 +552,28 @@ async def managed_retrieve(self): local_path = self.local_path() local_path.parent.mkdir(parents=True, exist_ok=True) + query = str(self.query) + filename = basename(self.path) + + metadata = self.provider.get_metadata(self.path, self.netloc) + if metadata.redirect is not None: + query = f"https://{self.netloc}/{metadata.redirect}" + # If already in cache, just copy if self.provider.cache: cached = self.provider.cache.get(str(self.query)) if cached is not None: - logger.info( - f"Retrieved {self.filename} of zenodo record {self.record_id} from cache" - ) + logger.info(f"Retrieved {filename} from cache ({query})") shutil.copy2(cached, local_path) return try: - # Download from Zenodo using a get request, rate limit errors are detected and + # Download from Zenodo or data.pypsa.org using a get request, rate limit errors are detected and # raise WorkflowError to trigger a retry - async with self.provider.httpr("get", str(self.query)) as response: + async with self.provider.httpr("get", query) as response: if response.status_code != 200: raise 
-                        f"Failed to download from Zenodo: HTTP {response.status_code} ({self.query})"
+                        f"Failed to download: HTTP {response.status_code} ({query})"
                     )
 
                 total_size = int(response.headers.get("content-length", 0))
@@ -499,7 +584,7 @@
                     total=total_size,
                     unit="B",
                     unit_scale=True,
-                    desc=self.filename,
+                    desc=filename,
                     position=None,
                     leave=True,
                 ) as pbar:
@@ -511,7 +596,7 @@
 
             # Copy to cache after successful verification
             if self.provider.cache:
-                self.provider.cache.put(str(self.query), local_path)
+                self.provider.cache.put(query, local_path)
 
         except:
             if local_path.exists():
diff --git a/src/snakemake_storage_plugin_cached_http/monkeypatch.py b/src/snakemake_storage_plugin_cached_http/monkeypatch.py
index dc43c32..2036624 100644
--- a/src/snakemake_storage_plugin_cached_http/monkeypatch.py
+++ b/src/snakemake_storage_plugin_cached_http/monkeypatch.py
@@ -17,9 +17,13 @@
 )
 
 
-def is_zenodo_url(url: str) -> bool:
+def is_pypsa_or_zenodo_url(url: str) -> bool:
     parsed = urlparse(url)
-    return parsed.netloc in ("zenodo.org", "sandbox.zenodo.org") and parsed.scheme in (
+    return parsed.netloc in (
+        "zenodo.org",
+        "sandbox.zenodo.org",
+        "data.pypsa.org",
+    ) and parsed.scheme in (
         "http",
         "https",
     )
@@ -34,7 +38,7 @@ def is_zenodo_url(url: str) -> bool:
                 valid=False,
                 reason="Deactivated in favour of cached_http",
             )
-            if is_zenodo_url(q)
+            if is_pypsa_or_zenodo_url(q)
             else orig_valid_query(q)
         )
     )

From d49b2026da410ea995ee2e7652707ac48fd5538f Mon Sep 17 00:00:00 2001
From: Jonas Hoersch
Date: Thu, 22 Jan 2026 16:07:33 +0100
Subject: [PATCH 2/5] fix: bug and doc fixes

---
 .../__init__.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/snakemake_storage_plugin_cached_http/__init__.py b/src/snakemake_storage_plugin_cached_http/__init__.py
index 7abf825..b4dd009 100644
--- a/src/snakemake_storage_plugin_cached_http/__init__.py
+++ b/src/snakemake_storage_plugin_cached_http/__init__.py
@@ -2,7 +2,6 @@
 #
 # SPDX-License-Identifier: MIT
 
-import asyncio
 import hashlib
 import json
 import shutil
@@ -186,7 +185,7 @@ def is_valid_query(cls, query: str) -> StorageQueryValidationResult:
         return StorageQueryValidationResult(
             query=query,
             valid=False,
-            reason="Not a Zenodo URL (only zenodo.org URLs are handled by this plugin)",
+            reason="Only zenodo.org and data.pypsa.org URLs are handled by this plugin",
         )
 
     @override
@@ -287,7 +286,7 @@ async def get_metadata(self, path: str, netloc: str) -> FileMetadata | None:
         """
         if netloc in ("zenodo.org", "sandbox.zenodo.org"):
             return await self.get_zenodo_metadata(path, netloc)
-        elif netloc in "data.pypsa.org":
+        elif netloc == "data.pypsa.org":
             return await self.get_pypsa_metadata(path, netloc)
 
         raise WorkflowError(
@@ -355,11 +354,11 @@ async def get_pypsa_metadata(self, path: str, netloc: str) -> FileMetadata | Non
         """
         Retrieve and cache file metadata from data.pypsa.org manifest.
 
         Args:
-            record_id: The Zenodo record ID
+            path: Server path
             netloc: Network location (e.g., "data.pypsa.org")
 
         Returns:
-            Dictionary mapping filename to FileMetadata
+            FileMetadata for the requested file, or None if not found
         """
 
         # Check cache first
@@ -555,13 +554,13 @@ async def managed_retrieve(self):
         query = str(self.query)
         filename = basename(self.path)
 
-        metadata = self.provider.get_metadata(self.path, self.netloc)
-        if metadata.redirect is not None:
+        metadata = await self.provider.get_metadata(self.path, self.netloc)
+        if metadata is not None and metadata.redirect is not None:
             query = f"https://{self.netloc}/{metadata.redirect}"
 
         # If already in cache, just copy
         if self.provider.cache:
-            cached = self.provider.cache.get(str(self.query))
+            cached = self.provider.cache.get(query)
             if cached is not None:
                 logger.info(f"Retrieved {filename} from cache ({query})")
                 shutil.copy2(cached, local_path)
                 return

From b4dbc4cee3552286e5512548bebcb70f7ca25211 Mon Sep 17 00:00:00 2001
From: Jonas Hoersch
Date: Thu, 22 Jan 2026 16:17:47 +0100
Subject: [PATCH 3/5] fix: update tests

---
 tests/test_download.py | 85 +++++++++++++++++++++++++++---------------
 tests/test_import.py   | 15 ++++++--
 2 files changed, 66 insertions(+), 34 deletions(-)

diff --git a/tests/test_download.py b/tests/test_download.py
index 704d62c..b0abe5a 100644
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: MIT
 
-"""Functional tests for downloading and checksumming from Zenodo."""
+"""Functional tests for downloading and checksumming from Zenodo and data.pypsa.org."""
 
 import json
 import logging
@@ -16,7 +16,21 @@
     WrongChecksum,
 )
 
-TEST_URL = "https://zenodo.org/records/16810901/files/attributed_ports.json"
+# Test URLs and their metadata paths
+TEST_CONFIGS = {
+    "zenodo": {
+        "url": "https://zenodo.org/records/16810901/files/attributed_ports.json",
+        "path": "records/16810901/files/attributed_ports.json",
+        "netloc": "zenodo.org",
+        "has_size": True,
+    },
+    "pypsa": {
+        "url": "https://data.pypsa.org/workflows/eur/attributed_ports/2020-07-10/attributed_ports.json",
+        "path": "workflows/eur/attributed_ports/2020-07-10/attributed_ports.json",
+        "netloc": "data.pypsa.org",
+        "has_size": False,  # data.pypsa.org manifests don't include size
+    },
+}
 
 
 @pytest.fixture
@@ -44,33 +58,36 @@ def storage_provider(tmp_path):
     return provider
 
 
+@pytest.fixture(params=["zenodo", "pypsa"])
+def test_config(request):
+    """Provide test configuration (parametrized for zenodo and pypsa)."""
+    return TEST_CONFIGS[request.param]
+
+
 @pytest.fixture
-def storage_object(storage_provider):
-    """Create a StorageObject for the test file."""
-    # Create storage object
+def storage_object(test_config, storage_provider):
+    """Create a StorageObject for the test file (parametrized for zenodo and pypsa)."""
     obj = StorageObject(
-        query=TEST_URL,
+        query=test_config["url"],
         keep_local=False,
         retrieve=True,
         provider=storage_provider,
     )
-
     yield obj
 
 
 @pytest.mark.asyncio
-async def test_zenodo_metadata_fetch(storage_provider):
-    """Test that we can fetch metadata from Zenodo API."""
-    record_id = "16810901"
-    netloc = "zenodo.org"
-
-    metadata = await storage_provider.get_metadata(record_id, netloc)
+async def test_metadata_fetch(storage_provider, test_config):
+    """Test that we can fetch metadata from the API/manifest."""
+    metadata = await storage_provider.get_metadata(
+        test_config["path"], test_config["netloc"]
+    )
 
-    assert "attributed_ports.json" in metadata
-    file_meta = metadata["attributed_ports.json"]
-    assert file_meta.checksum is not None
-    assert file_meta.size > 0
-    assert file_meta.checksum.startswith("md5:")
+    assert metadata is not None
+    assert metadata.checksum is not None
+    assert metadata.checksum.startswith("md5:")
+    if test_config["has_size"]:
+        assert metadata.size > 0
 
 
 @pytest.mark.asyncio
 async def test_storage_object_exists(storage_object):
     """Test that the storage object exists."""
     exists = await storage_object.managed_exists()
     assert exists is True
 
 
 @pytest.mark.asyncio
-async def test_storage_object_size(storage_object):
+async def test_storage_object_size(storage_object, test_config):
     """Test that the storage object reports size correctly."""
     size = await storage_object.managed_size()
-    assert size > 0
-    # The file is a small JSON file, should be less than 1MB
-    assert size < 1_000_000
+    if test_config["has_size"]:
+        assert size > 0
+        # The file is a small JSON file, should be less than 1MB
+        assert size < 1_000_000
+    else:
+        # data.pypsa.org manifests don't include size
+        assert size == 0
 
 
 @pytest.mark.asyncio
 async def test_storage_object_mtime(storage_object):
-    """Test that mtime is 0 for immutable Zenodo URLs."""
+    """Test that mtime is 0 for immutable URLs."""
     mtime = await storage_object.managed_mtime()
     assert mtime == 0
@@ -112,21 +133,23 @@ async def test_download_and_checksum(storage_object, tmp_path):
     assert local_path.exists()
     assert local_path.stat().st_size > 0
 
-    # Verify it's valid JSON (use utf-8 with error handling for any encoding issues)
+    # Verify it's valid JSON
     with open(local_path, encoding="utf-8", errors="replace") as f:
         data = json.load(f)
-        assert isinstance(data, dict)
+        assert isinstance(data, (dict, list))
 
     # Verify checksum (should not raise WrongChecksum exception)
     await storage_object.verify_checksum(local_path)
 
 
 @pytest.mark.asyncio
-async def test_cache_functionality(storage_provider, tmp_path):
+async def test_cache_functionality(storage_provider, test_config, tmp_path):
     """Test that files are cached after download."""
+    url = test_config["url"]
+
     # First download
     obj1 = StorageObject(
-        query=TEST_URL,
+        query=url,
         keep_local=False,
         retrieve=True,
         provider=storage_provider,
@@ -140,13 +163,13 @@ async def test_cache_functionality(storage_provider, tmp_path):
 
     # Verify cache was populated
     assert obj1.provider.cache is not None
-    cached_path = obj1.provider.cache.get(str(obj1.query))
+    cached_path = obj1.provider.cache.get(url)
     assert cached_path is not None
     assert cached_path.exists()
 
     # Second download should use cache
     obj2 = StorageObject(
-        query=TEST_URL,
+        query=url,
         keep_local=False,
         retrieve=True,
         provider=storage_provider,
@@ -163,7 +186,7 @@ async def test_cache_functionality(storage_provider, tmp_path):
 
 
 @pytest.mark.asyncio
-async def test_skip_remote_checks(tmp_path):
+async def test_skip_remote_checks(test_config, tmp_path):
     """Test that skip_remote_checks works correctly."""
     local_prefix = tmp_path / "local"
     local_prefix.mkdir()
@@ -183,7 +206,7 @@ async def test_skip_remote_checks(tmp_path):
     )
 
     obj = StorageObject(
-        query=TEST_URL,
+        query=test_config["url"],
         keep_local=False,
         retrieve=True,
         provider=provider_skip,
diff --git a/tests/test_import.py b/tests/test_import.py
index dfcd378..194322c 100644
--- a/tests/test_import.py
+++ b/tests/test_import.py
@@ -55,14 +55,23 @@ def test_is_valid_query_zenodo():
     assert result.valid is True
 
 
-def test_is_valid_query_non_zenodo():
+def test_is_valid_query_pypsa():
+    """Test that is_valid_query accepts data.pypsa.org URLs."""
+    from snakemake_storage_plugin_cached_http import StorageProvider
+
+    result = StorageProvider.is_valid_query(
"https://data.pypsa.org/workflows/eur/eez/v12_20231025/World_EEZ_v12_20231025_LR.zip" + ) + assert result.valid is True + + +def test_is_valid_query_non_zenodo_or_pypsa(): """Test that is_valid_query rejects non-zenodo URLs.""" from snakemake_storage_plugin_cached_http import StorageProvider - # Non-Zenodo URL should be rejected + # Non-Zenodo/PyPSA URL should be rejected result = StorageProvider.is_valid_query("https://example.com/file.txt") assert result.valid is False - assert "zenodo" in result.reason.lower() def test_example_queries(): From ec90293b6a88376f63c9adb06e49e69bf168ea30 Mon Sep 17 00:00:00 2001 From: Jonas Hoersch Date: Thu, 22 Jan 2026 16:22:47 +0100 Subject: [PATCH 4/5] Update README.md --- README.md | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 9076e5e..3ee9120 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,14 @@ SPDX-License-Identifier: CC-BY-4.0 A Snakemake storage plugin for downloading files via HTTP with local caching, checksum verification, and adaptive rate limiting. -**Note:** This plugin is currently specifically designed for zenodo.org URLs. +**Supported sources:** +- **zenodo.org** - Zenodo data repository (checksum from API) +- **data.pypsa.org** - PyPSA data repository (checksum from manifest.yaml) ## Features - **Local caching**: Downloads are cached to avoid redundant transfers (can be disabled) -- **Checksum verification**: Automatically verifies MD5 checksums from Zenodo API +- **Checksum verification**: Automatically verifies checksums (from Zenodo API or data.pypsa.org manifests) - **Rate limit handling**: Automatically respects Zenodo's rate limits using `X-RateLimit-*` headers with exponential backoff retry - **Concurrent download control**: Limits simultaneous downloads to prevent overwhelming Zenodo - **Progress bars**: Shows download progress with tqdm @@ -64,16 +66,24 @@ If you don't explicitly configure it, the plugin will use default settings autom ## Usage -Use Zenodo URLs directly in your rules. Snakemake automatically detects zenodo.org URLs and routes them to this plugin: +Use Zenodo or data.pypsa.org URLs directly in your rules. Snakemake automatically detects supported URLs and routes them to this plugin: ```python -rule download_data: +rule download_zenodo: input: storage("https://zenodo.org/records/3520874/files/natura.tiff"), output: "resources/natura.tiff" shell: "cp {input} {output}" + +rule download_pypsa: + input: + storage("https://data.pypsa.org/workflows/eur/eez/v12_20231025/World_EEZ_v12_20231025_LR.zip"), + output: + "resources/eez.zip" + shell: + "cp {input} {output}" ``` Or if you configured a tagged storage entity: @@ -93,11 +103,11 @@ rule download_data: The plugin will: 1. Check if the file exists in the cache (if caching is enabled) 2. If cached, copy from cache (fast) -3. If not cached, download from Zenodo with: +3. If not cached, download with: - Progress bar showing download status - Automatic rate limit handling with exponential backoff retry - Concurrent download limiting - - MD5 checksum verification against Zenodo API metadata + - Checksum verification (from Zenodo API or data.pypsa.org manifest) 4. 
 4. Store in cache for future use (if caching is enabled)
 
 ### Example: CI/CD Configuration
@@ -129,19 +139,19 @@ The plugin automatically:
 
 ## URL Handling
 
-- Only handles URLs containing `zenodo.org`
+- Handles URLs from `zenodo.org`, `sandbox.zenodo.org`, and `data.pypsa.org`
 - Other HTTP(S) URLs are handled by the standard `snakemake-storage-plugin-http`
 - Both plugins can coexist in the same workflow
 
 ### Plugin Priority
 
 When using `storage()` without specifying a plugin name, Snakemake checks all installed plugins:
-- **Cached HTTP plugin**: Only accepts zenodo.org URLs (`is_valid_query` returns True only for zenodo.org)
+- **Cached HTTP plugin**: Only accepts zenodo.org and data.pypsa.org URLs
 - **HTTP plugin**: Accepts all HTTP/HTTPS URLs (including zenodo.org)
 
-If both plugins are installed, zenodo.org URLs are ambiguous - both plugins accept them.
-Typically snakemake would raise an error: **"Multiple suitable storage providers found"** if you try to use `storage()` without specifying which plugin to use, ie. one needs to explicitly call the Cached HTTP provider for zenodo.org URLs using `storage.cached_http(url)` instead of `storage(url)`,
-but we monkey-patch the http plugin to refuse zenodo.org urls.
+If both plugins are installed, supported URLs would be ambiguous - both plugins accept them.
+Typically, Snakemake would raise a **"Multiple suitable storage providers found"** error if you used `storage()` without specifying which plugin, i.e. one would need to call the Cached HTTP provider explicitly via `storage.cached_http(url)` instead of `storage(url)`,
+but we monkey-patch the http plugin to refuse zenodo.org and data.pypsa.org URLs.
 
 ## License

From 57562d3930f2b964b02edae2a8d4a81a70ba08fd Mon Sep 17 00:00:00 2001
From: Jonas Hoersch
Date: Thu, 22 Jan 2026 17:04:52 +0100
Subject: [PATCH 5/5] fix: improve caching test

---
 tests/conftest.py      | 29 +++++++++++++++++++++++++++++
 tests/test_download.py |  6 ++++--
 2 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 tests/conftest.py

diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..78f7805
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: Contributors to PyPSA-Eur
+#
+# SPDX-License-Identifier: MIT
+
+"""Pytest configuration and shared fixtures."""
+
+from contextlib import contextmanager
+
+
+@contextmanager
+def assert_no_http_requests(provider):
+    """
+    Context manager that fails if any HTTP requests are made.
+
+    Usage:
+        with assert_no_http_requests(storage_provider):
+            await obj.managed_retrieve()  # Should use cache, not HTTP
+    """
+    original_httpr = provider.httpr
+
+    async def httpr_should_not_be_called(*args, **kwargs):
+        raise AssertionError("HTTP request made when none was expected")
+
+    provider.httpr = httpr_should_not_be_called
+
+    try:
+        yield
+    finally:
+        provider.httpr = original_httpr
diff --git a/tests/test_download.py b/tests/test_download.py
index b0abe5a..6690542 100644
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -15,6 +15,7 @@
     StorageProviderSettings,
     WrongChecksum,
 )
+from tests.conftest import assert_no_http_requests
 
 # Test URLs and their metadata paths
 TEST_CONFIGS = {
@@ -167,7 +168,7 @@ async def test_cache_functionality(storage_provider, test_config, tmp_path):
     assert cached_path is not None
     assert cached_path.exists()
 
-    # Second download should use cache
+    # Second download should use cache - verify by checking no HTTP requests are made
     obj2 = StorageObject(
@@ -179,7 +180,8 @@ async def test_cache_functionality(storage_provider, test_config, tmp_path):
     local_path2.parent.mkdir(parents=True, exist_ok=True)
     obj2.local_path = lambda: local_path2
 
-    await obj2.managed_retrieve()
+    with assert_no_http_requests(storage_provider):
+        await obj2.managed_retrieve()
 
     # Both files should be identical
     assert local_path1.read_bytes() == local_path2.read_bytes()
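
For reference, the series parses a `manifest.yaml` but never shows one. `get_pypsa_metadata` walks up the URL path, requesting `manifest.yaml` from each ancestor directory until one answers with HTTP 200, then looks the file up in a top-level `files` mapping keyed by the path relative to that manifest's directory. A minimal sketch of the lookup order, with a hypothetical helper name and file URL (this function is illustrative, not part of the plugin):

```python
from posixpath import dirname


def candidate_manifests(netloc: str, path: str) -> list[str]:
    """Manifest URLs tried by get_pypsa_metadata, nearest directory first."""
    urls = []
    base_path = dirname(path)
    while base_path:
        urls.append(f"https://{netloc}/{base_path}/manifest.yaml")
        base_path = dirname(base_path)
    return urls


# candidate_manifests("data.pypsa.org", "workflows/eur/eez/v12/file.zip") yields:
#   https://data.pypsa.org/workflows/eur/eez/v12/manifest.yaml
#   https://data.pypsa.org/workflows/eur/eez/manifest.yaml
#   https://data.pypsa.org/workflows/eur/manifest.yaml
#   https://data.pypsa.org/workflows/manifest.yaml
# get_pypsa_metadata raises WorkflowError if none of them can be fetched.
```

A manifest consistent with that parsing logic maps each relative file path to an entry with the keys the plugin reads (`checksum`, `size`, `redirect`). All names and values below are illustrative, not taken from data.pypsa.org:

```yaml
files:
  v12_20231025/World_EEZ_v12_20231025_LR.zip:
    # the md5 prefix matches what test_metadata_fetch asserts for both sources
    checksum: "md5:0123456789abcdef0123456789abcdef"
    # optional: downloaded instead of the original URL, resolved relative to
    # the manifest's directory via normpath(join(base_path, redirect))
    redirect: ../mirror/World_EEZ_v12_20231025_LR.zip
    # size may be omitted; managed_size() then reports 0
```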