Skip to content

Commit 4ff3366

Browse files
committed
0.2.0 - add external URL proxy support
Add the external URL proxy support for fetching the cross-origin resources.
1 parent 44a4780 commit 4ff3366

File tree

6 files changed

+323
-27
lines changed

6 files changed

+323
-27
lines changed

Changelog.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44

55
## CHANGELOG
66

7-
### 0.2.0 @ 03/25/2025
7+
### 0.2.0 @ 03/31/2025
88

99
#### :mega: New
1010

1111
1. Add the Dash component project based on TypeScript. The project contains a `PlainDownloader` component.
1212
2. Add the default downloader component `Downloader`. It is implemented by using `StreamSaver.js`.
1313
3. Add the cross-origin support when serving the data to clients (e.g. the browser).
14+
4. Add the external URL proxy support for fetching the cross-origin resources.
1415

1516
#### :floppy_disk: Change
1617

dash_file_cache/caches/tempfile.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,9 @@ def dump(self, key: str, info: th.CachedFileInfo, data: th.CachedData) -> None:
193193
while chunk:
194194
fobj.write(chunk)
195195
chunk = _data.read(self.__chunk_size)
196+
elif data["type"] == "request":
197+
with open(path + ".tmp", "w") as fobj:
198+
json.dump(data, fobj, ensure_ascii=False)
196199
else:
197200
raise TypeError(
198201
"cache: The value to be dumped is not recognized: {0}".format(data)
@@ -233,6 +236,10 @@ def _deferred():
233236
elif file_type == "bytes":
234237
fobj = open(path + ".tmp", "rb")
235238
return th.CachedBytesIO(type=file_type, data=fobj)
239+
elif file_type == "request":
240+
with open(path + ".tmp", "r") as fobj:
241+
_data: th.CachedRequest = json.load(fobj)
242+
return _data
236243
else:
237244
raise TypeError(
238245
"cache: The type {0} of the key to be loaded is not "

dash_file_cache/caches/typehints.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919

2020
import os
2121

22-
from typing import Union, IO, TypeVar
22+
from typing import Union, Optional, IO, TypeVar
2323

2424
try:
2525
from typing import Callable
26+
from typing import Dict
2627
except ImportError:
2728
from collections.abc import Callable
29+
from builtins import dict as Dict
2830

2931
from typing_extensions import Literal, TypedDict
3032

@@ -49,7 +51,7 @@
4951
class CachedFileInfo(TypedDict):
5052
"""The metadata of the cached file."""
5153

52-
type: Literal["path", "str", "bytes"]
54+
type: Literal["path", "str", "bytes", "request"]
5355
"""The type of this cached data."""
5456

5557
data_size: int
@@ -90,6 +92,31 @@ class CachedPath(TypedDict):
9092
"""The path to the file on the local disk."""
9193

9294

95+
class CachedRequest(TypedDict):
96+
"""The URL and request configuration specifying a remote file. This configuration
97+
is mainly used when a cross-domain URL needs to be accessed or an authentication
98+
protected file is to be read.
99+
100+
In this case, the `CachedRequest` will only store the URL and the request
101+
configurations. The request stream will be established and forwarded to the
102+
users when this cached data item is to be streamed.
103+
"""
104+
105+
type: Literal["request"]
106+
"""The type of this cached data."""
107+
108+
url: str
109+
"""The URL referring to the remote file."""
110+
111+
headers: Dict[str, str]
112+
"""A collection of headers to be used when accessing the remote file."""
113+
114+
file_name_fallback: Optional[str]
115+
"""A fall-back file name. It is used when the response of the request does not
116+
provide a file name. If this value is not specified, the URL will be used to guess
117+
the file name."""
118+
119+
93120
class CachedStringIO(TypedDict):
94121
"""The data of one cached `StringIO` data."""
95122

@@ -112,5 +139,5 @@ class CachedBytesIO(TypedDict):
112139
file name and the `mime_type`."""
113140

114141

115-
CachedData = Union[CachedPath, CachedStringIO, CachedBytesIO]
142+
CachedData = Union[CachedPath, CachedRequest, CachedStringIO, CachedBytesIO]
116143
"""The typehint of the cached data."""

dash_file_cache/services/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@
2323

2424
# Import sub-modules.
2525
from . import utilities
26+
from . import reqstream
2627
from . import data
2728

2829
from .data import ServiceData
2930

30-
__all__ = ("utilities", "data", "ServiceData")
31+
__all__ = ("utilities", "reqstream", "data", "ServiceData")
3132

3233
# Set this local module as the preferred one
3334
__path__ = extend_path(__path__, __name__)

dash_file_cache/services/data.py

Lines changed: 108 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,14 @@
2020
import os
2121
import io
2222
import uuid
23+
import urllib.parse
2324

2425
from typing import Union, Optional, IO, AnyStr, Any
2526

2627
try:
27-
from typing import Iterator, Callable
28+
from typing import Iterator, Callable, Mapping
2829
except ImportError:
29-
from collections.abc import Iterator, Callable
30+
from collections.abc import Iterator, Callable, Mapping
3031

3132
from typing_extensions import ClassVar, Literal
3233

@@ -40,12 +41,15 @@
4041
CachedPath,
4142
CachedStringIO,
4243
CachedBytesIO,
44+
CachedRequest,
4345
)
4446
from ..caches.abstract import CacheAbstract
4547
from ..caches.memory import CacheQueue
4648
from .utilities import no_cache, get_server
4749
from ..utilities import StreamFinalizer
4850

51+
from .reqstream import DeferredRequestStream
52+
4953

5054
__all__ = ("ServiceData",)
5155

@@ -118,6 +122,69 @@ def allowed_cross_origin(self) -> str:
118122
cross-origin data delivery will not be used."""
119123
return self.__allowed_cross_origin
120124

125+
def register_request(
126+
self,
127+
url: Union[str, urllib.parse.ParseResult],
128+
headers: Optional[Mapping[str, str]] = None,
129+
file_name_fallback: str = "",
130+
download: bool = True,
131+
) -> str:
132+
"""Register a remote request to the cache.
133+
134+
Arguments
135+
---------
136+
url: `str | urllib.parse.ParseResult`
137+
The string-form or parsed URL of the remote file.
138+
139+
headers: `Mapping[str, str] | None`
140+
The customized headers for this request. If not specified, will not use
141+
any headers.
142+
143+
file_name_fallback: `str`
144+
The fall-back file name. If specified, it will be used as the saved file
145+
name when the remote service does not provide the file name.
146+
147+
download: `bool`
148+
If `True`, will mark the returned address as a downloadable link.
149+
150+
Returns
151+
-------
152+
#1: `str`
153+
The URL that would be used for accessing this temporarily cached file.
154+
"""
155+
url_parsed = urllib.parse.urlparse(url) if isinstance(url, str) else url
156+
file_name = (
157+
url_parsed.path.replace("\\", "/").rsplit("/", maxsplit=1)[-1].strip()
158+
)
159+
if not file_name:
160+
file_name = "Unknown"
161+
162+
info = CachedFileInfo(
163+
type="request",
164+
data_size=0,
165+
file_name=file_name,
166+
content_type="",
167+
mime_type="application/octet-stream",
168+
one_time_service=False,
169+
)
170+
data = CachedRequest(
171+
type="request",
172+
url=url_parsed.geturl(),
173+
headers=dict(headers) if headers else dict(),
174+
file_name_fallback=file_name_fallback,
175+
)
176+
177+
uid = uuid.uuid4().hex
178+
179+
if isinstance(self.__cache, CacheQueue):
180+
cache = self.__cache.mirror
181+
else:
182+
cache = self.__cache
183+
cache.dump(key=uid, info=info, data=data)
184+
return "{0}?uid={1}{2}".format(
185+
self.__addr, uid, "&download=true" if download else ""
186+
)
187+
121188
def register(
122189
self,
123190
fobj: Union[str, os.PathLike, io.StringIO, io.BytesIO],
@@ -229,6 +296,11 @@ def loader() -> IO[Any]:
229296
_data = data()
230297
if _data["type"] == "path":
231298
fobj = open(_data["path"], "rb")
299+
elif _data["type"] == "request":
300+
raise TypeError(
301+
"service: Should not use the request data in file-based data "
302+
"loader."
303+
)
232304
else:
233305
fobj = _data["data"]
234306
fobj.seek(0, io.SEEK_SET)
@@ -264,7 +336,9 @@ def _stream_add_headers(
264336
"""Private method of `stream()`
265337
266338
Add customized headers to the data service response."""
267-
resp.headers["Content-Length"] = str(info["data_size"])
339+
data_size = info["data_size"]
340+
if isinstance(data_size, str) or (isinstance(data_size, int) and data_size > 0):
341+
resp.headers["Content-Length"] = str(data_size)
268342
if self.__allowed_cross_origin:
269343
resp.headers["Access-Control-Allow-Origin"] = self.__allowed_cross_origin
270344
resp.headers["Access-Control-Allow-Credentials"] = "true"
@@ -305,41 +379,53 @@ def stream(self, uid: str, download: bool = False) -> flask.Response:
305379

306380
info, deferred = self.__cache.load(uid)
307381

308-
if info["data_size"] <= 0:
309-
raise FileNotFoundError(
310-
"services: The requested file {0} is empty.".format(uid)
311-
)
312-
313382
file_type = info["type"]
314-
if file_type not in ("path", "str", "bytes"):
383+
if file_type not in ("path", "str", "bytes", "request"):
315384
raise TypeError(
316385
"service: Cannot recognize the type of fobj: " "{0}".format(file_type)
317386
)
318387

319-
one_time_service = info["one_time_service"]
320-
at_closed = self._stream_get_at_closed(cache=self.cache, uid=uid)
388+
if file_type != "request" and info["data_size"] <= 0:
389+
raise FileNotFoundError(
390+
"services: The requested file {0} is empty.".format(uid)
391+
)
321392

322-
def provider(_deferred: Callable[[], IO[AnyStr]]) -> Iterator[AnyStr]:
323-
"""Streaming data provider."""
393+
if info["type"] == "request":
394+
val = deferred()
395+
if val["type"] != "request":
396+
raise TypeError(
397+
"service: The data type ({0}) and the info type ({1}) does not "
398+
"match.".format(info["type"], val["type"])
399+
)
400+
streamer = DeferredRequestStream(info=info, data=val)
401+
stream = streamer.provide(chunk_size=self.__chunk_size)
402+
info = streamer.info
403+
else:
404+
one_time_service = info["one_time_service"]
405+
at_closed = self._stream_get_at_closed(cache=self.cache, uid=uid)
406+
407+
def provider(_deferred: Callable[[], IO[AnyStr]]) -> Iterator[AnyStr]:
408+
"""Streaming data provider."""
324409

325-
with StreamFinalizer(
326-
_deferred(), callback_on_exit=at_closed if one_time_service else None
327-
) as _fobj:
328-
data = _fobj.read(self.__chunk_size)
329-
while data:
330-
yield data
410+
with StreamFinalizer(
411+
_deferred(),
412+
callback_on_exit=at_closed if one_time_service else None,
413+
) as _fobj:
331414
data = _fobj.read(self.__chunk_size)
415+
while data:
416+
yield data
417+
data = _fobj.read(self.__chunk_size)
418+
419+
stream = provider(self._stream_data_to_loader(deferred))
332420

333421
resp = flask.Response(
334-
flask.stream_with_context(provider(self._stream_data_to_loader(deferred))),
422+
flask.stream_with_context(stream),
335423
content_type=(
336424
"application/octet-stream" if download else info["content_type"]
337425
),
338426
mimetype=info["mime_type"],
339427
)
340428
self._stream_add_headers(resp, info=info, uid=uid, download=download)
341-
print(resp.headers)
342-
343429
return resp
344430

345431
def serve(

0 commit comments

Comments
 (0)