Skip to content

Commit 939fe23

Browse files
committed
Add support for Scrapy 2.14 download handler API.
1 parent: 573c32a · commit: 939fe23

File tree

2 files changed: +35 additions, -14 deletions

2 files changed: +35 additions, -14 deletions

scrapy_playwright/handler.py

Lines changed: 34 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from time import time
1111
from typing import Awaitable, Callable, Dict, Optional, Tuple, Type, TypeVar, Union
1212

13+
from packaging.version import Version
1314
from playwright._impl._errors import TargetClosedError
1415
from playwright.async_api import (
1516
BrowserContext,
@@ -23,15 +24,15 @@
2324
Response as PlaywrightResponse,
2425
Route,
2526
)
26-
from scrapy import Spider, signals
27+
from scrapy import Spider, signals, __version__ as scrapy_version
2728
from scrapy.core.downloader.handlers.http11 import HTTP11DownloadHandler
2829
from scrapy.crawler import Crawler
2930
from scrapy.exceptions import NotSupported, ScrapyDeprecationWarning
3031
from scrapy.http import Request, Response
3132
from scrapy.http.headers import Headers
3233
from scrapy.responsetypes import responsetypes
3334
from scrapy.settings import Settings
34-
from scrapy.utils.defer import deferred_from_coro
35+
from scrapy.utils.defer import deferred_from_coro, maybe_deferred_to_future
3536
from scrapy.utils.misc import load_object
3637
from scrapy.utils.reactor import verify_installed_reactor
3738
from twisted.internet.defer import Deferred, inlineCallbacks
@@ -62,6 +63,8 @@
6263
DEFAULT_CONTEXT_NAME = "default"
6364
PERSISTENT_CONTEXT_PATH_KEY = "user_data_dir"
6465

66+
_SCRAPY_VERSION = Version(scrapy_version)
67+
_ASYNC_HANDLER_API = _SCRAPY_VERSION >= Version("2.14.0")
6568

6669
@dataclass
6770
class BrowserContextWrapper:
@@ -138,7 +141,10 @@ class ScrapyPlaywrightDownloadHandler(HTTP11DownloadHandler):
138141
playwright: Optional[AsyncPlaywright] = None
139142

140143
def __init__(self, crawler: Crawler) -> None:
141-
super().__init__(settings=crawler.settings, crawler=crawler)
144+
if _ASYNC_HANDLER_API:
145+
super().__init__(crawler=crawler)
146+
else:
147+
super().__init__(settings=crawler.settings, crawler=crawler)
142148
verify_installed_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
143149
crawler.signals.connect(self._engine_started, signals.engine_started)
144150
self.stats = crawler.stats
@@ -348,13 +354,21 @@ def _set_max_concurrent_context_count(self):
348354
"playwright/context_count/max_concurrent", len(self.context_wrappers)
349355
)
350356

351-
@inlineCallbacks
352-
def close(self) -> Deferred:
353-
logger.info("Closing download handler")
354-
yield super().close()
355-
yield self._deferred_from_coro(self._close())
356-
if self.config.use_threaded_loop:
357-
_ThreadedLoopAdapter.stop(id(self))
357+
if _ASYNC_HANDLER_API:
358+
async def close(self) -> None:
359+
logger.info("Closing download handler")
360+
await super().close()
361+
await maybe_deferred_to_future(self._deferred_from_coro(self._close()))
362+
if self.config.use_threaded_loop:
363+
_ThreadedLoopAdapter.stop(id(self))
364+
else:
365+
@inlineCallbacks
366+
def close(self) -> Deferred:
367+
logger.info("Closing download handler")
368+
yield super().close()
369+
yield self._deferred_from_coro(self._close())
370+
if self.config.use_threaded_loop:
371+
_ThreadedLoopAdapter.stop(id(self))
358372

359373
async def _close(self) -> None:
360374
with suppress(TargetClosedError):
@@ -368,10 +382,16 @@ async def _close(self) -> None:
368382
if self.playwright:
369383
await self.playwright.stop()
370384

371-
def download_request(self, request: Request, spider: Spider) -> Deferred:
372-
if request.meta.get("playwright"):
373-
return self._deferred_from_coro(self._download_request(request, spider))
374-
return super().download_request(request, spider)
385+
if _ASYNC_HANDLER_API:
386+
async def download_request(self, request: Request) -> Response:
387+
if request.meta.get("playwright"):
388+
return await maybe_deferred_to_future(self._deferred_from_coro(self._download_request(request, self._crawler.spider)))
389+
return await super().download_request(request)
390+
else:
391+
def download_request(self, request: Request, spider: Spider) -> Deferred: # type: ignore[misc]
392+
if request.meta.get("playwright"):
393+
return self._deferred_from_coro(self._download_request(request, spider))
394+
return super().download_request(request, spider)
375395

376396
async def _download_request(self, request: Request, spider: Spider) -> Response:
377397
counter = 0

setup.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
],
3636
python_requires=">=3.9",
3737
install_requires=[
38+
"packaging>=20.0",
3839
"scrapy>=2.0,!=2.4.0",
3940
"playwright>=1.15",
4041
],

0 commit comments

Comments
 (0)