
Commit 33ba628

Merge remote-tracking branch 'origin/master' into run-uts-on-macos
2 parents: 5d83c47 + 883355a

File tree: 32 files changed, +1003 −582 lines


.github/workflows/build_and_deploy_docs.yaml

Lines changed: 5 additions & 1 deletion

@@ -67,6 +67,10 @@ jobs:
         uses: actions/deploy-pages@v4
 
       - name: Invalidate CloudFront cache
-        run: gh workflow run invalidate.yaml --repo apify/apify-docs-private
+        run: |
+          gh workflow run invalidate-cloudfront.yml \
+            --repo apify/apify-docs-private \
+            --field deployment=crawlee-web
+          echo "✅ CloudFront cache invalidation workflow triggered successfully"
         env:
           GITHUB_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}

CHANGELOG.md

Lines changed: 11 additions & 2 deletions

@@ -3,17 +3,26 @@
 All notable changes to this project will be documented in this file.
 
 <!-- git-cliff-unreleased-start -->
-## 1.1.1 - **not yet released**
+## 1.1.2 - **not yet released**
+
+### 🐛 Bug Fixes
+
+- Only apply requestHandlerTimeout to request handler ([#1474](https://github.com/apify/crawlee-python/pull/1474)) ([0dfb6c2](https://github.com/apify/crawlee-python/commit/0dfb6c2a13b6650736245fa39b3fbff397644df7)) by [@janbuchar](https://github.com/janbuchar)
+- Handle the case when `error_handler` returns `Request` ([#1595](https://github.com/apify/crawlee-python/pull/1595)) ([8a961a2](https://github.com/apify/crawlee-python/commit/8a961a2b07d0d33a7302dbb13c17f3d90999d390)) by [@Mantisus](https://github.com/Mantisus)
+
+
+<!-- git-cliff-unreleased-end -->
+## [1.1.1](https://github.com/apify/crawlee-python/releases/tag/v1.1.1) (2025-12-02)
 
 ### 🐛 Bug Fixes
 
 - Unify separators in `unique_key` construction ([#1569](https://github.com/apify/crawlee-python/pull/1569)) ([af46a37](https://github.com/apify/crawlee-python/commit/af46a3733b059a8052489296e172f005def953f7)) by [@vdusek](https://github.com/vdusek), closes [#1512](https://github.com/apify/crawlee-python/issues/1512)
 - Fix `same-domain` strategy ignoring public suffix ([#1572](https://github.com/apify/crawlee-python/pull/1572)) ([3d018b2](https://github.com/apify/crawlee-python/commit/3d018b21a28a4bee493829783057188d6106a69b)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1571](https://github.com/apify/crawlee-python/issues/1571)
 - Make context helpers work in `FailedRequestHandler` and `ErrorHandler` ([#1570](https://github.com/apify/crawlee-python/pull/1570)) ([b830019](https://github.com/apify/crawlee-python/commit/b830019350830ac33075316061659e2854f7f4a5)) by [@Pijukatel](https://github.com/Pijukatel), closes [#1532](https://github.com/apify/crawlee-python/issues/1532)
 - Fix non-ASCII character corruption in `FileSystemStorageClient` on systems without UTF-8 default encoding ([#1580](https://github.com/apify/crawlee-python/pull/1580)) ([f179f86](https://github.com/apify/crawlee-python/commit/f179f8671b0b6af9264450e4fef7e49d1cecd2bd)) by [@Mantisus](https://github.com/Mantisus), closes [#1579](https://github.com/apify/crawlee-python/issues/1579)
+- Respect `<base>` when enqueuing ([#1590](https://github.com/apify/crawlee-python/pull/1590)) ([de517a1](https://github.com/apify/crawlee-python/commit/de517a1629cc29b20568143eb64018f216d4ba33)) by [@Mantisus](https://github.com/Mantisus), closes [#1589](https://github.com/apify/crawlee-python/issues/1589)
 
 
-<!-- git-cliff-unreleased-end -->
 ## [1.1.0](https://github.com/apify/crawlee-python/releases/tag/v1.1.0) (2025-11-18)
 
 ### 🚀 Features

docs/deployment/code_examples/google/cloud_run_example.py

Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@
 from crawlee.storage_clients import MemoryStorageClient
 
 
-@get('/')
+@get('/')  # type: ignore[untyped-decorator]
 async def main() -> str:
     """The crawler entry point that will be called when the HTTP endpoint is accessed."""
     # highlight-start

docs/deployment/code_examples/google/google_example.py

Lines changed: 2 additions & 5 deletions

@@ -6,10 +6,7 @@
 import functions_framework
 from flask import Request, Response
 
-from crawlee.crawlers import (
-    BeautifulSoupCrawler,
-    BeautifulSoupCrawlingContext,
-)
+from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
 from crawlee.storage_clients import MemoryStorageClient
 
 
@@ -51,7 +48,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
     # highlight-end
 
 
-@functions_framework.http
+@functions_framework.http  # type: ignore[untyped-decorator]
 def crawlee_run(request: Request) -> Response:
     # You can pass data to your crawler using `request`
     function_id = request.headers['Function-Execution-Id']

docs/guides/code_examples/running_in_web_server/server.py

Lines changed: 2 additions & 2 deletions

@@ -14,7 +14,7 @@
 app = FastAPI(lifespan=lifespan, title='Crawler app')
 
 
-@app.get('/', response_class=HTMLResponse)
+@app.get('/', response_class=HTMLResponse)  # type: ignore[untyped-decorator]
 def index() -> str:
     return """
     <!DOCTYPE html>
@@ -32,7 +32,7 @@ def index() -> str:
     """
 
 
-@app.get('/scrape')
+@app.get('/scrape')  # type: ignore[untyped-decorator]
 async def scrape_url(request: Request, url: str | None = None) -> dict:
     if not url:
         return {'url': 'missing', 'scrape result': 'no results'}

pyproject.toml

Lines changed: 4 additions & 3 deletions

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "crawlee"
-version = "1.1.1"
+version = "1.1.2"
 description = "Crawlee for Python"
 authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
 license = { file = "LICENSE" }
@@ -34,6 +34,7 @@ keywords = [
     "scraping",
 ]
 dependencies = [
+    "async-timeout>=5.0.1",
    "cachetools>=5.5.0",
    "colorama>=0.4.0",
    "impit>=0.8.0",
@@ -74,7 +75,7 @@ otel = [
 ]
 sql_postgres = [
     "sqlalchemy[asyncio]>=2.0.0,<3.0.0",
-    "asyncpg>=0.24.0; python_version < '3.14'" # TODO: https://github.com/apify/crawlee-python/issues/1555
+    "asyncpg>=0.24.0"
 ]
 sql_sqlite = [
     "sqlalchemy[asyncio]>=2.0.0,<3.0.0",
@@ -101,7 +102,7 @@ dev = [
     "build<2.0.0", # For e2e tests.
     "dycw-pytest-only<3.0.0",
     "fakeredis[probabilistic,json,lua]<3.0.0",
-    "mypy~=1.18.0",
+    "mypy~=1.19.0",
     "pre-commit<5.0.0",
     "proxy-py<3.0.0",
     "pydoc-markdown<5.0.0",

src/crawlee/_utils/time.py

Lines changed: 41 additions & 1 deletion

@@ -3,11 +3,14 @@
 import time
 from contextlib import contextmanager
 from dataclasses import dataclass
+from datetime import timedelta
 from typing import TYPE_CHECKING
 
+from async_timeout import Timeout, timeout
+
 if TYPE_CHECKING:
     from collections.abc import Iterator
-    from datetime import timedelta
+    from types import TracebackType
 
 _SECONDS_PER_MINUTE = 60
 _SECONDS_PER_HOUR = 3600
@@ -35,6 +38,43 @@ def measure_time() -> Iterator[TimerResult]:
         result.cpu = after_cpu - before_cpu
 
 
+class SharedTimeout:
+    """Keeps track of a time budget shared by multiple independent async operations.
+
+    Provides a reusable, non-reentrant context manager interface.
+    """
+
+    def __init__(self, timeout: timedelta) -> None:
+        self._remaining_timeout = timeout
+        self._active_timeout: Timeout | None = None
+        self._activation_timestamp: float | None = None
+
+    async def __aenter__(self) -> timedelta:
+        if self._active_timeout is not None or self._activation_timestamp is not None:
+            raise RuntimeError('A shared timeout context cannot be entered twice at the same time')
+
+        self._activation_timestamp = time.monotonic()
+        self._active_timeout = new_timeout = timeout(self._remaining_timeout.total_seconds())
+        await new_timeout.__aenter__()
+        return self._remaining_timeout
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        exc_traceback: TracebackType | None,
+    ) -> None:
+        if self._active_timeout is None or self._activation_timestamp is None:
+            raise RuntimeError('Logic error')
+
+        await self._active_timeout.__aexit__(exc_type, exc_value, exc_traceback)
+        elapsed = time.monotonic() - self._activation_timestamp
+        self._remaining_timeout = self._remaining_timeout - timedelta(seconds=elapsed)
+
+        self._active_timeout = None
+        self._activation_timestamp = None
+
+
 def format_duration(duration: timedelta | None) -> str:
     """Format a timedelta into a human-readable string with appropriate units."""
     if duration is None:
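
A minimal usage sketch of the `SharedTimeout` helper added above, showing how the remaining budget shrinks across consecutive entries of the same instance (illustrative durations, not part of this commit):

```python
import asyncio
from datetime import timedelta

from crawlee._utils.time import SharedTimeout


async def main() -> None:
    # A single five-second budget shared by two sequential operations.
    shared = SharedTimeout(timedelta(seconds=5))

    async with shared as remaining:
        print(f'First entry, remaining budget: {remaining}')  # roughly 5 s
        await asyncio.sleep(2)  # consumes about 2 s of the budget

    async with shared as remaining:
        print(f'Second entry, remaining budget: {remaining}')  # roughly 3 s
        # Sleeping past the remaining budget here would raise asyncio.TimeoutError.
        await asyncio.sleep(1)


asyncio.run(main())
```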

src/crawlee/crawlers/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -1,7 +1,7 @@
 from crawlee._utils.try_import import install_import_hook as _install_import_hook
 from crawlee._utils.try_import import try_import as _try_import
 
-from ._abstract_http import AbstractHttpCrawler, AbstractHttpParser, ParsedHttpCrawlingContext
+from ._abstract_http import AbstractHttpCrawler, AbstractHttpParser, HttpCrawlerOptions, ParsedHttpCrawlingContext
 from ._basic import BasicCrawler, BasicCrawlerOptions, BasicCrawlingContext, ContextPipeline
 from ._http import HttpCrawler, HttpCrawlingContext, HttpCrawlingResult
 
@@ -51,6 +51,7 @@
     'BeautifulSoupParserType',
     'ContextPipeline',
     'HttpCrawler',
+    'HttpCrawlerOptions',
     'HttpCrawlingContext',
     'HttpCrawlingResult',
     'ParsedHttpCrawlingContext',

src/crawlee/crawlers/_abstract_http/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -1,9 +1,10 @@
-from ._abstract_http_crawler import AbstractHttpCrawler
+from ._abstract_http_crawler import AbstractHttpCrawler, HttpCrawlerOptions
 from ._abstract_http_parser import AbstractHttpParser
 from ._http_crawling_context import ParsedHttpCrawlingContext
 
 __all__ = [
     'AbstractHttpCrawler',
     'AbstractHttpParser',
+    'HttpCrawlerOptions',
     'ParsedHttpCrawlingContext',
 ]
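
With `HttpCrawlerOptions` re-exported here and from `crawlee.crawlers`, the constructor options of HTTP-based crawlers can be collected in a single typed dict. A rough sketch, assuming the concrete subclasses such as `BeautifulSoupCrawler` forward the new `navigation_timeout` option (their changes are not shown in this excerpt):

```python
from datetime import timedelta

from crawlee.crawlers import BeautifulSoupCrawler, HttpCrawlerOptions

# Typed bag of forwarded constructor arguments: `navigation_timeout` is the option
# added in this commit, `max_request_retries` comes from BasicCrawlerOptions.
options: HttpCrawlerOptions = {
    'navigation_timeout': timedelta(seconds=30),
    'max_request_retries': 2,
}

crawler = BeautifulSoupCrawler(**options)
```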

src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py

Lines changed: 46 additions & 12 deletions

@@ -3,14 +3,16 @@
 import asyncio
 import logging
 from abc import ABC
+from datetime import timedelta
 from typing import TYPE_CHECKING, Any, Generic
 
 from more_itertools import partition
 from pydantic import ValidationError
-from typing_extensions import TypeVar
+from typing_extensions import NotRequired, TypeVar
 
 from crawlee._request import Request, RequestOptions
 from crawlee._utils.docs import docs_group
+from crawlee._utils.time import SharedTimeout
 from crawlee._utils.urls import to_absolute_url_iterator
 from crawlee.crawlers._basic import BasicCrawler, BasicCrawlerOptions, ContextPipeline
 from crawlee.errors import SessionError
@@ -32,6 +34,19 @@
 TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)
 
 
+class HttpCrawlerOptions(
+    BasicCrawlerOptions[TCrawlingContext, TStatisticsState],
+    Generic[TCrawlingContext, TStatisticsState],
+):
+    """Arguments for the `AbstractHttpCrawler` constructor.
+
+    It is intended for typing forwarded `__init__` arguments in the subclasses.
+    """
+
+    navigation_timeout: NotRequired[timedelta | None]
+    """Timeout for the HTTP request."""
+
+
 @docs_group('Crawlers')
 class AbstractHttpCrawler(
     BasicCrawler[TCrawlingContext, StatisticsState],
@@ -56,10 +71,13 @@ def __init__(
         self,
         *,
         parser: AbstractHttpParser[TParseResult, TSelectResult],
+        navigation_timeout: timedelta | None = None,
         **kwargs: Unpack[BasicCrawlerOptions[TCrawlingContext, StatisticsState]],
     ) -> None:
         self._parser = parser
+        self._navigation_timeout = navigation_timeout or timedelta(minutes=1)
         self._pre_navigation_hooks: list[Callable[[BasicCrawlingContext], Awaitable[None]]] = []
+        self._shared_navigation_timeouts: dict[int, SharedTimeout] = {}
 
         if '_context_pipeline' not in kwargs:
             raise ValueError(
@@ -112,9 +130,17 @@ def _create_static_content_crawler_pipeline(self) -> ContextPipeline[ParsedHttpC
     async def _execute_pre_navigation_hooks(
         self, context: BasicCrawlingContext
     ) -> AsyncGenerator[BasicCrawlingContext, None]:
-        for hook in self._pre_navigation_hooks:
-            await hook(context)
-        yield context
+        context_id = id(context)
+        self._shared_navigation_timeouts[context_id] = SharedTimeout(self._navigation_timeout)
+
+        try:
+            for hook in self._pre_navigation_hooks:
+                async with self._shared_navigation_timeouts[context_id]:
+                    await hook(context)
+
+            yield context
+        finally:
+            self._shared_navigation_timeouts.pop(context_id, None)
 
     async def _parse_http_response(
         self, context: HttpCrawlingContext
@@ -167,9 +193,15 @@ async def extract_links(
         kwargs.setdefault('strategy', 'same-hostname')
 
         links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
-        links_iterator = to_absolute_url_iterator(
-            context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+
+        # Get base URL from <base> tag if present
+        extracted_base_urls = list(self._parser.find_links(parsed_content, 'base[href]'))
+        base_url: str = (
+            str(extracted_base_urls[0])
+            if extracted_base_urls
+            else context.request.loaded_url or context.request.url
         )
+        links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
         if robots_txt_file:
             skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
@@ -216,12 +248,14 @@ async def _make_http_request(self, context: BasicCrawlingContext) -> AsyncGenera
         Yields:
             The original crawling context enhanced by HTTP response.
         """
-        result = await self._http_client.crawl(
-            request=context.request,
-            session=context.session,
-            proxy_info=context.proxy_info,
-            statistics=self._statistics,
-        )
+        async with self._shared_navigation_timeouts[id(context)] as remaining_timeout:
+            result = await self._http_client.crawl(
+                request=context.request,
+                session=context.session,
+                proxy_info=context.proxy_info,
+                statistics=self._statistics,
+                timeout=remaining_timeout,
+            )
 
         yield HttpCrawlingContext.from_basic_crawling_context(context=context, http_response=result.http_response)
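
The `extract_links` change above, matching the changelog entry "Respect `<base>` when enqueuing", resolves relative links against the page's `<base href>` element when one is present instead of always using the request URL. A small standard-library illustration of the difference, using hypothetical URLs:

```python
from urllib.parse import urljoin

# Hypothetical page loaded from one host that declares <base href="https://cdn.example.com/mirror/">.
request_url = 'https://example.com/articles/index.html'
base_href = 'https://cdn.example.com/mirror/'
relative_link = 'post/1.html'

# Previously: relative links were resolved against the loaded/request URL.
print(urljoin(request_url, relative_link))  # https://example.com/articles/post/1.html

# Now: the <base> href takes precedence when the parser finds one.
print(urljoin(base_href, relative_link))  # https://cdn.example.com/mirror/post/1.html
```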
