From 70e328ff961610847f0914d99b9706b7acb9c6ca Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Mon, 2 Feb 2026 13:39:14 +0000 Subject: [PATCH 1/2] Add `extra_headers` argument to allow setting additional user headers --- src/apify_client/_http_client.py | 7 +- src/apify_client/client.py | 6 ++ tests/unit/test_client_headers.py | 119 ++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 tests/unit/test_client_headers.py diff --git a/src/apify_client/_http_client.py b/src/apify_client/_http_client.py index 5f3c76d2..bbf05b70 100644 --- a/src/apify_client/_http_client.py +++ b/src/apify_client/_http_client.py @@ -38,6 +38,7 @@ def __init__( min_delay_between_retries_millis: int = 500, timeout_secs: int = 360, stats: Statistics | None = None, + extra_headers: dict | None = None, ) -> None: self.max_retries = max_retries self.min_delay_between_retries_millis = min_delay_between_retries_millis @@ -59,8 +60,10 @@ def __init__( if token is not None: headers['Authorization'] = f'Bearer {token}' - self.impit_client = impit.Client(headers=headers, follow_redirects=True, timeout=timeout_secs) - self.impit_async_client = impit.AsyncClient(headers=headers, follow_redirects=True, timeout=timeout_secs) + init_headers = {**(extra_headers or {}), **headers} + + self.impit_client = impit.Client(headers=init_headers, follow_redirects=True, timeout=timeout_secs) + self.impit_async_client = impit.AsyncClient(headers=init_headers, follow_redirects=True, timeout=timeout_secs) self.stats = stats or Statistics() diff --git a/src/apify_client/client.py b/src/apify_client/client.py index b6ed7abf..f5c54576 100644 --- a/src/apify_client/client.py +++ b/src/apify_client/client.py @@ -113,6 +113,7 @@ def __init__( max_retries: int | None = 8, min_delay_between_retries_millis: int | None = 500, timeout_secs: int | None = DEFAULT_TIMEOUT, + extra_headers: dict | None = None, ) -> None: """Initialize a new instance. @@ -126,6 +127,7 @@ def __init__( min_delay_between_retries_millis: How long will the client wait between retrying requests (increases exponentially from this value). timeout_secs: The socket timeout of the HTTP requests sent to the Apify API. + extra_headers: Additional headers to include in all requests. """ super().__init__( token, @@ -143,6 +145,7 @@ def __init__( min_delay_between_retries_millis=self.min_delay_between_retries_millis, timeout_secs=self.timeout_secs, stats=self.stats, + extra_headers=extra_headers, ) def actor(self, actor_id: str) -> ActorClient: @@ -301,6 +304,7 @@ def __init__( max_retries: int | None = 8, min_delay_between_retries_millis: int | None = 500, timeout_secs: int | None = DEFAULT_TIMEOUT, + extra_headers: dict | None = None, ) -> None: """Initialize a new instance. @@ -314,6 +318,7 @@ def __init__( min_delay_between_retries_millis: How long will the client wait between retrying requests (increases exponentially from this value). timeout_secs: The socket timeout of the HTTP requests sent to the Apify API. + extra_headers: Additional headers to include in all requests. """ super().__init__( token, @@ -331,6 +336,7 @@ def __init__( min_delay_between_retries_millis=self.min_delay_between_retries_millis, timeout_secs=self.timeout_secs, stats=self.stats, + extra_headers=extra_headers, ) def actor(self, actor_id: str) -> ActorClientAsync: diff --git a/tests/unit/test_client_headers.py b/tests/unit/test_client_headers.py new file mode 100644 index 00000000..a1dd671f --- /dev/null +++ b/tests/unit/test_client_headers.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +import json +import os +import sys +from importlib import metadata +from typing import TYPE_CHECKING + +from werkzeug import Request, Response + +from apify_client._http_client import HTTPClient, HTTPClientAsync + +if TYPE_CHECKING: + from pytest_httpserver import HTTPServer + + +def _header_handler(request: Request) -> Response: + return Response( + status=200, + headers={}, + response=json.dumps({'received_headers': dict(request.headers)}), + ) + + +def _get_user_agent() -> str: + is_at_home = 'APIFY_IS_AT_HOME' in os.environ + python_version = '.'.join([str(x) for x in sys.version_info[:3]]) + client_version = metadata.version('apify-client') + return f'ApifyClient/{client_version} ({sys.platform}; Python/{python_version}); isAtHome/{is_at_home}' + + +async def test_default_headers_async(httpserver: HTTPServer) -> None: + """Test that default headers are sent with each request.""" + + client = HTTPClientAsync(token='placeholder_token') + httpserver.expect_request('/').respond_with_handler(_header_handler) + api_url = httpserver.url_for('/').removesuffix('/') + + response = await client.call(method='GET', url=f'{api_url}/') + + request_headers = json.loads(response.text)['received_headers'] + + assert request_headers == { + 'User-Agent': _get_user_agent(), + 'Accept': 'application/json, */*', + 'Authorization': 'Bearer placeholder_token', + 'Accept-Encoding': 'gzip, br, zstd, deflate', + 'Host': f'{httpserver.host}:{httpserver.port}', + } + + +def test_default_headers_sync(httpserver: HTTPServer) -> None: + """Test that default headers are sent with each request.""" + + client = HTTPClient(token='placeholder_token') + httpserver.expect_request('/').respond_with_handler(_header_handler) + api_url = httpserver.url_for('/').removesuffix('/') + + response = client.call(method='GET', url=f'{api_url}/') + + request_headers = json.loads(response.text)['received_headers'] + + assert request_headers == { + 'User-Agent': _get_user_agent(), + 'Accept': 'application/json, */*', + 'Authorization': 'Bearer placeholder_token', + 'Accept-Encoding': 'gzip, br, zstd, deflate', + 'Host': f'{httpserver.host}:{httpserver.port}', + } + + +async def test_extra_headers_async(httpserver: HTTPServer) -> None: + """Test that extra headers are sent with each request.""" + + extra_headers = { + 'Test-Header': 'blah', + 'User-Agent': 'CustomUserAgent/1.0', # Do not override Apify User-Agent + } + client = HTTPClientAsync(token='placeholder_token', extra_headers=extra_headers) + httpserver.expect_request('/').respond_with_handler(_header_handler) + api_url = httpserver.url_for('/').removesuffix('/') + + response = await client.call(method='GET', url=f'{api_url}/') + + request_headers = json.loads(response.text)['received_headers'] + + assert request_headers == { + 'Test-Header': 'blah', + 'User-Agent': _get_user_agent(), # Do not override Apify User-Agent + 'Accept': 'application/json, */*', + 'Authorization': 'Bearer placeholder_token', + 'Accept-Encoding': 'gzip, br, zstd, deflate', + 'Host': f'{httpserver.host}:{httpserver.port}', + } + + +def test_extra_headers_sync(httpserver: HTTPServer) -> None: + """Test that extra headers are sent with each request.""" + + extra_headers = { + 'Test-Header': 'blah', + 'User-Agent': 'CustomUserAgent/1.0', # Do not override Apify User-Agent + } + client = HTTPClient(token='placeholder_token', extra_headers=extra_headers) + httpserver.expect_request('/').respond_with_handler(_header_handler) + api_url = httpserver.url_for('/').removesuffix('/') + + response = client.call(method='GET', url=f'{api_url}/') + + request_headers = json.loads(response.text)['received_headers'] + + assert request_headers == { + 'Test-Header': 'blah', + 'User-Agent': _get_user_agent(), # Do not override Apify User-Agent + 'Accept': 'application/json, */*', + 'Authorization': 'Bearer placeholder_token', + 'Accept-Encoding': 'gzip, br, zstd, deflate', + 'Host': f'{httpserver.host}:{httpserver.port}', + } From 2a7899458ef9f401b071739ed2562812b9ac8550 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Tue, 3 Feb 2026 14:06:01 +0000 Subject: [PATCH 2/2] allow overwriting default headers --- src/apify_client/_http_client.py | 12 +++--- src/apify_client/client.py | 34 +++++++++++++--- tests/unit/test_client_headers.py | 66 ++++++++++++++++++++++--------- 3 files changed, 82 insertions(+), 30 deletions(-) diff --git a/src/apify_client/_http_client.py b/src/apify_client/_http_client.py index bbf05b70..131d32f3 100644 --- a/src/apify_client/_http_client.py +++ b/src/apify_client/_http_client.py @@ -38,29 +38,29 @@ def __init__( min_delay_between_retries_millis: int = 500, timeout_secs: int = 360, stats: Statistics | None = None, - extra_headers: dict | None = None, + headers: dict | None = None, ) -> None: self.max_retries = max_retries self.min_delay_between_retries_millis = min_delay_between_retries_millis self.timeout_secs = timeout_secs - headers = {'Accept': 'application/json, */*'} + default_headers = {'Accept': 'application/json, */*'} workflow_key = os.getenv('APIFY_WORKFLOW_KEY') if workflow_key is not None: - headers['X-Apify-Workflow-Key'] = workflow_key + default_headers['X-Apify-Workflow-Key'] = workflow_key is_at_home = 'APIFY_IS_AT_HOME' in os.environ python_version = '.'.join([str(x) for x in sys.version_info[:3]]) client_version = metadata.version('apify-client') user_agent = f'ApifyClient/{client_version} ({sys.platform}; Python/{python_version}); isAtHome/{is_at_home}' - headers['User-Agent'] = user_agent + default_headers['User-Agent'] = user_agent if token is not None: - headers['Authorization'] = f'Bearer {token}' + default_headers['Authorization'] = f'Bearer {token}' - init_headers = {**(extra_headers or {}), **headers} + init_headers = {**default_headers, **(headers or {})} self.impit_client = impit.Client(headers=init_headers, follow_redirects=True, timeout=timeout_secs) self.impit_async_client = impit.AsyncClient(headers=init_headers, follow_redirects=True, timeout=timeout_secs) diff --git a/src/apify_client/client.py b/src/apify_client/client.py index f5c54576..780a6132 100644 --- a/src/apify_client/client.py +++ b/src/apify_client/client.py @@ -1,5 +1,7 @@ from __future__ import annotations +import warnings + from apify_client._http_client import HTTPClient, HTTPClientAsync from apify_client._statistics import Statistics from apify_client.clients import ( @@ -98,6 +100,18 @@ def _options(self) -> dict: 'http_client': self.http_client, } + def _check_custom_headers(self, headers: dict) -> None: + default_headers = {'Accept', 'Authorization', 'Accept-Encoding', 'User-Agent'} + overwrite_headers = [key for key in headers if key.title() in default_headers] + if overwrite_headers: + warnings.warn( + f'{", ".join(overwrite_headers)} headers of {self.__class__.__name__} was overridden with an ' + 'explicit value. A wrong header value can lead to API errors, it is recommended to use the default ' + f'value for following headers: {", ".join(default_headers)}.', + category=UserWarning, + stacklevel=2, + ) + class ApifyClient(_BaseApifyClient): """The Apify API client.""" @@ -113,7 +127,7 @@ def __init__( max_retries: int | None = 8, min_delay_between_retries_millis: int | None = 500, timeout_secs: int | None = DEFAULT_TIMEOUT, - extra_headers: dict | None = None, + headers: dict | None = None, ) -> None: """Initialize a new instance. @@ -127,7 +141,7 @@ def __init__( min_delay_between_retries_millis: How long will the client wait between retrying requests (increases exponentially from this value). timeout_secs: The socket timeout of the HTTP requests sent to the Apify API. - extra_headers: Additional headers to include in all requests. + headers: Set headers to client for all requests. """ super().__init__( token, @@ -139,13 +153,17 @@ def __init__( ) self.stats = Statistics() + + if headers: + self._check_custom_headers(headers) + self.http_client = HTTPClient( token=token, max_retries=self.max_retries, min_delay_between_retries_millis=self.min_delay_between_retries_millis, timeout_secs=self.timeout_secs, stats=self.stats, - extra_headers=extra_headers, + headers=headers, ) def actor(self, actor_id: str) -> ActorClient: @@ -304,7 +322,7 @@ def __init__( max_retries: int | None = 8, min_delay_between_retries_millis: int | None = 500, timeout_secs: int | None = DEFAULT_TIMEOUT, - extra_headers: dict | None = None, + headers: dict | None = None, ) -> None: """Initialize a new instance. @@ -318,7 +336,7 @@ def __init__( min_delay_between_retries_millis: How long will the client wait between retrying requests (increases exponentially from this value). timeout_secs: The socket timeout of the HTTP requests sent to the Apify API. - extra_headers: Additional headers to include in all requests. + headers: Set headers to client for all requests. """ super().__init__( token, @@ -330,13 +348,17 @@ def __init__( ) self.stats = Statistics() + + if headers: + self._check_custom_headers(headers) + self.http_client = HTTPClientAsync( token=token, max_retries=self.max_retries, min_delay_between_retries_millis=self.min_delay_between_retries_millis, timeout_secs=self.timeout_secs, stats=self.stats, - extra_headers=extra_headers, + headers=headers, ) def actor(self, actor_id: str) -> ActorClientAsync: diff --git a/tests/unit/test_client_headers.py b/tests/unit/test_client_headers.py index a1dd671f..57560d9e 100644 --- a/tests/unit/test_client_headers.py +++ b/tests/unit/test_client_headers.py @@ -6,8 +6,10 @@ from importlib import metadata from typing import TYPE_CHECKING +import pytest from werkzeug import Request, Response +from apify_client import ApifyClient, ApifyClientAsync from apify_client._http_client import HTTPClient, HTTPClientAsync if TYPE_CHECKING: @@ -69,14 +71,13 @@ def test_default_headers_sync(httpserver: HTTPServer) -> None: } -async def test_extra_headers_async(httpserver: HTTPServer) -> None: - """Test that extra headers are sent with each request.""" +async def test_headers_async(httpserver: HTTPServer) -> None: + """Test that custom headers are sent with each request.""" - extra_headers = { - 'Test-Header': 'blah', - 'User-Agent': 'CustomUserAgent/1.0', # Do not override Apify User-Agent - } - client = HTTPClientAsync(token='placeholder_token', extra_headers=extra_headers) + client = HTTPClientAsync( + token='placeholder_token', + headers={'Test-Header': 'blah', 'User-Agent': 'CustomUserAgent/1.0', 'Authorization': 'strange_value'}, + ) httpserver.expect_request('/').respond_with_handler(_header_handler) api_url = httpserver.url_for('/').removesuffix('/') @@ -86,22 +87,25 @@ async def test_extra_headers_async(httpserver: HTTPServer) -> None: assert request_headers == { 'Test-Header': 'blah', - 'User-Agent': _get_user_agent(), # Do not override Apify User-Agent + 'User-Agent': 'CustomUserAgent/1.0', 'Accept': 'application/json, */*', - 'Authorization': 'Bearer placeholder_token', + 'Authorization': 'strange_value', 'Accept-Encoding': 'gzip, br, zstd, deflate', 'Host': f'{httpserver.host}:{httpserver.port}', } -def test_extra_headers_sync(httpserver: HTTPServer) -> None: - """Test that extra headers are sent with each request.""" +def test_headers_sync(httpserver: HTTPServer) -> None: + """Test that custom headers are sent with each request.""" - extra_headers = { - 'Test-Header': 'blah', - 'User-Agent': 'CustomUserAgent/1.0', # Do not override Apify User-Agent - } - client = HTTPClient(token='placeholder_token', extra_headers=extra_headers) + client = HTTPClient( + token='placeholder_token', + headers={ + 'Test-Header': 'blah', + 'User-Agent': 'CustomUserAgent/1.0', + 'Authorization': 'strange_value', + }, + ) httpserver.expect_request('/').respond_with_handler(_header_handler) api_url = httpserver.url_for('/').removesuffix('/') @@ -111,9 +115,35 @@ def test_extra_headers_sync(httpserver: HTTPServer) -> None: assert request_headers == { 'Test-Header': 'blah', - 'User-Agent': _get_user_agent(), # Do not override Apify User-Agent + 'User-Agent': 'CustomUserAgent/1.0', 'Accept': 'application/json, */*', - 'Authorization': 'Bearer placeholder_token', + 'Authorization': 'strange_value', 'Accept-Encoding': 'gzip, br, zstd, deflate', 'Host': f'{httpserver.host}:{httpserver.port}', } + + +def test_warning_on_overridden_headers_sync() -> None: + """Test that warning is raised when default headers are overridden.""" + + with pytest.warns(UserWarning, match='User-Agent, Authorization headers of ApifyClient'): + ApifyClient( + token='placeholder_token', + headers={ + 'User-Agent': 'CustomUserAgent/1.0', + 'Authorization': 'strange_value', + }, + ) + + +async def test_warning_on_overridden_headers_async() -> None: + """Test that warning is raised when default headers are overridden.""" + + with pytest.warns(UserWarning, match='User-Agent, Authorization headers of ApifyClientAsync'): + ApifyClientAsync( + token='placeholder_token', + headers={ + 'User-Agent': 'CustomUserAgent/1.0', + 'Authorization': 'strange_value', + }, + )