From 64a654744a414e76fa007063d6ea5425d17d574d Mon Sep 17 00:00:00 2001 From: shiquda Date: Thu, 13 Mar 2025 00:20:48 +0800 Subject: [PATCH 1/2] feat(fetch): add support for using proxy for requests --- src/fetch/README.md | 4 ++++ src/fetch/src/mcp_server_fetch/__init__.py | 3 ++- src/fetch/src/mcp_server_fetch/server.py | 19 +++++++++++-------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/fetch/README.md b/src/fetch/README.md index 0e58b3de16..01d99cacba 100644 --- a/src/fetch/README.md +++ b/src/fetch/README.md @@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration. +### Customization - Proxy + +The server can be configured to use a proxy by using the `--proxy-url` argument. + ## Debugging You can use the MCP inspector to debug the server. For uvx installations: diff --git a/src/fetch/src/mcp_server_fetch/__init__.py b/src/fetch/src/mcp_server_fetch/__init__.py index e3a35d5422..09744ce319 100644 --- a/src/fetch/src/mcp_server_fetch/__init__.py +++ b/src/fetch/src/mcp_server_fetch/__init__.py @@ -15,9 +15,10 @@ def main(): action="store_true", help="Ignore robots.txt restrictions", ) + parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests") args = parser.parse_args() - asyncio.run(serve(args.user_agent, args.ignore_robots_txt)) + asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url)) if __name__ == "__main__": diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index 775d79c890..24c6a875d3 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str: return robots_url -async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None: +async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None: """ Check if the URL can be fetched by the user agent according to the robots.txt file. Raises a McpError if not. @@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None: robot_txt_url = get_robots_txt_url(url) - async with AsyncClient() as client: + async with AsyncClient(proxies=proxy_url) as client: try: response = await client.get( robot_txt_url, @@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None: async def fetch_url( - url: str, user_agent: str, force_raw: bool = False + url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None ) -> Tuple[str, str]: """ Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information. """ from httpx import AsyncClient, HTTPError - async with AsyncClient() as client: + async with AsyncClient(proxies=proxy_url) as client: try: response = await client.get( url, @@ -179,13 +179,16 @@ class Fetch(BaseModel): async def serve( - custom_user_agent: str | None = None, ignore_robots_txt: bool = False + custom_user_agent: str | None = None, + ignore_robots_txt: bool = False, + proxy_url: str | None = None, ) -> None: """Run the fetch MCP server. Args: custom_user_agent: Optional custom User-Agent string to use for requests ignore_robots_txt: Whether to ignore robots.txt restrictions + proxy_url: Optional proxy URL to use for requests """ server = Server("mcp-fetch") user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS @@ -229,10 +232,10 @@ async def call_tool(name, arguments: dict) -> list[TextContent]: raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required")) if not ignore_robots_txt: - await check_may_autonomously_fetch_url(url, user_agent_autonomous) + await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url) content, prefix = await fetch_url( - url, user_agent_autonomous, force_raw=args.raw + url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url ) original_length = len(content) if args.start_index >= original_length: @@ -259,7 +262,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult: url = arguments["url"] try: - content, prefix = await fetch_url(url, user_agent_manual) + content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url) # TODO: after SDK bug is addressed, don't catch the exception except McpError as e: return GetPromptResult( From 9a4d513724b4fbe638ca24f6f86e6fc3ca373e89 Mon Sep 17 00:00:00 2001 From: shiquda Date: Sun, 30 Mar 2025 22:33:51 +0800 Subject: [PATCH 2/2] fix(fetch): specify httpx<0.28 to resolve proxy problem - AsyncClient.__init__() got an unexpected keyword argument 'proxies' --- src/fetch/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fetch/pyproject.toml b/src/fetch/pyproject.toml index ed76fdcd8a..bbee516a6b 100644 --- a/src/fetch/pyproject.toml +++ b/src/fetch/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-server-fetch" -version = "0.6.2" +version = "0.6.3" description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs" readme = "README.md" requires-python = ">=3.10" @@ -16,6 +16,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", ] dependencies = [ + "httpx<0.28", "markdownify>=0.13.1", "mcp>=1.1.3", "protego>=0.3.1",