Merged (changes from all commits)
src/fetch/README.md (4 additions, 0 deletions)

```diff
@@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc
 
 This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.
 
+### Customization - Proxy
+
+The server can be configured to use a proxy by using the `--proxy-url` argument.
+
 ## Debugging
 
 You can use the MCP inspector to debug the server. For uvx installations:
```
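
For context, a minimal sketch of how a client might launch the server with the new flag, assuming the MCP Python SDK's stdio client; the `uvx` command, tool call, and proxy URL are placeholders, not part of this diff:

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Hypothetical launch parameters; swap in your own proxy URL.
server_params = StdioServerParameters(
    command="uvx",
    args=["mcp-server-fetch", "--proxy-url", "http://localhost:8080"],
)


async def demo() -> None:
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # With the flag set, the server routes its HTTP traffic
            # for this fetch through the configured proxy.
            result = await session.call_tool("fetch", {"url": "https://example.com"})
            print(result)


if __name__ == "__main__":
    asyncio.run(demo())
```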
src/fetch/pyproject.toml (2 additions, 1 deletion)

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "mcp-server-fetch"
-version = "0.6.2"
+version = "0.6.3"
 description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
 readme = "README.md"
 requires-python = ">=3.10"

@@ -16,6 +16,7 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
 ]
 dependencies = [
+    "httpx<0.28",
     "markdownify>=0.13.1",
     "mcp>=1.1.3",
     "protego>=0.3.1",
```
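
A note on the new `httpx<0.28` cap: the `proxies` keyword that the server code below passes to `AsyncClient` was deprecated in httpx 0.26 and removed in 0.28, where a single proxy is passed as `proxy=` instead, so the pin plausibly exists to keep that call valid. A minimal sketch of the difference, with a placeholder proxy URL (both keywords coexist only in httpx 0.26 and 0.27):

```python
import httpx

PROXY_URL = "http://localhost:8080"  # placeholder

# httpx < 0.28 (the range this PR pins): proxy configuration goes
# through the `proxies` keyword, as a single URL or a mounts-style mapping.
old_style = httpx.AsyncClient(proxies=PROXY_URL)

# httpx >= 0.28: `proxies` is removed; a single proxy is passed as `proxy=`.
new_style = httpx.AsyncClient(proxy=PROXY_URL)
```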
src/fetch/src/mcp_server_fetch/__init__.py (2 additions, 1 deletion)

```diff
@@ -15,9 +15,10 @@ def main():
         action="store_true",
         help="Ignore robots.txt restrictions",
     )
+    parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
 
     args = parser.parse_args()
-    asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
+    asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
 
 
 if __name__ == "__main__":
```
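
One small thing worth making explicit: argparse stores `--proxy-url` on the namespace as `proxy_url`, converting the dash to an underscore, which is why the flag and the attribute read differently in the diff above. A self-contained sketch, with a placeholder proxy URL:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")

# argparse replaces dashes in long option names with underscores when it
# builds the namespace attribute, so --proxy-url becomes args.proxy_url.
args = parser.parse_args(["--proxy-url", "http://localhost:8080"])
assert args.proxy_url == "http://localhost:8080"
```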
src/fetch/src/mcp_server_fetch/server.py (11 additions, 8 deletions)

```diff
@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
     return robots_url
 
 
-async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
+async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
     """
     Check if the URL can be fetched by the user agent according to the robots.txt file.
     Raises a McpError if not.

@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
 
     robot_txt_url = get_robots_txt_url(url)
 
-    async with AsyncClient() as client:
+    async with AsyncClient(proxies=proxy_url) as client:
         try:
             response = await client.get(
                 robot_txt_url,

@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
 
 
 async def fetch_url(
-    url: str, user_agent: str, force_raw: bool = False
+    url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
 ) -> Tuple[str, str]:
     """
     Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
     """
     from httpx import AsyncClient, HTTPError
 
-    async with AsyncClient() as client:
+    async with AsyncClient(proxies=proxy_url) as client:
         try:
             response = await client.get(
                 url,

@@ -179,13 +179,16 @@ class Fetch(BaseModel):
 
 
 async def serve(
-    custom_user_agent: str | None = None, ignore_robots_txt: bool = False
+    custom_user_agent: str | None = None,
+    ignore_robots_txt: bool = False,
+    proxy_url: str | None = None,
 ) -> None:
     """Run the fetch MCP server.
 
     Args:
         custom_user_agent: Optional custom User-Agent string to use for requests
         ignore_robots_txt: Whether to ignore robots.txt restrictions
+        proxy_url: Optional proxy URL to use for requests
     """
     server = Server("mcp-fetch")
     user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS

@@ -229,10 +232,10 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
             raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))
 
         if not ignore_robots_txt:
-            await check_may_autonomously_fetch_url(url, user_agent_autonomous)
+            await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)
 
         content, prefix = await fetch_url(
-            url, user_agent_autonomous, force_raw=args.raw
+            url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
         )
         original_length = len(content)
         if args.start_index >= original_length:

@@ -259,7 +262,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
         url = arguments["url"]
 
         try:
-            content, prefix = await fetch_url(url, user_agent_manual)
+            content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
         # TODO: after SDK bug is addressed, don't catch the exception
         except McpError as e:
             return GetPromptResult(
```
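
Taken together, the change threads one optional `proxy_url` from the CLI through `serve` into both HTTP entry points, constructing each `AsyncClient` with it so that `None` falls back to a direct connection. A standalone sketch of that per-client pattern, assuming `httpx<0.28` and placeholder URLs:

```python
import asyncio

import httpx


async def fetch_via_proxy(url: str, proxy_url: str | None = None) -> str:
    # Mirrors the pattern in this diff: the optional proxy is applied when
    # the client is constructed, and proxies=None means a direct connection.
    async with httpx.AsyncClient(proxies=proxy_url) as client:
        response = await client.get(url, follow_redirects=True, timeout=30)
        response.raise_for_status()
        return response.text


if __name__ == "__main__":
    # Placeholder proxy; requires httpx<0.28 for the `proxies` keyword.
    print(asyncio.run(fetch_via_proxy("https://example.com", "http://localhost:8080")))
```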