2 changes: 2 additions & 0 deletions pyproject.toml
@@ -39,6 +39,7 @@ dependencies = [
"sentry-sdk==2.23.1",
"click>=8.1.7",
"requests>=2.32.3",
"beautifulsoup4>=4.12.3",
"lazy-object-proxy>=0.0.0",
"pydantic-core>=2.23.4",
"hatch-vcs>=0.4.0",
@@ -169,6 +170,7 @@ dev-dependencies = [
"modal>=0.73.25",
"pytest-lsp>=1.0.0b1",
"cython>=3.0.11",
"responses>=0.25.0",
]

[tool.uv.workspace]
4 changes: 4 additions & 0 deletions src/codegen/extensions/tools/__init__.py
@@ -27,6 +27,7 @@
from .semantic_edit import semantic_edit
from .semantic_search import semantic_search
from .view_file import view_file
from .web import web_search_tool, web_view_page_tool

__all__ = [
# Git operations
@@ -61,4 +62,7 @@
"semantic_search",
"view_file",
"view_pr",
# Web browsing operations
"web_search_tool",
"web_view_page_tool",
]
5 changes: 5 additions & 0 deletions src/codegen/extensions/tools/web/__init__.py
@@ -0,0 +1,5 @@
"""Web browsing tools."""

from .web import WebPageObservation, WebSearchObservation, web_search_tool, web_view_page_tool

__all__ = ["WebPageObservation", "WebSearchObservation", "web_search_tool", "web_view_page_tool"]
143 changes: 143 additions & 0 deletions src/codegen/extensions/tools/web/web.py
@@ -0,0 +1,143 @@
"""Tools for browsing the web."""

from typing import ClassVar

import requests
from bs4 import BeautifulSoup
from pydantic import Field

from ..observation import Observation


class WebSearchObservation(Observation):
"""Response from searching the web."""

query: str = Field(description="Search query used")
results: list[dict] = Field(description="List of search results")

str_template: ClassVar[str] = "Found {result_count} results for '{query}'"

def _get_details(self) -> dict[str, str | int]:
"""Get details for string representation."""
return {
"result_count": len(self.results),
"query": self.query,
}


class WebPageObservation(Observation):
"""Response from viewing a web page."""

url: str = Field(description="URL of the web page")
title: str = Field(description="Title of the web page")
content: str = Field(description="Content of the web page")

str_template: ClassVar[str] = "Viewed web page: {title}"


def web_search_tool(query: str, num_results: int = 5) -> WebSearchObservation:
"""Search the web and get content snippets from search results.

Args:
query: Search query string
num_results: Maximum number of results to return (default: 5)

Returns:
WebSearchObservation with search results
"""
try:
# Note: In a real implementation, this would use a search API like Google Custom Search API,
# Bing Search API, or a similar service. For this example, we'll return mock data.
# The actual implementation would require API keys and proper error handling.

# Mock search results for demonstration
mock_results = [
{
"title": f"Result {i + 1} for {query}",
"url": f"https://example.com/result{i + 1}",
"snippet": f"This is a snippet of content for result {i + 1} related to {query}...",
}
for i in range(min(num_results, 5))
]

return WebSearchObservation(
status="success",
query=query,
results=mock_results,
)
except requests.exceptions.RequestException as e:
# Network-related errors
return WebSearchObservation(
status="error",
error=f"Network error when searching: {e!s}",
query=query,
results=[],
)
except Exception as e:
# Catch-all for other errors
return WebSearchObservation(
status="error",
error=f"Failed to search the web: {e!s}",
query=query,
results=[],
)


def web_view_page_tool(url: str, max_length: int = 10000) -> WebPageObservation:
"""View the content of a specific webpage.

Args:
url: URL of the webpage to view
max_length: Maximum length of content to return (default: 10000)

Returns:
WebPageObservation with page content
"""
try:
# Send a GET request to the URL
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status() # Raise an exception for 4XX/5XX responses

# Parse the HTML content
soup = BeautifulSoup(response.text, "html.parser")

# Extract the title
    title = soup.title.string if soup.title and soup.title.string else "No title found"

# Extract the main content (simplified approach)
# Remove script and style elements
for script in soup(["script", "style"]):
script.extract()

# Get text content
text = soup.get_text(separator="\n", strip=True)

# Truncate if necessary
if len(text) > max_length:
text = text[:max_length] + "... [content truncated]"

return WebPageObservation(
status="success",
url=url,
title=title,
content=text,
)
except requests.exceptions.RequestException as e:
# Network-related errors
return WebPageObservation(
status="error",
error=f"Network error when fetching page: {e!s}",
url=url,
title="",
content="",
)
except Exception as e:
# Catch-all for other errors
return WebPageObservation(
status="error",
error=f"Failed to view web page: {e!s}",
url=url,
title="",
content="",
)
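Note on the mock results in web_search_tool: they stand in for a real search backend. A minimal sketch of what the search call might look like wired to the Bing Web Search v7 API is below; the endpoint, header name, and response shape follow Microsoft's documented API, but the helper name _bing_search and the BING_API_KEY environment variable are illustrative assumptions, not part of this PR.

import os

import requests


def _bing_search(query: str, num_results: int = 5) -> list[dict]:
    """Sketch of a real search backend; any provider with a REST API would work."""
    api_key = os.environ["BING_API_KEY"]  # hypothetical env var for illustration
    response = requests.get(
        "https://api.bing.microsoft.com/v7.0/search",
        headers={"Ocp-Apim-Subscription-Key": api_key},
        params={"q": query, "count": num_results},
        timeout=10,
    )
    response.raise_for_status()
    pages = response.json().get("webPages", {}).get("value", [])
    # Map the provider's fields onto the result shape WebSearchObservation expects.
    return [{"title": p["name"], "url": p["url"], "snippet": p["snippet"]} for p in pages]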
59 changes: 59 additions & 0 deletions tests/integration/extension/test_web.py
@@ -0,0 +1,59 @@
"""Tests for web browsing tools."""

import pytest
import responses

from codegen.extensions.tools.web import web_search_tool, web_view_page_tool


@pytest.fixture
def mock_responses():
"""Set up mock responses for testing."""
with responses.RequestsMock() as rsps:
# Mock a web page response
rsps.add(
responses.GET,
"https://example.com",
body="<html><head><title>Example Domain</title></head><body><h1>Example Domain</h1><p>This is a test page.</p></body></html>",
status=200,
content_type="text/html",
)
yield rsps


def test_web_search_tool():
"""Test the web search tool."""
# Since this is a mock implementation, we just test the basic functionality
result = web_search_tool("test query", num_results=3)

assert result.status == "success"
assert result.query == "test query"
assert len(result.results) == 3

# Check that the results have the expected structure
for i, item in enumerate(result.results):
assert item["title"] == f"Result {i + 1} for test query"
assert item["url"] == f"https://example.com/result{i + 1}"
assert "snippet" in item


def test_web_view_page_tool(mock_responses):
"""Test the web view page tool."""
result = web_view_page_tool("https://example.com")

assert result.status == "success"
assert result.url == "https://example.com"
assert result.title == "Example Domain"
assert "Example Domain" in result.content
assert "This is a test page." in result.content


def test_web_view_page_tool_error():
"""Test the web view page tool with an error."""
result = web_view_page_tool("https://nonexistent-domain-that-should-fail.com")

assert result.status == "error"
assert "error" in result
assert result.url == "https://nonexistent-domain-that-should-fail.com"
assert result.title == ""
assert result.content == ""