diff --git a/deploy/docker/server.py b/deploy/docker/server.py index 62e4e4413..b79c229e6 100644 --- a/deploy/docker/server.py +++ b/deploy/docker/server.py @@ -308,6 +308,17 @@ async def get_markdown( body: MarkdownRequest, _td: Dict = Depends(token_dep), ): + """ + Convert a web page into Markdown format. + + Supports multiple extraction modes: + - fit (default): Readability-based extraction for clean content + - raw: Direct DOM to Markdown conversion + - bm25: BM25 relevance ranking with optional query + - llm: LLM-based summarization with optional query + + Use this tool when you need clean, readable text from web pages. + """ if not body.url.startswith(("http://", "https://")) and not body.url.startswith(("raw:", "raw://")): raise HTTPException( 400, "Invalid URL format. Must start with http://, https://, or for raw HTML (raw:, raw://)") diff --git a/tests/unit/test_mcp_tool_docstrings.py b/tests/unit/test_mcp_tool_docstrings.py new file mode 100644 index 000000000..96b293b15 --- /dev/null +++ b/tests/unit/test_mcp_tool_docstrings.py @@ -0,0 +1,52 @@ +""" +Test that MCP tools have proper docstrings for LLM tool descriptions. + +This test uses AST parsing to avoid importing the server module which has +many dependencies that may not be available in a test environment. 
import ast
import os
from typing import Optional

# Only the AST helper is meant for reuse; pytest discovers the test functions
# below by name and does not consult ``__all__``.
__all__ = ["get_function_docstring"]

# Location of the server module relative to this test file
# (tests/unit/ -> repository root -> deploy/docker/server.py).
SERVER_PATH = os.path.normpath(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..", "..", "deploy", "docker", "server.py",
    )
)


def get_function_docstring(filepath: str, function_name: str) -> Optional[str]:
    """Return the docstring of a function found in a Python source file.

    The file is parsed with ``ast`` instead of being imported, so none of
    the file's own dependencies have to be installed.

    Args:
        filepath: Path of the Python source file to inspect.
        function_name: Name of a sync or async function defined anywhere in
            the file (module level, nested, or a method).

    Returns:
        The cleaned docstring of the first matching function encountered by
        ``ast.walk``, or ``None`` when no function has that name or the
        match has no docstring.

    Raises:
        OSError: If the file cannot be opened or read.
        SyntaxError: If the file is not valid Python source.
    """
    # Hand raw bytes to the parser so Python's own source-encoding rules
    # apply (UTF-8 by default, BOM and coding cookies honoured) instead of
    # the platform's default text encoding.
    with open(filepath, "rb") as source_file:
        tree = ast.parse(source_file.read(), filename=filepath)

    for node in ast.walk(tree):
        is_function = isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        if is_function and node.name == function_name:
            return ast.get_docstring(node)
    return None


def _require_docstring(function_name: str) -> str:
    """Return the non-empty docstring of *function_name* in server.py.

    Fails with an ``AssertionError`` when the function has no docstring or
    the docstring is blank.
    """
    docstring = get_function_docstring(SERVER_PATH, function_name)
    assert docstring is not None, f"{function_name} should have a docstring"
    assert docstring.strip(), f"{function_name} docstring should not be empty"
    return docstring


def test_get_markdown_has_docstring():
    """Test that the get_markdown endpoint has a docstring for MCP tool description."""
    docstring = _require_docstring("get_markdown")
    # Case-insensitive so any capitalisation of the word is accepted.
    assert "markdown" in docstring.lower(), \
        "get_markdown docstring should mention markdown"


def test_generate_html_has_docstring():
    """Test that the generate_html endpoint has a docstring for MCP tool description."""
    _require_docstring("generate_html")


if __name__ == "__main__":
    test_get_markdown_has_docstring()
    test_generate_html_has_docstring()
    print("All docstring tests passed!")