Skip to content

Commit b6b8620

Browse files
committed
feat(tools): Added tools to get an outline and search a notebook, so specific cells can be targeted for read/edit
This commit introduces two new methods to the NotebookTools class: `notebook_get_outline` and `notebook_search`.
- The `notebook_get_outline` method analyzes a Jupyter notebook's structure, extracting cell types, line counts, and outlines for code and markdown cells.
- The `notebook_search` method performs a case-insensitive search within notebook cells, returning matches with context snippets.
- Tests for both methods have been added.
1 parent a4cdfe3 commit b6b8620

File tree

2 files changed

+383
-2
lines changed

2 files changed

+383
-2
lines changed

cursor_notebook_mcp/tools.py

Lines changed: 240 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
import importlib.util
1111
import logging
1212
import json
13-
from typing import Any, List, Dict, Callable, Coroutine
13+
from typing import Any, List, Dict, Callable, Coroutine, Union
14+
import ast
15+
import re
1416

1517
import nbformat
1618
from nbformat import NotebookNode
@@ -77,6 +79,8 @@ def _register_tools(self):
7779
self.notebook_read,
7880
self.notebook_change_cell_type,
7981
self.notebook_duplicate_cell,
82+
self.notebook_get_outline,
83+
self.notebook_search,
8084
]
8185
for tool_method in tools_to_register:
8286
# Use the method's name and docstring for registration
@@ -1040,4 +1044,238 @@ async def notebook_duplicate_cell(self, notebook_path: str, cell_index: int, cou
10401044
raise
10411045
except Exception as e:
10421046
logger.exception(f"{log_prefix} FAILED - Unexpected error: {e}")
1043-
raise RuntimeError(f"An unexpected error occurred: {e}") from e
1047+
raise RuntimeError(f"An unexpected error occurred: {e}") from e
1048+
1049+
async def notebook_get_outline(self, notebook_path: str) -> List[Dict[str, Union[int, str, List[str]]]]:
    """Summarise the structure of a Jupyter notebook, one entry per cell.

    The notebook is loaded through ``self.read_notebook`` and each cell is
    described by its position, type, line count and a short outline. Code
    cells are outlined with ``_extract_code_outline``, markdown cells with
    ``_extract_markdown_outline``; any cell whose outline comes back empty
    (including cells of other types) falls back to
    ``_get_first_line_context``.

    Args:
        notebook_path: The absolute path to the .ipynb notebook file.

    Returns:
        A list with one dictionary per cell containing:
        - 'index': The 0-based index of the cell.
        - 'type': The cell's ``cell_type`` value.
        - 'line_count': Number of lines in the cell source.
        - 'outline': A non-empty list of outline strings.
        When the notebook has no cells, a single-element list holding
        ``{"message": "Notebook is empty or has no cells"}`` is returned
        instead.

    Raises:
        ValueError, FileNotFoundError, IOError, PermissionError: Logged and
            re-raised unchanged.
        RuntimeError: Wraps any other exception.
    """
    log_prefix = self._log_prefix('notebook_get_outline', path=notebook_path)
    logger.info(f"{log_prefix} Called.")
    try:
        # NOTE(review): per the original author's comment, read_notebook is
        # expected to validate the path against allowed_roots - confirm.
        nb = await self.read_notebook(notebook_path, self.config.allowed_roots)

        if not nb.cells:
            logger.info(f"{log_prefix} SUCCESS - Notebook is empty.")
            return [{"message": "Notebook is empty or has no cells"}]

        structure_map: List[Dict[str, Union[int, str, List[str]]]] = []

        for position, cell in enumerate(nb.cells):
            text = cell.source
            n_lines = len(text.splitlines())
            kind = cell.cell_type

            # Pick the extractor matching the cell type; other types get none.
            if kind == 'code':
                entries = self._extract_code_outline(text)
            elif kind == 'markdown':
                entries = self._extract_markdown_outline(text)
            else:
                entries = []

            # Guarantee at least one outline item per cell.
            entries = entries or self._get_first_line_context(text)

            structure_map.append({
                "index": position,
                "type": kind,
                "line_count": n_lines,
                "outline": entries
            })

        logger.info(f"{log_prefix} SUCCESS - Generated outline ({len(structure_map)} cells analyzed).")
        return structure_map

    except (ValueError, FileNotFoundError, IOError, PermissionError) as e:
        logger.error(f"{log_prefix} FAILED - Specific error: {e}")
        raise
    except Exception as e:
        logger.exception(f"{log_prefix} FAILED - Unexpected error: {e}")
        raise RuntimeError(f"An unexpected error occurred: {e}") from e
1111+
1112+
# --- Helper methods for outline generation ---
1113+
1114+
def _extract_code_outline(self, source: str) -> List[str]:
1115+
"""Extracts functions, classes, and comment headings from code."""
1116+
outline = []
1117+
# First pass for comment headings
1118+
try:
1119+
lines = source.splitlines()
1120+
for line in lines:
1121+
match = re.match(r'^\s*#\s+(.*)', line) # Match comments like '# Heading'
1122+
if match and match.group(1):
1123+
outline.append(f"comment: {match.group(1).strip()}")
1124+
except Exception as e:
1125+
logger.warning(f"Error parsing comments for outline: {e}")
1126+
# Continue to AST parsing even if comment parsing fails
1127+
1128+
# Second pass for AST elements (functions, classes)
1129+
try:
1130+
tree = ast.parse(source)
1131+
for node in ast.walk(tree):
1132+
if isinstance(node, ast.FunctionDef):
1133+
outline.append(f"func: {node.name}")
1134+
elif isinstance(node, ast.ClassDef):
1135+
outline.append(f"class: {node.name}")
1136+
except SyntaxError:
1137+
# If syntax is invalid, AST parsing fails. Add indicator.
1138+
# Avoid adding if already found comment headings.
1139+
if not any(item.startswith("comment:") for item in outline):
1140+
outline.append("<Syntax Error>")
1141+
# Keep any comment headings found before the error
1142+
except Exception as e:
1143+
# Catch other potential AST parsing errors
1144+
if not outline: # Avoid adding if we already have items
1145+
outline.append(f"<AST Parsing Error: {e}>")
1146+
return outline
1147+
1148+
def _extract_markdown_outline(self, source: str) -> List[str]:
1149+
"""Extracts markdown headings (H1, H2, etc.) and HTML headings (h1-h6) from markdown."""
1150+
headings = []
1151+
# Regex to find HTML headings like <h1...>...</h1>, capturing level and content
1152+
# Handles attributes in the opening tag and ignores case for tag names
1153+
html_heading_re = re.compile(r'<h([1-6])[^>]*>(.*?)</h\1>', re.IGNORECASE | re.DOTALL)
1154+
try:
1155+
lines = source.split('\n')
1156+
for line in lines:
1157+
stripped_line = line.strip()
1158+
# Check for Markdown heading first
1159+
md_match = re.match(r'^(#+)\s+(.*)', stripped_line)
1160+
if md_match:
1161+
level = len(md_match.group(1))
1162+
heading_text = md_match.group(2).strip()
1163+
if heading_text:
1164+
headings.append(f"H{level}: {heading_text}")
1165+
else:
1166+
# If not Markdown, check for HTML heading
1167+
# We search the stripped_line instead of match to find tag anywhere
1168+
html_match = html_heading_re.search(stripped_line)
1169+
if html_match:
1170+
level = int(html_match.group(1))
1171+
# Basic cleanup: remove potential inner tags for outline brevity
1172+
heading_text = re.sub(r'<.*?>', '', html_match.group(2)).strip()
1173+
if heading_text:
1174+
headings.append(f"H{level}: {heading_text}")
1175+
1176+
except AttributeError:
1177+
headings.append("<Missing Source>") # Should be rare
1178+
except Exception as e:
1179+
headings.append(f"<Markdown Parsing Error: {e}>")
1180+
return headings
1181+
1182+
def _get_first_line_context(self, source: str) -> List[str]:
1183+
"""Gets the first non-empty line as context if no other outline found."""
1184+
try:
1185+
for line in source.splitlines():
1186+
stripped_line = line.strip()
1187+
if stripped_line:
1188+
# Truncate long lines for brevity
1189+
context = stripped_line[:100] + ('...' if len(stripped_line) > 100 else '')
1190+
return [f"context: {context}"]
1191+
# If loop finishes without finding a non-empty line
1192+
return ["<Empty Cell>"]
1193+
except Exception as e:
1194+
logger.warning(f"Error getting first line context: {e}")
1195+
return ["<Error getting context>"]
1196+
1197+
async def notebook_search(
    self,
    notebook_path: str,
    query: str
) -> List[Dict[str, Union[int, str]]]:
    """Find every line in a notebook's cells that contains ``query``.

    Matching is case-insensitive (both sides are lower-cased) and is done
    line by line on each cell's source. A line is reported once even if the
    query occurs several times in it.

    Args:
        notebook_path: Absolute path to the .ipynb notebook file.
        query: The string to search for; must be non-empty.

    Returns:
        A list with one dictionary per matching line:
        - 'cell_index': The 0-based index of the matching cell.
        - 'cell_type': The ``cell_type`` of the matching cell.
        - 'match_line_number': The 1-based line number within the cell.
        - 'snippet': The stripped line, windowed around the match when it
          is longer than 150 characters.
        When nothing matches, ``[{"message": "No matches found"}]`` is
        returned instead of an empty list.

    Raises:
        ValueError: If ``query`` is empty (also re-raised from reading).
        FileNotFoundError, IOError, PermissionError: Logged and re-raised.
        RuntimeError: Wraps any other exception.
    """
    log_prefix = self._log_prefix('notebook_search', path=notebook_path, query=query)
    logger.info(f"{log_prefix} Called.")

    if not query:
        raise ValueError("Search query cannot be empty.")

    results: List[Dict[str, Union[int, str]]] = []
    try:
        nb = await self.read_notebook(notebook_path, self.config.allowed_roots)
        needle = query.lower()
        MAX_SNIPPET_LEN = 150  # Lines longer than this are windowed.

        for index, cell in enumerate(nb.cells):
            try:
                source = cell.source
                cell_type = cell.cell_type
                for line_number, raw_line in enumerate(source.splitlines(), start=1):
                    if needle not in raw_line.lower():
                        continue

                    snippet = raw_line.strip()
                    if len(snippet) > MAX_SNIPPET_LEN:
                        hit = snippet.lower().find(needle)
                        if hit < 0:
                            # Query not located in the stripped line:
                            # fall back to a plain head truncation.
                            snippet = snippet[:MAX_SNIPPET_LEN] + "..."
                        else:
                            # Window: a third of the budget before the
                            # match, two thirds after it.
                            lo = max(0, hit - MAX_SNIPPET_LEN // 3)
                            hi = min(len(snippet), hit + len(query) + (MAX_SNIPPET_LEN * 2 // 3))
                            lead = "..." if lo > 0 else ""
                            tail = "..." if hi < len(snippet) else ""
                            snippet = lead + snippet[lo:hi] + tail

                    results.append({
                        "cell_index": index,
                        "cell_type": cell_type,
                        "match_line_number": line_number,
                        "snippet": snippet
                    })
            except AttributeError:
                # Cell without a usable source attribute: skip it.
                logger.warning(f"{log_prefix} Skipping cell {index} due to missing source.")
                continue
            except Exception as cell_err:
                # A failure in one cell must not abort the whole search.
                logger.error(f"{log_prefix} Error processing cell {index}: {cell_err}")
                continue

        if results:
            logger.info(f"{log_prefix} SUCCESS - Found {len(results)} match(es).")
            return results

        # Callers receive an explicit message rather than an empty list.
        logger.info(f"{log_prefix} SUCCESS - No matches found.")
        return [{"message": "No matches found"}]

    except (ValueError, FileNotFoundError, IOError, PermissionError) as e:
        logger.error(f"{log_prefix} FAILED - Specific error: {e}")
        raise
    except Exception as e:
        logger.exception(f"{log_prefix} FAILED - Unexpected error: {e}")
        raise RuntimeError(f"An unexpected error occurred during notebook search: {e}") from e

0 commit comments

Comments
 (0)