1010import importlib .util
1111import logging
1212import json
13- from typing import Any , List , Dict , Callable , Coroutine
13+ from typing import Any , List , Dict , Callable , Coroutine , Union
14+ import ast
15+ import re
1416
1517import nbformat
1618from nbformat import NotebookNode
@@ -77,6 +79,8 @@ def _register_tools(self):
7779 self .notebook_read ,
7880 self .notebook_change_cell_type ,
7981 self .notebook_duplicate_cell ,
82+ self .notebook_get_outline ,
83+ self .notebook_search ,
8084 ]
8185 for tool_method in tools_to_register :
8286 # Use the method's name and docstring for registration
@@ -1040,4 +1044,238 @@ async def notebook_duplicate_cell(self, notebook_path: str, cell_index: int, cou
10401044 raise
10411045 except Exception as e :
10421046 logger .exception (f"{ log_prefix } FAILED - Unexpected error: { e } " )
1043- raise RuntimeError (f"An unexpected error occurred: { e } " ) from e
1047+ raise RuntimeError (f"An unexpected error occurred: { e } " ) from e
1048+
async def notebook_get_outline(self, notebook_path: str) -> List[Dict[str, Union[int, str, List[str]]]]:
    """Produce a per-cell structural outline of a Jupyter notebook.

    The notebook is loaded through ``self.read_notebook`` (which, per the
    original author's note, also validates the path -- confirm against
    that helper). Each cell is then summarised: code cells go through
    ``_extract_code_outline``, markdown cells through
    ``_extract_markdown_outline``, and any cell that yields no outline
    items (including other cell types) falls back to
    ``_get_first_line_context``.

    Args:
        notebook_path: The absolute path to the .ipynb notebook file.

    Returns:
        One dictionary per cell, in notebook order, with the keys:
            - 'index': 0-based position of the cell.
            - 'type': the cell's ``cell_type`` value.
            - 'line_count': number of lines in the cell source.
            - 'outline': list of outline strings; never empty.
        If the notebook has no cells, a single-element list
        ``[{"message": "Notebook is empty or has no cells"}]`` is
        returned instead.

    Raises:
        ValueError, FileNotFoundError, IOError, PermissionError:
            Logged and re-raised unchanged.
        RuntimeError: Wraps any other exception.
    """
    log_prefix = self._log_prefix('notebook_get_outline', path=notebook_path)
    logger.info(f"{log_prefix} Called.")
    try:
        nb = await self.read_notebook(notebook_path, self.config.allowed_roots)

        # Guard clause: nothing to outline.
        if not nb.cells:
            logger.info(f"{log_prefix} SUCCESS - Notebook is empty.")
            return [{"message": "Notebook is empty or has no cells"}]

        structure_map: List[Dict[str, Union[int, str, List[str]]]] = []

        for position, cell in enumerate(nb.cells):
            text = cell.source
            n_lines = len(text.splitlines())
            kind = cell.cell_type

            if kind == 'code':
                items = self._extract_code_outline(text)
            elif kind == 'markdown':
                items = self._extract_markdown_outline(text)
            else:
                items = []

            # The 'outline' entry must never be empty, so fall back to
            # the first non-blank line (or an empty-cell marker).
            if not items:
                items = self._get_first_line_context(text)

            structure_map.append({
                "index": position,
                "type": kind,
                "line_count": n_lines,
                "outline": items,
            })

        logger.info(f"{log_prefix} SUCCESS - Generated outline ({len(structure_map)} cells analyzed).")
        return structure_map

    except (ValueError, FileNotFoundError, IOError, PermissionError) as e:
        logger.error(f"{log_prefix} FAILED - Specific error: {e}")
        raise
    except Exception as e:
        logger.exception(f"{log_prefix} FAILED - Unexpected error: {e}")
        raise RuntimeError(f"An unexpected error occurred: {e}") from e
1111+
1112+ # --- Helper methods for outline generation ---
1113+
1114+ def _extract_code_outline (self , source : str ) -> List [str ]:
1115+ """Extracts functions, classes, and comment headings from code."""
1116+ outline = []
1117+ # First pass for comment headings
1118+ try :
1119+ lines = source .splitlines ()
1120+ for line in lines :
1121+ match = re .match (r'^\s*#\s+(.*)' , line ) # Match comments like '# Heading'
1122+ if match and match .group (1 ):
1123+ outline .append (f"comment: { match .group (1 ).strip ()} " )
1124+ except Exception as e :
1125+ logger .warning (f"Error parsing comments for outline: { e } " )
1126+ # Continue to AST parsing even if comment parsing fails
1127+
1128+ # Second pass for AST elements (functions, classes)
1129+ try :
1130+ tree = ast .parse (source )
1131+ for node in ast .walk (tree ):
1132+ if isinstance (node , ast .FunctionDef ):
1133+ outline .append (f"func: { node .name } " )
1134+ elif isinstance (node , ast .ClassDef ):
1135+ outline .append (f"class: { node .name } " )
1136+ except SyntaxError :
1137+ # If syntax is invalid, AST parsing fails. Add indicator.
1138+ # Avoid adding if already found comment headings.
1139+ if not any (item .startswith ("comment:" ) for item in outline ):
1140+ outline .append ("<Syntax Error>" )
1141+ # Keep any comment headings found before the error
1142+ except Exception as e :
1143+ # Catch other potential AST parsing errors
1144+ if not outline : # Avoid adding if we already have items
1145+ outline .append (f"<AST Parsing Error: { e } >" )
1146+ return outline
1147+
1148+ def _extract_markdown_outline (self , source : str ) -> List [str ]:
1149+ """Extracts markdown headings (H1, H2, etc.) and HTML headings (h1-h6) from markdown."""
1150+ headings = []
1151+ # Regex to find HTML headings like <h1...>...</h1>, capturing level and content
1152+ # Handles attributes in the opening tag and ignores case for tag names
1153+ html_heading_re = re .compile (r'<h([1-6])[^>]*>(.*?)</h\1>' , re .IGNORECASE | re .DOTALL )
1154+ try :
1155+ lines = source .split ('\n ' )
1156+ for line in lines :
1157+ stripped_line = line .strip ()
1158+ # Check for Markdown heading first
1159+ md_match = re .match (r'^(#+)\s+(.*)' , stripped_line )
1160+ if md_match :
1161+ level = len (md_match .group (1 ))
1162+ heading_text = md_match .group (2 ).strip ()
1163+ if heading_text :
1164+ headings .append (f"H{ level } : { heading_text } " )
1165+ else :
1166+ # If not Markdown, check for HTML heading
1167+ # We search the stripped_line instead of match to find tag anywhere
1168+ html_match = html_heading_re .search (stripped_line )
1169+ if html_match :
1170+ level = int (html_match .group (1 ))
1171+ # Basic cleanup: remove potential inner tags for outline brevity
1172+ heading_text = re .sub (r'<.*?>' , '' , html_match .group (2 )).strip ()
1173+ if heading_text :
1174+ headings .append (f"H{ level } : { heading_text } " )
1175+
1176+ except AttributeError :
1177+ headings .append ("<Missing Source>" ) # Should be rare
1178+ except Exception as e :
1179+ headings .append (f"<Markdown Parsing Error: { e } >" )
1180+ return headings
1181+
1182+ def _get_first_line_context (self , source : str ) -> List [str ]:
1183+ """Gets the first non-empty line as context if no other outline found."""
1184+ try :
1185+ for line in source .splitlines ():
1186+ stripped_line = line .strip ()
1187+ if stripped_line :
1188+ # Truncate long lines for brevity
1189+ context = stripped_line [:100 ] + ('...' if len (stripped_line ) > 100 else '' )
1190+ return [f"context: { context } " ]
1191+ # If loop finishes without finding a non-empty line
1192+ return ["<Empty Cell>" ]
1193+ except Exception as e :
1194+ logger .warning (f"Error getting first line context: { e } " )
1195+ return ["<Error getting context>" ]
1196+
async def notebook_search(
    self,
    notebook_path: str,
    query: str
) -> List[Dict[str, Union[int, str]]]:
    """Find every line in a notebook's cells that contains ``query``.

    Matching is a case-insensitive substring test applied line by line
    to each cell's source. Cells that fail while being processed are
    logged and skipped so the remaining cells are still searched.

    Args:
        notebook_path: Absolute path to the .ipynb notebook file.
        query: The string to search for. Must be non-empty.

    Returns:
        One dictionary per matching line:
            - 'cell_index': 0-based index of the matching cell.
            - 'cell_type': the cell's ``cell_type`` value.
            - 'match_line_number': 1-based line number within the cell.
            - 'snippet': the stripped line; lines longer than 150
              characters are cut to a window around the match and
              marked with ``...`` on the truncated side(s).
        When nothing matches, ``[{"message": "No matches found"}]`` is
        returned rather than an empty list.

    Raises:
        ValueError: If ``query`` is empty (raised before the notebook
            is read), or re-raised from the read step.
        FileNotFoundError, IOError, PermissionError: Logged and
            re-raised unchanged.
        RuntimeError: Wraps any other exception.
    """
    log_prefix = self._log_prefix('notebook_search', path=notebook_path, query=query)
    logger.info(f"{log_prefix} Called.")

    if not query:
        raise ValueError("Search query cannot be empty.")

    results: List[Dict[str, Union[int, str]]] = []
    try:
        nb = await self.read_notebook(notebook_path, self.config.allowed_roots)
        needle = query.lower()
        MAX_SNIPPET_LEN = 150  # Max length for the snippet

        for index, cell in enumerate(nb.cells):
            try:
                source = cell.source
                cell_type = cell.cell_type
                for line_number, line in enumerate(source.splitlines(), start=1):
                    if needle not in line.lower():
                        continue

                    snippet = line.strip()
                    if len(snippet) > MAX_SNIPPET_LEN:
                        # Centre the window on the match when it can be
                        # located in the stripped line.
                        match_start = snippet.lower().find(needle)
                        if match_start == -1:
                            # Match not found after stripping; fall back
                            # to a plain head truncation.
                            snippet = snippet[:MAX_SNIPPET_LEN] + "..."
                        else:
                            start = max(0, match_start - MAX_SNIPPET_LEN // 3)
                            end = min(len(snippet), match_start + len(query) + (MAX_SNIPPET_LEN * 2 // 3))
                            prefix = "..." if start > 0 else ""
                            suffix = "..." if end < len(snippet) else ""
                            snippet = prefix + snippet[start:end] + suffix

                    results.append({
                        "cell_index": index,
                        "cell_type": cell_type,
                        "match_line_number": line_number,
                        "snippet": snippet,
                    })
            except AttributeError:
                # Skip cells that might unexpectedly lack a source attribute.
                logger.warning(f"{log_prefix} Skipping cell {index} due to missing source.")
                continue
            except Exception as cell_err:
                # One bad cell must not abort the whole search.
                logger.error(f"{log_prefix} Error processing cell {index}: {cell_err}")
                continue

        if results:
            logger.info(f"{log_prefix} SUCCESS - Found {len(results)} match(es).")
            return results

        # Report "no matches" explicitly instead of returning an empty list.
        logger.info(f"{log_prefix} SUCCESS - No matches found.")
        return [{"message": "No matches found"}]

    except (ValueError, FileNotFoundError, IOError, PermissionError) as e:
        logger.error(f"{log_prefix} FAILED - Specific error: {e}")
        raise
    except Exception as e:
        logger.exception(f"{log_prefix} FAILED - Unexpected error: {e}")
        raise RuntimeError(f"An unexpected error occurred during notebook search: {e}") from e
0 commit comments