99
1010from .tree_sitter_identifier_queries import LANG_TO_TREE_SITTER_QUERY
1111
12+ """
13+ Parser for extracting identifier information from source code using tree-sitter.
14+ Supports multiple languages and provides functionality to find and analyze identifiers
15+ like functions and classes along with their hierarchical relationships.
16+ """
17+
1218_log = logging .getLogger (__name__ )
1319
20+ """Type alias for functions that find identifiers in source code.
21+ Takes a Marker/Segment and optional RangeSpec, returns identifier boundaries or range."""
1422IdentifierFinder : TypeAlias = Callable [[Marker | Segment , RangeSpec | None ], IdentifierBoundaries | RangeSpec | None ]
1523
1624
1725def find_identifier (source_info : tuple [str , str | Sequence [str ]], search_rage : RangeSpec = RangeSpec .EMPTY ) -> IdentifierFinder :
26+ """Factory function that creates an identifier finder for the given source.
27+
28+ Args:
29+ source_info: Tuple of (file_path, source_content)
30+ search_rage: Optional range to limit the search scope
31+
32+ Returns:
33+ IdentifierFinder function configured for the given source
34+ """
1835 file_path = source_info [0 ]
1936 source = source_info [1 ]
2037 if not isinstance (source , str ):
@@ -23,6 +40,16 @@ def find_identifier(source_info: tuple[str, str | Sequence[str]], search_rage: R
2340
2441
2542def _select_finder (file_path : str , source : str , search_range : RangeSpec = RangeSpec .EMPTY ) -> IdentifierFinder :
43+ """Selects and configures an appropriate identifier finder for the given file.
44+
45+ Args:
46+ file_path: Path to the source file
47+ source: Source code content
48+ search_range: Optional range to limit the search scope
49+
50+ Returns:
51+ IdentifierFinder function configured for the file type
52+ """
2653 langstr = filename_to_lang (file_path )
2754 match langstr :
2855 case None :
@@ -37,6 +64,7 @@ def _select_finder(file_path: str, source: str, search_range: RangeSpec = RangeS
3764
3865 source = source .splitlines ()
3966
67+
4068 def find_by_marker (mos : Marker | Segment , search_range : RangeSpec | None = None ) -> IdentifierBoundaries | RangeSpec | None :
4169 match mos :
4270
@@ -58,6 +86,18 @@ def _get_by_offset(obj: Sequence, offset: int):
5886
5987
6088class CaptureInfo (NamedTuple ):
89+ """Container for information about a captured node from tree-sitter parsing.
90+
91+ Attributes:
92+ capture_type: Type of the captured node (e.g., 'function.definition')
93+ node: The tree-sitter node that was captured
94+
95+ Properties:
96+ node_type: Type of the underlying node
97+ range: Tuple of (start_line, end_line)
98+ identifier: Name of the identifier if this is a name capture
99+ parents: List of (node_type, node_name) tuples representing the hierarchy
100+ """
61101 capture_type : str
62102 node : any
63103
@@ -81,6 +121,15 @@ def identifier(self):
81121
82122
83123def associate_identifier_parts (captures : Iterable [CaptureInfo ], lines : Sequence [str ]) -> list [IdentifierBoundaries ]:
124+ """Associates related identifier parts (definition, body, docstring, etc) into IdentifierBoundaries.
125+
126+ Args:
127+ captures: Iterable of CaptureInfo objects representing related parts
128+ lines: Sequence of source code lines
129+
130+ Returns:
131+ List of IdentifierBoundaries with all parts associated
132+ """
84133 identifier_map : dict [int , IdentifierBoundaries ] = {}
85134
86135 for capture in captures :
@@ -119,17 +168,19 @@ def find_parent_definition(node):
119168 return None
120169
121170
122- def _find_identifier (language , source : Sequence [str ], tree , query_scm : dict [str , dict [str , str ]], marker : Marker ) \
123- -> IdentifierBoundaries | None :
124- """
125- Find the starting line index of a specified function in the given lines.
126-
127- :param source: The original text
128- :param tree: The parsed tree from tree-sitter
129- :param query_scm: A dictionary containing queries for different types of identifiers
130- :param marker: Type, name and offset of the identifier to find.
131- :return: IdentifierBoundaries with identifier start, body start, and end lines of the identifier
132- or None if not found.
171+ def _find_identifier (language , source : Sequence [str ], tree , query_scm : dict [str , dict [str , str ]], marker : Marker ) -> IdentifierBoundaries | None :
172+ """Finds an identifier in the source code using tree-sitter queries.
173+
174+ Args:
175+ language: Tree-sitter language
176+ source: List of source code lines
177+ tree: Parsed tree-sitter tree
178+ query_scm: Dictionary of queries for different identifier types
179+ marker: Type, name and offset of the identifier to find
180+
181+ Returns:
182+ IdentifierBoundaries with identifier start, body start, and end lines of the identifier
183+ or None if not found
133184 """
134185 try :
135186 candidates = language .query (query_scm [marker .type ].format (name = marker .value )).captures (tree .root_node )
@@ -160,6 +211,15 @@ def _find_identifier(language, source: Sequence[str], tree, query_scm: dict[str,
160211
161212
162213def capture2identifier_boundaries (captures , lines : Sequence [str ]) -> list [IdentifierBoundaries ]:
214+ """Converts raw tree-sitter captures to IdentifierBoundaries objects.
215+
216+ Args:
217+ captures: Raw captures from tree-sitter query
218+ lines: Sequence of source code lines
219+
220+ Returns:
221+ List of IdentifierBoundaries representing the captured identifiers
222+ """
163223 captures = [CaptureInfo (c [1 ], c [0 ]) for c in captures if not c [1 ].startswith ('_' )]
164224 unique_captures = {}
165225 for capture in captures :
0 commit comments