Skip to content

Commit affaa0c

Browse files
committed
Documentation
1 parent b614279 commit affaa0c

File tree

1 file changed

+71
-11
lines changed

1 file changed

+71
-11
lines changed

src/cedarscript_editor/tree_sitter_identifier_finder.py

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,29 @@
99

1010
from .tree_sitter_identifier_queries import LANG_TO_TREE_SITTER_QUERY
1111

12+
"""
13+
Parser for extracting identifier information from source code using tree-sitter.
14+
Supports multiple languages and provides functionality to find and analyze identifiers
15+
like functions and classes along with their hierarchical relationships.
16+
"""
17+
1218
_log = logging.getLogger(__name__)
1319

20+
"""Type alias for functions that find identifiers in source code.
21+
Takes a Marker/Segment and an optional RangeSpec; returns IdentifierBoundaries, a RangeSpec, or None."""
1422
IdentifierFinder: TypeAlias = Callable[[Marker | Segment, RangeSpec | None], IdentifierBoundaries | RangeSpec | None]
1523

1624

1725
def find_identifier(source_info: tuple[str, str | Sequence[str]], search_rage: RangeSpec = RangeSpec.EMPTY) -> IdentifierFinder:
26+
"""Factory function that creates an identifier finder for the given source.
27+
28+
Args:
29+
source_info: Tuple of (file_path, source_content)
30+
search_rage: Optional range to limit the search scope
31+
32+
Returns:
33+
IdentifierFinder function configured for the given source
34+
"""
1835
file_path = source_info[0]
1936
source = source_info[1]
2037
if not isinstance(source, str):
@@ -23,6 +40,16 @@ def find_identifier(source_info: tuple[str, str | Sequence[str]], search_rage: R
2340

2441

2542
def _select_finder(file_path: str, source: str, search_range: RangeSpec = RangeSpec.EMPTY) -> IdentifierFinder:
43+
"""Selects and configures an appropriate identifier finder for the given file.
44+
45+
Args:
46+
file_path: Path to the source file
47+
source: Source code content
48+
search_range: Optional range to limit the search scope
49+
50+
Returns:
51+
IdentifierFinder function configured for the file type
52+
"""
2653
langstr = filename_to_lang(file_path)
2754
match langstr:
2855
case None:
@@ -37,6 +64,7 @@ def _select_finder(file_path: str, source: str, search_range: RangeSpec = RangeS
3764

3865
source = source.splitlines()
3966

67+
4068
def find_by_marker(mos: Marker | Segment, search_range: RangeSpec | None = None) -> IdentifierBoundaries | RangeSpec | None:
4169
match mos:
4270

@@ -58,6 +86,18 @@ def _get_by_offset(obj: Sequence, offset: int):
5886

5987

6088
class CaptureInfo(NamedTuple):
89+
"""Container for information about a captured node from tree-sitter parsing.
90+
91+
Attributes:
92+
capture_type: Type of the captured node (e.g., 'function.definition')
93+
node: The tree-sitter node that was captured
94+
95+
Properties:
96+
node_type: Type of the underlying node
97+
range: Tuple of (start_line, end_line)
98+
identifier: Name of the identifier if this is a name capture
99+
parents: List of (node_type, node_name) tuples representing the hierarchy
100+
"""
61101
capture_type: str
62102
node: any
63103

@@ -81,6 +121,15 @@ def identifier(self):
81121

82122

83123
def associate_identifier_parts(captures: Iterable[CaptureInfo], lines: Sequence[str]) -> list[IdentifierBoundaries]:
124+
"""Associates related identifier parts (definition, body, docstring, etc) into IdentifierBoundaries.
125+
126+
Args:
127+
captures: Iterable of CaptureInfo objects representing related parts
128+
lines: Sequence of source code lines
129+
130+
Returns:
131+
List of IdentifierBoundaries with all parts associated
132+
"""
84133
identifier_map: dict[int, IdentifierBoundaries] = {}
85134

86135
for capture in captures:
@@ -119,17 +168,19 @@ def find_parent_definition(node):
119168
return None
120169

121170

122-
def _find_identifier(language, source: Sequence[str], tree, query_scm: dict[str, dict[str, str]], marker: Marker) \
123-
-> IdentifierBoundaries | None:
124-
"""
125-
Find the starting line index of a specified function in the given lines.
126-
127-
:param source: The original text
128-
:param tree: The parsed tree from tree-sitter
129-
:param query_scm: A dictionary containing queries for different types of identifiers
130-
:param marker: Type, name and offset of the identifier to find.
131-
:return: IdentifierBoundaries with identifier start, body start, and end lines of the identifier
132-
or None if not found.
171+
def _find_identifier(language, source: Sequence[str], tree, query_scm: dict[str, dict[str, str]], marker: Marker) -> IdentifierBoundaries | None:
172+
"""Finds an identifier in the source code using tree-sitter queries.
173+
174+
Args:
175+
language: Tree-sitter language
176+
source: List of source code lines
177+
tree: Parsed tree-sitter tree
178+
query_scm: Dictionary of queries for different identifier types
179+
marker: Type, name and offset of the identifier to find
180+
181+
Returns:
182+
IdentifierBoundaries with identifier start, body start, and end lines of the identifier
183+
or None if not found
133184
"""
134185
try:
135186
candidates = language.query(query_scm[marker.type].format(name=marker.value)).captures(tree.root_node)
@@ -160,6 +211,15 @@ def _find_identifier(language, source: Sequence[str], tree, query_scm: dict[str,
160211

161212

162213
def capture2identifier_boundaries(captures, lines: Sequence[str]) -> list[IdentifierBoundaries]:
214+
"""Converts raw tree-sitter captures to IdentifierBoundaries objects.
215+
216+
Args:
217+
captures: Raw captures from tree-sitter query
218+
lines: Sequence of source code lines
219+
220+
Returns:
221+
List of IdentifierBoundaries representing the captured identifiers
222+
"""
163223
captures = [CaptureInfo(c[1], c[0]) for c in captures if not c[1].startswith('_')]
164224
unique_captures = {}
165225
for capture in captures:

0 commit comments

Comments
 (0)