CEDARScript
diff --git a/‎src/cedarscript_editor/tree_sitter_identifier_finder.py‎
Lines changed: 92 additions & 100 deletions b/‎src/cedarscript_editor/tree_sitter_identifier_finder.py‎
Lines changed: 92 additions & 100 deletions
diff --git a/‎tests/corpus/chat.make-top-level-from-method.1/chat.xml‎
Lines changed: 6 additions & 6 deletions b/‎tests/corpus/chat.make-top-level-from-method.1/chat.xml‎
Lines changed: 6 additions & 6 deletions
@@ -1,11 +1,12 @@
 import logging
+from dataclasses import dataclass
 from functools import cached_property
-from typing import Callable, TypeAlias, Sequence, NamedTuple, Iterable
+from typing import Sequence, Iterable
 
-from cedarscript_ast_parser import Marker, MarkerType, Segment
+from cedarscript_ast_parser import Marker, MarkerType, Segment, RelativeMarker
 from grep_ast import filename_to_lang
 from text_manipulation.indentation_kit import get_line_indent_count
-from text_manipulation.range_spec import IdentifierBoundaries, RangeSpec
+from text_manipulation.range_spec import IdentifierBoundaries, RangeSpec, ParentInfo, ParentRestriction
 from tree_sitter_languages import get_language, get_parser
 
 from .tree_sitter_identifier_queries import LANG_TO_TREE_SITTER_QUERY
@@ -18,65 +19,94 @@
 
 _log = logging.getLogger(__name__)
 
-"""Type alias for functions that find identifiers in source code.
-Takes a Marker/Segment and optional RangeSpec, returns identifier boundaries or range."""
-IdentifierFinder: TypeAlias = Callable[[Marker | Segment, RangeSpec | None], IdentifierBoundaries | RangeSpec | None]
+class IdentifierFinder:
+    """Finds identifiers in source code based on markers and parent restrictions.
 
-
-def find_identifier(source_info: tuple[str, str | Sequence[str]], search_rage: RangeSpec = RangeSpec.EMPTY) -> IdentifierFinder:
-    """Factory function that creates an identifier finder for the given source.
-
-    Args:
-        source_info: Tuple of (file_path, source_content)
-        search_rage: Optional range to limit the search scope
-
-    Returns:
-        IdentifierFinder function configured for the given source
-    """
-    file_path = source_info[0]
-    source = source_info[1]
-    if not isinstance(source, str):
-        source = '\n'.join(source)
-    return _select_finder(file_path, source, search_rage)
-
-
-def _select_finder(file_path: str, source: str, search_range: RangeSpec = RangeSpec.EMPTY) -> IdentifierFinder:
-    """Selects and configures an appropriate identifier finder for the given file.
-
-    Args:
+    Attributes:
+        lines: List of source code lines
         file_path: Path to the source file
-        source: Source code content
-        search_range: Optional range to limit the search scope
-
-    Returns:
-        IdentifierFinder function configured for the file type
+        source: Complete source code as a single string
+        language: Tree-sitter language instance
+        tree: Parsed tree-sitter tree
+        query_info: Language-specific query information
     """
-    langstr = filename_to_lang(file_path)
-    match langstr:
-        case None:
-            language = None
-            query_info = None
-            _log.info(f"[select_finder] NO LANGUAGE for `{file_path}`")
-        case _:
-            query_info = LANG_TO_TREE_SITTER_QUERY[langstr]
-            language = get_language(langstr)
-            _log.info(f"[select_finder] Selected {language}")
-            tree = get_parser(langstr).parse(bytes(source, "utf-8"))
-
-    source = source.splitlines()
-
-    def find_by_marker(mos: Marker | Segment, search_range: RangeSpec | None = None) -> IdentifierBoundaries | RangeSpec | None:
-        match mos:
 
+    def __init__(self, fname: str, source: str | Sequence[str], parent_restriction: ParentRestriction = None):
+        self.parent_restriction = parent_restriction
+        match source:
+            case str() as s:
+                self.lines = s.splitlines()
+            case _ as lines:
+                self.lines = lines
+                source = '\n'.join(lines)
+        langstr = filename_to_lang(fname)
+        if langstr is None:
+            self.language = None
+            self.query_info = None
+            _log.info(f"[IdentifierFinder] NO LANGUAGE for `{fname}`")
+            return
+        self.query_info: dict[str, dict[str, str]] = LANG_TO_TREE_SITTER_QUERY[langstr]
+        self.language = get_language(langstr)
+        _log.info(f"[IdentifierFinder] Selected {self.language}")
+        self.tree = get_parser(langstr).parse(bytes(source, "utf-8"))
+
+    def __call__(self, mos: Marker | Segment, parent_restriction: ParentRestriction = None) -> IdentifierBoundaries | RangeSpec | None:
+        parent_restriction = parent_restriction or self.parent_restriction
+        match mos:
             case Marker(MarkerType.LINE) | Segment():
                 # TODO pass IdentifierFinder to enable identifiers as start and/or end of a segment
-                return mos.to_search_range(source, search_range).set_line_count(1)  # returns RangeSpec
+                return mos.to_search_range(self.lines, parent_restriction).set_line_count(1)  # returns RangeSpec
 
             case Marker() as marker:
                 # Returns IdentifierBoundaries
-                return _find_identifier(language, source, tree, query_info, marker)
-
-    return find_by_marker
+                return self._find_identifier(marker, parent_restriction)
+
+    def _find_identifier(self,
+        marker: Marker,
+        parent_restriction: ParentRestriction
+    ) -> IdentifierBoundaries | RangeSpec | None:
+        """Finds an identifier in the source code using tree-sitter queries.
+
+        Args:
+            language: Tree-sitter language
+            source: List of source code lines
+            tree: Parsed tree-sitter tree
+            query_scm: Dictionary of queries for different identifier types
+            marker: Type, name and offset of the identifier to find
+
+        Returns:
+            IdentifierBoundaries with identifier IdentifierBoundaries with identifier start, body start, and end lines of the identifier
+        or None if not found
+        """
+        try:
+            candidates = self.language.query(self.query_info[marker.type].format(name=marker.value)).captures(self.tree.root_node)
+            candidates: list[IdentifierBoundaries] = [ib for ib in capture2identifier_boundaries(
+                candidates,
+                self.lines
+            ) if ib.match_parent(parent_restriction)]
+        except Exception as e:
+            raise ValueError(f"Unable to capture nodes for {marker}: {e}") from e
+
+        candidate_count = len(candidates)
+        if not candidate_count:
+            return None
+        if candidate_count > 1 and marker.offset is None:
+            raise ValueError(
+                f"The {marker.type} identifier named `{marker.value}` is ambiguous (found {candidate_count} matches). "
+                f"Choose an `OFFSET` between 0 and {candidate_count - 1} to determine how many to skip. "
+                f"Example to reference the *last* `{marker.value}`: `OFFSET {candidate_count - 1}`"
+            )
+        if marker.offset and marker.offset >= candidate_count:
+            raise ValueError(
+                f"There are only {candidate_count} {marker.type} identifiers named `{marker.value}`, "
+                f"but 'OFFSET' was set to {marker.offset} (you can skip at most {candidate_count - 1} of those)"
+            )
+        candidates.sort(key=lambda x: x.whole.start)
+        result: IdentifierBoundaries = _get_by_offset(candidates, marker.offset or 0)
+        match marker:
+            case RelativeMarker(qualifier=relative_position_type):
+                return result.location_to_search_range(relative_position_type)
+        return result
 
 
 def _get_by_offset(obj: Sequence, offset: int):
@@ -85,7 +115,8 @@ def _get_by_offset(obj: Sequence, offset: int):
     return None
 
 
-class CaptureInfo(NamedTuple):
+@dataclass(frozen=True)
+class CaptureInfo:
     """Container for information about a captured node from tree-sitter parsing.
 
     Attributes:
@@ -120,10 +151,10 @@ def identifier(self):
         return self.node.text.decode("utf-8")
 
     @cached_property
-    def parents(self) -> list[tuple[str, str]]:
+    def parents(self) -> list[ParentInfo]:
         """Returns a list of (node_type, node_name) tuples representing the hierarchy.
         The list is ordered from immediate parent to root."""
-        parents = []
+        parents: list[ParentInfo] = []
         current = self.node.parent
 
         while current:
@@ -135,7 +166,7 @@ def parents(self) -> list[tuple[str, str]]:
                     if child.type == 'identifier' or child.type == 'name':
                         name = child.text.decode('utf-8')
                         break
-                parents.append((current.type, name))
+                parents.append(ParentInfo(name, current.type))
             current = current.parent
 
         return parents
@@ -157,7 +188,10 @@ def associate_identifier_parts(captures: Iterable[CaptureInfo], lines: Sequence[
         capture_type = capture.capture_type.split('.')[-1]
         range_spec = capture.to_range_spec(lines)
         if capture_type == 'definition':
-            identifier_map[range_spec.start] = IdentifierBoundaries(range_spec)
+            identifier_map[range_spec.start] = IdentifierBoundaries(
+                whole=range_spec,
+                parents=capture.parents
+            )
 
         else:
             parent = find_parent_definition(capture.node)
@@ -190,48 +224,6 @@ def find_parent_definition(node):
     return None
 
 
-def _find_identifier(language, source: Sequence[str], tree, query_scm: dict[str, dict[str, str]], marker: Marker) -> IdentifierBoundaries | None:
-    """Finds an identifier in the source code using tree-sitter queries.
-
-    Args:
-        language: Tree-sitter language
-        source: List of source code lines
-        tree: Parsed tree-sitter tree
-        query_scm: Dictionary of queries for different identifier types
-        marker: Type, name and offset of the identifier to find
-
-    Returns:
-        IdentifierBoundaries with identifier IdentifierBoundaries with identifier start, body start, and end lines of the identifier
-    or None if not found
-    """
-    try:
-        candidates = language.query(query_scm[marker.type].format(name=marker.value)).captures(tree.root_node)
-        candidates: list[IdentifierBoundaries] = capture2identifier_boundaries(
-            candidates,
-            source
-        )
-    except Exception as e:
-        raise ValueError(f"Unable to capture nodes for {marker}: {e}") from e
-
-    candidate_count = len(candidates)
-    if not candidate_count:
-        return None
-    if candidate_count > 1 and marker.offset is None:
-        raise ValueError(
-            f"The {marker.type} identifier named `{marker.value}` is ambiguous (found {candidate_count} matches). "
-            f"Choose an `OFFSET` between 0 and {candidate_count - 1} to determine how many to skip. "
-            f"Example to reference the *last* `{marker.value}`: `OFFSET {candidate_count - 1}`"
-        )
-    if marker.offset and marker.offset >= candidate_count:
-        raise ValueError(
-            f"There are only {candidate_count} {marker.type} identifiers named `{marker.value}`, "
-            f"but 'OFFSET' was set to {marker.offset} (you can skip at most {candidate_count - 1} of those)"
-        )
-    candidates.sort(key=lambda x: x.whole.start)
-    result: IdentifierBoundaries = _get_by_offset(candidates, marker.offset or 0)
-    return result
-
-
 def capture2identifier_boundaries(captures, lines: Sequence[str]) -> list[IdentifierBoundaries]:
     """Converts raw tree-sitter captures to IdentifierBoundaries objects.
 
 
@@ -12,27 +12,27 @@ Here's the CEDARScript script:
 
 ```CEDARScript
 -- 1. Move the `_candidate()` method from the `A` class, placing it at the top level, just before the line where its class starts
-UPDATE CLASS "A" -- As method `calc1` exists in more than 1 class, we specify which one we want to reference
-  FROM FILE "main.py"
+UPDATE CLASS "A"
+FROM FILE "main.py"
 MOVE FUNCTION "_candidate"
 INSERT BEFORE CLASS "A"
-  RELATIVE INDENTATION 0;
+RELATIVE INDENTATION 0;
 
 -- 2. Update the copied function to remove references to `self`
 UPDATE FUNCTION "_candidate"
-  FROM FILE "main.py"
+FROM FILE "main.py"
 REPLACE LINE "def _candidate(self, a, b, c):"
 WITH CONTENT '''
 @0:def _candidate(a, b, c):
 ''';
 
 -- 3. Update ALL call sites of the method `_candidate()` to call the new top-level function with the same name
 UPDATE FUNCTION "_check"
-  FROM FILE "main.py"
+FROM FILE "main.py"
 REPLACE LINE 'self._candidate(' -- There's only 1 call site and this line marker is unambiguous, so it's more concise to just replace a LINE instead of a SEGMENT
 WITH CONTENT '''
 @0:_candidate(
 ''';
 ```
 </assistant>
-</train>
+</train>