dkpro · dmnc-grdnr · Aug 10, 2025 · Nov 10, 2025 · Dec 21, 2025 · Copilot
diff --git a/cassis/cas.py b/cassis/cas.py
@@ -367,6 +367,38 @@ def add_annotations(self, annotations: Iterable[FeatureStructure]):
         """
         self.add_all(annotations)
 
+    def cut_sofa_string_to_range(self, sofa_begin: int, sofa_end: int, overlap=True):
+        """Replaces the current sofa string with the cutout ``[sofa_begin:sofa_end]`` and re-anchors annotations.
+
+        Annotations lying completely inside the range are shifted so that their offsets are relative to
+        the new sofa string. Annotations that only partially cover the range are clipped to the borders
+        of the new sofa string if ``overlap`` is true. All other annotations are removed.
+
+        Args:
+            sofa_begin: The beginning of the cutout in the current sofa string (inclusive).
+            sofa_end: The end of the cutout in the current sofa string (exclusive).
+            overlap: If true, keeps overlapping annotations and clips their begin and end to the cutout.
+
+        Raises:
+            ValueError: If not ``0 <= sofa_begin < sofa_end <= len(self.sofa_string)``.
+        """
+        if not 0 <= sofa_begin < sofa_end <= len(self.sofa_string):
+            raise ValueError(f"Invalid indices for begin {sofa_begin} and end {sofa_end}")
+
+        self.sofa_string = self.sofa_string[sofa_begin:sofa_end]
+        # The indices were validated above, so the slice has exactly this length.
+        cut_length = sofa_end - sofa_begin
+
+        # NOTE(review): begin/end are changed in place on annotations that are still indexed. If the
+        # index orders annotations by their offsets, this may require a remove/re-add - confirm.
+        # Iterate over a snapshot so that removing annotations cannot disturb the iteration.
+        for annotation in list(self.select_all()):
+            begin, end = annotation.begin, annotation.end
+            if sofa_begin <= begin and end <= sofa_end:
+                # Completely inside the cutout: only shift to the new origin.
+                annotation.begin = begin - sofa_begin
+                annotation.end = end - sofa_begin
+            elif overlap and sofa_begin < end <= sofa_end:
+                # Starts before the cutout and ends inside of it: clip the begin.
+                annotation.begin = 0
+                annotation.end = end - sofa_begin
+            elif overlap and sofa_begin <= begin < sofa_end:
+                # Starts inside the cutout and ends behind it: clip the end.
+                annotation.begin = begin - sofa_begin
+                annotation.end = cut_length
+            elif overlap and begin <= sofa_begin and sofa_end <= end:
+                # Covers the whole cutout: clip both ends.
+                annotation.begin = 0
+                annotation.end = cut_length
+            else:
+                self.remove(annotation)
+
     def remove(self, annotation: FeatureStructure):
         """Removes an annotation from an index. This throws if the
         annotation was not present.
@@ -386,6 +418,26 @@ def remove_annotation(self, annotation: FeatureStructure):
         """
         self.remove(annotation)
 
+
+    def remove_in_range(self, cut_begin: int, cut_end: int, type_: Union[Type, str, None] = None):
+        """Removes annotations that lie completely between two indices of the sofa string.
+
+        An annotation is removed if ``cut_begin <= begin < end <= cut_end`` holds for its offsets.
+        Zero-width annotations and annotations that are only partially inside the interval are kept.
+
+        Args:
+            cut_begin: The beginning of the cutting interval.
+            cut_end: The end of the cutting interval.
+            type_: The type or the name of the type whose annotation instances are to be removed.
+                If `None`, annotations of all types are considered.
+
+        Raises:
+            ValueError: If not ``0 <= cut_begin < cut_end <= len(self.sofa_string)``. An unknown
+                `type_` is reported by `select` (presumably also as an error - not verified here).
+        """
+        # Validate the interval first so that invalid indices are reported before anything is selected.
+        if not 0 <= cut_begin < cut_end <= len(self.sofa_string):
+            raise ValueError(f"Invalid indices for begin {cut_begin} and end {cut_end}")
+
+        candidates = self.select_all() if type_ is None else self.select(type_)
+        # Iterate over a snapshot so that removing annotations cannot disturb the iteration.
+        for annotation in list(candidates):
+            if cut_begin <= annotation.begin < annotation.end <= cut_end:
+                self.remove(annotation)
+
     @deprecation.deprecated(details="Use annotation.get_covered_text()")
     def get_covered_text(self, annotation: FeatureStructure) -> str:
         """Gets the text that is covered by `annotation`.

diff --git a/tests/test_cas.py b/tests/test_cas.py
@@ -604,3 +604,78 @@ def test_covered_text_on_annotation_without_sofa():
 
     with pytest.raises(AnnotationHasNoSofa):
         ann.get_covered_text()
+
+
+def test_remove_in_range(small_typesystem_xml, small_xmi):
+    """Annotations lying completely inside the range are removed, all others stay."""
+    cas = load_cas_from_xmi(small_xmi, load_typesystem(small_typesystem_xml))
+    begin, end = 10, 20
+
+    def in_range(annotation):
+        return begin <= annotation.begin < annotation.end <= end
+
+    # Collect the expected survivors before the CAS is modified.
+    survivors = [fs for fs in cas.select_all() if not in_range(fs)]
+
+    cas.remove_in_range(begin, end)
+    remaining = cas.select_all()
+
+    assert len(remaining) == len(survivors)
+    for fs in survivors:
+        assert fs in remaining
+
+def test_remove_in_range_with_type(small_typesystem_xml, small_xmi):
+    """Only annotations of the given type lying completely inside the range are removed."""
+    cas = load_cas_from_xmi(small_xmi, load_typesystem(small_typesystem_xml))
+    begin, end = 0, 27
+    type_ = "cassis.Token"
+
+    def in_range(annotation):
+        return begin <= annotation.begin < annotation.end <= end
+
+    # Collect the expected survivors before the CAS is modified: everything that is either
+    # not completely inside the range or not of the requested type.
+    survivors = [fs for fs in cas.select_all() if not in_range(fs) or fs.type.name != type_]
+
+    cas.remove_in_range(begin, end, type_)
+    remaining = cas.select_all()
+
+    assert len(remaining) == len(survivors)
+    for fs in survivors:
+        assert fs in remaining
+        if in_range(fs):
+            assert fs.type.name != type_
+
+
+def test_cut_sofa_string_to_range(small_typesystem_xml, small_xmi):
+    """With overlap enabled, every annotation that touches the cutout is kept."""
+    typesystem = load_typesystem(small_typesystem_xml)
+    cas = load_cas_from_xmi(small_xmi, typesystem)
+
+    begin = 10
+    end = 20
+
+    # Both expectations have to be computed before the CAS is modified.
+    expected_sofa_string = cas.sofa_string[begin:end]
+    expected_leftover_annotations = [
+        annotation
+        for annotation in cas.select_all()
+        if (begin <= annotation.begin < end)  # starts inside the cutout
+        or (begin < annotation.end <= end)  # ends inside the cutout
+        or (annotation.begin < begin and annotation.end > end)  # spans the whole cutout
+    ]
+
+    cas.cut_sofa_string_to_range(begin, end)
+
+    assert cas.sofa_string == expected_sofa_string
+    assert len(cas.select_all()) == len(expected_leftover_annotations)
+
+
+def test_cut_sofa_string_to_range_no_overlap(small_typesystem_xml, small_xmi):
+    """With overlap disabled, only annotations lying completely inside the cutout are kept."""
+    typesystem = load_typesystem(small_typesystem_xml)
+    cas = load_cas_from_xmi(small_xmi, typesystem)
+
+    begin = 10
+    end = 20
+
+    # Both expectations have to be computed before the CAS is modified.
+    expected_sofa_string = cas.sofa_string[begin:end]
+    # Mirrors the containment check of cut_sofa_string_to_range, which also keeps
+    # zero-width annotations located inside the cutout.
+    expected_leftover_annotations = [
+        annotation
+        for annotation in cas.select_all()
+        if begin <= annotation.begin and annotation.end <= end
+    ]
+
+    cas.cut_sofa_string_to_range(begin, end, overlap=False)
+
+    assert cas.sofa_string == expected_sofa_string
+    assert len(cas.select_all()) == len(expected_leftover_annotations)