66"""
77
88import asyncio
9- from typing import Dict , List , Union
9+ from typing import TYPE_CHECKING , Dict , List , Union
1010
11- from datafog .processing .text_processing .regex_annotator .regex_annotator import (
12- RegexAnnotator ,
13- Span ,
14- )
11+ if TYPE_CHECKING :
12+ from datafog .processing .text_processing .regex_annotator .regex_annotator import Span
1513
1614
1715class TextService :
@@ -43,26 +41,58 @@ def __init__(self, text_chunk_length: int = 1000, engine: str = "regex"):
4341 """
4442 assert engine in {"regex" , "spacy" , "auto" }, "Invalid engine"
4543 self .engine = engine
46- self .regex_annotator = RegexAnnotator ()
4744 self .text_chunk_length = text_chunk_length
4845
49- # Only initialize spacy if needed and available
50- self .spacy_annotator = None
51- if engine in {"spacy" , "auto" }:
52- try :
53- from datafog .processing .text_processing .spacy_pii_annotator import (
54- SpacyPIIAnnotator ,
55- )
56-
57- self .spacy_annotator = SpacyPIIAnnotator .create ()
58- except ImportError :
59- if engine == "spacy" :
60- raise ImportError (
61- "SpaCy engine requires additional dependencies. "
62- "Install with: pip install datafog[nlp]"
63- )
64- # For auto mode, just continue with regex only
65- self .spacy_annotator = None
46+ # Lazy initialization - annotators created only when needed
47+ self ._regex_annotator = None
48+ self ._spacy_annotator = None
49+ self ._spacy_import_attempted = False
50+
51+ # For spacy-only mode, validate dependencies at init time
52+ if engine == "spacy" :
53+ self ._ensure_spacy_available ()
54+
@property
def regex_annotator(self):
    """Return the regex annotator, building it on first access.

    The ``RegexAnnotator`` import is performed inside the property so the
    module is only loaded when an annotator is actually requested. The
    instance is cached on ``self._regex_annotator`` and reused afterwards.
    """
    cached = self._regex_annotator
    if cached is not None:
        return cached

    # Deferred import: only paid for on the first access.
    from datafog.processing.text_processing.regex_annotator.regex_annotator import (
        RegexAnnotator,
    )

    self._regex_annotator = RegexAnnotator()
    return self._regex_annotator
65+
@property
def spacy_annotator(self):
    """Return the spaCy annotator, attempting to build it at most once.

    On the first access ``_create_spacy_annotator`` is called and its
    result (which may be ``None``) is stored on ``self._spacy_annotator``.
    ``self._spacy_import_attempted`` is then set so later accesses return
    the stored value without calling the factory again.
    """
    already_resolved = (
        self._spacy_annotator is not None or self._spacy_import_attempted
    )
    if already_resolved:
        return self._spacy_annotator

    self._spacy_annotator = self._create_spacy_annotator()
    # Set only after the factory returned, so an exception raised by the
    # factory leaves the flag untouched and the next access tries again.
    self._spacy_import_attempted = True
    return self._spacy_annotator
73+
74+ def _ensure_spacy_available (self ):
75+ """Ensure spaCy dependencies are available, raise ImportError if not."""
76+ try :
77+ from datafog .processing .text_processing .spacy_pii_annotator import ( # noqa: F401
78+ SpacyPIIAnnotator ,
79+ )
80+ except ImportError :
81+ raise ImportError (
82+ "SpaCy engine requires additional dependencies. "
83+ "Install with: pip install datafog[nlp]"
84+ )
85+
86+ def _create_spacy_annotator (self ):
87+ """Create spaCy annotator if dependencies are available."""
88+ try :
89+ from datafog .processing .text_processing .spacy_pii_annotator import (
90+ SpacyPIIAnnotator ,
91+ )
92+
93+ return SpacyPIIAnnotator .create ()
94+ except ImportError :
95+ return None
6696
6797 def _chunk_text (self , text : str ) -> List [str ]:
6898 """Split the text into chunks of specified length."""
@@ -85,7 +115,7 @@ def _combine_annotations(
85115
86116 def annotate_text_sync (
87117 self , text : str , structured : bool = False
88- ) -> Union [Dict [str , List [str ]], List [Span ]]:
118+ ) -> Union [Dict [str , List [str ]], List [" Span" ]]:
89119 """
90120 Annotate text synchronously for PII entities.
91121
@@ -162,7 +192,7 @@ def annotate_text_sync(
162192
163193 async def annotate_text_async (
164194 self , text : str , structured : bool = False
165- ) -> Union [Dict [str , List [str ]], List [Span ]]:
195+ ) -> Union [Dict [str , List [str ]], List [" Span" ]]:
166196 """
167197 Annotate text asynchronously for PII entities.
168198
0 commit comments