108108
# Namespace cache version - bump major for incompatible format changes
# 1.0: Single-file cache format with atomic writes (meta + spec in one file).
#      Extended to include full analysis caching (keyword_references,
#      variable_references, local_variable_assignments).
# NOTE(review): the version string was left at "1.0" when the analysis fields
# were added; this presumably relies on those fields having defaults so older
# cache files still load - confirm against the cache loader.
NAMESPACE_META_VERSION = "1.0"
112113
113114
@@ -167,6 +168,37 @@ class CachedVariablesEntry(_CachedEntryBase):
167168 variables : tuple ["ImportedVariableDefinition" , ...] = ()
168169
169170
@dataclass(frozen=True)
class KeywordRefKey:
    """Stable, hashable key for identifying a keyword across cache sessions.

    The key is the triple ``(source, name, line_no)``:

    - ``source`` + ``line_no`` point at the place the keyword is defined
    - ``name`` makes sure the right keyword at that place is matched

    A keyword without a source is stored with ``source=""``, so for such
    keywords the key is only as unique as ``name`` and ``line_no`` are.
    Two source-less keywords that share both values produce equal keys.
    """

    source: str  # File path (empty string for builtins)
    name: str  # Keyword name
    line_no: int  # Line number (0 for builtins)
183+
184+
@dataclass(frozen=True)
class VariableRefKey:
    """Stable, hashable key for identifying a variable across cache sessions.

    The key is ``(source, name, var_type, line_no, col_offset)``:

    - ``source`` + ``line_no`` + ``col_offset`` point at the definition site
    - ``name`` makes sure the right variable at that site is matched
    - ``var_type`` separates different kinds of variable definition

    A definition without a source is stored with ``source=""``.
    """

    source: str  # File path (empty string when the definition has no source)
    name: str  # Variable name (e.g., "${MY_VAR}")
    var_type: str  # VariableDefinitionType.value
    line_no: int  # Line of the definition
    col_offset: int  # Column offset of the definition
200+
201+
170202@dataclass (frozen = True )
171203class NamespaceCacheData :
172204 """Serializable namespace state for disk caching (immutable)."""
@@ -189,6 +221,13 @@ class NamespaceCacheData:
189221 # Maps import index (in imports tuple) to set of locations where the import is referenced
190222 namespace_references : tuple [tuple [int , tuple [Location , ...]], ...] = ()
191223
224+ # Full analysis caching - keyword and variable references
225+ # When these are populated and fully_analyzed is True, analysis phase can be skipped entirely
226+ keyword_references : tuple [tuple [KeywordRefKey , tuple [Location , ...]], ...] = ()
227+ variable_references : tuple [tuple [VariableRefKey , tuple [Location , ...]], ...] = ()
228+ local_variable_assignments : tuple [tuple [VariableRefKey , tuple [Range , ...]], ...] = ()
229+ fully_analyzed : bool = False # True if full analysis data is cached
230+
192231
class DiagnosticsError(Exception):
    """Module-specific exception type; adds nothing to :class:`Exception`.

    NOTE(review): presumably raised for diagnostics-related failures - the
    raise sites are not visible from this part of the file.
    """
@@ -1171,6 +1210,67 @@ def _serialize_namespace_references(self) -> tuple[tuple[int, tuple[Location, ..
11711210
11721211 return tuple (result )
11731212
def _serialize_keyword_references(self) -> tuple[tuple[KeywordRefKey, tuple[Location, ...]], ...]:
    """Serialize ``_keyword_references`` for caching using stable keys.

    Each keyword is replaced by a ``KeywordRefKey(source, name, line_no)``
    so the mapping no longer depends on object identity. A missing source
    is stored as the empty string.

    Returns:
        One ``(key, locations)`` pair per entry of ``_keyword_references``,
        in the mapping's iteration order, or ``()`` when the mapping is
        ``None``.
    """
    references = self._keyword_references
    if references is None:
        return ()

    return tuple(
        (
            KeywordRefKey(
                source=kw_doc.source or "",
                name=kw_doc.name,
                line_no=kw_doc.line_no,
            ),
            # Freeze the location collection so the cached value is immutable.
            tuple(locations),
        )
        for kw_doc, locations in references.items()
    )
1231+
def _serialize_variable_references(self) -> tuple[tuple[VariableRefKey, tuple[Location, ...]], ...]:
    """Serialize ``_variable_references`` for caching using stable keys.

    Each variable definition is replaced by a
    ``VariableRefKey(source, name, var_type, line_no, col_offset)``, where
    ``var_type`` is ``var_def.type.value``. A missing source is stored as
    the empty string.

    Returns:
        One ``(key, locations)`` pair per entry of ``_variable_references``,
        in the mapping's iteration order, or ``()`` when the mapping is
        ``None``.
    """
    references = self._variable_references
    if references is None:
        return ()

    return tuple(
        (
            VariableRefKey(
                source=var_def.source or "",
                name=var_def.name,
                var_type=var_def.type.value,
                line_no=var_def.line_no,
                col_offset=var_def.col_offset,
            ),
            # Freeze the location collection so the cached value is immutable.
            tuple(locations),
        )
        for var_def, locations in references.items()
    )
1252+
def _serialize_local_variable_assignments(self) -> tuple[tuple[VariableRefKey, tuple[Range, ...]], ...]:
    """Serialize ``_local_variable_assignments`` for caching using stable keys.

    Uses the same ``VariableRefKey(source, name, var_type, line_no,
    col_offset)`` key layout as the variable-reference serializer; the
    values are ranges rather than locations.

    Returns:
        One ``(key, ranges)`` pair per entry of
        ``_local_variable_assignments``, in the mapping's iteration order,
        or ``()`` when the mapping is ``None``.
    """
    assignments = self._local_variable_assignments
    if assignments is None:
        return ()

    return tuple(
        (
            VariableRefKey(
                source=var_def.source or "",
                name=var_def.name,
                var_type=var_def.type.value,
                line_no=var_def.line_no,
                col_offset=var_def.col_offset,
            ),
            # Freeze the range collection so the cached value is immutable.
            tuple(ranges),
        )
        for var_def, ranges in assignments.items()
    )
1273+
11741274 def to_cache_data (self ) -> NamespaceCacheData :
11751275 """Extract serializable state for disk caching."""
11761276 # Convert LibraryEntry -> CachedLibraryEntry
@@ -1252,6 +1352,11 @@ def to_cache_data(self) -> NamespaceCacheData:
12521352 ),
12531353 tag_definitions = tuple (self ._tag_definitions ) if self ._tag_definitions is not None else (),
12541354 namespace_references = self ._serialize_namespace_references (),
1355+ # Full analysis caching
1356+ keyword_references = self ._serialize_keyword_references () if self ._analyzed else (),
1357+ variable_references = self ._serialize_variable_references () if self ._analyzed else (),
1358+ local_variable_assignments = self ._serialize_local_variable_assignments () if self ._analyzed else (),
1359+ fully_analyzed = self ._analyzed ,
12551360 )
12561361
12571362 @classmethod
@@ -1464,6 +1569,163 @@ def _restore_namespace_references(
14641569 entry = index_to_entry [import_idx ]
14651570 ns ._namespace_references [entry ] = set (locations )
14661571
@classmethod
def _restore_keyword_references(
    cls,
    ns: "Namespace",
    cached_refs: tuple[tuple[KeywordRefKey, tuple[Location, ...]], ...],
) -> dict[KeywordDoc, set[Location]] | None:
    """Restore ``_keyword_references`` from cached stable keys.

    Args:
        ns: Namespace whose libraries, resources and own library doc supply
            the keyword objects the cached keys are resolved against.
        cached_refs: ``(key, locations)`` pairs as stored in the cache.

    Returns:
        ``{}`` when ``cached_refs`` is empty; ``None`` when more than 10% of
        the cached keys cannot be resolved (cache likely stale, caller should
        recompute); otherwise the restored mapping. Unresolved keys below
        that threshold are left out of the result.
    """
    if not cached_refs:
        return {}

    # Gather keyword collections in precedence order: imported libraries,
    # then imported resources, then this file's own keywords. On a key
    # collision the later collection wins.
    keyword_groups = [entry.library_doc.keywords for entry in ns._libraries.values()]
    keyword_groups += [entry.library_doc.keywords for entry in ns._resources.values()]
    if ns._library_doc is not None:
        keyword_groups.append(ns._library_doc.keywords)

    # Build O(1) lookup: KeywordRefKey -> KeywordDoc
    lookup: dict[KeywordRefKey, KeywordDoc] = {}
    for keywords in keyword_groups:
        for kw in keywords:
            lookup[KeywordRefKey(kw.source or "", kw.name, kw.line_no)] = kw

    # More than this many unresolved keys means the cache is likely stale.
    max_missing = len(cached_refs) * 0.1

    result: dict[KeywordDoc, set[Location]] = {}
    missing = 0
    for key, locations in cached_refs:
        kw_doc = lookup.get(key)
        if kw_doc is None:
            missing += 1
            if missing > max_missing:
                # Threshold already exceeded; no need to resolve the rest.
                return None
            continue
        # Merge rather than assign: keys are not guaranteed unique (e.g.
        # source-less keywords sharing name and line), so several cached
        # entries may resolve to the same keyword and none may be dropped.
        result.setdefault(kw_doc, set()).update(locations)

    return result
1622+
@classmethod
def _restore_variable_references(
    cls,
    ns: "Namespace",
    cached_refs: tuple[tuple[VariableRefKey, tuple[Location, ...]], ...],
) -> dict[VariableDefinition, set[Location]] | None:
    """Restore ``_variable_references`` from cached stable keys.

    Args:
        ns: Namespace whose own variables, resource variables and
            variables-import variables supply the definitions the cached
            keys are resolved against.
        cached_refs: ``(key, locations)`` pairs as stored in the cache.

    Returns:
        ``{}`` when ``cached_refs`` is empty; ``None`` when more than 10% of
        the cached keys cannot be resolved (cache likely stale, caller should
        recompute); otherwise the restored mapping. Unresolved keys below
        that threshold are left out of the result.
    """
    if not cached_refs:
        return {}

    # Gather variable collections in precedence order: own variables, then
    # imported resources, then variables imports. On a key collision the
    # later collection wins.
    variable_groups = []
    if ns._own_variables is not None:
        variable_groups.append(ns._own_variables)
    variable_groups.extend(res_entry.variables for res_entry in ns._resources.values())
    variable_groups.extend(var_entry.variables for var_entry in ns._variables_imports.values())

    # Build O(1) lookup: VariableRefKey -> VariableDefinition
    lookup: dict[VariableRefKey, VariableDefinition] = {}
    for variables in variable_groups:
        for var in variables:
            key = VariableRefKey(var.source or "", var.name, var.type.value, var.line_no, var.col_offset)
            lookup[key] = var

    # More than this many unresolved keys means the cache is likely stale.
    max_missing = len(cached_refs) * 0.1

    result: dict[VariableDefinition, set[Location]] = {}
    missing = 0
    for key, locations in cached_refs:
        var_def = lookup.get(key)
        if var_def is None:
            missing += 1
            if missing > max_missing:
                # Threshold already exceeded; no need to resolve the rest.
                return None
            continue
        # Merge rather than assign so that several cached entries resolving
        # to the same definition never discard each other's locations.
        result.setdefault(var_def, set()).update(locations)

    return result
1679+
@classmethod
def _restore_local_variable_assignments(
    cls,
    ns: "Namespace",
    cached_refs: tuple[tuple[VariableRefKey, tuple[Range, ...]], ...],
) -> dict[VariableDefinition, set[Range]] | None:
    """Restore ``_local_variable_assignments`` from cached stable keys.

    Args:
        ns: Namespace whose own variables and resource variables supply the
            definitions the cached keys are resolved against.
        cached_refs: ``(key, ranges)`` pairs as stored in the cache.

    Returns:
        ``{}`` when ``cached_refs`` is empty; ``None`` when more than 10% of
        the cached keys cannot be resolved (cache likely stale, caller should
        recompute); otherwise the restored mapping. Unresolved keys below
        that threshold are left out of the result.
    """
    if not cached_refs:
        return {}

    # Candidate definitions: own variables first, then imported resources.
    # On a key collision the later collection wins.
    # NOTE(review): variables imports are not searched here, unlike the
    # variable-reference restore. If locally scoped definitions are not part
    # of these two collections, their keys will all count as missing -
    # confirm against the analyzer.
    variable_groups = []
    if ns._own_variables is not None:
        variable_groups.append(ns._own_variables)
    variable_groups.extend(res_entry.variables for res_entry in ns._resources.values())

    # Build O(1) lookup: VariableRefKey -> VariableDefinition
    lookup: dict[VariableRefKey, VariableDefinition] = {}
    for variables in variable_groups:
        for var in variables:
            key = VariableRefKey(var.source or "", var.name, var.type.value, var.line_no, var.col_offset)
            lookup[key] = var

    # More than this many unresolved keys means the cache is likely stale.
    max_missing = len(cached_refs) * 0.1

    result: dict[VariableDefinition, set[Range]] = {}
    missing = 0
    for key, ranges in cached_refs:
        var_def = lookup.get(key)
        if var_def is None:
            missing += 1
            if missing > max_missing:
                # Threshold already exceeded; no need to resolve the rest.
                return None
            continue
        # Merge rather than assign so that several cached entries resolving
        # to the same definition never discard each other's ranges.
        result.setdefault(var_def, set()).update(ranges)

    return result
1728+
14671729 @classmethod
14681730 def from_cache_data (
14691731 cls ,
@@ -1517,13 +1779,35 @@ def from_cache_data(
15171779 # Mark as initialized
15181780 ns ._initialized = True
15191781
1520- # Restore cached diagnostics if available (analysis will still run to populate references)
1521- # Note: We don't set _analyzed=True because features like goto definition need
1522- # _variable_references and _keyword_references which aren't cached yet.
1523- # The analysis phase will run but initialization is cached, providing partial speedup.
1782+ # Restore cached diagnostics if available
15241783 if cache_data .analyzed and cache_data .diagnostics :
15251784 ns ._diagnostics = list (cache_data .diagnostics )
15261785
1786+ # Restore cached test case and tag definitions
1787+ if cache_data .test_case_definitions :
1788+ ns ._test_case_definitions = list (cache_data .test_case_definitions )
1789+ if cache_data .tag_definitions :
1790+ ns ._tag_definitions = list (cache_data .tag_definitions )
1791+
1792+ # Restore namespace references
1793+ if cache_data .namespace_references :
1794+ cls ._restore_namespace_references (ns , cache_data .namespace_references )
1795+
1796+ # Attempt full analysis restoration if available
1797+ # This allows skipping the analysis phase entirely on warm start
1798+ if cache_data .fully_analyzed :
1799+ keyword_refs = cls ._restore_keyword_references (ns , cache_data .keyword_references )
1800+ variable_refs = cls ._restore_variable_references (ns , cache_data .variable_references )
1801+ local_var_assigns = cls ._restore_local_variable_assignments (ns , cache_data .local_variable_assignments )
1802+
1803+ # Only set _analyzed=True if ALL references were restored successfully
1804+ # If any returned None (>10% missing), fall back to recomputing
1805+ if keyword_refs is not None and variable_refs is not None and local_var_assigns is not None :
1806+ ns ._keyword_references = keyword_refs
1807+ ns ._variable_references = variable_refs
1808+ ns ._local_variable_assignments = local_var_assigns
1809+ ns ._analyzed = True
1810+
15271811 return ns
15281812
15291813 class DataEntry (NamedTuple ):
0 commit comments