Skip to content

Commit 020fb76

Browse files
committed
perf(robot): cache full analysis state for faster warm starts
Extend namespace disk caching to include keyword references, variable references, and local variable assignments. This allows the analysis phase to be skipped entirely when loading from a valid cache.

Key changes:
- Add KeywordRefKey and VariableRefKey stable keys for serialization
- Serialize/restore keyword_references, variable_references, and local_variable_assignments in the namespace cache
- Implement a 10% staleness threshold: if more than 10% of cached references cannot be resolved, fall back to a fresh analysis
- Track references when loading fully analyzed namespaces from the cache
1 parent 59e0314 commit 020fb76

File tree

2 files changed

+292
-4
lines changed

2 files changed

+292
-4
lines changed

packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,10 @@ def __get_namespace_for_document_type(
887887
# Mark as initialized in document data and track imports
888888
document.set_data(self.INITIALIZED_NAMESPACE, cached)
889889
self._track_imports(document, cached)
890+
# If fully analyzed from cache, also track references
891+
# (since has_analysed event won't fire for already-analyzed namespaces)
892+
if cached._analyzed:
893+
self._track_references(document, cached)
890894
return cached
891895

892896
# Cache miss - create new namespace

packages/robot/src/robotcode/robot/diagnostics/namespace.py

Lines changed: 288 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@
108108

109109
# Namespace cache version - bump major for incompatible format changes
110110
# 1.0: Single-file cache format with atomic writes (meta + spec in one file)
111+
# Extended to include full analysis caching (keyword_references, variable_references, local_variable_assignments)
111112
NAMESPACE_META_VERSION = "1.0"
112113

113114

@@ -167,6 +168,37 @@ class CachedVariablesEntry(_CachedEntryBase):
167168
variables: tuple["ImportedVariableDefinition", ...] = ()
168169

169170

171+
@dataclass(frozen=True)
class KeywordRefKey:
    """Immutable, hashable identity of a keyword that stays stable across cache sessions.

    Only the fields required to tell keywords apart are kept: the defining
    location (``source`` + ``line_no``) and the keyword ``name``, which guards
    against matching a different keyword at that same location.

    Attributes:
        source: File path of the definition; empty string for builtins.
        name: Keyword name.
        line_no: Line number of the definition; 0 for builtins.
    """

    source: str
    name: str
    line_no: int
183+
184+
185+
@dataclass(frozen=True)
class VariableRefKey:
    """Immutable, hashable identity of a variable that stays stable across cache sessions.

    Only the fields required to tell variable definitions apart are kept: the
    defining location (``source`` + ``line_no`` + ``col_offset``), the variable
    ``name`` to guard against matching a different variable at that location,
    and ``var_type`` to separate the different kinds of variable definition.

    Attributes:
        source: File path of the definition.
        name: Variable name, e.g. "${MY_VAR}".
        var_type: The ``VariableDefinitionType.value`` of the definition.
        line_no: Line number of the definition.
        col_offset: Column offset of the definition.
    """

    source: str
    name: str
    var_type: str
    line_no: int
    col_offset: int
200+
201+
170202
@dataclass(frozen=True)
171203
class NamespaceCacheData:
172204
"""Serializable namespace state for disk caching (immutable)."""
@@ -189,6 +221,13 @@ class NamespaceCacheData:
189221
# Maps import index (in imports tuple) to set of locations where the import is referenced
190222
namespace_references: tuple[tuple[int, tuple[Location, ...]], ...] = ()
191223

224+
# Full analysis caching - keyword and variable references
225+
# When these are populated and fully_analyzed is True, analysis phase can be skipped entirely
226+
keyword_references: tuple[tuple[KeywordRefKey, tuple[Location, ...]], ...] = ()
227+
variable_references: tuple[tuple[VariableRefKey, tuple[Location, ...]], ...] = ()
228+
local_variable_assignments: tuple[tuple[VariableRefKey, tuple[Range, ...]], ...] = ()
229+
fully_analyzed: bool = False # True if full analysis data is cached
230+
192231

193232
class DiagnosticsError(Exception):
    """Exception type declared by the diagnostics namespace module; adds nothing to ``Exception``."""
@@ -1171,6 +1210,67 @@ def _serialize_namespace_references(self) -> tuple[tuple[int, tuple[Location, ..
11711210

11721211
return tuple(result)
11731212

1213+
def _serialize_keyword_references(self) -> tuple[tuple[KeywordRefKey, tuple[Location, ...]], ...]:
    """Convert ``_keyword_references`` into a cacheable tuple of ``(key, locations)`` pairs.

    Each keyword doc is replaced by a ``KeywordRefKey`` built from its
    (source, name, line_no), a key that survives cache sessions. A missing
    source is stored as the empty string.

    Returns:
        One pair per referenced keyword, or ``()`` when no references have
        been collected (``_keyword_references`` is ``None``).
    """
    references = self._keyword_references
    if references is None:
        return ()

    return tuple(
        (
            KeywordRefKey(source=doc.source or "", name=doc.name, line_no=doc.line_no),
            tuple(where),
        )
        for doc, where in references.items()
    )
1231+
1232+
def _serialize_variable_references(self) -> tuple[tuple[VariableRefKey, tuple[Location, ...]], ...]:
    """Convert ``_variable_references`` into a cacheable tuple of ``(key, locations)`` pairs.

    Each variable definition is replaced by a ``VariableRefKey`` built from
    its (source, name, var_type, line_no, col_offset). A missing source is
    stored as the empty string.

    Returns:
        One pair per referenced variable, or ``()`` when no references have
        been collected (``_variable_references`` is ``None``).
    """
    references = self._variable_references
    if references is None:
        return ()

    return tuple(
        (
            VariableRefKey(
                source=definition.source or "",
                name=definition.name,
                var_type=definition.type.value,
                line_no=definition.line_no,
                col_offset=definition.col_offset,
            ),
            tuple(where),
        )
        for definition, where in references.items()
    )
1252+
1253+
def _serialize_local_variable_assignments(self) -> tuple[tuple[VariableRefKey, tuple[Range, ...]], ...]:
    """Convert ``_local_variable_assignments`` into a cacheable tuple of ``(key, ranges)`` pairs.

    Keys use the same ``VariableRefKey`` format as variable references:
    (source, name, var_type, line_no, col_offset), with a missing source
    stored as the empty string.

    Returns:
        One pair per assigned variable, or ``()`` when no assignments have
        been collected (``_local_variable_assignments`` is ``None``).
    """
    assignments = self._local_variable_assignments
    if assignments is None:
        return ()

    return tuple(
        (
            VariableRefKey(
                source=definition.source or "",
                name=definition.name,
                var_type=definition.type.value,
                line_no=definition.line_no,
                col_offset=definition.col_offset,
            ),
            tuple(spans),
        )
        for definition, spans in assignments.items()
    )
1273+
11741274
def to_cache_data(self) -> NamespaceCacheData:
11751275
"""Extract serializable state for disk caching."""
11761276
# Convert LibraryEntry -> CachedLibraryEntry
@@ -1252,6 +1352,11 @@ def to_cache_data(self) -> NamespaceCacheData:
12521352
),
12531353
tag_definitions=tuple(self._tag_definitions) if self._tag_definitions is not None else (),
12541354
namespace_references=self._serialize_namespace_references(),
1355+
# Full analysis caching
1356+
keyword_references=self._serialize_keyword_references() if self._analyzed else (),
1357+
variable_references=self._serialize_variable_references() if self._analyzed else (),
1358+
local_variable_assignments=self._serialize_local_variable_assignments() if self._analyzed else (),
1359+
fully_analyzed=self._analyzed,
12551360
)
12561361

12571362
@classmethod
@@ -1464,6 +1569,163 @@ def _restore_namespace_references(
14641569
entry = index_to_entry[import_idx]
14651570
ns._namespace_references[entry] = set(locations)
14661571

1572+
@classmethod
def _restore_keyword_references(
    cls,
    ns: "Namespace",
    cached_refs: tuple[tuple[KeywordRefKey, tuple[Location, ...]], ...],
) -> dict[KeywordDoc, set[Location]] | None:
    """Rebuild ``_keyword_references`` from the stable keys stored in the cache.

    Live ``KeywordDoc`` objects are looked up by ``KeywordRefKey`` among the
    keywords of the namespace's imported libraries, its imported resources
    and, if present, its own library doc.

    Args:
        ns: Namespace that supplies the live keyword docs.
        cached_refs: ``(key, locations)`` pairs read from the cache.

    Returns:
        Mapping of live keyword doc to its set of reference locations;
        ``{}`` when ``cached_refs`` is empty; ``None`` when more than 10% of
        the cached keys cannot be resolved (cache likely stale, caller
        should recompute).
    """
    if not cached_refs:
        return {}

    # Collect every library doc whose keywords may be referenced. The order
    # (libraries, resources, own file) decides which doc wins when two
    # keywords share a stable key: the later one replaces the earlier one.
    library_docs = [entry.library_doc for entry in ns._libraries.values()]
    library_docs.extend(entry.library_doc for entry in ns._resources.values())
    if ns._library_doc is not None:
        library_docs.append(ns._library_doc)

    # O(1) lookup: KeywordRefKey -> KeywordDoc
    docs_by_key: dict[KeywordRefKey, KeywordDoc] = {}
    for library_doc in library_docs:
        for kw in library_doc.keywords:
            docs_by_key[KeywordRefKey(kw.source or "", kw.name, kw.line_no)] = kw

    restored: dict[KeywordDoc, set[Location]] = {}
    missing = 0

    for key, locations in cached_refs:
        kw_doc = docs_by_key.get(key)
        if kw_doc is None:
            missing += 1
            continue
        # Several cached entries can resolve to the same live keyword doc
        # (distinct docs may share one stable key, e.g. source-less keywords
        # with equal names). Merge their locations instead of letting the
        # last entry overwrite the earlier ones.
        restored.setdefault(kw_doc, set()).update(locations)

    # More than 10% unresolved -> cache is likely stale; signal to recompute.
    # Integer form of ``missing > len(cached_refs) * 0.1``.
    if missing * 10 > len(cached_refs):
        return None

    return restored
1622+
1623+
@classmethod
def _restore_variable_references(
    cls,
    ns: "Namespace",
    cached_refs: tuple[tuple[VariableRefKey, tuple[Location, ...]], ...],
) -> dict[VariableDefinition, set[Location]] | None:
    """Rebuild ``_variable_references`` from the stable keys stored in the cache.

    Live ``VariableDefinition`` objects are looked up by ``VariableRefKey``
    among the namespace's own variables, the variables of its imported
    resources and the variables of its variables imports. Definitions from
    any other origin cannot be resolved here and count as missing.

    Args:
        ns: Namespace that supplies the live variable definitions.
        cached_refs: ``(key, locations)`` pairs read from the cache.

    Returns:
        Mapping of live variable definition to its set of reference
        locations; ``{}`` when ``cached_refs`` is empty; ``None`` when more
        than 10% of the cached keys cannot be resolved (cache likely stale,
        caller should recompute).
    """
    if not cached_refs:
        return {}

    def key_of(var: VariableDefinition) -> VariableRefKey:
        # Must mirror the key layout used when the cache was written.
        return VariableRefKey(var.source or "", var.name, var.type.value, var.line_no, var.col_offset)

    # Gather candidates in a fixed order (own, resources, variables imports);
    # on a key collision the later definition replaces the earlier one.
    candidates: list[VariableDefinition] = []
    if ns._own_variables is not None:
        candidates.extend(ns._own_variables)
    for res_entry in ns._resources.values():
        candidates.extend(res_entry.variables)
    for var_entry in ns._variables_imports.values():
        candidates.extend(var_entry.variables)

    # O(1) lookup: VariableRefKey -> VariableDefinition
    defs_by_key: dict[VariableRefKey, VariableDefinition] = {key_of(var): var for var in candidates}

    restored: dict[VariableDefinition, set[Location]] = {}
    missing = 0

    for key, locations in cached_refs:
        var_def = defs_by_key.get(key)
        if var_def is None:
            missing += 1
            continue
        # Several cached entries can resolve to the same live definition;
        # merge their locations instead of letting the last one overwrite.
        restored.setdefault(var_def, set()).update(locations)

    # More than 10% unresolved -> cache is likely stale; signal to recompute.
    # Integer form of ``missing > len(cached_refs) * 0.1``.
    if missing * 10 > len(cached_refs):
        return None

    return restored
1679+
1680+
@classmethod
def _restore_local_variable_assignments(
    cls,
    ns: "Namespace",
    cached_refs: tuple[tuple[VariableRefKey, tuple[Range, ...]], ...],
) -> dict[VariableDefinition, set[Range]] | None:
    """Rebuild ``_local_variable_assignments`` from the stable keys stored in the cache.

    Live ``VariableDefinition`` objects are looked up by ``VariableRefKey``
    among the namespace's own variables and the variables of its imported
    resources. Definitions from any other origin cannot be resolved here and
    count as missing.

    Args:
        ns: Namespace that supplies the live variable definitions.
        cached_refs: ``(key, ranges)`` pairs read from the cache.

    Returns:
        Mapping of live variable definition to its set of assignment ranges;
        ``{}`` when ``cached_refs`` is empty; ``None`` when more than 10% of
        the cached keys cannot be resolved (cache likely stale, caller
        should recompute).
    """
    if not cached_refs:
        return {}

    def key_of(var: VariableDefinition) -> VariableRefKey:
        # Must mirror the key layout used when the cache was written.
        return VariableRefKey(var.source or "", var.name, var.type.value, var.line_no, var.col_offset)

    # Own variables first, then resource variables; on a key collision the
    # later definition replaces the earlier one.
    candidates: list[VariableDefinition] = []
    if ns._own_variables is not None:
        candidates.extend(ns._own_variables)
    for res_entry in ns._resources.values():
        candidates.extend(res_entry.variables)

    # O(1) lookup: VariableRefKey -> VariableDefinition
    defs_by_key: dict[VariableRefKey, VariableDefinition] = {key_of(var): var for var in candidates}

    restored: dict[VariableDefinition, set[Range]] = {}
    missing = 0

    for key, ranges in cached_refs:
        var_def = defs_by_key.get(key)
        if var_def is None:
            missing += 1
            continue
        # Several cached entries can resolve to the same live definition;
        # merge their ranges instead of letting the last one overwrite.
        restored.setdefault(var_def, set()).update(ranges)

    # More than 10% unresolved -> cache is likely stale; signal to recompute.
    # Integer form of ``missing > len(cached_refs) * 0.1``.
    if missing * 10 > len(cached_refs):
        return None

    return restored
1728+
14671729
@classmethod
14681730
def from_cache_data(
14691731
cls,
@@ -1517,13 +1779,35 @@ def from_cache_data(
15171779
# Mark as initialized
15181780
ns._initialized = True
15191781

1520-
# Restore cached diagnostics if available (analysis will still run to populate references)
1521-
# Note: We don't set _analyzed=True because features like goto definition need
1522-
# _variable_references and _keyword_references which aren't cached yet.
1523-
# The analysis phase will run but initialization is cached, providing partial speedup.
1782+
# Restore cached diagnostics if available
15241783
if cache_data.analyzed and cache_data.diagnostics:
15251784
ns._diagnostics = list(cache_data.diagnostics)
15261785

1786+
# Restore cached test case and tag definitions
1787+
if cache_data.test_case_definitions:
1788+
ns._test_case_definitions = list(cache_data.test_case_definitions)
1789+
if cache_data.tag_definitions:
1790+
ns._tag_definitions = list(cache_data.tag_definitions)
1791+
1792+
# Restore namespace references
1793+
if cache_data.namespace_references:
1794+
cls._restore_namespace_references(ns, cache_data.namespace_references)
1795+
1796+
# Attempt full analysis restoration if available
1797+
# This allows skipping the analysis phase entirely on warm start
1798+
if cache_data.fully_analyzed:
1799+
keyword_refs = cls._restore_keyword_references(ns, cache_data.keyword_references)
1800+
variable_refs = cls._restore_variable_references(ns, cache_data.variable_references)
1801+
local_var_assigns = cls._restore_local_variable_assignments(ns, cache_data.local_variable_assignments)
1802+
1803+
# Only set _analyzed=True if ALL references were restored successfully
1804+
# If any returned None (>10% missing), fall back to recomputing
1805+
if keyword_refs is not None and variable_refs is not None and local_var_assigns is not None:
1806+
ns._keyword_references = keyword_refs
1807+
ns._variable_references = variable_refs
1808+
ns._local_variable_assignments = local_var_assigns
1809+
ns._analyzed = True
1810+
15271811
return ns
15281812

15291813
class DataEntry(NamedTuple):

0 commit comments

Comments
 (0)