Skip to content

Commit af2170a

Browse files
committed
feat(robot): add namespace disk caching with atomic writes and tests
Add persistent namespace caching to significantly improve warm start performance. Cached namespaces are loaded from disk instead of being rebuilt from scratch.

Key changes:
- Add NamespaceMetaData and NamespaceCacheData frozen dataclasses for cache serialization with validation fields (mtime, content_hash, python_executable, sys_path_hash)
- Add atomic cache writes using temp file + rename pattern
- Add reverse dependency tracking for efficient library/variable change propagation (get_library_users, get_variables_users)
- Skip content hash computation when mtime AND size match
- Add ResourceMetaData for resource caching

Tests:
- Unit tests for PickleDataCache atomic writes (28 tests)
- Unit tests for NamespaceMetaData and cached entries (20 tests)
- Unit tests for ResourceMetaData cache keys (15 tests)
- Integration tests for namespace caching behavior (11 tests)
1 parent a4c5d02 commit af2170a

File tree

9 files changed

+2263
-159
lines changed

9 files changed

+2263
-159
lines changed

packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py

Lines changed: 79 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -45,29 +45,35 @@ def __init__(self, parent: "RobotLanguageServerProtocol") -> None:
4545
self.parent.diagnostics.collect.add(self.collect_unused_keyword_references)
4646
self.parent.diagnostics.collect.add(self.collect_unused_variable_references)
4747

48-
self.parent.diagnostics.on_get_related_documents.add(self._on_get_related_documents)
48+
self.parent.diagnostics.on_get_related_documents.add(
49+
self._on_get_related_documents
50+
)
4951

5052
def _on_initialized(self, sender: Any) -> None:
5153
self.parent.diagnostics.analyze.add(self.analyze)
52-
self.parent.documents_cache.namespace_initialized(self._on_namespace_initialized)
54+
self.parent.documents_cache.namespace_initialized(
55+
self._on_namespace_initialized
56+
)
5357
self.parent.documents_cache.libraries_changed.add(self._on_libraries_changed)
5458
self.parent.documents_cache.variables_changed.add(self._on_variables_changed)
5559

5660
def _on_libraries_changed(self, sender: Any, libraries: List[LibraryDoc]) -> None:
57-
for doc in self.parent.documents.documents:
58-
namespace = self.parent.documents_cache.get_only_initialized_namespace(doc)
59-
if namespace is not None:
60-
lib_docs = (e.library_doc for e in namespace.get_libraries().values())
61-
if any(lib_doc in lib_docs for lib_doc in libraries):
62-
self.parent.diagnostics.force_refresh_document(doc)
61+
docs_to_refresh: set[TextDocument] = set()
62+
for lib_doc in libraries:
63+
docs_to_refresh.update(
64+
self.parent.documents_cache.get_library_users(lib_doc)
65+
)
66+
for doc in docs_to_refresh:
67+
self.parent.diagnostics.force_refresh_document(doc)
6368

6469
def _on_variables_changed(self, sender: Any, variables: List[LibraryDoc]) -> None:
65-
for doc in self.parent.documents.documents:
66-
namespace = self.parent.documents_cache.get_only_initialized_namespace(doc)
67-
if namespace is not None:
68-
lib_docs = (e.library_doc for e in namespace.get_variables_imports().values())
69-
if any(lib_doc in lib_docs for lib_doc in variables):
70-
self.parent.diagnostics.force_refresh_document(doc)
70+
docs_to_refresh: set[TextDocument] = set()
71+
for var_doc in variables:
72+
docs_to_refresh.update(
73+
self.parent.documents_cache.get_variables_users(var_doc)
74+
)
75+
for doc in docs_to_refresh:
76+
self.parent.diagnostics.force_refresh_document(doc)
7177

7278
@language_id("robotframework")
7379
def analyze(self, sender: Any, document: TextDocument) -> None:
@@ -79,41 +85,35 @@ def _on_namespace_initialized(self, sender: Any, namespace: Namespace) -> None:
7985
self.parent.diagnostics.force_refresh_document(namespace.document)
8086

8187
@language_id("robotframework")
82-
def _on_get_related_documents(self, sender: Any, document: TextDocument) -> Optional[List[TextDocument]]:
88+
def _on_get_related_documents(
89+
self, sender: Any, document: TextDocument
90+
) -> Optional[List[TextDocument]]:
8391
namespace = self.parent.documents_cache.get_only_initialized_namespace(document)
8492
if namespace is None:
8593
return None
94+
source = str(document.uri.to_path())
95+
return self.parent.documents_cache.get_importers(source)
8696

87-
result = []
88-
89-
lib_doc = namespace.get_library_doc()
90-
for doc in self.parent.documents.documents:
91-
if doc.language_id != "robotframework":
92-
continue
93-
94-
doc_namespace = self.parent.documents_cache.get_only_initialized_namespace(doc)
95-
if doc_namespace is None:
96-
continue
97-
98-
if doc_namespace.is_analyzed():
99-
for ref in doc_namespace.get_namespace_references():
100-
if ref.library_doc == lib_doc:
101-
result.append(doc)
102-
103-
return result
104-
105-
def modify_diagnostics(self, document: TextDocument, diagnostics: List[Diagnostic]) -> List[Diagnostic]:
106-
return self.parent.documents_cache.get_diagnostic_modifier(document).modify_diagnostics(diagnostics)
97+
def modify_diagnostics(
98+
self, document: TextDocument, diagnostics: List[Diagnostic]
99+
) -> List[Diagnostic]:
100+
return self.parent.documents_cache.get_diagnostic_modifier(
101+
document
102+
).modify_diagnostics(diagnostics)
107103

108104
@language_id("robotframework")
109105
def collect_namespace_diagnostics(
110-
self, sender: Any, document: TextDocument, diagnostics_type: DiagnosticsCollectType
106+
self,
107+
sender: Any,
108+
document: TextDocument,
109+
diagnostics_type: DiagnosticsCollectType,
111110
) -> DiagnosticsResult:
112111
try:
113112
namespace = self.parent.documents_cache.get_namespace(document)
114113

115114
return DiagnosticsResult(
116-
self.collect_namespace_diagnostics, self.modify_diagnostics(document, namespace.get_diagnostics())
115+
self.collect_namespace_diagnostics,
116+
self.modify_diagnostics(document, namespace.get_diagnostics()),
117117
)
118118
except (CancelledError, SystemExit, KeyboardInterrupt):
119119
raise
@@ -141,7 +141,10 @@ def collect_namespace_diagnostics(
141141
@language_id("robotframework")
142142
@_logger.call
143143
def collect_unused_keyword_references(
144-
self, sender: Any, document: TextDocument, diagnostics_type: DiagnosticsCollectType
144+
self,
145+
sender: Any,
146+
document: TextDocument,
147+
diagnostics_type: DiagnosticsCollectType,
145148
) -> DiagnosticsResult:
146149
config = self.parent.workspace.get_configuration(AnalysisConfig, document.uri)
147150

@@ -153,15 +156,19 @@ def collect_unused_keyword_references(
153156

154157
return self._collect_unused_keyword_references(document)
155158

156-
def _collect_unused_keyword_references(self, document: TextDocument) -> DiagnosticsResult:
159+
def _collect_unused_keyword_references(
160+
self, document: TextDocument
161+
) -> DiagnosticsResult:
157162
try:
158163
namespace = self.parent.documents_cache.get_namespace(document)
159164

160165
result: List[Diagnostic] = []
161166
for kw in (namespace.get_library_doc()).keywords.values():
162167
check_current_task_canceled()
163168

164-
references = self.parent.robot_references.find_keyword_references(document, kw, False, True)
169+
references = self.parent.robot_references.find_keyword_references(
170+
document, kw, False, True
171+
)
165172
if not references:
166173
result.append(
167174
Diagnostic(
@@ -174,7 +181,10 @@ def _collect_unused_keyword_references(self, document: TextDocument) -> Diagnost
174181
)
175182
)
176183

177-
return DiagnosticsResult(self.collect_unused_keyword_references, self.modify_diagnostics(document, result))
184+
return DiagnosticsResult(
185+
self.collect_unused_keyword_references,
186+
self.modify_diagnostics(document, result),
187+
)
178188
except (CancelledError, SystemExit, KeyboardInterrupt):
179189
raise
180190
except BaseException as e:
@@ -200,19 +210,26 @@ def _collect_unused_keyword_references(self, document: TextDocument) -> Diagnost
200210
@language_id("robotframework")
201211
@_logger.call
202212
def collect_unused_variable_references(
203-
self, sender: Any, document: TextDocument, diagnostics_type: DiagnosticsCollectType
213+
self,
214+
sender: Any,
215+
document: TextDocument,
216+
diagnostics_type: DiagnosticsCollectType,
204217
) -> DiagnosticsResult:
205218
config = self.parent.workspace.get_configuration(AnalysisConfig, document.uri)
206219

207220
if not config.find_unused_references:
208221
return DiagnosticsResult(self.collect_unused_variable_references, [])
209222

210223
if diagnostics_type != DiagnosticsCollectType.SLOW:
211-
return DiagnosticsResult(self.collect_unused_variable_references, None, True)
224+
return DiagnosticsResult(
225+
self.collect_unused_variable_references, None, True
226+
)
212227

213228
return self._collect_unused_variable_references(document)
214229

215-
def _collect_unused_variable_references(self, document: TextDocument) -> DiagnosticsResult:
230+
def _collect_unused_variable_references(
231+
self, document: TextDocument
232+
) -> DiagnosticsResult:
216233
try:
217234
namespace = self.parent.documents_cache.get_namespace(document)
218235

@@ -222,14 +239,25 @@ def _collect_unused_variable_references(self, document: TextDocument) -> Diagnos
222239
check_current_task_canceled()
223240

224241
if isinstance(
225-
var, (LibraryArgumentDefinition, EnvironmentVariableDefinition, GlobalVariableDefinition)
242+
var,
243+
(
244+
LibraryArgumentDefinition,
245+
EnvironmentVariableDefinition,
246+
GlobalVariableDefinition,
247+
),
226248
):
227249
continue
228250

229-
if var.name_token is not None and var.name_token.value and var.name_token.value.startswith("_"):
251+
if (
252+
var.name_token is not None
253+
and var.name_token.value
254+
and var.name_token.value.startswith("_")
255+
):
230256
continue
231257

232-
references = self.parent.robot_references.find_variable_references(document, var, False, True)
258+
references = self.parent.robot_references.find_variable_references(
259+
document, var, False, True
260+
)
233261
if not references:
234262
result.append(
235263
Diagnostic(
@@ -243,7 +271,10 @@ def _collect_unused_variable_references(self, document: TextDocument) -> Diagnos
243271
)
244272
)
245273

246-
return DiagnosticsResult(self.collect_unused_variable_references, self.modify_diagnostics(document, result))
274+
return DiagnosticsResult(
275+
self.collect_unused_variable_references,
276+
self.modify_diagnostics(document, result),
277+
)
247278
except (CancelledError, SystemExit, KeyboardInterrupt):
248279
raise
249280
except BaseException as e:

packages/robot/src/robotcode/robot/diagnostics/data_cache.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import os
12
import pickle
3+
import tempfile
24
from abc import ABC, abstractmethod
35
from enum import Enum
46
from pathlib import Path
@@ -12,6 +14,8 @@
1214
class CacheSection(Enum):
1315
LIBRARY = "libdoc"
1416
VARIABLES = "variables"
17+
RESOURCE = "resource"
18+
NAMESPACE = "namespace"
1519

1620

1721
class DataCache(ABC):
@@ -85,5 +89,23 @@ def save_cache_data(self, section: CacheSection, entry_name: str, data: Any) ->
8589
cached_file = self.build_cache_data_filename(section, entry_name)
8690

8791
cached_file.parent.mkdir(parents=True, exist_ok=True)
88-
with cached_file.open("wb") as f:
89-
pickle.dump(data, f)
92+
93+
# Atomic write: write to temp file, then rename
94+
# This ensures readers never see partial/corrupt data
95+
temp_fd, temp_path = tempfile.mkstemp(
96+
dir=cached_file.parent,
97+
prefix=cached_file.stem + "_",
98+
suffix=".tmp",
99+
)
100+
try:
101+
with os.fdopen(temp_fd, "wb") as f:
102+
pickle.dump(data, f)
103+
# Atomic replace: Path.replace() overwrites an existing target on all platforms (atomic on POSIX; on Windows it can fail if the target is open in another process)
104+
Path(temp_path).replace(cached_file)
105+
except Exception:
106+
# Clean up temp file on failure (temp file may be left behind on SystemExit/KeyboardInterrupt)
107+
try:
108+
os.unlink(temp_path)
109+
except OSError:
110+
pass
111+
raise

0 commit comments

Comments
 (0)