Skip to content

Commit 4ce4246

Browse files
committed
p1
1 parent 92a89a6 commit 4ce4246

File tree

4 files changed

+298
-0
lines changed

4 files changed

+298
-0
lines changed

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,21 @@ async def main():
8989
asyncio.run(main())
9090
```
9191

92+
### Failure Artifact Buffer (Phase 1)
93+
94+
Capture a short ring buffer of screenshots and persist them when a required assertion fails.
95+
96+
```python
97+
from sentience.failure_artifacts import FailureArtifactsOptions
98+
99+
await runtime.enable_failure_artifacts(
100+
FailureArtifactsOptions(buffer_seconds=15, capture_on_action=True, fps=0.0)
101+
)
102+
103+
# After each action, record it (best-effort).
104+
await runtime.record_action("CLICK")
105+
```
106+
92107
**See examples:** [`examples/asserts/`](examples/asserts/)
93108

94109
## 🚀 Quick Start: Choose Your Abstraction Level

sentience/agent_runtime.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
from dataclasses import dataclass
7171
from typing import TYPE_CHECKING, Any
7272

73+
from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
7374
from .models import Snapshot, SnapshotOptions
7475
from .verification import AssertContext, AssertOutcome, Predicate
7576

@@ -138,6 +139,10 @@ def __init__(
138139
# Snapshot state
139140
self.last_snapshot: Snapshot | None = None
140141

142+
# Failure artifacts (Phase 1)
143+
self._artifact_buffer: FailureArtifactBuffer | None = None
144+
self._artifact_timer_task: asyncio.Task | None = None
145+
141146
# Cached URL (updated on snapshot or explicit get_url call)
142147
self._cached_url: str | None = None
143148

@@ -250,6 +255,90 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
250255
self.last_snapshot = await backend_snapshot(self.backend, options=options)
251256
return self.last_snapshot
252257

258+
async def enable_failure_artifacts(
    self,
    options: FailureArtifactsOptions | None = None,
) -> None:
    """
    Enable failure artifact buffer (Phase 1).

    Creates a fresh ``FailureArtifactBuffer`` for this run and, when
    ``options.fps`` is positive, starts a background task that captures
    frames periodically. Calling this again replaces the previously
    enabled buffer instead of stacking a second one on top of it.

    Args:
        options: Buffer configuration. Defaults to ``FailureArtifactsOptions()``.
    """
    # Re-enabling must not orphan the previous timer task (it would keep
    # capturing forever) nor leave the previous buffer's temp directory behind.
    previous_task = self._artifact_timer_task
    if previous_task is not None:
        previous_task.cancel()
        self._artifact_timer_task = None
    previous_buffer = self._artifact_buffer
    if previous_buffer is not None:
        previous_buffer.cleanup()
        self._artifact_buffer = None

    opts = options or FailureArtifactsOptions()
    self._artifact_buffer = FailureArtifactBuffer(
        run_id=self.tracer.run_id,
        options=opts,
    )
    # fps <= 0 means "no timer": frames are then only captured on actions.
    if opts.fps > 0:
        self._artifact_timer_task = asyncio.create_task(self._artifact_timer_loop())
272+
273+
def disable_failure_artifacts(self) -> None:
    """
    Disable failure artifact buffer and stop background capture.

    Cancels the periodic capture task (if any), discards the buffer's
    temporary files and detaches the buffer, so later ``record_action``
    calls become no-ops instead of writing into a removed directory.
    Safe to call repeatedly and when artifacts were never enabled.
    """
    timer_task = self._artifact_timer_task
    if timer_task is not None:
        timer_task.cancel()
        self._artifact_timer_task = None

    buffer = self._artifact_buffer
    if buffer is not None:
        # cleanup() only removes the temp directory; anything already
        # persisted to the output directory is left untouched.
        buffer.cleanup()
        self._artifact_buffer = None
280+
281+
async def record_action(
282+
self,
283+
action: str,
284+
*,
285+
url: str | None = None,
286+
) -> None:
287+
"""
288+
Record an action in the artifact timeline and capture a frame if enabled.
289+
"""
290+
if not self._artifact_buffer:
291+
return
292+
self._artifact_buffer.record_step(
293+
action=action,
294+
step_id=self.step_id,
295+
step_index=self.step_index,
296+
url=url,
297+
)
298+
if self._artifact_buffer.options.capture_on_action:
299+
await self._capture_artifact_frame()
300+
301+
async def _capture_artifact_frame(self) -> None:
    """
    Capture one PNG screenshot from the backend into the artifact buffer.

    Strictly best-effort: neither a failed screenshot nor a failed write
    into the buffer is allowed to propagate to the caller, because frame
    capture is a diagnostic aid and must not break the run it observes.
    """
    if not self._artifact_buffer:
        return
    try:
        image_bytes = await self.backend.screenshot_png()
    except Exception:
        # Best-effort: the page may be closed or mid-navigation.
        return

    # Re-read after the await: the buffer may have been disabled or
    # replaced while the screenshot was in flight.
    buffer = self._artifact_buffer
    if not buffer:
        return
    try:
        buffer.add_frame(image_bytes, fmt="png")
    except OSError:
        # E.g. the buffer's temp directory was already cleaned up.
        return
309+
310+
async def _artifact_timer_loop(self) -> None:
    """
    Background loop: capture a frame, sleep one frame interval, repeat.

    Returns immediately when no buffer is enabled, and returns quietly
    when the task is cancelled.
    """
    buffer = self._artifact_buffer
    if not buffer:
        return

    # Clamp the rate so a zero or negative fps cannot divide by zero.
    frames_per_second = max(0.001, buffer.options.fps)
    pause = 1.0 / frames_per_second

    try:
        while True:
            await self._capture_artifact_frame()
            await asyncio.sleep(pause)
    except asyncio.CancelledError:
        # Cancellation is the normal way this loop is stopped.
        return
320+
321+
def finalize_run(self, *, success: bool) -> None:
    """
    Finalize artifact buffer at end of run.

    On success the buffered frames are persisted only when
    ``persist_mode == "always"``; the temp directory is then discarded.
    On failure the buffer is persisted with reason ``"finalize_failure"``.
    In every case background capture is stopped afterwards, so no timer
    task keeps writing into a directory that has just been removed.

    Args:
        success: Whether the run as a whole succeeded.
    """
    buffer = self._artifact_buffer
    if not buffer:
        return
    try:
        if success:
            if buffer.options.persist_mode == "always":
                buffer.persist(reason="success", status="success")
            buffer.cleanup()
        else:
            self._persist_failure_artifacts(reason="finalize_failure")
    finally:
        # The run is over: stop capture even if persisting raised.
        self.disable_failure_artifacts()
333+
334+
def _persist_failure_artifacts(self, *, reason: str) -> None:
    """
    Write the buffered artifacts out with status ``"failure"``.

    After persisting, the buffer's temp files are discarded; in
    ``"onFail"`` mode failure artifacts are additionally disabled.

    Args:
        reason: Short tag describing what failed; passed through to ``persist``.
    """
    buffer = self._artifact_buffer
    if not buffer:
        return

    buffer.persist(reason=reason, status="failure")
    buffer.cleanup()

    if buffer.options.persist_mode != "onFail":
        return
    self.disable_failure_artifacts()
341+
253342
def begin_step(self, goal: str, step_index: int | None = None) -> str:
254343
"""
255344
Begin a new step in the verification loop.
@@ -309,6 +398,8 @@ def assert_(
309398
kind="assert",
310399
record_in_step=True,
311400
)
401+
if required and not outcome.passed:
402+
self._persist_failure_artifacts(reason=f"assert_failed:{label}")
312403
return outcome.passed
313404

314405
def check(self, predicate: Predicate, label: str, required: bool = False) -> AssertionHandle:
@@ -619,6 +710,10 @@ async def eventually(
619710
"vision_fallback": True,
620711
},
621712
)
713+
if self.required and not passed:
714+
self.runtime._persist_failure_artifacts(
715+
reason=f"assert_eventually_failed:{self.label}"
716+
)
622717
return passed
623718
except Exception as e:
624719
# If vision fallback fails, fall through to snapshot_exhausted.
@@ -649,6 +744,10 @@ async def eventually(
649744
"exhausted": True,
650745
},
651746
)
747+
if self.required:
748+
self.runtime._persist_failure_artifacts(
749+
reason=f"assert_eventually_failed:{self.label}"
750+
)
652751
return False
653752

654753
if time.monotonic() >= deadline:
@@ -666,6 +765,10 @@ async def eventually(
666765
"timeout": True,
667766
},
668767
)
768+
if self.required:
769+
self.runtime._persist_failure_artifacts(
770+
reason=f"assert_eventually_timeout:{self.label}"
771+
)
669772
return False
670773

671774
await asyncio.sleep(poll_s)
@@ -705,6 +808,10 @@ async def eventually(
705808
record_in_step=True,
706809
extra={"eventually": True, "attempt": attempt, "final": True, "timeout": True},
707810
)
811+
if self.required:
812+
self.runtime._persist_failure_artifacts(
813+
reason=f"assert_eventually_timeout:{self.label}"
814+
)
708815
return False
709816

710817
await asyncio.sleep(poll_s)

sentience/failure_artifacts.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
from __future__ import annotations
2+
3+
import json
4+
import shutil
5+
import tempfile
6+
import time
7+
from dataclasses import dataclass
8+
from pathlib import Path
9+
from typing import Callable, Literal
10+
11+
12+
@dataclass
class FailureArtifactsOptions:
    """Settings for the failure artifact buffer."""

    # Frames older than this many seconds are pruned from the buffer.
    buffer_seconds: float = 15.0
    # Capture a frame each time an action is recorded.
    capture_on_action: bool = True
    # Periodic capture rate; values <= 0 disable the background timer.
    fps: float = 0.0
    # "onFail": persist only on failure; "always": also persist on success.
    persist_mode: Literal["onFail", "always"] = "onFail"
    # Directory under which persisted run folders are created.
    output_dir: str = ".sentience/artifacts"
19+
20+
21+
@dataclass
class _FrameRecord:
    """Bookkeeping entry for one screenshot currently held in the buffer."""

    ts: float  # capture time as returned by the buffer's time_fn
    file_name: str  # base name of the frame file; reused when persisting
    path: Path  # location of the frame inside the temp frames directory


class FailureArtifactBuffer:
    """
    Ring buffer of screenshots with minimal persistence on failure.

    Frames are written to a private temp directory and pruned once they are
    older than ``options.buffer_seconds``. ``persist`` copies the surviving
    frames, the recorded steps and a manifest into ``options.output_dir``;
    it does so at most once per buffer. ``cleanup`` discards the temp
    directory, after which further frames are silently ignored.
    """

    def __init__(
        self,
        *,
        run_id: str,
        options: FailureArtifactsOptions,
        time_fn: Callable[[], float] = time.time,
    ) -> None:
        """
        Args:
            run_id: Identifier used as prefix of the persisted run directory.
            options: Buffer configuration (window length, output directory, ...).
            time_fn: Clock returning seconds as float; injectable for tests.
        """
        self.run_id = run_id
        self.options = options
        self._time_fn = time_fn
        self._temp_dir = Path(tempfile.mkdtemp(prefix="sentience-artifacts-"))
        self._frames_dir = self._temp_dir / "frames"
        self._frames_dir.mkdir(parents=True, exist_ok=True)
        self._frames: list[_FrameRecord] = []
        self._steps: list[dict] = []
        self._persisted = False
        # Set by cleanup(); once True the temp directory no longer exists.
        self._closed = False

    @property
    def temp_dir(self) -> Path:
        """Root of the temporary directory holding not-yet-persisted frames."""
        return self._temp_dir

    def record_step(
        self,
        *,
        action: str,
        step_id: str | None,
        step_index: int | None,
        url: str | None,
    ) -> None:
        """Append a timestamped step entry to the in-memory timeline."""
        self._steps.append(
            {
                "ts": self._time_fn(),
                "action": action,
                "step_id": step_id,
                "step_index": step_index,
                "url": url,
            }
        )

    def add_frame(self, image_bytes: bytes, *, fmt: str = "png") -> None:
        """
        Store one frame on disk and prune frames outside the time window.

        Frames arriving after ``cleanup()`` are dropped rather than raising,
        since the temp directory they would be written to is gone.

        Args:
            image_bytes: Encoded image data, written verbatim.
            fmt: File extension to use for the frame file.
        """
        if self._closed:
            return

        ts = self._time_fn()
        stem = f"frame_{int(ts * 1000)}"
        file_name = f"{stem}.{fmt}"
        path = self._frames_dir / file_name
        # Two frames within the same millisecond would otherwise share a
        # file: the later one would overwrite the earlier, and pruning one
        # record would delete the other's data.
        collision = 0
        while path.exists():
            collision += 1
            file_name = f"{stem}_{collision}.{fmt}"
            path = self._frames_dir / file_name

        path.write_bytes(image_bytes)
        self._frames.append(_FrameRecord(ts=ts, file_name=file_name, path=path))
        self._prune()

    def frame_count(self) -> int:
        """Return the number of frames currently held in the buffer."""
        return len(self._frames)

    def _prune(self) -> None:
        """Drop (and delete from disk) frames older than the buffer window."""
        cutoff = self._time_fn() - max(0.0, self.options.buffer_seconds)
        fresh: list[_FrameRecord] = []
        for frame in self._frames:
            if frame.ts >= cutoff:
                fresh.append(frame)
                continue
            try:
                frame.path.unlink(missing_ok=True)
            except OSError:
                # Best-effort: a leftover file is removed with the temp dir.
                pass
        self._frames = fresh

    def persist(self, *, reason: str | None, status: Literal["failure", "success"]) -> Path | None:
        """
        Copy buffered frames plus ``steps.json`` and ``manifest.json`` to disk.

        Args:
            reason: Free-form tag stored in the manifest.
            status: Outcome stored in the manifest.

        Returns:
            The created run directory, or ``None`` if this buffer was
            already persisted.
        """
        if self._persisted:
            return None

        ts = int(self._time_fn() * 1000)
        run_dir = Path(self.options.output_dir) / f"{self.run_id}-{ts}"
        frames_out = run_dir / "frames"
        # parents=True also creates the output directory itself.
        frames_out.mkdir(parents=True, exist_ok=True)

        # Only frames that were actually copied are listed in the manifest,
        # so it never references a file that is not there.
        copied: list[_FrameRecord] = []
        for frame in self._frames:
            try:
                shutil.copy2(frame.path, frames_out / frame.file_name)
            except FileNotFoundError:
                continue
            copied.append(frame)

        steps_path = run_dir / "steps.json"
        steps_path.write_text(json.dumps(self._steps, indent=2), encoding="utf-8")

        manifest = {
            "run_id": self.run_id,
            "created_at_ms": ts,
            "status": status,
            "reason": reason,
            "buffer_seconds": self.options.buffer_seconds,
            "frame_count": len(copied),
            "frames": [{"file": frame.file_name, "ts": frame.ts} for frame in copied],
        }
        manifest_path = run_dir / "manifest.json"
        manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

        self._persisted = True
        return run_dir

    def cleanup(self) -> None:
        """Delete the temp directory and forget the frames stored in it."""
        self._closed = True
        # The records would otherwise point at files that no longer exist.
        self._frames = []
        if self._temp_dir.exists():
            shutil.rmtree(self._temp_dir, ignore_errors=True)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from __future__ import annotations
2+
3+
import json
4+
5+
from sentience.failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
6+
7+
8+
def test_buffer_prunes_by_time(tmp_path) -> None:
    """A frame older than the buffer window is evicted when a new one arrives."""
    clock = [0.0]  # mutable cell so the fake clock can be advanced

    options = FailureArtifactsOptions(buffer_seconds=1.0, output_dir=str(tmp_path))
    buffer = FailureArtifactBuffer(
        run_id="run-1",
        options=options,
        time_fn=lambda: clock[0],
    )

    buffer.add_frame(b"first")
    assert buffer.frame_count() == 1

    # Jump past the 1-second window: the first frame must be pruned.
    clock[0] = 2.0
    buffer.add_frame(b"second")
    assert buffer.frame_count() == 1
23+
24+
25+
def test_persist_writes_manifest_and_steps(tmp_path) -> None:
    """Persisting writes a manifest and a steps file that reflect the buffer."""
    options = FailureArtifactsOptions(output_dir=str(tmp_path))
    buffer = FailureArtifactBuffer(
        run_id="run-2",
        options=options,
        time_fn=lambda: 10.0,  # frozen clock
    )

    buffer.record_step(action="CLICK", step_id="s1", step_index=1, url="https://example.com")
    buffer.add_frame(b"frame")

    run_dir = buffer.persist(reason="assert_failed", status="failure")
    assert run_dir is not None

    manifest_text = (run_dir / "manifest.json").read_text()
    steps_text = (run_dir / "steps.json").read_text()
    manifest = json.loads(manifest_text)
    steps = json.loads(steps_text)

    assert manifest["run_id"] == "run-2"
    assert manifest["frame_count"] == 1
    assert len(steps) == 1

0 commit comments

Comments
 (0)