Skip to content

Commit aff1fdf

Browse files
authored
Merge pull request #205 from SentienceAPI/sample_snapshot
sample snapshot + don't swallow server error/exception
2 parents a1e56ef + d16d7a8 commit aff1fdf

File tree

10 files changed

+760
-38
lines changed

10 files changed

+760
-38
lines changed

sentience/agent.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,25 @@ def _compute_hash(self, text: str) -> str:
188188
"""Compute SHA256 hash of text."""
189189
return hashlib.sha256(text.encode("utf-8")).hexdigest()
190190

191+
async def _best_effort_post_snapshot_digest(self, goal: str) -> str | None:
    """
    Compute a best-effort digest of the page state after an action (async).

    A small, screenshot-free snapshot is requested with the goal text
    ``"<goal> (post)"``; its URL and timestamp are concatenated and hashed
    with ``self._compute_hash``. The digest is only used for tracing, so any
    failure is reported as ``None`` instead of being raised.

    Args:
        goal: Goal text of the action that just ran.

    Returns:
        ``"sha256:<hex digest>"`` when the snapshot status is ``"success"``;
        ``None`` for any other status or when any exception occurs.
    """
    try:
        post_options = SnapshotOptions(
            limit=min(10, self.default_snapshot_limit),
            goal=f"{goal} (post)",
        )
        # Tracing only needs url + timestamp, so never request a screenshot.
        post_options.screenshot = False
        post_options.show_overlay = self.config.show_overlay if self.config else None

        result = await snapshot_async(self.browser, post_options)
        if result.status == "success":
            fingerprint = self._compute_hash(f"{result.url}{result.timestamp}")
            return f"sha256:{fingerprint}"
        return None
    except Exception:
        # Deliberate best-effort: a tracing helper must never break the agent step.
        return None
209+
191210
def _best_effort_post_snapshot_digest(self, goal: str) -> str | None:
192211
"""
193212
Best-effort post-action snapshot digest for tracing.

sentience/agent_runtime.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,50 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
358358
await self._handle_captcha_if_needed(self.last_snapshot, source="gateway")
359359
return self.last_snapshot
360360

361+
async def sampled_snapshot(
    self,
    *,
    samples: int = 4,
    scroll_delta_y: float | None = None,
    settle_ms: int = 250,
    union_limit: int | None = None,
    restore_scroll: bool = True,
    **kwargs: Any,
) -> Snapshot:
    """
    Capture several snapshots while scrolling and return their merged "union snapshot".

    Meant for analysis/extraction on long or virtualized pages where one
    viewport snapshot does not cover enough content.

    IMPORTANT:
    - Element bboxes in the returned snapshot may not match the current viewport.
      Do NOT click on them unless you also scroll to the matching position.
    - The backend path below does not assign `self.last_snapshot`, so verification
      loops that depend on the current viewport are not disturbed. The legacy
      fallback delegates to `self.snapshot()` and follows that method's behavior.

    Args:
        samples: Number of snapshots to take (forwarded to the backend helper).
        scroll_delta_y: Scroll distance between samples; ``None`` lets the
            backend helper choose.
        settle_ms: Wait after each scroll, in milliseconds.
        union_limit: Optional cap on the number of merged elements.
        restore_scroll: Whether the backend helper should scroll back afterwards.
        **kwargs: Per-call overrides applied on top of the runtime's default
            snapshot options.

    Returns:
        The merged snapshot, or a single regular snapshot on the legacy path.
    """
    # Legacy browser path: backend scroll ops cannot be relied on, so take one snapshot.
    uses_legacy = hasattr(self, "_legacy_browser") and hasattr(self, "_legacy_page")
    if uses_legacy:
        return await self.snapshot(**kwargs)

    from .backends.snapshot import sampled_snapshot as backend_sampled_snapshot

    # Call-specific kwargs win over the runtime's default options.
    effective = {**self._snapshot_options.model_dump(exclude_none=True), **kwargs}
    options = SnapshotOptions(**effective)

    return await backend_sampled_snapshot(
        self.backend,
        options=options,
        samples=samples,
        scroll_delta_y=scroll_delta_y,
        settle_ms=settle_ms,
        union_limit=union_limit,
        restore_scroll=restore_scroll,
    )
404+
361405
async def evaluate_js(self, request: EvaluateJsRequest) -> EvaluateJsResult:
362406
"""
363407
Evaluate JavaScript expression in the active backend.

sentience/backends/snapshot.py

Lines changed: 186 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from typing import TYPE_CHECKING, Any
2727

2828
from ..constants import SENTIENCE_API_URL
29-
from ..models import Snapshot, SnapshotOptions
29+
from ..models import Element, Snapshot, SnapshotOptions
3030
from ..snapshot import (
3131
_build_snapshot_payload,
3232
_merge_api_result_with_local,
@@ -259,6 +259,182 @@ async def snapshot(
259259
return await _snapshot_via_extension(backend, options)
260260

261261

262+
def _normalize_ws(text: str) -> str:
    """Collapse every whitespace run in *text* to one space; falsy input yields ``""``."""
    if not text:
        return ""
    # split() with no argument already discards leading/trailing whitespace.
    return " ".join(text.split())
264+
265+
266+
def _dedupe_key(el: Element) -> tuple:
    """
    Build a best-effort dedupe key that stays stable across scroll-sampled snapshots.

    The first rule that applies wins:
      1. ``("href", href)`` when the element has a non-blank href.
      2. ``("role_name", role, name)`` when it has a non-blank name.
      3. ``("role_text_docy", role, text[:120], bucket)`` when it has text and a
         usable ``doc_y``, else ``("role_text", role, text[:120])``.
      4. ``("role_docy", role, bucket)`` when only a usable ``doc_y`` is known.
      5. ``("id", id)`` as a last resort (still dedupes within one snapshot).

    ``bucket`` is ``doc_y`` floor-divided by 10, so small position jitter maps
    to the same key.

    Notes:
    - IDs are not reliable across snapshots (virtualization can remount nodes).
    - BBox coordinates are viewport-relative and depend on scroll position.

    Args:
        el: Element to derive the key from.

    Returns:
        A hashable tuple whose first item names the rule that produced it.
    """
    href = (el.href or "").strip()
    if href:
        return ("href", href)

    name = _normalize_ws(el.name or "")
    if name:
        return ("role_name", el.role, name)

    # A NaN / infinite / oversized doc_y cannot be turned into an integer bucket;
    # treat it as "position unknown" instead of letting int() abort the merge.
    doc_y = el.doc_y
    bucket = None
    if isinstance(doc_y, (int, float)):
        try:
            bucket = int(float(doc_y) // 10)
        except (ValueError, OverflowError):
            bucket = None

    text = _normalize_ws(el.text or "")
    if text:
        # Prefer doc_y when present (more stable across scroll positions than bbox.y).
        if bucket is not None:
            return ("role_text_docy", el.role, text[:120], bucket)
        return ("role_text", el.role, text[:120])

    # Fallback: role + approximate document position.
    if bucket is not None:
        return ("role_docy", el.role, bucket)

    return ("id", int(el.id))
297+
298+
299+
def merge_snapshots(
    snaps: list[Snapshot],
    *,
    union_limit: int | None = None,
) -> Snapshot:
    """
    Combine several snapshots into one "union snapshot" for analysis/extraction.

    Elements are deduplicated with ``_dedupe_key``; for each key the candidate
    with the highest quality tuple is kept. The result is ordered by ``doc_y``
    where available, then by first appearance, and optionally truncated.

    CRITICAL:
    - Element bboxes are viewport-relative to the scroll position at the time each
      snapshot was taken. Do NOT click merged elements directly unless you also
      scroll back to their position.

    Args:
        snaps: Snapshots in sampling order; the first one supplies every
            non-element field of the result.
        union_limit: Optional cap on the merged element count. Values below 1
            are raised to 1; values ``int()`` rejects with TypeError/ValueError
            disable the cap.

    Returns:
        A new ``Snapshot`` built from the first snapshot's dump with the merged
        elements and ``screenshot`` set to ``None``.

    Raises:
        ValueError: If ``snaps`` is empty.
    """
    if not snaps:
        raise ValueError("merge_snapshots requires at least one snapshot")

    def _rank(candidate: Element) -> tuple:
        # Larger tuple wins: importance first (usually means in-viewport when
        # sampled), then presence of href / text / name / doc_y.
        return (
            candidate.importance,
            1 if (candidate.href or "").strip() else 0,
            1 if _normalize_ws(candidate.text or "") else 0,
            1 if _normalize_ws(candidate.name or "") else 0,
            1 if isinstance(candidate.doc_y, (int, float)) else 0,
        )

    winners: dict[tuple, Element] = {}
    first_seen: dict[tuple, int] = {}
    position = 0  # running element index across all snapshots
    for snap in snaps:
        for element in getattr(snap, "elements", []) or []:
            key = _dedupe_key(element)
            first_seen.setdefault(key, position)
            incumbent = winners.get(key)
            if incumbent is None or _rank(element) > _rank(incumbent):
                winners[key] = element
            position += 1

    def _order(entry: tuple) -> tuple:
        # Document order when doc_y is known; otherwise after all positioned
        # elements, in first-seen order (stable for a given sampling sequence).
        key, element = entry
        y = element.doc_y
        if isinstance(y, (int, float)):
            return (0, float(y), -int(element.importance))
        return (1, float("inf"), first_seen.get(key, 10**9))

    merged: list[Element] = [element for _, element in sorted(winners.items(), key=_order)]

    if union_limit is not None:
        try:
            cap = max(1, int(union_limit))
        except (TypeError, ValueError):
            pass  # unusable limit: keep everything
        else:
            merged = merged[:cap]

    # Keep the first snapshot's url/viewport/diagnostics; drop the screenshot,
    # which would only match one of the sampled scroll positions.
    payload = {
        **snaps[0].model_dump(),
        "elements": [element.model_dump() for element in merged],
        "screenshot": None,
    }
    return Snapshot(**payload)
366+
367+
368+
async def sampled_snapshot(
    backend: "BrowserBackend",
    *,
    options: SnapshotOptions | None = None,
    samples: int = 4,
    scroll_delta_y: float | None = None,
    settle_ms: int = 250,
    union_limit: int | None = None,
    restore_scroll: bool = True,
) -> Snapshot:
    """
    Snapshot the page at several scroll positions and return the merged union.

    Built for long / virtualized result pages where a single viewport snapshot
    cannot cover enough relevant items.

    Args:
        backend: Browser backend used for page info, scrolling and snapshots.
        options: Snapshot options for every sample; defaults to ``SnapshotOptions()``.
        samples: Number of snapshots to take. Values of 1 or less produce a
            single plain snapshot with no scrolling.
        scroll_delta_y: Scroll distance between samples. ``None`` uses 90% of
            the viewport height; a non-positive value is replaced by
            ``max(200, 90% of the viewport height)``.
        settle_ms: Wait after each scroll, in milliseconds (skipped when <= 0).
        union_limit: Optional cap forwarded to ``merge_snapshots``.
        restore_scroll: Scroll back to the recorded start position afterwards,
            even if sampling raised.

    Returns:
        The merged snapshot (or the single snapshot when only one sample is taken).
    """
    if options is None:
        options = SnapshotOptions()

    total = max(1, int(samples))
    if total == 1:
        return await snapshot(backend, options=options)

    # Record the starting scroll offset and viewport height; fall back to
    # defaults when the backend cannot report them.
    try:
        page_info = await backend.refresh_page_info()
        origin_y = float(getattr(page_info, "scroll_y", 0.0) or 0.0)
        viewport_h = float(getattr(page_info, "height", 800) or 800)
    except Exception:  # pylint: disable=broad-exception-caught
        origin_y, viewport_h = 0.0, 800.0

    # Conservative step: slightly under one viewport so consecutive samples overlap.
    step = viewport_h * 0.9 if scroll_delta_y is None else float(scroll_delta_y)
    if step <= 0:
        step = max(200.0, viewport_h * 0.9)

    scroll_to_js = "(y) => window.scrollTo(0, y)"

    async def _advance() -> bool:
        """Scroll down one step; return False when neither method worked."""
        try:
            # Wheel scrolling plays nicer with sites that hook scroll events.
            await backend.wheel(delta_y=step)
            return True
        except Exception:  # pylint: disable=broad-exception-caught
            pass
        # Fallback: direct scrollTo relative to the current offset.
        try:
            current_y = await backend.eval("window.scrollY")
            await backend.call(scroll_to_js, [float(current_y) + step])
        except Exception:  # pylint: disable=broad-exception-caught
            return False
        return True

    collected: list[Snapshot] = []
    try:
        # First sample is taken at the current position.
        collected.append(await snapshot(backend, options=options))

        for _ in range(total - 1):
            if not await _advance():
                break

            if settle_ms > 0:
                await asyncio.sleep(float(settle_ms) / 1000.0)

            collected.append(await snapshot(backend, options=options))
    finally:
        if restore_scroll:
            try:
                await backend.call(scroll_to_js, [float(origin_y)])
                if settle_ms > 0:
                    # Short capped pause so the page settles after jumping back.
                    await asyncio.sleep(min(0.2, float(settle_ms) / 1000.0))
            except Exception:  # pylint: disable=broad-exception-caught
                # Restoring the scroll position is best-effort only.
                pass

    return merge_snapshots(collected, union_limit=union_limit)
436+
437+
262438
async def _wait_for_extension(
263439
backend: "BrowserBackend",
264440
timeout_ms: int = 5000,
@@ -273,7 +449,6 @@ async def _wait_for_extension(
273449
Raises:
274450
RuntimeError: If extension not injected within timeout
275451
"""
276-
import asyncio
277452
import logging
278453

279454
logger = logging.getLogger("sentience.backends.snapshot")
@@ -446,6 +621,15 @@ async def _snapshot_via_api(
446621
# Re-raise validation errors as-is
447622
raise
448623
except Exception as e:
624+
# Preserve structured gateway details when available.
625+
try:
626+
from ..snapshot import SnapshotGatewayError # type: ignore
627+
628+
if isinstance(e, SnapshotGatewayError):
629+
raise
630+
except Exception:
631+
pass
632+
449633
# Fallback to local extension on API error
450634
# This matches the behavior of the main snapshot function
451635
raise RuntimeError(

0 commit comments

Comments
 (0)