2626from typing import TYPE_CHECKING , Any
2727
2828from ..constants import SENTIENCE_API_URL
29- from ..models import Snapshot , SnapshotOptions
29+ from ..models import Element , Snapshot , SnapshotOptions
3030from ..snapshot import (
3131 _build_snapshot_payload ,
3232 _merge_api_result_with_local ,
@@ -259,6 +259,182 @@ async def snapshot(
259259 return await _snapshot_via_extension (backend , options )
260260
261261
262+ def _normalize_ws (text : str ) -> str :
263+ return " " .join ((text or "" ).split ()).strip ()
264+
265+
def _dedupe_key(el: Element) -> tuple:
    """
    Best-effort stable dedupe key across scroll-sampled snapshots.

    The key is chosen by the first rule that applies:

    1. ``("href", href)`` when the element has a non-blank href.
    2. ``("role_name", role, name)`` when it has a non-blank name.
    3. ``("role_text_docy", role, text[:120], bucket)`` when it has text and a
       usable ``doc_y``; ``("role_text", role, text[:120])`` when it has text only.
    4. ``("role_docy", role, bucket)`` when only a usable ``doc_y`` is present.
    5. ``("id", int(el.id))`` as the last resort.

    ``bucket`` is ``doc_y`` floor-divided into 10-unit bands.

    Notes:
    - IDs are not reliable across snapshots (virtualization can remount nodes).
    - BBox coordinates are viewport-relative and depend on scroll position.
    - Prefer href/name/text + approximate document position when available.
    """
    import math

    href = (el.href or "").strip()
    if href:
        return ("href", href)

    name = _normalize_ws(el.name or "")
    if name:
        return ("role_name", el.role, name)

    # Compute the doc_y band once. A NaN or infinite doc_y cannot be converted
    # to int (int(nan) raises ValueError), so treat it as "no position".
    doc_y = el.doc_y
    bucket: int | None = None
    if isinstance(doc_y, (int, float)) and math.isfinite(doc_y):
        bucket = int(float(doc_y) // 10)

    text = _normalize_ws(el.text or "")
    if text:
        # Use doc_y when present (more stable across scroll positions than bbox.y).
        if bucket is not None:
            return ("role_text_docy", el.role, text[:120], bucket)
        return ("role_text", el.role, text[:120])

    # Fallback: role + approximate position
    if bucket is not None:
        return ("role_docy", el.role, bucket)

    # Last resort (can still dedupe within a single snapshot)
    return ("id", int(el.id))
297+
298+
def merge_snapshots(
    snaps: list[Snapshot],
    *,
    union_limit: int | None = None,
) -> Snapshot:
    """
    Combine several snapshots into one "union snapshot" for analysis/extraction.

    Elements are deduplicated with ``_dedupe_key``; for each key the element
    with the highest quality tuple (importance, then presence of href, text,
    name, doc_y) is kept. The result is ordered by ``doc_y`` when available and
    by first-seen position otherwise, then optionally truncated.

    CRITICAL:
    - Element bboxes are viewport-relative to the scroll position at the time each
      snapshot was taken. Do NOT use merged elements for direct clicking unless you
      also scroll back to their position.

    Args:
        snaps: Snapshots to merge; the first one supplies every non-element field.
        union_limit: Optional cap on the number of merged elements (values below 1
            are raised to 1; values that cannot be converted to int are ignored).

    Returns:
        A new Snapshot built from the first snapshot's data with the merged
        elements and ``screenshot`` set to ``None``.

    Raises:
        ValueError: If ``snaps`` is empty.
    """
    if not snaps:
        raise ValueError("merge_snapshots requires at least one snapshot")

    def _rank(candidate: Element) -> tuple:
        # Higher tuple wins: importance first, then how much useful data it carries.
        return (
            candidate.importance,
            int(bool((candidate.href or "").strip())),
            int(bool(_normalize_ws(candidate.text or ""))),
            int(bool(_normalize_ws(candidate.name or ""))),
            int(isinstance(candidate.doc_y, (int, float))),
        )

    winners: dict[tuple, Element] = {}
    arrival: dict[tuple, int] = {}

    # Flatten all snapshots into one stream so a single running counter
    # records the global first-seen position of every key.
    stream = (
        element
        for snap in snaps
        for element in (getattr(snap, "elements", []) or [])
    )
    for position, element in enumerate(stream):
        key = _dedupe_key(element)
        arrival.setdefault(key, position)
        incumbent = winners.get(key)
        if incumbent is None or _rank(element) > _rank(incumbent):
            winners[key] = element

    def _order(candidate: Element) -> tuple:
        # Positioned elements come first in document order; the rest follow
        # in the order their key was first encountered.
        y = candidate.doc_y
        if not isinstance(y, (int, float)):
            return (1, float("inf"), arrival.get(_dedupe_key(candidate), 10**9))
        return (0, float(y), -int(candidate.importance))

    merged: list[Element] = sorted(winners.values(), key=_order)

    if union_limit is not None:
        try:
            cap = max(1, int(union_limit))
        except (TypeError, ValueError):
            # Unusable limit: leave the merged list untruncated.
            pass
        else:
            merged = merged[:cap]

    # Rebuild from the first snapshot's fields; the screenshot is dropped
    # because it only depicts one of the sampled scroll positions.
    payload = snaps[0].model_dump()
    payload.update(
        elements=[element.model_dump() for element in merged],
        screenshot=None,
    )
    return Snapshot(**payload)
366+
367+
async def sampled_snapshot(
    backend: "BrowserBackend",
    *,
    options: SnapshotOptions | None = None,
    samples: int = 4,
    scroll_delta_y: float | None = None,
    settle_ms: int = 250,
    union_limit: int | None = None,
    restore_scroll: bool = True,
) -> Snapshot:
    """
    Take multiple snapshots while scrolling downward and return a merged union snapshot.

    Designed for long / virtualized results pages where a single viewport snapshot
    cannot cover enough relevant items.

    Args:
        backend: Browser backend used for snapshots and scrolling.
        options: Snapshot options; a default ``SnapshotOptions()`` is used when None.
        samples: Number of snapshots to take (values below 1 are treated as 1, in
            which case a single plain snapshot is returned without scrolling).
        scroll_delta_y: Pixels to scroll between samples. Defaults to 90% of the
            viewport height; non-positive values are replaced by
            ``max(200, 0.9 * viewport height)``.
        settle_ms: Milliseconds to wait after each scroll before snapshotting.
        union_limit: Passed through to ``merge_snapshots``.
        restore_scroll: When True, scroll back to the starting position afterwards
            (best effort; skipped if the starting position could not be determined).

    Returns:
        The merged snapshot produced by ``merge_snapshots``.
    """
    if options is None:
        options = SnapshotOptions()

    k = max(1, int(samples))
    if k <= 1:
        return await snapshot(backend, options=options)

    # Baseline scroll position. None means "unknown": in that case we must not
    # "restore" afterwards, since scrolling to an invented 0 would move the page
    # to the top instead of back to where it was.
    base_scroll_y: float | None
    try:
        info = await backend.refresh_page_info()
        base_scroll_y = float(getattr(info, "scroll_y", 0.0) or 0.0)
        vh = float(getattr(info, "height", 800) or 800)
    except Exception:  # pylint: disable=broad-exception-caught
        vh = 800.0
        try:
            # Second attempt: read the scroll offset directly from the page.
            base_scroll_y = float(await backend.eval("window.scrollY"))
        except Exception:  # pylint: disable=broad-exception-caught
            base_scroll_y = None

    # Choose a conservative scroll delta if not provided.
    delta = float(scroll_delta_y) if scroll_delta_y is not None else (vh * 0.9)
    if delta <= 0:
        delta = max(200.0, vh * 0.9)

    snaps: list[Snapshot] = []
    try:
        # Snapshot at current position.
        snaps.append(await snapshot(backend, options=options))

        for _ in range(1, k):
            try:
                # Scroll by wheel delta (plays nicer with sites that hook scroll events).
                await backend.wheel(delta_y=delta)
            except Exception:  # pylint: disable=broad-exception-caught
                # Fallback: direct scrollTo
                try:
                    cur = await backend.eval("window.scrollY")
                    await backend.call("(y) => window.scrollTo(0, y)", [float(cur) + delta])
                except Exception:  # pylint: disable=broad-exception-caught
                    # Cannot scroll at all: merge whatever has been collected so far.
                    break

            if settle_ms > 0:
                await asyncio.sleep(float(settle_ms) / 1000.0)

            snaps.append(await snapshot(backend, options=options))
    finally:
        if restore_scroll and base_scroll_y is not None:
            try:
                await backend.call("(y) => window.scrollTo(0, y)", [base_scroll_y])
                if settle_ms > 0:
                    # Short settle only; capped at 200 ms.
                    await asyncio.sleep(min(0.2, float(settle_ms) / 1000.0))
            except Exception:  # pylint: disable=broad-exception-caught
                # Restoring the scroll position is best effort and must not mask
                # the result or an in-flight exception.
                pass

    return merge_snapshots(snaps, union_limit=union_limit)
436+
437+
262438async def _wait_for_extension (
263439 backend : "BrowserBackend" ,
264440 timeout_ms : int = 5000 ,
@@ -273,7 +449,6 @@ async def _wait_for_extension(
273449 Raises:
274450 RuntimeError: If extension not injected within timeout
275451 """
276- import asyncio
277452 import logging
278453
279454 logger = logging .getLogger ("sentience.backends.snapshot" )
@@ -446,6 +621,15 @@ async def _snapshot_via_api(
446621 # Re-raise validation errors as-is
447622 raise
448623 except Exception as e :
624+ # Preserve structured gateway details when available.
625+ try :
626+ from ..snapshot import SnapshotGatewayError # type: ignore
627+
628+ if isinstance (e , SnapshotGatewayError ):
629+ raise
630+ except Exception :
631+ pass
632+
449633 # Fallback to local extension on API error
450634 # This matches the behavior of the main snapshot function
451635 raise RuntimeError (
0 commit comments