Skip to content

Commit a7de676

Browse files
author
SentienceDEV
committed
show grid overlay
1 parent affd8cc commit a7de676

File tree

4 files changed

+326
-27
lines changed

4 files changed

+326
-27
lines changed

examples/show_grid_examples.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
"""
2+
Example: Grid Overlay Visualization
3+
4+
Demonstrates how to use the grid overlay feature to visualize detected grids
5+
on a webpage, including highlighting specific grids and identifying the dominant group.
6+
"""
7+
8+
import os
9+
import time
10+
11+
from sentience import SentienceBrowser, snapshot
12+
from sentience.models import SnapshotOptions
13+
14+
15+
def _print_banner(title, *, leading_blank=True):
    """Print a section title framed by two 60-character '=' rules.

    Args:
        title: Text printed between the two rules.
        leading_blank: When True, emit an empty line before the first rule so
            consecutive sections are visually separated.
    """
    rule = "=" * 60
    print(("\n" if leading_blank else "") + rule)
    print(title)
    print(rule)


def main():
    """Run five grid-overlay demonstrations in a visible browser window.

    The examples, in order: show every detected grid, highlight the first
    grid, highlight the dominant grid, combine the element and grid overlays,
    and print a per-grid summary. Any exception is caught at this top-level
    boundary and reported with a traceback instead of propagating.
    """
    # Get API key from environment variable (optional - uses free tier if not set)
    api_key = os.environ.get("SENTIENCE_API_KEY")

    try:
        with SentienceBrowser(api_key=api_key, headless=False) as browser:
            # Navigate to a page with grid layouts (e.g., product listings, article feeds)
            browser.page.goto("https://example.com/products", wait_until="domcontentloaded")
            time.sleep(2)  # Give the page time to settle after DOMContentLoaded

            _print_banner("Example 1: Show all detected grids", leading_blank=False)
            # Show all grids (all in purple)
            snap = snapshot(browser, SnapshotOptions(show_grid=True))
            print(f"✅ Found {len(snap.elements)} elements")
            print(" Purple borders appear around all detected grids for 5 seconds")
            time.sleep(6)  # Wait to see the overlay

            _print_banner("Example 2: Highlight a specific grid in red")
            # Get grid information first
            grids = snap.get_grid_bounds()
            if grids:
                print(f"✅ Found {len(grids)} grids:")
                for grid in grids:
                    print(
                        f" Grid {grid.grid_id}: {grid.item_count} items, "
                        f"{grid.row_count}x{grid.col_count} rows/cols, "
                        f"label: {grid.label or 'none'}"
                    )

                # Highlight the first grid in red; `grids` is known to be
                # non-empty here, so no second length check is needed.
                target_grid_id = grids[0].grid_id
                print(f"\n Highlighting Grid {target_grid_id} in red...")
                snap = snapshot(
                    browser,
                    SnapshotOptions(
                        show_grid=True,
                        grid_id=target_grid_id,  # This grid will be highlighted in red
                    ),
                )
                time.sleep(6)  # Wait to see the overlay
            else:
                print(" ⚠️ No grids detected on this page")

            _print_banner("Example 3: Highlight the dominant group")
            # Find and highlight the dominant grid
            grids = snap.get_grid_bounds()
            dominant_grid = next((g for g in grids if g.is_dominant), None)

            if dominant_grid is not None:
                print(f"✅ Dominant group detected: Grid {dominant_grid.grid_id}")
                print(f" Label: {dominant_grid.label or 'none'}")
                print(f" Items: {dominant_grid.item_count}")
                print(f" Size: {dominant_grid.row_count}x{dominant_grid.col_count}")
                print("\n Highlighting dominant grid in red...")
                snap = snapshot(
                    browser,
                    SnapshotOptions(
                        show_grid=True,
                        grid_id=dominant_grid.grid_id,  # Highlight dominant grid in red
                    ),
                )
                time.sleep(6)  # Wait to see the overlay
            else:
                print(" ⚠️ No dominant group detected")

            _print_banner("Example 4: Combine element overlay and grid overlay")
            # Show both element borders and grid borders simultaneously
            snap = snapshot(
                browser,
                SnapshotOptions(
                    show_overlay=True,  # Show element borders (green/blue/red)
                    show_grid=True,  # Show grid borders (purple/orange/red)
                ),
            )
            print("✅ Both overlays are now visible:")
            print(" - Element borders: Green (regular), Blue (primary), Red (target)")
            print(" - Grid borders: Purple (regular), Orange (dominant), Red (target)")
            time.sleep(6)  # Wait to see the overlay

            _print_banner("Example 5: Grid information analysis")
            # Analyze grid structure
            grids = snap.get_grid_bounds()
            print("✅ Grid Analysis:")
            for grid in grids:
                # Carry the separating space in the suffix so non-dominant
                # grids print "Grid N:" rather than "Grid N :".
                dominant_suffix = " ⭐ DOMINANT" if grid.is_dominant else ""
                print(f"\n Grid {grid.grid_id}{dominant_suffix}:")
                print(f" Label: {grid.label or 'none'}")
                print(f" Items: {grid.item_count}")
                print(f" Size: {grid.row_count} rows × {grid.col_count} cols")
                print(
                    f" BBox: ({grid.bbox.x:.0f}, {grid.bbox.y:.0f}) "
                    f"{grid.bbox.width:.0f}×{grid.bbox.height:.0f}"
                )
                print(f" Confidence: {grid.confidence}")

            print("\n✅ All examples completed!")

    except Exception as e:
        # Top-level boundary of a demo script: report the failure with a full
        # traceback rather than letting it propagate.
        print(f"❌ Error: {e}")
        import traceback

        traceback.print_exc()


if __name__ == "__main__":
    main()

sentience/models.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ class GridInfo(BaseModel):
118118
label: str | None = (
119119
None # Optional inferred label (e.g., "product_grid", "search_results", "navigation")
120120
)
121+
is_dominant: bool = False # Whether this grid is the dominant group (main content area)
121122

122123

123124
class Snapshot(BaseModel):
@@ -190,10 +191,16 @@ def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]:
190191

191192
grid_infos = []
192193

194+
# First pass: compute all grid infos and count dominant group elements
195+
grid_dominant_counts = {}
193196
for gid, elements_in_grid in sorted(grid_elements.items()):
194197
if not elements_in_grid:
195198
continue
196199

200+
# Count dominant group elements in this grid
201+
dominant_count = sum(1 for elem in elements_in_grid if elem.in_dominant_group is True)
202+
grid_dominant_counts[gid] = (dominant_count, len(elements_in_grid))
203+
197204
# Compute bounding box
198205
min_x = min(elem.bbox.x for elem in elements_in_grid)
199206
min_y = min(elem.bbox.y for elem in elements_in_grid)
@@ -226,9 +233,42 @@ def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]:
226233
item_count=len(elements_in_grid),
227234
confidence=1.0,
228235
label=label,
236+
is_dominant=False, # Will be set below
229237
)
230238
)
231239

240+
# Second pass: identify dominant grid
241+
# Pick the grid with the most dominant-group elements; ties go to the grid with the highest dominant-group share
242+
if grid_dominant_counts:
243+
# Find grid with highest absolute count
244+
max_dominant_count = max(count for count, _ in grid_dominant_counts.values())
245+
if max_dominant_count > 0:
246+
# Find grid(s) with highest count
247+
dominant_grids = [
248+
gid
249+
for gid, (count, total) in grid_dominant_counts.items()
250+
if count == max_dominant_count
251+
]
252+
# If multiple grids tie, prefer the one with highest percentage
253+
if len(dominant_grids) > 1:
254+
dominant_grids.sort(
255+
key=lambda gid: (
256+
grid_dominant_counts[gid][0] / grid_dominant_counts[gid][1]
257+
if grid_dominant_counts[gid][1] > 0
258+
else 0
259+
),
260+
reverse=True,
261+
)
262+
# Mark the dominant grid
263+
dominant_gid = dominant_grids[0]
264+
# Only mark as dominant if >= 3 of its elements are in the dominant group, or they make up >= 50% of the grid
265+
dominant_count, total_count = grid_dominant_counts[dominant_gid]
266+
if dominant_count >= 3 or (total_count > 0 and dominant_count / total_count >= 0.5):
267+
for grid_info in grid_infos:
268+
if grid_info.grid_id == dominant_gid:
269+
grid_info.is_dominant = True
270+
break
271+
232272
return grid_infos
233273

234274
@staticmethod
@@ -456,6 +496,10 @@ class SnapshotOptions(BaseModel):
456496
trace_path: str | None = None # Path to save trace (default: "trace_{timestamp}.json")
457497
goal: str | None = None # Optional goal/task description for the snapshot
458498
show_overlay: bool = False # Show visual overlay highlighting elements in browser
499+
show_grid: bool = False # Show visual overlay highlighting detected grids
500+
grid_id: int | None = (
501+
None # Optional grid ID to show specific grid (only used if show_grid=True)
502+
)
459503

460504
# API credentials (for browser-use integration without SentienceBrowser)
461505
sentience_api_key: str | None = None # Sentience API key for Pro/Enterprise features

sentience/snapshot.py

Lines changed: 111 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,9 @@ def _snapshot_via_extension(
250250
if options.save_trace:
251251
_save_trace_to_file(result.get("raw_elements", []), options.trace_path)
252252

253+
# Validate and parse with Pydantic
254+
snapshot_obj = Snapshot(**result)
255+
253256
# Show visual overlay if requested
254257
if options.show_overlay:
255258
raw_elements = result.get("raw_elements", [])
@@ -265,8 +268,29 @@ def _snapshot_via_extension(
265268
raw_elements,
266269
)
267270

268-
# Validate and parse with Pydantic
269-
snapshot_obj = Snapshot(**result)
271+
# Show grid overlay if requested
272+
if options.show_grid:
273+
# Get all grids (don't filter by grid_id here - we want to show all but highlight the target)
274+
grids = snapshot_obj.get_grid_bounds(grid_id=None)
275+
if grids:
276+
# Convert GridInfo to dict for JavaScript
277+
grid_dicts = [grid.model_dump() for grid in grids]
278+
# Pass grid_id as targetGridId to highlight it in red
279+
target_grid_id = options.grid_id if options.grid_id is not None else None
280+
browser.page.evaluate(
281+
"""
282+
(grids, targetGridId) => {
283+
if (window.sentience && window.sentience.showGrid) {
284+
window.sentience.showGrid(grids, targetGridId);
285+
} else {
286+
console.warn('[SDK] showGrid not available in extension');
287+
}
288+
}
289+
""",
290+
grid_dicts,
291+
target_grid_id,
292+
)
293+
270294
return snapshot_obj
271295

272296

@@ -308,6 +332,9 @@ def _snapshot_via_api(
308332
# Merge API result with local data (screenshot, etc.)
309333
snapshot_data = _merge_api_result_with_local(api_result, raw_result)
310334

335+
# Create snapshot object
336+
snapshot_obj = Snapshot(**snapshot_data)
337+
311338
# Show visual overlay if requested (use API-ranked elements)
312339
if options.show_overlay:
313340
elements = api_result.get("elements", [])
@@ -323,7 +350,29 @@ def _snapshot_via_api(
323350
elements,
324351
)
325352

326-
return Snapshot(**snapshot_data)
353+
# Show grid overlay if requested
354+
if options.show_grid:
355+
# Get all grids (don't filter by grid_id here - we want to show all but highlight the target)
356+
grids = snapshot_obj.get_grid_bounds(grid_id=None)
357+
if grids:
358+
grid_dicts = [grid.model_dump() for grid in grids]
359+
# Pass grid_id as targetGridId to highlight it in red
360+
target_grid_id = options.grid_id if options.grid_id is not None else None
361+
browser.page.evaluate(
362+
"""
363+
(grids, targetGridId) => {
364+
if (window.sentience && window.sentience.showGrid) {
365+
window.sentience.showGrid(grids, targetGridId);
366+
} else {
367+
console.warn('[SDK] showGrid not available in extension');
368+
}
369+
}
370+
""",
371+
grid_dicts,
372+
target_grid_id,
373+
)
374+
375+
return snapshot_obj
327376
except requests.exceptions.RequestException as e:
328377
raise RuntimeError(f"API request failed: {e}") from e
329378

@@ -440,6 +489,18 @@ async def _snapshot_via_extension_async(
440489
if options.save_trace:
441490
_save_trace_to_file(result.get("raw_elements", []), options.trace_path)
442491

492+
# Extract screenshot_format from data URL if not provided by extension
493+
if result.get("screenshot") and not result.get("screenshot_format"):
494+
screenshot_data_url = result.get("screenshot", "")
495+
if screenshot_data_url.startswith("data:image/"):
496+
# Extract format from "data:image/jpeg;base64,..." or "data:image/png;base64,..."
497+
format_match = screenshot_data_url.split(";")[0].split("/")[-1]
498+
if format_match in ["jpeg", "jpg", "png"]:
499+
result["screenshot_format"] = "jpeg" if format_match in ["jpeg", "jpg"] else "png"
500+
501+
# Validate and parse with Pydantic
502+
snapshot_obj = Snapshot(**result)
503+
443504
# Show visual overlay if requested
444505
if options.show_overlay:
445506
raw_elements = result.get("raw_elements", [])
@@ -455,17 +516,28 @@ async def _snapshot_via_extension_async(
455516
raw_elements,
456517
)
457518

458-
# Extract screenshot_format from data URL if not provided by extension
459-
if result.get("screenshot") and not result.get("screenshot_format"):
460-
screenshot_data_url = result.get("screenshot", "")
461-
if screenshot_data_url.startswith("data:image/"):
462-
# Extract format from "data:image/jpeg;base64,..." or "data:image/png;base64,..."
463-
format_match = screenshot_data_url.split(";")[0].split("/")[-1]
464-
if format_match in ["jpeg", "jpg", "png"]:
465-
result["screenshot_format"] = "jpeg" if format_match in ["jpeg", "jpg"] else "png"
519+
# Show grid overlay if requested
520+
if options.show_grid:
521+
# Get all grids (don't filter by grid_id here - we want to show all but highlight the target)
522+
grids = snapshot_obj.get_grid_bounds(grid_id=None)
523+
if grids:
524+
grid_dicts = [grid.model_dump() for grid in grids]
525+
# Pass grid_id as targetGridId to highlight it in red
526+
target_grid_id = options.grid_id if options.grid_id is not None else None
527+
await browser.page.evaluate(
528+
"""
529+
(grids, targetGridId) => {
530+
if (window.sentience && window.sentience.showGrid) {
531+
window.sentience.showGrid(grids, targetGridId);
532+
} else {
533+
console.warn('[SDK] showGrid not available in extension');
534+
}
535+
}
536+
""",
537+
grid_dicts,
538+
target_grid_id,
539+
)
466540

467-
# Validate and parse with Pydantic
468-
snapshot_obj = Snapshot(**result)
469541
return snapshot_obj
470542

471543

@@ -584,6 +656,9 @@ async def _snapshot_via_api_async(
584656
"error": api_result.get("error"),
585657
}
586658

659+
# Create snapshot object
660+
snapshot_obj = Snapshot(**snapshot_data)
661+
587662
# Show visual overlay if requested
588663
if options.show_overlay:
589664
elements = api_result.get("elements", [])
@@ -599,7 +674,29 @@ async def _snapshot_via_api_async(
599674
elements,
600675
)
601676

602-
return Snapshot(**snapshot_data)
677+
# Show grid overlay if requested
678+
if options.show_grid:
679+
# Get all grids (don't filter by grid_id here - we want to show all but highlight the target)
680+
grids = snapshot_obj.get_grid_bounds(grid_id=None)
681+
if grids:
682+
grid_dicts = [grid.model_dump() for grid in grids]
683+
# Pass grid_id as targetGridId to highlight it in red
684+
target_grid_id = options.grid_id if options.grid_id is not None else None
685+
await browser.page.evaluate(
686+
"""
687+
(grids, targetGridId) => {
688+
if (window.sentience && window.sentience.showGrid) {
689+
window.sentience.showGrid(grids, targetGridId);
690+
} else {
691+
console.warn('[SDK] showGrid not available in extension');
692+
}
693+
}
694+
""",
695+
grid_dicts,
696+
target_grid_id,
697+
)
698+
699+
return snapshot_obj
603700
except ImportError:
604701
# Fallback to requests if httpx not available (shouldn't happen in async context)
605702
raise RuntimeError(

0 commit comments

Comments
 (0)