Skip to content

Commit 6c1405c

Browse files
author
SentienceDev
committed
Phase 3: polish
1 parent 8020867 commit 6c1405c

File tree

7 files changed

+662
-31
lines changed

7 files changed

+662
-31
lines changed
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
"""
2+
Example: Using Sentience with browser-use for element grounding.
3+
4+
This example demonstrates how to integrate Sentience's semantic element
5+
detection with browser-use, enabling accurate click/type/scroll operations
6+
using Sentience's snapshot-based grounding instead of coordinate estimation.
7+
8+
Requirements:
9+
pip install browser-use sentienceapi
10+
11+
Usage:
12+
python examples/browser_use_integration.py
13+
"""
14+
15+
import asyncio
16+
17+
# browser-use imports (install via: pip install browser-use)
18+
# from browser_use import BrowserSession, BrowserProfile
19+
20+
# Sentience imports
21+
from sentience import (
22+
find,
23+
get_extension_dir,
24+
query,
25+
)
26+
from sentience.backends import (
27+
BrowserUseAdapter,
28+
CachedSnapshot,
29+
ExtensionNotLoadedError,
30+
click,
31+
scroll,
32+
snapshot,
33+
type_text,
34+
)
35+
36+
37+
async def main() -> None:
38+
"""
39+
Demo: Search on Google using Sentience grounding with browser-use.
40+
41+
This example shows the full workflow:
42+
1. Launch browser-use with Sentience extension loaded
43+
2. Create a Sentience backend adapter
44+
3. Take snapshots and interact with elements using semantic queries
45+
"""
46+
47+
# =========================================================================
48+
# STEP 1: Setup browser-use with Sentience extension
49+
# =========================================================================
50+
#
51+
# The Sentience extension must be loaded for element grounding to work.
52+
# Use get_extension_dir() to get the path to the bundled extension.
53+
#
54+
# Uncomment the following when running with browser-use installed:
55+
56+
# extension_path = get_extension_dir()
57+
# print(f"Loading Sentience extension from: {extension_path}")
58+
#
59+
# profile = BrowserProfile(
60+
# args=[
61+
# f"--load-extension={extension_path}",
62+
# "--disable-extensions-except=" + extension_path,
63+
# ],
64+
# )
65+
# session = BrowserSession(browser_profile=profile)
66+
# await session.start()
67+
68+
# =========================================================================
69+
# STEP 2: Create Sentience backend adapter
70+
# =========================================================================
71+
#
72+
# The adapter bridges browser-use's CDP client to Sentience's backend protocol.
73+
#
74+
# adapter = BrowserUseAdapter(session)
75+
# backend = await adapter.create_backend()
76+
77+
# =========================================================================
78+
# STEP 3: Navigate and take snapshots
79+
# =========================================================================
80+
#
81+
# await session.navigate("https://www.google.com")
82+
#
83+
# # Take a snapshot - this uses the Sentience extension's element detection
84+
# try:
85+
# snap = await snapshot(backend)
86+
# print(f"Found {len(snap.elements)} elements")
87+
# except ExtensionNotLoadedError as e:
88+
# print(f"Extension not loaded: {e}")
89+
# print("Make sure the browser was launched with --load-extension flag")
90+
# return
91+
92+
# =========================================================================
93+
# STEP 4: Find and interact with elements using semantic queries
94+
# =========================================================================
95+
#
96+
# Sentience provides powerful element selectors:
97+
# - Role-based: 'role=textbox', 'role=button'
98+
# - Name-based: 'role=button[name="Submit"]'
99+
# - Text-based: 'text=Search'
100+
#
101+
# # Find the search input
102+
# search_input = find(snap, 'role=textbox[name*="Search"]')
103+
# if search_input:
104+
# # Click on the search input (uses center of bounding box)
105+
# await click(backend, search_input.bbox)
106+
#
107+
# # Type search query
108+
# await type_text(backend, "Sentience AI browser automation")
109+
# print("Typed search query")
110+
111+
# =========================================================================
112+
# STEP 5: Using cached snapshots for efficiency
113+
# =========================================================================
114+
#
115+
# Taking snapshots has overhead. Use CachedSnapshot to reuse recent snapshots:
116+
#
117+
# cache = CachedSnapshot(backend, max_age_ms=2000)
118+
#
119+
# # First call takes fresh snapshot
120+
# snap1 = await cache.get()
121+
#
122+
# # Second call returns cached version if less than 2 seconds old
123+
# snap2 = await cache.get()
124+
#
125+
# # After actions that modify DOM, invalidate the cache
126+
# await click(backend, some_element.bbox)
127+
# cache.invalidate() # Next get() will take fresh snapshot
128+
129+
# =========================================================================
130+
# STEP 6: Scrolling the page and scrollable containers
131+
# =========================================================================
132+
#
133+
# # Scroll down by 500 pixels
134+
# await scroll(backend, delta_y=500)
135+
#
136+
# # Scroll at a specific position (useful for scrollable containers)
137+
# await scroll(backend, delta_y=300, target=(400, 500))
138+
139+
# =========================================================================
140+
# STEP 7: Advanced element queries
141+
# =========================================================================
142+
#
143+
# # Find all buttons
144+
# buttons = query(snap, 'role=button')
145+
# print(f"Found {len(buttons)} buttons")
146+
#
147+
# # Find by partial name match (name*= matches a substring of the accessible name)
148+
# links = query(snap, 'role=link[name*="Learn"]')
149+
#
150+
# # Find by exact name match
151+
# submit_btn = find(snap, 'role=button[name="Submit"]')
152+
153+
# =========================================================================
154+
# STEP 8: Error handling
155+
# =========================================================================
156+
#
157+
# Sentience provides specific exceptions for common errors:
158+
#
159+
# from sentience.backends import (
160+
# ExtensionNotLoadedError, # Extension not loaded in browser
161+
# SnapshotError, # Snapshot failed
162+
# ActionError, # Click/type/scroll failed
163+
# )
164+
#
165+
# try:
166+
# snap = await snapshot(backend)
167+
# except ExtensionNotLoadedError as e:
168+
# # The error message includes fix suggestions
169+
# print(f"Fix: {e}")
170+
171+
# =========================================================================
172+
# CLEANUP
173+
# =========================================================================
174+
#
175+
# await session.stop()
176+
177+
print("=" * 60)
178+
print("browser-use + Sentience Integration Example")
179+
print("=" * 60)
180+
print()
181+
print("This example demonstrates the integration pattern.")
182+
print("To run with a real browser, uncomment the code sections above")
183+
print("and install browser-use: pip install browser-use")
184+
print()
185+
print("Key imports:")
186+
print(" from sentience import get_extension_dir, find, query")
187+
print(" from sentience.backends import (")
188+
print(" BrowserUseAdapter, snapshot, click, type_text, scroll")
189+
print(" )")
190+
print()
191+
print("Extension path:", get_extension_dir())
192+
193+
194+
async def full_example() -> None:
195+
"""
196+
Placeholder for a complete working example - requires browser-use installed.
197+
198+
Currently this function only prints setup instructions; the runnable code lives in the commented-out sections of main().
199+
"""
200+
# Import browser-use (uncomment when installed)
201+
# from browser_use import BrowserSession, BrowserProfile
202+
203+
print("To run the full example:")
204+
print("1. Install browser-use: pip install browser-use")
205+
print("2. Uncomment the imports in this function")
206+
print("3. Run: python examples/browser_use_integration.py")
207+
208+
209+
if __name__ == "__main__":
210+
asyncio.run(main())

sentience/backends/__init__.py

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,28 @@
55
Sentience actions (click, type, scroll) to work with different browser
66
automation frameworks.
77
8-
Supported backends:
9-
- PlaywrightBackend: Default backend using Playwright (existing SentienceBrowser)
10-
- CDPBackendV0: CDP-based backend for browser-use integration
8+
Supported Backends
9+
------------------
10+
11+
**PlaywrightBackend**
12+
Wraps Playwright Page objects. Use this when integrating with existing
13+
SentienceBrowser or Playwright-based code.
14+
15+
**CDPBackendV0**
16+
Low-level CDP (Chrome DevTools Protocol) backend. Use this when you have
17+
direct access to a CDP client and session.
18+
19+
**BrowserUseAdapter**
20+
High-level adapter for browser-use framework. Automatically creates a
21+
CDPBackendV0 from a BrowserSession.
22+
23+
Quick Start with browser-use
24+
----------------------------
25+
26+
.. code-block:: python
1127
12-
For browser-use integration:
1328
from browser_use import BrowserSession, BrowserProfile
14-
from sentience import get_extension_dir
29+
from sentience import get_extension_dir, find
1530
from sentience.backends import BrowserUseAdapter, snapshot, click, type_text
1631
1732
# Setup browser-use with Sentience extension
@@ -23,15 +38,63 @@
2338
adapter = BrowserUseAdapter(session)
2439
backend = await adapter.create_backend()
2540
26-
# Take snapshot and interact
41+
# Take snapshot and interact with elements
2742
snap = await snapshot(backend)
28-
element = find(snap, 'role=button[name="Submit"]')
43+
search_box = find(snap, 'role=textbox[name*="Search"]')
44+
await click(backend, search_box.bbox)
45+
await type_text(backend, "Sentience AI")
46+
47+
Snapshot Caching
48+
----------------
49+
50+
Use CachedSnapshot to reduce redundant snapshot calls in action loops:
51+
52+
.. code-block:: python
53+
54+
from sentience.backends import CachedSnapshot
55+
56+
cache = CachedSnapshot(backend, max_age_ms=2000)
57+
58+
snap1 = await cache.get() # Takes fresh snapshot
59+
snap2 = await cache.get() # Returns cached if < 2s old
60+
2961
await click(backend, element.bbox)
62+
cache.invalidate() # Force refresh on next get()
63+
64+
Error Handling
65+
--------------
66+
67+
The module provides specific exceptions for common failure modes:
68+
69+
- ``ExtensionNotLoadedError``: Extension not loaded in browser launch args
70+
- ``SnapshotError``: window.sentience.snapshot() failed
71+
- ``ActionError``: Click/type/scroll operation failed
72+
73+
All exceptions inherit from ``SentienceBackendError`` and include helpful
74+
fix suggestions in their error messages.
75+
76+
.. code-block:: python
77+
78+
from sentience.backends import ExtensionNotLoadedError, snapshot
79+
80+
try:
81+
snap = await snapshot(backend)
82+
except ExtensionNotLoadedError as e:
83+
print(f"Fix suggestion: {e}")
3084
"""
3185

3286
from .actions import click, scroll, scroll_to_element, type_text, wait_for_stable
3387
from .browser_use_adapter import BrowserUseAdapter, BrowserUseCDPTransport
3488
from .cdp_backend import CDPBackendV0, CDPTransport
89+
from .exceptions import (
90+
ActionError,
91+
BackendEvalError,
92+
ExtensionDiagnostics,
93+
ExtensionInjectionError,
94+
ExtensionNotLoadedError,
95+
SentienceBackendError,
96+
SnapshotError,
97+
)
3598
from .playwright_backend import PlaywrightBackend
3699
from .protocol_v0 import BrowserBackendV0, LayoutMetrics, ViewportInfo
37100
from .snapshot import CachedSnapshot, snapshot
@@ -58,4 +121,12 @@
58121
"scroll",
59122
"scroll_to_element",
60123
"wait_for_stable",
124+
# Exceptions
125+
"SentienceBackendError",
126+
"ExtensionNotLoadedError",
127+
"ExtensionInjectionError",
128+
"ExtensionDiagnostics",
129+
"BackendEvalError",
130+
"SnapshotError",
131+
"ActionError",
61132
]

sentience/backends/actions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,8 @@ async def scroll_to_element(
226226
start_time = time.time()
227227

228228
try:
229-
scrolled = await backend.eval(f"""
229+
scrolled = await backend.eval(
230+
f"""
230231
(() => {{
231232
const el = window.sentience_registry && window.sentience_registry[{element_id}];
232233
if (el && el.scrollIntoView) {{
@@ -239,7 +240,8 @@ async def scroll_to_element(
239240
}}
240241
return false;
241242
}})()
242-
""")
243+
"""
244+
)
243245

244246
# Wait for scroll animation
245247
wait_time = 0.3 if behavior == "smooth" else 0.05

0 commit comments

Comments
 (0)