From 3d870c20e18a5a248ad44a575b3520923deccd22 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 1 Feb 2026 11:39:53 -0800 Subject: [PATCH] improve tracing with automated upload screenshot if opt-in; auto step start --- sentience/agent_runtime.py | 68 ++++++++++++++++- sentience/tracer_factory.py | 49 +++++++++++- sentience/tracing.py | 62 ++++++++++++++++ tests/test_agent_runtime.py | 44 +++++++++++ tests/test_cloud_tracing.py | 81 ++++++++++++++++++++ tests/test_tracing.py | 143 ++++++++++++++++++++++++++++++++++++ 6 files changed, 443 insertions(+), 4 deletions(-) diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 27cd7ff..8e26157 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -312,13 +312,17 @@ async def get_url(self) -> str: self._cached_url = url return url - async def snapshot(self, **kwargs: Any) -> Snapshot: + async def snapshot(self, emit_trace: bool = True, **kwargs: Any) -> Snapshot: """ Take a snapshot of the current page state. This updates last_snapshot which is used as context for assertions. + When emit_trace=True (default), automatically emits a 'snapshot' trace event + with screenshot_base64 for Sentience Studio visualization. Args: + emit_trace: If True (default), emit a 'snapshot' trace event with screenshot. + Set to False to disable automatic trace emission. **kwargs: Override default snapshot options for this call. Common options: - limit: Maximum elements to return @@ -328,6 +332,15 @@ async def snapshot(self, **kwargs: Any) -> Snapshot: Returns: Snapshot of current page state + + Example: + >>> # Default: snapshot with auto-emit trace event + >>> snapshot = await runtime.snapshot() + + >>> # Disable auto-emit for manual control + >>> snapshot = await runtime.snapshot(emit_trace=False) + >>> # Later, manually emit if needed: + >>> tracer.emit_snapshot(snapshot, step_id=runtime.step_id) """ # Check if using legacy browser (backward compat) if hasattr(self, "_legacy_browser") and hasattr(self, "_legacy_page"): @@ -337,6 +350,9 @@ async def snapshot(self, **kwargs: Any) -> Snapshot: if self._step_pre_snapshot is None: self._step_pre_snapshot = self.last_snapshot self._step_pre_url = self.last_snapshot.url + # Auto-emit trace for legacy path too + if emit_trace and self.last_snapshot is not None: + self._emit_snapshot_trace(self.last_snapshot) return self.last_snapshot # Use backend-agnostic snapshot @@ -356,8 +372,33 @@ async def snapshot(self, **kwargs: Any) -> Snapshot: self._step_pre_url = self.last_snapshot.url if not skip_captcha_handling: await self._handle_captcha_if_needed(self.last_snapshot, source="gateway") + + # Auto-emit snapshot trace event for Studio visualization + if emit_trace and self.last_snapshot is not None: + self._emit_snapshot_trace(self.last_snapshot) + return self.last_snapshot + def _emit_snapshot_trace(self, snapshot: Snapshot) -> None: + """ + Emit a snapshot trace event with screenshot for Studio visualization. + + This is called automatically by snapshot() when emit_trace=True. + """ + if self.tracer is None: + return + + try: + self.tracer.emit_snapshot( + snapshot=snapshot, + step_id=self.step_id, + step_index=self.step_index, + screenshot_format="jpeg", + ) + except Exception: + # Best-effort: don't let trace emission errors break snapshot + pass + async def sampled_snapshot( self, *, @@ -903,7 +944,13 @@ def _artifact_metadata(self) -> dict[str, Any]: "url": url, } - def begin_step(self, goal: str, step_index: int | None = None) -> str: + def begin_step( + self, + goal: str, + step_index: int | None = None, + emit_trace: bool = True, + pre_url: str | None = None, + ) -> str: """ Begin a new step in the verification loop. @@ -911,10 +958,13 @@ def begin_step(self, goal: str, step_index: int | None = None) -> str: - Generates a new step_id - Clears assertions from previous step - Increments step_index (or uses provided value) + - Emits step_start trace event (optional) Args: goal: Description of what this step aims to achieve step_index: Optional explicit step index (otherwise auto-increments) + emit_trace: If True (default), emit step_start trace event for Studio timeline + pre_url: Optional URL to record in step_start event (otherwise uses cached URL) Returns: Generated step_id in format 'step-N' where N is the step index @@ -939,6 +989,20 @@ def begin_step(self, goal: str, step_index: int | None = None) -> str: # Generate step_id in 'step-N' format for Studio compatibility self.step_id = f"step-{self.step_index}" + # Emit step_start trace event for Studio timeline display + if emit_trace and self.tracer: + try: + url = pre_url or self._cached_url or "" + self.tracer.emit_step_start( + step_id=self.step_id, + step_index=self.step_index, + goal=goal, + attempt=0, + pre_url=url, + ) + except Exception: + pass # Tracing must be non-fatal + return self.step_id def assert_( diff --git a/sentience/tracer_factory.py b/sentience/tracer_factory.py index 82137b7..fc8dd9f 100644 --- a/sentience/tracer_factory.py +++ b/sentience/tracer_factory.py @@ -18,6 +18,32 @@ from sentience.tracing import JsonlTraceSink, Tracer +def _emit_run_start( + tracer: Tracer, + agent_type: str | None, + llm_model: str | None, + goal: str | None, + start_url: str | None, +) -> None: + """ + Helper to emit run_start event with available metadata. + """ + try: + config: dict[str, Any] = {} + if goal: + config["goal"] = goal + if start_url: + config["start_url"] = start_url + + tracer.emit_run_start( + agent=agent_type or "SentienceAgent", + llm_model=llm_model, + config=config if config else None, + ) + except Exception: + pass # Tracing must be non-fatal + + def create_tracer( api_key: str | None = None, run_id: str | None = None, @@ -29,6 +55,7 @@ def create_tracer( llm_model: str | None = None, start_url: str | None = None, screenshot_processor: Callable[[str], str] | None = None, + auto_emit_run_start: bool = True, ) -> Tracer: """ Create tracer with automatic tier detection. @@ -56,6 +83,9 @@ def create_tracer( screenshot_processor: Optional function to process screenshots before upload. Takes base64 string, returns processed base64 string. Useful for PII redaction or custom image processing. + auto_emit_run_start: If True (default), automatically emit run_start event + with the provided metadata. This ensures traces have + complete structure for Studio visualization. Returns: Tracer configured with appropriate sink @@ -71,6 +101,7 @@ def create_tracer( ... start_url="https://amazon.com" ... ) >>> # Returns: Tracer with CloudTraceSink + >>> # run_start event is automatically emitted >>> >>> # With screenshot processor for PII redaction >>> def redact_pii(screenshot_base64: str) -> str: @@ -87,6 +118,10 @@ def create_tracer( >>> tracer = create_tracer(run_id="demo") >>> # Returns: Tracer with JsonlTraceSink (local-only) >>> + >>> # Disable auto-emit for manual control + >>> tracer = create_tracer(run_id="demo", auto_emit_run_start=False) + >>> tracer.emit_run_start("MyAgent", "gpt-4o") # Manual emit + >>> >>> # Use with agent >>> agent = SentienceAgent(browser, llm, tracer=tracer) >>> agent.act("Click search") @@ -136,7 +171,7 @@ def create_tracer( if upload_url: print("☁️ [Sentience] Cloud tracing enabled (Pro tier)") - return Tracer( + tracer = Tracer( run_id=run_id, sink=CloudTraceSink( upload_url=upload_url, @@ -147,6 +182,10 @@ def create_tracer( ), screenshot_processor=screenshot_processor, ) + # Auto-emit run_start for complete trace structure + if auto_emit_run_start: + _emit_run_start(tracer, agent_type, llm_model, goal, start_url) + return tracer else: print("⚠️ [Sentience] Cloud init response missing upload_url") print(f" Response data: {data}") @@ -204,12 +243,18 @@ def create_tracer( local_path = traces_dir / f"{run_id}.jsonl" print(f"💾 [Sentience] Local tracing: {local_path}") - return Tracer( + tracer = Tracer( run_id=run_id, sink=JsonlTraceSink(str(local_path)), screenshot_processor=screenshot_processor, ) + # Auto-emit run_start for complete trace structure + if auto_emit_run_start: + _emit_run_start(tracer, agent_type, llm_model, goal, start_url) + + return tracer + def _recover_orphaned_traces(api_key: str, api_url: str = SENTIENCE_API_URL) -> None: """ diff --git a/sentience/tracing.py b/sentience/tracing.py index 06e8639..cbfeac0 100644 --- a/sentience/tracing.py +++ b/sentience/tracing.py @@ -358,6 +358,68 @@ def emit_error( } self.emit("error", data, step_id=step_id) + def emit_snapshot( + self, + snapshot: Any, + step_id: str | None = None, + step_index: int | None = None, + screenshot_format: str = "jpeg", + ) -> None: + """ + Emit snapshot event with screenshot for Studio visualization. + + This method builds and emits a 'snapshot' trace event that includes: + - Page URL and element data + - Screenshot (if present in snapshot) + - Step correlation info + + Use this when you want screenshots to appear in the Sentience Studio timeline. + + Args: + snapshot: Snapshot object (must have 'screenshot' attribute for images) + step_id: Step UUID (for correlating snapshot with a step) + step_index: Step index (0-based) for Studio timeline ordering + screenshot_format: Format of screenshot ("jpeg" or "png", default: "jpeg") + + Example: + >>> # After taking a snapshot with AgentRuntime + >>> snapshot = await runtime.snapshot(screenshot=True) + >>> tracer.emit_snapshot(snapshot, step_id=runtime.step_id, step_index=runtime.step_index) + + >>> # Or use auto-emit (default in AgentRuntime.snapshot()) + >>> snapshot = await runtime.snapshot() # Auto-emits snapshot event + """ + if snapshot is None: + return + + try: + # Import TraceEventBuilder here to avoid circular imports + from .trace_event_builder import TraceEventBuilder + + # Build the snapshot event data + data = TraceEventBuilder.build_snapshot_event(snapshot, step_index=step_index) + + # Extract and add screenshot if present + screenshot_raw = getattr(snapshot, "screenshot", None) + if screenshot_raw: + # Extract base64 string from data URL if needed + # Format: "data:image/jpeg;base64,{base64_string}" + if isinstance(screenshot_raw, str) and screenshot_raw.startswith("data:image"): + screenshot_base64 = ( + screenshot_raw.split(",", 1)[1] + if "," in screenshot_raw + else screenshot_raw + ) + else: + screenshot_base64 = screenshot_raw + data["screenshot_base64"] = screenshot_base64 + data["screenshot_format"] = screenshot_format + + self.emit("snapshot", data=data, step_id=step_id) + except Exception: + # Best-effort: don't let trace emission errors break the caller + pass + def set_final_status(self, status: str) -> None: """ Set the final status of the trace run. diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py index d96e142..c0d2ef8 100644 --- a/tests/test_agent_runtime.py +++ b/tests/test_agent_runtime.py @@ -75,6 +75,8 @@ class MockTracer: def __init__(self) -> None: self.events: list[dict] = [] + self.emit_step_start_called: bool = False + self.emit_step_start_args: dict = {} def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None: self.events.append( @@ -85,6 +87,23 @@ def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None: } ) + def emit_step_start( + self, + step_id: str, + step_index: int, + goal: str, + attempt: int = 0, + pre_url: str | None = None, + ) -> None: + self.emit_step_start_called = True + self.emit_step_start_args = { + "step_id": step_id, + "step_index": step_index, + "goal": goal, + "attempt": attempt, + "pre_url": pre_url, + } + class TestAgentRuntimeInit: """Tests for AgentRuntime initialization.""" @@ -277,6 +296,31 @@ def test_begin_step_clears_assertions(self) -> None: assert runtime._assertions_this_step == [] + def test_begin_step_emits_step_start_event(self) -> None: + """Test begin_step emits step_start trace event by default.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + runtime.begin_step(goal="Test step", step_index=1) + + # Check that emit_step_start was called + assert tracer.emit_step_start_called is True + assert tracer.emit_step_start_args["step_id"] == "step-1" + assert tracer.emit_step_start_args["step_index"] == 1 + assert tracer.emit_step_start_args["goal"] == "Test step" + + def test_begin_step_emit_trace_false(self) -> None: + """Test begin_step with emit_trace=False skips trace event.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + runtime.begin_step(goal="Test step", step_index=1, emit_trace=False) + + # Check that emit_step_start was NOT called + assert tracer.emit_step_start_called is False + class TestAgentRuntimeAssertions: """Tests for assertion methods.""" diff --git a/tests/test_cloud_tracing.py b/tests/test_cloud_tracing.py index 5b343cb..8685499 100644 --- a/tests/test_cloud_tracing.py +++ b/tests/test_cloud_tracing.py @@ -754,6 +754,87 @@ def test_create_tracer_orphaned_trace_recovery(self, capsys): os.remove(orphaned_path) +class TestCreateTracerAutoEmitRunStart: + """Tests for create_tracer auto_emit_run_start functionality.""" + + def test_create_tracer_auto_emits_run_start_by_default(self): + """Test create_tracer automatically emits run_start event.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Use local tracing (no API key) for simplicity + with patch("sentience.tracer_factory.Path") as mock_path_cls: + # Make traces dir point to temp dir + mock_path_cls.return_value.mkdir = Mock() + mock_path_cls.return_value.__truediv__ = lambda self, x: Path(tmpdir) / x + + tracer = create_tracer( + run_id="test-run", + agent_type="TestAgent", + llm_model="gpt-4o", + goal="Test goal", + start_url="https://example.com", + ) + + # Close and read the trace file + tracer.close() + + # Find the trace file + trace_files = list(Path(tmpdir).glob("*.jsonl")) + if not trace_files: + # Check traces subdir + traces_dir = Path("traces") + if traces_dir.exists(): + trace_files = list(traces_dir.glob("test-run.jsonl")) + + # The tracer should have emitted at least one event (run_start) + assert tracer.seq >= 1, "run_start should be auto-emitted" + + def test_create_tracer_auto_emit_disabled(self): + """Test create_tracer with auto_emit_run_start=False.""" + tracer = create_tracer( + run_id="test-run-no-emit", + agent_type="TestAgent", + auto_emit_run_start=False, + ) + + # With auto-emit disabled, seq should still be 0 + assert tracer.seq == 0, "run_start should NOT be emitted when auto_emit_run_start=False" + + tracer.close() + + def test_create_tracer_auto_emit_with_metadata(self): + """Test create_tracer auto-emits run_start with correct metadata.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "test-run.jsonl" + sink = JsonlTraceSink(str(trace_path)) + + # Directly test the helper function behavior + tracer = Tracer(run_id="test-run", sink=sink) + + # Import and call the helper directly + from sentience.tracer_factory import _emit_run_start + + _emit_run_start( + tracer, + agent_type="CustomAgent", + llm_model="claude-3", + goal="Test goal", + start_url="https://test.com", + ) + + tracer.close() + + # Read and verify the emitted event + lines = trace_path.read_text().strip().split("\n") + assert len(lines) == 1 + + event = json.loads(lines[0]) + assert event["type"] == "run_start" + assert event["data"]["agent"] == "CustomAgent" + assert event["data"]["llm_model"] == "claude-3" + assert event["data"]["config"]["goal"] == "Test goal" + assert event["data"]["config"]["start_url"] == "https://test.com" + + class TestRegressionTests: """Regression tests to ensure cloud tracing doesn't break existing functionality.""" diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 7a3c254..0f2f8d6 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -568,3 +568,146 @@ def test_tracer_context_manager(): # Verify file is closed and flushed lines = trace_path.read_text().strip().split("\n") assert len(lines) == 1 + + +# ============================================================================ +# Tests for emit_snapshot() helper method +# ============================================================================ + + +class MockSnapshot: + """Mock snapshot object for testing emit_snapshot().""" + + def __init__( + self, + url: str = "https://example.com", + screenshot: str | None = None, + timestamp: str = "2024-01-01T00:00:00.000Z", + ): + self.url = url + self.screenshot = screenshot + self.timestamp = timestamp + self.elements = [] + + +def test_tracer_emit_snapshot_basic(): + """Test Tracer.emit_snapshot() emits snapshot event.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "trace.jsonl" + + with JsonlTraceSink(trace_path) as sink: + tracer = Tracer(run_id="test-run-123", sink=sink) + snapshot = MockSnapshot(url="https://example.com") + tracer.emit_snapshot( + snapshot=snapshot, + step_id="step-456", + step_index=1, + ) + + lines = trace_path.read_text().strip().split("\n") + event = json.loads(lines[0]) + + assert event["type"] == "snapshot" + assert event["step_id"] == "step-456" + assert event["data"]["url"] == "https://example.com" + assert event["data"]["step_index"] == 1 + assert event["data"]["element_count"] == 0 + + +def test_tracer_emit_snapshot_with_screenshot(): + """Test Tracer.emit_snapshot() includes screenshot_base64.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "trace.jsonl" + + with JsonlTraceSink(trace_path) as sink: + tracer = Tracer(run_id="test-run-123", sink=sink) + # Raw base64 string (no data URL prefix) + snapshot = MockSnapshot( + url="https://example.com", + screenshot="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ", + ) + tracer.emit_snapshot(snapshot=snapshot, step_id="step-456") + + lines = trace_path.read_text().strip().split("\n") + event = json.loads(lines[0]) + + assert event["type"] == "snapshot" + assert event["data"]["screenshot_base64"] == "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ" + assert event["data"]["screenshot_format"] == "jpeg" + + +def test_tracer_emit_snapshot_with_data_url(): + """Test Tracer.emit_snapshot() extracts base64 from data URL.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "trace.jsonl" + + with JsonlTraceSink(trace_path) as sink: + tracer = Tracer(run_id="test-run-123", sink=sink) + # Data URL format (common from browser screenshot APIs) + snapshot = MockSnapshot( + url="https://example.com", + screenshot="data:image/jpeg;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ", + ) + tracer.emit_snapshot(snapshot=snapshot, step_id="step-456") + + lines = trace_path.read_text().strip().split("\n") + event = json.loads(lines[0]) + + # Should extract just the base64 part (strip data URL prefix) + assert event["data"]["screenshot_base64"] == "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ" + assert event["data"]["screenshot_format"] == "jpeg" + + +def test_tracer_emit_snapshot_without_step_id(): + """Test Tracer.emit_snapshot() works without step_id.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "trace.jsonl" + + with JsonlTraceSink(trace_path) as sink: + tracer = Tracer(run_id="test-run-123", sink=sink) + snapshot = MockSnapshot(url="https://example.com") + tracer.emit_snapshot(snapshot=snapshot) + + lines = trace_path.read_text().strip().split("\n") + event = json.loads(lines[0]) + + assert event["type"] == "snapshot" + assert "step_id" not in event + assert event["data"]["url"] == "https://example.com" + + +def test_tracer_emit_snapshot_none_snapshot(): + """Test Tracer.emit_snapshot() handles None snapshot gracefully.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "trace.jsonl" + + with JsonlTraceSink(trace_path) as sink: + tracer = Tracer(run_id="test-run-123", sink=sink) + tracer.emit_snapshot(snapshot=None) + + # Should not emit anything for None snapshot + content = trace_path.read_text().strip() + assert content == "" + + +def test_tracer_emit_snapshot_custom_format(): + """Test Tracer.emit_snapshot() with custom screenshot format.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "trace.jsonl" + + with JsonlTraceSink(trace_path) as sink: + tracer = Tracer(run_id="test-run-123", sink=sink) + snapshot = MockSnapshot( + url="https://example.com", + screenshot="iVBORw0KGgo...", + ) + tracer.emit_snapshot( + snapshot=snapshot, + step_id="step-456", + screenshot_format="png", + ) + + lines = trace_path.read_text().strip().split("\n") + event = json.loads(lines[0]) + + assert event["data"]["screenshot_format"] == "png"