Merge pull request #202 from SentienceAPI/tighten

rcholic · web-flow · commit fdfc82017eb8 · 2026-01-29T19:00:31.000-08:00
tighten debugger lifecycle ergonomics
diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
@@ -782,6 +782,14 @@ async def emit_step_end(
         self.tracer.emit("step_end", step_end_data, step_id=self.step_id)
         return step_end_data
 
+    async def end_step(self, **kwargs: Any) -> dict[str, Any]:
+        """
+        User-friendly alias for emit_step_end(): forwards **kwargs unchanged.
+
+        Returns whatever emit_step_end() returns; keeps naming symmetric with begin_step().
+        """
+        return await self.emit_step_end(**kwargs)
+
     async def _capture_artifact_frame(self) -> None:
         if not self._artifact_buffer:
             return
diff --git a/sentience/debugger.py b/sentience/debugger.py
@@ -22,9 +22,10 @@ class SentienceDebugger:
     Verifier-only sidecar wrapper around AgentRuntime.
     """
 
-    def __init__(self, runtime: AgentRuntime) -> None:
+    def __init__(self, runtime: AgentRuntime, *, auto_step: bool = True) -> None:
         self.runtime = runtime
         self._step_open = False
+        self._auto_step = bool(auto_step)  # False => check() raises instead of auto-opening a "verify:<label>" step
 
     @classmethod
     def attach(
@@ -65,5 +66,9 @@ async def snapshot(self, **kwargs: Any):
 
     def check(self, predicate, label: str, required: bool = False):
         if not self._step_open:
+            if not self._auto_step:  # strict mode: refuse to open a step implicitly
+                raise RuntimeError(
+                    f"No active step. Call dbg.begin_step(...) or use 'async with dbg.step(...)' before check(label={label!r})."
+                )
             self.begin_step(f"verify:{label}")
         return self.runtime.check(predicate, label, required=required)
diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py
@@ -755,6 +755,21 @@ async def test_snapshot_with_legacy_browser(self) -> None:
         assert result is mock_snapshot
         assert runtime.last_snapshot is mock_snapshot
 
+
+class TestAgentRuntimeEndStep:  # NOTE(review): inserted mid-class — the indented test_snapshot_with_backend that follows now binds to this class; confirm intended
+    @pytest.mark.asyncio
+    async def test_end_step_aliases_emit_step_end(self) -> None:
+        backend = MockBackend()
+        tracer = MockTracer()
+        runtime = AgentRuntime(backend=backend, tracer=tracer)
+
+        with patch.object(runtime, "emit_step_end", new_callable=AsyncMock) as emit_mock:  # stub the target so only the delegation is exercised
+            emit_mock.return_value = {"ok": True}
+            out = await runtime.end_step(action="noop")
+
+        emit_mock.assert_awaited_once_with(action="noop")  # kwargs must be forwarded verbatim
+        assert out == {"ok": True}  # and the stubbed return value passed straight back
+
     @pytest.mark.asyncio
     async def test_snapshot_with_backend(self) -> None:
         """Test snapshot uses backend-agnostic snapshot."""
diff --git a/tests/test_debugger.py b/tests/test_debugger.py
@@ -69,3 +69,14 @@ def test_check_auto_opens_step_when_missing() -> None:
     runtime.begin_step.assert_called_once_with("verify:has_cart", step_index=None)
     runtime.check.assert_called_once_with(predicate, "has_cart", required=True)
     assert handle == "check-handle"
+
+
+def test_check_strict_mode_requires_explicit_step() -> None:
+    runtime = MockRuntime()
+
+    from sentience.debugger import SentienceDebugger
+
+    debugger = SentienceDebugger(runtime=runtime, auto_step=False)  # strict mode: no step has been opened yet
+
+    with pytest.raises(RuntimeError, match="No active step"):  # check() must raise rather than auto-open a step
+        debugger.check(predicate=MagicMock(), label="has_cart", required=True)