Merge pull request #204 from SentienceAPI/tighten2

rcholic · web-flow · commit 41cd3f5a6857 · 2026-01-29T20:00:32.000-08:00
updated debugger doc with recordAction, autoStop, stepId 0-based
diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
@@ -154,7 +154,8 @@ def __init__(
 
         # Step tracking
         self.step_id: str | None = None
-        self.step_index: int = 0
+        # 0-based step indexing (first auto-generated step_id is "step-0")
+        self.step_index: int = -1
 
         # Snapshot state
         self.last_snapshot: Snapshot | None = None
diff --git a/sentience/debugger.py b/sentience/debugger.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import asyncio
 from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager
 from typing import TYPE_CHECKING, Any
@@ -26,6 +27,20 @@ def __init__(self, runtime: AgentRuntime, *, auto_step: bool = True) -> None:
         self.runtime = runtime
         self._step_open = False
         self._auto_step = bool(auto_step)
+        self._auto_opened_step = False
+        self._auto_opened_step_id: str | None = None
+
+    def _schedule_close_auto_step(self) -> None:
+        """
+        Best-effort: close an auto-opened step without forcing callers to await.
+        """
+        if not (self._step_open and self._auto_opened_step):
+            return
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            return
+        loop.create_task(self.end_step())
 
     @classmethod
     def attach(
@@ -46,15 +61,25 @@ def attach(
         return cls(runtime=runtime)
 
     def begin_step(self, goal: str, step_index: int | None = None) -> str:
+        # If we previously auto-opened a verification step, close it before starting a real step.
+        if self._step_open and self._auto_opened_step:
+            self._schedule_close_auto_step()
+            self._auto_opened_step = False
+            self._auto_opened_step_id = None
         self._step_open = True
         return self.runtime.begin_step(goal, step_index=step_index)
 
     async def end_step(self, **kwargs: Any) -> dict[str, Any]:
         self._step_open = False
+        self._auto_opened_step = False
+        self._auto_opened_step_id = None
         return await self.runtime.emit_step_end(**kwargs)
 
     @asynccontextmanager
     async def step(self, goal: str, step_index: int | None = None) -> AsyncIterator[None]:
+        # Async form can safely close any auto-opened step before starting.
+        if self._step_open and self._auto_opened_step:
+            await self.end_step()
         self.begin_step(goal, step_index=step_index)
         try:
             yield
@@ -64,11 +89,61 @@ async def step(self, goal: str, step_index: int | None = None) -> AsyncIterator[
     async def snapshot(self, **kwargs: Any):
         return await self.runtime.snapshot(**kwargs)
 
+    async def record_action(self, action: str, *, url: str | None = None) -> None:
+        """
+        Sidecar helper: let the host framework report the action it performed.
+
+        This improves trace readability and (when artifacts are enabled) enriches the action timeline.
+        """
+        await self.runtime.record_action(action, url=url)
+
     def check(self, predicate, label: str, required: bool = False):
         if not self._step_open:
             if not self._auto_step:
                 raise RuntimeError(
                     f"No active step. Call dbg.begin_step(...) or use 'async with dbg.step(...)' before check(label={label!r})."
                 )
             self.begin_step(f"verify:{label}")
-        return self.runtime.check(predicate, label, required=required)
+            self._auto_opened_step = True
+            self._auto_opened_step_id = getattr(self.runtime, "step_id", None)
+
+        base = self.runtime.check(predicate, label, required=required)
+
+        # Auto-close auto-opened verification steps after the check completes.
+        if not self._auto_opened_step:
+            return base
+
+        dbg = self
+        opened_step_id = self._auto_opened_step_id
+
+        class _AutoCloseAssertionHandle:
+            def __init__(self, inner):
+                self._inner = inner
+
+            def once(self) -> bool:
+                ok = self._inner.once()
+                if (
+                    dbg._step_open
+                    and dbg._auto_opened_step
+                    and (
+                        opened_step_id is None
+                        or getattr(dbg.runtime, "step_id", None) == opened_step_id
+                    )
+                ):
+                    dbg._schedule_close_auto_step()
+                return ok
+
+            async def eventually(self, **kwargs: Any) -> bool:
+                ok = await self._inner.eventually(**kwargs)
+                if (
+                    dbg._step_open
+                    and dbg._auto_opened_step
+                    and (
+                        opened_step_id is None
+                        or getattr(dbg.runtime, "step_id", None) == opened_step_id
+                    )
+                ):
+                    await dbg.end_step()
+                return ok
+
+        return _AutoCloseAssertionHandle(base)
diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py
@@ -99,7 +99,8 @@ def test_init_with_backend(self) -> None:
         assert runtime.backend is backend
         assert runtime.tracer is tracer
         assert runtime.step_id is None
-        assert runtime.step_index == 0
+        # 0-based step ids: first begin_step() will produce "step-0"
+        assert runtime.step_index == -1
         assert runtime.last_snapshot is None
         assert runtime.is_task_done is False
 
@@ -221,7 +222,7 @@ def test_begin_step_generates_step_id(self) -> None:
         step_id = runtime.begin_step(goal="Test step")
 
         assert step_id is not None
-        assert step_id == "step-1"  # First step should be step-1
+        assert step_id == "step-0"  # First step should be step-0
 
     def test_begin_step_id_matches_index(self) -> None:
         """Test step_id format matches step_index for Studio compatibility."""
@@ -230,12 +231,12 @@ def test_begin_step_id_matches_index(self) -> None:
         runtime = AgentRuntime(backend=backend, tracer=tracer)
 
         step_id_1 = runtime.begin_step(goal="Step 1")
-        assert step_id_1 == "step-1"
-        assert runtime.step_index == 1
+        assert step_id_1 == "step-0"
+        assert runtime.step_index == 0
 
         step_id_2 = runtime.begin_step(goal="Step 2")
-        assert step_id_2 == "step-2"
-        assert runtime.step_index == 2
+        assert step_id_2 == "step-1"
+        assert runtime.step_index == 1
 
         # With explicit index
         step_id_10 = runtime.begin_step(goal="Step 10", step_index=10)
@@ -249,10 +250,10 @@ def test_begin_step_increments_index(self) -> None:
         runtime = AgentRuntime(backend=backend, tracer=tracer)
 
         runtime.begin_step(goal="Step 1")
-        assert runtime.step_index == 1
+        assert runtime.step_index == 0
 
         runtime.begin_step(goal="Step 2")
-        assert runtime.step_index == 2
+        assert runtime.step_index == 1
 
     def test_begin_step_explicit_index(self) -> None:
         """Test begin_step with explicit step_index."""
diff --git a/tests/test_debugger.py b/tests/test_debugger.py
@@ -68,7 +68,8 @@ def test_check_auto_opens_step_when_missing() -> None:
 
     runtime.begin_step.assert_called_once_with("verify:has_cart", step_index=None)
     runtime.check.assert_called_once_with(predicate, "has_cart", required=True)
-    assert handle == "check-handle"
+    assert hasattr(handle, "once")
+    assert hasattr(handle, "eventually")
 
 
 def test_check_strict_mode_requires_explicit_step() -> None: