From 954c98ed3e0978c32407b5fa2bb57799b0bff67f Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Thu, 29 Jan 2026 19:37:30 -0800 Subject: [PATCH 1/2] updated debugger doc with recordAction, autoStop, stepId 0-based --- sentience/agent_runtime.py | 3 +- sentience/debugger.py | 77 ++++++++++++++++++++++++++++++++++++- tests/test_agent_runtime.py | 14 +++---- tests/test_debugger.py | 3 +- 4 files changed, 87 insertions(+), 10 deletions(-) diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 56e3f0d..b64fb12 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -154,7 +154,8 @@ def __init__( # Step tracking self.step_id: str | None = None - self.step_index: int = 0 + # 0-based step indexing (first auto-generated step_id is "step-0") + self.step_index: int = -1 # Snapshot state self.last_snapshot: Snapshot | None = None diff --git a/sentience/debugger.py b/sentience/debugger.py index de2f3d6..1b8321c 100644 --- a/sentience/debugger.py +++ b/sentience/debugger.py @@ -1,5 +1,6 @@ from __future__ import annotations +import asyncio from collections.abc import AsyncIterator from contextlib import asynccontextmanager from typing import TYPE_CHECKING, Any @@ -26,6 +27,20 @@ def __init__(self, runtime: AgentRuntime, *, auto_step: bool = True) -> None: self.runtime = runtime self._step_open = False self._auto_step = bool(auto_step) + self._auto_opened_step = False + self._auto_opened_step_id: str | None = None + + def _schedule_close_auto_step(self) -> None: + """ + Best-effort: close an auto-opened step without forcing callers to await. 
+ """ + if not (self._step_open and self._auto_opened_step): + return + try: + loop = asyncio.get_running_loop() + except RuntimeError: + return + loop.create_task(self.end_step()) @classmethod def attach( @@ -46,15 +61,25 @@ def attach( return cls(runtime=runtime) def begin_step(self, goal: str, step_index: int | None = None) -> str: + # If we previously auto-opened a verification step, close it before starting a real step. + if self._step_open and self._auto_opened_step: + self._schedule_close_auto_step() + self._auto_opened_step = False + self._auto_opened_step_id = None self._step_open = True return self.runtime.begin_step(goal, step_index=step_index) async def end_step(self, **kwargs: Any) -> dict[str, Any]: self._step_open = False + self._auto_opened_step = False + self._auto_opened_step_id = None return await self.runtime.emit_step_end(**kwargs) @asynccontextmanager async def step(self, goal: str, step_index: int | None = None) -> AsyncIterator[None]: + # Async form can safely close any auto-opened step before starting. + if self._step_open and self._auto_opened_step: + await self.end_step() self.begin_step(goal, step_index=step_index) try: yield @@ -64,6 +89,14 @@ async def step(self, goal: str, step_index: int | None = None) -> AsyncIterator[ async def snapshot(self, **kwargs: Any): return await self.runtime.snapshot(**kwargs) + async def record_action(self, action: str, *, url: str | None = None) -> None: + """ + Sidecar helper: let the host framework report the action it performed. + + This improves trace readability and (when artifacts are enabled) enriches the action timeline. + """ + await self.runtime.record_action(action, url=url) + def check(self, predicate, label: str, required: bool = False): if not self._step_open: if not self._auto_step: @@ -71,4 +104,46 @@ def check(self, predicate, label: str, required: bool = False): f"No active step. Call dbg.begin_step(...) or use 'async with dbg.step(...)' before check(label={label!r})." 
) self.begin_step(f"verify:{label}") - return self.runtime.check(predicate, label, required=required) + self._auto_opened_step = True + self._auto_opened_step_id = getattr(self.runtime, "step_id", None) + + base = self.runtime.check(predicate, label, required=required) + + # Auto-close auto-opened verification steps after the check completes. + if not self._auto_opened_step: + return base + + dbg = self + opened_step_id = self._auto_opened_step_id + + class _AutoCloseAssertionHandle: + def __init__(self, inner): + self._inner = inner + + def once(self) -> bool: + ok = self._inner.once() + if ( + dbg._step_open + and dbg._auto_opened_step + and ( + opened_step_id is None + or getattr(dbg.runtime, "step_id", None) == opened_step_id + ) + ): + dbg._schedule_close_auto_step() + return ok + + async def eventually(self, **kwargs: Any) -> bool: + ok = await self._inner.eventually(**kwargs) + if ( + dbg._step_open + and dbg._auto_opened_step + and ( + opened_step_id is None + or getattr(dbg.runtime, "step_id", None) == opened_step_id + ) + ): + await dbg.end_step() + return ok + + return _AutoCloseAssertionHandle(base) diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py index 3d28184..7ca3927 100644 --- a/tests/test_agent_runtime.py +++ b/tests/test_agent_runtime.py @@ -221,7 +221,7 @@ def test_begin_step_generates_step_id(self) -> None: step_id = runtime.begin_step(goal="Test step") assert step_id is not None - assert step_id == "step-1" # First step should be step-1 + assert step_id == "step-0" # First step should be step-0 def test_begin_step_id_matches_index(self) -> None: """Test step_id format matches step_index for Studio compatibility.""" @@ -230,12 +230,12 @@ def test_begin_step_id_matches_index(self) -> None: runtime = AgentRuntime(backend=backend, tracer=tracer) step_id_1 = runtime.begin_step(goal="Step 1") - assert step_id_1 == "step-1" - assert runtime.step_index == 1 + assert step_id_1 == "step-0" + assert runtime.step_index == 0 step_id_2 
= runtime.begin_step(goal="Step 2") - assert step_id_2 == "step-2" - assert runtime.step_index == 2 + assert step_id_2 == "step-1" + assert runtime.step_index == 1 # With explicit index step_id_10 = runtime.begin_step(goal="Step 10", step_index=10) @@ -249,10 +249,10 @@ def test_begin_step_increments_index(self) -> None: runtime = AgentRuntime(backend=backend, tracer=tracer) runtime.begin_step(goal="Step 1") - assert runtime.step_index == 1 + assert runtime.step_index == 0 runtime.begin_step(goal="Step 2") - assert runtime.step_index == 2 + assert runtime.step_index == 1 def test_begin_step_explicit_index(self) -> None: """Test begin_step with explicit step_index.""" diff --git a/tests/test_debugger.py b/tests/test_debugger.py index acca435..8dbe615 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -68,7 +68,8 @@ def test_check_auto_opens_step_when_missing() -> None: runtime.begin_step.assert_called_once_with("verify:has_cart", step_index=None) runtime.check.assert_called_once_with(predicate, "has_cart", required=True) - assert handle == "check-handle" + assert hasattr(handle, "once") + assert hasattr(handle, "eventually") def test_check_strict_mode_requires_explicit_step() -> None: From 4a5026abc4e9abe3f526677007669cb84b804a4a Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Thu, 29 Jan 2026 19:47:37 -0800 Subject: [PATCH 2/2] fix tests --- tests/test_agent_runtime.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py index 7ca3927..d96e142 100644 --- a/tests/test_agent_runtime.py +++ b/tests/test_agent_runtime.py @@ -99,7 +99,8 @@ def test_init_with_backend(self) -> None: assert runtime.backend is backend assert runtime.tracer is tracer assert runtime.step_id is None - assert runtime.step_index == 0 + # 0-based step ids: first begin_step() will produce "step-0" + assert runtime.step_index == -1 assert runtime.last_snapshot is None assert runtime.is_task_done is False