Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ def __init__(

# Step tracking
self.step_id: str | None = None
self.step_index: int = 0
# 0-based step indexing (first auto-generated step_id is "step-0")
self.step_index: int = -1

# Snapshot state
self.last_snapshot: Snapshot | None = None
Expand Down
77 changes: 76 additions & 1 deletion sentience/debugger.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import asyncio
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from typing import TYPE_CHECKING, Any
Expand All @@ -26,6 +27,20 @@ def __init__(self, runtime: AgentRuntime, *, auto_step: bool = True) -> None:
self.runtime = runtime
self._step_open = False
self._auto_step = bool(auto_step)
self._auto_opened_step = False
self._auto_opened_step_id: str | None = None

def _schedule_close_auto_step(self) -> None:
"""
Best-effort: close an auto-opened step without forcing callers to await.
"""
if not (self._step_open and self._auto_opened_step):
return
try:
loop = asyncio.get_running_loop()
except RuntimeError:
return
loop.create_task(self.end_step())

@classmethod
def attach(
Expand All @@ -46,15 +61,25 @@ def attach(
return cls(runtime=runtime)

def begin_step(self, goal: str, step_index: int | None = None) -> str:
    """
    Open a new step on the runtime and mark this debugger as having a step open.

    If the currently open step was auto-opened (by a verification check), a
    best-effort close is scheduled for it first and the auto-open bookkeeping
    is cleared.

    Returns whatever ``runtime.begin_step`` returns for the new step.
    """
    has_dangling_auto_step = self._step_open and self._auto_opened_step
    if has_dangling_auto_step:
        # NOTE(review): the close is only *scheduled* here; it cannot run
        # before this synchronous method returns, i.e. it runs after
        # runtime.begin_step() below. Confirm emit_step_end then targets the
        # intended step.
        self._schedule_close_auto_step()
        self._auto_opened_step, self._auto_opened_step_id = False, None

    self._step_open = True
    new_step_id = self.runtime.begin_step(goal, step_index=step_index)
    return new_step_id

async def end_step(self, **kwargs: Any) -> dict[str, Any]:
    """
    Close the current step and emit its step-end event via the runtime.

    All local bookkeeping (open flag and auto-open markers) is cleared
    *before* awaiting the runtime, then ``kwargs`` are forwarded unchanged to
    ``runtime.emit_step_end`` and its result is returned.
    """
    self._step_open, self._auto_opened_step = False, False
    self._auto_opened_step_id = None

    step_end_event = await self.runtime.emit_step_end(**kwargs)
    return step_end_event

@asynccontextmanager
async def step(self, goal: str, step_index: int | None = None) -> AsyncIterator[None]:
# Async form can safely close any auto-opened step before starting.
if self._step_open and self._auto_opened_step:
await self.end_step()
self.begin_step(goal, step_index=step_index)
try:
yield
Expand All @@ -64,11 +89,61 @@ async def step(self, goal: str, step_index: int | None = None) -> AsyncIterator[
async def snapshot(self, **kwargs: Any):
    """Forward ``kwargs`` to ``runtime.snapshot`` and return its awaited result."""
    captured = await self.runtime.snapshot(**kwargs)
    return captured

async def record_action(self, action: str, *, url: str | None = None) -> None:
    """
    Sidecar helper: report an action that the host framework performed.

    The call is delegated to ``runtime.record_action`` with the same
    ``action`` and ``url``. Reporting actions makes traces easier to read and,
    when artifacts are enabled, enriches the action timeline.
    """
    runtime = self.runtime
    await runtime.record_action(action, url=url)

def check(self, predicate, label: str, required: bool = False):
    """
    Run a runtime check, auto-opening a verification step when none is open.

    If no step is open and auto-stepping is enabled, a step named
    ``verify:<label>`` is opened and remembered as auto-opened. The handle
    returned by ``runtime.check`` is then wrapped so the auto-opened step is
    closed once the assertion has been evaluated. When the active step was
    opened explicitly, the runtime's handle is returned unwrapped.

    Raises:
        RuntimeError: if no step is open and auto-stepping is disabled.
    """
    if not self._step_open:
        if not self._auto_step:
            raise RuntimeError(
                f"No active step. Call dbg.begin_step(...) or use 'async with dbg.step(...)' before check(label={label!r})."
            )
        self.begin_step(f"verify:{label}")
        # Record the auto-open *before* delegating so the wrapper below is
        # actually returned (an early return here would leave these flags
        # unset and the step would never be auto-closed).
        self._auto_opened_step = True
        self._auto_opened_step_id = getattr(self.runtime, "step_id", None)

    base = self.runtime.check(predicate, label, required=required)

    # Explicitly opened steps are the caller's responsibility to close.
    if not self._auto_opened_step:
        return base

    dbg = self
    opened_step_id = self._auto_opened_step_id

    def _still_owns_auto_step() -> bool:
        # Only close if the auto-opened step is still the active one; a real
        # step may have been started in the meantime.
        if not (dbg._step_open and dbg._auto_opened_step):
            return False
        return (
            opened_step_id is None
            or getattr(dbg.runtime, "step_id", None) == opened_step_id
        )

    class _AutoCloseAssertionHandle:
        """Delegates to the runtime's handle, then closes the auto-opened step."""

        def __init__(self, inner):
            self._inner = inner

        def once(self) -> bool:
            ok = self._inner.once()
            if _still_owns_auto_step():
                # Synchronous path cannot await: schedule a best-effort close.
                dbg._schedule_close_auto_step()
            return ok

        async def eventually(self, **kwargs: Any) -> bool:
            ok = await self._inner.eventually(**kwargs)
            if _still_owns_auto_step():
                await dbg.end_step()
            return ok

    return _AutoCloseAssertionHandle(base)
17 changes: 9 additions & 8 deletions tests/test_agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ def test_init_with_backend(self) -> None:
assert runtime.backend is backend
assert runtime.tracer is tracer
assert runtime.step_id is None
assert runtime.step_index == 0
# 0-based step ids: first begin_step() will produce "step-0"
assert runtime.step_index == -1
assert runtime.last_snapshot is None
assert runtime.is_task_done is False

Expand Down Expand Up @@ -221,7 +222,7 @@ def test_begin_step_generates_step_id(self) -> None:
step_id = runtime.begin_step(goal="Test step")

assert step_id is not None
assert step_id == "step-1" # First step should be step-1
assert step_id == "step-0" # First step should be step-0

def test_begin_step_id_matches_index(self) -> None:
"""Test step_id format matches step_index for Studio compatibility."""
Expand All @@ -230,12 +231,12 @@ def test_begin_step_id_matches_index(self) -> None:
runtime = AgentRuntime(backend=backend, tracer=tracer)

step_id_1 = runtime.begin_step(goal="Step 1")
assert step_id_1 == "step-1"
assert runtime.step_index == 1
assert step_id_1 == "step-0"
assert runtime.step_index == 0

step_id_2 = runtime.begin_step(goal="Step 2")
assert step_id_2 == "step-2"
assert runtime.step_index == 2
assert step_id_2 == "step-1"
assert runtime.step_index == 1

# With explicit index
step_id_10 = runtime.begin_step(goal="Step 10", step_index=10)
Expand All @@ -249,10 +250,10 @@ def test_begin_step_increments_index(self) -> None:
runtime = AgentRuntime(backend=backend, tracer=tracer)

runtime.begin_step(goal="Step 1")
assert runtime.step_index == 1
assert runtime.step_index == 0

runtime.begin_step(goal="Step 2")
assert runtime.step_index == 2
assert runtime.step_index == 1

def test_begin_step_explicit_index(self) -> None:
"""Test begin_step with explicit step_index."""
Expand Down
3 changes: 2 additions & 1 deletion tests/test_debugger.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def test_check_auto_opens_step_when_missing() -> None:

runtime.begin_step.assert_called_once_with("verify:has_cart", step_index=None)
runtime.check.assert_called_once_with(predicate, "has_cart", required=True)
assert handle == "check-handle"
assert hasattr(handle, "once")
assert hasattr(handle, "eventually")


def test_check_strict_mode_requires_explicit_step() -> None:
Expand Down
Loading