Skip to content

Commit e8524e6

Browse files
committed
feat: include complete turn response in Agent.create_turn
Summary: In #102, we made a turn's behavior more complete by automatically passing back the tool response and creating another turn when a client tool is used. However, this creates a problem with the non-streaming API, where the response object only contains information since the last tool call. This PR is a hacky attempt to address this by combining the Turn responses into one. I think ideally we should move all the loop logic to only be on the server side, where a turn would pause and the client SDK would pass tool responses back to resume a turn. Test Plan: Run a simple script with an Agent and a client tool. Observe that the returned response has steps from both created turns. ``` Turn( │ input_messages=[ │ │ UserMessage( │ │ │ content='load https://llama-stack.readthedocs.io/en/latest/introduction/index.html and summarize it', │ │ │ role='user', │ │ │ context=None │ │ ) │ ], │ output_message=CompletionMessage( │ │ content="The document from the given URL is about Google releasing the source code to PebbleOS, which is a significant development for Rebble. This allows Rebble to accelerate its efforts to produce new hardware. 
Rebble had been working on its own replacement firmware, RebbleOS, but the release of PebbleOS's source code will help Rebble to build a production-ready real-time OS for the Pebble.", │ │ role='assistant', │ │ stop_reason='end_of_turn', │ │ tool_calls=[] │ ), │ session_id='dec1c6c0-ed9b-42c1-97d7-906871acd5ba', │ started_at=datetime.datetime(2025, 2, 12, 16, 38, 14, 643186), │ steps=[ │ │ InferenceStep( │ │ │ api_model_response=CompletionMessage( │ │ │ │ content='', │ │ │ │ role='assistant', │ │ │ │ stop_reason='end_of_turn', │ │ │ │ tool_calls=[ │ │ │ │ │ ToolCall( │ │ │ │ │ │ arguments={'url': 'https://llama-stack.readthedocs.io/en/latest/introduction/index.html'}, │ │ │ │ │ │ call_id='5d09151b-8a53-4292-be8d-f21e134d5142', │ │ │ │ │ │ tool_name='load_url' │ │ │ │ │ ) │ │ │ │ ] │ │ │ ), │ │ │ step_id='d724a238-d02b-4d77-a4bc-a978a54979c6', │ │ │ step_type='inference', │ │ │ turn_id='0496c654-cd02-48bb-a2ab-d1a0a5e91aba', │ │ │ completed_at=datetime.datetime(2025, 2, 12, 16, 38, 15, 523310), │ │ │ started_at=datetime.datetime(2025, 2, 12, 16, 38, 14, 654535) │ │ ), │ │ ToolExecutionStep( │ │ │ step_id='49f19a5e-6a1e-4b1c-9232-fbafb82f2f89', │ │ │ step_type='tool_execution', │ │ │ tool_calls=[ │ │ │ │ ToolCall( │ │ │ │ │ arguments={'url': 'https://llama-stack.readthedocs.io/en/latest/introduction/index.html'}, │ │ │ │ │ call_id='5d09151b-8a53-4292-be8d-f21e134d5142', │ │ │ │ │ tool_name='load_url' │ │ │ │ ) │ │ │ ], │ │ │ tool_responses=[ │ │ │ │ ToolResponse( │ │ │ │ │ call_id='5d09151b-8a53-4292-be8d-f21e134d5142', │ │ │ │ │ content='{"content": "\nToday Google announced that they have released the source code to PebbleOS. This is massive for Rebble, and will accelerate our efforts to produce new hardware.\n\nPreviously, we have been working on our own replacement firmware: RebbleOS. As you can see by the commit history though, progress was slow. 
Building a production-ready realtime OS for the Pebble is no small feat, and although we were confident we’d get there given enough time, it was never our ideal path. Thanks to the hard work of many people both within Google and not, we finally have our hands on the original source code for PebbleOS. You can read Google’s blog post on this for even more information.\n\nThis does not mean we instantly have the ability to start developing updates for PebbleOS though, we first will need to spend some concentrated time getting it to build. But before we talk about that, let’s talk about Rebble itself.\n"}', │ │ │ │ │ tool_name='load_url' │ │ │ │ ) │ │ │ ], │ │ │ turn_id='0496c654-cd02-48bb-a2ab-d1a0a5e91aba', │ │ │ completed_at=datetime.datetime(2025, 2, 12, 16, 38, 15, 534830), │ │ │ started_at=datetime.datetime(2025, 2, 12, 16, 38, 15, 534756) │ │ ), │ │ InferenceStep( │ │ │ api_model_response=CompletionMessage( │ │ │ │ content="The document from the given URL is about Google releasing the source code to PebbleOS, which is a significant development for Rebble. This allows Rebble to accelerate its efforts to produce new hardware. Rebble had been working on its own replacement firmware, RebbleOS, but the release of PebbleOS's source code will help Rebble to build a production-ready real-time OS for the Pebble.", │ │ │ │ role='assistant', │ │ │ │ stop_reason='end_of_turn', │ │ │ │ tool_calls=[] │ │ │ ), │ │ │ step_id='5e6daa91-e689-4d7a-a7f9-d7c3da2eca5a', │ │ │ step_type='inference', │ │ │ turn_id='8f65d88d-7643-4dd7-acc7-48cd9e8aa449', │ │ │ completed_at=datetime.datetime(2025, 2, 12, 16, 38, 16, 179107), │ │ │ started_at=datetime.datetime(2025, 2, 12, 16, 38, 15, 561449) │ │ ) │ ], │ turn_id='0496c654-cd02-48bb-a2ab-d1a0a5e91aba', │ completed_at=datetime.datetime(2025, 2, 12, 16, 38, 16, 191199), │ output_attachments=[] ) ```
1 parent b5dce10 commit e8524e6

File tree

1 file changed

+45
-9
lines changed
  • src/llama_stack_client/lib/agents

1 file changed

+45
-9
lines changed

src/llama_stack_client/lib/agents/agent.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
from llama_stack_client.types.agents.turn import CompletionMessage
1616
from .client_tool import ClientTool
1717
from .tool_parser import ToolParser
18+
from datetime import datetime
19+
import uuid
20+
from llama_stack_client.types.tool_execution_step import ToolExecutionStep
21+
from llama_stack_client.types.tool_response import ToolResponse
1822

1923
DEFAULT_MAX_ITER = 10
2024

@@ -119,24 +123,36 @@ def create_turn(
119123
stream: bool = True,
120124
) -> Iterator[AgentTurnResponseStreamChunk] | Turn:
121125
if stream:
122-
return self._create_turn_streaming(messages, session_id, toolgroups, documents, stream)
126+
return self._create_turn_streaming(messages, session_id, toolgroups, documents)
123127
else:
124-
chunk = None
125-
for chunk in self._create_turn_streaming(messages, session_id, toolgroups, documents, stream):
128+
chunks = []
129+
for chunk in self._create_turn_streaming(messages, session_id, toolgroups, documents):
130+
if chunk.event.payload.event_type == "turn_complete":
131+
chunks.append(chunk)
126132
pass
127-
if not chunk:
128-
raise Exception("No chunk returned")
129-
if chunk.event.payload.event_type != "turn_complete":
133+
if not chunks:
130134
raise Exception("Turn did not complete")
131-
return chunk.event.payload.turn
135+
136+
# merge chunks
137+
return Turn(
138+
input_messages=chunks[0].event.payload.turn.input_messages,
139+
output_message=chunks[-1].event.payload.turn.output_message,
140+
session_id=chunks[0].event.payload.turn.session_id,
141+
steps=[step for chunk in chunks for step in chunk.event.payload.turn.steps],
142+
turn_id=chunks[0].event.payload.turn.turn_id,
143+
started_at=chunks[0].event.payload.turn.started_at,
144+
completed_at=chunks[-1].event.payload.turn.completed_at,
145+
output_attachments=[
146+
attachment for chunk in chunks for attachment in chunk.event.payload.turn.output_attachments
147+
],
148+
)
132149

133150
def _create_turn_streaming(
134151
self,
135152
messages: List[Union[UserMessage, ToolResponseMessage]],
136153
session_id: Optional[str] = None,
137154
toolgroups: Optional[List[Toolgroup]] = None,
138155
documents: Optional[List[Document]] = None,
139-
stream: bool = True,
140156
) -> Iterator[AgentTurnResponseStreamChunk]:
141157
stop = False
142158
n_iter = 0
@@ -161,8 +177,28 @@ def _create_turn_streaming(
161177
elif not tool_calls:
162178
yield chunk
163179
else:
180+
yield chunk
181+
tool_execution_start_time = datetime.now()
164182
next_message = self._run_tool(tool_calls)
165-
yield next_message
183+
184+
# HACK: append the tool execution step to the turn
185+
chunk.event.payload.turn.steps.append(
186+
ToolExecutionStep(
187+
step_type="tool_execution",
188+
step_id=str(uuid.uuid4()),
189+
tool_calls=tool_calls,
190+
tool_responses=[
191+
ToolResponse(
192+
tool_name=next_message.tool_name,
193+
content=next_message.content,
194+
call_id=next_message.call_id,
195+
)
196+
],
197+
turn_id=chunk.event.payload.turn.turn_id,
198+
completed_at=datetime.now(),
199+
started_at=tool_execution_start_time,
200+
)
201+
)
166202

167203
# continue the turn when there's a tool call
168204
stop = False

0 commit comments

Comments
 (0)