diff --git a/pyproject.toml b/pyproject.toml index 80376c8..de7fb28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath-runtime" -version = "0.1.2" +version = "0.1.3" description = "Runtime abstractions and interfaces for building agents and automation scripts in the UiPath ecosystem" readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" diff --git a/src/uipath/runtime/chat/__init__.py b/src/uipath/runtime/chat/__init__.py new file mode 100644 index 0000000..1720cdc --- /dev/null +++ b/src/uipath/runtime/chat/__init__.py @@ -0,0 +1,135 @@ +"""UiPath Conversation Models. + +This module provides Pydantic models that represent the JSON event schema for conversations between a client (UI) and an LLM/agent. + +The event objects define a hierarchical conversation structure: + +* Conversation + * Exchange + * Message + * Content Parts + * Citations + * Tool Calls + * Tool Results + + A conversation may contain multiple exchanges, and an exchange may contain multiple messages. A message may contain + multiple content parts, each of which can be text or binary, including media input and output streams; and each + content part can include multiple citations. A message may also contain multiple tool calls, which may contain a tool + result. + + The protocol also supports top-level "async" input media streams (audio and video), which can span multiple + exchanges. These are used for Gemini's automatic turn detection mode, where the LLM determines when the user has + stopped talking and starts producing output. The output forms one or more messages in an exchange with no explicit + input message. However, the LLM may produce an input transcript which can be used to construct the implicit input + message that started the exchange. + + In addition, the protocol also supports "async" tool calls that span multiple exchanges. 
This can be used with + Gemini's asynchronous function calling protocol, which allows function calls to produce results that interrupt the + conversation when ready, even after multiple exchanges. They also support generating multiple results from a single + tool call. By contrast most tool calls are scoped to a single message, which contains both the call and the single + result produced by that call. + + Not all features supported by the protocol will be supported by all clients and LLMs. The optional top level + `capabilities` property can be used to communicate information about supported features. This property should be set + on the first event written to a new websocket connection. This initial event may or may not contain additional + sub-events. +""" + +from .async_stream import ( + UiPathConversationAsyncInputStreamEndEvent, + UiPathConversationAsyncInputStreamEvent, + UiPathConversationAsyncInputStreamStartEvent, + UiPathConversationInputStreamChunkEvent, +) +from .citation import ( + UiPathConversationCitationEndEvent, + UiPathConversationCitationEvent, + UiPathConversationCitationSource, + UiPathConversationCitationSourceMedia, + UiPathConversationCitationSourceUrl, + UiPathConversationCitationStartEvent, +) +from .content import ( + UiPathConversationContentPart, + UiPathConversationContentPartChunkEvent, + UiPathConversationContentPartEndEvent, + UiPathConversationContentPartEvent, + UiPathConversationContentPartStartEvent, + UiPathExternalValue, + UiPathInlineValue, +) +from .conversation import ( + UiPathConversationCapabilities, + UiPathConversationEndEvent, + UiPathConversationStartedEvent, + UiPathConversationStartEvent, +) +from .event import UiPathConversationEvent +from .exchange import ( + UiPathConversationExchange, + UiPathConversationExchangeEndEvent, + UiPathConversationExchangeEvent, + UiPathConversationExchangeStartEvent, +) +from .message import ( + UiPathConversationMessage, + UiPathConversationMessageEndEvent, + 
UiPathConversationMessageEvent, + UiPathConversationMessageStartEvent, +) +from .meta import UiPathConversationMetaEvent +from .tool import ( + UiPathConversationToolCall, + UiPathConversationToolCallEndEvent, + UiPathConversationToolCallEvent, + UiPathConversationToolCallResult, + UiPathConversationToolCallStartEvent, +) + +__all__ = [ + # Root + "UiPathConversationEvent", + # Conversation + "UiPathConversationCapabilities", + "UiPathConversationStartEvent", + "UiPathConversationStartedEvent", + "UiPathConversationEndEvent", + # Exchange + "UiPathConversationExchangeStartEvent", + "UiPathConversationExchangeEndEvent", + "UiPathConversationExchangeEvent", + "UiPathConversationExchange", + # Message + "UiPathConversationMessageStartEvent", + "UiPathConversationMessageEndEvent", + "UiPathConversationMessageEvent", + "UiPathConversationMessage", + # Content + "UiPathConversationContentPartChunkEvent", + "UiPathConversationContentPartStartEvent", + "UiPathConversationContentPartEndEvent", + "UiPathConversationContentPartEvent", + "UiPathConversationContentPart", + "UiPathInlineValue", + "UiPathExternalValue", + # Citation + "UiPathConversationCitationStartEvent", + "UiPathConversationCitationEndEvent", + "UiPathConversationCitationEvent", + "UiPathConversationCitationSource", + "UiPathConversationCitationSourceUrl", + "UiPathConversationCitationSourceMedia", + # Tool + "UiPathConversationToolCallStartEvent", + "UiPathConversationToolCallEndEvent", + "UiPathConversationToolCallEvent", + "UiPathConversationToolCallResult", + "UiPathConversationToolCall", + # Async Stream + "UiPathConversationInputStreamChunkEvent", + "UiPathConversationAsyncInputStreamStartEvent", + "UiPathConversationAsyncInputStreamEndEvent", + "UiPathConversationAsyncInputStreamEvent", + # Meta + "UiPathConversationMetaEvent", +] diff --git a/src/uipath/runtime/chat/async_stream.py b/src/uipath/runtime/chat/async_stream.py new file mode 100644 index 0000000..1f43783 --- /dev/null +++ 
b/src/uipath/runtime/chat/async_stream.py @@ -0,0 +1,52 @@ +"""Async input stream events.""" + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class UiPathConversationInputStreamChunkEvent(BaseModel): + """Represents a single chunk of input stream data.""" + + input_stream_sequence: int | None = Field(None, alias="inputStreamSequence") + data: str + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationAsyncInputStreamStartEvent(BaseModel): + """Signals the start of an asynchronous input stream.""" + + mime_type: str = Field(..., alias="mimeType") + start_of_speech_sensitivity: str | None = Field( + None, alias="startOfSpeechSensitivity" + ) + end_of_speech_sensitivity: str | None = Field(None, alias="endOfSpeechSensitivity") + prefix_padding_ms: int | None = Field(None, alias="prefixPaddingMs") + silence_duration_ms: int | None = Field(None, alias="silenceDurationMs") + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationAsyncInputStreamEndEvent(BaseModel): + """Signals the end of an asynchronous input stream.""" + + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + last_chunk_content_part_sequence: int | None = Field( + None, alias="lastChunkContentPartSequence" + ) + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationAsyncInputStreamEvent(BaseModel): + """Encapsulates sub-events related to an asynchronous input stream.""" + + stream_id: str = Field(..., alias="streamId") + start: UiPathConversationAsyncInputStreamStartEvent | None = None + end: UiPathConversationAsyncInputStreamEndEvent | None = None + chunk: UiPathConversationInputStreamChunkEvent | None = None + meta_event: dict[str, Any] | None = Field(None, alias="metaEvent") + + model_config = ConfigDict(validate_by_name=True, 
validate_by_alias=True) diff --git a/src/uipath/runtime/chat/citation.py b/src/uipath/runtime/chat/citation.py new file mode 100644 index 0000000..106872a --- /dev/null +++ b/src/uipath/runtime/chat/citation.py @@ -0,0 +1,70 @@ +"""Citation events for message content.""" + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class UiPathConversationCitationStartEvent(BaseModel): + """Indicates the start of a citation target in a content part.""" + + pass + + +class UiPathConversationCitationEndEvent(BaseModel): + """Indicates the end of a citation target in a content part.""" + + sources: list[dict[str, Any]] + + +class UiPathConversationCitationEvent(BaseModel): + """Encapsulates sub-events related to citations.""" + + citation_id: str = Field(..., alias="citationId") + start: UiPathConversationCitationStartEvent | None = None + end: UiPathConversationCitationEndEvent | None = None + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationCitationSourceUrl(BaseModel): + """Represents a citation source that can be rendered as a link (URL).""" + + url: str + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationCitationSourceMedia(BaseModel): + """Represents a citation source that references media, such as a PDF document.""" + + mime_type: str = Field(..., alias="mimeType") + download_url: str | None = Field(None, alias="downloadUrl") + page_number: str | None = Field(None, alias="pageNumber") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationCitationSource(BaseModel): + """Represents a citation source, either a URL or media reference.""" + + title: str | None = None + + # Union of Url or Media + url: str | None = None + mime_type: str | None = Field(None, alias="mimeType") + download_url: str | None = Field(None, alias="downloadUrl") + page_number: str | None = Field(None, 
alias="pageNumber") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationCitation(BaseModel): + """Represents a citation or reference inside a content part.""" + + citation_id: str = Field(..., alias="citationId") + offset: int + length: int + sources: list[UiPathConversationCitationSource] + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) diff --git a/src/uipath/runtime/chat/content.py b/src/uipath/runtime/chat/content.py new file mode 100644 index 0000000..f41e737 --- /dev/null +++ b/src/uipath/runtime/chat/content.py @@ -0,0 +1,80 @@ +"""Message content part events.""" + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from .citation import UiPathConversationCitation, UiPathConversationCitationEvent + + +class UiPathConversationContentPartChunkEvent(BaseModel): + """Contains a chunk of a message content part.""" + + content_part_sequence: int | None = Field(None, alias="contentPartSequence") + data: str | None = None + citation: UiPathConversationCitationEvent | None = None + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationContentPartStartEvent(BaseModel): + """Signals the start of a message content part.""" + + mime_type: str = Field(..., alias="mimeType") + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationContentPartEndEvent(BaseModel): + """Signals the end of a message content part.""" + + last_chunk_content_part_sequence: int | None = Field( + None, alias="lastChunkContentPartSequence" + ) + interrupted: dict[str, Any] | None = None + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationContentPartEvent(BaseModel): + """Encapsulates events related to message 
content parts.""" + + content_part_id: str = Field(..., alias="contentPartId") + start: UiPathConversationContentPartStartEvent | None = None + end: UiPathConversationContentPartEndEvent | None = None + chunk: UiPathConversationContentPartChunkEvent | None = None + meta_event: dict[str, Any] | None = Field(None, alias="metaEvent") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathInlineValue(BaseModel): + """Used when a value is small enough to be returned inline.""" + + inline: Any + + +class UiPathExternalValue(BaseModel): + """Used when a value is too large to be returned inline.""" + + url: str + byte_count: int | None = Field(None, alias="byteCount") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +InlineOrExternal = UiPathInlineValue | UiPathExternalValue + + +class UiPathConversationContentPart(BaseModel): + """Represents a single part of message content.""" + + content_part_id: str = Field(..., alias="contentPartId") + mime_type: str = Field(..., alias="mimeType") + data: InlineOrExternal + citations: list[UiPathConversationCitation] | None = None + is_transcript: bool | None = Field(None, alias="isTranscript") + is_incomplete: bool | None = Field(None, alias="isIncomplete") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) diff --git a/src/uipath/runtime/chat/conversation.py b/src/uipath/runtime/chat/conversation.py new file mode 100644 index 0000000..c694c44 --- /dev/null +++ b/src/uipath/runtime/chat/conversation.py @@ -0,0 +1,49 @@ +"""Conversation-level events and capabilities.""" + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class UiPathConversationCapabilities(BaseModel): + """Describes the capabilities of a conversation participant.""" + + async_input_stream_emitter: bool | None = Field( + None, alias="asyncInputStreamEmitter" + ) + async_input_stream_handler: bool | None = Field( + None, 
alias="asyncInputStreamHandler" + ) + async_tool_call_emitter: bool | None = Field(None, alias="asyncToolCallEmitter") + async_tool_call_handler: bool | None = Field(None, alias="asyncToolCallHandler") + mime_types_emitted: list[str] | None = Field(None, alias="mimeTypesEmitted") + mime_types_handled: list[str] | None = Field(None, alias="mimeTypesHandled") + + model_config = ConfigDict( + validate_by_name=True, validate_by_alias=True, extra="allow" + ) + + +class UiPathConversationStartEvent(BaseModel): + """Signals the start of a conversation event stream.""" + + capabilities: UiPathConversationCapabilities | None = None + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationStartedEvent(BaseModel): + """Signals the acceptance of the start of a conversation.""" + + capabilities: UiPathConversationCapabilities | None = None + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationEndEvent(BaseModel): + """Signals the end of a conversation event stream.""" + + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) diff --git a/src/uipath/runtime/chat/event.py b/src/uipath/runtime/chat/event.py new file mode 100644 index 0000000..de97831 --- /dev/null +++ b/src/uipath/runtime/chat/event.py @@ -0,0 +1,53 @@ +"""The top-level event type representing an event in a conversation. + +This is the root container for all other event subtypes (conversation start, +exchanges, messages, content, citations, tool calls, and async streams). 
+""" + +from pydantic import BaseModel, ConfigDict, Field + +from .async_stream import UiPathConversationAsyncInputStreamEvent +from .conversation import ( + UiPathConversationEndEvent, + UiPathConversationStartedEvent, + UiPathConversationStartEvent, +) +from .exchange import UiPathConversationExchangeEvent +from .meta import UiPathConversationMetaEvent +from .tool import UiPathConversationToolCallEvent + + +class UiPathConversationEvent(BaseModel): + """The top-level event type representing an event in a conversation. + + This is the root container for all other event subtypes (conversation start, + exchanges, messages, content, citations, tool calls, and async streams). + """ + + """A globally unique identifier for the conversation to which the other sub-event and data properties apply.""" + conversation_id: str = Field(..., alias="conversationId") + """Signals the start of an event stream concerning a conversation. This event does NOT necessarily mean this is a + brand new conversation. It may be a continuation of an existing conversation. + """ + start: UiPathConversationStartEvent | None = None + """Signals the acceptance of the start of a conversation.""" + started: UiPathConversationStartedEvent | None = None + """Signals the end of a conversation event stream. This does NOT mean the conversation is over. A new event stream for + the conversation could be started in the future. + """ + end: UiPathConversationEndEvent | None = None + """Encapsulates sub-events related to an exchange within a conversation.""" + exchange: UiPathConversationExchangeEvent | None = None + """Encapsulates sub-events related to an asynchronous input stream.""" + async_input_stream: UiPathConversationAsyncInputStreamEvent | None = Field( + None, alias="asyncInputStream" + ) + """Optional async tool call sub-event. This feature is not supported by all LLMs. Most tool calls are scoped to a + message, and use the toolCall and toolResult properties defined by the ConversationMessage type. 
+ """ + async_tool_call: UiPathConversationToolCallEvent | None = Field( + None, alias="asyncToolCall" + ) + """Allows additional events to be sent in the context of the enclosing event stream.""" + meta_event: UiPathConversationMetaEvent | None = Field(None, alias="metaEvent") + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) diff --git a/src/uipath/runtime/chat/exchange.py b/src/uipath/runtime/chat/exchange.py new file mode 100644 index 0000000..c0a8ff5 --- /dev/null +++ b/src/uipath/runtime/chat/exchange.py @@ -0,0 +1,61 @@ +"""Exchange-level events. + +Characteristics of an Exchange: +It groups together messages that belong to the same turn of conversation. + +Example: + User says something → one message inside the exchange. + LLM responds → one or more messages in the same exchange. + +Each exchange has: + A start event (signals the beginning of the turn). + An end event (signals the end of the turn). + Messages that happened in between. + +An exchange can include multiple messages (e.g. LLM streaming several outputs, or user message + assistant + tool outputs). +Exchanges are ordered within a conversation via conversation_sequence. 
+""" + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from .message import UiPathConversationMessage, UiPathConversationMessageEvent + + +class UiPathConversationExchangeStartEvent(BaseModel): + """Signals the start of an exchange of messages within a conversation.""" + + conversation_sequence: int | None = Field(None, alias="conversationSequence") + metadata: dict[str, Any] | None = None + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationExchangeEndEvent(BaseModel): + """Signals the end of an exchange of messages within a conversation.""" + + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationExchangeEvent(BaseModel): + """Encapsulates a single exchange in the conversation.""" + + exchange_id: str = Field(..., alias="exchangeId") + start: UiPathConversationExchangeStartEvent | None = None + end: UiPathConversationExchangeEndEvent | None = None + message: UiPathConversationMessageEvent | None = None + meta_event: dict[str, Any] | None = Field(None, alias="metaEvent") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationExchange(BaseModel): + """Represents a group of related messages (one turn of conversation).""" + + exchange_id: str = Field(..., alias="exchangeId") + messages: list[UiPathConversationMessage] + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) diff --git a/src/uipath/runtime/chat/message.py b/src/uipath/runtime/chat/message.py new file mode 100644 index 0000000..8767ced --- /dev/null +++ b/src/uipath/runtime/chat/message.py @@ -0,0 +1,57 @@ +"""Message-level events.""" + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from .content import UiPathConversationContentPart, UiPathConversationContentPartEvent +from .tool import 
UiPathConversationToolCall, UiPathConversationToolCallEvent + + +class UiPathConversationMessageStartEvent(BaseModel): + """Signals the start of a message within an exchange.""" + + exchange_sequence: int | None = Field(None, alias="exchangeSequence") + timestamp: str | None = None + role: str + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationMessageEndEvent(BaseModel): + """Signals the end of a message.""" + + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationMessageEvent(BaseModel): + """Encapsulates sub-events related to a message.""" + + message_id: str = Field(..., alias="messageId") + start: UiPathConversationMessageStartEvent | None = None + end: UiPathConversationMessageEndEvent | None = None + content_part: UiPathConversationContentPartEvent | None = Field( + None, alias="contentPart" + ) + tool_call: UiPathConversationToolCallEvent | None = Field(None, alias="toolCall") + meta_event: dict[str, Any] | None = Field(None, alias="metaEvent") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationMessage(BaseModel): + """Represents a single message within an exchange.""" + + message_id: str = Field(..., alias="messageId") + role: str + content_parts: list[UiPathConversationContentPart] | None = Field( + None, alias="contentParts" + ) + tool_calls: list[UiPathConversationToolCall] | None = Field(None, alias="toolCalls") + created_at: str = Field(..., alias="createdAt") + updated_at: str = Field(..., alias="updatedAt") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) diff --git a/src/uipath/runtime/chat/meta.py b/src/uipath/runtime/chat/meta.py new file mode 100644 index 0000000..574737e --- /dev/null +++ b/src/uipath/runtime/chat/meta.py @@ 
-0,0 +1,11 @@ +"""Meta events allow additional extensible data.""" + +from pydantic import BaseModel, ConfigDict + + +class UiPathConversationMetaEvent(BaseModel): + """Arbitrary metadata events in the conversation schema.""" + + model_config = ConfigDict( + validate_by_name=True, validate_by_alias=True, extra="allow" + ) diff --git a/src/uipath/runtime/chat/tool.py b/src/uipath/runtime/chat/tool.py new file mode 100644 index 0000000..c32f4a3 --- /dev/null +++ b/src/uipath/runtime/chat/tool.py @@ -0,0 +1,63 @@ +"""Tool call events.""" + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from .content import InlineOrExternal + + +class UiPathConversationToolCallResult(BaseModel): + """Represents the result of a tool call execution.""" + + timestamp: str | None = None + value: InlineOrExternal | None = None + is_error: bool | None = Field(None, alias="isError") + cancelled: bool | None = None + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationToolCall(BaseModel): + """Represents a call to an external tool or function within a message.""" + + tool_call_id: str = Field(..., alias="toolCallId") + name: str + arguments: InlineOrExternal | None = None + timestamp: str | None = None + result: UiPathConversationToolCallResult | None = None + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationToolCallStartEvent(BaseModel): + """Signals the start of a tool call.""" + + tool_name: str = Field(..., alias="toolName") + timestamp: str | None = None + arguments: InlineOrExternal | None = None + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationToolCallEndEvent(BaseModel): + """Signals the end of a tool call.""" + + timestamp: str | None = None + result: Any = None + is_error: bool | None = Field(None, alias="isError") + 
cancelled: bool | None = None + meta_data: dict[str, Any] | None = Field(None, alias="metaData") + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) + + +class UiPathConversationToolCallEvent(BaseModel): + """Encapsulates the data related to a tool call event.""" + + tool_call_id: str = Field(..., alias="toolCallId") + start: UiPathConversationToolCallStartEvent | None = None + end: UiPathConversationToolCallEndEvent | None = None + meta_event: dict[str, Any] | None = Field(None, alias="metaEvent") + + model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) diff --git a/uv.lock b/uv.lock index 9f1c8e8..0384934 100644 --- a/uv.lock +++ b/uv.lock @@ -938,7 +938,7 @@ wheels = [ [[package]] name = "uipath-runtime" -version = "0.1.2" +version = "0.1.3" source = { editable = "." } dependencies = [ { name = "uipath-core" },