UiPath · cristipufu · Dec 1, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uipath-runtime"
-version = "0.1.2"
+version = "0.1.3"
 description = "Runtime abstractions and interfaces for building agents and automation scripts in the UiPath ecosystem"
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"

diff --git a/src/uipath/runtime/chat/__init__.py b/src/uipath/runtime/chat/__init__.py
@@ -0,0 +1,135 @@
+"""UiPath Conversation Models.
+
+This module provides Pydantic models that represent the JSON event schema for conversations between a client (UI) and an LLM/agent.
+
+The event objects define a hierarchical conversation structure:
+
+* Conversation
+    * Exchange
+        * Message
+            * Content Parts
+                * Citations
+            * Tool Calls
+                * Tool Results
+
+ A conversation may contain multiple exchanges, and an exchange may contain multiple messages. A message may contain
+ multiple content parts, each of which can be text or binary, including media input and output streams; and each
+ content part can include multiple citations. A message may also contain multiple tool calls, which may contain a tool
+ result.
+
+ The protocol also supports top-level "async" input media streams (audio and video), which can span multiple
+ exchanges. These are used for Gemini's automatic turn detection mode, where the LLM determines when the user has
+ stopped talking and starts producing output. The output forms one or more messages in an exchange with no explicit
+ input message. However, the LLM may produce an input transcript which can be used to construct the implicit input
+ message that started the exchange.
+
+ In addition, the protocol also supports "async" tool calls that span multiple exchanges. This can be used with
+ Gemini's asynchronous function calling protocol, which allows function calls to produce results that interrupt the
+ conversation when ready, even after multiple exchanges. They also support generating multiple results from a single
+ tool call. By contrast, most tool calls are scoped to a single message, which contains both the call and the single
+ result produced by that call.
+
+ Not all features supported by the protocol will be supported by all clients and LLMs. The optional top-level
+ `capabilities` property can be used to communicate information about supported features. This property should be set
+ on the first event written to a new websocket connection. This initial event may or may not contain additional
+ sub-events.
+"""
+
+from .async_stream import (
+    UiPathConversationAsyncInputStreamEndEvent,
+    UiPathConversationAsyncInputStreamEvent,
+    UiPathConversationAsyncInputStreamStartEvent,
+    UiPathConversationInputStreamChunkEvent,
+)
+from .citation import (
+    UiPathConversationCitationEndEvent,
+    UiPathConversationCitationEvent,
+    UiPathConversationCitationSource,
+    UiPathConversationCitationSourceMedia,
+    UiPathConversationCitationSourceUrl,
+    UiPathConversationCitationStartEvent,
+)
+from .content import (
+    UiPathConversationContentPart,
+    UiPathConversationContentPartChunkEvent,
+    UiPathConversationContentPartEndEvent,
+    UiPathConversationContentPartEvent,
+    UiPathConversationContentPartStartEvent,
+    UiPathExternalValue,
+    UiPathInlineValue,
+)
+from .conversation import (
+    UiPathConversationCapabilities,
+    UiPathConversationEndEvent,
+    UiPathConversationStartedEvent,
+    UiPathConversationStartEvent,
+)
+from .event import UiPathConversationEvent
+from .exchange import (
+    UiPathConversationExchange,
+    UiPathConversationExchangeEndEvent,
+    UiPathConversationExchangeEvent,
+    UiPathConversationExchangeStartEvent,
+)
+from .message import (
+    UiPathConversationMessage,
+    UiPathConversationMessageEndEvent,
+    UiPathConversationMessageEvent,
+    UiPathConversationMessageStartEvent,
+)
+from .meta import UiPathConversationMetaEvent
+from .tool import (
+    UiPathConversationToolCall,
+    UiPathConversationToolCallEndEvent,
+    UiPathConversationToolCallEvent,
+    UiPathConversationToolCallResult,
+    UiPathConversationToolCallStartEvent,
+)
+
+__all__ = [
+    # Root
+    "UiPathConversationEvent",
+    # Conversation
+    "UiPathConversationCapabilities",
+    "UiPathConversationStartEvent",
+    "UiPathConversationStartedEvent",
+    "UiPathConversationEndEvent",
+    # Exchange
+    "UiPathConversationExchangeStartEvent",
+    "UiPathConversationExchangeEndEvent",
+    "UiPathConversationExchangeEvent",
+    "UiPathConversationExchange",
+    # Message
+    "UiPathConversationMessageStartEvent",
+    "UiPathConversationMessageEndEvent",
+    "UiPathConversationMessageEvent",
+    "UiPathConversationMessage",
+    # Content
+    "UiPathConversationContentPartChunkEvent",
+    "UiPathConversationContentPartStartEvent",
+    "UiPathConversationContentPartEndEvent",
+    "UiPathConversationContentPartEvent",
+    "UiPathConversationContentPart",
+    "UiPathInlineValue",
+    "UiPathExternalValue",
+    # Citation
+    "UiPathConversationCitationStartEvent",
+    "UiPathConversationCitationEndEvent",
+    "UiPathConversationCitationEvent",
+    "UiPathConversationCitationSource",
+    "UiPathConversationCitationSourceUrl",
+    "UiPathConversationCitationSourceMedia",
+    # Tool
+    "UiPathConversationToolCallStartEvent",
+    "UiPathConversationToolCallEndEvent",
+    "UiPathConversationToolCallEvent",
+    "UiPathConversationToolCallResult",
+    "UiPathConversationToolCall",
+    # Async Stream
+    "UiPathConversationInputStreamChunkEvent",
+    "UiPathConversationAsyncInputStreamStartEvent",
+    "UiPathConversationAsyncInputStreamEndEvent",
+    "UiPathConversationAsyncInputStreamEvent",
+    # Meta
+    "UiPathConversationMetaEvent",
+]
diff --git a/src/uipath/runtime/chat/async_stream.py b/src/uipath/runtime/chat/async_stream.py
@@ -0,0 +1,52 @@
+"""Async input stream events."""
+
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class UiPathConversationInputStreamChunkEvent(BaseModel):
+    """Represents a single chunk of input stream data."""
+
+    input_stream_sequence: int | None = Field(None, alias="inputStreamSequence")
+    data: str
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationAsyncInputStreamStartEvent(BaseModel):
+    """Signals the start of an asynchronous input stream."""
+
+    mime_type: str = Field(..., alias="mimeType")
+    start_of_speech_sensitivity: str | None = Field(
+        None, alias="startOfSpeechSensitivity"
+    )
+    end_of_speech_sensitivity: str | None = Field(None, alias="endOfSpeechSensitivity")
+    prefix_padding_ms: int | None = Field(None, alias="prefixPaddingMs")
+    silence_duration_ms: int | None = Field(None, alias="silenceDurationMs")
+    meta_data: dict[str, Any] | None = Field(None, alias="metaData")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationAsyncInputStreamEndEvent(BaseModel):
+    """Signals the end of an asynchronous input stream."""
+
+    meta_data: dict[str, Any] | None = Field(None, alias="metaData")
+    last_chunk_content_part_sequence: int | None = Field(
+        None, alias="lastChunkContentPartSequence"
+    )
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationAsyncInputStreamEvent(BaseModel):
+    """Encapsulates sub-events related to an asynchronous input stream."""
+
+    stream_id: str = Field(..., alias="streamId")
+    start: UiPathConversationAsyncInputStreamStartEvent | None = None
+    end: UiPathConversationAsyncInputStreamEndEvent | None = None
+    chunk: UiPathConversationInputStreamChunkEvent | None = None
+    meta_event: dict[str, Any] | None = Field(None, alias="metaEvent")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
diff --git a/src/uipath/runtime/chat/citation.py b/src/uipath/runtime/chat/citation.py
@@ -0,0 +1,70 @@
+"""Citation events for message content."""
+
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class UiPathConversationCitationStartEvent(BaseModel):
+    """Indicates the start of a citation target in a content part."""
+
+    pass
+
+
+class UiPathConversationCitationEndEvent(BaseModel):
+    """Indicates the end of a citation target in a content part."""
+
+    sources: list[dict[str, Any]]
+
+
+class UiPathConversationCitationEvent(BaseModel):
+    """Encapsulates sub-events related to citations."""
+
+    citation_id: str = Field(..., alias="citationId")
+    start: UiPathConversationCitationStartEvent | None = None
+    end: UiPathConversationCitationEndEvent | None = None
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationCitationSourceUrl(BaseModel):
+    """Represents a citation source that can be rendered as a link (URL)."""
+
+    url: str
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationCitationSourceMedia(BaseModel):
+    """Represents a citation source that references media, such as a PDF document."""
+
+    mime_type: str = Field(..., alias="mimeType")
+    download_url: str | None = Field(None, alias="downloadUrl")
+    page_number: str | None = Field(None, alias="pageNumber")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationCitationSource(BaseModel):
+    """Represents a citation source, either a URL or media reference."""
+
+    title: str | None = None
+
+    # Flattened union of the Url source field (url) and the Media source fields (mime_type, download_url, page_number)
+    url: str | None = None
+    mime_type: str | None = Field(None, alias="mimeType")
+    download_url: str | None = Field(None, alias="downloadUrl")
+    page_number: str | None = Field(None, alias="pageNumber")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationCitation(BaseModel):
+    """Represents a citation or reference inside a content part."""
+
+    citation_id: str = Field(..., alias="citationId")
+    offset: int
+    length: int
+    sources: list[UiPathConversationCitationSource]
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
diff --git a/src/uipath/runtime/chat/content.py b/src/uipath/runtime/chat/content.py
@@ -0,0 +1,80 @@
+"""Message content part events."""
+
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from .citation import UiPathConversationCitation, UiPathConversationCitationEvent
+
+
+class UiPathConversationContentPartChunkEvent(BaseModel):
+    """Contains a chunk of a message content part."""
+
+    content_part_sequence: int | None = Field(None, alias="contentPartSequence")
+    data: str | None = None
+    citation: UiPathConversationCitationEvent | None = None
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationContentPartStartEvent(BaseModel):
+    """Signals the start of a message content part."""
+
+    mime_type: str = Field(..., alias="mimeType")
+    meta_data: dict[str, Any] | None = Field(None, alias="metaData")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationContentPartEndEvent(BaseModel):
+    """Signals the end of a message content part."""
+
+    last_chunk_content_part_sequence: int | None = Field(
+        None, alias="lastChunkContentPartSequence"
+    )
+    interrupted: dict[str, Any] | None = None
+    meta_data: dict[str, Any] | None = Field(None, alias="metaData")
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationContentPartEvent(BaseModel):
+    """Encapsulates events related to message content parts."""
+
+    content_part_id: str = Field(..., alias="contentPartId")
+    start: UiPathConversationContentPartStartEvent | None = None
+    end: UiPathConversationContentPartEndEvent | None = None
+    chunk: UiPathConversationContentPartChunkEvent | None = None
+    meta_event: dict[str, Any] | None = Field(None, alias="metaEvent")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathInlineValue(BaseModel):
+    """Used when a value is small enough to be returned inline."""
+
+    inline: Any
+
+
+class UiPathExternalValue(BaseModel):
+    """Used when a value is too large to be returned inline."""
+
+    url: str
+    byte_count: int | None = Field(None, alias="byteCount")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+InlineOrExternal = UiPathInlineValue | UiPathExternalValue
+
+
+class UiPathConversationContentPart(BaseModel):
+    """Represents a single part of message content."""
+
+    content_part_id: str = Field(..., alias="contentPartId")
+    mime_type: str = Field(..., alias="mimeType")
+    data: InlineOrExternal
+    citations: list[UiPathConversationCitation] | None = None
+    is_transcript: bool | None = Field(None, alias="isTranscript")
+    is_incomplete: bool | None = Field(None, alias="isIncomplete")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
diff --git a/src/uipath/runtime/chat/conversation.py b/src/uipath/runtime/chat/conversation.py
@@ -0,0 +1,49 @@
+"""Conversation-level events and capabilities."""
+
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class UiPathConversationCapabilities(BaseModel):
+    """Describes the capabilities of a conversation participant."""
+
+    async_input_stream_emitter: bool | None = Field(
+        None, alias="asyncInputStreamEmitter"
+    )
+    async_input_stream_handler: bool | None = Field(
+        None, alias="asyncInputStreamHandler"
+    )
+    async_tool_call_emitter: bool | None = Field(None, alias="asyncToolCallEmitter")
+    async_tool_call_handler: bool | None = Field(None, alias="asyncToolCallHandler")
+    mime_types_emitted: list[str] | None = Field(None, alias="mimeTypesEmitted")
+    mime_types_handled: list[str] | None = Field(None, alias="mimeTypesHandled")
+
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="allow"
+    )
+
+
+class UiPathConversationStartEvent(BaseModel):
+    """Signals the start of a conversation event stream."""
+
+    capabilities: UiPathConversationCapabilities | None = None
+    meta_data: dict[str, Any] | None = Field(None, alias="metaData")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationStartedEvent(BaseModel):
+    """Signals the acceptance of the start of a conversation."""
+
+    capabilities: UiPathConversationCapabilities | None = None
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
+
+
+class UiPathConversationEndEvent(BaseModel):
+    """Signals the end of a conversation event stream."""
+
+    meta_data: dict[str, Any] | None = Field(None, alias="metaData")
+
+    model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)