diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py
index df6daeace..1bdfac254 100644
--- a/langfuse/api/__init__.py
+++ b/langfuse/api/__init__.py
@@ -87,6 +87,7 @@
     ObservationsView,
     ObservationsViews,
     OpenAiUsage,
+    OpenAiUsageSchema,
     OptionalObservationBody,
     PaginatedDatasetItems,
     PaginatedDatasetRuns,
@@ -133,6 +134,7 @@
     UpdateSpanEvent,
     Usage,
     UsageByModel,
+    UsageDetails,
     comments,
     commons,
     dataset_items,
@@ -240,6 +242,7 @@
     "ObservationsView",
     "ObservationsViews",
     "OpenAiUsage",
+    "OpenAiUsageSchema",
     "OptionalObservationBody",
     "PaginatedDatasetItems",
     "PaginatedDatasetRuns",
@@ -286,6 +289,7 @@
     "UpdateSpanEvent",
     "Usage",
     "UsageByModel",
+    "UsageDetails",
     "comments",
     "commons",
     "dataset_items",
diff --git a/langfuse/api/resources/__init__.py b/langfuse/api/resources/__init__.py
index 4a6cc0319..f838c2f8c 100644
--- a/langfuse/api/resources/__init__.py
+++ b/langfuse/api/resources/__init__.py
@@ -92,6 +92,7 @@
     ObservationBody,
     ObservationType,
     OpenAiUsage,
+    OpenAiUsageSchema,
     OptionalObservationBody,
     ScoreBody,
     ScoreEvent,
@@ -105,6 +106,7 @@
     UpdateObservationEvent,
     UpdateSpanBody,
     UpdateSpanEvent,
+    UsageDetails,
 )
 from .media import (
     GetMediaResponse,
@@ -237,6 +239,7 @@
     "ObservationsView",
     "ObservationsViews",
     "OpenAiUsage",
+    "OpenAiUsageSchema",
     "OptionalObservationBody",
     "PaginatedDatasetItems",
     "PaginatedDatasetRuns",
@@ -283,6 +286,7 @@
     "UpdateSpanEvent",
     "Usage",
     "UsageByModel",
+    "UsageDetails",
     "comments",
     "commons",
     "dataset_items",
diff --git a/langfuse/api/resources/comments/types/create_comment_request.py b/langfuse/api/resources/comments/types/create_comment_request.py
index 98e25e2e1..9ba6081ee 100644
--- a/langfuse/api/resources/comments/types/create_comment_request.py
+++ b/langfuse/api/resources/comments/types/create_comment_request.py
@@ -25,7 +25,7 @@ class CreateCommentRequest(pydantic_v1.BaseModel):
 
     content: str = pydantic_v1.Field()
     """
-    The content of the comment. May include markdown. Currently limited to 500 characters.
+    The content of the comment. May include markdown. Currently limited to 3000 characters.
     """
 
     author_user_id: typing.Optional[str] = pydantic_v1.Field(
diff --git a/langfuse/api/resources/commons/types/observation.py b/langfuse/api/resources/commons/types/observation.py
index 130fe732d..93fabb754 100644
--- a/langfuse/api/resources/commons/types/observation.py
+++ b/langfuse/api/resources/commons/types/observation.py
@@ -84,7 +84,7 @@ class Observation(pydantic_v1.BaseModel):
 
     usage: typing.Optional[Usage] = pydantic_v1.Field(default=None)
     """
-    The usage data of the observation
+    (Deprecated. Use usageDetails and costDetails instead.) The usage data of the observation
     """
 
     level: ObservationLevel = pydantic_v1.Field()
@@ -111,6 +111,20 @@
     The prompt ID associated with the observation
     """
 
+    usage_details: typing.Optional[typing.Dict[str, int]] = pydantic_v1.Field(
+        alias="usageDetails", default=None
+    )
+    """
+    The usage details of the observation. Key is the name of the usage metric, value is the number of units consumed. The total key is the sum of all (non-total) usage metrics or the total value ingested.
+    """
+
+    cost_details: typing.Optional[typing.Dict[str, float]] = pydantic_v1.Field(
+        alias="costDetails", default=None
+    )
+    """
+    The cost details of the observation. Key is the name of the cost metric, value is the cost in USD. The total key is the sum of all (non-total) cost metrics or the total value ingested.
+ """ + def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { "by_alias": True, diff --git a/langfuse/api/resources/commons/types/observations_view.py b/langfuse/api/resources/commons/types/observations_view.py index 3e15909ea..e011fa32b 100644 --- a/langfuse/api/resources/commons/types/observations_view.py +++ b/langfuse/api/resources/commons/types/observations_view.py @@ -53,21 +53,21 @@ class ObservationsView(Observation): alias="calculatedInputCost", default=None ) """ - The calculated cost of the input in USD + (Deprecated. Use usageDetails and costDetails instead.) The calculated cost of the input in USD """ calculated_output_cost: typing.Optional[float] = pydantic_v1.Field( alias="calculatedOutputCost", default=None ) """ - The calculated cost of the output in USD + (Deprecated. Use usageDetails and costDetails instead.) The calculated cost of the output in USD """ calculated_total_cost: typing.Optional[float] = pydantic_v1.Field( alias="calculatedTotalCost", default=None ) """ - The calculated total cost in USD + (Deprecated. Use usageDetails and costDetails instead.) The calculated total cost in USD """ latency: typing.Optional[float] = pydantic_v1.Field(default=None) diff --git a/langfuse/api/resources/commons/types/usage.py b/langfuse/api/resources/commons/types/usage.py index bc5041c5f..c38330494 100644 --- a/langfuse/api/resources/commons/types/usage.py +++ b/langfuse/api/resources/commons/types/usage.py @@ -10,7 +10,7 @@ class Usage(pydantic_v1.BaseModel): """ - Standard interface for usage and cost + (Deprecated. Use usageDetails and costDetails instead.) Standard interface for usage and cost """ input: typing.Optional[int] = pydantic_v1.Field(default=None) diff --git a/langfuse/api/resources/ingestion/__init__.py b/langfuse/api/resources/ingestion/__init__.py index dde470ccc..6bd1373be 100644 --- a/langfuse/api/resources/ingestion/__init__.py +++ b/langfuse/api/resources/ingestion/__init__.py @@ -27,6 +27,7 @@ ObservationBody, ObservationType, OpenAiUsage, + OpenAiUsageSchema, OptionalObservationBody, ScoreBody, ScoreEvent, @@ -40,6 +41,7 @@ UpdateObservationEvent, UpdateSpanBody, UpdateSpanEvent, + UsageDetails, ) __all__ = [ @@ -69,6 +71,7 @@ "ObservationBody", "ObservationType", "OpenAiUsage", + "OpenAiUsageSchema", "OptionalObservationBody", "ScoreBody", "ScoreEvent", @@ -82,4 +85,5 @@ "UpdateObservationEvent", "UpdateSpanBody", "UpdateSpanEvent", + "UsageDetails", ] diff --git a/langfuse/api/resources/ingestion/types/__init__.py b/langfuse/api/resources/ingestion/types/__init__.py index 0981aa841..95fa2559e 100644 --- a/langfuse/api/resources/ingestion/types/__init__.py +++ b/langfuse/api/resources/ingestion/types/__init__.py @@ -28,6 +28,7 @@ from .observation_body import ObservationBody from .observation_type import ObservationType from .open_ai_usage import OpenAiUsage +from .open_ai_usage_schema import OpenAiUsageSchema from .optional_observation_body import OptionalObservationBody from .score_body import ScoreBody from .score_event import ScoreEvent @@ -41,6 +42,7 @@ from .update_observation_event import UpdateObservationEvent from .update_span_body import UpdateSpanBody from .update_span_event import UpdateSpanEvent +from .usage_details import UsageDetails __all__ = [ "BaseEvent", @@ -69,6 +71,7 @@ "ObservationBody", "ObservationType", "OpenAiUsage", + "OpenAiUsageSchema", "OptionalObservationBody", "ScoreBody", "ScoreEvent", @@ -82,4 +85,5 @@ "UpdateObservationEvent", "UpdateSpanBody", "UpdateSpanEvent", + "UsageDetails", ] diff --git 
a/langfuse/api/resources/ingestion/types/create_generation_body.py b/langfuse/api/resources/ingestion/types/create_generation_body.py index 65905e78e..428b58607 100644 --- a/langfuse/api/resources/ingestion/types/create_generation_body.py +++ b/langfuse/api/resources/ingestion/types/create_generation_body.py @@ -8,6 +8,7 @@ from ...commons.types.map_value import MapValue from .create_span_body import CreateSpanBody from .ingestion_usage import IngestionUsage +from .usage_details import UsageDetails class CreateGenerationBody(CreateSpanBody): @@ -19,6 +20,12 @@ class CreateGenerationBody(CreateSpanBody): alias="modelParameters", default=None ) usage: typing.Optional[IngestionUsage] = None + usage_details: typing.Optional[UsageDetails] = pydantic_v1.Field( + alias="usageDetails", default=None + ) + cost_details: typing.Optional[typing.Dict[str, float]] = pydantic_v1.Field( + alias="costDetails", default=None + ) prompt_name: typing.Optional[str] = pydantic_v1.Field( alias="promptName", default=None ) diff --git a/langfuse/api/resources/ingestion/types/open_ai_usage_schema.py b/langfuse/api/resources/ingestion/types/open_ai_usage_schema.py new file mode 100644 index 000000000..ecf755bb3 --- /dev/null +++ b/langfuse/api/resources/ingestion/types/open_ai_usage_schema.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ....core.datetime_utils import serialize_datetime +from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 + + +class OpenAiUsageSchema(pydantic_v1.BaseModel): + prompt_tokens: int + completion_tokens: int + total_tokens: int + prompt_tokens_details: typing.Optional[typing.Dict[str, int]] = None + completion_tokens_details: typing.Optional[typing.Dict[str, int]] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/langfuse/api/resources/ingestion/types/update_generation_body.py b/langfuse/api/resources/ingestion/types/update_generation_body.py index e216e4604..2058543af 100644 --- a/langfuse/api/resources/ingestion/types/update_generation_body.py +++ b/langfuse/api/resources/ingestion/types/update_generation_body.py @@ -8,6 +8,7 @@ from ...commons.types.map_value import MapValue from .ingestion_usage import IngestionUsage from .update_span_body import UpdateSpanBody +from .usage_details import UsageDetails class UpdateGenerationBody(UpdateSpanBody): @@ -22,6 +23,12 @@ class UpdateGenerationBody(UpdateSpanBody): prompt_name: typing.Optional[str] = pydantic_v1.Field( alias="promptName", default=None ) + usage_details: typing.Optional[UsageDetails] = pydantic_v1.Field( + alias="usageDetails", default=None + ) + cost_details: typing.Optional[typing.Dict[str, float]] = pydantic_v1.Field( + alias="costDetails", default=None + ) prompt_version: 
typing.Optional[int] = pydantic_v1.Field( alias="promptVersion", default=None ) diff --git a/langfuse/api/resources/ingestion/types/usage_details.py b/langfuse/api/resources/ingestion/types/usage_details.py new file mode 100644 index 000000000..89c0fc2e9 --- /dev/null +++ b/langfuse/api/resources/ingestion/types/usage_details.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from .open_ai_usage_schema import OpenAiUsageSchema + +UsageDetails = typing.Union[typing.Dict[str, int], OpenAiUsageSchema] diff --git a/langfuse/api/resources/media/client.py b/langfuse/api/resources/media/client.py index a32916d5e..eb8eb16ac 100644 --- a/langfuse/api/resources/media/client.py +++ b/langfuse/api/resources/media/client.py @@ -197,7 +197,7 @@ def get_upload_url( Examples -------- - from langfuse.api import GetMediaUploadUrlRequest + from langfuse.api import GetMediaUploadUrlRequest, MediaContentType from langfuse.api.client import FernLangfuse client = FernLangfuse( @@ -212,6 +212,7 @@ def get_upload_url( request=GetMediaUploadUrlRequest( trace_id="string", observation_id="string", + content_type=MediaContentType.IMAGE_PNG, content_length=1, sha_256_hash="string", field="string", @@ -446,7 +447,7 @@ async def get_upload_url( -------- import asyncio - from langfuse.api import GetMediaUploadUrlRequest + from langfuse.api import GetMediaUploadUrlRequest, MediaContentType from langfuse.api.client import AsyncFernLangfuse client = AsyncFernLangfuse( @@ -464,6 +465,7 @@ async def main() -> None: request=GetMediaUploadUrlRequest( trace_id="string", observation_id="string", + content_type=MediaContentType.IMAGE_PNG, content_length=1, sha_256_hash="string", field="string", diff --git a/langfuse/api/resources/media/types/media_content_type.py b/langfuse/api/resources/media/types/media_content_type.py index 9b0cea41a..e8fdeefa2 100644 --- a/langfuse/api/resources/media/types/media_content_type.py +++ b/langfuse/api/resources/media/types/media_content_type.py @@ -1,35 +1,133 @@ # This file was auto-generated by Fern from our API Definition. 
+import enum
 import typing
 
-MediaContentType = typing.Literal[
-    "image/png",
-    "image/jpeg",
-    "image/jpg",
-    "image/webp",
-    "image/gif",
-    "image/svg+xml",
-    "image/tiff",
-    "image/bmp",
-    "audio/mpeg",
-    "audio/mp3",
-    "audio/wav",
-    "audio/ogg",
-    "audio/oga",
-    "audio/aac",
-    "audio/mp4",
-    "audio/flac",
-    "video/mp4",
-    "video/webm",
-    "text/plain",
-    "text/html",
-    "text/css",
-    "text/csv",
-    "application/pdf",
-    "application/msword",
-    "application/vnd.ms-excel",
-    "application/zip",
-    "application/json",
-    "application/xml",
-    "application/octet-stream",
-]
+T_Result = typing.TypeVar("T_Result")
+
+
+class MediaContentType(str, enum.Enum):
+    """
+    The MIME type of the media record
+    """
+
+    IMAGE_PNG = "image/png"
+    IMAGE_JPEG = "image/jpeg"
+    IMAGE_JPG = "image/jpg"
+    IMAGE_WEBP = "image/webp"
+    IMAGE_GIF = "image/gif"
+    IMAGE_SVG_XML = "image/svg+xml"
+    IMAGE_TIFF = "image/tiff"
+    IMAGE_BMP = "image/bmp"
+    AUDIO_MPEG = "audio/mpeg"
+    AUDIO_MP_3 = "audio/mp3"
+    AUDIO_WAV = "audio/wav"
+    AUDIO_OGG = "audio/ogg"
+    AUDIO_OGA = "audio/oga"
+    AUDIO_AAC = "audio/aac"
+    AUDIO_MP_4 = "audio/mp4"
+    AUDIO_FLAC = "audio/flac"
+    VIDEO_MP_4 = "video/mp4"
+    VIDEO_WEBM = "video/webm"
+    TEXT_PLAIN = "text/plain"
+    TEXT_HTML = "text/html"
+    TEXT_CSS = "text/css"
+    TEXT_CSV = "text/csv"
+    APPLICATION_PDF = "application/pdf"
+    APPLICATION_MSWORD = "application/msword"
+    APPLICATION_MS_EXCEL = "application/vnd.ms-excel"
+    APPLICATION_ZIP = "application/zip"
+    APPLICATION_JSON = "application/json"
+    APPLICATION_XML = "application/xml"
+    APPLICATION_OCTET_STREAM = "application/octet-stream"
+
+    def visit(
+        self,
+        image_png: typing.Callable[[], T_Result],
+        image_jpeg: typing.Callable[[], T_Result],
+        image_jpg: typing.Callable[[], T_Result],
+        image_webp: typing.Callable[[], T_Result],
+        image_gif: typing.Callable[[], T_Result],
+        image_svg_xml: typing.Callable[[], T_Result],
+        image_tiff: typing.Callable[[], T_Result],
+        image_bmp: typing.Callable[[], T_Result],
+        audio_mpeg: typing.Callable[[], T_Result],
+        audio_mp_3: typing.Callable[[], T_Result],
+        audio_wav: typing.Callable[[], T_Result],
+        audio_ogg: typing.Callable[[], T_Result],
+        audio_oga: typing.Callable[[], T_Result],
+        audio_aac: typing.Callable[[], T_Result],
+        audio_mp_4: typing.Callable[[], T_Result],
+        audio_flac: typing.Callable[[], T_Result],
+        video_mp_4: typing.Callable[[], T_Result],
+        video_webm: typing.Callable[[], T_Result],
+        text_plain: typing.Callable[[], T_Result],
+        text_html: typing.Callable[[], T_Result],
+        text_css: typing.Callable[[], T_Result],
+        text_csv: typing.Callable[[], T_Result],
+        application_pdf: typing.Callable[[], T_Result],
+        application_msword: typing.Callable[[], T_Result],
+        application_ms_excel: typing.Callable[[], T_Result],
+        application_zip: typing.Callable[[], T_Result],
+        application_json: typing.Callable[[], T_Result],
+        application_xml: typing.Callable[[], T_Result],
+        application_octet_stream: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is MediaContentType.IMAGE_PNG:
+            return image_png()
+        if self is MediaContentType.IMAGE_JPEG:
+            return image_jpeg()
+        if self is MediaContentType.IMAGE_JPG:
+            return image_jpg()
+        if self is MediaContentType.IMAGE_WEBP:
+            return image_webp()
+        if self is MediaContentType.IMAGE_GIF:
+            return image_gif()
+        if self is MediaContentType.IMAGE_SVG_XML:
+            return image_svg_xml()
+        if self is MediaContentType.IMAGE_TIFF:
+            return image_tiff()
+        if self is MediaContentType.IMAGE_BMP:
+            return image_bmp()
+        if self is MediaContentType.AUDIO_MPEG:
+            return audio_mpeg()
+        if self is MediaContentType.AUDIO_MP_3:
+            return audio_mp_3()
+        if self is MediaContentType.AUDIO_WAV:
+            return audio_wav()
+        if self is MediaContentType.AUDIO_OGG:
+            return audio_ogg()
+        if self is MediaContentType.AUDIO_OGA:
+            return audio_oga()
+        if self is MediaContentType.AUDIO_AAC:
+            return audio_aac()
+        if self is MediaContentType.AUDIO_MP_4:
+            return audio_mp_4()
+        if self is MediaContentType.AUDIO_FLAC:
+            return audio_flac()
+        if self is MediaContentType.VIDEO_MP_4:
+            return video_mp_4()
+        if self is MediaContentType.VIDEO_WEBM:
+            return video_webm()
+        if self is MediaContentType.TEXT_PLAIN:
+            return text_plain()
+        if self is MediaContentType.TEXT_HTML:
+            return text_html()
+        if self is MediaContentType.TEXT_CSS:
+            return text_css()
+        if self is MediaContentType.TEXT_CSV:
+            return text_csv()
+        if self is MediaContentType.APPLICATION_PDF:
+            return application_pdf()
+        if self is MediaContentType.APPLICATION_MSWORD:
+            return application_msword()
+        if self is MediaContentType.APPLICATION_MS_EXCEL:
+            return application_ms_excel()
+        if self is MediaContentType.APPLICATION_ZIP:
+            return application_zip()
+        if self is MediaContentType.APPLICATION_JSON:
+            return application_json()
+        if self is MediaContentType.APPLICATION_XML:
+            return application_xml()
+        if self is MediaContentType.APPLICATION_OCTET_STREAM:
+            return application_octet_stream()
diff --git a/langfuse/callback/langchain.py b/langfuse/callback/langchain.py
index 1c327aab5..e3bb29b88 100644
--- a/langfuse/callback/langchain.py
+++ b/langfuse/callback/langchain.py
@@ -857,6 +857,7 @@ def on_llm_end(
         self.runs[run_id] = self.runs[run_id].end(
             output=extracted_response,
             usage=llm_usage,
+            usage_details=llm_usage,
             version=self.version,
             input=kwargs.get("inputs"),
             model=model,
@@ -1040,12 +1041,14 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
         # https://pypi.org/project/langchain-anthropic/ (works also for Bedrock-Anthropic)
         ("input_tokens", "input"),
         ("output_tokens", "output"),
+        ("total_tokens", "total"),
         # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count
         ("prompt_token_count", "input"),
         ("candidates_token_count", "output"),
         # Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics
         ("inputTokenCount", "input"),
         ("outputTokenCount", "output"),
+        ("totalTokenCount", "total"),
         # langchain-ibm https://pypi.org/project/langchain-ibm/
         ("input_token_count", "input"),
         ("generated_token_count", "output"),
@@ -1063,6 +1066,19 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]):
 
            usage_model[langfuse_key] = final_count  # Translate key and keep the value
 
+    if isinstance(usage_model, dict):
+        if "input_token_details" in usage_model:
+            input_token_details = usage_model.pop("input_token_details", {})
+
+            for key, value in input_token_details.items():
+                usage_model[f"input_{key}"] = value
+
+        if "output_token_details" in usage_model:
+            output_token_details = usage_model.pop("output_token_details", {})
+
+            for key, value in output_token_details.items():
+                usage_model[f"output_{key}"] = value
+
     return usage_model if usage_model else None
 
 
diff --git a/langfuse/client.py b/langfuse/client.py
index bc20a9051..1c8b42de1 100644
--- a/langfuse/client.py
+++ b/langfuse/client.py
@@ -78,7 +78,11 @@
 from langfuse.media import LangfuseMedia
 from langfuse.request import LangfuseClient
 from langfuse.types import MaskFunction, ScoreDataType, SpanLevel
-from langfuse.utils import _convert_usage_input, _create_prompt_context, _get_timestamp
+from langfuse.utils import (
+    _convert_usage_input,
+    _create_prompt_context,
+    _get_timestamp,
+)
 
 from .version import __version__ as version
 
@@ -1832,6 +1836,8 @@ def generation(
         input: typing.Optional[typing.Any] = None,
         output: typing.Optional[typing.Any] = None,
         usage: typing.Optional[typing.Union[pydantic.BaseModel, ModelUsage]] = None,
+        usage_details: typing.Optional[typing.Dict[str, int]] = None,
+        cost_details: typing.Optional[typing.Dict[str, float]] = None,
         prompt: typing.Optional[PromptClient] = None,
         **kwargs,
     ) -> "StatefulGenerationClient":
@@ -1859,7 +1865,9 @@ def generation(
            model_parameters (Optional[dict]): The parameters of the model used for the generation; can be any key-value pairs.
            input (Optional[dict]): The prompt used for the generation. Can be any string or JSON object.
            output (Optional[dict]): The completion generated by the model. Can be any string or JSON object.
-           usage (Optional[dict]): The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.
+           usage (Optional[dict]): [DEPRECATED, use usage_details and cost_details instead] The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.
+           usage_details (Optional[dict]): The usage details of the generation. Also accepts OpenAI usage details. Keys are the usage type (e.g. "input", "input_cached", "output") and values are integers representing the number of units used. For accurate cost calculations in Langfuse, ensure each usage type has a corresponding price configured in the Langfuse models table. Example: {"input": 500, "output": 150}.
+           cost_details (Optional[dict]): The cost details of the generation. Keys are the usage type (e.g. "input", "input_cached", "output") and values are floats representing the cost in USD. Example: {"input": 0.0015, "output": 0.002}.
            prompt (Optional[PromptClient]): The Langfuse prompt object used for the generation.
            **kwargs: Additional keyword arguments to include in the generation.
 
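[Editor's note: to make the two `langfuse/client.py` hunks above concrete, here is a minimal, hedged usage sketch. The model name and all values are illustrative, and a v2 client configured via the usual environment variables is assumed. Per the `UsageDetails` union and `OpenAiUsageSchema` introduced earlier in this diff, `usage_details` also accepts a raw OpenAI usage payload.]

```python
from langfuse import Langfuse

langfuse = Langfuse()  # reads LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY / LANGFUSE_HOST

# Generic form: integer unit counts keyed by usage type, USD costs keyed the
# same way. If cost_details is omitted, Langfuse can infer costs when a price
# is configured per usage type in the models table.
generation = langfuse.generation(
    name="chat-completion",
    model="gpt-4o",  # illustrative model name
    usage_details={"input": 500, "input_cached": 100, "output": 150},
    cost_details={"input": 0.0015, "output": 0.002},
)

# OpenAI-shaped form: accepted as-is via the UsageDetails union; nested
# *_tokens_details mappings of str -> int are allowed per OpenAiUsageSchema.
generation.update(
    usage_details={
        "prompt_tokens": 500,
        "completion_tokens": 150,
        "total_tokens": 650,
        "prompt_tokens_details": {"cached_tokens": 100},
    }
)

langfuse.flush()  # force-send buffered events in short-lived scripts
```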
@@ -1905,6 +1913,8 @@ def generation(
                 "model": model,
                 "model_parameters": model_parameters,
                 "usage": _convert_usage_input(usage) if usage is not None else None,
+                "usage_details": usage_details,
+                "cost_details": cost_details,
                 "trace": {"release": self.release},
                 **_create_prompt_context(prompt),
                 **kwargs,
@@ -2094,6 +2104,8 @@ def generation(
         input: typing.Optional[typing.Any] = None,
         output: typing.Optional[typing.Any] = None,
         usage: typing.Optional[typing.Union[pydantic.BaseModel, ModelUsage]] = None,
+        usage_details: typing.Optional[typing.Dict[str, int]] = None,
+        cost_details: typing.Optional[typing.Dict[str, float]] = None,
         prompt: typing.Optional[PromptClient] = None,
         **kwargs,
     ) -> "StatefulGenerationClient":
@@ -2115,7 +2127,9 @@ def generation(
            model_parameters (Optional[dict]): The parameters of the model used for the generation; can be any key-value pairs.
            input (Optional[dict]): The prompt used for the generation. Can be any string or JSON object.
            output (Optional[dict]): The completion generated by the model. Can be any string or JSON object.
-           usage (Optional[dict]): The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.
+           usage (Optional[dict]): [DEPRECATED, use usage_details and cost_details instead] The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.
+           usage_details (Optional[dict]): The usage details of the generation. Also accepts OpenAI usage details. Keys are the usage type (e.g. "input", "input_cached", "output") and values are integers representing the number of units used. For accurate cost calculations in Langfuse, ensure each usage type has a corresponding price configured in the Langfuse models table. Example: {"input": 500, "output": 150}.
+           cost_details (Optional[dict]): The cost details of the generation. Keys are the usage type (e.g. "input", "input_cached", "output") and values are floats representing the cost in USD. Example: {"input": 0.0015, "output": 0.002}.
            prompt (Optional[PromptClient]): The Langfuse prompt object used for the generation.
            **kwargs: Additional keyword arguments to include in the generation.
 
@@ -2159,6 +2173,8 @@ def generation(
                 "input": input,
                 "output": output,
                 "usage": _convert_usage_input(usage) if usage is not None else None,
+                "usage_details": usage_details,
+                "cost_details": cost_details,
                 **_create_prompt_context(prompt),
                 **kwargs,
             }
@@ -2526,6 +2542,8 @@ def update(
         input: typing.Optional[typing.Any] = None,
         output: typing.Optional[typing.Any] = None,
         usage: typing.Optional[typing.Union[pydantic.BaseModel, ModelUsage]] = None,
+        usage_details: typing.Optional[typing.Dict[str, int]] = None,
+        cost_details: typing.Optional[typing.Dict[str, float]] = None,
         prompt: typing.Optional[PromptClient] = None,
         **kwargs,
     ) -> "StatefulGenerationClient":
@@ -2544,7 +2562,9 @@ def update(
            model_parameters (Optional[dict]): The parameters of the model used for the generation; can be any key-value pairs.
            input (Optional[dict]): The prompt used for the generation. Can be any string or JSON object.
            output (Optional[dict]): The completion generated by the model. Can be any string or JSON object.
-           usage (Optional[dict]): The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.
+           usage (Optional[dict]): [DEPRECATED, use usage_details and cost_details instead] The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.
+           usage_details (Optional[dict]): The usage details of the generation. Also accepts OpenAI usage details. Keys are the usage type (e.g. "input", "input_cached", "output") and values are integers representing the number of units used. For accurate cost calculations in Langfuse, ensure each usage type has a corresponding price configured in the Langfuse models table. Example: {"input": 500, "output": 150}.
+           cost_details (Optional[dict]): The cost details of the generation. Keys are the usage type (e.g. "input", "input_cached", "output") and values are floats representing the cost in USD. Example: {"input": 0.0015, "output": 0.002}.
            prompt (Optional[PromptClient]): The Langfuse prompt object used for the generation.
            **kwargs: Additional keyword arguments to include in the generation.
 
@@ -2584,6 +2604,8 @@ def update(
                 "input": input,
                 "output": output,
                 "usage": _convert_usage_input(usage) if usage is not None else None,
+                "usage_details": usage_details,
+                "cost_details": cost_details,
                 **_create_prompt_context(prompt),
                 **kwargs,
             }
@@ -2632,6 +2654,8 @@ def end(
         input: typing.Optional[typing.Any] = None,
         output: typing.Optional[typing.Any] = None,
         usage: typing.Optional[typing.Union[pydantic.BaseModel, ModelUsage]] = None,
+        usage_details: typing.Optional[typing.Dict[str, int]] = None,
+        cost_details: typing.Optional[typing.Dict[str, float]] = None,
         prompt: typing.Optional[PromptClient] = None,
         **kwargs,
     ) -> "StatefulGenerationClient":
@@ -2650,7 +2674,9 @@ def end(
            model_parameters (Optional[dict]): The parameters of the model used for the generation; can be any key-value pairs.
            input (Optional[dict]): The prompt used for the generation. Can be any string or JSON object.
            output (Optional[dict]): The completion generated by the model. Can be any string or JSON object.
-           usage (Optional[dict]): The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.
+           usage (Optional[dict]): [DEPRECATED, use usage_details and cost_details instead] The usage object supports the OpenAi structure with {`promptTokens`, `completionTokens`, `totalTokens`} and a more generic version {`input`, `output`, `total`, `unit`, `inputCost`, `outputCost`, `totalCost`} where unit can be of value `"TOKENS"`, `"CHARACTERS"`, `"MILLISECONDS"`, `"SECONDS"`, or `"IMAGES"`. Refer to the docs on how to [automatically infer](https://langfuse.com/docs/model-usage-and-cost) token usage and costs in Langfuse.
+           usage_details (Optional[dict]): The usage details of the generation. Also accepts OpenAI usage details. Keys are the usage type (e.g. "input", "input_cached", "output") and values are integers representing the number of units used. For accurate cost calculations in Langfuse, ensure each usage type has a corresponding price configured in the Langfuse models table. Example: {"input": 500, "output": 150}.
+           cost_details (Optional[dict]): The cost details of the generation. Keys are the usage type (e.g. "input", "input_cached", "output") and values are floats representing the cost in USD. Example: {"input": 0.0015, "output": 0.002}.
            prompt (Optional[PromptClient]): The Langfuse prompt object used for the generation.
            **kwargs: Additional keyword arguments to include in the generation.
 
@@ -2687,6 +2713,8 @@ def end(
             input=input,
             output=output,
             usage=usage,
+            usage_details=usage_details,
+            cost_details=cost_details,
             prompt=prompt,
             **kwargs,
         )
diff --git a/langfuse/decorators/langfuse_decorator.py b/langfuse/decorators/langfuse_decorator.py
index fb4cd34cf..d5164e7c4 100644
--- a/langfuse/decorators/langfuse_decorator.py
+++ b/langfuse/decorators/langfuse_decorator.py
@@ -27,6 +27,7 @@
 
 from typing_extensions import ParamSpec
 
+from langfuse.api import UsageDetails
 from langfuse.client import (
     Langfuse,
     StatefulSpanClient,
@@ -71,6 +72,8 @@
         "model": None,
         "model_parameters": None,
         "usage": None,
+        "usage_details": None,
+        "cost_details": None,
         "prompt": None,
         "public": None,
     },
@@ -823,6 +826,8 @@ def update_current_observation(
         model: Optional[str] = None,
         model_parameters: Optional[Dict[str, MapValue]] = None,
         usage: Optional[Union[BaseModel, ModelUsage]] = None,
+        usage_details: Optional[UsageDetails] = None,
+        cost_details: Optional[Dict[str, float]] = None,
        prompt: Optional[PromptClient] = None,
        public: Optional[bool] = None,
    ):
@@ -857,7 +862,9 @@ def update_current_observation(
        Generation-specific params:
        - `completion_start_time` (Optional[datetime]): The time at which the completion started (streaming). Set it to get latency analytics broken down into time until completion started and completion duration.
        - `model_parameters` (Optional[Dict[str, MapValue]]): The parameters of the model used for the generation; can be any key-value pairs.
-       - `usage` (Optional[Union[BaseModel, ModelUsage]]): The usage object supports the OpenAi structure with {promptTokens, completionTokens, totalTokens} and a more generic version {input, output, total, unit, inputCost, outputCost, totalCost} where unit can be of value "TOKENS", "CHARACTERS", "MILLISECONDS", "SECONDS", or "IMAGES". Refer to the docs on how to automatically infer token usage and costs in Langfuse.
+       - `usage` (Optional[Union[BaseModel, ModelUsage]]): (Deprecated. Use `usage_details` and `cost_details` instead.) The usage object supports the OpenAi structure with {promptTokens, completionTokens, totalTokens} and a more generic version {input, output, total, unit, inputCost, outputCost, totalCost} where unit can be of value "TOKENS", "CHARACTERS", "MILLISECONDS", "SECONDS", or "IMAGES". Refer to the docs on how to automatically infer token usage and costs in Langfuse.
+       - `usage_details` (Optional[Dict[str, int]]): The usage details of the observation. Reflects the number of units consumed per usage type. All keys must sum up to the total key value. The total key holds the total number of units consumed.
+       - `cost_details` (Optional[Dict[str, float]]): The cost details of the observation. Reflects the USD cost of the observation per cost type. All keys must sum up to the total key value. The total key holds the total cost of the observation.
        - `prompt`(Optional[PromptClient]): The prompt object used for the generation.
 
        Returns:
@@ -899,6 +906,8 @@ def update_current_observation(
                 "model": model,
                 "model_parameters": model_parameters,
                 "usage": usage,
+                "usage_details": usage_details,
+                "cost_details": cost_details,
                 "prompt": prompt,
                 "public": public,
             }.items()
diff --git a/langfuse/llama_index/_event_handler.py b/langfuse/llama_index/_event_handler.py
index 8a9644053..c299643e3 100644
--- a/langfuse/llama_index/_event_handler.py
+++ b/langfuse/llama_index/_event_handler.py
@@ -6,7 +6,6 @@
     StateType,
 )
 from langfuse.utils import _get_timestamp
-from langfuse.model import ModelUsage
 from ._context import InstrumentorContext
 
 from uuid import uuid4 as create_uuid
@@ -119,12 +118,12 @@ def update_generation_from_end_event(
         }
 
         self._get_generation_client(event.span_id).update(
-            usage=usage, end_time=_get_timestamp()
+            usage=usage, usage_details=usage, end_time=_get_timestamp()
         )
 
     def _parse_token_usage(
         self, response: Union[ChatResponse, CompletionResponse]
-    ) -> Optional[ModelUsage]:
+    ) -> Optional[dict]:
         if (
             (raw := getattr(response, "raw", None))
             and hasattr(raw, "get")
@@ -154,15 +153,15 @@ def _get_generation_client(self, id: str) -> StatefulGenerationClient:
 
 def _parse_usage_from_mapping(
     usage: Union[object, Mapping[str, Any]],
-) -> ModelUsage:
+):
     if isinstance(usage, Mapping):
         return _get_token_counts_from_mapping(usage)
 
     return _parse_usage_from_object(usage)
 
 
-def _parse_usage_from_object(usage: object) -> ModelUsage:
-    model_usage: ModelUsage = {
+def _parse_usage_from_object(usage: object):
+    model_usage = {
         "unit": None,
         "input": None,
         "output": None,
@@ -179,21 +178,26 @@ def _parse_usage_from_object(usage: object):
     if (total_tokens := getattr(usage, "total_tokens", None)) is not None:
         model_usage["total"] = total_tokens
 
+    if (
+        prompt_tokens_details := getattr(usage, "prompt_tokens_details", None)
+    ) is not None and isinstance(prompt_tokens_details, dict):
+        for key, value in prompt_tokens_details.items():
+            model_usage[f"input_{key}"] = value
+
+    if (
+        completion_tokens_details := getattr(usage, "completion_tokens_details", None)
+    ) is not None and isinstance(completion_tokens_details, dict):
+        for key, value in completion_tokens_details.items():
+            model_usage[f"output_{key}"] = value
+
     return model_usage
 
 
 def _get_token_counts_from_mapping(
     usage_mapping: Mapping[str, Any],
-) -> ModelUsage:
-    model_usage: ModelUsage = {
-        "unit": None,
-        "input": None,
-        "output": None,
-        "total": None,
-        "input_cost": None,
-        "output_cost": None,
-        "total_cost": None,
-    }
+):
+    model_usage = {}
+
     if (prompt_tokens := usage_mapping.get("prompt_tokens")) is not None:
         model_usage["input"] = prompt_tokens
     if (completion_tokens := usage_mapping.get("completion_tokens")) is not None:
@@ -201,4 +205,16 @@ def _get_token_counts_from_mapping(
         model_usage["output"] = completion_tokens
     if (total_tokens := usage_mapping.get("total_tokens")) is not None:
         model_usage["total"] = total_tokens
+    if (
+        prompt_tokens_details := usage_mapping.get("prompt_tokens_details")
+    ) is not None and isinstance(prompt_tokens_details, dict):
+        for key, value in prompt_tokens_details.items():
+            model_usage[f"input_{key}"] = value
+
+    if (
+        completion_tokens_details := usage_mapping.get("completion_tokens_details")
+    ) is not None and isinstance(completion_tokens_details, dict):
+        for key, value in completion_tokens_details.items():
+            model_usage[f"output_{key}"] = value
+
     return model_usage
diff --git a/langfuse/openai.py b/langfuse/openai.py
index b3a06a229..e3c8aee9c 100644
--- a/langfuse/openai.py
+++ b/langfuse/openai.py
@@ -654,7 +654,11 @@ def _wrap(open_ai_resource: OpenAiDefinition, initialize, wrapped, args, kwargs)
             else openai_response,
         )
         generation.update(
-            model=model, output=completion, end_time=_get_timestamp(), usage=usage
+            model=model,
+            output=completion,
+            end_time=_get_timestamp(),
+            usage=usage,  # backward compat for all V2 self hosters
+            usage_details=usage,
         )
 
         # Avoiding the trace-update if trace-id is provided by user.
@@ -670,7 +674,12 @@ def _wrap(open_ai_resource: OpenAiDefinition, initialize, wrapped, args, kwargs)
             status_message=str(ex),
             level="ERROR",
             model=model,
-            usage={"input_cost": 0, "output_cost": 0, "total_cost": 0},
+            usage={
+                "input_cost": 0,
+                "output_cost": 0,
+                "total_cost": 0,
+            },  # backward compat for all V2 self hosters
+            cost_details={"input": 0, "output": 0, "total": 0},
         )
         raise ex
 
@@ -710,7 +719,8 @@ async def _wrap_async(
             model=model,
             output=completion,
            end_time=_get_timestamp(),
-            usage=usage,
+            usage=usage,  # backward compat for all V2 self hosters
+            usage_details=usage,
         )
         # Avoiding the trace-update if trace-id is provided by user.
         if not is_nested_trace:
@@ -724,7 +734,12 @@ async def _wrap_async(
             status_message=str(ex),
             level="ERROR",
             model=model,
-            usage={"input_cost": 0, "output_cost": 0, "total_cost": 0},
+            usage={
+                "input_cost": 0,
+                "output_cost": 0,
+                "total_cost": 0,
+            },  # Backward compat for all V2 self hosters
+            cost_details={"input": 0, "output": 0, "total": 0},
         )
         raise ex
 
diff --git a/langfuse/types/__init__.py b/langfuse/types/__init__.py
index 1cef199f7..888966259 100644
--- a/langfuse/types/__init__.py
+++ b/langfuse/types/__init__.py
@@ -4,7 +4,7 @@
 from typing import Any, Dict, List, Literal, Optional, Protocol, TypedDict, Union
 
 from pydantic import BaseModel
-from langfuse.api import MediaContentType
+from langfuse.api import MediaContentType, UsageDetails
 from langfuse.model import MapValue, ModelUsage, PromptClient
 
 SpanLevel = Literal["DEBUG", "DEFAULT", "WARNING", "ERROR"]
@@ -34,6 +34,8 @@ class ObservationParams(TraceMetadata, TypedDict):
     model: Optional[str]
     model_parameters: Optional[Dict[str, MapValue]]
     usage: Optional[Union[BaseModel, ModelUsage]]
+    usage_details: Optional[UsageDetails]
+    cost_details: Optional[Dict[str, float]]
     prompt: Optional[PromptClient]
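[Editor's note: `MediaContentType` changed above from a `typing.Literal` of MIME strings to a `str`-backed enum. A short, hedged compatibility sketch follows; the handler mapping is purely illustrative and only demonstrates the dispatch shape of the generated `visit()` helper.]

```python
from langfuse.api import MediaContentType

# Members subclass str, so code that previously passed or compared raw MIME
# strings keeps working unchanged:
content_type = MediaContentType("image/png")  # construct from the raw MIME value
assert content_type is MediaContentType.IMAGE_PNG
assert content_type == "image/png"            # str comparison still holds

# visit() requires a handler for every member, enforcing exhaustive dispatch.
# Here each handler simply echoes the member's own MIME value; the parameter
# names of visit() are the lowercased member names, so they can be generated:
handlers = {
    name.lower(): (lambda value=member.value: value)
    for name, member in MediaContentType.__members__.items()
}
assert content_type.visit(**handlers) == "image/png"
```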