From c27724fafc34baa5367642fffe3efd8b7eee2f3e Mon Sep 17 00:00:00 2001
From: OpeOginni
Date: Wed, 24 Dec 2025 14:02:28 +0100
Subject: [PATCH 1/2] feat: add experimental TPS for model responses

[FEATURE]: Adding Experimental Calculation and Display of Tokens per second

Fixes #6096
---
 .../src/cli/cmd/tui/routes/session/index.tsx | 64 +++++++++++++++++++
 packages/opencode/src/flag/flag.ts           |  1 +
 packages/opencode/src/session/processor.ts   |  2 +-
 3 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index c685d8c66cc..dbd45ed905d 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -69,6 +69,7 @@ import { Footer } from "./footer.tsx"
 import { usePromptRef } from "../../context/prompt"
 import { Filesystem } from "@/util/filesystem"
 import { DialogSubagent } from "./dialog-subagent.tsx"
+import { Flag } from "@/flag/flag.ts"
 
 addDefaultParsers(parsers.parsers)
 
@@ -1254,6 +1255,10 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
   const sync = useSync()
   const messages = createMemo(() => sync.data.message[props.message.sessionID] ?? [])
 
+  function getParts(messageID: string) {
+    return sync.data.part[messageID] ?? []
+  }
+
   const final = createMemo(() => {
     return props.message.finish && !["tool-calls", "unknown"].includes(props.message.finish)
   })
@@ -1266,6 +1271,62 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     return props.message.time.completed - user.time.created
   })
 
+  const TPS = createMemo(() => {
+    if (!final()) return 0
+    if (!props.message.time.completed) return 0
+    if (!Flag.OPENCODE_EXPERIMENTAL_TPS) return 0
+
+    const allParts = getParts(props.message.id)
+
+    const INVALID_REASONING_TEXTS = ["[REDACTED]", "", null, undefined] as const
+
+    // Filter for actual streaming parts (reasoning + text), exclude tool/step markers
+    const streamingParts = allParts.filter((part): part is TextPart | ReasoningPart => {
+      // Only text and reasoning parts have streaming time data
+      if (part.type !== "text" && part.type !== "reasoning") return false
+
+      // Skip parts without valid timestamps
+      if (!part.time?.start || !part.time?.end) return false
+
+      // Include text parts with content
+      if (part.type === "text" && (part.text?.trim().length ?? 0) > 0) return true
+
+      // Include reasoning parts with valid (non-empty) text
+      if (part.type === "reasoning" && !INVALID_REASONING_TEXTS.includes(part.text as any)) {
+        return true
+      }
+
+      return false
+    })
+
+    if (streamingParts.length === 0) return 0
+
+    // Sum individual part durations (excludes tool execution time between parts)
+    let totalStreamingTimeMs = 0
+    let hasValidReasoning = false
+
+    for (const part of streamingParts) {
+      totalStreamingTimeMs += part.time!.end! - part.time!.start!
+      if (part.type === "reasoning") {
+        hasValidReasoning = true
+      }
+    }
+
+    if (totalStreamingTimeMs === 0) return 0
+
+    // Only count reasoning tokens if valid reasoning exists
+    const totalTokens =
+      (hasValidReasoning ? props.message.tokens.reasoning : 0) + props.message.tokens.output
+
+    if (totalTokens === 0) return 0
+
+    // Calculate tokens per second
+    const totalStreamingTimeSec = totalStreamingTimeMs / 1000
+    const tokensPerSecond = totalTokens / totalStreamingTimeSec
+
+    return Number(tokensPerSecond.toFixed(2))
+  })
+
   return (
     <>
@@ -1307,6 +1368,9 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
 
               · {Locale.duration(duration())}
             
+            
+              · {TPS()} tps
+            
 
 
 
diff --git a/packages/opencode/src/flag/flag.ts b/packages/opencode/src/flag/flag.ts
index 805da33cc7a..0e1a9b8c60a 100644
--- a/packages/opencode/src/flag/flag.ts
+++ b/packages/opencode/src/flag/flag.ts
@@ -31,6 +31,7 @@ export namespace Flag {
   export const OPENCODE_EXPERIMENTAL_OXFMT = OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_OXFMT")
   export const OPENCODE_EXPERIMENTAL_LSP_TY = truthy("OPENCODE_EXPERIMENTAL_LSP_TY")
   export const OPENCODE_EXPERIMENTAL_LSP_TOOL = OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_LSP_TOOL")
+  export const OPENCODE_EXPERIMENTAL_TPS = OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_TPS")
 
   function truthy(key: string) {
     const value = process.env[key]?.toLowerCase()
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 78871630c65..dde39cd9fc8 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -321,7 +321,7 @@ export namespace SessionProcessor {
             )
             currentText.text = textOutput.text
             currentText.time = {
-              start: Date.now(),
+              start: currentText.time?.start ?? Date.now(), // keep the start time set by the text-start event instead of resetting it
              end: Date.now(),
             }
             if (value.providerMetadata) currentText.metadata = value.providerMetadata

From 240df9b1df0c4a0f7b1b48b6ac8f703de5c09d27 Mon Sep 17 00:00:00 2001
From: OpeOginni
Date: Wed, 24 Dec 2025 14:45:02 +0100
Subject: [PATCH 2/2] refactor: enhance token calculation for multiple assistant messages that share one parent message

---
 .../src/cli/cmd/tui/routes/session/index.tsx | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index dbd45ed905d..6a79dad5cf6 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -1276,7 +1276,9 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
     if (!props.message.time.completed) return 0
     if (!Flag.OPENCODE_EXPERIMENTAL_TPS) return 0
 
-    const allParts = getParts(props.message.id)
+    const assistantMessages: AssistantMessage[] = messages().filter((msg) => msg.role === "assistant" && msg.id !== props.message.id) as AssistantMessage[]
+
+    const allParts = assistantMessages.flatMap((msg) => getParts(msg.id))
 
     const INVALID_REASONING_TEXTS = ["[REDACTED]", "", null, undefined] as const
 
@@ -1314,9 +1316,16 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
 
     if (totalStreamingTimeMs === 0) return 0
 
-    // Only count reasoning tokens if valid reasoning exists
-    const totalTokens =
-      (hasValidReasoning ? props.message.tokens.reasoning : 0) + props.message.tokens.output
+    const totals = assistantMessages.reduce(
+      (acc, m) => {
+        acc.output += m.tokens.output
+        if (hasValidReasoning) acc.reasoning += m.tokens.reasoning // Only count reasoning tokens if valid reasoning parts exist
+        return acc
+      },
+      { output: 0, reasoning: 0 },
+    )
+
+    const totalTokens = totals.reasoning + totals.output
 
     if (totalTokens === 0) return 0
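
Reviewer note (not part of the patches): the sketch below restates the TPS math the first commit introduces, so the aggregation in the second commit is easier to follow. It sums the streamed durations of text/reasoning parts, then divides the token count by that time; reasoning tokens only count when non-redacted reasoning parts actually streamed. The StreamedPart and TokenUsage shapes and the tokensPerSecond helper are simplified, hypothetical stand-ins for illustration, not opencode's real types.

interface StreamedPart {
  // simplified stand-in for opencode's text/reasoning parts
  type: "text" | "reasoning"
  text?: string | null
  time?: { start?: number; end?: number }
}

interface TokenUsage {
  // simplified stand-in for an assistant message's token counts
  output: number
  reasoning: number
}

function tokensPerSecond(parts: StreamedPart[], tokens: TokenUsage): number {
  // Keep only text/reasoning parts that actually streamed (both timestamps present)
  const streamed = parts.filter((p) => (p.type === "text" || p.type === "reasoning") && p.time?.start && p.time?.end)
  if (streamed.length === 0) return 0

  // Sum per-part durations so tool execution time between parts is excluded
  const elapsedMs = streamed.reduce((ms, p) => ms + (p.time!.end! - p.time!.start!), 0)
  if (elapsedMs === 0) return 0

  // Count reasoning tokens only when usable (non-redacted, non-empty) reasoning text was streamed
  const hasReasoning = streamed.some((p) => p.type === "reasoning" && !!p.text && p.text !== "[REDACTED]")
  const total = tokens.output + (hasReasoning ? tokens.reasoning : 0)
  if (total === 0) return 0

  return Number((total / (elapsedMs / 1000)).toFixed(2))
}

// Example: 900 output tokens streamed over 30s of part time -> 30 tps
// tokensPerSecond([{ type: "text", text: "hi", time: { start: 1_000, end: 31_000 } }], { output: 900, reasoning: 0 })

Assuming the flag wiring in patch 1, the display is gated on OPENCODE_EXPERIMENTAL_TPS (or the umbrella OPENCODE_EXPERIMENTAL) being set to a value that truthy() accepts in the environment.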