🤖 fix: avoid mixed server/client clocks in timing stats

ThomasK33 · ThomasK33 · commit 5a289452b782 · 2025-12-18T13:31:24.000+01:00
- Track backend timestamps separately from renderer clock time
- Translate backend timestamps into renderer time before computing live durations
- Prefer backend-provided duration on stream-end; remove Date.now()-startTime override
- Compute streamingMs from duration/ttft/toolExecution deltas to avoid mixed clocks

Change-Id: Ia451bdeee13ad5797a3ab10ec1af0944b6a05eb1
Signed-off-by: Thomas Kosiewski <tk@coder.com>
diff --git a/src/browser/utils/messages/StreamingMessageAggregator.ts b/src/browser/utils/messages/StreamingMessageAggregator.ts
@@ -32,6 +32,7 @@ import { isDynamicToolPart } from "@/common/types/toolParts";
 import { z } from "zod";
 import { createDeltaStorage, type DeltaRecordStorage } from "./StreamingTPSCalculator";
 import { computeRecencyTimestamp } from "./recency";
+import { assert } from "@/common/utils/assert";
 import { getStatusStateKey, getSessionTimingKey } from "@/common/constants/storage";
 
 // Maximum number of messages to display in the DOM for performance
@@ -63,17 +64,29 @@ type AgentStatus = z.infer<typeof AgentStatusSchema>;
 const MAX_DISPLAYED_MESSAGES = 128;
 
 interface StreamingContext {
-  startTime: number;
+  /** Backend timestamp when stream started (Date.now()) */
+  serverStartTime: number;
+  /**
+   * Offset to translate backend timestamps into the renderer clock.
+   * Computed as: `Date.now() - lastServerTimestamp`.
+   */
+  clockOffsetMs: number;
+  /** Most recent backend timestamp observed for this stream */
+  lastServerTimestamp: number;
+
   isComplete: boolean;
   isCompacting: boolean;
   model: string;
-  /** Timestamp of first content token (text or reasoning delta) */
-  firstTokenTime: number | null;
+
+  /** Timestamp of first content token (text or reasoning delta) - backend Date.now() */
+  serverFirstTokenTime: number | null;
+
   /** Accumulated tool execution time in ms */
   toolExecutionMs: number;
-  /** Map of tool call start times for in-progress tool calls */
+  /** Map of tool call start times for in-progress tool calls (backend timestamps) */
   pendingToolStarts: Map<string, number>;
-  /** Mode (plan/exec) - captured from message metadata when stream ends */
+
+  /** Mode (plan/exec) */
   mode?: "plan" | "exec";
 }
 
@@ -341,6 +354,26 @@ export class StreamingMessageAggregator {
     }
   }
 
+  private updateStreamClock(context: StreamingContext, serverTimestamp: number): void {
+    assert(context, "updateStreamClock requires context");
+    assert(typeof serverTimestamp === "number", "updateStreamClock requires serverTimestamp");
+
+    // Only update if this timestamp is >= the most recent one we've seen.
+    // During stream replay, older historical parts may be re-emitted out of order.
+    if (serverTimestamp < context.lastServerTimestamp) {
+      return;
+    }
+
+    context.lastServerTimestamp = serverTimestamp;
+    context.clockOffsetMs = Date.now() - serverTimestamp;
+  }
+
+  private translateServerTime(context: StreamingContext, serverTimestamp: number): number {
+    assert(context, "translateServerTime requires context");
+    assert(typeof serverTimestamp === "number", "translateServerTime requires serverTimestamp");
+
+    return serverTimestamp + context.clockOffsetMs;
+  }
   private invalidateCache(): void {
     this.cachedAllMessages = null;
     this.cachedDisplayedMessages = null;
@@ -447,31 +480,53 @@ export class StreamingMessageAggregator {
     const context = this.activeStreams.get(messageId);
     if (context) {
       const endTime = Date.now();
-      const duration = endTime - context.startTime;
+      const message = this.messages.get(messageId);
 
-      const ttft =
-        context.firstTokenTime !== null ? context.firstTokenTime - context.startTime : null;
+      // Prefer backend-provided duration (computed in the same clock domain as tool/delta timestamps).
+      // Fall back to renderer elapsed time, measured from the backend start time translated into the renderer clock.
+      const durationMsFromMetadata = message?.metadata?.duration;
+      const fallbackStartTime = this.translateServerTime(context, context.serverStartTime);
+      const fallbackDurationMs = Math.max(0, endTime - fallbackStartTime);
+      const durationMs =
+        typeof durationMsFromMetadata === "number" && Number.isFinite(durationMsFromMetadata)
+          ? durationMsFromMetadata
+          : fallbackDurationMs;
+
+      const ttftMs =
+        context.serverFirstTokenTime !== null
+          ? Math.max(0, context.serverFirstTokenTime - context.serverStartTime)
+          : null;
 
       // Get output tokens from cumulative usage (if available)
       const cumulativeUsage = this.activeStreamUsage.get(messageId)?.cumulative.usage;
       const outputTokens = cumulativeUsage?.outputTokens ?? 0;
       const reasoningTokens = cumulativeUsage?.reasoningTokens ?? 0;
 
-      // Calculate streaming duration: time from first token to end, EXCLUDING tool execution
-      // This is what determines actual model output speed - we don't count time waiting for tools
-      const rawStreamingMs = context.firstTokenTime !== null ? endTime - context.firstTokenTime : 0;
-      const streamingMs = Math.max(0, rawStreamingMs - context.toolExecutionMs);
+      // Account for in-progress tool calls (can happen on abort/error)
+      let totalToolExecutionMs = context.toolExecutionMs;
+      if (context.pendingToolStarts.size > 0) {
+        const serverEndTime = context.serverStartTime + durationMs;
+        for (const toolStartTime of context.pendingToolStarts.values()) {
+          const toolMs = serverEndTime - toolStartTime;
+          if (toolMs > 0) {
+            totalToolExecutionMs += toolMs;
+          }
+        }
+      }
+
+      // Streaming duration excludes TTFT and tool execution - used for avg tok/s
+      const streamingMs = Math.max(0, durationMs - (ttftMs ?? 0) - totalToolExecutionMs);
 
-      // Get mode from the message metadata (set by backend at stream-end)
-      const message = this.messages.get(messageId);
-      const mode = message?.metadata?.mode as "plan" | "exec" | undefined;
+      const mode = (message?.metadata?.mode ?? context.mode) as "plan" | "exec" | undefined;
 
-      // Store last completed stream stats (including tokens and mode)
+      // Store last completed stream stats (start/first-token timestamps re-derived from the renderer-clock endTime)
+      const startTime = endTime - durationMs;
+      const firstTokenTime = ttftMs !== null ? startTime + ttftMs : null;
       this.lastCompletedStreamStats = {
-        startTime: context.startTime,
+        startTime,
         endTime,
-        firstTokenTime: context.firstTokenTime,
-        toolExecutionMs: context.toolExecutionMs,
+        firstTokenTime,
+        toolExecutionMs: totalToolExecutionMs,
         model: context.model,
         outputTokens: outputTokens + reasoningTokens, // Combined for speed calc
         reasoningTokens,
@@ -494,14 +549,14 @@ export class StreamingMessageAggregator {
         totalReasoningTokens: 0,
         totalStreamingMs: 0,
       };
-      modelStats.totalDurationMs += duration;
-      modelStats.totalToolExecutionMs += context.toolExecutionMs;
+      modelStats.totalDurationMs += durationMs;
+      modelStats.totalToolExecutionMs += totalToolExecutionMs;
       modelStats.responseCount += 1;
       modelStats.totalOutputTokens += outputTokens + reasoningTokens; // Combined for speed calc
       modelStats.totalReasoningTokens += reasoningTokens;
       modelStats.totalStreamingMs += streamingMs;
-      if (ttft !== null) {
-        modelStats.totalTtftMs += ttft;
+      if (ttftMs !== null) {
+        modelStats.totalTtftMs += ttftMs;
         modelStats.ttftCount += 1;
       }
       this.sessionTimingStats[statsKey] = modelStats;
@@ -652,16 +707,24 @@ export class StreamingMessageAggregator {
     if (entries.length === 0) return null;
     const [messageId, context] = entries[0];
 
+    const now = Date.now();
+
+    const startTime = this.translateServerTime(context, context.serverStartTime);
+    const firstTokenTime =
+      context.serverFirstTokenTime !== null
+        ? this.translateServerTime(context, context.serverFirstTokenTime)
+        : null;
+
     // Include time from currently-executing tools (not just completed ones)
     let totalToolMs = context.toolExecutionMs;
-    const now = Date.now();
-    for (const startTime of context.pendingToolStarts.values()) {
-      totalToolMs += now - startTime;
+    for (const toolStartServerTime of context.pendingToolStarts.values()) {
+      const toolStartTime = this.translateServerTime(context, toolStartServerTime);
+      totalToolMs += Math.max(0, now - toolStartTime);
     }
 
     return {
-      startTime: context.startTime,
-      firstTokenTime: context.firstTokenTime,
+      startTime,
+      firstTokenTime,
       toolExecutionMs: totalToolMs,
       model: context.model,
       liveTokenCount: this.getStreamingTokenCount(messageId),
@@ -876,12 +939,15 @@ export class StreamingMessageAggregator {
     const lastUserMsg = [...messages].reverse().find((m) => m.role === "user");
     const isCompacting = lastUserMsg?.metadata?.muxMetadata?.type === "compaction-request";
 
+    const now = Date.now();
     const context: StreamingContext = {
-      startTime: data.startTime,
+      serverStartTime: data.startTime,
+      clockOffsetMs: now - data.startTime,
+      lastServerTimestamp: data.startTime,
       isComplete: false,
       isCompacting,
       model: data.model,
-      firstTokenTime: null,
+      serverFirstTokenTime: null,
       toolExecutionMs: 0,
       pendingToolStarts: new Map(),
       mode: data.mode,
@@ -906,11 +972,13 @@ export class StreamingMessageAggregator {
     const message = this.messages.get(data.messageId);
     if (!message) return;
 
-    // Track first token time (only for non-empty deltas)
-    if (data.delta.length > 0) {
-      const context = this.activeStreams.get(data.messageId);
-      if (context?.firstTokenTime === null) {
-        context.firstTokenTime = data.timestamp;
+    const context = this.activeStreams.get(data.messageId);
+    if (context) {
+      this.updateStreamClock(context, data.timestamp);
+
+      // Track first token time (only for non-empty deltas)
+      if (data.delta.length > 0 && context.serverFirstTokenTime === null) {
+        context.serverFirstTokenTime = data.timestamp;
       }
     }
 
@@ -939,8 +1007,12 @@ export class StreamingMessageAggregator {
         const updatedMetadata: MuxMetadata = {
           ...message.metadata,
           ...data.metadata,
-          duration: Date.now() - activeStream.startTime,
         };
+
+        const durationMs = data.metadata.duration;
+        if (typeof durationMs === "number" && Number.isFinite(durationMs)) {
+          this.updateStreamClock(activeStream, activeStream.serverStartTime + durationMs);
+        }
         message.metadata = updatedMetadata;
 
         // Update tool parts with their results if provided
@@ -1080,6 +1152,7 @@ export class StreamingMessageAggregator {
     // Track tool start time for execution duration calculation
     const context = this.activeStreams.get(data.messageId);
     if (context) {
+      this.updateStreamClock(context, data.timestamp);
       context.pendingToolStarts.set(data.toolCallId, data.timestamp);
     }
 
@@ -1159,6 +1232,8 @@ export class StreamingMessageAggregator {
     // Track tool execution duration
     const context = this.activeStreams.get(data.messageId);
     if (context) {
+      this.updateStreamClock(context, data.timestamp);
+
       const startTime = context.pendingToolStarts.get(data.toolCallId);
       if (startTime !== undefined) {
         context.toolExecutionMs += data.timestamp - startTime;
@@ -1191,11 +1266,13 @@ export class StreamingMessageAggregator {
     const message = this.messages.get(data.messageId);
     if (!message) return;
 
-    // Track first token time (reasoning also counts as first token)
-    if (data.delta.length > 0) {
-      const context = this.activeStreams.get(data.messageId);
-      if (context?.firstTokenTime === null) {
-        context.firstTokenTime = data.timestamp;
+    const context = this.activeStreams.get(data.messageId);
+    if (context) {
+      this.updateStreamClock(context, data.timestamp);
+
+      // Track first token time (reasoning also counts as first token)
+      if (data.delta.length > 0 && context.serverFirstTokenTime === null) {
+        context.serverFirstTokenTime = data.timestamp;
       }
     }