From 55be48c60c98c8a71b68e776e8b752ff40b50892 Mon Sep 17 00:00:00 2001 From: cawthorne Date: Tue, 20 Jan 2026 23:25:38 +0000 Subject: [PATCH 1/2] Add WebSocket failover counter metric, abnormal closure tracking, and URL change logging --- src/metrics/index.ts | 5 +++++ src/transports/websocket.ts | 18 +++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/metrics/index.ts b/src/metrics/index.ts index 08d15242..2893f9bf 100644 --- a/src/metrics/index.ts +++ b/src/metrics/index.ts @@ -357,4 +357,9 @@ export const metrics = new Metrics(() => ({ help: 'The number of addresses in PoR request input parameters', labelNames: ['feed_id'] as const, }), + wsConnectionFailoverCount: new client.Gauge({ + name: 'ws_connection_failover_count', + help: 'The number of consecutive connection issues (unresponsive/no data, abnormal closures), used to trigger URL failover. Resets to 0 when data flows successfully.', + labelNames: ['transport_name'] as const, + }), })) diff --git a/src/transports/websocket.ts b/src/transports/websocket.ts index eafb3077..881efd86 100644 --- a/src/transports/websocket.ts +++ b/src/transports/websocket.ts @@ -295,6 +295,16 @@ export class WebSocketTransport< `Closed websocket connection. 
Code: ${event.code} ; reason: ${event.reason?.toString()}`, ) + // If abnormal closure, increment failover counter to trigger potential URL switch + // Code 1000 is normal closure, all other codes indicate abnormal disconnections + if (event.code !== 1000) { + this.streamHandlerInvocationsWithNoConnection += 1 + logger.info( + `Abnormal closure detected (code ${event.code}), incremented failover counter to ${this.streamHandlerInvocationsWithNoConnection}`, + ) + metrics.get('wsConnectionFailoverCount').labels({ transport_name: this.name }).set(this.streamHandlerInvocationsWithNoConnection) + } + // Record active ws connections by decrementing count on close // Using URL in label since connection_key is removed from v3 metrics.get('wsConnectionActive').dec() @@ -414,9 +424,10 @@ export class WebSocketTransport< // to determine minimum TTL of an open connection given no explicit connection errors. if (connectionUnresponsive) { this.streamHandlerInvocationsWithNoConnection += 1 - logger.trace( - `The connection is unresponsive, incremented streamHandlerIterationsWithNoConnection = ${this.streamHandlerInvocationsWithNoConnection}`, + logger.info( + `The connection is unresponsive (last message ${timeSinceLastMessage}ms ago), incremented failover counter to ${this.streamHandlerInvocationsWithNoConnection}`, ) + metrics.get('wsConnectionFailoverCount').labels({ transport_name: this.name }).set(this.streamHandlerInvocationsWithNoConnection) } // We want to check if the URL we calculate is different from the one currently connected. 
@@ -431,9 +442,10 @@ export class WebSocketTransport< // Check if we should close the current connection if (!connectionClosed && (urlChanged || connectionUnresponsive)) { if (urlChanged) { + logger.info('Websocket URL has changed, closing connection to reconnect...') censorLogs(() => logger.debug( - `Websocket url has changed from ${this.currentUrl} to ${urlFromConfig}, closing connection...`, + `Websocket URL changed from ${this.currentUrl} to ${urlFromConfig}`, ), ) } else { From 7090a7cfa74df948763c200ade98043f40eb9a6c Mon Sep 17 00:00:00 2001 From: cawthorne Date: Tue, 20 Jan 2026 23:45:02 +0000 Subject: [PATCH 2/2] Remove failover counter increment on abnormal closure (non-1000 close codes) --- src/transports/websocket.ts | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/transports/websocket.ts b/src/transports/websocket.ts index 881efd86..8de94291 100644 --- a/src/transports/websocket.ts +++ b/src/transports/websocket.ts @@ -295,16 +295,6 @@ export class WebSocketTransport< `Closed websocket connection. Code: ${event.code} ; reason: ${event.reason?.toString()}`, ) - // If abnormal closure, increment failover counter to trigger potential URL switch - // Code 1000 is normal closure, all other codes indicate abnormal disconnections - if (event.code !== 1000) { - this.streamHandlerInvocationsWithNoConnection += 1 - logger.info( - `Abnormal closure detected (code ${event.code}), incremented failover counter to ${this.streamHandlerInvocationsWithNoConnection}`, - ) - metrics.get('wsConnectionFailoverCount').labels({ transport_name: this.name }).set(this.streamHandlerInvocationsWithNoConnection) - } - // Record active ws connections by decrementing count on close // Using URL in label since connection_key is removed from v3 metrics.get('wsConnectionActive').dec()