diff --git a/packages/core-backend/CHANGELOG.md b/packages/core-backend/CHANGELOG.md index 3253e7fe1ea..62e85bb1b67 100644 --- a/packages/core-backend/CHANGELOG.md +++ b/packages/core-backend/CHANGELOG.md @@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add `forceReconnection()` method to `BackendWebSocketService` for controlled subscription state cleanup ([#6861](https://github.com/MetaMask/core/pull/6861)) + - Performs a controlled disconnect-then-reconnect sequence with exponential backoff + - Useful for recovering from subscription/unsubscription issues and cleaning up orphaned subscriptions + - Add `BackendWebSocketService:forceReconnection` messenger action +- Add stable connection timer to prevent rapid reconnection loops ([#6861](https://github.com/MetaMask/core/pull/6861)) + - Connection must stay stable for 10 seconds before resetting reconnect attempts + - Prevents issues when server accepts connection then immediately closes it + +### Changed + +- Update `AccountActivityService` to use new `forceReconnection()` method instead of manually calling disconnect/connect ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Improve reconnection scheduling in `BackendWebSocketService` to be idempotent ([#6861](https://github.com/MetaMask/core/pull/6861)) + - Prevents duplicate reconnection timers and inflated attempt counters + - Scheduler checks if reconnect is already scheduled before creating new timer + - Renamed internal `#scheduleReconnect()` to `#scheduleConnect()` for clarity +- Improve error handling in `BackendWebSocketService.connect()` ([#6861](https://github.com/MetaMask/core/pull/6861)) + - Always schedule reconnect on connection failure (exponential backoff prevents aggressive retries) + - Remove redundant schedule calls from error paths +- Update `BackendWebSocketService.disconnect()` to reset reconnect attempts counter ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Update `BackendWebSocketService.disconnect()` return type from `Promise` to `void` ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Improve logging throughout `BackendWebSocketService` for better debugging ([#6861](https://github.com/MetaMask/core/pull/6861)) + +### Fixed + +- Fix potential race condition in `BackendWebSocketService.connect()` that could bypass exponential backoff when reconnect is already scheduled ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Fix memory leak from orphaned timers when multiple reconnects are scheduled ([#6861](https://github.com/MetaMask/core/pull/6861)) +- Fix issue where reconnect attempts counter could grow unnecessarily with duplicate scheduled reconnects ([#6861](https://github.com/MetaMask/core/pull/6861)) + ## [2.1.0] ### Added diff --git a/packages/core-backend/src/AccountActivityService.test.ts b/packages/core-backend/src/AccountActivityService.test.ts index 6ba242469d1..c60fe7bdeb9 100644 --- a/packages/core-backend/src/AccountActivityService.test.ts +++ b/packages/core-backend/src/AccountActivityService.test.ts @@ -71,7 +71,7 @@ const getMessenger = () => { // Create mock action handlers const mockGetSelectedAccount = jest.fn(); const mockConnect = jest.fn(); - const mockDisconnect = jest.fn(); + const mockForceReconnection = jest.fn(); const mockSubscribe = jest.fn(); const mockChannelHasSubscription = jest.fn(); const mockGetSubscriptionsByChannel = jest.fn(); @@ -89,8 +89,8 @@ const getMessenger = () => { mockConnect, ); rootMessenger.registerActionHandler( - 'BackendWebSocketService:disconnect', - mockDisconnect, + 'BackendWebSocketService:forceReconnection', + mockForceReconnection, ); rootMessenger.registerActionHandler( 'BackendWebSocketService:subscribe', @@ -123,7 +123,7 @@ const getMessenger = () => { mocks: { getSelectedAccount: mockGetSelectedAccount, connect: mockConnect, - disconnect: mockDisconnect, + forceReconnection: mockForceReconnection, subscribe: mockSubscribe, channelHasSubscription: mockChannelHasSubscription, getSubscriptionsByChannel: mockGetSubscriptionsByChannel, @@ -222,7 +222,7 @@ type WithServiceCallback = (payload: { mocks: { getSelectedAccount: jest.Mock; connect: jest.Mock; - disconnect: jest.Mock; + forceReconnection: jest.Mock; subscribe: jest.Mock; channelHasSubscription: jest.Mock; getSubscriptionsByChannel: jest.Mock; @@ -464,28 +464,22 @@ describe('AccountActivityService', () => { ); }); - it('should handle disconnect failures during force reconnection by logging error and continuing gracefully', async () => { + it('should handle subscription failure by calling forceReconnection', async () => { await withService(async ({ service, mocks }) => { - // Mock disconnect to fail - this prevents the reconnect step from executing - mocks.disconnect.mockRejectedValue( - new Error('Disconnect failed during force reconnection'), - ); - - // Trigger scenario that causes force reconnection by making subscribe fail + // Mock subscribe to fail mocks.subscribe.mockRejectedValue(new Error('Subscription failed')); - // Should handle both subscription failure and disconnect failure gracefully - should not throw + // Should handle subscription failure gracefully - should not throw const result = await service.subscribe({ address: '0x123abc' }); expect(result).toBeUndefined(); // Verify the subscription was attempted expect(mocks.subscribe).toHaveBeenCalledTimes(1); - // Verify disconnect was attempted (but failed, preventing reconnection) - expect(mocks.disconnect).toHaveBeenCalledTimes(1); + // Verify forceReconnection was called (lines 289-290) + expect(mocks.forceReconnection).toHaveBeenCalledTimes(1); - // Connect is only called once at the start because disconnect failed, - // so the reconnect step never executes (it's in the same try-catch block) + // Connect is only called once at the start expect(mocks.connect).toHaveBeenCalledTimes(1); }); }); @@ -536,14 +530,8 @@ describe('AccountActivityService', () => { // unsubscribe catches errors and forces reconnection instead of throwing await service.unsubscribe(mockSubscription); - // Should have attempted to force reconnection with exact sequence - expect(mocks.disconnect).toHaveBeenCalledTimes(1); - expect(mocks.connect).toHaveBeenCalledTimes(1); - - // Verify disconnect was called before connect - const disconnectOrder = mocks.disconnect.mock.invocationCallOrder[0]; - const connectOrder = mocks.connect.mock.invocationCallOrder[0]; - expect(disconnectOrder).toBeLessThan(connectOrder); + // Should have attempted to force reconnection + expect(mocks.forceReconnection).toHaveBeenCalledTimes(1); }, ); }); diff --git a/packages/core-backend/src/AccountActivityService.ts b/packages/core-backend/src/AccountActivityService.ts index c92ab1a2a2a..30c8b95d98c 100644 --- a/packages/core-backend/src/AccountActivityService.ts +++ b/packages/core-backend/src/AccountActivityService.ts @@ -80,7 +80,7 @@ export type AccountActivityServiceActions = AccountActivityServiceMethodActions; export const ACCOUNT_ACTIVITY_SERVICE_ALLOWED_ACTIONS = [ 'AccountsController:getSelectedAccount', 'BackendWebSocketService:connect', - 'BackendWebSocketService:disconnect', + 'BackendWebSocketService:forceReconnection', 'BackendWebSocketService:subscribe', 'BackendWebSocketService:getConnectionInfo', 'BackendWebSocketService:channelHasSubscription', @@ -559,16 +559,11 @@ export class AccountActivityService { * Force WebSocket reconnection to clean up subscription state */ async #forceReconnection(): Promise { - try { - log('Forcing WebSocket reconnection to clean up subscription state'); - - // All subscriptions will be cleaned up automatically on WebSocket disconnect + log('Forcing WebSocket reconnection to clean up subscription state'); - await this.#messenger.call('BackendWebSocketService:disconnect'); - await this.#messenger.call('BackendWebSocketService:connect'); - } catch (error) { - log('Failed to force WebSocket reconnection', { error }); - } + // Use the dedicated forceReconnection method which performs a controlled + // disconnect-then-connect sequence to clean up subscription state + await this.#messenger.call('BackendWebSocketService:forceReconnection'); } // ============================================================================= diff --git a/packages/core-backend/src/BackendWebSocketService-method-action-types.ts b/packages/core-backend/src/BackendWebSocketService-method-action-types.ts index 2410df1449b..0d93fa38aa3 100644 --- a/packages/core-backend/src/BackendWebSocketService-method-action-types.ts +++ b/packages/core-backend/src/BackendWebSocketService-method-action-types.ts @@ -25,6 +25,27 @@ export type BackendWebSocketServiceDisconnectAction = { handler: BackendWebSocketService['disconnect']; }; +/** + * Forces a WebSocket reconnection to clean up subscription state + * + * This method is useful when subscription state may be out of sync and needs to be reset. + * It performs a controlled disconnect-then-reconnect sequence: + * - Disconnects cleanly to trigger subscription cleanup + * - Schedules reconnection with exponential backoff to prevent rapid loops + * - All subscriptions will be cleaned up automatically on disconnect + * + * Use cases: + * - Recovering from subscription/unsubscription issues + * - Cleaning up orphaned subscriptions + * - Forcing a fresh subscription state + * + * @returns Promise that resolves when disconnection is complete (reconnection is scheduled) + */ +export type BackendWebSocketServiceForceReconnectionAction = { + type: `BackendWebSocketService:forceReconnection`; + handler: BackendWebSocketService['forceReconnection']; +}; + /** * Sends a message through the WebSocket * @@ -159,6 +180,7 @@ export type BackendWebSocketServiceSubscribeAction = { export type BackendWebSocketServiceMethodActions = | BackendWebSocketServiceConnectAction | BackendWebSocketServiceDisconnectAction + | BackendWebSocketServiceForceReconnectionAction | BackendWebSocketServiceSendMessageAction | BackendWebSocketServiceSendRequestAction | BackendWebSocketServiceGetConnectionInfoAction diff --git a/packages/core-backend/src/BackendWebSocketService.test.ts b/packages/core-backend/src/BackendWebSocketService.test.ts index d2281b72a51..db1ccd345d6 100644 --- a/packages/core-backend/src/BackendWebSocketService.test.ts +++ b/packages/core-backend/src/BackendWebSocketService.test.ts @@ -609,26 +609,6 @@ describe('BackendWebSocketService', () => { ); }); - it('should handle connection timeout by rejecting with timeout error and setting state to ERROR', async () => { - await withService( - { - options: { timeout: TEST_CONSTANTS.TIMEOUT_MS }, - mockWebSocketOptions: { autoConnect: false }, - }, - async ({ service, completeAsyncOperations }) => { - const connectPromise = service.connect().catch((error) => error); - - await completeAsyncOperations(TEST_CONSTANTS.TIMEOUT_MS + 50); - - const error = await connectPromise; - expect(error.message).toBe( - `Failed to connect to WebSocket: Connection timeout after ${TEST_CONSTANTS.TIMEOUT_MS}ms`, - ); - expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); - }, - ); - }); - it('should reject sendMessage and sendRequest operations when WebSocket is disconnected', async () => { await withService( { mockWebSocketOptions: { autoConnect: false } }, @@ -703,13 +683,15 @@ describe('BackendWebSocketService', () => { ); // Advance time to trigger reconnection attempt - await completeAsyncOperations(100); + // With jitter (±25%), first attempt delay is 375-625ms (base 500ms) + await completeAsyncOperations(700); // Service should have successfully reconnected expect(service.getConnectionInfo().state).toBe( WebSocketState.CONNECTED, ); - expect(service.getConnectionInfo().reconnectAttempts).toBe(0); // Reset on successful connection + // reconnectAttempts will be 1 until stable connection timer (10s) resets it + expect(service.getConnectionInfo().reconnectAttempts).toBe(1); }, ); }); @@ -730,7 +712,7 @@ describe('BackendWebSocketService', () => { }, ); - await service.disconnect(); + service.disconnect(); const connectionInfo = service.getConnectionInfo(); expect(connectionInfo.state).toBe(WebSocketState.DISCONNECTED); @@ -739,25 +721,6 @@ describe('BackendWebSocketService', () => { }); }); - it('should handle WebSocket error events during connection establishment by setting state to ERROR', async () => { - await withService( - { mockWebSocketOptions: { autoConnect: false } }, - async ({ service, getMockWebSocket, completeAsyncOperations }) => { - const connectPromise = service.connect(); - await completeAsyncOperations(10); - - // Trigger error event during connection phase - const mockWs = getMockWebSocket(); - mockWs.simulateError(); - - await expect(connectPromise).rejects.toThrow( - 'WebSocket connection error', - ); - expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); - }, - ); - }); - it('should remain in CONNECTED state when trying to connect again', async () => { await withService(async ({ service }) => { await service.connect(); @@ -802,30 +765,13 @@ describe('BackendWebSocketService', () => { ); // Disconnect when already disconnected - await service.disconnect(); + service.disconnect(); expect(service.getConnectionInfo().state).toBe( WebSocketState.DISCONNECTED, ); }); }); - it('should handle WebSocket close during connection phase', async () => { - await withService( - { mockWebSocketOptions: { autoConnect: false } }, - async ({ service, getMockWebSocket, completeAsyncOperations }) => { - const connectPromise = service.connect(); - await completeAsyncOperations(10); - - const mockWs = getMockWebSocket(); - mockWs.simulateClose(1006, 'Connection failed'); - - await expect(connectPromise).rejects.toThrow( - 'WebSocket connection closed during connection', - ); - }, - ); - }); - it('should handle unexpected disconnect with empty reason by using default close reason', async () => { await withService( async ({ service, getMockWebSocket, completeAsyncOperations }) => { @@ -865,6 +811,251 @@ describe('BackendWebSocketService', () => { }, ); }); + + it('should skip connect when reconnect timer is already scheduled', async () => { + await withService( + { mockWebSocketOptions: { autoConnect: false } }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Start connection + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + const mockWs = getMockWebSocket(); + + // Simulate connection error to trigger scheduleConnect + mockWs.simulateError(); + await completeAsyncOperations(10); + + // Verify reconnect timer is scheduled + const attemptsBefore = service.getConnectionInfo().reconnectAttempts; + expect(attemptsBefore).toBeGreaterThan(0); + + // Now try to connect again while reconnect timer is scheduled + // This should return early without doing anything (line 451) + await service.connect(); + + // Attempts should be unchanged since connect returned early + expect(service.getConnectionInfo().reconnectAttempts).toBe( + attemptsBefore, + ); + }, + ); + }); + + it('should handle connection timeout', async () => { + await withService( + { + options: { timeout: 100 }, + mockWebSocketOptions: { autoConnect: false }, + }, + async ({ service, completeAsyncOperations }) => { + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + + // Advance time past the timeout + await completeAsyncOperations(150); + + // Should have transitioned to ERROR state after timeout (lines 1002-1006) + expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); + }, + ); + }); + + it('should reset reconnect attempts after stable connection', async () => { + await withService( + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Connect successfully + await service.connect(); + + // Close connection to trigger reconnect + const mockWs = getMockWebSocket(); + mockWs.simulateClose(1006, 'Test close'); + await completeAsyncOperations(10); + + // Reconnect (this increments attempts to 1) + // With jitter (±25%), first attempt delay is 375-625ms (base 500ms) + await completeAsyncOperations(700); + + expect(service.getConnectionInfo().reconnectAttempts).toBe(1); + + // Wait for stable connection timer (10 seconds + buffer) (lines 1042-1044) + await completeAsyncOperations(10050); + + // Attempts should now be reset to 0 + expect(service.getConnectionInfo().reconnectAttempts).toBe(0); + }, + ); + }); + + it('should handle WebSocket onerror during connection', async () => { + await withService( + { mockWebSocketOptions: { autoConnect: false } }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + // Trigger error during connection phase (lines 1053-1059) + const mockWs = getMockWebSocket(); + mockWs.simulateError(); + await completeAsyncOperations(10); + + // Should schedule reconnect and be in ERROR state + expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); + }, + ); + }); + + it('should handle WebSocket onclose during connection phase', async () => { + await withService( + { mockWebSocketOptions: { autoConnect: false } }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + // Close during connection phase (lines 1070-1074) + const mockWs = getMockWebSocket(); + mockWs.simulateClose(1006, 'Connection failed'); + await completeAsyncOperations(10); + + // Should schedule reconnect and be in ERROR state + expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); + }, + ); + }); + + it('should clear connection timeout in handleClose when timeout occurs then close fires', async () => { + await withService( + { + options: { timeout: 100 }, + mockWebSocketOptions: { autoConnect: false }, + }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Start connection (this sets connectionTimeout) + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + const mockWs = getMockWebSocket(); + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTING, + ); + + // Let timeout fire (closes WebSocket and sets state to ERROR) + await completeAsyncOperations(150); + + // State should be ERROR or DISCONNECTED after timeout + const stateAfterTimeout = service.getConnectionInfo().state; + expect([WebSocketState.ERROR, WebSocketState.DISCONNECTED]).toContain( + stateAfterTimeout, + ); + + // Now manually trigger close event + // Since state is ERROR (not CONNECTING), onclose will call handleClose + // which will clear connectionTimeout (lines 1296-1297) + mockWs.simulateClose(1006, 'Close after timeout'); + await completeAsyncOperations(10); + + // State should still be ERROR or DISCONNECTED + expect([WebSocketState.ERROR, WebSocketState.DISCONNECTED]).toContain( + service.getConnectionInfo().state, + ); + }, + ); + }); + + it('should not schedule multiple reconnects when scheduleConnect called multiple times', async () => { + await withService( + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + await service.connect(); + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTED, + ); + + const mockWs = getMockWebSocket(); + + // First close to trigger scheduleConnect + mockWs.simulateClose(1006, 'Connection lost'); + await completeAsyncOperations(10); + + const attemptsBefore = service.getConnectionInfo().reconnectAttempts; + expect(attemptsBefore).toBeGreaterThan(0); + + // Second close should trigger scheduleConnect again, + // but it should return early since timer already exists (line 1380) + mockWs.simulateClose(1006, 'Connection lost again'); + await completeAsyncOperations(10); + + // Attempts should not have increased again due to idempotency + expect(service.getConnectionInfo().reconnectAttempts).toBe( + attemptsBefore, + ); + }, + ); + }); + }); + + // ===================================================== + // FORCE RECONNECTION TESTS + // ===================================================== + describe('forceReconnection', () => { + it('should force reconnection and schedule connect', async () => { + await withService( + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + await service.connect(); + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTED, + ); + + const mockWs = getMockWebSocket(); + mockWs.close.mockImplementation( + (code = 1000, reason = 'Normal closure') => { + mockWs.simulateClose(code, reason); + }, + ); + + // Force reconnection (lines 548-558) + await service.forceReconnection(); + await completeAsyncOperations(10); + + // Should be disconnected after forceReconnection + expect(service.getConnectionInfo().state).toBe( + WebSocketState.DISCONNECTED, + ); + + // Should have scheduled a reconnect (attempts incremented) + expect(service.getConnectionInfo().reconnectAttempts).toBe(1); + }, + ); + }); + + it('should skip forceReconnection when reconnect timer is already scheduled', async () => { + await withService( + { mockWebSocketOptions: { autoConnect: false } }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Trigger a connection failure to schedule a reconnect + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + const mockWs = getMockWebSocket(); + mockWs.simulateError(); + await completeAsyncOperations(10); + + const attemptsBefore = service.getConnectionInfo().reconnectAttempts; + + // Try to force reconnection while timer is already scheduled + await service.forceReconnection(); + + // Should have returned early, attempts unchanged + expect(service.getConnectionInfo().reconnectAttempts).toBe( + attemptsBefore, + ); + }, + ); + }); }); // ===================================================== @@ -1213,7 +1404,7 @@ describe('BackendWebSocketService', () => { data: { test: true }, }); - await service.disconnect(); + service.disconnect(); await expect(requestPromise).rejects.toThrow('WebSocket disconnected'); }); @@ -1463,43 +1654,56 @@ describe('BackendWebSocketService', () => { }); }); - it('should handle authentication required but user not signed in by rejecting connection with error', async () => { + it('should handle getBearerToken error during connection by scheduling reconnect', async () => { await withService( { options: {}, mockWebSocketOptions: { autoConnect: false }, }, async ({ service, mocks }) => { - mocks.getBearerToken.mockResolvedValueOnce(null); + const authError = new Error('Auth error'); + mocks.getBearerToken.mockRejectedValueOnce(authError); - await expect(service.connect()).rejects.toThrow( - 'Authentication required: user not signed in', - ); + // connect() will catch the error and schedule reconnect (not throw) + await service.connect(); + // Initial state should be DISCONNECTED since connection failed expect(service.getConnectionInfo().state).toBe( WebSocketState.DISCONNECTED, ); expect(mocks.getBearerToken).toHaveBeenCalled(); + + // Verify reconnect was scheduled (attempts should be incremented) + expect(service.getConnectionInfo().reconnectAttempts).toBeGreaterThan( + 0, + ); }, ); }); - it('should handle getBearerToken error during connection by rejecting with authentication error', async () => { + it('should handle null bearer token by scheduling reconnect', async () => { await withService( { options: {}, mockWebSocketOptions: { autoConnect: false }, }, async ({ service, mocks }) => { - const authError = new Error('Auth error'); - mocks.getBearerToken.mockRejectedValueOnce(authError); + // Return null to simulate user not signed in (line 464) + mocks.getBearerToken.mockResolvedValueOnce(null); - await expect(service.connect()).rejects.toThrow('Auth error'); + // connect() will catch the authentication error and schedule reconnect + await service.connect(); + // Should be in DISCONNECTED state expect(service.getConnectionInfo().state).toBe( WebSocketState.DISCONNECTED, ); expect(mocks.getBearerToken).toHaveBeenCalled(); + + // Verify reconnect was scheduled + expect(service.getConnectionInfo().reconnectAttempts).toBeGreaterThan( + 0, + ); }, ); }); @@ -1561,7 +1765,8 @@ describe('BackendWebSocketService', () => { mockEnabledCallback.mockReturnValue(false); // Advance time to trigger reconnection check - await completeAsyncOperations(50); + // With jitter (±25%), delay with reconnectDelay=50ms is 37.5-62.5ms + await completeAsyncOperations(70); // Should have checked isEnabled and stopped reconnection expect(mockEnabledCallback).toHaveBeenCalled(); @@ -1572,34 +1777,6 @@ describe('BackendWebSocketService', () => { }, ); }); - - it('should continue reconnection attempts when reconnect fails', async () => { - await withService( - { - options: { - reconnectDelay: 50, - }, - }, - async ({ service, getMockWebSocket, completeAsyncOperations }) => { - await service.connect(); - const mockWs = getMockWebSocket(); - - // Simulate connection loss - mockWs.simulateClose(1006, 'Connection lost'); - await completeAsyncOperations(0); - - const connectSpy = jest - .spyOn(service, 'connect') - .mockRejectedValue(new Error('Reconnect failed')); - - // Advance time to trigger first reconnection attempt - await completeAsyncOperations(50); - - // Should have attempted to reconnect - expect(connectSpy).toHaveBeenCalled(); - }, - ); - }); }); // ===================================================== diff --git a/packages/core-backend/src/BackendWebSocketService.ts b/packages/core-backend/src/BackendWebSocketService.ts index 9a1a98297c8..018e9d4a1b0 100644 --- a/packages/core-backend/src/BackendWebSocketService.ts +++ b/packages/core-backend/src/BackendWebSocketService.ts @@ -18,6 +18,7 @@ const log = createModuleLogger(projectLogger, SERVICE_NAME); const MESSENGER_EXPOSED_METHODS = [ 'connect', 'disconnect', + 'forceReconnection', 'sendMessage', 'sendRequest', 'subscribe', @@ -299,6 +300,8 @@ export class BackendWebSocketService { #connectionTimeout: NodeJS.Timeout | null = null; + #stableConnectionTimer: NodeJS.Timeout | null = null; + // Track the current connection promise to handle concurrent connection attempts #connectionPromise: Promise | null = null; @@ -381,7 +384,6 @@ export class BackendWebSocketService { // eslint-disable-next-line @typescript-eslint/no-floating-promises this.connect(); } else { - // eslint-disable-next-line @typescript-eslint/no-floating-promises this.disconnect(); } }, @@ -396,7 +398,6 @@ export class BackendWebSocketService { // Subscribe to wallet lock event this.#messenger.subscribe('KeyringController:lock', () => { - // eslint-disable-next-line @typescript-eslint/no-floating-promises this.disconnect(); }); } @@ -442,6 +443,12 @@ export class BackendWebSocketService { return; } + // If a reconnect is already scheduled, defer to it to avoid bypassing exponential backoff + // This prevents rapid loops when server accepts then immediately closes connections + if (this.#reconnectTimer) { + return; + } + // Create and store the connection promise IMMEDIATELY (before any async operations) // This ensures subsequent connect() calls will wait for this promise instead of creating new connections this.#connectionPromise = (async () => { @@ -452,15 +459,11 @@ export class BackendWebSocketService { 'AuthenticationController:getBearerToken', ); if (!token) { - this.#scheduleReconnect(); throw new Error('Authentication required: user not signed in'); } bearerToken = token; } catch (error) { log('Failed to check authentication requirements', { error }); - - // Can't connect - schedule retry - this.#scheduleReconnect(); throw error; } @@ -473,14 +476,16 @@ export class BackendWebSocketService { const errorMessage = getErrorMessage(error); log('Connection attempt failed', { errorMessage, error }); this.#setState(WebSocketState.ERROR); - - // Rethrow to propagate error to caller throw error; } })(); try { await this.#connectionPromise; + } catch { + // Always schedule reconnect on any failure + // Exponential backoff will prevent aggressive retries + this.#scheduleConnect(); } finally { // Clear the connection promise when done (success or failure) this.#connectionPromise = null; @@ -489,10 +494,8 @@ export class BackendWebSocketService { /** * Closes WebSocket connection - * - * @returns Promise that resolves when disconnection is complete */ - async disconnect(): Promise { + disconnect(): void { if ( this.#state === WebSocketState.DISCONNECTED || this.#state === WebSocketState.DISCONNECTING @@ -510,6 +513,9 @@ export class BackendWebSocketService { // Clear any pending connection promise this.#connectionPromise = null; + // Reset reconnect attempts on manual disconnect + this.#reconnectAttempts = 0; + if (this.#ws) { this.#ws.close(1000, 'Normal closure'); } @@ -517,6 +523,38 @@ export class BackendWebSocketService { log('WebSocket manually disconnected'); } + /** + * Forces a WebSocket reconnection to clean up subscription state + * + * This method is useful when subscription state may be out of sync and needs to be reset. + * It performs a controlled disconnect-then-reconnect sequence: + * - Disconnects cleanly to trigger subscription cleanup + * - Schedules reconnection with exponential backoff to prevent rapid loops + * - All subscriptions will be cleaned up automatically on disconnect + * + * Use cases: + * - Recovering from subscription/unsubscription issues + * - Cleaning up orphaned subscriptions + * - Forcing a fresh subscription state + * + * @returns Promise that resolves when disconnection is complete (reconnection is scheduled) + */ + async forceReconnection(): Promise { + // If a reconnect is already scheduled, don't force another one + if (this.#reconnectTimer) { + log('Reconnect already scheduled, skipping force reconnection'); + return; + } + + log('Forcing WebSocket reconnection to clean up subscription state'); + + // Perform controlled disconnect + this.disconnect(); + + // Schedule reconnection with exponential backoff + this.#scheduleConnect(); + } + /** * Sends a message through the WebSocket (fire-and-forget, no response expected) * @@ -991,8 +1029,13 @@ export class BackendWebSocketService { this.#setState(WebSocketState.CONNECTED); this.#connectedAt = Date.now(); - // Reset reconnect attempts on successful connection - this.#reconnectAttempts = 0; + // Only reset after connection stays stable for a period (10 seconds) + // This prevents rapid reconnect loops when server accepts then immediately closes + this.#stableConnectionTimer = setTimeout(() => { + this.#stableConnectionTimer = null; + this.#reconnectAttempts = 0; + log('Connection stable - reset reconnect attempts'); + }, 10000); resolve(); }, @@ -1242,7 +1285,15 @@ export class BackendWebSocketService { // Calculate connection duration before we clear state const connectionDuration = Date.now() - this.#connectedAt; - this.#clearTimers(); + if (this.#connectionTimeout) { + clearTimeout(this.#connectionTimeout); + this.#connectionTimeout = null; + } + if (this.#stableConnectionTimer) { + clearTimeout(this.#stableConnectionTimer); + this.#stableConnectionTimer = null; + } + this.#connectedAt = 0; // Clear any pending connection promise @@ -1281,9 +1332,7 @@ export class BackendWebSocketService { }, ); - // For any unexpected disconnects, attempt reconnection - // The manualDisconnect flag is the only gate - if it's false, we reconnect - this.#scheduleReconnect(); + this.#scheduleConnect(); } /** @@ -1300,14 +1349,52 @@ export class BackendWebSocketService { // ============================================================================= /** - * Schedules a reconnection attempt with exponential backoff + * Schedules a connection attempt with exponential backoff and jitter + * + * This method is used for automatic reconnection with backoff: + * - Prevents duplicate reconnection timers (idempotent) + * - Applies exponential backoff based on previous failures + * - Adds jitter (±25%) to prevent thundering herd problem + * - Used ONLY for automatic retries, not user-initiated actions + * + * Call this from: + * - connect() catch block (on connection failure) + * - #handleClose() (on unexpected disconnect) + * + * For user-initiated actions (sign in, unlock), call connect() directly instead. + * + * If a reconnect is already scheduled, this is a no-op to prevent: + * - Orphaned timers (memory leak) + * - Inflated reconnect attempts counter + * - Prematurely long delays */ - #scheduleReconnect(): void { + #scheduleConnect(): void { + // If a reconnect is already scheduled, don't schedule another one + if (this.#reconnectTimer) { + return; + } + + // Increment attempts BEFORE calculating delay so backoff grows properly this.#reconnectAttempts += 1; + // Calculate delay with exponential backoff + // First attempt (reconnectAttempts=1) → 500ms + // Second attempt (reconnectAttempts=2) → 750ms + // Third attempt (reconnectAttempts=3) → 1125ms + // Continues up to maxReconnectDelay (5000ms) const rawDelay = this.#options.reconnectDelay * Math.pow(1.5, this.#reconnectAttempts - 1); - const delay = Math.min(rawDelay, this.#options.maxReconnectDelay); + const baseDelay = Math.min(rawDelay, this.#options.maxReconnectDelay); + + // Add jitter (±25%) to prevent thundering herd problem + // This randomizes reconnection timing across multiple clients + const jitterFactor = 0.75 + Math.random() * 0.5; // Random value between 0.75 and 1.25 + const delay = Math.floor(baseDelay * jitterFactor); + + log('Scheduling reconnect', { + attempt: this.#reconnectAttempts, + delay_ms: delay, + }); this.#reconnectTimer = setTimeout(() => { // Clear timer reference first @@ -1319,10 +1406,8 @@ export class BackendWebSocketService { return; } - // Attempt to reconnect - if it fails, schedule another attempt - this.connect().catch(() => { - this.#scheduleReconnect(); - }); + // eslint-disable-next-line @typescript-eslint/no-floating-promises + this.connect(); }, delay); } @@ -1338,6 +1423,10 @@ export class BackendWebSocketService { clearTimeout(this.#connectionTimeout); this.#connectionTimeout = null; } + if (this.#stableConnectionTimer) { + clearTimeout(this.#stableConnectionTimer); + this.#stableConnectionTimer = null; + } } /**