Skip to content

Commit b5ef8cb

Browse files
committed
wip dedupe reload on poll error; and on non-transient request errors, reload rather than retry
Hmm or maybe for those non-transient request errors we should stick with retrying? Maybe up to a bounded number of times? Those are by definition a bug either in the server or in our formulating the request: it's either a 4xx we didn't expect, or a malformed response (so in either case, a mismatch of our expectations to the server's behavior, and therefore a bug in one or the other), or something other than ApiRequestException entirely (and so a bug in the API request code). It's therefore hard to predict what's most likely to work. Maybe the most likely cause is that the server is having operational trouble that's not showing up as 5xx responses. We handle rate-limit errors separately, though, and make sure to back off if we get one of those. So this may not be a likely scenario. Also, when we abandon retrying getEvents and switch to re-register, we do end up backing off if that fails. It's just that that's a heavier request in the first place.
1 parent 779dcaf commit b5ef8cb

File tree

1 file changed

+51
-39
lines changed

1 file changed

+51
-39
lines changed

lib/model/store.dart

Lines changed: 51 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -967,49 +967,29 @@ class UpdateMachine {
967967
if (_disposed) return;
968968

969969
store.isLoading = true;
970-
bool isUnexpected;
971970
bool shouldReportToUser;
972971
switch (e) {
973-
case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'):
974-
assert(debugLog('Lost event queue for $store. Replacing…'));
975-
// This disposes the store, which disposes this update machine.
976-
await store._globalStore._reloadPerAccount(store.accountId);
977-
debugLog('… Event queue replaced.');
978-
return;
979-
980972
case NetworkException(cause: SocketException()):
981973
// A [SocketException] is common when the app returns from sleep.
982-
isUnexpected = false;
983974
shouldReportToUser = false;
984975

985976
case NetworkException():
986977
case Server5xxException():
987-
isUnexpected = false;
988978
shouldReportToUser = true;
989979

990980
case ZulipApiException(httpStatus: 429):
991981
case ZulipApiException(code: 'RATE_LIMIT_HIT'):
992982
// TODO(#946) handle rate-limit errors more generally, in ApiConnection
993-
isUnexpected = false;
994983
shouldReportToUser = true;
995984

996985
default:
997-
isUnexpected = true;
998-
shouldReportToUser = true;
986+
rethrow;
999987
}
1000988

1001-
if (isUnexpected) {
1002-
assert(shouldReportToUser);
1003-
assert(debugLog('Error polling event queue for $store: $e\n'
1004-
'Backing off and retrying even though may be hopeless…'));
1005-
// TODO(#186): Handle unrecoverable failures
1006-
_reportToUserErrorConnectingToServer(e);
1007-
} else {
1008-
assert(debugLog('Transient error polling event queue for $store: $e\n'
1009-
'Backing off, then will retry…'));
1010-
if (shouldReportToUser) {
1011-
maybeReportToUserTransientError(e);
1012-
}
989+
assert(debugLog('Transient error polling event queue for $store: $e\n'
990+
'Backing off, then will retry…'));
991+
if (shouldReportToUser) {
992+
maybeReportToUserTransientError(e);
1013993
}
1014994
await (backoffMachine ??= BackoffMachine()).wait();
1015995
if (_disposed) return;
@@ -1043,27 +1023,52 @@ class UpdateMachine {
10431023
try {
10441024
await store.handleEvent(event);
10451025
if (_disposed) return;
1046-
} catch (e) {
1047-
assert(debugLog('BUG: Error handling an event: $e\n' // TODO(log)
1048-
' event: $event\n'
1049-
'Replacing event queue…'));
1050-
// TODO maybe tell user (in beta) that event handling threw error
1051-
// TODO dedupe this with the other _reloadPerAccount sites?
1052-
// This disposes the store, which disposes this update machine.
1053-
await store._globalStore._reloadPerAccount(store.accountId);
1054-
debugLog('… Event queue replaced.');
1055-
return;
1026+
} catch (e, stackTrace) {
1027+
Error.throwWithStackTrace(
1028+
_EventHandlingException(cause: e, event: event),
1029+
stackTrace);
10561030
}
10571031
}
10581032
if (events.isNotEmpty) {
10591033
lastEventId = events.last.id;
10601034
}
10611035
}
10621036
} catch (e) {
1063-
assert(debugLog('BUG: Unexpected error in event polling: $e\n' // TODO(log)
1064-
'Replacing event queue…'));
1065-
// TODO maybe tell user (in beta) that event poll loop threw error
1066-
// TODO dedupe this with the other _reloadPerAccount sites above?
1037+
// An error occurred, other than the transient request errors we retry on.
1038+
// This means either a lost/expired event queue on the server (which is
1039+
// normal after the app is offline for a period like 10 minutes),
1040+
// or an unexpected exception representing a bug in our code or the server.
1041+
// Either way, the show must go on. So reload server data from scratch.
1042+
1043+
// First, log what happened.
1044+
switch (e) {
1045+
case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'):
1046+
assert(debugLog('Lost event queue for $store. Replacing…'));
1047+
// The old event queue is gone, so we need a new one. This is normal.
1048+
1049+
case _EventHandlingException(:final cause, :final event):
1050+
assert(debugLog('BUG: Error handling an event: $cause\n' // TODO(log)
1051+
' event: $event\n'
1052+
'Replacing event queue…'));
1053+
// TODO maybe tell user (in beta) that event handling threw error
1054+
// We can't just continue with the next event, because our state
1055+
// may be garbled due to failing to apply this one (and worse,
1056+
// any invariants that were left in a broken state from where
1057+
// the exception was thrown). So reload from scratch.
1058+
// Hopefully (probably?) the bug only affects our implementation of
1059+
// the *change* in state represented by the event, and when we get the
1060+
// new state in a fresh InitialSnapshot we'll handle that just fine.
1061+
1062+
default:
1063+
assert(debugLog('BUG: Unexpected error in event polling: $e\n' // TODO(log)
1064+
'Replacing event queue…'));
1065+
_reportToUserErrorConnectingToServer(e);
1066+
// Similar story to the _EventHandlingException case;
1067+
// separate only so that one can print more context.
1068+
// The bug here could be in the server if it's an ApiRequestException,
1069+
// but our remedy is the same either way.
1070+
}
1071+
10671072
// This disposes the store, which disposes this update machine.
10681073
await store._globalStore._reloadPerAccount(store.accountId);
10691074
debugLog('… Event queue replaced.');
@@ -1159,3 +1164,10 @@ class UpdateMachine {
11591164
@override
11601165
String toString() => '${objectRuntimeType(this, 'UpdateMachine')}#${shortHash(this)}';
11611166
}
1167+
1168+
class _EventHandlingException implements Exception {
1169+
final Object cause;
1170+
final Event event;
1171+
1172+
_EventHandlingException({required this.cause, required this.event});
1173+
}

0 commit comments

Comments
 (0)