Commit 54b39be

store [nfc]: Move polling backoff/reload inside error helpers

Now the `poll` method itself is a lot more focused on the overall structure of how polling works.

1 parent b87a038 · commit 54b39be
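For orientation before reading the diff: the control flow that `poll` is left with can be modeled roughly as below. This is a standalone sketch, not the app's code; every identifier in it (`longPollOnce`, `handleRequestError`, `TransientError`, and so on) is a made-up stand-in for the real `getEvents`, `_handlePollRequestError`, `BackoffMachine`, and friends shown in the diff.

```dart
import 'dart:math';

class TransientError implements Exception {}

var _attempts = 0;
var _backoffMs = 100;

// Stand-in for the long-poll request (getEvents in the real code).
Future<List<String>> longPollOnce() async {
  _attempts++;
  if (_attempts < 3) throw TransientError();           // transient failures at first
  if (_attempts == 5) throw StateError('queue lost');  // then a non-retryable error
  return ['event #$_attempts'];
}

// Like _handlePollRequestError: back off on transient errors;
// rethrow anything unexpected so the outer handler deals with it.
Future<void> handleRequestError(Object error, StackTrace stackTrace) async {
  if (error is! TransientError) {
    Error.throwWithStackTrace(error, stackTrace);
  }
  await Future<void>.delayed(Duration(milliseconds: _backoffMs));
  _backoffMs = min(_backoffMs * 2, 10 * 1000);
}

// Like _clearPollErrors: a success resets the backoff state.
void clearPollErrors() => _backoffMs = 100;

// Like _handlePollError: give up on this loop and start over from scratch.
Future<void> handlePollError(Object error) async {
  print('reloading from scratch after: $error');
}

Future<void> poll() async {
  try {
    while (true) {
      List<String> events;
      try {
        events = await longPollOnce();
      } catch (e, stackTrace) {
        await handleRequestError(e, stackTrace); // may rethrow
        continue;
      }
      clearPollErrors();
      events.forEach(print);
    }
  } catch (e) {
    await handlePollError(e);
    return;
  }
}

Future<void> main() => poll();
```

The point of the commit shows up in the shape of `poll` itself: the loop only says "retry after a request error, clear error state on success, and on anything else hand off and stop", while the backoff and reload details live in the helpers.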

1 file changed: lib/model/store.dart (78 additions, 70 deletions)
@@ -996,22 +996,9 @@ class UpdateMachine {
       }());
   }
 
-  // This is static so that it persists through new UpdateMachine instances
-  // as we attempt to fix things by reloading data from scratch. In principle
-  // it could also be per-account (or per-realm or per-server); but currently
-  // we skip that complication, as well as attempting to reset backoff on
-  // later success. After all, these unexpected errors should be uncommon;
-  // ideally they'd never happen.
-  static BackoffMachine get _unexpectedErrorBackoffMachine {
-    return __unexpectedErrorBackoffMachine
-      ??= BackoffMachine(maxBound: const Duration(seconds: 60));
-  }
-  static BackoffMachine? __unexpectedErrorBackoffMachine;
-
   void poll() async {
     assert(!_disposed);
     try {
-      BackoffMachine? backoffMachine;
       while (true) {
         if (_debugLoopSignal != null) {
           await _debugLoopSignal!.future;
@@ -1027,34 +1014,13 @@ class UpdateMachine {
           result = await getEvents(store.connection,
             queueId: queueId, lastEventId: lastEventId);
           if (_disposed) return;
-        } catch (e) {
+        } catch (e, stackTrace) {
           if (_disposed) return;
-          final shouldRetry = _triagePollRequestError(e);
-          if (!shouldRetry) rethrow;
-          await (backoffMachine ??= BackoffMachine()).wait();
+          await _handlePollRequestError(e, stackTrace); // may rethrow
           if (_disposed) return;
-          assert(debugLog('… Backoff wait complete, retrying poll.'));
           continue;
         }
-
-        // After one successful request, we reset backoff to its initial state.
-        // That way if the user is off the network and comes back on, the app
-        // doesn't wind up in a state where it's slow to recover the next time
-        // one request fails.
-        //
-        // This does mean that if the server is having trouble and handling some
-        // but not all of its requests, we'll end up doing a lot more retries than
-        // if we stayed at the max backoff interval; partway toward what would
-        // happen if we weren't backing off at all.
-        //
-        // But at least for [getEvents] requests, as here, it should be OK,
-        // because this is a long-poll. That means a typical successful request
-        // takes a long time to come back; in fact longer than our max backoff
-        // duration (which is 10 seconds). So if we're getting a mix of successes
-        // and failures, the successes themselves should space out the requests.
-        backoffMachine = null;
-
-        _clearReportingErrorsToUser();
+        _clearPollErrors();
 
         final events = result.events;
         for (final event in events) {
@@ -1073,29 +1039,26 @@ class UpdateMachine {
       }
     } catch (e) {
       if (_disposed) return;
-
-      // An error occurred, other than the transient request errors we retry on.
-      // This means either a lost/expired event queue on the server (which is
-      // normal after the app is offline for a period like 10 minutes),
-      // or an unexpected exception representing a bug in our code or the server.
-      // Either way, the show must go on. So reload server data from scratch.
-
-      final isUnexpected = _triagePollError(e);
-
-      if (isUnexpected) {
-        // We don't know the cause of the failure; it might well keep happening.
-        // Avoid creating a retry storm.
-        await _unexpectedErrorBackoffMachine.wait();
-        if (_disposed) return;
-      }
-
-      // This disposes the store, which disposes this update machine.
-      await store._globalStore._reloadPerAccount(store.accountId);
-      assert(debugLog('… Event queue replaced.'));
+      await _handlePollError(e);
+      assert(_disposed);
       return;
     }
   }
 
+  // This is static so that it persists through new UpdateMachine instances
+  // as we attempt to fix things by reloading data from scratch. In principle
+  // it could also be per-account (or per-realm or per-server); but currently
+  // we skip that complication, as well as attempting to reset backoff on
+  // later success. After all, these unexpected errors should be uncommon;
+  // ideally they'd never happen.
+  static BackoffMachine get _unexpectedErrorBackoffMachine {
+    return __unexpectedErrorBackoffMachine
+      ??= BackoffMachine(maxBound: const Duration(seconds: 60));
+  }
+  static BackoffMachine? __unexpectedErrorBackoffMachine;
+
+  BackoffMachine? _pollBackoffMachine;
+
   /// This controls when we start to report transient errors to the user when
   /// polling.
   ///
@@ -1105,24 +1068,49 @@ class UpdateMachine {
 
   int _accumulatedTransientFailureCount = 0;
 
-  void _clearReportingErrorsToUser() {
+  void _clearPollErrors() {
+    // After one successful request, we reset backoff to its initial state.
+    // That way if the user is off the network and comes back on, the app
+    // doesn't wind up in a state where it's slow to recover the next time
+    // one request fails.
+    //
+    // This does mean that if the server is having trouble and handling some
+    // but not all of its requests, we'll end up doing a lot more retries than
+    // if we stayed at the max backoff interval; partway toward what would
+    // happen if we weren't backing off at all.
+    //
+    // But at least for [getEvents] requests, as here, it should be OK,
+    // because this is a long-poll. That means a typical successful request
+    // takes a long time to come back; in fact longer than our max backoff
+    // duration (which is 10 seconds). So if we're getting a mix of successes
+    // and failures, the successes themselves should space out the requests.
+    _pollBackoffMachine = null;
+
     store.isLoading = false;
     _accumulatedTransientFailureCount = 0;
     reportErrorToUserBriefly(null);
   }
 
-  /// Sort out an error from the network request in [poll].
+  /// Sort out an error from the network request in [poll]:
+  /// either wait for a backoff duration (and possibly report the error),
+  /// or rethrow.
   ///
-  /// If the request should be retried, this method returns true,
+  /// If the request should be retried, this method uses [_pollBackoffMachine]
+  /// to wait an appropriate backoff duration for that retry,
   /// after reporting the error if appropriate to the user and/or developer.
-  /// Otherwise, this method returns false with no side effects.
-  bool _triagePollRequestError(Object error) {
+  ///
+  /// Otherwise this method rethrows the error, with no other side effects.
+  ///
+  /// See also:
+  ///  * [_handlePollError], which handles errors from the rest of [poll]
+  ///    and errors this method rethrows.
+  Future<void> _handlePollRequestError(Object error, StackTrace stackTrace) async {
     store.isLoading = true;
 
     if (error is! ApiRequestException) {
       // Some unexpected error, outside even making the HTTP request.
       // Definitely a bug in our code.
-      return false;
+      Error.throwWithStackTrace(error, stackTrace);
     }
 
     bool shouldReportToUser;
@@ -1142,30 +1130,40 @@ class UpdateMachine {
         shouldReportToUser = true;
 
       case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'):
-        return false;
+        Error.throwWithStackTrace(error, stackTrace);
 
       case ZulipApiException():
       case MalformedServerResponseException():
         // Either a 4xx we didn't expect, or a malformed response;
         // in either case, a mismatch of the client's expectations to the
         // server's behavior, and therefore a bug in one or the other.
         // TODO(#1054) handle auth failures specifically
-        return false;
+        Error.throwWithStackTrace(error, stackTrace);
     }
 
     assert(debugLog('Transient error polling event queue for $store: $error\n'
       'Backing off, then will retry…'));
     if (shouldReportToUser) {
       _maybeReportToUserTransientError(error);
     }
-    return true;
+    await (_pollBackoffMachine ??= BackoffMachine()).wait();
+    if (_disposed) return;
+    assert(debugLog('… Backoff wait complete, retrying poll.'));
   }
 
-  /// Sort out an error in [poll].
+  /// Deal with an error in [poll]: reload server data to replace the store,
+  /// after reporting the error as appropriate to the user and/or developer.
   ///
-  /// Reports the error if appropriate to the user and/or developer;
-  /// then returns true just if the error was unexpected.
-  bool _triagePollError(Object error) {
+  /// See also:
+  ///  * [_handlePollRequestError], which handles certain errors
+  ///    and causes them not to reach this method.
+  Future<void> _handlePollError(Object error) async {
+    // An error occurred, other than the transient request errors we retry on.
+    // This means either a lost/expired event queue on the server (which is
+    // normal after the app is offline for a period like 10 minutes),
+    // or an unexpected exception representing a bug in our code or the server.
+    // Either way, the show must go on. So reload server data from scratch.
+
     store.isLoading = true;
 
     bool isUnexpected;
@@ -1202,7 +1200,17 @@ class UpdateMachine {
         // but our remedy is the same either way.
         isUnexpected = true;
     }
-    return isUnexpected;
+
+    if (isUnexpected) {
+      // We don't know the cause of the failure; it might well keep happening.
+      // Avoid creating a retry storm.
+      await _unexpectedErrorBackoffMachine.wait();
+      if (_disposed) return;
+    }
+
+    await store._globalStore._reloadPerAccount(store.accountId);
+    assert(_disposed);
+    assert(debugLog('… Event queue replaced.'));
   }
 
   /// This only reports transient errors after reaching
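A note on `BackoffMachine`, since the diff leans on it without showing it: the only surface used here is a constructor with an optional `maxBound` (the comments suggest the default cap is about 10 seconds, and the unexpected-error instance uses 60 seconds) and an awaitable `wait()`. As a rough illustration only, and not the app's real implementation, such a helper could look like:

```dart
import 'dart:math';

/// Illustrative stand-in with the same small surface the diff relies on:
/// a constructor taking an optional [maxBound], and a [wait] method that
/// sleeps longer on each successive call.  NOT the app's real BackoffMachine.
class SketchBackoffMachine {
  SketchBackoffMachine({this.maxBound = const Duration(seconds: 10)});

  final Duration maxBound;
  Duration _bound = const Duration(milliseconds: 100);
  final _random = Random();

  /// Wait a jittered duration, then double the bound, capped at [maxBound].
  Future<void> wait() async {
    final ms = _random.nextInt(_bound.inMilliseconds + 1);
    await Future<void>.delayed(Duration(milliseconds: ms));
    _bound *= 2;
    if (_bound > maxBound) _bound = maxBound;
  }
}
```

In the new code, `_pollBackoffMachine` is created lazily with the default bound and reset to null by `_clearPollErrors` after a success, while `_unexpectedErrorBackoffMachine` uses the 60-second bound and is static so it survives the reload that replaces the UpdateMachine.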
