@@ -996,22 +996,9 @@ class UpdateMachine {
    }());
  }

-  // This is static so that it persists through new UpdateMachine instances
-  // as we attempt to fix things by reloading data from scratch. In principle
-  // it could also be per-account (or per-realm or per-server); but currently
-  // we skip that complication, as well as attempting to reset backoff on
-  // later success. After all, these unexpected errors should be uncommon;
-  // ideally they'd never happen.
-  static BackoffMachine get _unexpectedErrorBackoffMachine {
-    return __unexpectedErrorBackoffMachine
-      ??= BackoffMachine(maxBound: const Duration(seconds: 60));
-  }
-  static BackoffMachine? __unexpectedErrorBackoffMachine;
-
  void poll() async {
    assert(!_disposed);
    try {
-      BackoffMachine? backoffMachine;
      while (true) {
        if (_debugLoopSignal != null) {
          await _debugLoopSignal!.future;
@@ -1027,34 +1014,13 @@ class UpdateMachine {
          result = await getEvents(store.connection,
            queueId: queueId, lastEventId: lastEventId);
          if (_disposed) return;
-        } catch (e) {
+        } catch (e, stackTrace) {
          if (_disposed) return;
-          final shouldRetry = _triagePollRequestError(e);
-          if (!shouldRetry) rethrow;
-          await (backoffMachine ??= BackoffMachine()).wait();
+          await _handlePollRequestError(e, stackTrace); // may rethrow
          if (_disposed) return;
-          assert(debugLog('… Backoff wait complete, retrying poll.'));
          continue;
        }
-
-        // After one successful request, we reset backoff to its initial state.
-        // That way if the user is off the network and comes back on, the app
-        // doesn't wind up in a state where it's slow to recover the next time
-        // one request fails.
-        //
-        // This does mean that if the server is having trouble and handling some
-        // but not all of its requests, we'll end up doing a lot more retries than
-        // if we stayed at the max backoff interval; partway toward what would
-        // happen if we weren't backing off at all.
-        //
-        // But at least for [getEvents] requests, as here, it should be OK,
-        // because this is a long-poll. That means a typical successful request
-        // takes a long time to come back; in fact longer than our max backoff
-        // duration (which is 10 seconds). So if we're getting a mix of successes
-        // and failures, the successes themselves should space out the requests.
-        backoffMachine = null;
-
-        _clearReportingErrorsToUser();
+        _clearPollErrors();

        final events = result.events;
        for (final event in events) {
@@ -1073,29 +1039,26 @@ class UpdateMachine {
      }
    } catch (e) {
      if (_disposed) return;
-
-      // An error occurred, other than the transient request errors we retry on.
-      // This means either a lost/expired event queue on the server (which is
-      // normal after the app is offline for a period like 10 minutes),
-      // or an unexpected exception representing a bug in our code or the server.
-      // Either way, the show must go on. So reload server data from scratch.
-
-      final isUnexpected = _triagePollError(e);
-
-      if (isUnexpected) {
-        // We don't know the cause of the failure; it might well keep happening.
-        // Avoid creating a retry storm.
-        await _unexpectedErrorBackoffMachine.wait();
-        if (_disposed) return;
-      }
-
-      // This disposes the store, which disposes this update machine.
-      await store._globalStore._reloadPerAccount(store.accountId);
-      assert(debugLog('… Event queue replaced.'));
+      await _handlePollError(e);
+      assert(_disposed);
      return;
    }
  }

+  // This is static so that it persists through new UpdateMachine instances
+  // as we attempt to fix things by reloading data from scratch. In principle
+  // it could also be per-account (or per-realm or per-server); but currently
+  // we skip that complication, as well as attempting to reset backoff on
+  // later success. After all, these unexpected errors should be uncommon;
+  // ideally they'd never happen.
+  static BackoffMachine get _unexpectedErrorBackoffMachine {
+    return __unexpectedErrorBackoffMachine
+      ??= BackoffMachine(maxBound: const Duration(seconds: 60));
+  }
+  static BackoffMachine? __unexpectedErrorBackoffMachine;
+
+  BackoffMachine? _pollBackoffMachine;
+
  /// This controls when we start to report transient errors to the user when
  /// polling.
  ///
@@ -1105,24 +1068,49 @@ class UpdateMachine {

  int _accumulatedTransientFailureCount = 0;

-  void _clearReportingErrorsToUser() {
+  void _clearPollErrors() {
+    // After one successful request, we reset backoff to its initial state.
+    // That way if the user is off the network and comes back on, the app
+    // doesn't wind up in a state where it's slow to recover the next time
+    // one request fails.
+    //
+    // This does mean that if the server is having trouble and handling some
+    // but not all of its requests, we'll end up doing a lot more retries than
+    // if we stayed at the max backoff interval; partway toward what would
+    // happen if we weren't backing off at all.
+    //
+    // But at least for [getEvents] requests, as here, it should be OK,
+    // because this is a long-poll. That means a typical successful request
+    // takes a long time to come back; in fact longer than our max backoff
+    // duration (which is 10 seconds). So if we're getting a mix of successes
+    // and failures, the successes themselves should space out the requests.
+    _pollBackoffMachine = null;
+
    store.isLoading = false;
    _accumulatedTransientFailureCount = 0;
    reportErrorToUserBriefly(null);
  }

-  /// Sort out an error from the network request in [poll].
+  /// Sort out an error from the network request in [poll]:
+  /// either wait for a backoff duration (and possibly report the error),
+  /// or rethrow.
  ///
-  /// If the request should be retried, this method returns true,
+  /// If the request should be retried, this method uses [_pollBackoffMachine]
+  /// to wait an appropriate backoff duration for that retry,
  /// after reporting the error if appropriate to the user and/or developer.
-  /// Otherwise, this method returns false with no side effects.
-  bool _triagePollRequestError(Object error) {
+  ///
+  /// Otherwise this method rethrows the error, with no other side effects.
+  ///
+  /// See also:
+  ///  * [_handlePollError], which handles errors from the rest of [poll]
+  ///    and errors this method rethrows.
+  Future<void> _handlePollRequestError(Object error, StackTrace stackTrace) async {
    store.isLoading = true;

    if (error is! ApiRequestException) {
      // Some unexpected error, outside even making the HTTP request.
      // Definitely a bug in our code.
-      return false;
+      Error.throwWithStackTrace(error, stackTrace);
    }

    bool shouldReportToUser;
@@ -1142,30 +1130,40 @@ class UpdateMachine {
        shouldReportToUser = true;

      case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'):
-        return false;
+        Error.throwWithStackTrace(error, stackTrace);

      case ZulipApiException():
      case MalformedServerResponseException():
        // Either a 4xx we didn't expect, or a malformed response;
        // in either case, a mismatch of the client's expectations to the
        // server's behavior, and therefore a bug in one or the other.
        // TODO(#1054) handle auth failures specifically
-        return false;
+        Error.throwWithStackTrace(error, stackTrace);
    }

    assert(debugLog('Transient error polling event queue for $store: $error\n'
      'Backing off, then will retry…'));
    if (shouldReportToUser) {
      _maybeReportToUserTransientError(error);
    }
-    return true;
+    await (_pollBackoffMachine ??= BackoffMachine()).wait();
+    if (_disposed) return;
+    assert(debugLog('… Backoff wait complete, retrying poll.'));
  }

-  /// Sort out an error in [poll].
+  /// Deal with an error in [poll]: reload server data to replace the store,
+  /// after reporting the error as appropriate to the user and/or developer.
  ///
-  /// Reports the error if appropriate to the user and/or developer;
-  /// then returns true just if the error was unexpected.
-  bool _triagePollError(Object error) {
+  /// See also:
+  ///  * [_handlePollRequestError], which handles certain errors
+  ///    and causes them not to reach this method.
+  Future<void> _handlePollError(Object error) async {
+    // An error occurred, other than the transient request errors we retry on.
+    // This means either a lost/expired event queue on the server (which is
+    // normal after the app is offline for a period like 10 minutes),
+    // or an unexpected exception representing a bug in our code or the server.
+    // Either way, the show must go on. So reload server data from scratch.
+
    store.isLoading = true;

    bool isUnexpected;
@@ -1202,7 +1200,17 @@ class UpdateMachine {
        // but our remedy is the same either way.
        isUnexpected = true;
    }
-    return isUnexpected;
+
+    if (isUnexpected) {
+      // We don't know the cause of the failure; it might well keep happening.
+      // Avoid creating a retry storm.
+      await _unexpectedErrorBackoffMachine.wait();
+      if (_disposed) return;
+    }
+
+    await store._globalStore._reloadPerAccount(store.accountId);
+    assert(_disposed);
+    assert(debugLog('… Event queue replaced.'));
  }

  /// This only reports transient errors after reaching
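A note on the rethrow pattern this change adopts, shown here as an illustrative sketch rather than code from the app (only Error.throwWithStackTrace itself is the real dart:core API; the helper name and the TimeoutException check are invented): once the triage logic moves out of poll's own catch block into a separate async helper, a plain `rethrow` is no longer available there, because `rethrow` is only valid lexically inside a catch. Catching `(e, stackTrace)` at the call site and passing both into the helper lets it call `Error.throwWithStackTrace(error, stackTrace)`, which propagates the error while preserving the original stack trace; a bare `throw error;` would start a new trace at the helper instead.

import 'dart:async';

/// Hypothetical helper: retry transient failures, rethrow everything else
/// without losing the original stack trace.
Future<void> handleRequestError(Object error, StackTrace stackTrace) async {
  if (error is TimeoutException) {
    // Transient: back off briefly and let the caller's loop retry.
    await Future<void>.delayed(const Duration(seconds: 1));
    return;
  }
  // Unexpected: propagate to the caller's outer catch. `rethrow` isn't
  // allowed here (we're not inside a catch block), and `throw error;`
  // would replace the stack trace, so use throwWithStackTrace instead.
  Error.throwWithStackTrace(error, stackTrace);
}

Future<void> main() async {
  try {
    try {
      throw StateError('unexpected');
    } catch (e, st) {
      await handleRequestError(e, st); // may rethrow
    }
  } catch (e, st) {
    // st still points at the original `throw StateError(...)` site.
    print('caught: $e\n$st');
  }
}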
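On the two backoff machines the diff leaves in place: _pollBackoffMachine is a per-instance nullable field, created lazily on the first transient failure via `_pollBackoffMachine ??= BackoffMachine()` and dropped back to null in _clearPollErrors() after a success, while _unexpectedErrorBackoffMachine is a static, lazily initialized singleton with a 60-second bound, so its state survives the store reload that replaces the UpdateMachine. The repo's actual BackoffMachine is defined elsewhere; purely as a sketch of the shape of such a helper (initial duration, growth factor, and jitter below are invented for illustration):

import 'dart:async';
import 'dart:math';

/// Illustrative stand-in for a backoff helper: each call to [wait]
/// sleeps for a jittered duration whose bound grows geometrically,
/// capped at [maxBound].
class SketchBackoffMachine {
  SketchBackoffMachine({this.maxBound = const Duration(seconds: 10)});

  final Duration maxBound;
  Duration _bound = const Duration(milliseconds: 100); // invented default
  final Random _random = Random();

  Future<void> wait() async {
    // Full jitter: sleep anywhere from zero up to the current bound.
    await Future<void>.delayed(_bound * _random.nextDouble());
    _bound *= 2;
    if (_bound > maxBound) _bound = maxBound;
  }
}

/// Usage in the same spirit as poll(): lazy creation on first failure,
/// reset to null after a success.
Future<void> pollForever(Future<void> Function() attempt) async {
  SketchBackoffMachine? backoff;
  while (true) {
    try {
      await attempt();
      backoff = null; // success: reset backoff, like _clearPollErrors()
    } on TimeoutException {
      // Create the machine on first failure only, as in the diff's ??= use.
      await (backoff ??= SketchBackoffMachine()).wait();
    }
  }
}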