Skip to content

Treat ABORTED writes as retryable #2247

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions Firestore/Example/Tests/Remote/FSTDatastoreTests.mm
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@ @interface FSTDatastoreTests : XCTestCase

@implementation FSTDatastoreTests

- (void)testIsPermanentWriteError {
- (NSError *)errorForCode:(FIRFirestoreErrorCode)code {
return [NSError errorWithDomain:FIRFirestoreErrorDomain code:code userInfo:nil];
}

- (void)testIsPermanentError {
// From GRPCCall -cancel
NSError *error = [NSError errorWithDomain:FIRFirestoreErrorDomain
code:FIRFirestoreErrorCodeCancelled
userInfo:@{NSLocalizedDescriptionKey : @"Canceled by app"}];
XCTAssertFalse([FSTDatastore isPermanentWriteError:error]);
XCTAssertFalse([FSTDatastore isPermanentError:error]);

// From GRPCCall -startNextRead
error = [NSError errorWithDomain:FIRFirestoreErrorDomain
Expand All @@ -38,24 +42,37 @@ - (void)testIsPermanentWriteError {
NSLocalizedDescriptionKey :
@"Client does not have enough memory to hold the server response."
}];
XCTAssertFalse([FSTDatastore isPermanentWriteError:error]);
XCTAssertFalse([FSTDatastore isPermanentError:error]);

// From GRPCCall -startWithWriteable
error = [NSError errorWithDomain:FIRFirestoreErrorDomain
code:FIRFirestoreErrorCodeUnavailable
userInfo:@{NSLocalizedDescriptionKey : @"Connectivity lost."}];
XCTAssertFalse([FSTDatastore isPermanentWriteError:error]);
XCTAssertFalse([FSTDatastore isPermanentError:error]);

// User info doesn't matter:
error = [NSError errorWithDomain:FIRFirestoreErrorDomain
code:FIRFirestoreErrorCodeUnavailable
userInfo:nil];
XCTAssertFalse([FSTDatastore isPermanentWriteError:error]);
error = [self errorForCode:FIRFirestoreErrorCodeUnavailable];
XCTAssertFalse([FSTDatastore isPermanentError:error]);

// "unauthenticated" is considered a recoverable error due to expired token.
error = [NSError errorWithDomain:FIRFirestoreErrorDomain
code:FIRFirestoreErrorCodeUnauthenticated
userInfo:nil];
error = [self errorForCode:FIRFirestoreErrorCodeUnauthenticated];
XCTAssertFalse([FSTDatastore isPermanentError:error]);

error = [self errorForCode:FIRFirestoreErrorCodeDataLoss];
XCTAssertTrue([FSTDatastore isPermanentError:error]);

error = [self errorForCode:FIRFirestoreErrorCodeAborted];
XCTAssertTrue([FSTDatastore isPermanentError:error]);
}

- (void)testIsPermanentWriteError {
NSError *error = [self errorForCode:FIRFirestoreErrorCodeUnauthenticated];
XCTAssertFalse([FSTDatastore isPermanentWriteError:error]);

error = [self errorForCode:FIRFirestoreErrorCodeDataLoss];
XCTAssertTrue([FSTDatastore isPermanentWriteError:error]);

error = [self errorForCode:FIRFirestoreErrorCodeAborted];
XCTAssertFalse([FSTDatastore isPermanentWriteError:error]);
}

Expand Down
145 changes: 95 additions & 50 deletions Firestore/Example/Tests/SpecTests/json/write_spec_test.json
Original file line number Diff line number Diff line change
Expand Up @@ -5797,9 +5797,9 @@
}
]
},
"Writes that fail with code aborted are rejected": {
"Writes that fail with code out-of-range are rejected": {
"describeName": "Writes:",
"itName": "Writes that fail with code aborted are rejected",
"itName": "Writes that fail with code out-of-range are rejected",
"tags": [],
"config": {
"useGarbageCollection": true,
Expand Down Expand Up @@ -5864,7 +5864,7 @@
{
"failWrite": {
"error": {
"code": 10
"code": 11
}
},
"stateExpect": {
Expand Down Expand Up @@ -5903,9 +5903,9 @@
}
]
},
"Writes that fail with code out-of-range are rejected": {
"Writes that fail with code unimplemented are rejected": {
"describeName": "Writes:",
"itName": "Writes that fail with code out-of-range are rejected",
"itName": "Writes that fail with code unimplemented are rejected",
"tags": [],
"config": {
"useGarbageCollection": true,
Expand Down Expand Up @@ -5970,7 +5970,7 @@
{
"failWrite": {
"error": {
"code": 11
"code": 12
}
},
"stateExpect": {
Expand Down Expand Up @@ -6009,9 +6009,9 @@
}
]
},
"Writes that fail with code unimplemented are rejected": {
"Writes that fail with code data-loss are rejected": {
"describeName": "Writes:",
"itName": "Writes that fail with code unimplemented are rejected",
"itName": "Writes that fail with code data-loss are rejected",
"tags": [],
"config": {
"useGarbageCollection": true,
Expand Down Expand Up @@ -6076,7 +6076,7 @@
{
"failWrite": {
"error": {
"code": 12
"code": 15
}
},
"stateExpect": {
Expand Down Expand Up @@ -6115,9 +6115,9 @@
}
]
},
"Writes that fail with code data-loss are rejected": {
"Writes that fail with code resource_exhausted are not rejected": {
"describeName": "Writes:",
"itName": "Writes that fail with code data-loss are rejected",
"itName": "Writes that fail with code resource_exhausted are not rejected",
"tags": [],
"config": {
"useGarbageCollection": true,
Expand Down Expand Up @@ -6182,48 +6182,16 @@
{
"failWrite": {
"error": {
"code": 15
}
},
"stateExpect": {
"userCallbacks": {
"acknowledgedDocs": [],
"rejectedDocs": [
"collection/key"
]
}
},
"expect": [
{
"query": {
"path": "collection/key",
"filters": [],
"orderBys": []
},
"removed": [
{
"key": "collection/key",
"version": 0,
"value": {
"foo": "bar"
},
"options": {
"hasLocalMutations": true,
"hasCommittedMutations": false
}
}
],
"errorCode": 0,
"fromCache": true,
"hasPendingWrites": false
}
]
"code": 8
},
"keepInQueue": true
}
}
]
},
"Writes that fail with code resource_exhausted are not rejected": {
"Writes that fail with code aborted are retried": {
"describeName": "Writes:",
"itName": "Writes that fail with code resource_exhausted are not rejected",
"itName": "Writes that fail with code aborted are retried",
"tags": [],
"config": {
"useGarbageCollection": true,
Expand Down Expand Up @@ -6288,10 +6256,87 @@
{
"failWrite": {
"error": {
"code": 8
"code": 10
},
"keepInQueue": true
}
},
{
"writeAck": {
"version": 1000
},
"stateExpect": {
"userCallbacks": {
"acknowledgedDocs": [
"collection/key"
],
"rejectedDocs": []
}
}
},
{
"watchAck": [
2
]
},
{
"watchEntity": {
"docs": [
{
"key": "collection/key",
"version": 1000,
"value": {
"foo": "bar"
},
"options": {
"hasLocalMutations": false,
"hasCommittedMutations": false
}
}
],
"targets": [
2
]
}
},
{
"watchCurrent": [
[
2
],
"resume-token-1000"
]
},
{
"watchSnapshot": {
"version": 1000,
"targetIds": []
},
"expect": [
{
"query": {
"path": "collection/key",
"filters": [],
"orderBys": []
},
"metadata": [
{
"key": "collection/key",
"version": 1000,
"value": {
"foo": "bar"
},
"options": {
"hasLocalMutations": false,
"hasCommittedMutations": false
}
}
],
"errorCode": 0,
"fromCache": false,
"hasPendingWrites": false
}
]
}
]
},
Expand Down
20 changes: 19 additions & 1 deletion Firestore/Source/Remote/FSTDatastore.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,25 @@ NS_ASSUME_NONNULL_BEGIN
/** Returns YES if the given error is a GRPC ABORTED error. **/
+ (BOOL)isAbortedError:(NSError *)error;

/** Returns YES if the given error indicates the RPC associated with it may not be retried. */
/**
* Determines whether an error code represents a permanent error when received in response to a
* non-write operation.
*
* See +isPermanentWriteError for classifying write errors.
*/
+ (BOOL)isPermanentError:(NSError *)error;

/**
* Determines whether an error code represents a permanent error when received in response to a
* write operation.
*
* Write operations must be handled specially because as of b/119437764, ABORTED errors on the write
* stream should be retried too (even though ABORTED errors are not generally retryable).
*
* Note that during the initial handshake on the write stream an ABORTED error signals that we
* should discard our stream token (i.e. it is permanent). This means a handshake error should be
* classified with isPermanentError, above.
*/
+ (BOOL)isPermanentWriteError:(NSError *)error;

/** Looks up a list of documents in datastore. */
Expand Down
18 changes: 11 additions & 7 deletions Firestore/Source/Remote/FSTDatastore.mm
Original file line number Diff line number Diff line change
Expand Up @@ -144,18 +144,18 @@ + (BOOL)isAbortedError:(NSError *)error {
return error.code == FIRFirestoreErrorCodeAborted;
}

+ (BOOL)isPermanentWriteError:(NSError *)error {
+ (BOOL)isPermanentError:(NSError *)error {
HARD_ASSERT([error.domain isEqualToString:FIRFirestoreErrorDomain],
"isPerminanteWriteError: only works with errors emitted by FSTDatastore.");
"isPermanentError: only works with errors emitted by FSTDatastore.");
switch (error.code) {
case FIRFirestoreErrorCodeCancelled:
case FIRFirestoreErrorCodeUnknown:
case FIRFirestoreErrorCodeDeadlineExceeded:
case FIRFirestoreErrorCodeResourceExhausted:
case FIRFirestoreErrorCodeInternal:
case FIRFirestoreErrorCodeUnavailable:
// Unauthenticated means something went wrong with our token and we need to retry with new
// credentials which will happen automatically.
// Unauthenticated means something went wrong with our token and we need to retry with new
// credentials which will happen automatically.
case FIRFirestoreErrorCodeUnauthenticated:
return NO;
case FIRFirestoreErrorCodeInvalidArgument:
Expand All @@ -164,9 +164,9 @@ + (BOOL)isPermanentWriteError:(NSError *)error {
case FIRFirestoreErrorCodePermissionDenied:
case FIRFirestoreErrorCodeFailedPrecondition:
case FIRFirestoreErrorCodeAborted:
// Aborted might be retried in some scenarios, but that is dependant on
// the context and should handled individually by the calling code.
// See https://cloud.google.com/apis/design/errors
// Aborted might be retried in some scenarios, but that is dependant on
// the context and should handled individually by the calling code.
// See https://cloud.google.com/apis/design/errors
case FIRFirestoreErrorCodeOutOfRange:
case FIRFirestoreErrorCodeUnimplemented:
case FIRFirestoreErrorCodeDataLoss:
Expand All @@ -176,6 +176,10 @@ + (BOOL)isPermanentWriteError:(NSError *)error {
}
}

+ (BOOL)isPermanentWriteError:(NSError *)error {
return [self isPermanentError:error] && error.code != FIRFirestoreErrorCodeAborted;
}

- (void)commitMutations:(NSArray<FSTMutation *> *)mutations
completion:(FSTVoidErrorBlock)completion {
_datastore->CommitMutations(mutations, completion);
Expand Down
12 changes: 8 additions & 4 deletions Firestore/Source/Remote/FSTRemoteStore.mm
Original file line number Diff line number Diff line change
Expand Up @@ -573,20 +573,24 @@ - (void)writeStreamWasInterruptedWithError:(nullable NSError *)error {

- (void)handleHandshakeError:(NSError *)error {
HARD_ASSERT(error, "Handling write error with status OK.");
// Reset the token if it's a permanent error or the error code is ABORTED, signaling the write
// stream is no longer valid.
if ([FSTDatastore isPermanentWriteError:error] || [FSTDatastore isAbortedError:error]) {
// Reset the token if it's a permanent error, signaling the write stream is
// no longer valid. Note that the handshake does not count as a write: see
// comments on isPermanentWriteError for details.
if ([FSTDatastore isPermanentError:error]) {
NSString *token = [_writeStream->GetLastStreamToken() base64EncodedStringWithOptions:0];
LOG_DEBUG("FSTRemoteStore %s error before completed handshake; resetting stream token %s: %s",
(__bridge void *)self, token, error);
_writeStream->SetLastStreamToken(nil);
[self.localStore setLastStreamToken:nil];
} else {
// Some other error, don't reset stream token. Our stream logic will just retry with exponential
// backoff.
}
}

- (void)handleWriteError:(NSError *)error {
HARD_ASSERT(error, "Handling write error with status OK.");
// Only handle permanent error. If it's transient, just let the retry logic kick in.
// Only handle permanent errors here. If it's transient, just let the retry logic kick in.
if (![FSTDatastore isPermanentWriteError:error]) {
return;
}
Expand Down