Skip to content

Commit 7bb0b9a

Browse files
[Multi-Tab] Recover from unexpected primary lease loss (#984)
* Recover from unexpected primary lease loss * Review comments * RFC/Not dropping primary status right away * Some clever review comment before I merge `firestore-multi-tab`. * Don't release primary lease * [AUTOMATED]: Prettier Code Styling * Dropping the primary lease in IndexedDb * [AUTOMATED]: Prettier Code Styling * More feedback * Fixing some nits * [AUTOMATED]: Prettier Code Styling * Addresseing one more round of feedback. * [AUTOMATED]: Prettier Code Styling * [AUTOMATED]: Prettier Code Styling
1 parent 59a7208 commit 7bb0b9a

File tree

11 files changed

+586
-131
lines changed

11 files changed

+586
-131
lines changed

packages/firestore/src/core/sync_engine.ts

Lines changed: 200 additions & 102 deletions
Large diffs are not rendered by default.

packages/firestore/src/core/view.ts

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,9 +383,34 @@ export class View {
383383
return changes;
384384
}
385385

386+
/**
387+
* Update the in-memory state of the current view with the state read from
388+
* persistence.
389+
*
390+
* We update the query view whenever a client's primary status changes:
391+
* - When a client transitions from primary to secondary, it can miss
392+
* LocalStorage updates and its query views may temporarily not be
393+
* synchronized with the state on disk.
394+
* - For secondary to primary transitions, the client needs to update the list
395+
* of `syncedDocuments` since secondary clients update their query views
396+
* based purely on synthesized RemoteEvents.
397+
*
398+
* @param localDocs - The documents that match the query according to the
399+
* LocalStore.
400+
* @param remoteKeys - The keys of the documents that match the query
401+
* according to the backend.
402+
*
403+
* @return The ViewChange that resulted from this synchronization.
404+
*/
386405
// PORTING NOTE: Multi-tab only.
387-
synchronizeWithRemoteKeys(remoteKeys: DocumentKeySet): void {
406+
synchronizeWithPersistedState(
407+
localDocs: MaybeDocumentMap,
408+
remoteKeys: DocumentKeySet
409+
): ViewChange {
388410
this._syncedDocuments = remoteKeys;
411+
this.limboDocuments = documentKeySet();
412+
const docChanges = this.computeDocChanges(localDocs);
413+
return this.applyChanges(docChanges, /*updateLimboDocuments=*/ true);
389414
}
390415

391416
/**

packages/firestore/src/local/indexeddb_persistence.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,8 @@ export class IndexedDbPersistence implements Persistence {
478478
log.error(
479479
`Failed to obtain primary lease for action '${action}'.`
480480
);
481+
this.isPrimary = false;
482+
this.queue.enqueue(() => this.primaryStateListener(false));
481483
throw new FirestoreError(
482484
Code.FAILED_PRECONDITION,
483485
PRIMARY_LEASE_LOST_ERROR_MSG
@@ -734,6 +736,12 @@ export class IndexedDbPersistence implements Persistence {
734736
}
735737
}
736738

739+
export function isPrimaryLeaseLostError(err: FirestoreError): boolean {
740+
return (
741+
err.code === Code.FAILED_PRECONDITION &&
742+
err.message === PRIMARY_LEASE_LOST_ERROR_MSG
743+
);
744+
}
737745
/**
738746
* Helper to get a typed SimpleDbStore for the owner object store.
739747
*/

packages/firestore/src/local/local_store.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ export class LocalStore {
687687
* found - used for testing.
688688
*/
689689
readDocument(key: DocumentKey): Promise<MaybeDocument | null> {
690-
return this.persistence.runTransaction('read document', true, txn => {
690+
return this.persistence.runTransaction('read document', false, txn => {
691691
return this.localDocuments.getDocument(txn, key);
692692
});
693693
}

packages/firestore/src/remote/remote_store.ts

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ import {
5151
import { OnlineStateTracker } from './online_state_tracker';
5252
import { AsyncQueue } from '../util/async_queue';
5353
import { DocumentKeySet } from '../model/collections';
54+
import { isPrimaryLeaseLostError } from '../local/indexeddb_persistence';
5455

5556
const LOG_TAG = 'RemoteStore';
5657

@@ -580,7 +581,24 @@ export class RemoteStore implements TargetMetadataProvider {
580581
for (const batch of this.writePipeline) {
581582
this.writeStream.writeMutations(batch.mutations);
582583
}
583-
});
584+
})
585+
.catch(err => this.ignoreIfPrimaryLeaseLoss(err));
586+
}
587+
588+
/**
589+
* Verifies the error thrown by an LocalStore operation. If a LocalStore
590+
* operation fails because the primary lease has been taken by another client,
591+
* we ignore the error. All other errors are re-thrown.
592+
*
593+
* @param err An error returned by a LocalStore operation.
594+
* @return A Promise that resolves after we recovered, or the original error.
595+
*/
596+
private ignoreIfPrimaryLeaseLoss(err: FirestoreError): void {
597+
if (isPrimaryLeaseLostError(err)) {
598+
log.debug(LOG_TAG, 'Unexpectedly lost primary lease');
599+
} else {
600+
throw err;
601+
}
584602
}
585603

586604
private onMutationResult(
@@ -650,7 +668,9 @@ export class RemoteStore implements TargetMetadataProvider {
650668
);
651669
this.writeStream.lastStreamToken = emptyByteString();
652670

653-
return this.localStore.setLastStreamToken(emptyByteString());
671+
return this.localStore
672+
.setLastStreamToken(emptyByteString())
673+
.catch(err => this.ignoreIfPrimaryLeaseLoss(err));
654674
} else {
655675
// Some other error, don't reset stream token. Our stream logic will
656676
// just retry with exponential backoff.

packages/firestore/src/util/obj.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ export function size<V>(obj: Dict<V>): number {
3939
return count;
4040
}
4141

42+
/** Extracts the numeric indices from a dictionary. */
43+
export function indices<V>(obj: { [numberKey: number]: V }): number[] {
44+
return Object.keys(obj).map(key => {
45+
return Number(key);
46+
});
47+
}
48+
4249
/** Returns the given value if it's defined or the defaultValue otherwise. */
4350
export function defaulted<V>(value: V | undefined, defaultValue: V): V {
4451
return value !== undefined ? value : defaultValue;

packages/firestore/test/unit/specs/limbo_spec.test.ts

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,52 @@ describeSpec('Limbo Documents:', [], () => {
393393
}
394394
);
395395

396-
// TODO(multitab): We need a test case that verifies that a primary client
397-
// that loses its primary lease while documents are in limbo correctly handles
398-
// these documents even when it picks up its lease again.
396+
specTest(
397+
'Limbo documents survive primary state transitions',
398+
['multi-client'],
399+
() => {
400+
const query = Query.atPath(path('collection'));
401+
const docA = doc('collection/a', 1000, { key: 'a' });
402+
const docB = doc('collection/b', 1001, { key: 'b' });
403+
const docC = doc('collection/c', 1002, { key: 'c' });
404+
const deletedDocB = deletedDoc('collection/b', 1006);
405+
const deletedDocC = deletedDoc('collection/c', 1008);
406+
407+
return (
408+
client(0, false)
409+
.expectPrimaryState(true)
410+
.userListens(query)
411+
.watchAcksFull(query, 1002, docA, docB, docC)
412+
.expectEvents(query, { added: [docA, docB, docC] })
413+
.watchRemovesDoc(docB.key, query)
414+
.watchRemovesDoc(docC.key, query)
415+
.watchSnapshots(1003)
416+
.expectEvents(query, { fromCache: true })
417+
.expectLimboDocs(docB.key, docC.key)
418+
.client(1)
419+
.stealPrimaryLease()
420+
.client(0)
421+
.runTimer(TimerId.ClientMetadataRefresh)
422+
.expectPrimaryState(false)
423+
.expectLimboDocs()
424+
.client(1)
425+
// TODO(37254270): This should be 'resume-token-1003' from the last
426+
// global snapshot.
427+
.expectListen(query, 'resume-token-1002')
428+
.watchAcksFull(query, 1005)
429+
.expectLimboDocs(docB.key, docC.key)
430+
.ackLimbo(1006, deletedDocB)
431+
.expectLimboDocs(docC.key)
432+
.client(0)
433+
.expectEvents(query, { removed: [docB], fromCache: true })
434+
.stealPrimaryLease()
435+
.expectListen(query, 'resume-token-1005')
436+
.watchAcksFull(query, 1007)
437+
.expectLimboDocs(docC.key)
438+
.ackLimbo(1007, deletedDocC)
439+
.expectLimboDocs()
440+
.expectEvents(query, { removed: [docC] })
441+
);
442+
}
443+
);
399444
});

packages/firestore/test/unit/specs/listen_spec.test.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,66 @@ describeSpec('Listens:', [], () => {
939939
.expectEvents(query, { added: [docB] });
940940
});
941941

942+
specTest('Query recovers after primary takeover', ['multi-client'], () => {
943+
const query = Query.atPath(path('collection'));
944+
const docA = doc('collection/a', 1000, { key: 'a' });
945+
const docB = doc('collection/b', 2000, { key: 'b' });
946+
const docC = doc('collection/c', 3000, { key: 'c' });
947+
948+
return (
949+
client(0)
950+
.expectPrimaryState(true)
951+
.userListens(query)
952+
.watchAcksFull(query, 1000, docA)
953+
.expectEvents(query, { added: [docA] })
954+
.client(1)
955+
.userListens(query)
956+
.expectEvents(query, { added: [docA] })
957+
.stealPrimaryLease()
958+
.expectListen(query, 'resume-token-1000')
959+
.watchAcksFull(query, 2000, docB)
960+
.expectEvents(query, { added: [docB] })
961+
.client(0)
962+
// Client 0 ignores all events until it transitions to secondary
963+
.client(1)
964+
.watchSends({ affects: [query] }, docC)
965+
.watchSnapshots(3000)
966+
.expectEvents(query, { added: [docC] })
967+
.client(0)
968+
.runTimer(TimerId.ClientMetadataRefresh)
969+
.expectPrimaryState(false)
970+
.expectEvents(query, { added: [docB, docC] })
971+
);
972+
});
973+
974+
specTest(
975+
'Unresponsive primary ignores watch update',
976+
['multi-client'],
977+
() => {
978+
const query = Query.atPath(path('collection'));
979+
const docA = doc('collection/a', 1000, { key: 'a' });
980+
981+
return (
982+
client(0)
983+
.expectPrimaryState(true)
984+
.client(1)
985+
.userListens(query)
986+
.client(0)
987+
.expectListen(query)
988+
.client(1)
989+
.stealPrimaryLease()
990+
.client(0)
991+
// Send a watch update to client 0, who is longer primary (but doesn't
992+
// know it yet). The watch update gets ignored.
993+
.watchAcksFull(query, 1000, docA)
994+
.client(1)
995+
.expectListen(query)
996+
.watchAcksFull(query, 1000, docA)
997+
.expectEvents(query, { added: [docA] })
998+
);
999+
}
1000+
);
1001+
9421002
specTest(
9431003
'Listen is established in newly started primary',
9441004
['multi-client'],

packages/firestore/test/unit/specs/spec_builder.ts

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@ import {
2424
} from '../../../src/model/document';
2525
import { DocumentKey } from '../../../src/model/document_key';
2626
import { JsonObject } from '../../../src/model/field_value';
27-
import { mapRpcCodeFromCode } from '../../../src/remote/rpc_error';
27+
import {
28+
isPermanentError,
29+
mapCodeFromRpcCode,
30+
mapRpcCodeFromCode
31+
} from '../../../src/remote/rpc_error';
2832
import { assert } from '../../../src/util/assert';
2933
import { fail } from '../../../src/util/assert';
3034
import { Code } from '../../../src/util/error';
@@ -76,12 +80,20 @@ export class ClientMemoryState {
7680
this.reset();
7781
}
7882

83+
/** Reset all internal memory state (as done during a client restart). */
7984
reset(): void {
8085
this.queryMapping = {};
8186
this.limboMapping = {};
8287
this.activeTargets = {};
8388
this.limboIdGenerator = TargetIdGenerator.forSyncEngine();
8489
}
90+
91+
/**
92+
* Reset the internal limbo mapping (as done during a primary lease failover).
93+
*/
94+
resetLimboMapping(): void {
95+
this.limboMapping = {};
96+
}
8597
}
8698

8799
/**
@@ -457,12 +469,16 @@ export class SpecBuilder {
457469
writeAcks(
458470
doc: string,
459471
version: TestSnapshotVersion,
460-
options?: { expectUserCallback: boolean }
472+
options?: { expectUserCallback?: boolean; keepInQueue?: boolean }
461473
): this {
462474
this.nextStep();
463-
this.currentStep = { writeAck: { version } };
475+
options = options || {};
464476

465-
if (!options || options.expectUserCallback) {
477+
this.currentStep = {
478+
writeAck: { version, keepInQueue: !!options.keepInQueue }
479+
};
480+
481+
if (options.expectUserCallback) {
466482
return this.expectUserCallbacks({ acknowledged: [doc] });
467483
} else {
468484
return this;
@@ -476,13 +492,23 @@ export class SpecBuilder {
476492
*/
477493
failWrite(
478494
doc: string,
479-
err: RpcError,
480-
options?: { expectUserCallback: boolean }
495+
error: RpcError,
496+
options?: { expectUserCallback?: boolean; keepInQueue?: boolean }
481497
): this {
482498
this.nextStep();
483-
this.currentStep = { failWrite: { error: err } };
499+
options = options || {};
484500

485-
if (!options || options.expectUserCallback) {
501+
// If this is a permanent error, the write is not expected to be sent
502+
// again.
503+
const isPermanentFailure = isPermanentError(mapCodeFromRpcCode(error.code));
504+
const keepInQueue =
505+
options.keepInQueue !== undefined
506+
? options.keepInQueue
507+
: !isPermanentFailure;
508+
509+
this.currentStep = { failWrite: { error, keepInQueue } };
510+
511+
if (options.expectUserCallback) {
486512
return this.expectUserCallbacks({ rejected: [doc] });
487513
} else {
488514
return this;
@@ -904,6 +930,31 @@ export class MultiClientSpecBuilder extends SpecBuilder {
904930
return this;
905931
}
906932

933+
/**
934+
* Take the primary lease, even if another client has already obtained the
935+
* lease.
936+
*/
937+
stealPrimaryLease(): this {
938+
this.nextStep();
939+
this.currentStep = {
940+
applyClientState: {
941+
primary: true
942+
},
943+
stateExpect: {
944+
isPrimary: true
945+
}
946+
};
947+
948+
// HACK: SyncEngine resets its limbo mapping when it gains the primary
949+
// lease. The SpecTests need to also clear their mapping, but when we parse
950+
// the spec tests, we don't know when the primary lease transition happens.
951+
// It is likely going to happen right after `stealPrimaryLease`, so we are
952+
// clearing the limbo mapping here.
953+
this.clientState.resetLimboMapping();
954+
955+
return this;
956+
}
957+
907958
protected nextStep(): void {
908959
if (this.currentStep !== null) {
909960
this.currentStep.clientIndex = this.activeClientIndex;

0 commit comments

Comments
 (0)