Skip to content

Firestore: Misc bloom filter cleanup ported from Android #7474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 59 additions & 49 deletions packages/firestore/src/remote/watch_change.ts
Original file line number Diff line number Diff line change
Expand Up @@ -433,15 +433,18 @@ export class WatchChangeAggregator {
// raise a snapshot with `isFromCache:true`.
if (currentSize !== expectedCount) {
// Apply bloom filter to identify and mark removed documents.
const applyResult = this.applyBloomFilter(watchChange, currentSize);
const bloomFilter = this.parseBloomFilter(watchChange);
const status = bloomFilter
? this.applyBloomFilter(bloomFilter, watchChange, currentSize)
: BloomFilterApplicationStatus.Skipped;

if (applyResult.status !== BloomFilterApplicationStatus.Success) {
if (status !== BloomFilterApplicationStatus.Success) {
// If bloom filter application fails, we reset the mapping and
// trigger re-run of the query.
this.resetTarget(targetId);

const purpose: TargetPurpose =
applyResult.status === BloomFilterApplicationStatus.FalsePositive
status === BloomFilterApplicationStatus.FalsePositive
? TargetPurpose.ExistenceFilterMismatchBloom
: TargetPurpose.ExistenceFilterMismatch;
this.pendingTargetResets = this.pendingTargetResets.insert(
Expand All @@ -451,10 +454,11 @@ export class WatchChangeAggregator {
}
TestingHooks.instance?.notifyOnExistenceFilterMismatch(
createExistenceFilterMismatchInfoForTestingHooks(
applyResult.status,
applyResult.bloomFilterMightContain ?? null,
currentSize,
watchChange.existenceFilter
watchChange.existenceFilter,
this.metadataProvider.getDatabaseId(),
bloomFilter,
status
)
);
}
Expand All @@ -463,21 +467,15 @@ export class WatchChangeAggregator {
}

/**
* Apply bloom filter to remove the deleted documents, and return the
* application status.
* Parse the bloom filter from the "unchanged_names" field of an existence
* filter.
*/
private applyBloomFilter(
watchChange: ExistenceFilterChange,
currentCount: number
): {
status: BloomFilterApplicationStatus;
bloomFilterMightContain?: (documentPath: string) => boolean;
} {
const { unchangedNames, count: expectedCount } =
watchChange.existenceFilter;

private parseBloomFilter(
watchChange: ExistenceFilterChange
): BloomFilter | null {
const unchangedNames = watchChange.existenceFilter.unchangedNames;
if (!unchangedNames || !unchangedNames.bits) {
return { status: BloomFilterApplicationStatus.Skipped };
return null;
}

const {
Expand All @@ -495,7 +493,7 @@ export class WatchChangeAggregator {
err.message +
'); ignoring the bloom filter and falling back to full re-query.'
);
return { status: BloomFilterApplicationStatus.Skipped };
return null;
} else {
throw err;
}
Expand All @@ -511,46 +509,56 @@ export class WatchChangeAggregator {
} else {
logWarn('Applying bloom filter failed: ', err);
}
return { status: BloomFilterApplicationStatus.Skipped };
return null;
}

const bloomFilterMightContain = (documentPath: string): boolean => {
const databaseId = this.metadataProvider.getDatabaseId();
return bloomFilter.mightContain(
`projects/${databaseId.projectId}/databases/${databaseId.database}` +
`/documents/${documentPath}`
);
};

if (bloomFilter.bitCount === 0) {
return { status: BloomFilterApplicationStatus.Skipped };
return null;
}

return bloomFilter;
}

/**
* Apply bloom filter to remove the deleted documents, and return the
* application status.
*/
private applyBloomFilter(
bloomFilter: BloomFilter,
watchChange: ExistenceFilterChange,
currentCount: number
): BloomFilterApplicationStatus {
const expectedCount = watchChange.existenceFilter.count;

const removedDocumentCount = this.filterRemovedDocuments(
watchChange.targetId,
bloomFilterMightContain
bloomFilter,
watchChange.targetId
);

const status =
expectedCount === currentCount - removedDocumentCount
? BloomFilterApplicationStatus.Success
: BloomFilterApplicationStatus.FalsePositive;
return { status, bloomFilterMightContain };
return expectedCount === currentCount - removedDocumentCount
? BloomFilterApplicationStatus.Success
: BloomFilterApplicationStatus.FalsePositive;
}

/**
* Filter out removed documents based on bloom filter membership result and
* return number of documents removed.
*/
private filterRemovedDocuments(
targetId: number,
bloomFilterMightContain: (documentPath: string) => boolean
bloomFilter: BloomFilter,
targetId: number
): number {
const existingKeys = this.metadataProvider.getRemoteKeysForTarget(targetId);
let removalCount = 0;

existingKeys.forEach(key => {
if (!bloomFilterMightContain(key.path.canonicalString())) {
const databaseId = this.metadataProvider.getDatabaseId();
const documentPath =
`projects/${databaseId.projectId}` +
`/databases/${databaseId.database}` +
`/documents/${key.path.canonicalString()}`;

if (!bloomFilter.mightContain(documentPath)) {
this.removeDocumentFromTarget(targetId, key, /*updatedDocument=*/ null);
removalCount++;
}
Expand Down Expand Up @@ -835,27 +843,29 @@ function snapshotChangesMap(): SortedMap<DocumentKey, ChangeType> {
}

function createExistenceFilterMismatchInfoForTestingHooks(
status: BloomFilterApplicationStatus,
bloomFilterMightContain: null | ((documentPath: string) => boolean),
localCacheCount: number,
existenceFilter: ExistenceFilter
existenceFilter: ExistenceFilter,
databaseId: DatabaseId,
bloomFilter: BloomFilter | null,
bloomFilterStatus: BloomFilterApplicationStatus
): TestingHooksExistenceFilterMismatchInfo {
const result: TestingHooksExistenceFilterMismatchInfo = {
localCacheCount,
existenceFilterCount: existenceFilter.count
existenceFilterCount: existenceFilter.count,
databaseId: databaseId.database,
projectId: databaseId.projectId
};

const unchangedNames = existenceFilter.unchangedNames;
if (unchangedNames) {
result.bloomFilter = {
applied: status === BloomFilterApplicationStatus.Success,
applied: bloomFilterStatus === BloomFilterApplicationStatus.Success,
hashCount: unchangedNames?.hashCount ?? 0,
bitmapLength: unchangedNames?.bits?.bitmap?.length ?? 0,
padding: unchangedNames?.bits?.padding ?? 0
padding: unchangedNames?.bits?.padding ?? 0,
mightContain: (value: string): boolean =>
bloomFilter?.mightContain(value) ?? false
};
if (bloomFilterMightContain) {
result.bloomFilter.mightContain = bloomFilterMightContain;
}
}

return result;
Expand Down
28 changes: 16 additions & 12 deletions packages/firestore/src/util/testing_hooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,18 @@ export interface ExistenceFilterMismatchInfo {
*/
existenceFilterCount: number;

/**
* The projectId used when checking documents for membership in the bloom
* filter.
*/
projectId: string;

/**
* The databaseId used when checking documents for membership in the bloom
* filter.
*/
databaseId: string;

/**
* Information about the bloom filter provided by Watch in the ExistenceFilter
* message's `unchangedNames` field. If this property is omitted or undefined
Expand All @@ -126,19 +138,11 @@ export interface ExistenceFilterMismatchInfo {
padding: number;

/**
* Check if the given document path is contained in the bloom filter.
*
* The "path" of a document can be retrieved from the
* `DocumentReference.path` property.
*
* Note that due to the probabilistic nature of a bloom filter, it is
* possible that false positives may occur; that is, this function may
* return `true` even though the given string is not in the bloom filter.
*
* This property is "optional"; if it is undefined then parsing the bloom
* filter failed.
* Tests the given string for membership in the bloom filter created from
* the existence filter; will be undefined if creating the bloom filter
* failed.
*/
mightContain?(documentPath: string): boolean;
mightContain?: (value: string) => boolean;
};
}

Expand Down
16 changes: 7 additions & 9 deletions packages/firestore/test/integration/api/query.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2260,8 +2260,9 @@ apiDescribe('Queries', persistence => {
// Delete one of the documents so that the next call to getDocs() will
// experience an existence filter mismatch. Use a WriteBatch, rather
// than deleteDoc(), to avoid affecting the local cache.
const documentToDelete = doc(coll, 'DocumentToDelete');
const writeBatchForDocumentDeletes = writeBatch(db);
writeBatchForDocumentDeletes.delete(doc(coll, 'DocumentToDelete'));
writeBatchForDocumentDeletes.delete(documentToDelete);
await writeBatchForDocumentDeletes.commit();

// Wait for 10 seconds, during which Watch will stop tracking the query
Expand All @@ -2278,7 +2279,7 @@ apiDescribe('Queries', persistence => {
documentSnapshot => documentSnapshot.id
);
const testDocIdsMinusDeletedDocId = testDocIds.filter(
documentId => documentId !== 'DocumentToDelete'
documentId => documentId !== documentToDelete.id
);
expect(snapshot2DocumentIds, 'snapshot2DocumentIds').to.have.members(
testDocIdsMinusDeletedDocId
Expand Down Expand Up @@ -2309,20 +2310,17 @@ apiDescribe('Queries', persistence => {
// is if there is a false positive when testing for 'DocumentToDelete'
// in the bloom filter. So verify that the bloom filter application is
// successful, unless there was a false positive.
const isFalsePositive = bloomFilter.mightContain!(
`${coll.path}/DocumentToDelete`
);
const isFalsePositive = bloomFilter.mightContain(documentToDelete);
expect(bloomFilter.applied, 'bloomFilter.applied').to.equal(
!isFalsePositive
);

// Verify that the bloom filter contains the document paths with complex
// Unicode characters.
for (const testDocId of testDocIdsMinusDeletedDocId) {
const testDocPath = `${coll.path}/${testDocId}`;
for (const testDoc of snapshot2.docs.map(snapshot => snapshot.ref)) {
expect(
bloomFilter.mightContain!(testDocPath),
`bloomFilter.mightContain('${testDocPath}')`
bloomFilter.mightContain(testDoc),
`bloomFilter.mightContain('${testDoc.path}')`
).to.be.true;
}
});
Expand Down
62 changes: 56 additions & 6 deletions packages/firestore/test/integration/util/testing_hooks_util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
* limitations under the License.
*/

import { _TestingHooks as TestingHooks } from './firebase_export';
import {
DocumentReference,
_TestingHooks as TestingHooks
} from './firebase_export';

/**
* Captures all existence filter mismatches in the Watch 'Listen' stream that
Expand All @@ -30,9 +33,9 @@ export async function captureExistenceFilterMismatches<T>(
): Promise<[ExistenceFilterMismatchInfo[], T]> {
const results: ExistenceFilterMismatchInfo[] = [];
const onExistenceFilterMismatchCallback = (
info: ExistenceFilterMismatchInfo
info: ExistenceFilterMismatchInfoInternal
): void => {
results.push(info);
results.push(createExistenceFilterMismatchInfoFrom(info));
};

const unregister =
Expand All @@ -51,8 +54,7 @@ export async function captureExistenceFilterMismatches<T>(
}

/**
* Information about an existence filter mismatch, captured during an invocation
* of `captureExistenceFilterMismatches()`.
* A copy of `ExistenceFilterMismatchInfo` as defined in `testing_hooks.ts`.
*
* See the documentation of `TestingHooks.notifyOnExistenceFilterMismatch()`
* for the meaning of these values.
Expand All @@ -61,6 +63,27 @@ export async function captureExistenceFilterMismatches<T>(
* testing_hooks.ts. I tried to do this but couldn't figure out how to get it to
* work in a way that survived bundling and minification.
*/
interface ExistenceFilterMismatchInfoInternal {
localCacheCount: number;
existenceFilterCount: number;
projectId: string;
databaseId: string;
bloomFilter?: {
applied: boolean;
hashCount: number;
bitmapLength: number;
padding: number;
mightContain?: (value: string) => boolean;
};
}

/**
* Information about an existence filter mismatch, captured during an invocation
* of `captureExistenceFilterMismatches()`.
*
* See the documentation of `TestingHooks.notifyOnExistenceFilterMismatch()`
* for the meaning of these values.
*/
export interface ExistenceFilterMismatchInfo {
localCacheCount: number;
existenceFilterCount: number;
Expand All @@ -69,6 +92,33 @@ export interface ExistenceFilterMismatchInfo {
hashCount: number;
bitmapLength: number;
padding: number;
mightContain?(documentPath: string): boolean;
mightContain(documentRef: DocumentReference): boolean;
};
}

function createExistenceFilterMismatchInfoFrom(
internalInfo: ExistenceFilterMismatchInfoInternal
): ExistenceFilterMismatchInfo {
const info: ExistenceFilterMismatchInfo = {
localCacheCount: internalInfo.localCacheCount,
existenceFilterCount: internalInfo.existenceFilterCount
};

const internalBloomFilter = internalInfo.bloomFilter;
if (internalBloomFilter) {
info.bloomFilter = {
applied: internalBloomFilter.applied,
hashCount: internalBloomFilter.hashCount,
bitmapLength: internalBloomFilter.bitmapLength,
padding: internalBloomFilter.padding,
mightContain: (documentRef: DocumentReference): boolean =>
internalBloomFilter.mightContain?.(
`projects/${internalInfo.projectId}` +
`/databases/${internalInfo.databaseId}` +
`/documents/${documentRef.path}`
) ?? false
};
}

return info;
}