Skip to content

Performance optimizations to speed up reading large collections #123

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Nov 28, 2018
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,46 @@ private MaybeDocument getDocument(DocumentKey key, List<MutationBatch> inBatches
return document;
}

// Returns the view of the given {@code docs} as they would appear after applying all mutations in
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note the significant refactoring in this file.

// the given {@code batches}.
private Map<DocumentKey, MaybeDocument> applyLocalMutationsToDocuments(
Map<DocumentKey, MaybeDocument> docs, List<MutationBatch> batches) {
for (Map.Entry<DocumentKey, MaybeDocument> base : docs.entrySet()) {
MaybeDocument localView = base.getValue();
for (MutationBatch batch : batches) {
localView = batch.applyToLocalView(base.getKey(), localView);
}
base.setValue(localView);
}

return docs;
}

/**
 * Gets the local view of the documents identified by {@code keys}.
 *
 * <p>If we don't have cached state for a document in {@code keys}, a NoDocument will be stored
 * for that key in the resulting set.
 */
ImmutableSortedMap<DocumentKey, MaybeDocument> getDocuments(Iterable<DocumentKey> keys) {
  // Fetch all base documents from the remote document cache in one call, then overlay
  // any pending local mutations.
  return getLocalViewOfDocuments(remoteDocumentCache.getAll(keys));
}

/**
* Similar to {@code #getDocuments}, but creates the local view from the given {@code baseDocs}
* without retrieving documents from the local store.
*/
ImmutableSortedMap<DocumentKey, MaybeDocument> getLocalViewOfDocuments(
Map<DocumentKey, MaybeDocument> baseDocs) {
ImmutableSortedMap<DocumentKey, MaybeDocument> results = emptyMaybeDocumentMap();

List<MutationBatch> batches = mutationQueue.getAllMutationBatchesAffectingDocumentKeys(keys);
for (DocumentKey key : keys) {
// TODO: PERF: Consider fetching all remote documents at once rather than
// one-by-one.
MaybeDocument maybeDoc = getDocument(key, batches);
List<MutationBatch> batches =
mutationQueue.getAllMutationBatchesAffectingDocumentKeys(baseDocs.keySet());
Map<DocumentKey, MaybeDocument> docs = applyLocalMutationsToDocuments(baseDocs, batches);
for (Map.Entry<DocumentKey, MaybeDocument> entry : docs.entrySet()) {
DocumentKey key = entry.getKey();
MaybeDocument maybeDoc = entry.getValue();
// TODO: Don't conflate missing / deleted.
if (maybeDoc == null) {
maybeDoc = new NoDocument(key, SnapshotVersion.NONE, /*hasCommittedMutations=*/ false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,19 @@ public LocalSerializer(RemoteSerializer rpcSerializer) {
com.google.firebase.firestore.proto.MaybeDocument encodeMaybeDocument(MaybeDocument document) {
com.google.firebase.firestore.proto.MaybeDocument.Builder builder =
com.google.firebase.firestore.proto.MaybeDocument.newBuilder();

if (document instanceof NoDocument) {
NoDocument noDocument = (NoDocument) document;
builder.setNoDocument(encodeNoDocument(noDocument));
builder.setHasCommittedMutations(noDocument.hasCommittedMutations());
} else if (document instanceof Document) {
Document existingDocument = (Document) document;
builder.setDocument(encodeDocument(existingDocument));
// Use the memoized encoded form if it exists.
if (existingDocument.getProto() != null) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optimization 1 ("Avoid encode"): avoid serializing a Document to a proto again; instead, keep the serialized version around and reuse it. Storing the proto right in the document object is a strawman.

builder.setDocument(existingDocument.getProto());
} else {
builder.setDocument(encodeDocument(existingDocument));
}
builder.setHasCommittedMutations(existingDocument.hasCommittedMutations());
} else if (document instanceof UnknownDocument) {
builder.setUnknownDocument(encodeUnknownDocument((UnknownDocument) document));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.google.firebase.firestore.remote.TargetChange;
import com.google.firebase.firestore.util.Logger;
import com.google.protobuf.ByteString;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -329,14 +330,19 @@ public ImmutableSortedMap<DocumentKey, MaybeDocument> applyRemoteEvent(RemoteEve
}
}

Set<DocumentKey> changedDocKeys = new HashSet<>();
Map<DocumentKey, MaybeDocument> changedDocs = new HashMap<>();
Map<DocumentKey, MaybeDocument> documentUpdates = remoteEvent.getDocumentUpdates();
Set<DocumentKey> limboDocuments = remoteEvent.getResolvedLimboDocuments();
// Each loop iteration only affects its "own" doc, so it's safe to get all the remote
// documents in advance in a single call.
Map<DocumentKey, MaybeDocument> existingDocs =
remoteDocuments.getAll(documentUpdates.keySet());

for (Entry<DocumentKey, MaybeDocument> entry : documentUpdates.entrySet()) {
DocumentKey key = entry.getKey();
MaybeDocument doc = entry.getValue();
changedDocKeys.add(key);
MaybeDocument existingDoc = remoteDocuments.get(key);
MaybeDocument existingDoc = existingDocs.get(key);

// If a document update isn't authoritative, make sure we don't
// apply an old document version to the remote cache. We make an
// exception for SnapshotVersion.MIN which can happen for
Expand All @@ -347,6 +353,7 @@ public ImmutableSortedMap<DocumentKey, MaybeDocument> applyRemoteEvent(RemoteEve
|| (authoritativeUpdates.contains(doc.getKey()) && !existingDoc.hasPendingWrites())
|| doc.getVersion().compareTo(existingDoc.getVersion()) >= 0) {
remoteDocuments.add(doc);
changedDocs.put(key, doc);
} else {
Logger.debug(
"LocalStore",
Expand Down Expand Up @@ -376,7 +383,7 @@ public ImmutableSortedMap<DocumentKey, MaybeDocument> applyRemoteEvent(RemoteEve
queryCache.setLastRemoteSnapshotVersion(remoteVersion);
}

return localDocuments.getDocuments(changedDocKeys);
return localDocuments.getLocalViewOfDocuments(changedDocs);
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.google.firebase.firestore.model.DocumentKey;
import com.google.firebase.firestore.model.MaybeDocument;
import com.google.firebase.firestore.model.ResourcePath;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -53,6 +54,19 @@ public MaybeDocument get(DocumentKey key) {
return docs.get(key);
}

@Override
public Map<DocumentKey, MaybeDocument> getAll(Iterable<DocumentKey> keys) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One of the reasons I'm returning a Map and not an ImmutableSortedMap is because the latter doesn't have keySet/values methods, which are very useful.

Map<DocumentKey, MaybeDocument> result = new HashMap<>();

for (DocumentKey key : keys) {
// Make sure each key has a corresponding entry, which is null in case the document is not
// found.
result.put(key, get(key));
}

return result;
}

@Override
public ImmutableSortedMap<DocumentKey, Document> getAllDocumentsMatchingQuery(Query query) {
ImmutableSortedMap<DocumentKey, Document> result = emptyDocumentMap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.google.firebase.firestore.model.Document;
import com.google.firebase.firestore.model.DocumentKey;
import com.google.firebase.firestore.model.MaybeDocument;
import java.util.Map;
import javax.annotation.Nullable;

/**
Expand Down Expand Up @@ -51,6 +52,15 @@ interface RemoteDocumentCache {
@Nullable
MaybeDocument get(DocumentKey documentKey);

/**
 * Looks up a set of entries in the cache, returning one map entry per requested key.
 *
 * @param documentKeys The keys of the entries to look up.
 * @return The cached Document or NoDocument entries indexed by key. If an entry is not cached,
 *     the corresponding key will be mapped to a null value. Iteration order of the returned map
 *     is unspecified.
 */
Map<DocumentKey, MaybeDocument> getAll(Iterable<DocumentKey> documentKeys);

/**
* Executes a query against the cached Document entries
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.MessageLite;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -277,46 +277,29 @@ public List<MutationBatch> getAllMutationBatchesAffectingDocumentKey(DocumentKey
@Override
public List<MutationBatch> getAllMutationBatchesAffectingDocumentKeys(
Iterable<DocumentKey> documentKeys) {
List<MutationBatch> result = new ArrayList<>();
if (!documentKeys.iterator().hasNext()) {
return result;
List<Object> args = new ArrayList<>();
for (DocumentKey key : documentKeys) {
args.add(EncodedPath.encode(key.getPath()));
}

// SQLite limits maximum number of host parameters to 999 (see
// https://www.sqlite.org/limits.html). To work around this, split the given keys into several
// smaller sets and issue a separate query for each.
int limit = 900;
Iterator<DocumentKey> keyIter = documentKeys.iterator();
SQLitePersistence.LongQuery longQuery =
new SQLitePersistence.LongQuery(
db,
"SELECT DISTINCT dm.batch_id, m.mutations FROM document_mutations dm, mutations m "
+ "WHERE dm.uid = ? "
+ "AND dm.path IN (",
Arrays.asList(uid),
args,
") "
+ "AND dm.uid = m.uid "
+ "AND dm.batch_id = m.batch_id "
+ "ORDER BY dm.batch_id");

List<MutationBatch> result = new ArrayList<>();
Set<Integer> uniqueBatchIds = new HashSet<>();
int queriesPerformed = 0;
while (keyIter.hasNext()) {
++queriesPerformed;
StringBuilder placeholdersBuilder = new StringBuilder();
List<String> args = new ArrayList<>();
args.add(uid);

for (int i = 0; keyIter.hasNext() && i < limit; i++) {
DocumentKey key = keyIter.next();

if (i > 0) {
placeholdersBuilder.append(", ");
}
placeholdersBuilder.append("?");

args.add(EncodedPath.encode(key.getPath()));
}
String placeholders = placeholdersBuilder.toString();

db.query(
"SELECT DISTINCT dm.batch_id, m.mutations FROM document_mutations dm, mutations m "
+ "WHERE dm.uid = ? "
+ "AND dm.path IN ("
+ placeholders
+ ") "
+ "AND dm.uid = m.uid "
+ "AND dm.batch_id = m.batch_id "
+ "ORDER BY dm.batch_id")
.binding(args.toArray())
while (longQuery.hasMoreSubqueries()) {
longQuery
.performNextSubquery()
.forEach(
row -> {
int batchId = row.getInt(0);
Expand All @@ -330,7 +313,7 @@ public List<MutationBatch> getAllMutationBatchesAffectingDocumentKeys(
// If more than one query was issued, batches might be in an unsorted order (batches are ordered
// within one query's results, but not across queries). It's likely to be rare, so don't impose
// performance penalty on the normal case.
if (queriesPerformed > 1) {
if (longQuery.getSubqueriesPerformed() > 1) {
Collections.sort(
result,
(MutationBatch lhs, MutationBatch rhs) ->
Expand Down
Loading