-
Notifications
You must be signed in to change notification settings - Fork 615
Performance optimizations to speed up reading large collections #123
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
876e4eb
cfa2338
999d655
7fefc65
8b2af95
3585c3a
2438147
2f47171
4b02f9b
3e005bd
284e08b
56e1681
323d697
7ad6899
de3486c
44682ab
d5357d3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,8 @@ | |
import com.google.firebase.firestore.model.SnapshotVersion; | ||
import com.google.firebase.firestore.model.mutation.Mutation; | ||
import com.google.firebase.firestore.model.mutation.MutationBatch; | ||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.List; | ||
import java.util.Map; | ||
import javax.annotation.Nullable; | ||
|
@@ -69,6 +71,28 @@ private MaybeDocument getDocument(DocumentKey key, List<MutationBatch> inBatches | |
return document; | ||
} | ||
|
||
@Nullable | ||
private List<MaybeDocument> getDocumentsInternal(Iterable<DocumentKey> keys, List<MutationBatch> batches) { | ||
List<MaybeDocument> documents = remoteDocumentCache.getAll(keys); | ||
// TODO(varconst): uncomment and fix. | ||
// for (MutationBatch batch : batches) { | ||
// document = batch.applyToLocalView(key, document); | ||
// } | ||
|
||
return documents; | ||
} | ||
|
||
@Nullable | ||
private List<MaybeDocument> getDocumentsInternal(Collection<MaybeDocument> docs, List<MutationBatch> batches) { | ||
List<MaybeDocument> result = new ArrayList<>(docs); | ||
// TODO(varconst): uncomment and fix. | ||
// for (MutationBatch batch : batches) { | ||
// document = batch.applyToLocalView(key, document); | ||
// } | ||
|
||
return result; | ||
} | ||
|
||
/** | ||
* Gets the local view of the documents identified by {@code keys}. | ||
* | ||
|
@@ -79,15 +103,28 @@ ImmutableSortedMap<DocumentKey, MaybeDocument> getDocuments(Iterable<DocumentKey | |
ImmutableSortedMap<DocumentKey, MaybeDocument> results = emptyMaybeDocumentMap(); | ||
|
||
List<MutationBatch> batches = mutationQueue.getAllMutationBatchesAffectingDocumentKeys(keys); | ||
for (DocumentKey key : keys) { | ||
// TODO: PERF: Consider fetching all remote documents at once rather than | ||
// one-by-one. | ||
MaybeDocument maybeDoc = getDocument(key, batches); | ||
List<MaybeDocument> docs = getDocumentsInternal(keys, batches); | ||
for (MaybeDocument maybeDoc : docs) { | ||
// TODO: Don't conflate missing / deleted. | ||
if (maybeDoc == null) { | ||
maybeDoc = new NoDocument(maybeDoc.getKey(), SnapshotVersion.NONE, /*hasCommittedMutations=*/ false); | ||
} | ||
results = results.insert(maybeDoc.getKey(), maybeDoc); | ||
} | ||
return results; | ||
} | ||
|
||
ImmutableSortedMap<DocumentKey, MaybeDocument> getDocuments(Map<DocumentKey, MaybeDocument> docsByKey) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This avoids accessing the local database and just applies pending batches to the given documents. |
||
ImmutableSortedMap<DocumentKey, MaybeDocument> results = emptyMaybeDocumentMap(); | ||
|
||
List<MutationBatch> batches = mutationQueue.getAllMutationBatchesAffectingDocumentKeys(docsByKey.keySet()); | ||
List<MaybeDocument> docs = getDocumentsInternal(docsByKey.values(), batches); | ||
for (MaybeDocument maybeDoc : docs) { | ||
// TODO: Don't conflate missing / deleted. | ||
if (maybeDoc == null) { | ||
maybeDoc = new NoDocument(key, SnapshotVersion.NONE, /*hasCommittedMutations=*/ false); | ||
maybeDoc = new NoDocument(maybeDoc.getKey(), SnapshotVersion.NONE, /*hasCommittedMutations=*/ false); | ||
} | ||
results = results.insert(key, maybeDoc); | ||
results = results.insert(maybeDoc.getKey(), maybeDoc); | ||
} | ||
return results; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,13 +48,18 @@ public LocalSerializer(RemoteSerializer rpcSerializer) { | |
com.google.firebase.firestore.proto.MaybeDocument encodeMaybeDocument(MaybeDocument document) { | ||
com.google.firebase.firestore.proto.MaybeDocument.Builder builder = | ||
com.google.firebase.firestore.proto.MaybeDocument.newBuilder(); | ||
|
||
if (document instanceof NoDocument) { | ||
NoDocument noDocument = (NoDocument) document; | ||
builder.setNoDocument(encodeNoDocument(noDocument)); | ||
builder.setHasCommittedMutations(noDocument.hasCommittedMutations()); | ||
} else if (document instanceof Document) { | ||
Document existingDocument = (Document) document; | ||
builder.setDocument(encodeDocument(existingDocument)); | ||
if (existingDocument.getProto() != null) { | ||
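There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Optimization 1 ("Avoid encode"): avoid serializing a `Document` when the proto it was decoded from is still available (`getProto() != null`); reuse that proto instead. |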
||
builder.setDocument(existingDocument.getProto()); | ||
} else { | ||
builder.setDocument(encodeDocument(existingDocument)); | ||
} | ||
builder.setHasCommittedMutations(existingDocument.hasCommittedMutations()); | ||
} else if (document instanceof UnknownDocument) { | ||
builder.setUnknownDocument(encodeUnknownDocument((UnknownDocument) document)); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,7 @@ | |
import com.google.firebase.firestore.remote.TargetChange; | ||
import com.google.firebase.firestore.util.Logger; | ||
import com.google.protobuf.ByteString; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
@@ -329,14 +330,27 @@ public ImmutableSortedMap<DocumentKey, MaybeDocument> applyRemoteEvent(RemoteEve | |
} | ||
} | ||
|
||
Set<DocumentKey> changedDocKeys = new HashSet<>(); | ||
Map<DocumentKey, MaybeDocument> changedDocs = new HashMap<>(); | ||
Map<DocumentKey, MaybeDocument> documentUpdates = remoteEvent.getDocumentUpdates(); | ||
Set<DocumentKey> limboDocuments = remoteEvent.getResolvedLimboDocuments(); | ||
|
||
Set<DocumentKey> keys = new HashSet<>(); | ||
for (Entry<DocumentKey, MaybeDocument> entry : documentUpdates.entrySet()) { | ||
keys.add(entry.getKey()); | ||
} | ||
List<MaybeDocument> existingDocs = remoteDocuments.getAll(keys); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Optimization 2 ("No get by one"): get all the documents from the local database in a single query. IIUC, keys are not repeated, so retrieving all the documents before going into the main for loop is okay, because no iteration of the loop may affect subsequent iterations. |
||
|
||
for (Entry<DocumentKey, MaybeDocument> entry : documentUpdates.entrySet()) { | ||
DocumentKey key = entry.getKey(); | ||
MaybeDocument doc = entry.getValue(); | ||
changedDocKeys.add(key); | ||
MaybeDocument existingDoc = remoteDocuments.get(key); | ||
MaybeDocument existingDoc = null; | ||
for (MaybeDocument aDoc : existingDocs) { | ||
if (aDoc.getKey().equals(key)) { | ||
existingDoc = aDoc; | ||
break; | ||
} | ||
} | ||
|
||
// If a document update isn't authoritative, make sure we don't | ||
// apply an old document version to the remote cache. We make an | ||
// exception for SnapshotVersion.MIN which can happen for | ||
|
@@ -347,6 +361,7 @@ public ImmutableSortedMap<DocumentKey, MaybeDocument> applyRemoteEvent(RemoteEve | |
|| (authoritativeUpdates.contains(doc.getKey()) && !existingDoc.hasPendingWrites()) | ||
|| doc.getVersion().compareTo(existingDoc.getVersion()) >= 0) { | ||
remoteDocuments.add(doc); | ||
changedDocs.put(key, doc); | ||
} else { | ||
Logger.debug( | ||
"LocalStore", | ||
|
@@ -355,6 +370,7 @@ public ImmutableSortedMap<DocumentKey, MaybeDocument> applyRemoteEvent(RemoteEve | |
key, | ||
existingDoc.getVersion(), | ||
doc.getVersion()); | ||
changedDocs.put(key, existingDoc); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems like this probably isn't necessary (nothing changed). WDYT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, the previous behavior was to add the key to `changedDocKeys`. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, logically this seems unnecessary. However, the previous behavior added the key to `changedDocKeys`. The tests pass either way, but I'm not sure we cover the situation when the server sends outdated docs. All in all, I'm a little wary about this change... What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does https://github.com/firebase/firebase-js-sdk/blob/master/packages/firestore/test/unit/specs/listen_spec.test.ts#L228 cover this? In general the best way to see if we have a test covering a behavior is to set a breakpoint and then check if we hit it under the debugger. I applaud your skeptical approach to changing existing behavior, but it's also worth considering that a lot of this code was written in a hurry or has evolved organically over time. For example, we only learned that Watch even had this kind of behavior after we observed it in a bug bash. The code to defend against this was added later and it's likely we just didn't adjust the initial change computation. In any case, our approach in the LocalStore has always been to err on the side of over-notifying because the view code is ultimately responsible for computing what has changed. 
This likely has no visible effect precisely because the view is discarding updates that don't net any changes, but that doesn't invalidate the logic behind changing this. We just need to avoid under-notifying--that's something for which the view can't compensate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm pretty sure it's safe. Since we're not updating the cached doc, it logically makes sense not to include it in the changed docs. The result of this method is used to update our Views, and again, since we kept the existing doc, no update should be needed. And this code is exercised by the "Listens: Individual documents cannot revert" spec test. So I'd feel comfortable removing it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
} | ||
|
||
if (limboDocuments.contains(key)) { | ||
|
@@ -376,7 +392,7 @@ public ImmutableSortedMap<DocumentKey, MaybeDocument> applyRemoteEvent(RemoteEve | |
queryCache.setLastRemoteSnapshotVersion(remoteVersion); | ||
} | ||
|
||
return localDocuments.getDocuments(changedDocKeys); | ||
return localDocuments.getDocuments(changedDocs); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Optimization 3 ("No double get"): avoid retrieving the documents from local database again, we already have them in this function; just apply the pending write batches to them. |
||
}); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,11 @@ | |
import com.google.firebase.firestore.model.ResourcePath; | ||
import com.google.protobuf.InvalidProtocolBufferException; | ||
import com.google.protobuf.MessageLite; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
import java.util.Map; | ||
import javax.annotation.Nullable; | ||
|
||
|
@@ -66,6 +70,62 @@ public MaybeDocument get(DocumentKey documentKey) { | |
.firstValue(row -> decodeMaybeDocument(row.getBlob(0))); | ||
} | ||
|
||
@Nullable | ||
@Override | ||
public List<MaybeDocument> getAll(Iterable<DocumentKey> documentKeys) { | ||
List<MaybeDocument> result = new ArrayList<>(); | ||
if (!documentKeys.iterator().hasNext()) { | ||
return result; | ||
} | ||
|
||
// SQLite limits maximum number of host parameters to 999 (see | ||
// https://www.sqlite.org/limits.html). To work around this, split the given keys into several | ||
// smaller sets and issue a separate query for each. | ||
int limit = 900; | ||
Iterator<DocumentKey> keyIter = documentKeys.iterator(); | ||
int queriesPerformed = 0; | ||
while (keyIter.hasNext()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Thanks, I didn't realize it can be done on |
||
++queriesPerformed; | ||
StringBuilder placeholdersBuilder = new StringBuilder(); | ||
List<String> args = new ArrayList<>(); | ||
|
||
for (int i = 0; keyIter.hasNext() && i < limit; i++) { | ||
DocumentKey key = keyIter.next(); | ||
|
||
if (i > 0) { | ||
placeholdersBuilder.append(", "); | ||
} | ||
placeholdersBuilder.append("?"); | ||
|
||
args.add(EncodedPath.encode(key.getPath())); | ||
} | ||
String placeholders = placeholdersBuilder.toString(); | ||
|
||
db.query( | ||
"SELECT contents FROM remote_documents " | ||
+ "WHERE path IN (" | ||
+ placeholders | ||
+ ") " | ||
+ "ORDER BY path") | ||
.binding(args.toArray()) | ||
.forEach( | ||
row -> { | ||
result.add(decodeMaybeDocument(row.getBlob(0))); | ||
}); | ||
} | ||
|
||
// If more than one query was issued, batches might be in an unsorted order (batches are ordered | ||
// within one query's results, but not across queries). It's likely to be rare, so don't impose | ||
// performance penalty on the normal case. | ||
if (queriesPerformed > 1) { | ||
Collections.sort( | ||
result, | ||
(MaybeDocument lhs, MaybeDocument rhs) -> | ||
lhs.getKey().compareTo(rhs.getKey())); | ||
} | ||
return result; | ||
} | ||
|
||
@Override | ||
public ImmutableSortedMap<DocumentKey, Document> getAllDocumentsMatchingQuery(Query query) { | ||
// Use the query path as a prefix for testing if a document matches the query. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,11 +109,13 @@ public void onHeaders(Metadata headers) { | |
public void onNext(RespT response) { | ||
dispatcher.run( | ||
() -> { | ||
Logger.debug( | ||
AbstractStream.this.getClass().getSimpleName(), | ||
"(%x) Stream received: %s", | ||
System.identityHashCode(AbstractStream.this), | ||
response); | ||
if (Logger.isDebugEnabled()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even with Proguard, logging adds about a second to 2-3 seconds the network usually takes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hrm! I wonder why... Since we're not building up the log string or anything, it seems like this should just be a plain cheap method call (which immediately no-ops on the receiving end), unless Any interest in digging a little deeper to see why it's slow? We may learn something that helps us preemptively improve other parts of the code [e.g. maybe we should cache this.getClass().getSimpleName()]. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The profiler shows There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm still struggling to see how String.format() would be getting called by this code. Maybe we can chat during standup. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for pushing me to dig into this. Looks like I got confused here. In the app, logging is on, which is why To hopefully untangle this, I reran the numbers, using three "variables":
Results (three runs in each case, Release mode, same device as before):
So it seems that in 17.1.1 (IIUC, the last SDK version that was proguarded), the difference between logging enabled and disabled is negligible, and whether the app itself is proguarded doesn't really matter.
In 17.1.2, an app pays significant penalty if it enables logging but isn't proguarded. The rest of the numbers are probably within error margin.
Surprisingly, for this branch, the penalty of no Proguard/logging seems even higher than 17.1.2 (but perhaps within fluctuation). If Proguard is enabled, logging doesn't add any significant difference. |
||
Logger.debug( | ||
AbstractStream.this.getClass().getSimpleName(), | ||
"(%x) Stream received: %s", | ||
System.identityHashCode(AbstractStream.this), | ||
response); | ||
} | ||
AbstractStream.this.onNext(response); | ||
}); | ||
} | ||
|
@@ -203,6 +205,7 @@ public void run() { | |
this.idleTimerId = idleTimerId; | ||
this.listener = listener; | ||
this.idleTimeoutRunnable = new IdleTimeoutRunnable(); | ||
|
||
backoff = | ||
new ExponentialBackoff( | ||
workerQueue, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1015,6 +1015,7 @@ public WatchChange decodeWatchChange(ListenResponse protoChange) { | |
!version.equals(SnapshotVersion.NONE), "Got a document change without an update time"); | ||
ObjectValue data = decodeFields(docChange.getDocument().getFieldsMap()); | ||
Document document = new Document(key, version, data, Document.DocumentState.SYNCED); | ||
document.setProto(docChange.getDocument()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For "Avoid encode" optimization: keep the original proto around. |
||
watchChange = new WatchChange.DocumentChange(added, removed, document.getKey(), document); | ||
break; | ||
case DOCUMENT_DELETE: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't bother with batches, because there are no pending write batches in the case being tested.