Skip to content

Performance: Don't deserialize full document Proto for Query execution #561

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 27, 2019
Merged
2 changes: 2 additions & 0 deletions firebase-firestore/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Unreleased
- [changed] Improved performance for queries that only return a subset of the
data in a collection.
- [changed] Instead of failing silently, Firestore now crashes the client app
if it fails to load SSL Ciphers. To avoid these crashes, you must bundle
Conscrypt to support non-GMSCore devices on Android KitKat or JellyBean (see
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,22 +174,23 @@ public static SnapshotVersion version(long versionMicros) {
}

public static Document doc(String key, long version, Map<String, Object> data) {
return new Document(
return Document.fromObjectValue(
key(key), version(version), wrapObject(data), Document.DocumentState.SYNCED);
}

public static Document doc(DocumentKey key, long version, Map<String, Object> data) {
return new Document(key, version(version), wrapObject(data), Document.DocumentState.SYNCED);
return Document.fromObjectValue(
key, version(version), wrapObject(data), Document.DocumentState.SYNCED);
}

public static Document doc(
String key, long version, ObjectValue data, Document.DocumentState documentState) {
return new Document(key(key), version(version), data, documentState);
return Document.fromObjectValue(key(key), version(version), data, documentState);
}

public static Document doc(
String key, long version, Map<String, Object> data, Document.DocumentState documentState) {
return new Document(key(key), version(version), wrapObject(data), documentState);
return Document.fromObjectValue(key(key), version(version), wrapObject(data), documentState);
}

public static NoDocument deletedDoc(String key, long version) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,9 @@ private com.google.firestore.v1.Document encodeDocument(Document document) {
/** Decodes a Document proto to the equivalent model. */
private Document decodeDocument(
com.google.firestore.v1.Document document, boolean hasCommittedMutations) {
DocumentKey key = rpcSerializer.decodeKey(document.getName());
ObjectValue value = rpcSerializer.decodeFields(document.getFieldsMap());
SnapshotVersion version = rpcSerializer.decodeVersion(document.getUpdateTime());
return new Document(
key,
version,
value,
return Document.fromProto(
this.rpcSerializer,
document,
hasCommittedMutations
? Document.DocumentState.COMMITTED_MUTATIONS
: Document.DocumentState.SYNCED);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,20 @@

package com.google.firebase.firestore.model;

import static com.google.firebase.firestore.util.Assert.hardAssert;

import com.google.firebase.firestore.model.value.FieldValue;
import com.google.firebase.firestore.model.value.ObjectValue;
import com.google.firebase.firestore.remote.RemoteSerializer;
import com.google.firestore.v1.Value;
import java.util.Comparator;
import javax.annotation.Nullable;

/**
* Represents a document in Firestore with a key, version, data and whether the data has local
* mutations applied to it.
*/
public final class Document extends MaybeDocument {
public abstract class Document extends MaybeDocument {

/** Describes the `hasPendingWrites` state of a document. */
public enum DocumentState {
Expand All @@ -36,59 +40,107 @@ public enum DocumentState {
}

private static final Comparator<Document> KEY_COMPARATOR =
new Comparator<Document>() {
@Override
public int compare(Document left, Document right) {
return left.getKey().compareTo(right.getKey());
}
};
(left, right) -> left.getKey().compareTo(right.getKey());

/** A document comparator that returns document by key and key only. */
public static Comparator<Document> keyComparator() {
return KEY_COMPARATOR;
}

private final ObjectValue data;

private final DocumentState documentState;
/** Creates a new Document from an existing ObjectValue. */
public static Document fromObjectValue(
DocumentKey key, SnapshotVersion version, ObjectValue data, DocumentState documentState) {
return new Document(key, version, documentState) {
@Nullable
@Override
public com.google.firestore.v1.Document getProto() {
return null;
}

@Override
public ObjectValue getData() {
return data;
}

@Nullable
@Override
public FieldValue getField(FieldPath path) {
return data.get(path);
}
};
}

/**
* Memoized serialized form of the document for optimization purposes (avoids repeated
* serialization). Might be null.
* Creates a new Document from a Proto representation.
*
* <p>The Proto is only converted to an ObjectValue if the consumer calls `getData()`.
*/
private final com.google.firestore.v1.Document proto;

public @Nullable com.google.firestore.v1.Document getProto() {
return proto;
public static Document fromProto(
RemoteSerializer serializer,
com.google.firestore.v1.Document proto,
DocumentState documentState) {
DocumentKey key = serializer.decodeKey(proto.getName());
SnapshotVersion version = serializer.decodeVersion(proto.getUpdateTime());

hardAssert(
!version.equals(SnapshotVersion.NONE), "Found a document Proto with no snapshot version");

return new Document(key, version, documentState) {
private ObjectValue memoizedData = null;

@Override
public com.google.firestore.v1.Document getProto() {
return proto;
}

@Override
public ObjectValue getData() {
if (memoizedData != null) {
return memoizedData;
} else {
memoizedData = serializer.decodeFields(proto.getFieldsMap());
return memoizedData;
}
}

@Nullable
@Override
public FieldValue getField(FieldPath path) {
if (memoizedData != null) {
return memoizedData.get(path);
} else {
// Instead of deserializing the full Document proto, we only deserialize the value at
// the requested field path. This speeds up Query execution as query filters can discard
// documents based on a single field.
Value value = proto.getFieldsMap().get(path.getFirstSegment());
for (int i = 1; value != null && i < path.length(); ++i) {
if (value.getValueTypeCase() != Value.ValueTypeCase.MAP_VALUE) {
return null;
}
value = value.getMapValue().getFieldsMap().get(path.getSegment(i));
}
return value != null ? serializer.decodeValue(value) : null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our OrderBy implementation does not implement a Schwartzian transform so we'll be performing these decodes for each comparison. For a large collection this could potentially eat into the gains you're observing here. I wonder if a cache of FieldPath to FieldValue would hurt here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a cache - but this has to be a ConcurrentHashMap since this function can be called from user land. Please do shout if that is not actually an issue :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Users can call this, albeit indirectly. I don't really care if we cached these values for that purpose. I wish there were a way to avoid instantiating a new ConcurrentHashMap per Document.

Were you able to see the effect of the cache in a query that had an orderby? If not, this probably isn't justified?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the impact of this caching will be hard to quantify. I thought about adding this initially, but was only won over by your comments earlier. We do re-run getField() excessively during the computation of our Views.

I changed the code to lazy-init the field value cache. I do want to change the comparator to compare on the Protos directly, at which point we can remove the cache again.

}
}
};
}

public Document(
DocumentKey key, SnapshotVersion version, ObjectValue data, DocumentState documentState) {
super(key, version);
this.data = data;
this.documentState = documentState;
this.proto = null;
}
private final DocumentState documentState;

public Document(
DocumentKey key,
SnapshotVersion version,
ObjectValue data,
DocumentState documentState,
com.google.firestore.v1.Document proto) {
private Document(DocumentKey key, SnapshotVersion version, DocumentState documentState) {
super(key, version);
this.data = data;
this.documentState = documentState;
this.proto = proto;
}

public ObjectValue getData() {
return data;
}
/**
* Memoized serialized form of the document for optimization purposes (avoids repeated
* serialization). Might be null.
*/
public abstract @Nullable com.google.firestore.v1.Document getProto();

public @Nullable FieldValue getField(FieldPath path) {
return data.get(path);
}
public abstract ObjectValue getData();

public abstract @Nullable FieldValue getField(FieldPath path);

public @Nullable Object getFieldValue(FieldPath path) {
FieldValue value = getField(path);
Expand All @@ -113,7 +165,7 @@ public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
if (!(o instanceof Document)) {
return false;
}

Expand All @@ -122,13 +174,12 @@ public boolean equals(Object o) {
return getVersion().equals(document.getVersion())
&& getKey().equals(document.getKey())
&& documentState.equals(document.documentState)
&& data.equals(document.data);
&& getData().equals(document.getData());
}

@Override
public int hashCode() {
int result = getKey().hashCode();
result = 31 * result + data.hashCode();
result = 31 * result + getVersion().hashCode();
result = 31 * result + documentState.hashCode();
return result;
Expand All @@ -140,7 +191,7 @@ public String toString() {
+ "key="
+ getKey()
+ ", data="
+ data
+ getData()
+ ", version="
+ getVersion()
+ ", documentState="
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ public MaybeDocument applyToRemoteDocument(

SnapshotVersion version = mutationResult.getVersion();
ObjectValue newData = patchDocument(maybeDoc);
return new Document(getKey(), version, newData, Document.DocumentState.COMMITTED_MUTATIONS);
return Document.fromObjectValue(
getKey(), version, newData, Document.DocumentState.COMMITTED_MUTATIONS);
}

@Nullable
Expand All @@ -127,7 +128,8 @@ public MaybeDocument applyToLocalView(

SnapshotVersion version = getPostMutationVersion(maybeDoc);
ObjectValue newData = patchDocument(maybeDoc);
return new Document(getKey(), version, newData, Document.DocumentState.LOCAL_MUTATIONS);
return Document.fromObjectValue(
getKey(), version, newData, Document.DocumentState.LOCAL_MUTATIONS);
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ public MaybeDocument applyToRemoteDocument(
// accepted the mutation so the precondition must have held.

SnapshotVersion version = mutationResult.getVersion();
return new Document(getKey(), version, value, Document.DocumentState.COMMITTED_MUTATIONS);
return Document.fromObjectValue(
getKey(), version, value, Document.DocumentState.COMMITTED_MUTATIONS);
}

@Nullable
Expand All @@ -86,7 +87,8 @@ public MaybeDocument applyToLocalView(
}

SnapshotVersion version = getPostMutationVersion(maybeDoc);
return new Document(getKey(), version, value, Document.DocumentState.LOCAL_MUTATIONS);
return Document.fromObjectValue(
getKey(), version, value, Document.DocumentState.LOCAL_MUTATIONS);
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public MaybeDocument applyToRemoteDocument(
List<FieldValue> transformResults =
serverTransformResults(doc, mutationResult.getTransformResults());
ObjectValue newData = transformObject(doc.getData(), transformResults);
return new Document(
return Document.fromObjectValue(
getKey(), mutationResult.getVersion(), newData, Document.DocumentState.COMMITTED_MUTATIONS);
}

Expand All @@ -119,7 +119,7 @@ public MaybeDocument applyToLocalView(
Document doc = requireDocument(maybeDoc);
List<FieldValue> transformResults = localTransformResults(localWriteTime, baseDoc);
ObjectValue newData = transformObject(doc.getData(), transformResults);
return new Document(
return Document.fromObjectValue(
getKey(), doc.getVersion(), newData, Document.DocumentState.LOCAL_MUTATIONS);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,12 +405,7 @@ private Document decodeFoundDocument(BatchGetDocumentsResponse response) {
Assert.hardAssert(
response.getResultCase().equals(ResultCase.FOUND),
"Tried to deserialize a found document from a missing document.");
DocumentKey key = decodeKey(response.getFound().getName());
ObjectValue value = decodeFields(response.getFound().getFieldsMap());
SnapshotVersion version = decodeVersion(response.getFound().getUpdateTime());
hardAssert(
!version.equals(SnapshotVersion.NONE), "Got a document response with no snapshot version");
return new Document(key, version, value, Document.DocumentState.SYNCED, response.getFound());
return Document.fromProto(this, response.getFound(), Document.DocumentState.SYNCED);
}

private NoDocument decodeMissingDocument(BatchGetDocumentsResponse response) {
Expand Down Expand Up @@ -1041,24 +1036,16 @@ public WatchChange decodeWatchChange(ListenResponse protoChange) {
DocumentChange docChange = protoChange.getDocumentChange();
List<Integer> added = docChange.getTargetIdsList();
List<Integer> removed = docChange.getRemovedTargetIdsList();
DocumentKey key = decodeKey(docChange.getDocument().getName());
SnapshotVersion version = decodeVersion(docChange.getDocument().getUpdateTime());
hardAssert(
!version.equals(SnapshotVersion.NONE), "Got a document change without an update time");
ObjectValue data = decodeFields(docChange.getDocument().getFieldsMap());
// The document may soon be re-serialized back to protos in order to store it in local
// persistence. Memoize the encoded form to avoid encoding it again.
Document document =
new Document(
key, version, data, Document.DocumentState.SYNCED, docChange.getDocument());
Document.fromProto(this, docChange.getDocument(), Document.DocumentState.SYNCED);
watchChange = new WatchChange.DocumentChange(added, removed, document.getKey(), document);
break;
case DOCUMENT_DELETE:
DocumentDelete docDelete = protoChange.getDocumentDelete();
removed = docDelete.getRemovedTargetIdsList();
key = decodeKey(docDelete.getDocument());
DocumentKey key = decodeKey(docDelete.getDocument());
// Note that version might be unset in which case we use SnapshotVersion.NONE
version = decodeVersion(docDelete.getReadTime());
SnapshotVersion version = decodeVersion(docDelete.getReadTime());
NoDocument doc = new NoDocument(key, version, /*hasCommittedMutations=*/ false);
watchChange =
new WatchChange.DocumentChange(Collections.emptyList(), removed, doc.getKey(), doc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,13 @@ public void testEquals() {
assertNotEquals(base, differentData);
assertNotEquals(base, fromCache);

// Note: `base` and `differentData` have the same hash code since we no longer take document
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can't be right. equals and hashCode have to be consistent or all kinds of hell breaks loose when you use these in hash sets or as keys in a map. If two objects are equal for equality purposes, the contract for hashCode requires that the hashCodes be equal too.

We can't just ignore data in equality. There are a few cases to consider:

  • When version=SnapshotVersion.MIN the documents are just made-up, so data must play a roll
  • When DocumentState isn't synced, the documents are potentially dirty even if they have a version

However if we have a non-MIN version and the DocumentState is SYNCED then we could ignore the data for equality and hashing purposes. This would preserve what you're after here which is avoiding rehydrating the data in the common case of loading a value from the remote document cache.

What do you think of implementing both equality and hashCode according to the principle above? I think that would give most (if not all) the benefit without compromising the contract of equals/hashCode.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hrm. This is a good comment, and I 100% agree with you, but I don't think I broke the contract here in the way that you stated. I removed data from hash code (I am still using document key and version). This might cause more hash collisions, but the invariant that two equal objects have the same hash code should remain intact.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ha! Sorry about misreading that. I didn't realize that this was about an expected collision.

An alternative phrasing that might be less confusing is:

Note: the assertions below that hash codes of different values are not equal is not something that can be guaranteed. In particular base and differentData have a hash collision because we don't use data in the hashCode.

You might also want to call attention to this in an implementation comment in DocumentSnapshot.hashCode(). That way someone won't see that the value is unused in the future and then accidentally "fix" it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea about the additional comment. I added it to Document.hashCode().

// contents into account.
assertEquals(base.hashCode(), baseDup.hashCode());
assertEquals(noData.hashCode(), noDataDup.hashCode());
assertNotEquals(base.hashCode(), noData.hashCode());
assertNotEquals(noData.hashCode(), base.hashCode());
assertNotEquals(base.hashCode(), differentPath.hashCode());
assertNotEquals(base.hashCode(), differentData.hashCode());
assertNotEquals(base.hashCode(), fromCache.hashCode());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,10 @@ public void testEquals() {
assertNotEquals(foo, noPendingWrites);
assertNotEquals(foo, fromCache);

// Note: `foo` and `differentDoc` have the same hash code since we no longer take document
// contents into account.
assertEquals(foo.hashCode(), fooDup.hashCode());
assertNotEquals(foo.hashCode(), differentPath.hashCode());
assertNotEquals(foo.hashCode(), differentDoc.hashCode());
assertNotEquals(foo.hashCode(), noPendingWrites.hashCode());
assertNotEquals(foo.hashCode(), fromCache.hashCode());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,15 +229,15 @@ public void testHandlesSetMutation() {
assertChanged(doc("foo/bar", 0, map("foo", "bar"), Document.DocumentState.LOCAL_MUTATIONS));
assertContains(doc("foo/bar", 0, map("foo", "bar"), Document.DocumentState.LOCAL_MUTATIONS));

acknowledgeMutation(0);
assertChanged(doc("foo/bar", 0, map("foo", "bar"), Document.DocumentState.COMMITTED_MUTATIONS));
acknowledgeMutation(1);
assertChanged(doc("foo/bar", 1, map("foo", "bar"), Document.DocumentState.COMMITTED_MUTATIONS));
if (garbageCollectorIsEager()) {
// Nothing is pinning this anymore, as it has been acknowledged and there are no targets
// active.
assertNotContains("foo/bar");
} else {
assertContains(
doc("foo/bar", 0, map("foo", "bar"), Document.DocumentState.COMMITTED_MUTATIONS));
doc("foo/bar", 1, map("foo", "bar"), Document.DocumentState.COMMITTED_MUTATIONS));
}
}

Expand Down
Loading