Skip to content

Enable LRU GC #68

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Dec 19, 2018
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,7 @@ private void ensureClientConfigured() {
new DatabaseInfo(databaseId, persistenceKey, settings.getHost(), settings.isSslEnabled());

client =
new FirestoreClient(
context,
databaseInfo,
settings.isPersistenceEnabled(),
credentialsProvider,
asyncQueue);
new FirestoreClient(context, databaseInfo, settings, credentialsProvider, asyncQueue);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@
/** Settings used to configure a FirebaseFirestore instance. */
@PublicApi
public final class FirebaseFirestoreSettings {
/**
* Set this constant as the value for {@link
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Maybe Constant to use with , since we want to describe what the value is, rather than what someone can do with it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure

* FirebaseFirestoreSettings.Builder#setCacheSizeBytes(long)} to disable garbage collection.
*/
@PublicApi public static final long CACHE_SIZE_UNLIMITED = -1;

private static final long MINIMUM_CACHE_BYTES = 1 * 1024 * 1024; // 1 MB
private static final long DEFAULT_CACHE_SIZE_BYTES = 100 * 1024 * 1024; // 100 MB
private static final String DEFAULT_HOST = "firestore.googleapis.com";
private static final boolean DEFAULT_TIMESTAMPS_IN_SNAPSHOTS_ENABLED = false;

Expand All @@ -34,6 +42,7 @@ public static final class Builder {
private boolean sslEnabled;
private boolean persistenceEnabled;
private boolean timestampsInSnapshotsEnabled;
private long cacheSizeBytes;

/** Constructs a new FirebaseFirestoreSettings Builder object. */
@PublicApi
Expand All @@ -42,6 +51,7 @@ public Builder() {
sslEnabled = true;
persistenceEnabled = true;
timestampsInSnapshotsEnabled = DEFAULT_TIMESTAMPS_IN_SNAPSHOTS_ENABLED;
cacheSizeBytes = DEFAULT_CACHE_SIZE_BYTES;
}

/**
Expand Down Expand Up @@ -124,6 +134,29 @@ public Builder setTimestampsInSnapshotsEnabled(boolean value) {
return this;
}

/**
* Sets an approximate cache size threshold for the on-disk data. If the cache grows beyond this
* size, Firestore will start removing data that hasn't been recently used. The size is not a
* guarantee that the cache will stay below that size, only that if the cache exceeds the given
* size, cleanup will be attempted.
*
* <p>The default value is 100 MB. The cache size must be set to at least 1 MB, and can be set
* to {@link FirebaseFirestoreSettings#CACHE_SIZE_UNLIMITED} to disable garbage collection.
*
* @return A settings object on which the cache size is configured as specified by the given
* {@code value}.
*/
@NonNull
@PublicApi
public Builder setCacheSizeBytes(long value) {
if (value != CACHE_SIZE_UNLIMITED && value < MINIMUM_CACHE_BYTES) {
throw new IllegalArgumentException(
"Cache size must be set to at least " + MINIMUM_CACHE_BYTES + " bytes");
}
this.cacheSizeBytes = value;
return this;
}

@NonNull
@PublicApi
public FirebaseFirestoreSettings build() {
Expand All @@ -139,13 +172,15 @@ public FirebaseFirestoreSettings build() {
private final boolean sslEnabled;
private final boolean persistenceEnabled;
private final boolean timestampsInSnapshotsEnabled;
private final long cacheSizeBytes;

/** Constructs a FirebaseFirestoreSettings object based on the values in the Builder. */
private FirebaseFirestoreSettings(Builder builder) {
host = builder.host;
sslEnabled = builder.sslEnabled;
persistenceEnabled = builder.persistenceEnabled;
timestampsInSnapshotsEnabled = builder.timestampsInSnapshotsEnabled;
cacheSizeBytes = builder.cacheSizeBytes;
}

@Override
Expand All @@ -161,7 +196,8 @@ public boolean equals(@Nullable Object o) {
return host.equals(that.host)
&& sslEnabled == that.sslEnabled
&& persistenceEnabled == that.persistenceEnabled
&& timestampsInSnapshotsEnabled == that.timestampsInSnapshotsEnabled;
&& timestampsInSnapshotsEnabled == that.timestampsInSnapshotsEnabled
&& cacheSizeBytes == that.cacheSizeBytes;
}

@Override
Expand All @@ -170,6 +206,7 @@ public int hashCode() {
result = 31 * result + (sslEnabled ? 1 : 0);
result = 31 * result + (persistenceEnabled ? 1 : 0);
result = 31 * result + (timestampsInSnapshotsEnabled ? 1 : 0);
result = 31 * result + (int) cacheSizeBytes;
return result;
}

Expand Down Expand Up @@ -211,4 +248,13 @@ public boolean isPersistenceEnabled() {
public boolean areTimestampsInSnapshotsEnabled() {
return timestampsInSnapshotsEnabled;
}

/**
* Returns the threshold for the cache size above which the SDK will attempt to collect the least
* recently used documents.
*/
@PublicApi
public long getCacheSizeBytes() {
return cacheSizeBytes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,14 @@
import com.google.firebase.firestore.EventListener;
import com.google.firebase.firestore.FirebaseFirestoreException;
import com.google.firebase.firestore.FirebaseFirestoreException.Code;
import com.google.firebase.firestore.FirebaseFirestoreSettings;
import com.google.firebase.firestore.auth.CredentialsProvider;
import com.google.firebase.firestore.auth.User;
import com.google.firebase.firestore.core.EventManager.ListenOptions;
import com.google.firebase.firestore.local.LocalSerializer;
import com.google.firebase.firestore.local.LocalStore;
import com.google.firebase.firestore.local.LruDelegate;
import com.google.firebase.firestore.local.LruGarbageCollector;
import com.google.firebase.firestore.local.MemoryPersistence;
import com.google.firebase.firestore.local.Persistence;
import com.google.firebase.firestore.local.SQLitePersistence;
Expand All @@ -51,6 +54,7 @@
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

/**
Expand All @@ -61,6 +65,11 @@ public final class FirestoreClient implements RemoteStore.RemoteStoreCallback {

private static final String LOG_TAG = "FirestoreClient";

/** How long we wait to try running LRU GC after SDK initialization. */
private static final long INITIAL_GC_DELAY_MS = TimeUnit.MINUTES.toMillis(1);
/** Minimum amount of time between GC checks, after the first one. */
private static final long REGULAR_GC_DELAY_MS = TimeUnit.MINUTES.toMillis(5);

private final DatabaseInfo databaseInfo;
private final CredentialsProvider credentialsProvider;
private final AsyncQueue asyncQueue;
Expand All @@ -71,10 +80,15 @@ public final class FirestoreClient implements RemoteStore.RemoteStoreCallback {
private SyncEngine syncEngine;
private EventManager eventManager;

// LRU-related
private boolean gcHasRun = false;
@Nullable private LruDelegate lruDelegate;
@Nullable private AsyncQueue.DelayedTask gcTask;

public FirestoreClient(
final Context context,
DatabaseInfo databaseInfo,
final boolean usePersistence,
FirebaseFirestoreSettings settings,
CredentialsProvider credentialsProvider,
final AsyncQueue asyncQueue) {
this.databaseInfo = databaseInfo;
Expand Down Expand Up @@ -105,7 +119,11 @@ public FirestoreClient(
try {
// Block on initial user being available
User initialUser = Tasks.await(firstUser.getTask());
initialize(context, initialUser, usePersistence);
initialize(
context,
initialUser,
settings.isPersistenceEnabled(),
settings.getCacheSizeBytes());
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
Expand All @@ -127,6 +145,9 @@ public Task<Void> shutdown() {
() -> {
remoteStore.shutdown();
persistence.shutdown();
if (gcTask != null) {
gcTask.cancel();
}
});
}

Expand Down Expand Up @@ -194,7 +215,7 @@ public <TResult> Task<TResult> transaction(
() -> syncEngine.transaction(asyncQueue, updateFunction, retries));
}

private void initialize(Context context, User user, boolean usePersistence) {
private void initialize(Context context, User user, boolean usePersistence, long cacheSizeBytes) {
// Note: The initialization work must all be synchronous (we can't dispatch more work) since
// external write/listen operations could get queued to run before that subsequent work
// completes.
Expand All @@ -203,9 +224,18 @@ private void initialize(Context context, User user, boolean usePersistence) {
if (usePersistence) {
LocalSerializer serializer =
new LocalSerializer(new RemoteSerializer(databaseInfo.getDatabaseId()));
persistence =
LruGarbageCollector.Params params =
LruGarbageCollector.Params.WithCacheSizeBytes(cacheSizeBytes);
SQLitePersistence sqlitePersistence =
new SQLitePersistence(
context, databaseInfo.getPersistenceKey(), databaseInfo.getDatabaseId(), serializer);
context,
databaseInfo.getPersistenceKey(),
databaseInfo.getDatabaseId(),
serializer,
params);
lruDelegate = sqlitePersistence.getReferenceDelegate();
persistence = sqlitePersistence;
scheduleLruGarbageCollection();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to check if the cache size is unlimited, so that we can skip the GC run?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good question, but I think I'd prefer to leave it the way it is. The reasoning is this: currently the check for actually running the garbage collection is confined to one place: the garbage collector. If in the future we decide to customize the GC runs more than we have, I think it will be helpful to have that logic encapsulated.

In addition, we would still need to check against CACHE_SIZE_UNLIMITED in the garbage collector. If we did somehow run it with that value set, the cache size would always be greater than our threshold, giving us the opposite of what we want.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can look at this more in the future, but it might actually make sense to split these up, using the same argument that you provided: In FirebaseFirestoreSettings/FirestoreClient we can validate whether the settings for the GC algorithm make sense. In the GC code, we can then always assume that all values make sense and we don't have to special case anything.

That's just my two cents and your mileage may vary. I believe that for now the PR could be simplified if we didn't have to differentiate between -1 and >0 in the LRU GC.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not wild about FirestoreClient having to know about the constant for whether GC is enabled or not, but I think we can compromise here. What if we gate scheduling on a call to garbageCollector.isEnabled() or something like that, as well as add an assert to the actual run of the garbage collector, asserting that the minimum threshold is > 0? That way, if we ever decide to do something like allow enabling and disabling of garbage collection, the source of truth is still localized to the garbage collector, we'll have asserts if we get something wrong, and we won't fire the timer if we don't need it.

What do you think?

Copy link
Contributor

@schmidt-sebastian schmidt-sebastian Oct 20, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I think that would indeed be a compromise, and it's one that I am less happy with than what we currently have. I prefer the current solution over asking GC whether it is enabled and then acting upon this by asking GC to do its job.

I thought of another approach though - can we passed the AsyncQueue to the GC algorithm and let it schedule itself if it needs to?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@schmidt-sebastian I started implementing this, but in the process of doing the plumbing to get the AsyncQueue to the garbage collector, I decided I really don't like it. A lot of tests that currently run synchronously and have no need of even the notion of our worker thread would have to have an async queue because they happen to use persistence.

Additionally, I believe the only component that is initialized currently with an async queue is the remote store. I don't think we want to extend that list to include persistence, which is also sort of an end run around sync engine and local store having an async queue.

My next proposal is this instead: what about an object that is solely responsible for the GC scheduling, owned by FirestoreClient? My model for doing something like this is ExponentialBackoff. FirestoreClient would initialize one of these, and then cancel it on shutdown, but would otherwise ignore it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey :) I like the idea of following the pattern of ExponentialBackoff. Please consider how much boilerplate this might add though, as the current solution might just be a good enough "compromise". If you do want to pursue this and it turns out that reworking the initialization adds a considerable amount of code, then maybe we should revisit this at a later point.

FWIW, the Web JS SDK does have a dependency on the AsyncQueue from the persistence layer (but that doesn't mean that we have to follow this example).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took a quick stab at it: #111

I'm not sure it's better, maybe just different? Let me know what you think. If you think it's better, then I'll clean it up and we can merge it, otherwise let's keep what's here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do like this quite a bit, since it moves the LRU state out of Firestore Client and nicely encapsulates the different timeouts and its scheduling. If you have some time to clean it up (not sure how much you were thinking?) and merge it into here, then I'd be very happy :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I'll merge it here shortly.

} else {
persistence = MemoryPersistence.createEagerGcMemoryPersistence();
}
Expand All @@ -225,6 +255,19 @@ private void initialize(Context context, User user, boolean usePersistence) {
remoteStore.start();
}

private void scheduleLruGarbageCollection() {
long delay = gcHasRun ? REGULAR_GC_DELAY_MS : INITIAL_GC_DELAY_MS;
gcTask =
asyncQueue.enqueueAfterDelay(
AsyncQueue.TimerId.GARBAGE_COLLECTION,
delay,
() -> {
localStore.collectGarbage(lruDelegate.getGarbageCollector());
gcHasRun = true;
scheduleLruGarbageCollection();
});
}

@Override
public void handleRemoteEvent(RemoteEvent remoteEvent) {
syncEngine.handleRemoteEvent(remoteEvent);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -555,4 +555,8 @@ private void applyWriteToRemoteDocuments(MutationBatchResult batchResult) {

mutationQueue.removeMutationBatch(batch);
}

public LruGarbageCollector.Results collectGarbage(LruGarbageCollector garbageCollector) {
return persistence.runTransaction("Collect garbage", () -> garbageCollector.collect(targetIds));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
* Persistence layers intending to use LRU Garbage collection should implement this interface. This
* interface defines the operations that the LRU garbage collector needs from the persistence layer.
*/
interface LruDelegate {
public interface LruDelegate {

/** Enumerates all the targets in the QueryCache. */
void forEachTarget(Consumer<QueryData> consumer);
Expand Down Expand Up @@ -49,4 +49,7 @@ interface LruDelegate {

/** Access to the underlying LRU Garbage collector instance. */
LruGarbageCollector getGarbageCollector();

/** Return the size of the cache in bytes. */
long getByteSize();
}
Loading