-
Notifications
You must be signed in to change notification settings - Fork 615
Fix Android Connectivity Monitor (v2) #1045
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 25 commits
11ca8a5
448311d
403f91f
00568be
c865827
d714378
55e8f46
d202893
411ed9d
1519a40
5bce0a0
ba515b7
110fdc3
c9cafd5
b77e02e
31d0ad8
8b2ad17
af9ed48
1cca6a4
86b4733
873d83d
d321316
a5dba09
51fac1c
595046f
a3fc304
36e448f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,8 @@ | |
|
||
package com.google.firebase.firestore.remote; | ||
|
||
import static com.google.firebase.firestore.util.Assert.hardAssert; | ||
|
||
import android.content.Context; | ||
import androidx.annotation.VisibleForTesting; | ||
import com.google.android.gms.common.GooglePlayServicesNotAvailableException; | ||
|
@@ -23,13 +25,16 @@ | |
import com.google.android.gms.tasks.Tasks; | ||
import com.google.firebase.firestore.core.DatabaseInfo; | ||
import com.google.firebase.firestore.util.AsyncQueue; | ||
import com.google.firebase.firestore.util.AsyncQueue.DelayedTask; | ||
import com.google.firebase.firestore.util.AsyncQueue.TimerId; | ||
import com.google.firebase.firestore.util.Executors; | ||
import com.google.firebase.firestore.util.Logger; | ||
import com.google.firebase.firestore.util.Supplier; | ||
import com.google.firestore.v1.FirestoreGrpc; | ||
import io.grpc.CallCredentials; | ||
import io.grpc.CallOptions; | ||
import io.grpc.ClientCall; | ||
import io.grpc.ConnectivityState; | ||
import io.grpc.ManagedChannel; | ||
import io.grpc.ManagedChannelBuilder; | ||
import io.grpc.MethodDescriptor; | ||
|
@@ -45,11 +50,22 @@ public class GrpcCallProvider { | |
|
||
private static Supplier<ManagedChannelBuilder<?>> overrideChannelBuilderSupplier; | ||
|
||
private final Task<ManagedChannel> channelTask; | ||
private Task<ManagedChannel> channelTask; | ||
private final AsyncQueue asyncQueue; | ||
|
||
private CallOptions callOptions; | ||
|
||
// This timeout is used when attempting to establish a connection in gRPC. If a connection attempt | ||
// does not succeed in CONNECTIVITY_ATTEMPT_TIMEOUT_MS, we restart the channel and try | ||
// reconnecting again, rather than waiting up to 2+ minutes for gRPC to timeout. | ||
// More details about usage can be found in GrpcCallProvider.onConnectivityStateChanged(). | ||
private static final int CONNECTIVITY_ATTEMPT_TIMEOUT_MS = 15 * 1000; | ||
private DelayedTask connectivityAttemptTimer; | ||
|
||
private final Context context; | ||
private final DatabaseInfo databaseInfo; | ||
private final CallCredentials firestoreHeaders; | ||
|
||
/** | ||
* Helper function to globally override the channel that RPCs use. Useful for testing when you | ||
* want to bypass SSL certificate checking. | ||
|
@@ -69,24 +85,11 @@ public static void overrideChannelBuilder( | |
DatabaseInfo databaseInfo, | ||
CallCredentials firestoreHeaders) { | ||
this.asyncQueue = asyncQueue; | ||
this.context = context; | ||
this.databaseInfo = databaseInfo; | ||
this.firestoreHeaders = firestoreHeaders; | ||
|
||
// We execute network initialization on a separate thread to not block operations that depend on | ||
// the AsyncQueue. | ||
this.channelTask = | ||
Tasks.call( | ||
Executors.BACKGROUND_EXECUTOR, | ||
() -> { | ||
ManagedChannel channel = initChannel(context, databaseInfo); | ||
FirestoreGrpc.FirestoreStub firestoreStub = | ||
FirestoreGrpc.newStub(channel) | ||
.withCallCredentials(firestoreHeaders) | ||
// Ensure all callbacks are issued on the worker queue. If this call is | ||
// removed, all calls need to be audited to make sure they are executed on the | ||
// right thread. | ||
.withExecutor(asyncQueue.getExecutor()); | ||
callOptions = firestoreStub.getCallOptions(); | ||
return channel; | ||
}); | ||
initChannelTask(); | ||
} | ||
|
||
/** Sets up the SSL provider and configures the gRPC channel. */ | ||
|
@@ -198,4 +201,84 @@ void shutdown() { | |
Thread.currentThread().interrupt(); | ||
} | ||
} | ||
|
||
/** | ||
* Monitors the connectivity state of the gRPC channel and resets the channel when gRPC fails to | ||
* connect. | ||
* | ||
* <p>We currently cannot configure timeouts in connection attempts for gRPC | ||
* (https://github.com/grpc/grpc-java/issues/1943), and until they support doing so, the gRPC | ||
* connection can stay open for up to 2+ minutes before shutting down. | ||
* | ||
* <p>We start a timer when the channel enters ConnectivityState.CONNECTING. If the timer elapses, | ||
* we reset the channel by shutting it down and reinitializing the channelTask. Changes to the | ||
* connectivity state will clear the timer and start a new one-time listener for the next | ||
* ConnectivityState change. | ||
* | ||
* @param channel The channel to monitor the connectivity state of. | ||
*/ | ||
private void onConnectivityStateChange(ManagedChannel channel) { | ||
thebrianchen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ConnectivityState newState = channel.getState(true); | ||
Logger.debug(LOG_TAG, "Current gRPC connectivity state: " + newState); | ||
// Check that the new state is online, then cancel timer. | ||
if (newState == ConnectivityState.CONNECTING) { | ||
Logger.debug(LOG_TAG, "Setting the connectivityAttemptTimer"); | ||
hardAssert( | ||
connectivityAttemptTimer == null, | ||
"connectivityAttemptTimer should be null when setting a new timer."); | ||
thebrianchen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
connectivityAttemptTimer = | ||
thebrianchen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
asyncQueue.enqueueAfterDelay( | ||
TimerId.CONNECTIVITY_ATTEMPT_TIMER, | ||
CONNECTIVITY_ATTEMPT_TIMEOUT_MS, | ||
() -> { | ||
Logger.debug(LOG_TAG, "connectivityAttemptTimer elapsed. Resetting the channel."); | ||
clearConnectivityAttemptTimer(); | ||
resetChannel(channel); | ||
}); | ||
} else { | ||
// Clear the timer otherwise, so we don't end up with multiple connectivityAttemptTimers. | ||
clearConnectivityAttemptTimer(); | ||
} | ||
// Re-listen for next state change. | ||
channel.notifyWhenStateChanged( | ||
newState, () -> asyncQueue.enqueueAndForget(() -> onConnectivityStateChange(channel))); | ||
} | ||
|
||
private void resetChannel(ManagedChannel channel) { | ||
asyncQueue.enqueueAndForget( | ||
() -> { | ||
channel.shutdownNow(); | ||
initChannelTask(); | ||
}); | ||
} | ||
|
||
private void initChannelTask() { | ||
// We execute network initialization on a separate thread to not block operations that depend on | ||
// the AsyncQueue. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One potential concern with recreating the channel is it could be expensive. We're already doing it on a background thread for performance reasons, so it's possible this adds some meaningful delay. As part of your logging, I'd recommend logging the start/finish of this so that we can get timing data and see how long this takes each time. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's taking around 15-40ms to reset the channel from what I've seen based on the logs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hrm. That is longer than I hoped. I don't know if it's a problem or not. I suggest getting input from Gil and gRPC folks (and perhaps point out this delay to gRPC and see if they have other suggestions for implementing a connection timeout). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW, we're spending an extra 40 ms to bring the reaction time on failed reconnects down from two minutes to 15 seconds. While 40 ms isn't exactly cheap it creates a big enough win that it seems worthwhile. Also note that this only kicks in when Android's own network transition logic isn't kicking in. That we're not being inundated with requests for this feature suggests that it's going to be fairly rare. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
this.channelTask = | ||
Tasks.call( | ||
Executors.BACKGROUND_EXECUTOR, | ||
() -> { | ||
ManagedChannel channel = initChannel(context, databaseInfo); | ||
onConnectivityStateChange(channel); | ||
FirestoreGrpc.FirestoreStub firestoreStub = | ||
FirestoreGrpc.newStub(channel) | ||
.withCallCredentials(firestoreHeaders) | ||
// Ensure all callbacks are issued on the worker queue. If this call is | ||
// removed, all calls need to be audited to make sure they are executed on the | ||
// right thread. | ||
.withExecutor(asyncQueue.getExecutor()); | ||
callOptions = firestoreStub.getCallOptions(); | ||
Logger.debug(LOG_TAG, "Channel successfully reset."); | ||
return channel; | ||
}); | ||
} | ||
|
||
private void clearConnectivityAttemptTimer() { | ||
if (connectivityAttemptTimer != null) { | ||
Logger.debug(LOG_TAG, "Clearing the connectivityAttemptTimer"); | ||
connectivityAttemptTimer.cancel(); | ||
connectivityAttemptTimer = null; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,15 +29,22 @@ public class ExponentialBackoff { | |
|
||
public static final double DEFAULT_BACKOFF_FACTOR = 1.5; | ||
|
||
/** Maximum backoff time in milliseconds */ | ||
public static final long DEFAULT_BACKOFF_MAX_DELAY_MS = 60 * 1000; | ||
|
||
private final AsyncQueue queue; | ||
private final TimerId timerId; | ||
private final long initialDelayMs; | ||
private final double backoffFactor; | ||
|
||
/** The maximum backoff time in milliseconds. */ | ||
private final long maxDelayMs; | ||
|
||
/** | ||
* The maximum backoff time used when calculating the next backoff. This value can be changed for | ||
* a single backoffAndRun call, after which it resets to maxDelayMs. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||
*/ | ||
private long nextMaxDelayMs; | ||
|
||
private long currentBaseMs; | ||
private long lastAttemptTime; | ||
private DelayedTask timerTask; | ||
|
@@ -71,6 +78,7 @@ public ExponentialBackoff( | |
this.initialDelayMs = initialDelayMs; | ||
this.backoffFactor = backoffFactor; | ||
this.maxDelayMs = maxDelayMs; | ||
this.nextMaxDelayMs = maxDelayMs; | ||
this.lastAttemptTime = new Date().getTime(); | ||
|
||
reset(); | ||
|
@@ -100,7 +108,17 @@ public void reset() { | |
* Resets the backoff delay to the maximum delay (e.g. for use after a RESOURCE_EXHAUSTED error). | ||
*/ | ||
public void resetToMax() { | ||
currentBaseMs = maxDelayMs; | ||
currentBaseMs = nextMaxDelayMs; | ||
} | ||
|
||
/** | ||
* Set the backoff's maximum delay for only the next call to backoffAndRun, after which the delay | ||
* will be reset to maxDelayMs. | ||
* | ||
* @param newMax The temporary maximum delay to set. | ||
*/ | ||
public void setTemporaryMaxDelay(long newMax) { | ||
nextMaxDelayMs = newMax; | ||
} | ||
|
||
/** | ||
|
@@ -148,9 +166,12 @@ public void backoffAndRun(Runnable task) { | |
currentBaseMs = (long) (currentBaseMs * backoffFactor); | ||
if (currentBaseMs < initialDelayMs) { | ||
currentBaseMs = initialDelayMs; | ||
} else if (currentBaseMs > maxDelayMs) { | ||
currentBaseMs = maxDelayMs; | ||
} else if (currentBaseMs > nextMaxDelayMs) { | ||
currentBaseMs = nextMaxDelayMs; | ||
} | ||
|
||
// Reset max delay to the default. | ||
nextMaxDelayMs = maxDelayMs; | ||
} | ||
|
||
public void cancel() { | ||
|
Uh oh!
There was an error while loading. Please reload this page.