Skip to content

Commit fa9a8c7

Browse files
author
Brian Chen
authored
Fix Android Connectivity Monitor (#1045)
1 parent e114571 commit fa9a8c7

File tree

4 files changed

+139
-25
lines changed

4 files changed

+139
-25
lines changed

firebase-firestore/src/main/java/com/google/firebase/firestore/remote/AbstractStream.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,12 @@ public void run() {
174174
/** The time a stream stays open after it is marked idle. */
175175
private static final long IDLE_TIMEOUT_MS = TimeUnit.MINUTES.toMillis(1);
176176

177+
/**
178+
* Maximum backoff time for reconnecting when we know the connection has failed on the client side.
179+
*/
180+
private static final long BACKOFF_CLIENT_NETWORK_FAILURE_MAX_DELAY_MS =
181+
TimeUnit.SECONDS.toMillis(10);
182+
177183
@Nullable private DelayedTask idleTimer;
178184

179185
private final FirestoreChannel firestoreChannel;
@@ -290,18 +296,23 @@ private void close(State finalState, Status status) {
290296
if (code == Code.OK) {
291297
// If this is an intentional close ensure we don't delay our next connection attempt.
292298
backoff.reset();
293-
294299
} else if (code == Code.RESOURCE_EXHAUSTED) {
295300
Logger.debug(
296301
getClass().getSimpleName(),
297302
"(%x) Using maximum backoff delay to prevent overloading the backend.",
298303
System.identityHashCode(this));
299304
backoff.resetToMax();
300-
301305
} else if (code == Code.UNAUTHENTICATED) {
302306
// "unauthenticated" error means the token was rejected. Try force refreshing it in case it
303307
// just expired.
304308
firestoreChannel.invalidateToken();
309+
} else if (code == Code.UNAVAILABLE) {
310+
// This exception is thrown when the gRPC connection fails on the client side. To shorten
311+
// reconnect time, we can use a shorter max delay when reconnecting.
312+
if (status.getCause() instanceof java.net.UnknownHostException
313+
|| status.getCause() instanceof java.net.ConnectException) {
314+
backoff.setTemporaryMaxDelay(BACKOFF_CLIENT_NETWORK_FAILURE_MAX_DELAY_MS);
315+
}
305316
}
306317

307318
if (finalState != State.Error) {

firebase-firestore/src/main/java/com/google/firebase/firestore/remote/GrpcCallProvider.java

Lines changed: 95 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,16 @@
2323
import com.google.android.gms.tasks.Tasks;
2424
import com.google.firebase.firestore.core.DatabaseInfo;
2525
import com.google.firebase.firestore.util.AsyncQueue;
26+
import com.google.firebase.firestore.util.AsyncQueue.DelayedTask;
27+
import com.google.firebase.firestore.util.AsyncQueue.TimerId;
2628
import com.google.firebase.firestore.util.Executors;
2729
import com.google.firebase.firestore.util.Logger;
2830
import com.google.firebase.firestore.util.Supplier;
2931
import com.google.firestore.v1.FirestoreGrpc;
3032
import io.grpc.CallCredentials;
3133
import io.grpc.CallOptions;
3234
import io.grpc.ClientCall;
35+
import io.grpc.ConnectivityState;
3336
import io.grpc.ManagedChannel;
3437
import io.grpc.ManagedChannelBuilder;
3538
import io.grpc.MethodDescriptor;
@@ -45,11 +48,22 @@ public class GrpcCallProvider {
4548

4649
private static Supplier<ManagedChannelBuilder<?>> overrideChannelBuilderSupplier;
4750

48-
private final Task<ManagedChannel> channelTask;
51+
private Task<ManagedChannel> channelTask;
4952
private final AsyncQueue asyncQueue;
5053

5154
private CallOptions callOptions;
5255

56+
// This timeout is used when attempting to establish a connection in gRPC. If a connection attempt
57+
// does not succeed in CONNECTIVITY_ATTEMPT_TIMEOUT_MS, we restart the channel and try
58+
// reconnecting again, rather than waiting up to 2+ minutes for gRPC to time out.
59+
// More details about usage can be found in GrpcCallProvider.onConnectivityStateChange().
60+
private static final int CONNECTIVITY_ATTEMPT_TIMEOUT_MS = 15 * 1000;
61+
private DelayedTask connectivityAttemptTimer;
62+
63+
private final Context context;
64+
private final DatabaseInfo databaseInfo;
65+
private final CallCredentials firestoreHeaders;
66+
5367
/**
5468
* Helper function to globally override the channel that RPCs use. Useful for testing when you
5569
* want to bypass SSL certificate checking.
@@ -69,24 +83,11 @@ public static void overrideChannelBuilder(
6983
DatabaseInfo databaseInfo,
7084
CallCredentials firestoreHeaders) {
7185
this.asyncQueue = asyncQueue;
86+
this.context = context;
87+
this.databaseInfo = databaseInfo;
88+
this.firestoreHeaders = firestoreHeaders;
7289

73-
// We execute network initialization on a separate thread to not block operations that depend on
74-
// the AsyncQueue.
75-
this.channelTask =
76-
Tasks.call(
77-
Executors.BACKGROUND_EXECUTOR,
78-
() -> {
79-
ManagedChannel channel = initChannel(context, databaseInfo);
80-
FirestoreGrpc.FirestoreStub firestoreStub =
81-
FirestoreGrpc.newStub(channel)
82-
.withCallCredentials(firestoreHeaders)
83-
// Ensure all callbacks are issued on the worker queue. If this call is
84-
// removed, all calls need to be audited to make sure they are executed on the
85-
// right thread.
86-
.withExecutor(asyncQueue.getExecutor());
87-
callOptions = firestoreStub.getCallOptions();
88-
return channel;
89-
});
90+
initChannelTask();
9091
}
9192

9293
/** Sets up the SSL provider and configures the gRPC channel. */
@@ -198,4 +199,80 @@ void shutdown() {
198199
Thread.currentThread().interrupt();
199200
}
200201
}
202+
203+
/**
204+
* Monitors the connectivity state of the gRPC channel and resets the channel when gRPC fails to
205+
* connect.
206+
*
207+
* <p>We currently cannot configure timeouts in connection attempts for gRPC
208+
* (https://github.com/grpc/grpc-java/issues/1943), and until they support doing so, the gRPC
209+
* connection can stay open for up to 2+ minutes before notifying us that it has shut down.
210+
*
211+
* <p>We start a timer when the channel enters ConnectivityState.CONNECTING. If the timer elapses,
212+
* we reset the channel by shutting it down and reinitializing the channelTask. Changes to the
213+
* connectivity state will clear the timer and start a new one-time listener for the next
214+
* ConnectivityState change.
215+
*
216+
* @param channel The channel to monitor the connectivity state of.
217+
*/
218+
private void onConnectivityStateChange(ManagedChannel channel) {
219+
ConnectivityState newState = channel.getState(true);
220+
Logger.debug(LOG_TAG, "Current gRPC connectivity state: " + newState);
221+
// Clear the timer, so we don't end up with multiple connectivityAttemptTimers.
222+
clearConnectivityAttemptTimer();
223+
224+
if (newState == ConnectivityState.CONNECTING) {
225+
Logger.debug(LOG_TAG, "Setting the connectivityAttemptTimer");
226+
connectivityAttemptTimer =
227+
asyncQueue.enqueueAfterDelay(
228+
TimerId.CONNECTIVITY_ATTEMPT_TIMER,
229+
CONNECTIVITY_ATTEMPT_TIMEOUT_MS,
230+
() -> {
231+
Logger.debug(LOG_TAG, "connectivityAttemptTimer elapsed. Resetting the channel.");
232+
clearConnectivityAttemptTimer();
233+
resetChannel(channel);
234+
});
235+
}
236+
// Re-listen for next state change.
237+
channel.notifyWhenStateChanged(
238+
newState, () -> asyncQueue.enqueueAndForget(() -> onConnectivityStateChange(channel)));
239+
}
240+
241+
private void resetChannel(ManagedChannel channel) {
242+
asyncQueue.enqueueAndForget(
243+
() -> {
244+
channel.shutdownNow();
245+
initChannelTask();
246+
});
247+
}
248+
249+
private void initChannelTask() {
250+
// We execute network initialization on a separate thread to not block operations that depend on
251+
// the AsyncQueue.
252+
this.channelTask =
253+
Tasks.call(
254+
Executors.BACKGROUND_EXECUTOR,
255+
() -> {
256+
ManagedChannel channel = initChannel(context, databaseInfo);
257+
onConnectivityStateChange(channel);
258+
FirestoreGrpc.FirestoreStub firestoreStub =
259+
FirestoreGrpc.newStub(channel)
260+
.withCallCredentials(firestoreHeaders)
261+
// Ensure all callbacks are issued on the worker queue. If this call is
262+
// removed, all calls need to be audited to make sure they are executed on the
263+
// right thread.
264+
.withExecutor(asyncQueue.getExecutor());
265+
callOptions = firestoreStub.getCallOptions();
266+
Logger.debug(LOG_TAG, "Channel successfully reset.");
267+
return channel;
268+
});
269+
}
270+
271+
private void clearConnectivityAttemptTimer() {
272+
if (connectivityAttemptTimer != null) {
273+
Logger.debug(LOG_TAG, "Clearing the connectivityAttemptTimer");
274+
connectivityAttemptTimer.cancel();
275+
connectivityAttemptTimer = null;
276+
}
277+
}
201278
}

firebase-firestore/src/main/java/com/google/firebase/firestore/util/AsyncQueue.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,12 @@ public enum TimerId {
7474
* A timer used to retry transactions. Since there can be multiple concurrent transactions,
7575
* multiple of these may be in the queue at a given time.
7676
*/
77-
RETRY_TRANSACTION
77+
RETRY_TRANSACTION,
78+
/**
79+
* A timer used to monitor when a connection attempt in gRPC is unsuccessful and retry
80+
* accordingly.
81+
*/
82+
CONNECTIVITY_ATTEMPT_TIMER
7883
}
7984

8085
/**

firebase-firestore/src/main/java/com/google/firebase/firestore/util/ExponentialBackoff.java

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,22 @@ public class ExponentialBackoff {
2929

3030
public static final double DEFAULT_BACKOFF_FACTOR = 1.5;
3131

32-
/** Maximum backoff time in milliseconds */
3332
public static final long DEFAULT_BACKOFF_MAX_DELAY_MS = 60 * 1000;
3433

3534
private final AsyncQueue queue;
3635
private final TimerId timerId;
3736
private final long initialDelayMs;
3837
private final double backoffFactor;
38+
39+
/** The maximum backoff time in milliseconds. */
3940
private final long maxDelayMs;
4041

42+
/**
43+
* The maximum backoff time used when calculating the next backoff. This value can be changed for
44+
* a single backoffAndRun call, after which it resets to maxDelayMs.
45+
*/
46+
private long nextMaxDelayMs;
47+
4148
private long currentBaseMs;
4249
private long lastAttemptTime;
4350
private DelayedTask timerTask;
@@ -71,6 +78,7 @@ public ExponentialBackoff(
7178
this.initialDelayMs = initialDelayMs;
7279
this.backoffFactor = backoffFactor;
7380
this.maxDelayMs = maxDelayMs;
81+
this.nextMaxDelayMs = maxDelayMs;
7482
this.lastAttemptTime = new Date().getTime();
7583

7684
reset();
@@ -100,7 +108,17 @@ public void reset() {
100108
* Resets the backoff delay to the maximum delay (e.g. for use after a RESOURCE_EXHAUSTED error).
101109
*/
102110
public void resetToMax() {
103-
currentBaseMs = maxDelayMs;
111+
currentBaseMs = nextMaxDelayMs;
112+
}
113+
114+
/**
115+
* Set the backoff's maximum delay for only the next call to backoffAndRun, after which the delay
116+
* will be reset to maxDelayMs.
117+
*
118+
* @param newMax The temporary maximum delay to set.
119+
*/
120+
public void setTemporaryMaxDelay(long newMax) {
121+
nextMaxDelayMs = newMax;
104122
}
105123

106124
/**
@@ -148,9 +166,12 @@ public void backoffAndRun(Runnable task) {
148166
currentBaseMs = (long) (currentBaseMs * backoffFactor);
149167
if (currentBaseMs < initialDelayMs) {
150168
currentBaseMs = initialDelayMs;
151-
} else if (currentBaseMs > maxDelayMs) {
152-
currentBaseMs = maxDelayMs;
169+
} else if (currentBaseMs > nextMaxDelayMs) {
170+
currentBaseMs = nextMaxDelayMs;
153171
}
172+
173+
// Reset max delay to the default.
174+
nextMaxDelayMs = maxDelayMs;
154175
}
155176

156177
public void cancel() {

0 commit comments

Comments
 (0)