Skip to content

Commit 423e1a4

Browse files
authored
feat: long running transaction clean up background task. Adding configuration options for closing inactive transactions. (#2419)
1 parent 5a12cd2 commit 423e1a4

File tree

7 files changed

+1181
-30
lines changed

7 files changed

+1181
-30
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/SessionPool.java

+174-15
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import com.google.cloud.spanner.Options.TransactionOption;
5252
import com.google.cloud.spanner.Options.UpdateOption;
5353
import com.google.cloud.spanner.SessionClient.SessionConsumer;
54+
import com.google.cloud.spanner.SessionPoolOptions.InactiveTransactionRemovalOptions;
5455
import com.google.cloud.spanner.SpannerException.ResourceNotFoundException;
5556
import com.google.cloud.spanner.SpannerImpl.ClosedException;
5657
import com.google.common.annotations.VisibleForTesting;
@@ -1279,7 +1280,7 @@ public AsyncTransactionManager transactionManagerAsync(TransactionOption... opti
12791280
@Override
12801281
public long executePartitionedUpdate(Statement stmt, UpdateOption... options) {
12811282
try {
1282-
return get().executePartitionedUpdate(stmt, options);
1283+
return get(true).executePartitionedUpdate(stmt, options);
12831284
} finally {
12841285
close();
12851286
}
@@ -1332,6 +1333,10 @@ private PooledSession getOrNull() {
13321333

13331334
@Override
13341335
public PooledSession get() {
1336+
return get(false);
1337+
}
1338+
1339+
PooledSession get(final boolean eligibleForLongRunning) {
13351340
if (inUse.compareAndSet(false, true)) {
13361341
PooledSession res = null;
13371342
try {
@@ -1346,6 +1351,7 @@ public PooledSession get() {
13461351
incrementNumSessionsInUse();
13471352
checkedOutSessions.add(this);
13481353
}
1354+
res.eligibleForLongRunning = eligibleForLongRunning;
13491355
}
13501356
initialized.countDown();
13511357
}
@@ -1366,6 +1372,28 @@ final class PooledSession implements Session {
13661372
private volatile SpannerException lastException;
13671373
private volatile boolean allowReplacing = true;
13681374

1375+
/**
1376+
* Property to mark if the session is eligible to be long-running. This can only be true if the
1377+
* session is executing certain types of transactions (for ex - Partitioned DML) which can be
1378+
* long-running. By default, most transaction types are not expected to be long-running and
1379+
* hence this value is false.
1380+
*/
1381+
private volatile boolean eligibleForLongRunning = false;
1382+
1383+
/**
1384+
* Property to mark if the session is no longer part of the session pool. For ex - A session
1385+
* which is long-running gets cleaned up and removed from the pool.
1386+
*/
1387+
private volatile boolean isRemovedFromPool = false;
1388+
1389+
/**
1390+
* Property to mark if a leaked session exception is already logged. Given a session maintainer
1391+
* thread runs repeatedly at a defined interval, this property allows us to ensure that an
1392+
* exception is logged only once per leaked session. This is to avoid noisy repeated logs around
1393+
* session leaks for long-running sessions.
1394+
*/
1395+
private volatile boolean isLeakedExceptionLogged = false;
1396+
13691397
@GuardedBy("lock")
13701398
private SessionState state;
13711399

@@ -1385,6 +1413,11 @@ void setAllowReplacing(boolean allowReplacing) {
13851413
this.allowReplacing = allowReplacing;
13861414
}
13871415

1416+
@VisibleForTesting
1417+
void setEligibleForLongRunning(boolean eligibleForLongRunning) {
1418+
this.eligibleForLongRunning = eligibleForLongRunning;
1419+
}
1420+
13881421
@Override
13891422
public Timestamp write(Iterable<Mutation> mutations) throws SpannerException {
13901423
return writeWithOptions(mutations).getCommitTimestamp();
@@ -1485,7 +1518,7 @@ public void close() {
14851518
numSessionsInUse--;
14861519
numSessionsReleased++;
14871520
}
1488-
if (lastException != null && isSessionNotFound(lastException)) {
1521+
if ((lastException != null && isSessionNotFound(lastException)) || isRemovedFromPool) {
14891522
invalidateSession(this);
14901523
} else {
14911524
if (lastException != null && isDatabaseOrInstanceNotFound(lastException)) {
@@ -1499,6 +1532,7 @@ public void close() {
14991532
}
15001533
}
15011534
lastException = null;
1535+
isRemovedFromPool = false;
15021536
if (state != SessionState.CLOSING) {
15031537
state = SessionState.AVAILABLE;
15041538
}
@@ -1651,6 +1685,10 @@ private PooledSession pollUninterruptiblyWithTimeout(long timeoutMillis) {
16511685
* <li>Keeps alive sessions that have not been used for a user configured time in order to keep
16521686
* MinSessions sessions alive in the pool at any time. The keep-alive traffic is smeared out
16531687
* over a window of 10 minutes to avoid bursty traffic.
1688+
* <li>Removes unexpected long running transactions from the pool. Only certain transaction
1689+
* types (for ex - Partitioned DML / Batch Reads) can be long running. This tasks checks the
1690+
* sessions which have been inactive for a longer than usual duration (for ex - 60 minutes)
1691+
* and removes such sessions from the pool.
16541692
* </ul>
16551693
*/
16561694
final class PoolMaintainer {
@@ -1659,16 +1697,24 @@ final class PoolMaintainer {
16591697
private final Duration windowLength = Duration.ofMillis(TimeUnit.MINUTES.toMillis(10));
16601698
// Frequency of the timer loop.
16611699
@VisibleForTesting final long loopFrequency = options.getLoopFrequency();
1662-
// Number of loop iterations in which we need to to close all the sessions waiting for closure.
1700+
// Number of loop iterations in which we need to close all the sessions waiting for closure.
16631701
@VisibleForTesting final long numClosureCycles = windowLength.toMillis() / loopFrequency;
16641702
private final Duration keepAliveMillis =
16651703
Duration.ofMillis(TimeUnit.MINUTES.toMillis(options.getKeepAliveIntervalMinutes()));
16661704
// Number of loop iterations in which we need to keep alive all the sessions
16671705
@VisibleForTesting final long numKeepAliveCycles = keepAliveMillis.toMillis() / loopFrequency;
16681706

1669-
Instant lastResetTime = Instant.ofEpochMilli(0);
1670-
int numSessionsToClose = 0;
1671-
int sessionsToClosePerLoop = 0;
1707+
/**
1708+
* Variable maintaining the last execution time of the long-running transaction cleanup task.
1709+
*
1710+
* <p>The long-running transaction cleanup needs to be performed every X minutes. The X minutes
1711+
* recurs multiple times within the invocation of the pool maintainer thread. For ex - If the
1712+
* main thread runs every 10s and the long-running transaction clean-up needs to be performed
1713+
* every 2 minutes, then we need to keep a track of when was the last time that this task
1714+
* executed and makes sure we only execute it every 2 minutes and not every 10 seconds.
1715+
*/
1716+
@VisibleForTesting Instant lastExecutionTime;
1717+
16721718
boolean closed = false;
16731719

16741720
@GuardedBy("lock")
@@ -1678,6 +1724,7 @@ final class PoolMaintainer {
16781724
boolean running;
16791725

16801726
void init() {
1727+
lastExecutionTime = clock.instant();
16811728
// Scheduled pool maintenance worker.
16821729
synchronized (lock) {
16831730
scheduledFuture =
@@ -1723,6 +1770,7 @@ void maintainPool() {
17231770
decrementPendingClosures(1);
17241771
}
17251772
}
1773+
removeLongRunningSessions(currTime);
17261774
}
17271775

17281776
private void removeIdleSessions(Instant currTime) {
@@ -1736,7 +1784,13 @@ private void removeIdleSessions(Instant currTime) {
17361784
PooledSession session = iterator.next();
17371785
if (session.lastUseTime.isBefore(minLastUseTime)) {
17381786
if (session.state != SessionState.CLOSING) {
1739-
removeFromPool(session);
1787+
boolean isRemoved = removeFromPool(session);
1788+
if (isRemoved) {
1789+
numIdleSessionsRemoved++;
1790+
if (idleSessionRemovedListener != null) {
1791+
idleSessionRemovedListener.apply(session);
1792+
}
1793+
}
17401794
iterator.remove();
17411795
}
17421796
}
@@ -1792,6 +1846,87 @@ private void replenishPool() {
17921846
}
17931847
}
17941848
}
1849+
1850+
// cleans up sessions which are unexpectedly long-running.
1851+
void removeLongRunningSessions(Instant currentTime) {
1852+
try {
1853+
if (SessionPool.this.isClosed()) {
1854+
return;
1855+
}
1856+
final InactiveTransactionRemovalOptions inactiveTransactionRemovalOptions =
1857+
options.getInactiveTransactionRemovalOptions();
1858+
final Instant minExecutionTime =
1859+
lastExecutionTime.plus(inactiveTransactionRemovalOptions.getExecutionFrequency());
1860+
if (currentTime.isBefore(minExecutionTime)) {
1861+
return;
1862+
}
1863+
lastExecutionTime = currentTime; // update this only after we have decided to execute task
1864+
if (options.closeInactiveTransactions()
1865+
|| options.warnInactiveTransactions()
1866+
|| options.warnAndCloseInactiveTransactions()) {
1867+
removeLongRunningSessions(currentTime, inactiveTransactionRemovalOptions);
1868+
}
1869+
} catch (final Throwable t) {
1870+
logger.log(Level.WARNING, "Failed removing long running transactions", t);
1871+
}
1872+
}
1873+
1874+
private void removeLongRunningSessions(
1875+
final Instant currentTime,
1876+
final InactiveTransactionRemovalOptions inactiveTransactionRemovalOptions) {
1877+
synchronized (lock) {
1878+
final double usedSessionsRatio = getRatioOfSessionsInUse();
1879+
if (usedSessionsRatio > inactiveTransactionRemovalOptions.getUsedSessionsRatioThreshold()) {
1880+
Iterator<PooledSessionFuture> iterator = checkedOutSessions.iterator();
1881+
while (iterator.hasNext()) {
1882+
final PooledSessionFuture sessionFuture = iterator.next();
1883+
// the below get() call on future object is non-blocking since checkedOutSessions
1884+
// collection is populated only when the get() method in {@code PooledSessionFuture} is
1885+
// called.
1886+
final PooledSession session = sessionFuture.get();
1887+
final Duration durationFromLastUse = Duration.between(session.lastUseTime, currentTime);
1888+
if (!session.eligibleForLongRunning
1889+
&& durationFromLastUse.compareTo(
1890+
inactiveTransactionRemovalOptions.getIdleTimeThreshold())
1891+
> 0) {
1892+
if ((options.warnInactiveTransactions() || options.warnAndCloseInactiveTransactions())
1893+
&& !session.isLeakedExceptionLogged) {
1894+
if (options.warnAndCloseInactiveTransactions()) {
1895+
logger.log(
1896+
Level.WARNING,
1897+
String.format("Removing long-running session => %s", session.getName()),
1898+
sessionFuture.leakedException);
1899+
session.isLeakedExceptionLogged = true;
1900+
} else if (options.warnInactiveTransactions()) {
1901+
logger.log(
1902+
Level.WARNING,
1903+
String.format(
1904+
"Detected long-running session => %s. To automatically remove "
1905+
+ "long-running sessions, set SessionOption ActionOnInactiveTransaction "
1906+
+ "to WARN_AND_CLOSE by invoking setWarnAndCloseIfInactiveTransactions() method.",
1907+
session.getName()),
1908+
sessionFuture.leakedException);
1909+
session.isLeakedExceptionLogged = true;
1910+
}
1911+
}
1912+
if ((options.closeInactiveTransactions()
1913+
|| options.warnAndCloseInactiveTransactions())
1914+
&& session.state != SessionState.CLOSING) {
1915+
final boolean isRemoved = removeFromPool(session);
1916+
if (isRemoved) {
1917+
session.isRemovedFromPool = true;
1918+
numLeakedSessionsRemoved++;
1919+
if (longRunningSessionRemovedListener != null) {
1920+
longRunningSessionRemovedListener.apply(session);
1921+
}
1922+
}
1923+
iterator.remove();
1924+
}
1925+
}
1926+
}
1927+
}
1928+
}
1929+
}
17951930
}
17961931

17971932
private enum Position {
@@ -1872,6 +2007,9 @@ private enum Position {
18722007
@GuardedBy("lock")
18732008
private long numIdleSessionsRemoved = 0;
18742009

2010+
@GuardedBy("lock")
2011+
private long numLeakedSessionsRemoved = 0;
2012+
18752013
private AtomicLong numWaiterTimeouts = new AtomicLong();
18762014

18772015
@GuardedBy("lock")
@@ -1885,6 +2023,8 @@ private enum Position {
18852023

18862024
@VisibleForTesting Function<PooledSession, Void> idleSessionRemovedListener;
18872025

2026+
@VisibleForTesting Function<PooledSession, Void> longRunningSessionRemovedListener;
2027+
18882028
private final CountDownLatch waitOnMinSessionsLatch;
18892029

18902030
/**
@@ -1895,12 +2035,16 @@ private enum Position {
18952035
*/
18962036
static SessionPool createPool(
18972037
SpannerOptions spannerOptions, SessionClient sessionClient, List<LabelValue> labelValues) {
2038+
final SessionPoolOptions sessionPoolOptions = spannerOptions.getSessionPoolOptions();
2039+
2040+
// A clock instance is passed in {@code SessionPoolOptions} in order to allow mocking via tests.
2041+
final Clock poolMaintainerClock = sessionPoolOptions.getPoolMaintainerClock();
18982042
return createPool(
1899-
spannerOptions.getSessionPoolOptions(),
2043+
sessionPoolOptions,
19002044
spannerOptions.getDatabaseRole(),
19012045
((GrpcTransportOptions) spannerOptions.getTransportOptions()).getExecutorFactory(),
19022046
sessionClient,
1903-
new Clock(),
2047+
poolMaintainerClock == null ? new Clock() : poolMaintainerClock,
19042048
Metrics.getMetricRegistry(),
19052049
labelValues);
19062050
}
@@ -2015,18 +2159,26 @@ int getNumberOfSessionsInUse() {
20152159
}
20162160
}
20172161

2018-
void removeFromPool(PooledSession session) {
2162+
@VisibleForTesting
2163+
double getRatioOfSessionsInUse() {
2164+
synchronized (lock) {
2165+
final int maxSessions = options.getMaxSessions();
2166+
if (maxSessions == 0) {
2167+
return 0;
2168+
}
2169+
return (double) numSessionsInUse / maxSessions;
2170+
}
2171+
}
2172+
2173+
boolean removeFromPool(PooledSession session) {
20192174
synchronized (lock) {
20202175
if (isClosed()) {
20212176
decrementPendingClosures(1);
2022-
return;
2177+
return false;
20232178
}
20242179
session.markClosing();
20252180
allSessions.remove(session);
2026-
numIdleSessionsRemoved++;
2027-
}
2028-
if (idleSessionRemovedListener != null) {
2029-
idleSessionRemovedListener.apply(session);
2181+
return true;
20302182
}
20312183
}
20322184

@@ -2036,6 +2188,13 @@ long numIdleSessionsRemoved() {
20362188
}
20372189
}
20382190

2191+
@VisibleForTesting
2192+
long numLeakedSessionsRemoved() {
2193+
synchronized (lock) {
2194+
return numLeakedSessionsRemoved;
2195+
}
2196+
}
2197+
20392198
@VisibleForTesting
20402199
int getNumberOfSessionsInPool() {
20412200
synchronized (lock) {

0 commit comments

Comments
 (0)