51
51
import com .google .cloud .spanner .Options .TransactionOption ;
52
52
import com .google .cloud .spanner .Options .UpdateOption ;
53
53
import com .google .cloud .spanner .SessionClient .SessionConsumer ;
54
+ import com .google .cloud .spanner .SessionPoolOptions .InactiveTransactionRemovalOptions ;
54
55
import com .google .cloud .spanner .SpannerException .ResourceNotFoundException ;
55
56
import com .google .cloud .spanner .SpannerImpl .ClosedException ;
56
57
import com .google .common .annotations .VisibleForTesting ;
@@ -1279,7 +1280,7 @@ public AsyncTransactionManager transactionManagerAsync(TransactionOption... opti
1279
1280
@ Override
1280
1281
public long executePartitionedUpdate (Statement stmt , UpdateOption ... options ) {
1281
1282
try {
1282
- return get ().executePartitionedUpdate (stmt , options );
1283
+ return get (true ).executePartitionedUpdate (stmt , options );
1283
1284
} finally {
1284
1285
close ();
1285
1286
}
@@ -1332,6 +1333,10 @@ private PooledSession getOrNull() {
1332
1333
1333
1334
@ Override
1334
1335
public PooledSession get () {
1336
+ return get (false );
1337
+ }
1338
+
1339
+ PooledSession get (final boolean eligibleForLongRunning ) {
1335
1340
if (inUse .compareAndSet (false , true )) {
1336
1341
PooledSession res = null ;
1337
1342
try {
@@ -1346,6 +1351,7 @@ public PooledSession get() {
1346
1351
incrementNumSessionsInUse ();
1347
1352
checkedOutSessions .add (this );
1348
1353
}
1354
+ res .eligibleForLongRunning = eligibleForLongRunning ;
1349
1355
}
1350
1356
initialized .countDown ();
1351
1357
}
@@ -1366,6 +1372,28 @@ final class PooledSession implements Session {
1366
1372
private volatile SpannerException lastException ;
1367
1373
private volatile boolean allowReplacing = true ;
1368
1374
1375
+ /**
1376
+ * Property to mark if the session is eligible to be long-running. This can only be true if the
1377
+ * session is executing certain types of transactions (for ex - Partitioned DML) which can be
1378
+ * long-running. By default, most transaction types are not expected to be long-running and
1379
+ * hence this value is false.
1380
+ */
1381
+ private volatile boolean eligibleForLongRunning = false ;
1382
+
1383
+ /**
1384
+ * Property to mark if the session is no longer part of the session pool. For ex - A session
1385
+ * which is long-running gets cleaned up and removed from the pool.
1386
+ */
1387
+ private volatile boolean isRemovedFromPool = false ;
1388
+
1389
+ /**
1390
+ * Property to mark if a leaked session exception is already logged. Given a session maintainer
1391
+ * thread runs repeatedly at a defined interval, this property allows us to ensure that an
1392
+ * exception is logged only once per leaked session. This is to avoid noisy repeated logs around
1393
+ * session leaks for long-running sessions.
1394
+ */
1395
+ private volatile boolean isLeakedExceptionLogged = false ;
1396
+
1369
1397
@ GuardedBy ("lock" )
1370
1398
private SessionState state ;
1371
1399
@@ -1385,6 +1413,11 @@ void setAllowReplacing(boolean allowReplacing) {
1385
1413
this .allowReplacing = allowReplacing ;
1386
1414
}
1387
1415
1416
+ @ VisibleForTesting
1417
+ void setEligibleForLongRunning (boolean eligibleForLongRunning ) {
1418
+ this .eligibleForLongRunning = eligibleForLongRunning ;
1419
+ }
1420
+
1388
1421
@ Override
1389
1422
public Timestamp write (Iterable <Mutation > mutations ) throws SpannerException {
1390
1423
return writeWithOptions (mutations ).getCommitTimestamp ();
@@ -1485,7 +1518,7 @@ public void close() {
1485
1518
numSessionsInUse --;
1486
1519
numSessionsReleased ++;
1487
1520
}
1488
- if (lastException != null && isSessionNotFound (lastException )) {
1521
+ if (( lastException != null && isSessionNotFound (lastException )) || isRemovedFromPool ) {
1489
1522
invalidateSession (this );
1490
1523
} else {
1491
1524
if (lastException != null && isDatabaseOrInstanceNotFound (lastException )) {
@@ -1499,6 +1532,7 @@ public void close() {
1499
1532
}
1500
1533
}
1501
1534
lastException = null ;
1535
+ isRemovedFromPool = false ;
1502
1536
if (state != SessionState .CLOSING ) {
1503
1537
state = SessionState .AVAILABLE ;
1504
1538
}
@@ -1651,6 +1685,10 @@ private PooledSession pollUninterruptiblyWithTimeout(long timeoutMillis) {
1651
1685
* <li>Keeps alive sessions that have not been used for a user configured time in order to keep
1652
1686
* MinSessions sessions alive in the pool at any time. The keep-alive traffic is smeared out
1653
1687
* over a window of 10 minutes to avoid bursty traffic.
1688
+ * <li>Removes unexpected long running transactions from the pool. Only certain transaction
1689
+ * types (for ex - Partitioned DML / Batch Reads) can be long running. This tasks checks the
1690
+ * sessions which have been inactive for a longer than usual duration (for ex - 60 minutes)
1691
+ * and removes such sessions from the pool.
1654
1692
* </ul>
1655
1693
*/
1656
1694
final class PoolMaintainer {
@@ -1659,16 +1697,24 @@ final class PoolMaintainer {
1659
1697
private final Duration windowLength = Duration .ofMillis (TimeUnit .MINUTES .toMillis (10 ));
1660
1698
// Frequency of the timer loop.
1661
1699
@ VisibleForTesting final long loopFrequency = options .getLoopFrequency ();
1662
- // Number of loop iterations in which we need to to close all the sessions waiting for closure.
1700
+ // Number of loop iterations in which we need to close all the sessions waiting for closure.
1663
1701
@ VisibleForTesting final long numClosureCycles = windowLength .toMillis () / loopFrequency ;
1664
1702
private final Duration keepAliveMillis =
1665
1703
Duration .ofMillis (TimeUnit .MINUTES .toMillis (options .getKeepAliveIntervalMinutes ()));
1666
1704
// Number of loop iterations in which we need to keep alive all the sessions
1667
1705
@ VisibleForTesting final long numKeepAliveCycles = keepAliveMillis .toMillis () / loopFrequency ;
1668
1706
1669
- Instant lastResetTime = Instant .ofEpochMilli (0 );
1670
- int numSessionsToClose = 0 ;
1671
- int sessionsToClosePerLoop = 0 ;
1707
+ /**
1708
+ * Variable maintaining the last execution time of the long-running transaction cleanup task.
1709
+ *
1710
+ * <p>The long-running transaction cleanup needs to be performed every X minutes. The X minutes
1711
+ * recurs multiple times within the invocation of the pool maintainer thread. For ex - If the
1712
+ * main thread runs every 10s and the long-running transaction clean-up needs to be performed
1713
+ * every 2 minutes, then we need to keep a track of when was the last time that this task
1714
+ * executed and makes sure we only execute it every 2 minutes and not every 10 seconds.
1715
+ */
1716
+ @ VisibleForTesting Instant lastExecutionTime ;
1717
+
1672
1718
boolean closed = false ;
1673
1719
1674
1720
@ GuardedBy ("lock" )
@@ -1678,6 +1724,7 @@ final class PoolMaintainer {
1678
1724
boolean running ;
1679
1725
1680
1726
void init () {
1727
+ lastExecutionTime = clock .instant ();
1681
1728
// Scheduled pool maintenance worker.
1682
1729
synchronized (lock ) {
1683
1730
scheduledFuture =
@@ -1723,6 +1770,7 @@ void maintainPool() {
1723
1770
decrementPendingClosures (1 );
1724
1771
}
1725
1772
}
1773
+ removeLongRunningSessions (currTime );
1726
1774
}
1727
1775
1728
1776
private void removeIdleSessions (Instant currTime ) {
@@ -1736,7 +1784,13 @@ private void removeIdleSessions(Instant currTime) {
1736
1784
PooledSession session = iterator .next ();
1737
1785
if (session .lastUseTime .isBefore (minLastUseTime )) {
1738
1786
if (session .state != SessionState .CLOSING ) {
1739
- removeFromPool (session );
1787
+ boolean isRemoved = removeFromPool (session );
1788
+ if (isRemoved ) {
1789
+ numIdleSessionsRemoved ++;
1790
+ if (idleSessionRemovedListener != null ) {
1791
+ idleSessionRemovedListener .apply (session );
1792
+ }
1793
+ }
1740
1794
iterator .remove ();
1741
1795
}
1742
1796
}
@@ -1792,6 +1846,87 @@ private void replenishPool() {
1792
1846
}
1793
1847
}
1794
1848
}
1849
+
1850
+ // cleans up sessions which are unexpectedly long-running.
1851
+ void removeLongRunningSessions (Instant currentTime ) {
1852
+ try {
1853
+ if (SessionPool .this .isClosed ()) {
1854
+ return ;
1855
+ }
1856
+ final InactiveTransactionRemovalOptions inactiveTransactionRemovalOptions =
1857
+ options .getInactiveTransactionRemovalOptions ();
1858
+ final Instant minExecutionTime =
1859
+ lastExecutionTime .plus (inactiveTransactionRemovalOptions .getExecutionFrequency ());
1860
+ if (currentTime .isBefore (minExecutionTime )) {
1861
+ return ;
1862
+ }
1863
+ lastExecutionTime = currentTime ; // update this only after we have decided to execute task
1864
+ if (options .closeInactiveTransactions ()
1865
+ || options .warnInactiveTransactions ()
1866
+ || options .warnAndCloseInactiveTransactions ()) {
1867
+ removeLongRunningSessions (currentTime , inactiveTransactionRemovalOptions );
1868
+ }
1869
+ } catch (final Throwable t ) {
1870
+ logger .log (Level .WARNING , "Failed removing long running transactions" , t );
1871
+ }
1872
+ }
1873
+
1874
+ private void removeLongRunningSessions (
1875
+ final Instant currentTime ,
1876
+ final InactiveTransactionRemovalOptions inactiveTransactionRemovalOptions ) {
1877
+ synchronized (lock ) {
1878
+ final double usedSessionsRatio = getRatioOfSessionsInUse ();
1879
+ if (usedSessionsRatio > inactiveTransactionRemovalOptions .getUsedSessionsRatioThreshold ()) {
1880
+ Iterator <PooledSessionFuture > iterator = checkedOutSessions .iterator ();
1881
+ while (iterator .hasNext ()) {
1882
+ final PooledSessionFuture sessionFuture = iterator .next ();
1883
+ // the below get() call on future object is non-blocking since checkedOutSessions
1884
+ // collection is populated only when the get() method in {@code PooledSessionFuture} is
1885
+ // called.
1886
+ final PooledSession session = sessionFuture .get ();
1887
+ final Duration durationFromLastUse = Duration .between (session .lastUseTime , currentTime );
1888
+ if (!session .eligibleForLongRunning
1889
+ && durationFromLastUse .compareTo (
1890
+ inactiveTransactionRemovalOptions .getIdleTimeThreshold ())
1891
+ > 0 ) {
1892
+ if ((options .warnInactiveTransactions () || options .warnAndCloseInactiveTransactions ())
1893
+ && !session .isLeakedExceptionLogged ) {
1894
+ if (options .warnAndCloseInactiveTransactions ()) {
1895
+ logger .log (
1896
+ Level .WARNING ,
1897
+ String .format ("Removing long-running session => %s" , session .getName ()),
1898
+ sessionFuture .leakedException );
1899
+ session .isLeakedExceptionLogged = true ;
1900
+ } else if (options .warnInactiveTransactions ()) {
1901
+ logger .log (
1902
+ Level .WARNING ,
1903
+ String .format (
1904
+ "Detected long-running session => %s. To automatically remove "
1905
+ + "long-running sessions, set SessionOption ActionOnInactiveTransaction "
1906
+ + "to WARN_AND_CLOSE by invoking setWarnAndCloseIfInactiveTransactions() method." ,
1907
+ session .getName ()),
1908
+ sessionFuture .leakedException );
1909
+ session .isLeakedExceptionLogged = true ;
1910
+ }
1911
+ }
1912
+ if ((options .closeInactiveTransactions ()
1913
+ || options .warnAndCloseInactiveTransactions ())
1914
+ && session .state != SessionState .CLOSING ) {
1915
+ final boolean isRemoved = removeFromPool (session );
1916
+ if (isRemoved ) {
1917
+ session .isRemovedFromPool = true ;
1918
+ numLeakedSessionsRemoved ++;
1919
+ if (longRunningSessionRemovedListener != null ) {
1920
+ longRunningSessionRemovedListener .apply (session );
1921
+ }
1922
+ }
1923
+ iterator .remove ();
1924
+ }
1925
+ }
1926
+ }
1927
+ }
1928
+ }
1929
+ }
1795
1930
}
1796
1931
1797
1932
private enum Position {
@@ -1872,6 +2007,9 @@ private enum Position {
1872
2007
@ GuardedBy ("lock" )
1873
2008
private long numIdleSessionsRemoved = 0 ;
1874
2009
2010
+ @ GuardedBy ("lock" )
2011
+ private long numLeakedSessionsRemoved = 0 ;
2012
+
1875
2013
private AtomicLong numWaiterTimeouts = new AtomicLong ();
1876
2014
1877
2015
@ GuardedBy ("lock" )
@@ -1885,6 +2023,8 @@ private enum Position {
1885
2023
1886
2024
@ VisibleForTesting Function <PooledSession , Void > idleSessionRemovedListener ;
1887
2025
2026
+ @ VisibleForTesting Function <PooledSession , Void > longRunningSessionRemovedListener ;
2027
+
1888
2028
private final CountDownLatch waitOnMinSessionsLatch ;
1889
2029
1890
2030
/**
@@ -1895,12 +2035,16 @@ private enum Position {
1895
2035
*/
1896
2036
static SessionPool createPool (
1897
2037
SpannerOptions spannerOptions , SessionClient sessionClient , List <LabelValue > labelValues ) {
2038
+ final SessionPoolOptions sessionPoolOptions = spannerOptions .getSessionPoolOptions ();
2039
+
2040
+ // A clock instance is passed in {@code SessionPoolOptions} in order to allow mocking via tests.
2041
+ final Clock poolMaintainerClock = sessionPoolOptions .getPoolMaintainerClock ();
1898
2042
return createPool (
1899
- spannerOptions . getSessionPoolOptions () ,
2043
+ sessionPoolOptions ,
1900
2044
spannerOptions .getDatabaseRole (),
1901
2045
((GrpcTransportOptions ) spannerOptions .getTransportOptions ()).getExecutorFactory (),
1902
2046
sessionClient ,
1903
- new Clock (),
2047
+ poolMaintainerClock == null ? new Clock () : poolMaintainerClock ,
1904
2048
Metrics .getMetricRegistry (),
1905
2049
labelValues );
1906
2050
}
@@ -2015,18 +2159,26 @@ int getNumberOfSessionsInUse() {
2015
2159
}
2016
2160
}
2017
2161
2018
- void removeFromPool (PooledSession session ) {
2162
+ @ VisibleForTesting
2163
+ double getRatioOfSessionsInUse () {
2164
+ synchronized (lock ) {
2165
+ final int maxSessions = options .getMaxSessions ();
2166
+ if (maxSessions == 0 ) {
2167
+ return 0 ;
2168
+ }
2169
+ return (double ) numSessionsInUse / maxSessions ;
2170
+ }
2171
+ }
2172
+
2173
+ boolean removeFromPool (PooledSession session ) {
2019
2174
synchronized (lock ) {
2020
2175
if (isClosed ()) {
2021
2176
decrementPendingClosures (1 );
2022
- return ;
2177
+ return false ;
2023
2178
}
2024
2179
session .markClosing ();
2025
2180
allSessions .remove (session );
2026
- numIdleSessionsRemoved ++;
2027
- }
2028
- if (idleSessionRemovedListener != null ) {
2029
- idleSessionRemovedListener .apply (session );
2181
+ return true ;
2030
2182
}
2031
2183
}
2032
2184
@@ -2036,6 +2188,13 @@ long numIdleSessionsRemoved() {
2036
2188
}
2037
2189
}
2038
2190
2191
+ @ VisibleForTesting
2192
+ long numLeakedSessionsRemoved () {
2193
+ synchronized (lock ) {
2194
+ return numLeakedSessionsRemoved ;
2195
+ }
2196
+ }
2197
+
2039
2198
@ VisibleForTesting
2040
2199
int getNumberOfSessionsInPool () {
2041
2200
synchronized (lock ) {
0 commit comments