Skip to content

Commit 8d8aed9

Browse files
committed
Race condition in TarantoolClientImpl
- Avoid a possible race between reading, writing and reconnecting threads when a reconnection process is started. It might have happened that the lagged thread (reading or writing) could reset the state to RECONNECT after the reconnecting thread has already started and set the state to 0. As a result, all next attempts to reconnect will never happen. Now the reconnect thread holds on the state as long as it is required. - Avoid another possible race between reading and writing threads when they are started during the reconnection process. It might have happened that one of the threads crashed when it was starting and another slightly lagged thread set up its flag. It could have led that the reconnecting thread saw RECONNECT + R/W state instead of pure RECONNECT. Again, this case broke down all next reconnection attempts. Now reading and writing threads take into account whether RECONNECT state is already set or not. - Avoid LockSupport class usage for a thread to be suspended and woken up. Actually, LockSupport is more like an internal component to build high-level blocking primitives. It is not recommended using this class directly. It was replaced by ReentrantLock.Condition primitive based on LockSupport but which has proper LockSupport usage inside. Fixes: #142 Affects: #34, #136
1 parent 4715727 commit 8d8aed9

File tree

1 file changed

+88
-48
lines changed

1 file changed

+88
-48
lines changed

src/main/java/org/tarantool/TarantoolClientImpl.java

+88-48
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
import org.tarantool.protocol.ProtoUtils;
44
import org.tarantool.protocol.ReadableViaSelectorChannel;
5-
import org.tarantool.protocol.TarantoolPacket;
65
import org.tarantool.protocol.TarantoolGreeting;
6+
import org.tarantool.protocol.TarantoolPacket;
77

88
import java.io.IOException;
99
import java.nio.ByteBuffer;
@@ -22,7 +22,6 @@
2222
import java.util.concurrent.atomic.AtomicInteger;
2323
import java.util.concurrent.atomic.AtomicReference;
2424
import java.util.concurrent.locks.Condition;
25-
import java.util.concurrent.locks.LockSupport;
2625
import java.util.concurrent.locks.ReentrantLock;
2726

2827

@@ -66,14 +65,15 @@ public class TarantoolClientImpl extends TarantoolBase<Future<?>> implements Tar
6665
protected Thread reader;
6766
protected Thread writer;
6867

68+
protected final ReentrantLock connectorLock = new ReentrantLock();
69+
protected final Condition reconnectRequired = connectorLock.newCondition();
70+
6971
protected Thread connector = new Thread(new Runnable() {
7072
@Override
7173
public void run() {
7274
while (!Thread.currentThread().isInterrupted()) {
73-
if (state.compareAndSet(StateHelper.RECONNECT, 0)) {
74-
reconnect(0, thumbstone);
75-
}
76-
LockSupport.park(state);
75+
reconnect(0, thumbstone);
76+
awaitReconnection();
7777
}
7878
}
7979
});
@@ -139,16 +139,13 @@ protected void reconnect(int retry, Throwable lastError) {
139139
protected void connect(final SocketChannel channel) throws Exception {
140140
try {
141141
TarantoolGreeting greeting = ProtoUtils.connect(channel,
142-
config.username, config.password);
142+
config.username, config.password);
143143
this.serverVersion = greeting.getServerVersion();
144144
} catch (IOException e) {
145-
try {
146-
channel.close();
147-
} catch (IOException ignored) {
148-
}
149-
145+
closeChannel(channel);
150146
throw new CommunicationException("Couldn't connect to tarantool", e);
151147
}
148+
152149
channel.configureBlocking(false);
153150
this.channel = channel;
154151
this.readChannel = new ReadableViaSelectorChannel(channel);
@@ -165,6 +162,15 @@ protected void connect(final SocketChannel channel) throws Exception {
165162

166163
protected void startThreads(String threadName) throws InterruptedException {
167164
final CountDownLatch init = new CountDownLatch(2);
165+
166+
if (reader != null) {
167+
reader.join(config.initTimeoutMillis / 2);
168+
}
169+
if (writer != null) {
170+
writer.join(config.initTimeoutMillis / 2);
171+
}
172+
state.release(StateHelper.RECONNECT);
173+
168174
reader = new Thread(new Runnable() {
169175
@Override
170176
public void run() {
@@ -174,8 +180,7 @@ public void run() {
174180
readThread();
175181
} finally {
176182
state.release(StateHelper.READING);
177-
if (state.compareAndSet(0, StateHelper.RECONNECT))
178-
LockSupport.unpark(connector);
183+
trySignalForReconnection();
179184
}
180185
}
181186
}
@@ -189,8 +194,7 @@ public void run() {
189194
writeThread();
190195
} finally {
191196
state.release(StateHelper.WRITING);
192-
if (state.compareAndSet(0, StateHelper.RECONNECT))
193-
LockSupport.unpark(connector);
197+
trySignalForReconnection();
194198
}
195199
}
196200
}
@@ -337,25 +341,21 @@ private boolean directWrite(ByteBuffer buffer) throws InterruptedException, IOEx
337341
}
338342

339343
protected void readThread() {
340-
try {
341-
while (!Thread.currentThread().isInterrupted()) {
342-
try {
343-
TarantoolPacket packet = ProtoUtils.readPacket(readChannel);
344+
while (!Thread.currentThread().isInterrupted()) {
345+
try {
346+
TarantoolPacket packet = ProtoUtils.readPacket(readChannel);
344347

345-
Map<Integer, Object> headers = packet.getHeaders();
348+
Map<Integer, Object> headers = packet.getHeaders();
346349

347-
Long syncId = (Long) headers.get(Key.SYNC.getId());
348-
CompletableFuture<?> future = futures.remove(syncId);
349-
stats.received++;
350-
wait.decrementAndGet();
351-
complete(packet, future);
352-
} catch (Exception e) {
353-
die("Cant read answer", e);
354-
return;
355-
}
350+
Long syncId = (Long) headers.get(Key.SYNC.getId());
351+
CompletableFuture<?> future = futures.remove(syncId);
352+
stats.received++;
353+
wait.decrementAndGet();
354+
complete(packet, future);
355+
} catch (Exception e) {
356+
die("Cant read answer", e);
357+
return;
356358
}
357-
} catch (Exception e) {
358-
die("Cant init thread", e);
359359
}
360360
}
361361

@@ -414,7 +414,7 @@ protected void complete(TarantoolPacket packet, CompletableFuture<?> q) {
414414

415415
protected void completeSql(CompletableFuture<?> q, TarantoolPacket pack) {
416416
Long rowCount = SqlProtoUtils.getSqlRowCount(pack);
417-
if (rowCount!=null) {
417+
if (rowCount != null) {
418418
((CompletableFuture) q).complete(rowCount);
419419
} else {
420420
List<Map<String, Object>> values = SqlProtoUtils.readSqlResult(pack);
@@ -477,6 +477,40 @@ protected void stopIO() {
477477
closeChannel(channel);
478478
}
479479

480+
/**
481+
* Blocks until a reconnection process can be carried on
482+
*
483+
* @see #trySignalForReconnection()
484+
*/
485+
private void awaitReconnection() {
486+
connectorLock.lock();
487+
try {
488+
while (state.getState() != StateHelper.RECONNECT) {
489+
reconnectRequired.await();
490+
}
491+
} catch (InterruptedException ignored) {
492+
Thread.currentThread().interrupt();
493+
} finally {
494+
connectorLock.unlock();
495+
}
496+
}
497+
498+
/**
499+
* Signals to the connector that reconnection process can be performed
500+
*
501+
* @see #awaitReconnection()
502+
*/
503+
private void trySignalForReconnection() {
504+
if (state.compareAndSet(StateHelper.UNINITIALIZED, StateHelper.RECONNECT)) {
505+
connectorLock.lock();
506+
try {
507+
reconnectRequired.signal();
508+
} finally {
509+
connectorLock.unlock();
510+
}
511+
}
512+
}
513+
480514
@Override
481515
public boolean isAlive() {
482516
return state.getState() == StateHelper.ALIVE && thumbstone == null;
@@ -499,7 +533,7 @@ public TarantoolClientOps<Integer, List<?>, Object, List<?>> syncOps() {
499533

500534
@Override
501535
public TarantoolClientOps<Integer, List<?>, Object, Future<List<?>>> asyncOps() {
502-
return (TarantoolClientOps)this;
536+
return (TarantoolClientOps) this;
503537
}
504538

505539
@Override
@@ -515,7 +549,7 @@ public TarantoolClientOps<Integer, List<?>, Object, Long> fireAndForgetOps() {
515549

516550
@Override
517551
public TarantoolSQLOps<Object, Long, List<Map<String, Object>>> sqlSyncOps() {
518-
return new TarantoolSQLOps<Object, Long, List<Map<String,Object>>>() {
552+
return new TarantoolSQLOps<Object, Long, List<Map<String, Object>>>() {
519553

520554
@Override
521555
public Long update(String sql, Object... bind) {
@@ -531,7 +565,7 @@ public List<Map<String, Object>> query(String sql, Object... bind) {
531565

532566
@Override
533567
public TarantoolSQLOps<Object, Future<Long>, Future<List<Map<String, Object>>>> sqlAsyncOps() {
534-
return new TarantoolSQLOps<Object, Future<Long>, Future<List<Map<String,Object>>>>() {
568+
return new TarantoolSQLOps<Object, Future<Long>, Future<List<Map<String, Object>>>>() {
535569
@Override
536570
public Future<Long> update(String sql, Object... bind) {
537571
return (Future<Long>) exec(Code.EXECUTE, Key.SQL_TEXT, sql, Key.SQL_BIND, bind);
@@ -619,6 +653,7 @@ public TarantoolClientStats getStats() {
619653
* Manages state changes.
620654
*/
621655
protected final class StateHelper {
656+
static final int UNINITIALIZED = 0;
622657
static final int READING = 1;
623658
static final int WRITING = 2;
624659
static final int ALIVE = READING | WRITING;
@@ -628,7 +663,7 @@ protected final class StateHelper {
628663
private final AtomicInteger state;
629664

630665
private final AtomicReference<CountDownLatch> nextAliveLatch =
631-
new AtomicReference<CountDownLatch>(new CountDownLatch(1));
666+
new AtomicReference<CountDownLatch>(new CountDownLatch(1));
632667

633668
private final CountDownLatch closedLatch = new CountDownLatch(1);
634669

@@ -641,7 +676,7 @@ protected int getState() {
641676
}
642677

643678
protected boolean close() {
644-
for (;;) {
679+
for (; ; ) {
645680
int st = getState();
646681
if ((st & CLOSED) == CLOSED)
647682
return false;
@@ -651,24 +686,29 @@ protected boolean close() {
651686
}
652687

653688
protected boolean acquire(int mask) {
654-
for (;;) {
655-
int st = getState();
656-
if ((st & CLOSED) == CLOSED)
689+
for (; ; ) {
690+
int currentState = getState();
691+
if ((currentState & CLOSED) == CLOSED) {
657692
return false;
658-
659-
if ((st & mask) != 0)
693+
}
694+
if ((currentState & RECONNECT) > mask) {
695+
return false;
696+
}
697+
if ((currentState & mask) != 0) {
660698
throw new IllegalStateException("State is already " + mask);
661-
662-
if (compareAndSet(st, st | mask))
699+
}
700+
if (compareAndSet(currentState, currentState | mask)) {
663701
return true;
702+
}
664703
}
665704
}
666705

667706
protected void release(int mask) {
668-
for (;;) {
707+
for (; ; ) {
669708
int st = getState();
670-
if (compareAndSet(st, st & ~mask))
709+
if (compareAndSet(st, st & ~mask)) {
671710
return;
711+
}
672712
}
673713
}
674714

@@ -710,7 +750,7 @@ private CountDownLatch getStateLatch(int state) {
710750
CountDownLatch latch = nextAliveLatch.get();
711751
/* It may happen so that an error is detected but the state is still alive.
712752
Wait for the 'next' alive state in such cases. */
713-
return (getState() == ALIVE && thumbstone == null) ? null : latch;
753+
return (getState() == ALIVE && thumbstone == null) ? null : latch;
714754
}
715755
return null;
716756
}

0 commit comments

Comments
 (0)