Skip to content

Commit 3f30b17

Browse files
committed
Avoid deadlock when subscribing/unsubscribing from multiple threads
Manager and manager pool in consumers coordinator used synchronized block and called each other method which were themselves synchronized, causing deadlock when the calls where interleaved. This commit replaces the synchronized methods in manager by a synchronized block on the manager pool (aka "owner"). This avoids the deadlock at the cost of making managers waiting on their owner lock. This should be fine in terms of performance as those classes are called only when subscribing/unsubscribing, which are synchronous operations. The cost of the network communication should always be higher than the cost of synchronization. Fixes #49
1 parent 477430c commit 3f30b17

File tree

2 files changed

+172
-107
lines changed

2 files changed

+172
-107
lines changed

src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java

+115-107
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ private ClientSubscriptionsManager(
476476
stream);
477477

478478
Set<SubscriptionTracker> affectedSubscriptions;
479-
synchronized (ClientSubscriptionsManager.this) {
479+
synchronized (this.owner) {
480480
Set<SubscriptionTracker> subscriptions = streamToStreamSubscriptions.remove(stream);
481481
if (subscriptions != null && !subscriptions.isEmpty()) {
482482
List<SubscriptionTracker> newSubscriptions =
@@ -616,129 +616,137 @@ private void assignConsumersToStream(
616616
});
617617
}
618618

619-
synchronized void add(
619+
void add(
620620
SubscriptionTracker subscriptionTracker,
621621
OffsetSpecification offsetSpecification,
622622
boolean isInitialSubscription) {
623-
// FIXME check manager is still open (not closed because of connection failure)
624-
byte subscriptionId = 0;
625-
for (int i = 0; i < MAX_SUBSCRIPTIONS_PER_CLIENT; i++) {
626-
if (subscriptionTrackers.get(i) == null) {
627-
subscriptionId = (byte) i;
628-
break;
623+
synchronized (this.owner) {
624+
625+
// FIXME check manager is still open (not closed because of connection failure)
626+
byte subscriptionId = 0;
627+
for (int i = 0; i < MAX_SUBSCRIPTIONS_PER_CLIENT; i++) {
628+
if (subscriptionTrackers.get(i) == null) {
629+
subscriptionId = (byte) i;
630+
break;
631+
}
629632
}
630-
}
631633

632-
List<SubscriptionTracker> previousSubscriptions = this.subscriptionTrackers;
633-
634-
LOGGER.debug(
635-
"Subscribing to {}, requested offset specification is {}, offset tracking reference is {}",
636-
subscriptionTracker.stream,
637-
offsetSpecification == null ? DEFAULT_OFFSET_SPECIFICATION : offsetSpecification,
638-
subscriptionTracker.offsetTrackingReference);
639-
try {
640-
// updating data structures before subscribing
641-
// (to make sure they are up-to-date in case message would arrive super fast)
642-
subscriptionTracker.assign(subscriptionId, this);
643-
streamToStreamSubscriptions
644-
.computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet())
645-
.add(subscriptionTracker);
646-
this.subscriptionTrackers =
647-
update(previousSubscriptions, subscriptionId, subscriptionTracker);
648-
649-
String offsetTrackingReference = subscriptionTracker.offsetTrackingReference;
650-
if (offsetTrackingReference != null) {
651-
QueryOffsetResponse queryOffsetResponse =
652-
client.queryOffset(offsetTrackingReference, subscriptionTracker.stream);
653-
if (queryOffsetResponse.isOk() && queryOffsetResponse.getOffset() != 0) {
654-
if (offsetSpecification != null && isInitialSubscription) {
655-
// subscription call (not recovery), so telling the user their offset specification is
656-
// ignored
657-
LOGGER.info(
658-
"Requested offset specification {} not used in favor of stored offset found for reference {}",
659-
offsetSpecification,
660-
offsetTrackingReference);
634+
List<SubscriptionTracker> previousSubscriptions = this.subscriptionTrackers;
635+
636+
LOGGER.debug(
637+
"Subscribing to {}, requested offset specification is {}, offset tracking reference is {}",
638+
subscriptionTracker.stream,
639+
offsetSpecification == null ? DEFAULT_OFFSET_SPECIFICATION : offsetSpecification,
640+
subscriptionTracker.offsetTrackingReference);
641+
try {
642+
// updating data structures before subscribing
643+
// (to make sure they are up-to-date in case message would arrive super fast)
644+
subscriptionTracker.assign(subscriptionId, this);
645+
streamToStreamSubscriptions
646+
.computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet())
647+
.add(subscriptionTracker);
648+
this.subscriptionTrackers =
649+
update(previousSubscriptions, subscriptionId, subscriptionTracker);
650+
651+
String offsetTrackingReference = subscriptionTracker.offsetTrackingReference;
652+
if (offsetTrackingReference != null) {
653+
QueryOffsetResponse queryOffsetResponse =
654+
client.queryOffset(offsetTrackingReference, subscriptionTracker.stream);
655+
if (queryOffsetResponse.isOk() && queryOffsetResponse.getOffset() != 0) {
656+
if (offsetSpecification != null && isInitialSubscription) {
657+
// subscription call (not recovery), so telling the user their offset specification
658+
// is
659+
// ignored
660+
LOGGER.info(
661+
"Requested offset specification {} not used in favor of stored offset found for reference {}",
662+
offsetSpecification,
663+
offsetTrackingReference);
664+
}
665+
LOGGER.debug(
666+
"Using offset {} to start consuming from {} with consumer {} "
667+
+ "(instead of {})",
668+
queryOffsetResponse.getOffset(),
669+
subscriptionTracker.stream,
670+
offsetTrackingReference,
671+
offsetSpecification);
672+
offsetSpecification = OffsetSpecification.offset(queryOffsetResponse.getOffset() + 1);
661673
}
662-
LOGGER.debug(
663-
"Using offset {} to start consuming from {} with consumer {} " + "(instead of {})",
664-
queryOffsetResponse.getOffset(),
665-
subscriptionTracker.stream,
666-
offsetTrackingReference,
667-
offsetSpecification);
668-
offsetSpecification = OffsetSpecification.offset(queryOffsetResponse.getOffset() + 1);
669674
}
670-
}
671675

672-
offsetSpecification =
673-
offsetSpecification == null ? DEFAULT_OFFSET_SPECIFICATION : offsetSpecification;
676+
offsetSpecification =
677+
offsetSpecification == null ? DEFAULT_OFFSET_SPECIFICATION : offsetSpecification;
674678

675-
Map<String, String> subscriptionProperties = Collections.emptyMap();
676-
if (subscriptionTracker.offsetTrackingReference != null) {
677-
subscriptionProperties = new HashMap<>(1);
678-
subscriptionProperties.put("name", subscriptionTracker.offsetTrackingReference);
679-
}
679+
Map<String, String> subscriptionProperties = Collections.emptyMap();
680+
if (subscriptionTracker.offsetTrackingReference != null) {
681+
subscriptionProperties = new HashMap<>(1);
682+
subscriptionProperties.put("name", subscriptionTracker.offsetTrackingReference);
683+
}
680684

681-
SubscriptionContext subscriptionContext =
682-
new DefaultSubscriptionContext(offsetSpecification);
683-
subscriptionTracker.subscriptionListener.preSubscribe(subscriptionContext);
684-
LOGGER.info(
685-
"Computed offset specification {}, offset specification used after subscription listener {}",
686-
offsetSpecification,
687-
subscriptionContext.offsetSpecification());
688-
689-
// FIXME consider using fewer initial credits
690-
Client.Response subscribeResponse =
691-
client.subscribe(
692-
subscriptionId,
693-
subscriptionTracker.stream,
694-
subscriptionContext.offsetSpecification(),
695-
10,
696-
subscriptionProperties);
697-
if (!subscribeResponse.isOk()) {
698-
String message =
699-
"Subscription to stream "
700-
+ subscriptionTracker.stream
701-
+ " failed with code "
702-
+ formatConstant(subscribeResponse.getResponseCode());
703-
LOGGER.debug(message);
704-
throw new StreamException(message);
685+
SubscriptionContext subscriptionContext =
686+
new DefaultSubscriptionContext(offsetSpecification);
687+
subscriptionTracker.subscriptionListener.preSubscribe(subscriptionContext);
688+
LOGGER.info(
689+
"Computed offset specification {}, offset specification used after subscription listener {}",
690+
offsetSpecification,
691+
subscriptionContext.offsetSpecification());
692+
693+
// FIXME consider using fewer initial credits
694+
Client.Response subscribeResponse =
695+
client.subscribe(
696+
subscriptionId,
697+
subscriptionTracker.stream,
698+
subscriptionContext.offsetSpecification(),
699+
10,
700+
subscriptionProperties);
701+
if (!subscribeResponse.isOk()) {
702+
String message =
703+
"Subscription to stream "
704+
+ subscriptionTracker.stream
705+
+ " failed with code "
706+
+ formatConstant(subscribeResponse.getResponseCode());
707+
LOGGER.debug(message);
708+
throw new StreamException(message);
709+
}
710+
} catch (RuntimeException e) {
711+
subscriptionTracker.assign((byte) -1, null);
712+
this.subscriptionTrackers = previousSubscriptions;
713+
streamToStreamSubscriptions
714+
.computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet())
715+
.remove(subscriptionTracker);
716+
throw e;
705717
}
706-
} catch (RuntimeException e) {
707-
subscriptionTracker.assign((byte) -1, null);
708-
this.subscriptionTrackers = previousSubscriptions;
709-
streamToStreamSubscriptions
710-
.computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet())
711-
.remove(subscriptionTracker);
712-
throw e;
713-
}
714718

715-
LOGGER.debug("Subscribed to {}", subscriptionTracker.stream);
719+
LOGGER.debug("Subscribed to {}", subscriptionTracker.stream);
720+
}
716721
}
717722

718723
synchronized void remove(SubscriptionTracker subscriptionTracker) {
719-
// FIXME check manager is still open (not closed because of connection failure)
720-
byte subscriptionIdInClient = subscriptionTracker.subscriptionIdInClient;
721-
Client.Response unsubscribeResponse = client.unsubscribe(subscriptionIdInClient);
722-
if (!unsubscribeResponse.isOk()) {
723-
LOGGER.warn(
724-
"Unexpected response code when unsubscribing from {}: {} (subscription ID {})",
724+
synchronized (this.owner) {
725+
726+
// FIXME check manager is still open (not closed because of connection failure)
727+
byte subscriptionIdInClient = subscriptionTracker.subscriptionIdInClient;
728+
Client.Response unsubscribeResponse = client.unsubscribe(subscriptionIdInClient);
729+
if (!unsubscribeResponse.isOk()) {
730+
LOGGER.warn(
731+
"Unexpected response code when unsubscribing from {}: {} (subscription ID {})",
732+
subscriptionTracker.stream,
733+
formatConstant(unsubscribeResponse.getResponseCode()),
734+
subscriptionIdInClient);
735+
}
736+
this.subscriptionTrackers = update(this.subscriptionTrackers, subscriptionIdInClient, null);
737+
streamToStreamSubscriptions.compute(
725738
subscriptionTracker.stream,
726-
formatConstant(unsubscribeResponse.getResponseCode()),
727-
subscriptionIdInClient);
739+
(stream, subscriptionsForThisStream) -> {
740+
if (subscriptionsForThisStream == null || subscriptionsForThisStream.isEmpty()) {
741+
// should not happen
742+
return null;
743+
} else {
744+
subscriptionsForThisStream.remove(subscriptionTracker);
745+
return subscriptionsForThisStream.isEmpty() ? null : subscriptionsForThisStream;
746+
}
747+
});
748+
this.owner.maybeDisposeManager(this);
728749
}
729-
this.subscriptionTrackers = update(this.subscriptionTrackers, subscriptionIdInClient, null);
730-
streamToStreamSubscriptions.compute(
731-
subscriptionTracker.stream,
732-
(stream, subscriptionsForThisStream) -> {
733-
if (subscriptionsForThisStream == null || subscriptionsForThisStream.isEmpty()) {
734-
// should not happen
735-
return null;
736-
} else {
737-
subscriptionsForThisStream.remove(subscriptionTracker);
738-
return subscriptionsForThisStream.isEmpty() ? null : subscriptionsForThisStream;
739-
}
740-
});
741-
this.owner.maybeDisposeManager(this);
742750
}
743751

744752
private List<SubscriptionTracker> update(

src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java

+57
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import static com.rabbitmq.stream.BackOffDelayPolicy.fixedWithInitialDelay;
1717
import static com.rabbitmq.stream.impl.TestUtils.b;
18+
import static com.rabbitmq.stream.impl.TestUtils.latchAssert;
1819
import static com.rabbitmq.stream.impl.TestUtils.metadata;
1920
import static com.rabbitmq.stream.impl.TestUtils.namedConsumer;
2021
import static org.assertj.core.api.Assertions.assertThat;
@@ -47,6 +48,8 @@
4748
import java.util.Map;
4849
import java.util.concurrent.ConcurrentHashMap;
4950
import java.util.concurrent.CopyOnWriteArrayList;
51+
import java.util.concurrent.CountDownLatch;
52+
import java.util.concurrent.ExecutorService;
5053
import java.util.concurrent.Executors;
5154
import java.util.concurrent.ScheduledExecutorService;
5255
import java.util.concurrent.atomic.AtomicInteger;
@@ -1156,6 +1159,60 @@ void shouldUseStoredOffsetOnRecovery(Consumer<ConsumersCoordinatorTest> configur
11561159
verify(client, times(1)).unsubscribe(subscriptionIdCaptor.getValue());
11571160
}
11581161

1162+
@Test
1163+
void subscribeUnsubscribeInDifferentThreadsShouldNotDeadlock() throws Exception {
1164+
when(locator.metadata("stream")).thenReturn(metadata(null, replicas()));
1165+
1166+
when(clientFactory.client(any())).thenReturn(client);
1167+
when(client.subscribe(
1168+
subscriptionIdCaptor.capture(),
1169+
anyString(),
1170+
any(OffsetSpecification.class),
1171+
anyInt(),
1172+
anyMap()))
1173+
.thenReturn(new Client.Response(Constants.RESPONSE_CODE_OK));
1174+
when(client.unsubscribe(anyByte())).thenReturn(new Client.Response(Constants.RESPONSE_CODE_OK));
1175+
1176+
ExecutorService executorService = Executors.newFixedThreadPool(2);
1177+
1178+
try {
1179+
Runnable subUnsub =
1180+
() -> {
1181+
Runnable closingRunnable =
1182+
coordinator.subscribe(
1183+
consumer,
1184+
"stream",
1185+
OffsetSpecification.first(),
1186+
null,
1187+
NO_OP_SUBSCRIPTION_LISTENER,
1188+
(offset, message) -> {});
1189+
1190+
closingRunnable.run();
1191+
};
1192+
CountDownLatch latch = new CountDownLatch(2);
1193+
executorService.submit(
1194+
() -> {
1195+
int count = 0;
1196+
while (count++ < 100) {
1197+
subUnsub.run();
1198+
}
1199+
latch.countDown();
1200+
});
1201+
executorService.submit(
1202+
() -> {
1203+
int count = 0;
1204+
while (count++ < 100) {
1205+
subUnsub.run();
1206+
}
1207+
latch.countDown();
1208+
});
1209+
1210+
assertThat(latchAssert(latch)).completes();
1211+
} finally {
1212+
executorService.shutdownNow();
1213+
}
1214+
}
1215+
11591216
Client.Broker leader() {
11601217
return new Client.Broker("leader", -1);
11611218
}

0 commit comments

Comments
 (0)