From 246133c3778f900ce927c1a1b7c41b38207d0f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Fri, 25 Nov 2022 18:17:27 +0100 Subject: [PATCH 01/10] Improve recovery Create a locator connection for each provided URI. This way another connection can take over when the locator node goes down. This speeds up recovery. Track scheduled tasks. This is likely to be disabled in a stable release; it is useful to track down unfinished tasks. Add retry to operations in the consumer coordinator. Refresh consumer candidate nodes if the re-assignment of a consumer times out. These improvements are based on feedback from observing the effects of a rolling restart in K8S using stream-perf-test: not all producers and consumers are recovered after all nodes have been restarted. The changes in this commit mitigate this problem. --- .../com/rabbitmq/stream/impl/AsyncRetry.java | 79 +-- .../java/com/rabbitmq/stream/impl/Client.java | 17 +- .../stream/impl/ConsumersCoordinator.java | 535 +++++++++++------- .../impl/OffsetTrackingCoordinator.java | 58 +- .../stream/impl/ProducersCoordinator.java | 61 +- .../impl/ScheduledExecutorServiceWrapper.java | 235 ++++++++ .../rabbitmq/stream/impl/StreamConsumer.java | 24 +- .../stream/impl/StreamEnvironment.java | 297 +++++++--- .../rabbitmq/stream/impl/StreamProducer.java | 46 +- .../stream/impl/StreamStreamCreator.java | 7 +- .../stream/impl/SuperStreamConsumer.java | 9 +- .../stream/impl/SuperStreamProducer.java | 32 +- .../stream/impl/TimeoutStreamException.java | 23 + .../java/com/rabbitmq/stream/impl/Utils.java | 50 ++ src/test/java/com/rabbitmq/stream/Host.java | 38 +- .../stream/impl/ConsumersCoordinatorTest.java | 143 ++++- .../stream/impl/MonitoringTestUtils.java | 14 +- .../impl/OffsetTrackingCoordinatorTest.java | 2 +- .../stream/impl/ProducersCoordinatorTest.java | 16 +- .../stream/impl/StreamEnvironmentTest.java | 73 ++- .../impl/StreamEnvironmentUnitTest.java | 2 + .../stream/impl/StreamProducerUnitTest.java | 1 - 22 files changed, 1304 insertions(+), 458 deletions(-) create mode 100644 src/main/java/com/rabbitmq/stream/impl/ScheduledExecutorServiceWrapper.java create mode 100644 src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java diff --git a/src/main/java/com/rabbitmq/stream/impl/AsyncRetry.java b/src/main/java/com/rabbitmq/stream/impl/AsyncRetry.java index aa1f6c8535..4577700354 100644 --- a/src/main/java/com/rabbitmq/stream/impl/AsyncRetry.java +++ b/src/main/java/com/rabbitmq/stream/impl/AsyncRetry.java @@ -13,6 +13,8 @@ // info@rabbitmq.com. 
package com.rabbitmq.stream.impl; +import static com.rabbitmq.stream.impl.Utils.namedRunnable; + import com.rabbitmq.stream.BackOffDelayPolicy; import java.time.Duration; import java.util.concurrent.Callable; @@ -42,47 +44,56 @@ private AsyncRetry( AtomicReference retryableTaskReference = new AtomicReference<>(); AtomicInteger attempts = new AtomicInteger(0); Runnable retryableTask = - () -> { - if (Thread.currentThread().isInterrupted()) { - LOGGER.debug("Task '{}' interrupted, failing future", description); - this.completableFuture.completeExceptionally(new CancellationException()); - return; - } - try { - V result = task.call(); - LOGGER.debug("Task '{}' succeeded, completing future", description); - completableFuture.complete(result); - } catch (Exception e) { - int attemptCount = attempts.getAndIncrement(); - if (retry.test(e)) { - if (delayPolicy.delay(attemptCount).equals(BackOffDelayPolicy.TIMEOUT)) { - LOGGER.debug( - "Retryable attempts for task '{}' timed out, failing future", description); - this.completableFuture.completeExceptionally(new RetryTimeoutException()); - } else { - LOGGER.debug( - "Retryable exception ({}) for task '{}', scheduling another attempt", - e.getClass().getSimpleName(), - description); - scheduler.schedule( - retryableTaskReference.get(), - delayPolicy.delay(attemptCount).toMillis(), - TimeUnit.MILLISECONDS); + namedRunnable( + () -> { + if (Thread.currentThread().isInterrupted()) { + LOGGER.debug("Task '{}' interrupted, failing future", description); + this.completableFuture.completeExceptionally(new CancellationException()); + return; + } + try { + V result = task.call(); + LOGGER.debug("Task '{}' succeeded, completing future", description); + completableFuture.complete(result); + } catch (Exception e) { + int attemptCount = attempts.getAndIncrement(); + if (retry.test(e)) { + if (delayPolicy.delay(attemptCount).equals(BackOffDelayPolicy.TIMEOUT)) { + LOGGER.debug( + "Retryable attempts for task '{}' timed out, failing future", description); + this.completableFuture.completeExceptionally(new RetryTimeoutException()); + } else { + LOGGER.debug( + "Retryable exception ({}) for task '{}', scheduling another attempt", + e.getClass().getSimpleName(), + description); + schedule( + scheduler, retryableTaskReference.get(), delayPolicy.delay(attemptCount)); + } + } else { + LOGGER.debug( + "Non-retryable exception for task '{}', failing future", description); + this.completableFuture.completeExceptionally(e); + } } - } else { - LOGGER.debug("Non-retryable exception for task '{}', failing future", description); - this.completableFuture.completeExceptionally(e); - } - } - }; + }, + description); retryableTaskReference.set(retryableTask); Duration initialDelay = delayPolicy.delay(attempts.getAndIncrement()); LOGGER.debug("Scheduling task '{}' with policy {}", description, delayPolicy); if (initialDelay.isZero()) { retryableTask.run(); } else { - scheduler.schedule( - retryableTaskReference.get(), initialDelay.toMillis(), TimeUnit.MILLISECONDS); + schedule(scheduler, retryableTaskReference.get(), initialDelay); + } + } + + private static void schedule( + ScheduledExecutorService scheduler, Runnable command, Duration delay) { + try { + scheduler.schedule(command, delay.toMillis(), TimeUnit.MILLISECONDS); + } catch (RuntimeException e) { + LOGGER.debug("Error while scheduling command", e); } } diff --git a/src/main/java/com/rabbitmq/stream/impl/Client.java b/src/main/java/com/rabbitmq/stream/impl/Client.java index 3079c08201..bd12e8e424 100644 --- 
a/src/main/java/com/rabbitmq/stream/impl/Client.java +++ b/src/main/java/com/rabbitmq/stream/impl/Client.java @@ -308,8 +308,18 @@ public void write( }); ChannelFuture f; + String clientConnectionName = + parameters.clientProperties == null + ? "" + : (parameters.clientProperties.containsKey("connection_name") + ? parameters.clientProperties.get("connection_name") + : ""); try { - LOGGER.debug("Trying to create stream connection to {}:{}", parameters.host, parameters.port); + LOGGER.debug( + "Trying to create stream connection to {}:{}, with client connection name '{}'", + parameters.host, + parameters.port, + clientConnectionName); f = b.connect(parameters.host, parameters.port).sync(); this.host = parameters.host; this.port = parameters.port; @@ -1113,6 +1123,9 @@ public QueryOffsetResponse queryOffset(String reference, String stream) { request.block(); QueryOffsetResponse response = request.response.get(); return response; + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); throw new StreamException(e); @@ -2284,7 +2297,7 @@ void block() { throw new StreamException("Interrupted while waiting for response"); } if (!completed) { - throw new StreamException("Could not get response in " + timeout.toMillis() + " ms"); + throw new TimeoutStreamException("Could not get response in " + timeout.toMillis() + " ms"); } } diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index 3b5ae24fd2..7d6d23d733 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -15,6 +15,9 @@ import static com.rabbitmq.stream.impl.Utils.formatConstant; import static com.rabbitmq.stream.impl.Utils.isSac; +import static com.rabbitmq.stream.impl.Utils.namedFunction; +import static com.rabbitmq.stream.impl.Utils.namedRunnable; +import static java.lang.String.format; import com.rabbitmq.stream.BackOffDelayPolicy; import com.rabbitmq.stream.Constants; @@ -26,6 +29,7 @@ import com.rabbitmq.stream.StreamException; import com.rabbitmq.stream.SubscriptionListener; import com.rabbitmq.stream.SubscriptionListener.SubscriptionContext; +import com.rabbitmq.stream.impl.Client.Broker; import com.rabbitmq.stream.impl.Client.ChunkListener; import com.rabbitmq.stream.impl.Client.ConsumerUpdateListener; import com.rabbitmq.stream.impl.Client.CreditNotification; @@ -49,6 +53,8 @@ import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; +import java.util.function.Predicate; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.slf4j.Logger; @@ -97,7 +103,6 @@ Runnable subscribe( Runnable trackingClosingCallback, MessageHandler messageHandler, Map subscriptionProperties) { - // FIXME fail immediately if there's no locator (can provide a supplier that does not retry) List candidates = findBrokersForStream(stream); Client.Broker newNode = pickBroker(candidates); if (newNode == null) { @@ -141,9 +146,10 @@ Runnable subscribe( // package protected for testing List findBrokersForStream(String stream) { - // FIXME make sure locator is not null (retry) Map metadata = - this.environment.locatorOperation(c -> c.metadata(stream)); + this.environment.locatorOperation( + namedFunction( + c -> c.metadata(stream), 
"Candidate lookup to consume from '%s'", stream)); if (metadata.size() == 0 || metadata.get(stream) == null) { // this is not supposed to happen throw new StreamDoesNotExistException(stream); @@ -368,29 +374,54 @@ private ManagerPool(String name, Client.ClientParameters clientParameters) { managers.add(new ClientSubscriptionsManager(this, clientParameters)); } - private synchronized void add( + private void add( SubscriptionTracker subscriptionTracker, OffsetSpecification offsetSpecification, boolean isInitialSubscription) { - boolean added = false; - // FIXME deal with manager unavailability (manager may be closing because of connection - // closing) - // try all of them until it succeeds, throw exception if failure - for (ClientSubscriptionsManager manager : managers) { - if (!manager.isFull()) { - manager.add(subscriptionTracker, offsetSpecification, isInitialSubscription); - added = true; - break; + + ClientSubscriptionsManager pickedManager = null; + while (pickedManager == null) { + // FIXME deal with manager unavailability (manager may be closing because of connection + // closing) + synchronized (this) { + for (ClientSubscriptionsManager manager : managers) { + if (!manager.isFull()) { + pickedManager = manager; + break; + } + } + if (pickedManager == null) { + LOGGER.debug( + "Creating subscription manager on {}, this is subscription manager #{}", + name, + managers.size() + 1); + pickedManager = new ClientSubscriptionsManager(this, clientParameters); + managers.add(pickedManager); + } + } + try { + pickedManager.add(subscriptionTracker, offsetSpecification, isInitialSubscription); + } catch (IllegalStateException e) { + pickedManager = null; + } catch (TimeoutStreamException e) { + // it's very likely the manager connection is dead + // scheduling its closing in another thread to avoid blocking this one + if (pickedManager.isEmpty()) { + ClientSubscriptionsManager manager = pickedManager; + ConsumersCoordinator.this.environment.execute( + () -> { + this.remove(manager); + manager.close(); + }, + "Consumer manager closing after timeout, consumer %d on stream '%s'", + subscriptionTracker.consumer.id(), + subscriptionTracker.stream); + } + throw e; + } catch (RuntimeException e) { + this.maybeDisposeManager(pickedManager); + throw e; } - } - if (!added) { - LOGGER.debug( - "Creating subscription manager on {}, this is subscription manager #{}", - name, - managers.size() + 1); - ClientSubscriptionsManager manager = new ClientSubscriptionsManager(this, clientParameters); - managers.add(manager); - manager.add(subscriptionTracker, offsetSpecification, isInitialSubscription); } } @@ -437,8 +468,10 @@ private class ClientSubscriptionsManager { private final Map> streamToStreamSubscriptions = new ConcurrentHashMap<>(); private final ManagerPool owner; + // trackers and tracker must be kept in sync private volatile List subscriptionTrackers = new ArrayList<>(maxConsumersByConnection); + private volatile int trackerCount = 0; private ClientSubscriptionsManager( ManagerPool owner, Client.ClientParameters clientParameters) { @@ -446,6 +479,7 @@ private ClientSubscriptionsManager( String name = owner.name; LOGGER.debug("creating subscription manager on {}", name); IntStream.range(0, maxConsumersByConnection).forEach(i -> subscriptionTrackers.add(null)); + this.trackerCount = 0; AtomicBoolean clientInitializedInManager = new AtomicBoolean(false); ChunkListener chunkListener = (client, subscriptionId, offset, messageCount, dataSize) -> { @@ -491,7 +525,7 @@ private ClientSubscriptionsManager( // 
we may be closing the client because it's not the right node, so the manager // should not be removed from its pool, because it's not really in it already if (clientInitializedInManager.get()) { - owner.remove(this); + this.owner.remove(this); } if (shutdownContext.isShutdownUnexpected()) { LOGGER.debug( @@ -500,30 +534,34 @@ private ClientSubscriptionsManager( environment .scheduledExecutorService() .execute( - () -> { - if (Thread.currentThread().isInterrupted()) { - return; - } - subscriptionTrackers.stream() - .filter(Objects::nonNull) - .forEach(SubscriptionTracker::detachFromManager); - for (Entry> entry : - streamToStreamSubscriptions.entrySet()) { - if (Thread.currentThread().isInterrupted()) { - break; - } - String stream = entry.getKey(); - LOGGER.debug( - "Re-assigning {} consumer(s) to stream {} after disconnection", - entry.getValue().size(), - stream); - assignConsumersToStream( - entry.getValue(), - stream, - attempt -> environment.recoveryBackOffDelayPolicy().delay(attempt), - false); - } - }); + namedRunnable( + () -> { + if (Thread.currentThread().isInterrupted()) { + return; + } + subscriptionTrackers.stream() + .filter(Objects::nonNull) + .forEach(SubscriptionTracker::detachFromManager); + for (Entry> entry : + streamToStreamSubscriptions.entrySet()) { + if (Thread.currentThread().isInterrupted()) { + break; + } + String stream = entry.getKey(); + LOGGER.debug( + "Re-assigning {} consumer(s) to stream {} after disconnection", + entry.getValue().size(), + stream); + assignConsumersToStream( + entry.getValue(), + stream, + attempt -> + environment.recoveryBackOffDelayPolicy().delay(attempt), + false); + } + }, + "Consumers re-assignment after disconnection from %s", + name)); } }; MetadataListener metadataListener = @@ -533,7 +571,7 @@ private ClientSubscriptionsManager( stream); Set affectedSubscriptions; - synchronized (this.owner) { + synchronized (this) { Set subscriptions = streamToStreamSubscriptions.remove(stream); if (subscriptions != null && !subscriptions.isEmpty()) { List newSubscriptions = @@ -550,7 +588,7 @@ private ClientSubscriptionsManager( newSubscriptions.set(subscription.subscriptionIdInClient & 0xFF, null); subscription.consumer.setSubscriptionClient(null); } - this.subscriptionTrackers = newSubscriptions; + this.setSubscriptionTrackers(newSubscriptions); } affectedSubscriptions = subscriptions; } @@ -561,20 +599,23 @@ private ClientSubscriptionsManager( environment .scheduledExecutorService() .execute( - () -> { - if (Thread.currentThread().isInterrupted()) { - return; - } - LOGGER.debug( - "Trying to move {} subscription(s) (stream {})", - affectedSubscriptions.size(), - stream); - assignConsumersToStream( - affectedSubscriptions, - stream, - metadataUpdateBackOffDelayPolicy(), - isEmpty()); - }); + namedRunnable( + () -> { + if (Thread.currentThread().isInterrupted()) { + return; + } + LOGGER.debug( + "Trying to move {} subscription(s) (stream {})", + affectedSubscriptions.size(), + stream); + assignConsumersToStream( + affectedSubscriptions, + stream, + metadataUpdateBackOffDelayPolicy(), + isEmpty()); + }, + "Consumers re-assignment after metadata update on stream '%s'", + stream)); } }; ConsumerUpdateListener consumerUpdateListener = @@ -631,53 +672,75 @@ private void assignConsumersToStream( }; AsyncRetry.asyncRetry(() -> findBrokersForStream(stream)) - .description("Candidate lookup to consume from " + stream) + .description("Candidate lookup to consume from '%s'", stream) .scheduler(environment.scheduledExecutorService()) .retry(ex -> !(ex 
instanceof StreamDoesNotExistException)) .delayPolicy(delayPolicy) .build() .thenAccept( - candidates -> { + candidateNodes -> { + List candidates = candidateNodes; if (candidates == null) { + LOGGER.debug("No candidate nodes to consume from '{}'", stream); consumersClosingCallback.run(); } else { for (SubscriptionTracker affectedSubscription : subscriptions) { - try { - if (affectedSubscription.consumer.isOpen()) { - Client.Broker broker = pickBroker(candidates); - LOGGER.debug("Using {} to resume consuming from {}", broker, stream); - String key = keyForClientSubscription(broker); - // FIXME in case the broker is no longer there, we may have to deal with an - // error here - // we could renew the list of candidates for the stream - ManagerPool subscriptionPool = - pools.computeIfAbsent( - key, - s -> - new ManagerPool( - key, - environment - .clientParametersCopy() - .host(broker.getHost()) - .port(broker.getPort()))); - synchronized (affectedSubscription.consumer) { - if (affectedSubscription.consumer.isOpen()) { - OffsetSpecification offsetSpecification; - if (affectedSubscription.hasReceivedSomething) { - offsetSpecification = - OffsetSpecification.offset(affectedSubscription.offset); - } else { - offsetSpecification = affectedSubscription.initialOffsetSpecification; + ManagerPool subscriptionPool = null; + boolean reassigned = false; + while (!reassigned) { + try { + if (affectedSubscription.consumer.isOpen()) { + Client.Broker broker = pickBroker(candidates); + LOGGER.debug("Using {} to resume consuming from {}", broker, stream); + String key = keyForClientSubscription(broker); + subscriptionPool = + pools.computeIfAbsent( + key, + s -> + new ManagerPool( + key, + environment + .clientParametersCopy() + .host(broker.getHost()) + .port(broker.getPort()))); + synchronized (affectedSubscription.consumer) { + if (affectedSubscription.consumer.isOpen()) { + OffsetSpecification offsetSpecification; + if (affectedSubscription.hasReceivedSomething) { + offsetSpecification = + OffsetSpecification.offset(affectedSubscription.offset); + } else { + offsetSpecification = + affectedSubscription.initialOffsetSpecification; + } + subscriptionPool.add( + affectedSubscription, offsetSpecification, false); + reassigned = true; } - subscriptionPool.add(affectedSubscription, offsetSpecification, false); } + } else { + LOGGER.debug("Not re-assigning consumer because it has been closed"); + } + } catch (TimeoutStreamException e) { + LOGGER.debug( + "Consumer {} re-assignment on stream {} timed out, refreshing candidates and retrying", + affectedSubscription.consumer.id(), + affectedSubscription.stream); + if (subscriptionPool != null) { + subscriptionPool.clean(); } - } else { - LOGGER.debug("Not re-assigning consumer because it has been closed"); + // maybe not a good candidate, let's refresh and retry for this one + candidates = + callAndMaybeRetry( + () -> findBrokersForStream(stream), + ex -> !(ex instanceof StreamDoesNotExistException), + "Candidate lookup to consume from '%s'", + stream); + + } catch (Exception e) { + LOGGER.warn( + "Error while re-assigning subscription from stream {}", stream, e); } - } catch (Exception e) { - LOGGER.warn( - "Error while re-assigning subscription from stream {}", stream, e); } } if (closeClient) { @@ -697,112 +760,129 @@ private void assignConsumersToStream( }); } - void add( + synchronized void add( SubscriptionTracker subscriptionTracker, OffsetSpecification offsetSpecification, boolean isInitialSubscription) { - synchronized (this.owner) { - - // FIXME check 
manager is still open (not closed because of connection failure) - byte subscriptionId = 0; - for (int i = 0; i < MAX_SUBSCRIPTIONS_PER_CLIENT; i++) { - if (subscriptionTrackers.get(i) == null) { - subscriptionId = (byte) i; - break; - } + if (this.isFull()) { + throw new IllegalStateException("Cannot add subscription tracker, the manager is full"); + } + // FIXME check manager is still open (not closed because of connection failure) + byte subscriptionId = 0; + for (int i = 0; i < MAX_SUBSCRIPTIONS_PER_CLIENT; i++) { + if (subscriptionTrackers.get(i) == null) { + subscriptionId = (byte) i; + break; } + } - List previousSubscriptions = this.subscriptionTrackers; - - LOGGER.debug( - "Subscribing to {}, requested offset specification is {}, offset tracking reference is {}, properties are {}", - subscriptionTracker.stream, - offsetSpecification == null ? DEFAULT_OFFSET_SPECIFICATION : offsetSpecification, - subscriptionTracker.offsetTrackingReference, - subscriptionTracker.subscriptionProperties); - try { - // updating data structures before subscribing - // (to make sure they are up-to-date in case message would arrive super fast) - subscriptionTracker.assign(subscriptionId, this); - streamToStreamSubscriptions - .computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet()) - .add(subscriptionTracker); - this.subscriptionTrackers = - update(previousSubscriptions, subscriptionId, subscriptionTracker); - - String offsetTrackingReference = subscriptionTracker.offsetTrackingReference; - if (offsetTrackingReference != null) { - QueryOffsetResponse queryOffsetResponse = - client.queryOffset(offsetTrackingReference, subscriptionTracker.stream); - if (queryOffsetResponse.isOk() && queryOffsetResponse.getOffset() != 0) { - if (offsetSpecification != null && isInitialSubscription) { - // subscription call (not recovery), so telling the user their offset specification - // is - // ignored - LOGGER.info( - "Requested offset specification {} not used in favor of stored offset found for reference {}", - offsetSpecification, - offsetTrackingReference); - } - LOGGER.debug( - "Using offset {} to start consuming from {} with consumer {} " - + "(instead of {})", - queryOffsetResponse.getOffset(), + List previousSubscriptions = this.subscriptionTrackers; + + LOGGER.debug( + "Subscribing to {}, requested offset specification is {}, offset tracking reference is {}, properties are {}", + subscriptionTracker.stream, + offsetSpecification == null ? 
DEFAULT_OFFSET_SPECIFICATION : offsetSpecification, + subscriptionTracker.offsetTrackingReference, + subscriptionTracker.subscriptionProperties); + try { + // updating data structures before subscribing + // (to make sure they are up-to-date in case message would arrive super fast) + subscriptionTracker.assign(subscriptionId, this); + streamToStreamSubscriptions + .computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet()) + .add(subscriptionTracker); + this.setSubscriptionTrackers( + update(previousSubscriptions, subscriptionId, subscriptionTracker)); + + String offsetTrackingReference = subscriptionTracker.offsetTrackingReference; + if (offsetTrackingReference != null) { + QueryOffsetResponse queryOffsetResponse = + callAndMaybeRetry( + () -> client.queryOffset(offsetTrackingReference, subscriptionTracker.stream), + RETRY_ON_TIMEOUT, + "Offset query for consumer %s on stream '%s' (reference %s)", + subscriptionTracker.consumer.id(), subscriptionTracker.stream, - offsetTrackingReference, - offsetSpecification); - offsetSpecification = OffsetSpecification.offset(queryOffsetResponse.getOffset() + 1); + offsetTrackingReference); + if (queryOffsetResponse.isOk() && queryOffsetResponse.getOffset() != 0) { + if (offsetSpecification != null && isInitialSubscription) { + // subscription call (not recovery), so telling the user their offset specification + // is + // ignored + LOGGER.info( + "Requested offset specification {} not used in favor of stored offset found for reference {}", + offsetSpecification, + offsetTrackingReference); } + LOGGER.debug( + "Using offset {} to start consuming from {} with consumer {} " + "(instead of {})", + queryOffsetResponse.getOffset(), + subscriptionTracker.stream, + offsetTrackingReference, + offsetSpecification); + offsetSpecification = OffsetSpecification.offset(queryOffsetResponse.getOffset() + 1); } + } - offsetSpecification = - offsetSpecification == null ? DEFAULT_OFFSET_SPECIFICATION : offsetSpecification; - - // TODO consider using/emulating ConsumerUpdateListener, to have only one API, not 2 - // even when the consumer is not a SAC. - SubscriptionContext subscriptionContext = - new DefaultSubscriptionContext(offsetSpecification); - subscriptionTracker.subscriptionListener.preSubscribe(subscriptionContext); - LOGGER.info( - "Computed offset specification {}, offset specification used after subscription listener {}", - offsetSpecification, - subscriptionContext.offsetSpecification()); - - // FIXME consider using fewer initial credits - Client.Response subscribeResponse = - client.subscribe( - subscriptionId, - subscriptionTracker.stream, - subscriptionContext.offsetSpecification(), - 10, - subscriptionTracker.subscriptionProperties); - if (!subscribeResponse.isOk()) { - String message = - "Subscription to stream " - + subscriptionTracker.stream - + " failed with code " - + formatConstant(subscribeResponse.getResponseCode()); - LOGGER.debug(message); - throw new StreamException(message); - } - } catch (RuntimeException e) { - subscriptionTracker.assign((byte) -1, null); - this.subscriptionTrackers = previousSubscriptions; - streamToStreamSubscriptions - .computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet()) - .remove(subscriptionTracker); - throw e; + offsetSpecification = + offsetSpecification == null ? DEFAULT_OFFSET_SPECIFICATION : offsetSpecification; + + // TODO consider using/emulating ConsumerUpdateListener, to have only one API, not 2 + // even when the consumer is not a SAC. 
+ SubscriptionContext subscriptionContext = + new DefaultSubscriptionContext(offsetSpecification); + subscriptionTracker.subscriptionListener.preSubscribe(subscriptionContext); + LOGGER.info( + "Computed offset specification {}, offset specification used after subscription listener {}", + offsetSpecification, + subscriptionContext.offsetSpecification()); + + // FIXME consider using fewer initial credits + byte subId = subscriptionId; + Client.Response subscribeResponse = + callAndMaybeRetry( + () -> + client.subscribe( + subId, + subscriptionTracker.stream, + subscriptionContext.offsetSpecification(), + 10, + subscriptionTracker.subscriptionProperties), + RETRY_ON_TIMEOUT, + "Subscribe request for consumer %s on stream '%s'", + subscriptionTracker.consumer.id(), + subscriptionTracker.stream); + if (!subscribeResponse.isOk()) { + String message = + "Subscription to stream " + + subscriptionTracker.stream + + " failed with code " + + formatConstant(subscribeResponse.getResponseCode()); + LOGGER.debug(message); + throw new StreamException(message); } - LOGGER.debug("Subscribed to {}", subscriptionTracker.stream); + } catch (RuntimeException e) { + subscriptionTracker.assign((byte) -1, null); + this.setSubscriptionTrackers(previousSubscriptions); + streamToStreamSubscriptions + .computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet()) + .remove(subscriptionTracker); + throw e; } + LOGGER.debug("Subscribed to {}", subscriptionTracker.stream); } - void remove(SubscriptionTracker subscriptionTracker) { - synchronized (this.owner) { - - // FIXME check manager is still open (not closed because of connection failure) - byte subscriptionIdInClient = subscriptionTracker.subscriptionIdInClient; - Client.Response unsubscribeResponse = client.unsubscribe(subscriptionIdInClient); + synchronized void remove(SubscriptionTracker subscriptionTracker) { + // FIXME check manager is still open (not closed because of connection failure) + byte subscriptionIdInClient = subscriptionTracker.subscriptionIdInClient; + try { + Client.Response unsubscribeResponse = + callAndMaybeRetry( + () -> client.unsubscribe(subscriptionIdInClient), + RETRY_ON_TIMEOUT, + "Unsubscribe request for consumer %d on stream '%s'", + subscriptionTracker.consumer.id(), + subscriptionTracker.stream); if (!unsubscribeResponse.isOk()) { LOGGER.warn( "Unexpected response code when unsubscribing from {}: {} (subscription ID {})", @@ -810,20 +890,26 @@ void remove(SubscriptionTracker subscriptionTracker) { formatConstant(unsubscribeResponse.getResponseCode()), subscriptionIdInClient); } - this.subscriptionTrackers = update(this.subscriptionTrackers, subscriptionIdInClient, null); - streamToStreamSubscriptions.compute( - subscriptionTracker.stream, - (stream, subscriptionsForThisStream) -> { - if (subscriptionsForThisStream == null || subscriptionsForThisStream.isEmpty()) { - // should not happen - return null; - } else { - subscriptionsForThisStream.remove(subscriptionTracker); - return subscriptionsForThisStream.isEmpty() ? 
null : subscriptionsForThisStream; - } - }); - this.owner.maybeDisposeManager(this); + } catch (TimeoutStreamException e) { + LOGGER.debug( + "Reached timeout when trying to unsubscribe consumer {} from stream '{}'", + subscriptionTracker.consumer.id(), + subscriptionTracker.stream); } + + this.setSubscriptionTrackers(update(this.subscriptionTrackers, subscriptionIdInClient, null)); + streamToStreamSubscriptions.compute( + subscriptionTracker.stream, + (stream, subscriptionsForThisStream) -> { + if (subscriptionsForThisStream == null || subscriptionsForThisStream.isEmpty()) { + // should not happen + return null; + } else { + subscriptionsForThisStream.remove(subscriptionTracker); + return subscriptionsForThisStream.isEmpty() ? null : subscriptionsForThisStream; + } + }); + this.owner.maybeDisposeManager(this); } private List update( @@ -836,16 +922,17 @@ private List update( return newSubcriptions; } - synchronized boolean isFull() { - return trackersCount() == maxConsumersByConnection; + private void setSubscriptionTrackers(List trackers) { + this.subscriptionTrackers = trackers; + this.trackerCount = (int) this.subscriptionTrackers.stream().filter(Objects::nonNull).count(); } - synchronized boolean isEmpty() { - return trackersCount() == 0; + boolean isFull() { + return this.trackerCount == maxConsumersByConnection; } - private synchronized int trackersCount() { - return (int) this.subscriptionTrackers.stream().filter(Objects::nonNull).count(); + boolean isEmpty() { + return this.trackerCount == 0; } synchronized void close() { @@ -906,4 +993,34 @@ private static void maybeExchangeCommandVersions(Client client) { LOGGER.info("Error while exchanging command versions: {}", e.getMessage()); } } + + static T callAndMaybeRetry( + Supplier operation, Predicate retryCondition, String format, Object... args) { + String description = format(format, args); + int attempt = 0; + RuntimeException lastException = null; + while (attempt++ < 3) { + try { + return operation.get(); + } catch (RuntimeException e) { + lastException = e; + if (retryCondition.test(e)) { + LOGGER.debug("Operation '{}' timed out, retrying...", description); + } else { + break; + } + } + } + String message = + format("Could not complete task '%s' after %d attempt(s)", description, --attempt); + LOGGER.debug(message); + if (lastException == null) { + throw new StreamException(message); + } else { + throw lastException; + } + } + + private static final Predicate RETRY_ON_TIMEOUT = + e -> e instanceof TimeoutStreamException; } diff --git a/src/main/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinator.java index 546a1b5d96..3c3bcc7973 100644 --- a/src/main/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinator.java @@ -13,6 +13,7 @@ // info@rabbitmq.com. 
package com.rabbitmq.stream.impl; +import static com.rabbitmq.stream.impl.Utils.namedRunnable; import static com.rabbitmq.stream.impl.Utils.offsetBefore; import com.rabbitmq.stream.MessageHandler.Context; @@ -83,36 +84,41 @@ Registration registerTrackingConsumer( this.checkFuture = this.executor() .scheduleAtFixedRate( - () -> { - if (flushingOnGoing.compareAndSet(false, true)) { - try { - this.clock.setTime(System.nanoTime()); - Iterator iterator = trackers.iterator(); - while (iterator.hasNext()) { - if (Thread.currentThread().isInterrupted()) { - Thread.currentThread().interrupt(); - break; - } - Tracker t = iterator.next(); - if (t.consumer().isOpen()) { - try { - t.flushIfNecessary(); - } catch (Exception e) { - LOGGER.info("Error while flushing tracker: {}", e.getMessage()); + namedRunnable( + () -> { + if (flushingOnGoing.compareAndSet(false, true)) { + try { + this.clock.setTime(System.nanoTime()); + LOGGER.debug( + "Background offset tracking flushing, {} tracker(s) to check"); + Iterator iterator = trackers.iterator(); + while (iterator.hasNext()) { + if (Thread.currentThread().isInterrupted()) { + Thread.currentThread().interrupt(); + break; + } + Tracker t = iterator.next(); + if (t.consumer().isOpen()) { + try { + t.flushIfNecessary(); + } catch (Exception e) { + LOGGER.info("Error while flushing tracker: {}", e.getMessage()); + } + } else { + iterator.remove(); + } } - } else { - iterator.remove(); + } finally { + flushingOnGoing.set(false); } - } - } finally { - flushingOnGoing.set(false); - } - // TODO consider cancelling the task if there are no more consumers to track - // it should then be restarted on demand. + // TODO consider cancelling the task if there are no more consumers to + // track + // it should then be restarted on demand. 
- } - }, + } + }, + "Offset tracking background task"), this.checkInterval.toMillis(), this.checkInterval.toMillis(), TimeUnit.MILLISECONDS); diff --git a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java index 6e1f59ca43..f72360721e 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java @@ -14,6 +14,8 @@ package com.rabbitmq.stream.impl; import static com.rabbitmq.stream.impl.Utils.formatConstant; +import static com.rabbitmq.stream.impl.Utils.namedFunction; +import static com.rabbitmq.stream.impl.Utils.namedRunnable; import com.rabbitmq.stream.BackOffDelayPolicy; import com.rabbitmq.stream.Constants; @@ -101,7 +103,8 @@ private Runnable registerAgentTracker(AgentTracker tracker, String stream) { private Client.Broker getBrokerForProducer(String stream) { Map metadata = - this.environment.locatorOperation(c -> c.metadata(stream)); + this.environment.locatorOperation( + namedFunction(c -> c.metadata(stream), "Candidate lookup to publish to '%s'", stream)); if (metadata.size() == 0 || metadata.get(stream) == null) { throw new StreamDoesNotExistException(stream); } @@ -438,7 +441,7 @@ private ClientProducersManager( } if (shutdownContext.isShutdownUnexpected()) { LOGGER.debug( - "Recovering {} producers after unexpected connection termination", + "Recovering {} producer(s) after unexpected connection termination", producers.size()); producers.forEach((publishingId, tracker) -> tracker.unavailable()); trackingConsumerTrackers.forEach(AgentTracker::unavailable); @@ -446,18 +449,21 @@ private ClientProducersManager( environment .scheduledExecutorService() .execute( - () -> { - if (Thread.currentThread().isInterrupted()) { - return; - } - streamToTrackers.forEach( - (stream, trackers) -> { - if (!Thread.currentThread().isInterrupted()) { - assignProducersToNewManagers( - trackers, stream, environment.recoveryBackOffDelayPolicy()); - } - }); - }); + namedRunnable( + () -> { + if (Thread.currentThread().isInterrupted()) { + return; + } + streamToTrackers.forEach( + (stream, trackers) -> { + if (!Thread.currentThread().isInterrupted()) { + assignProducersToNewManagers( + trackers, stream, environment.recoveryBackOffDelayPolicy()); + } + }); + }, + "Producer recovery after disconnection from %s", + owner.name)); } }; MetadataListener metadataListener = @@ -477,18 +483,21 @@ private ClientProducersManager( environment .scheduledExecutorService() .execute( - () -> { - if (Thread.currentThread().isInterrupted()) { - return; - } - // close manager if no more trackers for it - // needs to be done in another thread than the IO thread - this.owner.maybeDisposeManager(this); - assignProducersToNewManagers( - affectedTrackers, - stream, - environment.topologyUpdateBackOffDelayPolicy()); - }); + namedRunnable( + () -> { + if (Thread.currentThread().isInterrupted()) { + return; + } + // close manager if no more trackers for it + // needs to be done in another thread than the IO thread + this.owner.maybeDisposeManager(this); + assignProducersToNewManagers( + affectedTrackers, + stream, + environment.topologyUpdateBackOffDelayPolicy()); + }, + "Producer re-assignment after metadata update on stream '%s'", + stream)); } } }; diff --git a/src/main/java/com/rabbitmq/stream/impl/ScheduledExecutorServiceWrapper.java b/src/main/java/com/rabbitmq/stream/impl/ScheduledExecutorServiceWrapper.java new file mode 100644 index 0000000000..c33d429621 --- 
/dev/null +++ b/src/main/java/com/rabbitmq/stream/impl/ScheduledExecutorServiceWrapper.java @@ -0,0 +1,235 @@ +// Copyright (c) 2022 VMware, Inc. or its affiliates. All rights reserved. +// +// This software, the RabbitMQ Stream Java client library, is dual-licensed under the +// Mozilla Public License 2.0 ("MPL"), and the Apache License version 2 ("ASL"). +// For the MPL, please see LICENSE-MPL-RabbitMQ. For the ASL, +// please see LICENSE-APACHE2. +// +// This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, +// either express or implied. See the LICENSE file for specific language governing +// rights and limitations of this software. +// +// If you have any questions regarding licensing, please contact us at +// info@rabbitmq.com. +package com.rabbitmq.stream.impl; + +import io.micrometer.core.instrument.util.NamedThreadFactory; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class ScheduledExecutorServiceWrapper implements ScheduledExecutorService { + + private static final Logger LOGGER = + LoggerFactory.getLogger(ScheduledExecutorServiceWrapper.class); + private final ScheduledExecutorService delegate; + private final Set tasks = ConcurrentHashMap.newKeySet(); + private final ScheduledExecutorService scheduler = + Executors.newSingleThreadScheduledExecutor( + new NamedThreadFactory("rabbitmq-stream-scheduled-executor-service-wrapper-")); + + ScheduledExecutorServiceWrapper(ScheduledExecutorService delegate) { + this.delegate = delegate; + Duration period = Duration.ofSeconds(10); + this.scheduler.scheduleAtFixedRate( + () -> { + LOGGER.debug("Background scheduled task check, {} task(s) submitted", this.tasks.size()); + try { + long now = System.nanoTime(); + Duration warningTimeout = Duration.ofSeconds(60); + int cleanedCount = 0; + Iterator iterator = this.tasks.iterator(); + while (iterator.hasNext()) { + Task task = iterator.next(); + if (task.isCompleted()) { + iterator.remove(); + cleanedCount++; + } else { + Duration elapsed = task.elapsedTime(now); + if (elapsed.compareTo(warningTimeout) > 0) { + LOGGER.debug( + "Warning: task {} has been running for {} second(s)", + task.description, + elapsed.getSeconds()); + } + } + } + LOGGER.debug("{} completed task(s) cleaned", cleanedCount); + } catch (Exception e) { + LOGGER.debug("Error during background scheduled task check", e.getMessage()); + } + }, + period.toMillis(), + period.toMillis(), + TimeUnit.MILLISECONDS); + } + + private void task(Runnable command, Future future) { + task(command.toString(), future); + } + + private void task(Callable callable, Future future) { + task(callable.toString(), future); + } + + private void task(String description, Future future) { + this.tasks.add(new Task(description, future)); + } + + @Override + public ScheduledFuture schedule(Runnable command, long delay, TimeUnit unit) { + ScheduledFuture future = this.delegate.schedule(command, delay, unit); + task(command, future); + return future; + } + + @Override + public ScheduledFuture schedule(Callable callable, long delay, TimeUnit unit) { + 
ScheduledFuture future = this.delegate.schedule(callable, delay, unit); + task(callable, future); + return future; + } + + @Override + public ScheduledFuture scheduleAtFixedRate( + Runnable command, long initialDelay, long period, TimeUnit unit) { + // we don't track these, because they are expected to run for a long time + LOGGER.debug("Registering scheduled at fixed rate task '%s'", command.toString()); + return this.delegate.scheduleAtFixedRate(command, initialDelay, period, unit); + } + + @Override + public ScheduledFuture scheduleWithFixedDelay( + Runnable command, long initialDelay, long delay, TimeUnit unit) { + // we don't track these, because they are expected to run for a long time + LOGGER.debug("Registering scheduled with fixed delay task '%s'", command.toString()); + return this.delegate.scheduleWithFixedDelay(command, initialDelay, delay, unit); + } + + @Override + public void shutdown() { + this.delegate.shutdown(); + } + + @Override + public List shutdownNow() { + return this.delegate.shutdownNow(); + } + + @Override + public boolean isShutdown() { + return this.delegate.isShutdown(); + } + + @Override + public boolean isTerminated() { + return this.delegate.isTerminated(); + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException { + return this.delegate.awaitTermination(timeout, unit); + } + + @Override + public Future submit(Callable task) { + Future future = this.delegate.submit(task); + task(task, future); + return future; + } + + @Override + public Future submit(Runnable task, T result) { + Future future = this.delegate.submit(task, result); + task(task, future); + return future; + } + + @Override + public Future submit(Runnable task) { + Future future = this.delegate.submit(task); + task(task, future); + return future; + } + + @Override + public List> invokeAll(Collection> tasks) + throws InterruptedException { + List> taskList = new ArrayList<>(tasks); + List> futures = this.delegate.invokeAll(taskList); + for (int i = 0; i < taskList.size(); i++) { + task(taskList.get(i), futures.get(i)); + } + return futures; + } + + @Override + public List> invokeAll( + Collection> tasks, long timeout, TimeUnit unit) + throws InterruptedException { + List> taskList = new ArrayList<>(tasks); + List> futures = this.delegate.invokeAll(taskList, timeout, unit); + for (int i = 0; i < taskList.size(); i++) { + task(taskList.get(i), futures.get(i)); + } + return futures; + } + + @Override + public T invokeAny(Collection> tasks) { + throw new UnsupportedOperationException(); + } + + @Override + public T invokeAny(Collection> tasks, long timeout, TimeUnit unit) { + throw new UnsupportedOperationException(); + } + + @Override + public void execute(Runnable command) { + Callable callable = + () -> { + command.run(); + return null; + }; + Future future = this.delegate.submit(callable); + task(command, future); + } + + static class Task { + + private final Future future; + private final String description; + private final long start; + + Task(String description, Future future) { + this.description = description; + this.future = future; + this.start = System.nanoTime(); + } + + boolean isCompleted() { + return this.future.isDone(); + } + + Duration elapsedTime(long now) { + if (now - start < 0) { + return Duration.ZERO; + } else { + return Duration.ofNanos(now - start); + } + } + } +} diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamConsumer.java b/src/main/java/com/rabbitmq/stream/impl/StreamConsumer.java index 78d785e1f0..867c41bddc 
100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamConsumer.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamConsumer.java @@ -13,10 +13,11 @@ // info@rabbitmq.com. package com.rabbitmq.stream.impl; +import static com.rabbitmq.stream.BackOffDelayPolicy.fixedWithInitialDelay; import static com.rabbitmq.stream.impl.AsyncRetry.asyncRetry; import static com.rabbitmq.stream.impl.Utils.offsetBefore; +import static java.time.Duration.ofMillis; -import com.rabbitmq.stream.BackOffDelayPolicy; import com.rabbitmq.stream.Constants; import com.rabbitmq.stream.Consumer; import com.rabbitmq.stream.ConsumerUpdateListener; @@ -30,7 +31,6 @@ import com.rabbitmq.stream.impl.StreamConsumerBuilder.TrackingConfiguration; import com.rabbitmq.stream.impl.StreamEnvironment.TrackingConsumerRegistration; import com.rabbitmq.stream.impl.Utils.CompositeConsumerUpdateListener; -import java.time.Duration; import java.util.Collections; import java.util.Map; import java.util.Objects; @@ -283,7 +283,7 @@ static long getStoredOffsetSafely(StreamConsumer consumer, StreamEnvironment env .scheduler(environment.scheduledExecutorService()) .retry(ex -> ex instanceof IllegalStateException) .delayPolicy( - BackOffDelayPolicy.fixedWithInitialDelay( + fixedWithInitialDelay( environment.recoveryBackOffDelayPolicy().delay(0), environment.recoveryBackOffDelayPolicy().delay(1), environment.recoveryBackOffDelayPolicy().delay(0).multipliedBy(3))) @@ -346,9 +346,7 @@ void waitForOffsetToBeStored(long expectedStoredOffset) { .description( "Last stored offset for consumer %s on stream %s must be %d", this.name, this.stream, expectedStoredOffset) - .delayPolicy( - BackOffDelayPolicy.fixedWithInitialDelay( - Duration.ofMillis(200), Duration.ofMillis(200))) + .delayPolicy(fixedWithInitialDelay(ofMillis(200), ofMillis(200))) .retry(exception -> exception instanceof IllegalStateException) .scheduler(environment.scheduledExecutorService()) .build(); @@ -498,7 +496,7 @@ boolean isOpen() { return !this.closed.get(); } - synchronized void setTrackingClient(Client client) { + void setTrackingClient(Client client) { this.trackingClient = client; } @@ -529,8 +527,8 @@ long storedOffset(Supplier clientSupplier) { } catch (Exception e) { throw new IllegalStateException( String.format( - "Not possible to query offset for consumer %s on stream %s for now", - this.name, this.stream), + "Not possible to query offset for consumer %s on stream %s for now: %s", + this.name, this.stream, e.getMessage()), e); } if (response.isOk()) { @@ -554,8 +552,8 @@ long storedOffset(Supplier clientSupplier) { } else { throw new IllegalStateException( String.format( - "Not possible to query offset for consumer %s on stream %s for now", - this.name, this.stream)); + "Not possible to query offset for consumer %s on stream %s for now, consumer status is %s", + this.name, this.stream, this.status.name())); } } @@ -618,4 +616,8 @@ private void checkNotClosed() { throw new IllegalStateException("This producer instance has been closed"); } } + + long id() { + return this.id; + } } diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java index 8332fab8a3..4eefcf98d1 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java @@ -14,6 +14,7 @@ package com.rabbitmq.stream.impl; import static com.rabbitmq.stream.impl.Utils.formatConstant; +import static com.rabbitmq.stream.impl.Utils.namedRunnable; import 
static com.rabbitmq.stream.impl.Utils.propagateException; import static java.util.concurrent.TimeUnit.SECONDS; @@ -34,6 +35,7 @@ import com.rabbitmq.stream.SubscriptionListener; import com.rabbitmq.stream.compression.CompressionCodecFactory; import com.rabbitmq.stream.impl.Client.ClientParameters; +import com.rabbitmq.stream.impl.Client.ShutdownListener; import com.rabbitmq.stream.impl.Client.StreamStatsResponse; import com.rabbitmq.stream.impl.OffsetTrackingCoordinator.Registration; import com.rabbitmq.stream.impl.StreamConsumerBuilder.TrackingConfiguration; @@ -47,10 +49,11 @@ import java.io.IOException; import java.net.URI; import java.net.URLDecoder; +import java.time.Duration; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Random; +import java.util.Optional; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; @@ -74,8 +77,6 @@ class StreamEnvironment implements Environment { private static final Logger LOGGER = LoggerFactory.getLogger(StreamEnvironment.class); - private final Random random = new Random(); - private final EventLoopGroup eventLoopGroup; private final ScheduledExecutorService scheduledExecutorService; private final boolean privateScheduleExecutorService; @@ -94,9 +95,9 @@ class StreamEnvironment implements Environment { private final Clock clock = new Clock(); private final ScheduledFuture clockRefreshFuture; private final ByteBufAllocator byteBufAllocator; - private final AtomicBoolean locatorInitialized = new AtomicBoolean(false); + private final AtomicBoolean locatorsInitialized = new AtomicBoolean(false); private final Runnable locatorInitializationSequence; - private volatile Client locator; + private final List locators = new CopyOnWriteArrayList<>(); StreamEnvironment( ScheduledExecutorService scheduledExecutorService, @@ -188,6 +189,8 @@ class StreamEnvironment implements Environment { .collect(Collectors.toList()); } + this.addresses.forEach(address -> this.locators.add(new Locator(address))); + if (clientParametersPrototype.eventLoopGroup == null) { this.eventLoopGroup = new NioEventLoopGroup(); this.clientParametersPrototype = @@ -199,14 +202,16 @@ class StreamEnvironment implements Environment { .duplicate() .eventLoopGroup(clientParametersPrototype.eventLoopGroup); } + ScheduledExecutorService executorService; if (scheduledExecutorService == null) { - this.scheduledExecutorService = + executorService = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors()); this.privateScheduleExecutorService = true; } else { - this.scheduledExecutorService = scheduledExecutorService; + executorService = scheduledExecutorService; this.privateScheduleExecutorService = false; } + this.scheduledExecutorService = new ScheduledExecutorServiceWrapper(executorService); this.producersCoordinator = new ProducersCoordinator( @@ -222,50 +227,13 @@ class StreamEnvironment implements Environment { connectionNamingStrategy, Utils.coordinatorClientFactory(this)); this.offsetTrackingCoordinator = new OffsetTrackingCoordinator(this); - - AtomicReference shutdownListenerReference = new AtomicReference<>(); - Client.ShutdownListener shutdownListener = - shutdownContext -> { - if (shutdownContext.isShutdownUnexpected()) { - this.locator = null; - LOGGER.debug("Unexpected locator disconnection, trying to reconnect"); - Client.ClientParameters newLocatorParameters = - this.clientParametersPrototype - .duplicate() - 
.shutdownListener(shutdownListenerReference.get()); - AsyncRetry.asyncRetry( - () -> { - Address address = - addresses.size() == 1 - ? addresses.get(0) - : addresses.get(random.nextInt(addresses.size())); - address = addressResolver.resolve(address); - LOGGER.debug("Trying to reconnect locator on {}", address); - String connectionName = - connectionNamingStrategy.apply(ClientConnectionType.LOCATOR); - Client newLocator = - clientFactory.apply( - newLocatorParameters - .host(address.host()) - .port(address.port()) - .clientProperty("connection_name", connectionName)); - LOGGER.debug("Created locator connection '{}'", connectionName); - LOGGER.debug("Locator connected on {}", address); - return newLocator; - }) - .description("Locator recovery") - .scheduler(this.scheduledExecutorService) - .delayPolicy(recoveryBackOffDelayPolicy) - .build() - .thenAccept(newLocator -> this.locator = newLocator); - } - }; - shutdownListenerReference.set(shutdownListener); ClientParameters clientParametersForInit = clientParametersPrototype.duplicate(); Runnable locatorInitSequence = () -> { RuntimeException lastException = null; - for (Address address : addresses) { + for (int i = 0; i < addresses.size(); i++) { + Address address = addresses.get(i); + Locator locator = locator(i); address = addressResolver.resolve(address); String connectionName = connectionNamingStrategy.apply(ClientConnectionType.LOCATOR); Client.ClientParameters locatorParameters = @@ -274,26 +242,34 @@ class StreamEnvironment implements Environment { .host(address.host()) .port(address.port()) .clientProperty("connection_name", connectionName) - .shutdownListener(shutdownListenerReference.get()); + .shutdownListener( + shutdownListener(locator, connectionNamingStrategy, clientFactory)); try { - this.locator = clientFactory.apply(locatorParameters); + Client client = clientFactory.apply(locatorParameters); + locator.client(client); LOGGER.debug("Created locator connection '{}'", connectionName); LOGGER.debug("Locator connected to {}", address); - break; } catch (RuntimeException e) { LOGGER.debug("Error while try to connect to {}: {}", address, e.getMessage()); lastException = e; } } - if (this.locator == null) { + if (this.locators.stream().allMatch(l -> l.isNotSet())) { throw lastException; + } else { + this.locators.forEach( + l -> { + if (l.isNotSet()) { + scheduleLocatorConnection(l, connectionNamingStrategy, clientFactory); + } + }); } }; if (lazyInit) { this.locatorInitializationSequence = locatorInitSequence; } else { locatorInitSequence.run(); - locatorInitialized.set(true); + locatorsInitialized.set(true); this.locatorInitializationSequence = () -> {}; } this.codec = @@ -302,7 +278,112 @@ class StreamEnvironment implements Environment { : clientParametersPrototype.codec(); this.clockRefreshFuture = this.scheduledExecutorService.scheduleAtFixedRate( - () -> this.clock.refresh(), 1, 1, SECONDS); + Utils.namedRunnable(() -> this.clock.refresh(), "Background clock refresh"), + 1, + 1, + SECONDS); + } + + private ShutdownListener shutdownListener( + Locator locator, + Function connectionNamingStrategy, + Function clientFactory) { + AtomicReference shutdownListenerReference = new AtomicReference<>(); + Client.ShutdownListener shutdownListener = + shutdownContext -> { + if (shutdownContext.isShutdownUnexpected()) { + locator.client(null); + LOGGER.debug("Unexpected locator disconnection, trying to reconnect"); + try { + Client.ClientParameters newLocatorParameters = + this.clientParametersPrototype + .duplicate() + 
.shutdownListener(shutdownListenerReference.get()); + AsyncRetry.asyncRetry( + () -> { + LOGGER.debug("Locator reconnection..."); + Address resolvedAddress = addressResolver.resolve(locator.address()); + String connectionName = + connectionNamingStrategy.apply(ClientConnectionType.LOCATOR); + LOGGER.debug( + "Trying to reconnect locator on {}, with client connection name '{}'", + resolvedAddress, + connectionName); + Client newLocator = + clientFactory.apply( + newLocatorParameters + .host(resolvedAddress.host()) + .port(resolvedAddress.port()) + .clientProperty("connection_name", connectionName)); + LOGGER.debug("Created locator connection '{}'", connectionName); + LOGGER.debug("Locator connected on {}", resolvedAddress); + return newLocator; + }) + .description("Locator recovery") + .scheduler(this.scheduledExecutorService) + .delayPolicy(recoveryBackOffDelayPolicy) + .build() + .thenAccept(newClient -> locator.client(newClient)) + .exceptionally( + ex -> { + LOGGER.debug("Locator recovery failed", ex); + return null; + }); + } catch (Exception e) { + LOGGER.debug("Error while scheduling locator reconnection", e); + } + } + }; + shutdownListenerReference.set(shutdownListener); + return shutdownListener; + } + + private void scheduleLocatorConnection( + Locator locator, + Function connectionNamingStrategy, + Function clientFactory) { + ShutdownListener shutdownListener = + shutdownListener(locator, connectionNamingStrategy, clientFactory); + try { + Client.ClientParameters newLocatorParameters = + this.clientParametersPrototype.duplicate().shutdownListener(shutdownListener); + AsyncRetry.asyncRetry( + () -> { + LOGGER.debug("Locator reconnection..."); + Address resolvedAddress = addressResolver.resolve(locator.address()); + String connectionName = + connectionNamingStrategy.apply(ClientConnectionType.LOCATOR); + LOGGER.debug( + "Trying to reconnect locator on {}, with client connection name '{}'", + resolvedAddress, + connectionName); + Client newLocator = + clientFactory.apply( + newLocatorParameters + .host(resolvedAddress.host()) + .port(resolvedAddress.port()) + .clientProperty("connection_name", connectionName)); + LOGGER.debug("Created locator connection '{}'", connectionName); + LOGGER.debug("Locator connected on {}", resolvedAddress); + return newLocator; + }) + .description("Locator recovery") + .scheduler(this.scheduledExecutorService) + .delayPolicy(recoveryBackOffDelayPolicy) + .build() + .thenAccept(newClient -> locator.client(newClient)) + .exceptionally( + ex -> { + LOGGER.debug("Locator recovery failed", ex); + return null; + }); + } catch (Exception e) { + LOGGER.debug("Error while scheduling locator reconnection", e); + } + } + + private Locator locator(int i) { + return this.locators.get(i); } private static String uriDecode(String s) { @@ -361,11 +442,11 @@ public ByteBufAllocator byteBufAllocator() { } void maybeInitializeLocator() { - if (this.locatorInitialized.compareAndSet(false, true)) { + if (this.locatorsInitialized.compareAndSet(false, true)) { try { this.locatorInitializationSequence.run(); } catch (RuntimeException e) { - this.locatorInitialized.set(false); + this.locatorsInitialized.set(false); throw e; } } @@ -398,14 +479,17 @@ public StreamStats queryStreamStats(String stream) { checkNotClosed(); StreamStatsResponse response = locatorOperation( - client -> { - if (Utils.is3_11_OrMore(client.brokerVersion())) { - return client.streamStats(stream); - } else { - throw new UnsupportedOperationException( - "QueryStringInfo is available only for RabbitMQ 3.11 
or more."); - } - }); + Utils.namedFunction( + client -> { + if (Utils.is3_11_OrMore(client.brokerVersion())) { + return client.streamStats(stream); + } else { + throw new UnsupportedOperationException( + "QueryStringInfo is available only for RabbitMQ 3.11 or more."); + } + }, + "Query stream stats on stream '%s'", + stream)); if (response.isOk()) { Map info = response.getInfo(); BiFunction offsetSupplierLogic = @@ -508,14 +592,17 @@ public void close() { this.consumersCoordinator.close(); this.offsetTrackingCoordinator.close(); - try { - if (this.locator != null && this.locator.isOpen()) { - this.locator.close(); - this.locator = null; + for (Locator locator : this.locators) { + try { + if (locator.isSet()) { + locator.client().close(); + locator.client(null); + } + } catch (Exception e) { + LOGGER.warn("Error while closing locator client", e); } - } catch (Exception e) { - LOGGER.warn("Error while closing locator client", e); } + this.clockRefreshFuture.cancel(false); if (privateScheduleExecutorService) { this.scheduledExecutorService.shutdownNow(); @@ -541,6 +628,10 @@ ScheduledExecutorService scheduledExecutorService() { return this.scheduledExecutorService; } + void execute(Runnable task, String description, Object... args) { + this.scheduledExecutorService().execute(namedRunnable(task, description, args)); + } + BackOffDelayPolicy recoveryBackOffDelayPolicy() { return this.recoveryBackOffDelayPolicy; } @@ -580,10 +671,11 @@ Runnable registerProducer(StreamProducer producer, String reference, String stre } Client locator() { - if (this.locator == null) { - throw new LocatorNotAvailableException(); - } - return this.locator; + return this.locators.stream() + .filter(Locator::isSet) + .findAny() + .orElseThrow(() -> new LocatorNotAvailableException()) + .client(); } T locatorOperation(Function operation) { @@ -599,9 +691,15 @@ static T locatorOperation( boolean executed = false; Exception lastException = null; T result = null; + LOGGER.debug("Starting locator operation '{}'", operation); + long start = System.nanoTime(); while (attempt < maxAttempt) { try { result = operation.apply(clientSupplier.get()); + LOGGER.debug( + "Locator operation '{}' succeeded in {}", + operation, + Duration.ofNanos(System.nanoTime() - start)); executed = true; break; } catch (LocatorNotAvailableException e) { @@ -613,6 +711,10 @@ static T locatorOperation( Thread.currentThread().interrupt(); break; } + } catch (Exception e) { + LOGGER.debug("Exception during locator operation '{}': {}", operation, e.getMessage()); + lastException = e; + break; } } if (!executed) { @@ -687,10 +789,15 @@ TrackingConsumerRegistration registerTrackingConsumer( @Override public String toString() { - Client locator = this.locator; - return "{ \"locator\" : " - + (locator == null ? "null" : ("\"" + locator.connectionName() + "\"")) - + ", " + return "{ \"locators\" : [" + + this.locators.stream() + .map( + l -> { + Client c = l.nullableClient(); + return c == null ? 
"null" : ("\"" + c.connectionName() + "\""); + }) + .collect(Collectors.joining(",")) + + "], " + "\"producers\" : " + this.producersCoordinator + ", \"consumers\" : " @@ -747,4 +854,40 @@ private void checkNotClosed() { throw new IllegalStateException("This environment instance has been closed"); } } + + private static class Locator { + + private final Address address; + private volatile Optional client; + + private Locator(Address address) { + this.address = address; + this.client = Optional.empty(); + } + + Locator client(Client client) { + this.client = Optional.ofNullable(client); + return this; + } + + private boolean isNotSet() { + return !this.isSet(); + } + + private boolean isSet() { + return this.client.isPresent(); + } + + private Client client() { + return this.client.orElseThrow(() -> new LocatorNotAvailableException()); + } + + private Client nullableClient() { + return this.client.orElse(null); + } + + private Address address() { + return this.address; + } + } } diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamProducer.java b/src/main/java/com/rabbitmq/stream/impl/StreamProducer.java index 2993b9cf78..00d363ea3e 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamProducer.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamProducer.java @@ -17,6 +17,7 @@ import static com.rabbitmq.stream.Constants.CODE_PRODUCER_CLOSED; import static com.rabbitmq.stream.Constants.CODE_PRODUCER_NOT_AVAILABLE; import static com.rabbitmq.stream.impl.Utils.formatConstant; +import static com.rabbitmq.stream.impl.Utils.namedRunnable; import com.rabbitmq.stream.Codec; import com.rabbitmq.stream.ConfirmationHandler; @@ -166,13 +167,26 @@ public int fragmentLength(Object entity) { environment .scheduledExecutorService() .schedule( - taskReference.get(), batchPublishingDelay.toMillis(), TimeUnit.MILLISECONDS); + namedRunnable( + taskReference.get(), + "Background batch publishing task for publisher %d on stream '%s'", + this.id, + this.stream), + batchPublishingDelay.toMillis(), + TimeUnit.MILLISECONDS); } }; taskReference.set(task); environment .scheduledExecutorService() - .schedule(task, batchPublishingDelay.toMillis(), TimeUnit.MILLISECONDS); + .schedule( + namedRunnable( + task, + "Background batch publishing task for publisher %d on stream '%s'", + this.id, + this.stream), + batchPublishingDelay.toMillis(), + TimeUnit.MILLISECONDS); } this.batchSize = batchSize; this.codec = environment.codec(); @@ -192,14 +206,27 @@ public int fragmentLength(Object entity) { this.environment .scheduledExecutorService() .schedule( - taskReference.get(), confirmTimeout.toMillis(), TimeUnit.MILLISECONDS); + namedRunnable( + taskReference.get(), + "Background confirm timeout task for producer %d on stream %s", + this.id, + this.stream), + confirmTimeout.toMillis(), + TimeUnit.MILLISECONDS); } }; taskReference.set(wrapperTask); this.confirmTimeoutFuture = this.environment .scheduledExecutorService() - .schedule(taskReference.get(), confirmTimeout.toMillis(), TimeUnit.MILLISECONDS); + .schedule( + namedRunnable( + taskReference.get(), + "Background confirm timeout task for producer %d on stream %s", + this.id, + this.stream), + confirmTimeout.toMillis(), + TimeUnit.MILLISECONDS); } this.status = Status.RUNNING; } @@ -209,7 +236,6 @@ private Runnable confirmTimeoutTask(Duration confirmTimeout) { long limit = this.environment.clock().time() - confirmTimeout.toNanos(); SortedMap unconfirmedSnapshot = new TreeMap<>(this.unconfirmedMessages); - LOGGER.debug("Starting confirm timeout check task"); int 
count = 0; for (Entry unconfirmedEntry : unconfirmedSnapshot.entrySet()) { if (unconfirmedEntry.getValue().time() < limit) { @@ -223,7 +249,15 @@ private Runnable confirmTimeoutTask(Duration confirmTimeout) { break; } } - LOGGER.debug("Failed {} message(s) which had timed out (limit {})", count, limit); + if (count > 0) { + LOGGER.debug( + "{} outbound message(s) had reached the confirm timeout (limit {}) " + + "for producer {} on stream '{}', application notified with callback", + count, + limit, + this.id, + this.stream); + } }; } diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamStreamCreator.java b/src/main/java/com/rabbitmq/stream/impl/StreamStreamCreator.java index 5aa0f016bc..0b7f0fd559 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamStreamCreator.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamStreamCreator.java @@ -14,6 +14,7 @@ package com.rabbitmq.stream.impl; import static com.rabbitmq.stream.impl.Utils.formatConstant; +import static com.rabbitmq.stream.impl.Utils.namedFunction; import com.rabbitmq.stream.ByteCapacity; import com.rabbitmq.stream.Constants; @@ -73,7 +74,11 @@ public void create() { } this.environment.maybeInitializeLocator(); Client.Response response = - environment.locatorOperation(c -> c.create(stream, streamParametersBuilder.build())); + environment.locatorOperation( + namedFunction( + c -> c.create(stream, streamParametersBuilder.build()), + "Creation of stream '%s'", + this.stream)); if (!response.isOk() && response.getResponseCode() != Constants.RESPONSE_CODE_STREAM_ALREADY_EXISTS) { throw new StreamException( diff --git a/src/main/java/com/rabbitmq/stream/impl/SuperStreamConsumer.java b/src/main/java/com/rabbitmq/stream/impl/SuperStreamConsumer.java index 117b13b566..c884da7e3c 100644 --- a/src/main/java/com/rabbitmq/stream/impl/SuperStreamConsumer.java +++ b/src/main/java/com/rabbitmq/stream/impl/SuperStreamConsumer.java @@ -13,6 +13,8 @@ // info@rabbitmq.com. package com.rabbitmq.stream.impl; +import static com.rabbitmq.stream.impl.Utils.namedFunction; + import com.rabbitmq.stream.Consumer; import com.rabbitmq.stream.Message; import com.rabbitmq.stream.MessageHandler; @@ -38,7 +40,12 @@ class SuperStreamConsumer implements Consumer { StreamEnvironment environment, TrackingConfiguration trackingConfiguration) { this.superStream = superStream; - List partitions = environment.locatorOperation(c -> c.partitions(superStream)); + List partitions = + environment.locatorOperation( + namedFunction( + c -> c.partitions(superStream), + "Partition lookup for super stream '%s'", + superStream)); // for manual offset tracking strategy only ConsumerState[] states = new ConsumerState[partitions.size()]; diff --git a/src/main/java/com/rabbitmq/stream/impl/SuperStreamProducer.java b/src/main/java/com/rabbitmq/stream/impl/SuperStreamProducer.java index ff232fb5b3..8a000d94b7 100644 --- a/src/main/java/com/rabbitmq/stream/impl/SuperStreamProducer.java +++ b/src/main/java/com/rabbitmq/stream/impl/SuperStreamProducer.java @@ -13,6 +13,8 @@ // info@rabbitmq.com. 
package com.rabbitmq.stream.impl; +import static com.rabbitmq.stream.impl.Utils.namedFunction; + import com.rabbitmq.stream.Codec; import com.rabbitmq.stream.ConfirmationHandler; import com.rabbitmq.stream.ConfirmationStatus; @@ -70,12 +72,23 @@ public MessageBuilder messageBuilder() { @Override public long getLastPublishingId() { if (this.name != null && !this.name.isEmpty()) { - List streams = this.environment.locatorOperation(c -> c.partitions(superStream)); + List streams = + this.environment.locatorOperation( + namedFunction( + c -> c.partitions(superStream), + "Partition lookup for super stream '%s'", + this.superStream)); long publishingId = 0; boolean first = true; for (String partition : streams) { long pubId = - this.environment.locatorOperation(c -> c.queryPublisherSequence(this.name, partition)); + this.environment.locatorOperation( + namedFunction( + c -> c.queryPublisherSequence(this.name, partition), + "Publisher sequence query for on partition '%s' of super stream '%s', publisher name '%s'", + partition, + this.superStream, + this.name)); if (first) { publishingId = pubId; first = false; @@ -148,7 +161,12 @@ private static class DefaultSuperStreamMetadata implements Metadata { private DefaultSuperStreamMetadata(String superStream, StreamEnvironment environment) { this.superStream = superStream; this.environment = environment; - List ps = environment.locatorOperation(c -> c.partitions(superStream)); + List ps = + environment.locatorOperation( + namedFunction( + c -> c.partitions(superStream), + "Partition lookup for super stream '%s'", + superStream)); this.partitions = new CopyOnWriteArrayList<>(ps); } @@ -161,7 +179,13 @@ public List partitions() { public List route(String routingKey) { return routes.computeIfAbsent( routingKey, - routingKey1 -> environment.locatorOperation(c -> c.route(routingKey1, superStream))); + routingKey1 -> + environment.locatorOperation( + namedFunction( + c -> c.route(routingKey1, superStream), + "Route lookup on super stream '%s' for key '%s'", + this.superStream, + routingKey1))); } } } diff --git a/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java b/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java new file mode 100644 index 0000000000..b1b8a725fb --- /dev/null +++ b/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java @@ -0,0 +1,23 @@ +// Copyright (c) 2022 VMware, Inc. or its affiliates. All rights reserved. +// +// This software, the RabbitMQ Stream Java client library, is dual-licensed under the +// Mozilla Public License 2.0 ("MPL"), and the Apache License version 2 ("ASL"). +// For the MPL, please see LICENSE-MPL-RabbitMQ. For the ASL, +// please see LICENSE-APACHE2. +// +// This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, +// either express or implied. See the LICENSE file for specific language governing +// rights and limitations of this software. +// +// If you have any questions regarding licensing, please contact us at +// info@rabbitmq.com. 
+package com.rabbitmq.stream.impl; + +import com.rabbitmq.stream.StreamException; + +class TimeoutStreamException extends StreamException { + + public TimeoutStreamException(String message) { + super(message); + } +} diff --git a/src/main/java/com/rabbitmq/stream/impl/Utils.java b/src/main/java/com/rabbitmq/stream/impl/Utils.java index bff0dcbc86..adf2fe8f69 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Utils.java +++ b/src/main/java/com/rabbitmq/stream/impl/Utils.java @@ -151,6 +151,14 @@ static ClientFactory connectToAdvertisedNodeClientFactory( retryInterval); } + static Runnable namedRunnable(Runnable task, String format, Object... args) { + return new NamedRunnable(String.format(format, args), task); + } + + static Function namedFunction(Function task, String format, Object... args) { + return new NamedFunction<>(String.format(format, args), task); + } + interface ClientFactory { Client client(ClientFactoryContext context); @@ -341,4 +349,46 @@ static StreamException propagateException(short responseCode, String stream) { return new StreamException(message, responseCode); } } + + private static class NamedRunnable implements Runnable { + + private final String name; + private final Runnable delegate; + + private NamedRunnable(String name, Runnable delegate) { + this.name = name; + this.delegate = delegate; + } + + @Override + public void run() { + this.delegate.run(); + } + + @Override + public String toString() { + return this.name; + } + } + + private static class NamedFunction implements Function { + + private final String name; + private final Function delegate; + + private NamedFunction(String name, Function delegate) { + this.name = name; + this.delegate = delegate; + } + + @Override + public R apply(T t) { + return this.delegate.apply(t); + } + + @Override + public String toString() { + return this.name; + } + } } diff --git a/src/test/java/com/rabbitmq/stream/Host.java b/src/test/java/com/rabbitmq/stream/Host.java index a83e05ff4a..7fe4c1b97d 100644 --- a/src/test/java/com/rabbitmq/stream/Host.java +++ b/src/test/java/com/rabbitmq/stream/Host.java @@ -98,24 +98,32 @@ public static Process rabbitmqctl(String command) throws IOException { return executeCommand(rabbitmqctlCommand() + " " + command); } - public static Process killConnection(String connectionName) throws IOException { - List cs = listConnections(); - if (cs.stream().filter(c -> connectionName.equals(c.clientProvidedName())).count() != 1) { - throw new IllegalArgumentException( - String.format( - "Could not find 1 connection '%s' in stream connections: %s", - connectionName, - cs.stream() - .map(ConnectionInfo::clientProvidedName) - .collect(Collectors.joining(", ")))); + public static Process killConnection(String connectionName) { + try { + List cs = listConnections(); + if (cs.stream().filter(c -> connectionName.equals(c.clientProvidedName())).count() != 1) { + throw new IllegalArgumentException( + String.format( + "Could not find 1 connection '%s' in stream connections: %s", + connectionName, + cs.stream() + .map(ConnectionInfo::clientProvidedName) + .collect(Collectors.joining(", ")))); + } + return rabbitmqctl("eval 'rabbit_stream:kill_connection(\"" + connectionName + "\").'"); + } catch (IOException e) { + throw new RuntimeException(e); } - return rabbitmqctl("eval 'rabbit_stream:kill_connection(\"" + connectionName + "\").'"); } - public static List listConnections() throws IOException { - Process process = - rabbitmqctl("list_stream_connections --formatter json conn_name,client_properties"); - return 
toConnectionInfoList(capture(process.getInputStream())); + public static List listConnections() { + try { + Process process = + rabbitmqctl("list_stream_connections --formatter json conn_name,client_properties"); + return toConnectionInfoList(capture(process.getInputStream())); + } catch (IOException e) { + throw new RuntimeException(e); + } } static List toConnectionInfoList(String json) { diff --git a/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java b/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java index 215b525a44..27bf911671 100644 --- a/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java @@ -18,6 +18,7 @@ import static com.rabbitmq.stream.impl.TestUtils.latchAssert; import static com.rabbitmq.stream.impl.TestUtils.metadata; import static com.rabbitmq.stream.impl.TestUtils.namedConsumer; +import static com.rabbitmq.stream.impl.TestUtils.waitAtMost; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; @@ -40,6 +41,7 @@ import com.rabbitmq.stream.codec.WrapperMessageBuilder; import com.rabbitmq.stream.impl.Client.MessageListener; import com.rabbitmq.stream.impl.Client.QueryOffsetResponse; +import com.rabbitmq.stream.impl.Client.Response; import com.rabbitmq.stream.impl.MonitoringTestUtils.ConsumersPoolInfo; import com.rabbitmq.stream.impl.Utils.ClientFactory; import java.time.Duration; @@ -143,6 +145,7 @@ public Client.ClientParameters shutdownListener( when(environment.locatorOperation(any())).thenCallRealMethod(); when(environment.clientParametersCopy()).thenReturn(clientParameters); when(environment.addressResolver()).thenReturn(address -> address); + when(client.brokerVersion()).thenReturn("3.11.0"); coordinator = new ConsumersCoordinator( @@ -493,7 +496,7 @@ void subscribeShouldSubscribeToStreamAndDispatchMessageWithManySubscriptions() { @Test void shouldRedistributeConsumerIfConnectionIsLost() throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); Duration retryDelay = Duration.ofMillis(100); when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); @@ -506,13 +509,18 @@ void shouldRedistributeConsumerIfConnectionIsLost() throws Exception { .thenReturn(metadata(null, replica())); when(clientFactory.client(any())).thenReturn(client); + AtomicInteger subscriptionCount = new AtomicInteger(0); when(client.subscribe( subscriptionIdCaptor.capture(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap())) - .thenReturn(new Client.Response(Constants.RESPONSE_CODE_OK)); + .thenAnswer( + invocation -> { + subscriptionCount.incrementAndGet(); + return new Client.Response(Constants.RESPONSE_CODE_OK); + }); StreamConsumer consumerClosedAfterConnectionLost = mock(StreamConsumer.class); when(consumerClosedAfterConnectionLost.isOpen()).thenReturn(false); @@ -553,12 +561,12 @@ void shouldRedistributeConsumerIfConnectionIsLost() throws Exception { shutdownListener.handle( new Client.ShutdownContext(Client.ShutdownContext.ShutdownReason.UNKNOWN)); - Thread.sleep(retryDelay.toMillis() * 5); + // the second consumer does not re-subscribe because it returns it is not open + waitAtMost(() -> subscriptionCount.get() == 2 + 1); 
// the consumer connection should be reset after the connection disruption verify(consumer, times(1)).setSubscriptionClient(isNull()); - // the second consumer does not re-subscribe because it returns it is not open verify(client, times(2 + 1)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); @@ -582,7 +590,7 @@ void shouldRedistributeConsumerIfConnectionIsLost() throws Exception { void shouldRedistributeConsumerOnMetadataUpdate() throws Exception { BackOffDelayPolicy delayPolicy = fixedWithInitialDelay(ms(100), ms(100)); when(environment.topologyUpdateBackOffDelayPolicy()).thenReturn(delayPolicy); - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(consumer.isOpen()).thenReturn(true); when(locator.metadata("stream")).thenReturn(metadata(null, replicas())); @@ -592,13 +600,18 @@ void shouldRedistributeConsumerOnMetadataUpdate() throws Exception { StreamConsumer consumerClosedAfterMetadataUpdate = mock(StreamConsumer.class); when(consumerClosedAfterMetadataUpdate.isOpen()).thenReturn(false); + AtomicInteger subscriptionCount = new AtomicInteger(0); when(client.subscribe( subscriptionIdCaptor.capture(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap())) - .thenReturn(new Client.Response(Constants.RESPONSE_CODE_OK)); + .thenAnswer( + invocation -> { + subscriptionCount.incrementAndGet(); + return responseOk(); + }); AtomicInteger messageHandlerCalls = new AtomicInteger(); Runnable closingRunnable = @@ -644,9 +657,8 @@ void shouldRedistributeConsumerOnMetadataUpdate() throws Exception { // the consumer connection should be reset after the metadata update verify(consumer, times(1)).setSubscriptionClient(isNull()); - Thread.sleep(delayPolicy.delay(0).toMillis() * 5); - // the second consumer does not re-subscribe because it returns it is not open + waitAtMost(() -> subscriptionCount.get() == 2 + 1); verify(client, times(2 + 1)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); @@ -677,7 +689,7 @@ void shouldRedistributeConsumerOnMetadataUpdate() throws Exception { void shouldRetryRedistributionIfMetadataIsNotUpdatedImmediately() throws Exception { BackOffDelayPolicy delayPolicy = fixedWithInitialDelay(ms(100), ms(100)); when(environment.topologyUpdateBackOffDelayPolicy()).thenReturn(delayPolicy); - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(consumer.isOpen()).thenReturn(true); when(locator.metadata("stream")) @@ -744,7 +756,7 @@ void shouldRetryRedistributionIfMetadataIsNotUpdatedImmediately() throws Excepti void metadataUpdate_shouldCloseConsumerIfStreamIsDeleted() throws Exception { BackOffDelayPolicy delayPolicy = fixedWithInitialDelay(ms(50), ms(50)); when(environment.topologyUpdateBackOffDelayPolicy()).thenReturn(delayPolicy); - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(consumer.isOpen()).thenReturn(true); when(locator.metadata("stream")) @@ -796,7 +808,7 @@ void metadataUpdate_shouldCloseConsumerIfRetryTimeoutIsReached() throws Exceptio Duration retryTimeout = 
Duration.ofMillis(200); BackOffDelayPolicy delayPolicy = fixedWithInitialDelay(ms(50), ms(50), ms(200)); when(environment.topologyUpdateBackOffDelayPolicy()).thenReturn(delayPolicy); - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(consumer.isOpen()).thenReturn(true); when(locator.metadata("stream")) @@ -904,7 +916,7 @@ void shouldUseNewClientsForMoreThanMaxSubscriptionsAndCloseClientAfterUnsubscrip @Test void shouldRemoveClientSubscriptionManagerFromPoolAfterConnectionDies() throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); Duration retryDelay = Duration.ofMillis(100); when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); @@ -969,7 +981,7 @@ void shouldRemoveClientSubscriptionManagerFromPoolAfterConnectionDies() throws E void shouldRemoveClientSubscriptionManagerFromPoolIfEmptyAfterMetadataUpdate() throws Exception { BackOffDelayPolicy delayPolicy = fixedWithInitialDelay(ms(50), ms(50)); when(environment.topologyUpdateBackOffDelayPolicy()).thenReturn(delayPolicy); - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(consumer.isOpen()).thenReturn(true); when(locator.metadata("stream")).thenReturn(metadata(null, replicas().subList(0, 1))); @@ -1051,7 +1063,7 @@ void shouldRemoveClientSubscriptionManagerFromPoolIfEmptyAfterMetadataUpdate() t @MethodSource("disruptionArguments") void shouldRestartWhereItLeftOffAfterDisruption(Consumer configurator) throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); Duration retryDelay = Duration.ofMillis(100); when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); @@ -1122,7 +1134,7 @@ void shouldRestartWhereItLeftOffAfterDisruption(Consumer configurator) throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); Duration retryDelay = Duration.ofMillis(100); when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); @@ -1186,7 +1198,7 @@ void shouldReUseInitialOffsetSpecificationAfterDisruptionIfNoMessagesReceived( @SuppressWarnings("unchecked") void shouldUseStoredOffsetOnRecovery(Consumer configurator) throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); Duration retryDelay = Duration.ofMillis(100); when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); @@ -1268,7 +1280,89 @@ void shouldUseStoredOffsetOnRecovery(Consumer configur } @Test - void subscribeUnsubscribeInDifferentThreadsShouldNotDeadlock() throws Exception { 
+ @SuppressWarnings("unchecked") + void shouldRetryAssignmentOnManagerClientTimeout() throws Exception { + scheduledExecutorService = createScheduledExecutorService(2); + when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); + Duration retryDelay = Duration.ofMillis(100); + when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); + when(environment.topologyUpdateBackOffDelayPolicy()) + .thenReturn(BackOffDelayPolicy.fixed(retryDelay)); + when(consumer.isOpen()).thenReturn(true); + when(locator.metadata("stream-1")).thenReturn(metadata("stream-1", null, replica())); + when(locator.metadata("stream-2")).thenReturn(metadata("stream-2", null, replica())); + + when(clientFactory.client(any())).thenReturn(client); + + String consumerName = "consumer-name"; + when(client.queryOffset(consumerName, "stream-1")) + .thenReturn(new QueryOffsetResponse(Constants.RESPONSE_CODE_OK, 0L)); + when(client.queryOffset(consumerName, "stream-2")) + .thenReturn(new QueryOffsetResponse(Constants.RESPONSE_CODE_OK, 0L)) // first subscription + .thenThrow(new TimeoutStreamException("")) // on recovery + .thenThrow(new TimeoutStreamException("")) // on recovery, retry + .thenThrow(new TimeoutStreamException("")) // on recovery, retry + .thenReturn(new QueryOffsetResponse(Constants.RESPONSE_CODE_OK, 0L)); + + AtomicInteger subscriptionCount = new AtomicInteger(0); + when(client.subscribe( + subscriptionIdCaptor.capture(), + anyString(), + any(OffsetSpecification.class), + anyInt(), + anyMap())) + .thenAnswer( + a -> { + subscriptionCount.incrementAndGet(); + return new Client.Response(Constants.RESPONSE_CODE_OK); + }); + + Runnable closingRunnable1 = + coordinator.subscribe( + consumer, + "stream-1", + null, + consumerName, + NO_OP_SUBSCRIPTION_LISTENER, + NO_OP_TRACKING_CLOSING_CALLBACK, + (offset, message) -> {}, + Collections.emptyMap()); + verify(clientFactory, times(1)).client(any()); + verify(client, times(1)) + .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); + + Runnable closingRunnable2 = + coordinator.subscribe( + consumer, + "stream-2", + null, + consumerName, + NO_OP_SUBSCRIPTION_LISTENER, + NO_OP_TRACKING_CLOSING_CALLBACK, + (offset, message) -> {}, + Collections.emptyMap()); + verify(clientFactory, times(1)).client(any()); + verify(client, times(1 + 1)) + .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); + + this.shutdownListener.handle( + new Client.ShutdownContext(Client.ShutdownContext.ShutdownReason.UNKNOWN)); + + Thread.sleep(retryDelay.toMillis() * 5); + + waitAtMost(() -> subscriptionCount.get() == (1 + 1) * 2); + + verify(locator, times(2)).metadata("stream-1"); + verify(client, times(2)).queryOffset(consumerName, "stream-1"); + // for stream-2, the offset query on recovery timed out, so more calls... 
+ verify(locator, times(3)).metadata("stream-2"); + verify(client, times(1 + 3 + 1)) + .queryOffset( + consumerName, "stream-2"); // subscription call, times out 3 times, retry that succeeds + } + + @Test + void subscribeUnsubscribeInDifferentThreadsShouldNotDeadlock() { when(locator.metadata("stream")).thenReturn(metadata(null, replicas())); when(clientFactory.client(any())).thenReturn(client); @@ -1342,4 +1436,19 @@ private MessageListener firstMessageListener() { private MessageListener lastMessageListener() { return this.messageListeners.get(messageListeners.size() - 1); } + + private static ScheduledExecutorService createScheduledExecutorService() { + return createScheduledExecutorService(1); + } + + private static ScheduledExecutorService createScheduledExecutorService(int nbThreads) { + return new ScheduledExecutorServiceWrapper( + nbThreads == 1 + ? Executors.newSingleThreadScheduledExecutor() + : Executors.newScheduledThreadPool(nbThreads)); + } + + private static Response responseOk() { + return new Response(Constants.RESPONSE_CODE_OK); + } } diff --git a/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java b/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java index 41be5cf5db..f3a6d04cca 100644 --- a/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java +++ b/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java @@ -63,19 +63,19 @@ static ConsumerInfo extract(Consumer consumer) { public static class EnvironmentInfo { - private final String locator; + private final String[] locators; private final ProducersPoolInfo[] producers; private final ConsumersPoolInfo[] consumers; public EnvironmentInfo( - String locator, ProducersPoolInfo[] producers, ConsumersPoolInfo[] consumers) { - this.locator = locator; + String[] locators, ProducersPoolInfo[] producers, ConsumersPoolInfo[] consumers) { + this.locators = locators; this.producers = producers; this.consumers = consumers; } - public String getLocator() { - return locator; + public String[] getLocators() { + return locators; } public List getConsumers() { @@ -89,8 +89,8 @@ public List getProducers() { @Override public String toString() { return "EnvironmentInfo{" - + "locator='" - + locator + + "locators='" + + Arrays.toString(locators) + '\'' + ", producers=" + Arrays.toString(producers) diff --git a/src/test/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinatorTest.java b/src/test/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinatorTest.java index a3a3ed8f57..8a7af3ece6 100644 --- a/src/test/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinatorTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinatorTest.java @@ -50,7 +50,7 @@ public class OffsetTrackingCoordinatorTest { @BeforeEach void init() { mocks = MockitoAnnotations.openMocks(this); - executorService = Executors.newScheduledThreadPool(2); + executorService = new ScheduledExecutorServiceWrapper(Executors.newScheduledThreadPool(2)); when(env.scheduledExecutorService()).thenReturn(executorService); when(consumer.isOpen()).thenReturn(true); } diff --git a/src/test/java/com/rabbitmq/stream/impl/ProducersCoordinatorTest.java b/src/test/java/com/rabbitmq/stream/impl/ProducersCoordinatorTest.java index 3faae0c8a1..13ab7d2b41 100644 --- a/src/test/java/com/rabbitmq/stream/impl/ProducersCoordinatorTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/ProducersCoordinatorTest.java @@ -229,7 +229,7 @@ void shouldGetExactNodeImmediatelyWithAdvertisedHostNameClientFactoryAndExactNod @Test void 
shouldRedistributeProducerAndTrackingConsumerIfConnectionIsLost() throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); Duration retryDelay = Duration.ofMillis(50); when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); @@ -295,7 +295,7 @@ void shouldRedistributeProducerAndTrackingConsumerIfConnectionIsLost() throws Ex @Test void shouldDisposeProducerAndNotTrackingConsumerIfRecoveryTimesOut() throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(environment.recoveryBackOffDelayPolicy()) .thenReturn(BackOffDelayPolicy.fixedWithInitialDelay(ms(10), ms(10), ms(100))); @@ -336,7 +336,7 @@ void shouldDisposeProducerAndNotTrackingConsumerIfRecoveryTimesOut() throws Exce @Test void shouldRedistributeProducersAndTrackingConsumersOnMetadataUpdate() throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); Duration retryDelay = Duration.ofMillis(50); when(environment.topologyUpdateBackOffDelayPolicy()) @@ -426,7 +426,7 @@ void shouldRedistributeProducersAndTrackingConsumersOnMetadataUpdate() throws Ex @Test void shouldDisposeProducerIfStreamIsDeleted() throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(environment.topologyUpdateBackOffDelayPolicy()) .thenReturn(BackOffDelayPolicy.fixedWithInitialDelay(ms(10), ms(10), ms(100))); @@ -460,7 +460,7 @@ void shouldDisposeProducerIfStreamIsDeleted() throws Exception { @Test void shouldDisposeProducerAndNotTrackingConsumerIfMetadataUpdateTimesOut() throws Exception { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(environment.topologyUpdateBackOffDelayPolicy()) .thenReturn(BackOffDelayPolicy.fixedWithInitialDelay(ms(10), ms(10), ms(100))); @@ -500,7 +500,7 @@ void shouldDisposeProducerAndNotTrackingConsumerIfMetadataUpdateTimesOut() throw @Test void growShrinkResourcesBasedOnProducersAndTrackingConsumersCount() { - scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(); + scheduledExecutorService = createScheduledExecutorService(); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); when(locator.metadata("stream")).thenReturn(metadata(leader(), replicas())); @@ -601,4 +601,8 @@ class TrackingConsumerInfo { assertThat(coordinator.poolSize()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(1); } + + private static ScheduledExecutorService createScheduledExecutorService() { + return new ScheduledExecutorServiceWrapper(Executors.newSingleThreadScheduledExecutor()); + } } diff --git a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java index 07d15187c7..3ada84c483 
100644 --- a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java @@ -20,6 +20,8 @@ import static com.rabbitmq.stream.impl.TestUtils.streamName; import static com.rabbitmq.stream.impl.TestUtils.waitAtMost; import static java.util.concurrent.TimeUnit.SECONDS; +import static java.util.stream.Collectors.toList; +import static java.util.stream.IntStream.range; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -34,6 +36,7 @@ import com.rabbitmq.stream.Environment; import com.rabbitmq.stream.EnvironmentBuilder; import com.rabbitmq.stream.Host; +import com.rabbitmq.stream.Host.ConnectionInfo; import com.rabbitmq.stream.Message; import com.rabbitmq.stream.NoOffsetException; import com.rabbitmq.stream.OffsetSpecification; @@ -64,8 +67,8 @@ import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Supplier; import java.util.stream.Collectors; -import java.util.stream.IntStream; import javax.net.ssl.SNIHostName; import javax.net.ssl.SSLParameters; import org.assertj.core.api.ThrowableAssert.ThrowingCallable; @@ -180,24 +183,24 @@ void environmentCreationShouldSucceedWhenUsingTls() { void producersAndConsumersShouldBeClosedWhenEnvironmentIsClosed(boolean lazyInit) { Environment environment = environmentBuilder.lazyInitialization(lazyInit).build(); Collection producers = - IntStream.range(0, 2) + range(0, 2) .mapToObj(i -> environment.producerBuilder().stream(stream).build()) - .collect(Collectors.toList()); + .collect(toList()); Collection consumers = - IntStream.range(0, 2) + range(0, 2) .mapToObj( i -> environment.consumerBuilder().stream(stream) .name(UUID.randomUUID().toString()) .messageHandler((offset, message) -> {}) .build()) - .collect(Collectors.toList()); + .collect(toList()); producers.forEach(producer -> assertThat(((StreamProducer) producer).isOpen()).isTrue()); consumers.forEach(consumer -> assertThat(((StreamConsumer) consumer).isOpen()).isTrue()); EnvironmentInfo environmentInfo = MonitoringTestUtils.extract(environment); - assertThat(environmentInfo.getLocator()).isNotNull(); + assertThat(environmentInfo.getLocators()).isNotEmpty(); assertThat(environmentInfo.getProducers()) .hasSize(1) .element(0) @@ -223,7 +226,7 @@ void producersAndConsumersShouldBeClosedWhenEnvironmentIsClosed(boolean lazyInit consumers.forEach(consumer -> assertThat(((StreamConsumer) consumer).isOpen()).isFalse()); environmentInfo = MonitoringTestUtils.extract(environment); - assertThat(environmentInfo.getLocator()).isNull(); + assertThat(environmentInfo.getLocators()).isNotEmpty(); assertThat(environmentInfo.getProducers()).isEmpty(); assertThat(environmentInfo.getConsumers()).isEmpty(); } @@ -237,7 +240,7 @@ void growShrinkResourcesWhenProducersConsumersAreOpenedAndClosed(TestInfo info) try (Environment environment = environmentBuilder.rpcTimeout(Duration.ofSeconds(20)).build()) { List streams = - IntStream.range(0, streamCount) + range(0, streamCount) .mapToObj(i -> streamName(info)) .map( s -> { @@ -250,16 +253,16 @@ void growShrinkResourcesWhenProducersConsumersAreOpenedAndClosed(TestInfo info) CountDownLatch consumeLatch = new CountDownLatch(messageCount * producersCount); List producers = - IntStream.range(0, producersCount) + range(0, producersCount) .mapToObj( i -> { String s = streams.get(i % streams.size()); return 
environment.producerBuilder().stream(s).build(); }) - .collect(Collectors.toList()); + .collect(toList()); List consumers = - IntStream.range(0, consumersCount) + range(0, consumersCount) .mapToObj( i -> { String s = streams.get(new Random().nextInt(streams.size())); @@ -267,13 +270,13 @@ void growShrinkResourcesWhenProducersConsumersAreOpenedAndClosed(TestInfo info) .messageHandler((offset, message) -> consumeLatch.countDown()) .build(); }) - .collect(Collectors.toList()); + .collect(toList()); producers.stream() .parallel() .forEach( producer -> { - IntStream.range(0, messageCount) + range(0, messageCount) .forEach( messageIndex -> { producer.send( @@ -403,6 +406,48 @@ void locatorShouldReconnectIfConnectionIsLost(TestInfo info) throws Exception { } } + @Test + @TestUtils.DisabledIfRabbitMqCtlNotSet + void shouldHaveSeveralLocatorsWhenSeveralUrisSpecifiedAndShouldRecoverThemIfClosed(TestInfo info) + throws Exception { + List uris = + range(0, 3).mapToObj(ignored -> "rabbitmq-stream://localhost:5552").collect(toList()); + try (Environment environment = + environmentBuilder + .recoveryBackOffDelayPolicy(BackOffDelayPolicy.fixed(Duration.ofSeconds(1))) + .uris(uris) + .build()) { + String s = streamName(info); + environment.streamCreator().stream(s).create(); + environment.deleteStream(s); + + Supplier> locatorConnectionNamesSupplier = + () -> + Host.listConnections().stream() + .map(ConnectionInfo::clientProvidedName) + .filter(name -> name.contains("-locator-")) + .collect(toList()); + List locatorConnectionNames = locatorConnectionNamesSupplier.get(); + assertThat(locatorConnectionNames).hasSameSizeAs(uris); + + locatorConnectionNames.forEach(connectionName -> Host.killConnection(connectionName)); + + environment.streamCreator().stream(s).create(); + try { + Producer producer = environment.producerBuilder().stream(s).build(); + Consumer consumer = + environment.consumerBuilder().stream(s) + .messageHandler((context, message) -> {}) + .build(); + producer.close(); + consumer.close(); + } finally { + environment.deleteStream(s); + } + waitAtMost(() -> locatorConnectionNamesSupplier.get().size() == uris.size()); + } + } + @Test void createDelete(TestInfo info) { try (Environment environment = environmentBuilder.build(); @@ -482,7 +527,7 @@ void createPublishConsumeDelete(boolean lazyInit, TestInfo info) { Producer producer = env.producerBuilder().stream(s).build(); ConfirmationHandler confirmationHandler = confirmationStatus -> confirmLatch.countDown(); - IntStream.range(0, messageCount) + range(0, messageCount) .forEach( i -> { Message message = diff --git a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentUnitTest.java b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentUnitTest.java index f05b09b36e..f4aa8b541b 100644 --- a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentUnitTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentUnitTest.java @@ -133,6 +133,8 @@ void retryLocatorRecovery() throws Exception { @SuppressWarnings("unchecked") void shouldTryUrisOnInitializationFailure() throws Exception { reset(cf); + // we don't want the scheduled retry to kick in + when(recoveryBackOffDelayPolicy.delay(anyInt())).thenReturn(Duration.ofMinutes(60)); when(cf.apply(any(Client.ClientParameters.class))) .thenThrow(new RuntimeException()) .thenThrow(new RuntimeException()) diff --git a/src/test/java/com/rabbitmq/stream/impl/StreamProducerUnitTest.java b/src/test/java/com/rabbitmq/stream/impl/StreamProducerUnitTest.java index 254329abc2..360b6a86d1 100644 --- 
a/src/test/java/com/rabbitmq/stream/impl/StreamProducerUnitTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/StreamProducerUnitTest.java @@ -111,7 +111,6 @@ void init() { any(ToLongFunction.class))) .thenCallRealMethod(); when(env.scheduledExecutorService()).thenReturn(executorService); - when(env.locator()).thenReturn(client); when(env.locatorOperation(any())).thenCallRealMethod(); when(env.clock()).thenReturn(clock); when(env.codec()).thenReturn(new SimpleCodec()); From f753c1eb4efd1e28f7b62658c76a2999b5bf8bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Mon, 28 Nov 2022 10:02:22 +0100 Subject: [PATCH 02/10] Complete re-assignment if consumer is closed --- .../com/rabbitmq/stream/impl/ConsumersCoordinator.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index 7d6d23d733..f316e88a6b 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -686,8 +686,8 @@ private void assignConsumersToStream( } else { for (SubscriptionTracker affectedSubscription : subscriptions) { ManagerPool subscriptionPool = null; - boolean reassigned = false; - while (!reassigned) { + boolean reassignmentCompleted = false; + while (!reassignmentCompleted) { try { if (affectedSubscription.consumer.isOpen()) { Client.Broker broker = pickBroker(candidates); @@ -715,11 +715,14 @@ private void assignConsumersToStream( } subscriptionPool.add( affectedSubscription, offsetSpecification, false); - reassigned = true; + reassignmentCompleted = true; + } else { + reassignmentCompleted = true; } } } else { LOGGER.debug("Not re-assigning consumer because it has been closed"); + reassignmentCompleted = true; } } catch (TimeoutStreamException e) { LOGGER.debug( @@ -740,6 +743,7 @@ private void assignConsumersToStream( } catch (Exception e) { LOGGER.warn( "Error while re-assigning subscription from stream {}", stream, e); + reassignmentCompleted = true; } } } From 10a0887a297afb6ef5e67ce67ce3f9887eae8315 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Tue, 29 Nov 2022 11:00:18 +0100 Subject: [PATCH 03/10] Prevent concurrent consumer recovery Another recovery can kick in while a first one is in progress. As we added retry, the first one can retry and refresh its connection, so both will succeed and we end up with more consumers than expected. 
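The guard added in ConsumersCoordinator boils down to a per-subscription state machine
(OPENING, ACTIVE, RECOVERING, CLOSED) driven by a compare-and-set: only the thread that
flips the tracker from ACTIVE to RECOVERING performs the recovery, and any concurrent
attempt is skipped. A minimal standalone sketch of the pattern follows; the class and
method names (RecoveryGuard, activated, recover) are illustrative only, not part of the
client API.

    import java.util.concurrent.atomic.AtomicReference;

    // Sketch of the recovery guard: at most one recovery runs per disconnection.
    class RecoveryGuard {

      enum State { OPENING, ACTIVE, RECOVERING, CLOSED }

      private final AtomicReference<State> state = new AtomicReference<>(State.OPENING);

      void activated() {
        // called once the subscription is established
        state.set(State.ACTIVE);
      }

      void recover(Runnable recovery) {
        // the CAS succeeds for exactly one caller per ACTIVE -> RECOVERING transition
        if (state.compareAndSet(State.ACTIVE, State.RECOVERING)) {
          recovery.run();          // e.g. re-subscribe, possibly on a new connection
          state.set(State.ACTIVE); // tracker is live again once recovery completes
        }
        // otherwise another recovery is already in progress, or the tracker is closed: skip
      }
    }

With this in place, a duplicate shutdown notification becomes a no-op instead of producing
a second, competing subscription.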
--- .../java/com/rabbitmq/stream/impl/Client.java | 8 + .../stream/impl/ClientProperties.java | 2 +- .../stream/impl/ConsumersCoordinator.java | 259 ++++++++++++------ .../impl/OffsetTrackingCoordinator.java | 3 +- .../stream/impl/StreamEnvironment.java | 8 +- .../stream/impl/TimeoutStreamException.java | 4 + .../stream/impl/ConsumersCoordinatorTest.java | 105 +++++-- .../stream/impl/StreamConsumerTest.java | 11 +- .../impl/StreamEnvironmentUnitTest.java | 15 +- 9 files changed, 293 insertions(+), 122 deletions(-) diff --git a/src/main/java/com/rabbitmq/stream/impl/Client.java b/src/main/java/com/rabbitmq/stream/impl/Client.java index bd12e8e424..b070937731 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Client.java +++ b/src/main/java/com/rabbitmq/stream/impl/Client.java @@ -86,6 +86,7 @@ import io.netty.channel.ChannelOption; import io.netty.channel.ChannelOutboundHandlerAdapter; import io.netty.channel.ChannelPromise; +import io.netty.channel.ConnectTimeoutException; import io.netty.channel.EventLoopGroup; import io.netty.channel.nio.NioEventLoopGroup; import io.netty.channel.socket.SocketChannel; @@ -324,6 +325,13 @@ public void write( this.host = parameters.host; this.port = parameters.port; } catch (Exception e) { + if (e instanceof ConnectTimeoutException) { + throw new TimeoutStreamException( + String.format( + "Error while creating stream connection to %s:%d", + parameters.host, parameters.port), + e); + } throw new StreamException(e); } diff --git a/src/main/java/com/rabbitmq/stream/impl/ClientProperties.java b/src/main/java/com/rabbitmq/stream/impl/ClientProperties.java index 80f12e2225..b1d3d912a7 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ClientProperties.java +++ b/src/main/java/com/rabbitmq/stream/impl/ClientProperties.java @@ -44,7 +44,7 @@ final class ClientProperties { put("product", "RabbitMQ Stream"); put("version", ClientProperties.VERSION); put("platform", "Java"); - put("copyright", "Copyright (c) 2020-2021 VMware, Inc. or its affiliates."); + put("copyright", "Copyright (c) 2020-2022 VMware, Inc. or its affiliates."); put("information", "Licensed under the MPL 2.0. 
See https://www.rabbitmq.com/"); } }); diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index f316e88a6b..a6ce3bd3f6 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -52,6 +52,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Predicate; import java.util.function.Supplier; @@ -174,7 +175,7 @@ List findBrokersForStream(String stream) { List brokers; if (replicas == null || replicas.isEmpty()) { brokers = Collections.singletonList(streamMetadata.getLeader()); - LOGGER.debug("Consuming from {} on leader node {}", stream, streamMetadata.getLeader()); + LOGGER.debug("Only leader node {} for consuming from {}", streamMetadata.getLeader(), stream); } else { LOGGER.debug("Replicas for consuming from {}: {}", stream, replicas); brokers = new ArrayList<>(replicas); @@ -250,6 +251,8 @@ private static class SubscriptionTracker { private volatile boolean hasReceivedSomething = false; private volatile byte subscriptionIdInClient; private volatile ClientSubscriptionsManager manager; + private volatile AtomicReference state = + new AtomicReference<>(SubscriptionState.OPENING); private SubscriptionTracker( StreamConsumer consumer, @@ -290,6 +293,7 @@ synchronized void cancel() { } else { LOGGER.debug("No manager to remove consumer from"); } + this.state(SubscriptionState.CLOSED); } synchronized void assign(byte subscriptionIdInClient, ClientSubscriptionsManager manager) { @@ -308,6 +312,25 @@ synchronized void detachFromManager() { this.manager = null; this.consumer.setSubscriptionClient(null); } + + void state(SubscriptionState state) { + this.state.set(state); + } + + boolean compareAndSet(SubscriptionState expected, SubscriptionState newValue) { + return this.state.compareAndSet(expected, newValue); + } + + SubscriptionState state() { + return this.state.get(); + } + } + + private enum SubscriptionState { + OPENING, + ACTIVE, + RECOVERING, + CLOSED } private static final class MessageHandlerContext implements Context { @@ -403,7 +426,7 @@ private void add( pickedManager.add(subscriptionTracker, offsetSpecification, isInitialSubscription); } catch (IllegalStateException e) { pickedManager = null; - } catch (TimeoutStreamException e) { + } catch (TimeoutStreamException | ClientClosedException e) { // it's very likely the manager connection is dead // scheduling its closing in another thread to avoid blocking this one if (pickedManager.isEmpty()) { @@ -467,11 +490,11 @@ private class ClientSubscriptionsManager { // the 2 data structures track the subscriptions, they must remain consistent private final Map> streamToStreamSubscriptions = new ConcurrentHashMap<>(); - private final ManagerPool owner; - // trackers and tracker must be kept in sync + // trackers and tracker count must be kept in sync private volatile List subscriptionTrackers = new ArrayList<>(maxConsumersByConnection); private volatile int trackerCount = 0; + private final ManagerPool owner; private ClientSubscriptionsManager( ManagerPool owner, Client.ClientParameters clientParameters) { @@ -529,8 +552,12 @@ private ClientSubscriptionsManager( } if (shutdownContext.isShutdownUnexpected()) { LOGGER.debug( - "Unexpected shutdown 
notification on subscription client {}, scheduling consumers re-assignment", + "Unexpected shutdown notification on subscription connection {}, scheduling consumers re-assignment", name); + LOGGER.debug( + "Subscription connection has {} consumer(s) over {} stream(s) to recover", + this.subscriptionTrackers.stream().filter(Objects::nonNull).count(), + this.streamToStreamSubscriptions.size()); environment .scheduledExecutorService() .execute( @@ -541,23 +568,32 @@ private ClientSubscriptionsManager( } subscriptionTrackers.stream() .filter(Objects::nonNull) + .filter(t -> t.state() == SubscriptionState.ACTIVE) .forEach(SubscriptionTracker::detachFromManager); for (Entry> entry : streamToStreamSubscriptions.entrySet()) { if (Thread.currentThread().isInterrupted()) { + LOGGER.debug("Interrupting consumer re-assignment task"); break; } String stream = entry.getKey(); - LOGGER.debug( - "Re-assigning {} consumer(s) to stream {} after disconnection", - entry.getValue().size(), - stream); - assignConsumersToStream( - entry.getValue(), - stream, - attempt -> - environment.recoveryBackOffDelayPolicy().delay(attempt), - false); + Set trackersToReAssign = entry.getValue(); + if (trackersToReAssign == null || trackersToReAssign.isEmpty()) { + LOGGER.debug( + "No consumer to re-assign to stream {} after disconnection", + stream); + } else { + LOGGER.debug( + "Re-assigning {} consumer(s) to stream {} after disconnection", + trackersToReAssign.size(), + stream); + assignConsumersToStream( + trackersToReAssign, + stream, + attempt -> + environment.recoveryBackOffDelayPolicy().delay(attempt), + false); + } } }, "Consumers re-assignment after disconnection from %s", @@ -685,66 +721,16 @@ private void assignConsumersToStream( consumersClosingCallback.run(); } else { for (SubscriptionTracker affectedSubscription : subscriptions) { - ManagerPool subscriptionPool = null; - boolean reassignmentCompleted = false; - while (!reassignmentCompleted) { - try { - if (affectedSubscription.consumer.isOpen()) { - Client.Broker broker = pickBroker(candidates); - LOGGER.debug("Using {} to resume consuming from {}", broker, stream); - String key = keyForClientSubscription(broker); - subscriptionPool = - pools.computeIfAbsent( - key, - s -> - new ManagerPool( - key, - environment - .clientParametersCopy() - .host(broker.getHost()) - .port(broker.getPort()))); - synchronized (affectedSubscription.consumer) { - if (affectedSubscription.consumer.isOpen()) { - OffsetSpecification offsetSpecification; - if (affectedSubscription.hasReceivedSomething) { - offsetSpecification = - OffsetSpecification.offset(affectedSubscription.offset); - } else { - offsetSpecification = - affectedSubscription.initialOffsetSpecification; - } - subscriptionPool.add( - affectedSubscription, offsetSpecification, false); - reassignmentCompleted = true; - } else { - reassignmentCompleted = true; - } - } - } else { - LOGGER.debug("Not re-assigning consumer because it has been closed"); - reassignmentCompleted = true; - } - } catch (TimeoutStreamException e) { - LOGGER.debug( - "Consumer {} re-assignment on stream {} timed out, refreshing candidates and retrying", - affectedSubscription.consumer.id(), - affectedSubscription.stream); - if (subscriptionPool != null) { - subscriptionPool.clean(); - } - // maybe not a good candidate, let's refresh and retry for this one - candidates = - callAndMaybeRetry( - () -> findBrokersForStream(stream), - ex -> !(ex instanceof StreamDoesNotExistException), - "Candidate lookup to consume from '%s'", - stream); - - } catch 
(Exception e) { - LOGGER.warn( - "Error while re-assigning subscription from stream {}", stream, e); - reassignmentCompleted = true; - } + if (affectedSubscription.compareAndSet( + SubscriptionState.ACTIVE, SubscriptionState.RECOVERING)) { + recoverSubscription(stream, candidates, affectedSubscription); + } else { + LOGGER.debug( + "Not recovering consumer {} from stream {}, state is {}, expected is {}", + affectedSubscription.consumer.id(), + affectedSubscription.stream, + affectedSubscription.state(), + SubscriptionState.ACTIVE); } } if (closeClient) { @@ -764,6 +750,73 @@ private void assignConsumersToStream( }); } + private void recoverSubscription( + String stream, List candidates, SubscriptionTracker affectedSubscription) { + ManagerPool subscriptionPool = null; + boolean reassignmentCompleted = false; + while (!reassignmentCompleted) { + try { + if (affectedSubscription.consumer.isOpen()) { + Broker broker = pickBroker(candidates); + LOGGER.debug("Using {} to resume consuming from {}", broker, stream); + String key = keyForClientSubscription(broker); + subscriptionPool = + pools.computeIfAbsent( + key, + s -> + new ManagerPool( + key, + environment + .clientParametersCopy() + .host(broker.getHost()) + .port(broker.getPort()))); + synchronized (affectedSubscription.consumer) { + if (affectedSubscription.consumer.isOpen()) { + OffsetSpecification offsetSpecification; + if (affectedSubscription.hasReceivedSomething) { + offsetSpecification = OffsetSpecification.offset(affectedSubscription.offset); + } else { + offsetSpecification = affectedSubscription.initialOffsetSpecification; + } + subscriptionPool.add(affectedSubscription, offsetSpecification, false); + reassignmentCompleted = true; + } else { + reassignmentCompleted = true; + } + } + } else { + LOGGER.debug("Not re-assigning consumer because it has been closed"); + reassignmentCompleted = true; + } + } catch (TimeoutStreamException | ClientClosedException e) { + LOGGER.debug( + "Consumer {} re-assignment on stream {} timed out or connection closed, refreshing candidates and retrying", + affectedSubscription.consumer.id(), + affectedSubscription.stream); + if (subscriptionPool != null) { + subscriptionPool.clean(); + } + // maybe not a good candidate, let's refresh and retry for this one + candidates = + callAndMaybeRetry( + () -> findBrokersForStream(stream), + ex -> !(ex instanceof StreamDoesNotExistException), + "Candidate lookup to consume from '%s'", + stream); + + } catch (Exception e) { + LOGGER.warn("Error while re-assigning subscription from stream {}", stream, e); + reassignmentCompleted = true; + } + } + } + + private void checkNotClosed() { + if (!this.client.isOpen()) { + throw new ClientClosedException(); + } + } + synchronized void add( SubscriptionTracker subscriptionTracker, OffsetSpecification offsetSpecification, @@ -771,6 +824,9 @@ synchronized void add( if (this.isFull()) { throw new IllegalStateException("Cannot add subscription tracker, the manager is full"); } + + checkNotClosed(); + // FIXME check manager is still open (not closed because of connection failure) byte subscriptionId = 0; for (int i = 0; i < MAX_SUBSCRIPTIONS_PER_CLIENT; i++) { @@ -800,6 +856,7 @@ synchronized void add( String offsetTrackingReference = subscriptionTracker.offsetTrackingReference; if (offsetTrackingReference != null) { + checkNotClosed(); QueryOffsetResponse queryOffsetResponse = callAndMaybeRetry( () -> client.queryOffset(offsetTrackingReference, subscriptionTracker.stream), @@ -841,6 +898,7 @@ synchronized void add( 
offsetSpecification, subscriptionContext.offsetSpecification()); + checkNotClosed(); // FIXME consider using fewer initial credits byte subId = subscriptionId; Client.Response subscribeResponse = @@ -871,11 +929,25 @@ synchronized void add( streamToStreamSubscriptions .computeIfAbsent(subscriptionTracker.stream, s -> ConcurrentHashMap.newKeySet()) .remove(subscriptionTracker); + maybeCleanStreamToStreamSubscriptions(subscriptionTracker.stream); throw e; } + subscriptionTracker.state(SubscriptionState.ACTIVE); LOGGER.debug("Subscribed to {}", subscriptionTracker.stream); } + private void maybeCleanStreamToStreamSubscriptions(String stream) { + this.streamToStreamSubscriptions.compute( + stream, + (s, trackers) -> { + if (trackers == null || trackers.isEmpty()) { + return null; + } else { + return trackers; + } + }); + } + synchronized void remove(SubscriptionTracker subscriptionTracker) { // FIXME check manager is still open (not closed because of connection failure) byte subscriptionIdInClient = subscriptionTracker.subscriptionIdInClient; @@ -941,21 +1013,25 @@ boolean isEmpty() { synchronized void close() { if (this.client != null && this.client.isOpen()) { - subscriptionTrackers.stream() - .filter(Objects::nonNull) - .forEach( - tracker -> { - try { - if (this.client != null && this.client.isOpen() && tracker.consumer.isOpen()) { - this.client.unsubscribe(tracker.subscriptionIdInClient); - } - } catch (Exception e) { - // OK, moving on - } - }); + for (int i = 0; i < this.subscriptionTrackers.size(); i++) { + SubscriptionTracker tracker = this.subscriptionTrackers.get(i); + if (tracker != null) { + try { + if (this.client != null && this.client.isOpen() && tracker.consumer.isOpen()) { + this.client.unsubscribe(tracker.subscriptionIdInClient); + } + } catch (Exception e) { + // OK, moving on + LOGGER.debug( + "Error while unsubscribing from {}, registration {}", + tracker.stream, + tracker.subscriptionIdInClient); + } + this.subscriptionTrackers.set(i, null); + } + } streamToStreamSubscriptions.clear(); - subscriptionTrackers.clear(); if (this.client != null && this.client.isOpen()) { this.client.close(); @@ -1027,4 +1103,11 @@ static T callAndMaybeRetry( private static final Predicate RETRY_ON_TIMEOUT = e -> e instanceof TimeoutStreamException; + + private static class ClientClosedException extends StreamException { + + public ClientClosedException() { + super("Client already closed"); + } + } } diff --git a/src/main/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinator.java index 3c3bcc7973..913e679e94 100644 --- a/src/main/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/OffsetTrackingCoordinator.java @@ -90,7 +90,8 @@ Registration registerTrackingConsumer( try { this.clock.setTime(System.nanoTime()); LOGGER.debug( - "Background offset tracking flushing, {} tracker(s) to check"); + "Background offset tracking flushing, {} tracker(s) to check", + this.trackers.size()); Iterator iterator = trackers.iterator(); while (iterator.hasNext()) { if (Thread.currentThread().isInterrupted()) { diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java index 4eefcf98d1..4a3732522f 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java @@ -695,7 +695,13 @@ static T locatorOperation( long start = 
System.nanoTime(); while (attempt < maxAttempt) { try { - result = operation.apply(clientSupplier.get()); + Client client = clientSupplier.get(); + LOGGER.debug( + "Using locator on {}:{} to run operation '{}'", + client.getHost(), + client.getPort(), + operation); + result = operation.apply(client); LOGGER.debug( "Locator operation '{}' succeeded in {}", operation, diff --git a/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java b/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java index b1b8a725fb..f23ac86de6 100644 --- a/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java +++ b/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java @@ -20,4 +20,8 @@ class TimeoutStreamException extends StreamException { public TimeoutStreamException(String message) { super(message); } + + public TimeoutStreamException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java b/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java index 27bf911671..ad9c48a65b 100644 --- a/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java @@ -19,6 +19,7 @@ import static com.rabbitmq.stream.impl.TestUtils.metadata; import static com.rabbitmq.stream.impl.TestUtils.namedConsumer; import static com.rabbitmq.stream.impl.TestUtils.waitAtMost; +import static java.lang.String.format; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; @@ -146,6 +147,7 @@ public Client.ClientParameters shutdownListener( when(environment.clientParametersCopy()).thenReturn(clientParameters); when(environment.addressResolver()).thenReturn(address -> address); when(client.brokerVersion()).thenReturn("3.11.0"); + when(client.isOpen()).thenReturn(true); coordinator = new ConsumersCoordinator( @@ -586,6 +588,69 @@ void shouldRedistributeConsumerIfConnectionIsLost() throws Exception { assertThat(messageHandlerCalls.get()).isEqualTo(2); } + @Test + void shouldSkipRecoveryIfRecoveryIsAlreadyInProgress() throws Exception { + scheduledExecutorService = createScheduledExecutorService(2); + when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); + Duration retryDelay = Duration.ofMillis(100); + when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); + when(consumer.isOpen()).thenReturn(true); + when(locator.metadata("stream")).thenReturn(metadata(null, replica())); + + when(clientFactory.client(any())).thenReturn(client); + AtomicInteger subscriptionCount = new AtomicInteger(0); + when(client.subscribe( + subscriptionIdCaptor.capture(), + anyString(), + any(OffsetSpecification.class), + anyInt(), + anyMap())) + .thenAnswer( + invocation -> { + subscriptionCount.incrementAndGet(); + return new Client.Response(Constants.RESPONSE_CODE_OK); + }); + + String trackingReference = "reference"; + + when(client.queryOffset(trackingReference, "stream")) + .thenReturn(new QueryOffsetResponse(Constants.RESPONSE_CODE_OK, 0L)) // first subscription + .thenAnswer( + invocation -> { + // during recovery, we trigger another disconnection + shutdownListener.handle( + new Client.ShutdownContext(Client.ShutdownContext.ShutdownReason.UNKNOWN)); + Thread.sleep(retryDelay.multipliedBy(3).toMillis()); + throw new TimeoutStreamException(""); + }) + 
.thenReturn(new QueryOffsetResponse(Constants.RESPONSE_CODE_OK, 0L)); + + AtomicInteger messageHandlerCalls = new AtomicInteger(); + coordinator.subscribe( + consumer, + "stream", + OffsetSpecification.first(), + trackingReference, + NO_OP_SUBSCRIPTION_LISTENER, + NO_OP_TRACKING_CLOSING_CALLBACK, + (offset, message) -> messageHandlerCalls.incrementAndGet(), + Collections.emptyMap()); + verify(clientFactory, times(1)).client(any()); + verify(client, times(1)) + .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); + + shutdownListener.handle( + new Client.ShutdownContext(Client.ShutdownContext.ShutdownReason.UNKNOWN)); + + waitAtMost( + () -> subscriptionCount.get() == 1 + 1, + () -> format("Subscription count is %s", subscriptionCount.get())); + + verify(consumer, times(1)).setSubscriptionClient(isNull()); + verify(client, times(1 + 1)) + .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); + } + @Test void shouldRedistributeConsumerOnMetadataUpdate() throws Exception { BackOffDelayPolicy delayPolicy = fixedWithInitialDelay(ms(100), ms(100)); @@ -1317,30 +1382,28 @@ void shouldRetryAssignmentOnManagerClientTimeout() throws Exception { return new Client.Response(Constants.RESPONSE_CODE_OK); }); - Runnable closingRunnable1 = - coordinator.subscribe( - consumer, - "stream-1", - null, - consumerName, - NO_OP_SUBSCRIPTION_LISTENER, - NO_OP_TRACKING_CLOSING_CALLBACK, - (offset, message) -> {}, - Collections.emptyMap()); + coordinator.subscribe( + consumer, + "stream-1", + null, + consumerName, + NO_OP_SUBSCRIPTION_LISTENER, + NO_OP_TRACKING_CLOSING_CALLBACK, + (offset, message) -> {}, + Collections.emptyMap()); verify(clientFactory, times(1)).client(any()); verify(client, times(1)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); - Runnable closingRunnable2 = - coordinator.subscribe( - consumer, - "stream-2", - null, - consumerName, - NO_OP_SUBSCRIPTION_LISTENER, - NO_OP_TRACKING_CLOSING_CALLBACK, - (offset, message) -> {}, - Collections.emptyMap()); + coordinator.subscribe( + consumer, + "stream-2", + null, + consumerName, + NO_OP_SUBSCRIPTION_LISTENER, + NO_OP_TRACKING_CLOSING_CALLBACK, + (offset, message) -> {}, + Collections.emptyMap()); verify(clientFactory, times(1)).client(any()); verify(client, times(1 + 1)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); @@ -1348,8 +1411,6 @@ void shouldRetryAssignmentOnManagerClientTimeout() throws Exception { this.shutdownListener.handle( new Client.ShutdownContext(Client.ShutdownContext.ShutdownReason.UNKNOWN)); - Thread.sleep(retryDelay.toMillis() * 5); - waitAtMost(() -> subscriptionCount.get() == (1 + 1) * 2); verify(locator, times(2)).metadata("stream-1"); diff --git a/src/test/java/com/rabbitmq/stream/impl/StreamConsumerTest.java b/src/test/java/com/rabbitmq/stream/impl/StreamConsumerTest.java index 1b11d91bf1..78acda9623 100644 --- a/src/test/java/com/rabbitmq/stream/impl/StreamConsumerTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/StreamConsumerTest.java @@ -482,6 +482,7 @@ void consumerShouldKeepConsumingAfterDisruption( java.util.function.Consumer disruption, TestInfo info) throws Exception { String s = streamName(info); environment.streamCreator().stream(s).create(); + StreamConsumer consumer = null; try { int messageCount = 10_000; CountDownLatch publishLatch = new CountDownLatch(messageCount); @@ -499,7 +500,7 @@ void consumerShouldKeepConsumingAfterDisruption( AtomicInteger 
receivedMessageCount = new AtomicInteger(0); CountDownLatch consumeLatch = new CountDownLatch(messageCount); CountDownLatch consumeLatchSecondWave = new CountDownLatch(messageCount * 2); - StreamConsumer consumer = + consumer = (StreamConsumer) environment.consumerBuilder().stream(s) .offset(OffsetSpecification.first()) @@ -519,7 +520,7 @@ void consumerShouldKeepConsumingAfterDisruption( Client client = cf.get(); TestUtils.waitAtMost( - 10, + recoveryInitialDelay.plusSeconds(2), () -> { Client.StreamMetadata metadata = client.metadata(s).get(s); return metadata.getLeader() != null || !metadata.getReplicas().isEmpty(); @@ -537,13 +538,15 @@ void consumerShouldKeepConsumingAfterDisruption( assertThat(publishLatchSecondWave.await(10, TimeUnit.SECONDS)).isTrue(); producerSecondWave.close(); - assertThat(consumeLatchSecondWave.await(10, TimeUnit.SECONDS)).isTrue(); + latchAssert(consumeLatchSecondWave).completes(recoveryInitialDelay.plusSeconds(2)); assertThat(receivedMessageCount.get()) .isBetween(messageCount * 2, messageCount * 2 + 1); // there can be a duplicate assertThat(consumer.isOpen()).isTrue(); - consumer.close(); } finally { + if (consumer != null) { + consumer.close(); + } environment.deleteStream(s); } } diff --git a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentUnitTest.java b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentUnitTest.java index f4aa8b541b..b55996a9f7 100644 --- a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentUnitTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentUnitTest.java @@ -34,6 +34,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; +import java.util.function.Supplier; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -191,7 +192,9 @@ void locatorOperationShouldReturnOperationResultIfNoProblem() { AtomicInteger counter = new AtomicInteger(); int result = StreamEnvironment.locatorOperation( - c -> counter.incrementAndGet(), () -> null, BackOffDelayPolicy.fixed(Duration.ZERO)); + c -> counter.incrementAndGet(), + CLIENT_SUPPLIER, + BackOffDelayPolicy.fixed(Duration.ZERO)); assertThat(result).isEqualTo(1); } @@ -207,7 +210,7 @@ void locatorOperationShouldRetryAndReturnResultIfLocatorException() { return counter.get(); } }, - () -> null, + CLIENT_SUPPLIER, BackOffDelayPolicy.fixed(Duration.ofMillis(10))); assertThat(result).isEqualTo(2); } @@ -222,7 +225,7 @@ void locatorOperationShouldThrowLocatorExceptionWhenRetryExhausts() { counter.incrementAndGet(); throw new LocatorNotAvailableException(); }, - () -> null, + CLIENT_SUPPLIER, BackOffDelayPolicy.fixed(Duration.ofMillis(10)))) .isInstanceOf(LocatorNotAvailableException.class); assertThat(counter).hasValue(3); @@ -242,7 +245,7 @@ void locatorOperationShouldThrowInterruptedExceptionAsCauseIfInterrupted() latch.countDown(); throw new LocatorNotAvailableException(); }, - () -> null, + CLIENT_SUPPLIER, BackOffDelayPolicy.fixed(Duration.ofMinutes(10))); } catch (StreamException e) { exception.set(e); @@ -268,9 +271,11 @@ void locatorOperationShouldNotRetryAndReThrowUnexpectedException() { counter.incrementAndGet(); throw new RuntimeException(); }, - () -> null, + CLIENT_SUPPLIER, BackOffDelayPolicy.fixed(Duration.ofMillis(10)))) .isInstanceOf(RuntimeException.class); assertThat(counter).hasValue(1); } + + private static final Supplier CLIENT_SUPPLIER = () -> mock(Client.class); } From 
c2a162629e6a64b9920f4c619bc3b1c479acbe66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Tue, 29 Nov 2022 18:06:55 +0100 Subject: [PATCH 04/10] Retry consumer recovery on more conditions When stream is not available and even when there is no candidates at the moment (added some retry delay for the latter). --- .../stream/impl/ConsumersCoordinator.java | 162 +++++++++++++----- .../stream/impl/StreamEnvironment.java | 10 +- .../java/com/rabbitmq/stream/impl/Utils.java | 7 +- .../stream/impl/ConsumersCoordinatorTest.java | 133 +++++++++++++- .../stream/impl/MonitoringTestUtils.java | 35 +++- 5 files changed, 290 insertions(+), 57 deletions(-) diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index a6ce3bd3f6..4137bd07cd 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -13,6 +13,7 @@ // info@rabbitmq.com. package com.rabbitmq.stream.impl; +import static com.rabbitmq.stream.impl.Utils.convertCodeToException; import static com.rabbitmq.stream.impl.Utils.formatConstant; import static com.rabbitmq.stream.impl.Utils.isSac; import static com.rabbitmq.stream.impl.Utils.namedFunction; @@ -27,6 +28,7 @@ import com.rabbitmq.stream.OffsetSpecification; import com.rabbitmq.stream.StreamDoesNotExistException; import com.rabbitmq.stream.StreamException; +import com.rabbitmq.stream.StreamNotAvailableException; import com.rabbitmq.stream.SubscriptionListener; import com.rabbitmq.stream.SubscriptionListener.SubscriptionContext; import com.rabbitmq.stream.impl.Client.Broker; @@ -40,6 +42,7 @@ import com.rabbitmq.stream.impl.Utils.ClientConnectionType; import com.rabbitmq.stream.impl.Utils.ClientFactory; import com.rabbitmq.stream.impl.Utils.ClientFactoryContext; +import java.time.Duration; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -74,6 +77,7 @@ class ConsumersCoordinator { private final ClientFactory clientFactory; private final int maxConsumersByConnection; private final Function connectionNamingStrategy; + private final List trackers = new CopyOnWriteArrayList<>(); ConsumersCoordinator( StreamEnvironment environment, @@ -87,10 +91,13 @@ class ConsumersCoordinator { } private static String keyForClientSubscription(Client.Broker broker) { - // FIXME make sure this is a reasonable key for brokers return broker.getHost() + ":" + broker.getPort(); } + private BackOffDelayPolicy recoveryBackOffDelayPolicy() { + return this.environment.recoveryBackOffDelayPolicy(); + } + private BackOffDelayPolicy metadataUpdateBackOffDelayPolicy() { return environment.topologyUpdateBackOffDelayPolicy(); } @@ -142,6 +149,7 @@ Runnable subscribe( managerPool.clean(); throw e; } + this.trackers.add(subscriptionTracker); return subscriptionTracker::cancel; } @@ -206,29 +214,76 @@ int poolSize() { return pools.size(); } + private static String quote(String value) { + if (value == null) { + return "null"; + } else { + return "\"" + value + "\""; + } + } + @Override public String toString() { - return ("[ \n" - + pools.entrySet().stream() + StringBuilder builder = new StringBuilder("{"); + builder.append("\"subscription_count\" : ").append(this.trackers.size()).append(","); + builder.append("\"subscriptions\" : ["); + builder.append( + this.trackers.stream() .map( - poolEntry -> - " { \"broker\" : \"" - + poolEntry.getKey() - + "\", \"client_count\" : " - + 
poolEntry.getValue().managers.size() - + ", \"clients\" : [ " - + poolEntry.getValue().managers.stream() - .map( - manager -> - "{ \"consumer_count\" : " - + manager.subscriptionTrackers.stream() - .filter(Objects::nonNull) - .count() - + " }") - .collect(Collectors.joining(", ")) - + " ] }") - .collect(Collectors.joining(", \n")) - + "\n]"); + t -> { + StringBuilder b = new StringBuilder("{"); + b.append(quote("stream")).append(":").append(quote(t.stream)).append(","); + b.append(quote("node")).append(":"); + Client client = null; + ClientSubscriptionsManager manager = t.manager; + if (manager != null) { + client = manager.client; + } + if (client == null) { + b.append("null"); + } else { + b.append(quote(client.getHost() + ":" + client.getPort())); + } + b.append(", ").append(quote("pool")).append(" : "); + ManagerPool pool = null; + if (manager != null) { + pool = manager.owner; + } + if (pool == null) { + b.append("null"); + } else { + b.append(quote(pool.name)); + } + b.append("}"); + return b.toString(); + }) + .collect(Collectors.joining(","))); + builder.append("],"); + builder.append("\"pool_count\" : ").append(this.pools.size()).append(","); + builder.append("\"pools\" : "); + builder.append( + "[ \n" + + pools.entrySet().stream() + .map( + poolEntry -> + " { \"broker\" : \"" + + poolEntry.getKey() + + "\", \"client_count\" : " + + poolEntry.getValue().managers.size() + + ", \"clients\" : [ " + + poolEntry.getValue().managers.stream() + .map( + manager -> + "{ \"consumer_count\" : " + + manager.subscriptionTrackers.stream() + .filter(Objects::nonNull) + .count() + + " }") + .collect(Collectors.joining(", ")) + + " ] }") + .collect(Collectors.joining(", \n")) + + "\n]}"); + return builder.toString(); } /** @@ -426,9 +481,9 @@ private void add( pickedManager.add(subscriptionTracker, offsetSpecification, isInitialSubscription); } catch (IllegalStateException e) { pickedManager = null; - } catch (TimeoutStreamException | ClientClosedException e) { - // it's very likely the manager connection is dead - // scheduling its closing in another thread to avoid blocking this one + } catch (TimeoutStreamException | ClientClosedException | StreamNotAvailableException e) { + // manager connection is dead or stream not available + // scheduling manager closing if necessary in another thread to avoid blocking this one if (pickedManager.isEmpty()) { ClientSubscriptionsManager manager = pickedManager; ConsumersCoordinator.this.environment.execute( @@ -590,8 +645,7 @@ private ClientSubscriptionsManager( assignConsumersToStream( trackersToReAssign, stream, - attempt -> - environment.recoveryBackOffDelayPolicy().delay(attempt), + recoveryBackOffDelayPolicy(), false); } } @@ -763,13 +817,16 @@ private void recoverSubscription( subscriptionPool = pools.computeIfAbsent( key, - s -> - new ManagerPool( - key, - environment - .clientParametersCopy() - .host(broker.getHost()) - .port(broker.getPort()))); + s -> { + LOGGER.debug( + "Creating new connection pool {} to consume from {}", key, stream); + return new ManagerPool( + key, + environment + .clientParametersCopy() + .host(broker.getHost()) + .port(broker.getPort())); + }); synchronized (affectedSubscription.consumer) { if (affectedSubscription.consumer.isOpen()) { OffsetSpecification offsetSpecification; @@ -788,9 +845,10 @@ private void recoverSubscription( LOGGER.debug("Not re-assigning consumer because it has been closed"); reassignmentCompleted = true; } - } catch (TimeoutStreamException | ClientClosedException e) { + } catch (TimeoutStreamException 
| ClientClosedException | StreamNotAvailableException e) { LOGGER.debug( - "Consumer {} re-assignment on stream {} timed out or connection closed, refreshing candidates and retrying", + "Consumer {} re-assignment on stream {} timed out or connection closed or stream not available, " + + "refreshing candidates and retrying", affectedSubscription.consumer.id(), affectedSubscription.stream); if (subscriptionPool != null) { @@ -801,6 +859,7 @@ private void recoverSubscription( callAndMaybeRetry( () -> findBrokersForStream(stream), ex -> !(ex instanceof StreamDoesNotExistException), + environment.recoveryBackOffDelayPolicy(), "Candidate lookup to consume from '%s'", stream); @@ -921,7 +980,9 @@ synchronized void add( + " failed with code " + formatConstant(subscribeResponse.getResponseCode()); LOGGER.debug(message); - throw new StreamException(message); + throw convertCodeToException( + subscribeResponse.getResponseCode(), subscriptionTracker.stream, () -> message); + // throw new StreamException(message); } } catch (RuntimeException e) { subscriptionTracker.assign((byte) -1, null); @@ -1076,16 +1137,35 @@ private static void maybeExchangeCommandVersions(Client client) { static T callAndMaybeRetry( Supplier operation, Predicate retryCondition, String format, Object... args) { + return callAndMaybeRetry(operation, retryCondition, i -> Duration.ZERO, format, args); + } + + static T callAndMaybeRetry( + Supplier operation, + Predicate retryCondition, + BackOffDelayPolicy delayPolicy, + String format, + Object... args) { String description = format(format, args); int attempt = 0; - RuntimeException lastException = null; + Exception lastException = null; while (attempt++ < 3) { try { return operation.get(); - } catch (RuntimeException e) { + } catch (Exception e) { lastException = e; if (retryCondition.test(e)) { - LOGGER.debug("Operation '{}' timed out, retrying...", description); + LOGGER.debug("Operation '{}' failed, retrying...", description); + Duration delay = delayPolicy.delay(attempt - 1); + if (!delay.isZero()) { + try { + Thread.sleep(delay.toMillis()); + } catch (InterruptedException ex) { + Thread.interrupted(); + lastException = ex; + break; + } + } } else { break; } @@ -1096,8 +1176,10 @@ static T callAndMaybeRetry( LOGGER.debug(message); if (lastException == null) { throw new StreamException(message); + } else if (lastException instanceof RuntimeException) { + throw (RuntimeException) lastException; } else { - throw lastException; + throw new StreamException(message, lastException); } } diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java index 4a3732522f..445cc39550 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java @@ -13,9 +13,9 @@ // info@rabbitmq.com. 
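// A minimal usage sketch of the retry helper shown above (callAndMaybeRetry with a
// BackOffDelayPolicy), mirroring the candidate lookup in recoverSubscription; it assumes a
// coordinator instance where findBrokersForStream(String) and the environment's
// recoveryBackOffDelayPolicy() are in scope. The operation is attempted up to three times,
// sleeping for the policy's delay between attempts, and only exceptions matching the
// predicate trigger another attempt (a StreamDoesNotExistException fails fast).
List<Client.Broker> candidates =
    callAndMaybeRetry(
        () -> findBrokersForStream(stream),
        ex -> !(ex instanceof StreamDoesNotExistException),
        environment.recoveryBackOffDelayPolicy(),
        "Candidate lookup to consume from '%s'",
        stream);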
package com.rabbitmq.stream.impl; +import static com.rabbitmq.stream.impl.Utils.convertCodeToException; import static com.rabbitmq.stream.impl.Utils.formatConstant; import static com.rabbitmq.stream.impl.Utils.namedRunnable; -import static com.rabbitmq.stream.impl.Utils.propagateException; import static java.util.concurrent.TimeUnit.SECONDS; import com.rabbitmq.stream.Address; @@ -516,7 +516,13 @@ public StreamStats queryStreamStats(String stream) { "committed_chunk_id", "No committed chunk ID for stream " + stream); return new DefaultStreamStats(firstOffsetSupplier, committedOffsetSupplier); } else { - throw propagateException(response.getResponseCode(), stream); + throw convertCodeToException( + response.getResponseCode(), + stream, + () -> + "Error while querying stream info: " + + formatConstant(response.getResponseCode()) + + "."); } } diff --git a/src/main/java/com/rabbitmq/stream/impl/Utils.java b/src/main/java/com/rabbitmq/stream/impl/Utils.java index adf2fe8f69..dff11d8131 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Utils.java +++ b/src/main/java/com/rabbitmq/stream/impl/Utils.java @@ -38,6 +38,7 @@ import java.util.function.LongConsumer; import java.util.function.LongSupplier; import java.util.function.Predicate; +import java.util.function.Supplier; import javax.net.ssl.X509TrustManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -339,14 +340,14 @@ static boolean is3_11_OrMore(String brokerVersion) { return versionCompare(currentVersion(brokerVersion), "3.11.0") >= 0; } - static StreamException propagateException(short responseCode, String stream) { + static StreamException convertCodeToException( + short responseCode, String stream, Supplier fallbackMessage) { if (responseCode == Constants.RESPONSE_CODE_STREAM_DOES_NOT_EXIST) { return new StreamDoesNotExistException(stream); } else if (responseCode == Constants.RESPONSE_CODE_STREAM_NOT_AVAILABLE) { return new StreamNotAvailableException(stream); } else { - String message = "Error while querying stream info: " + formatConstant(responseCode) + "."; - return new StreamException(message, responseCode); + return new StreamException(fallbackMessage.get(), responseCode); } } diff --git a/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java b/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java index ad9c48a65b..11bedbd0d4 100644 --- a/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java @@ -340,7 +340,7 @@ void subscribePropagateExceptionWhenClientSubscriptionFails() { Collections.emptyMap())) .isInstanceOf(StreamException.class) .hasMessage(exceptionMessage); - assertThat(MonitoringTestUtils.extract(coordinator)).isEmpty(); + assertThat(MonitoringTestUtils.extract(coordinator).isEmpty()).isTrue(); } @Test @@ -1081,7 +1081,8 @@ void shouldRemoveClientSubscriptionManagerFromPoolIfEmptyAfterMetadataUpdate() t verify(client, times(subscriptionCount)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); - List info = MonitoringTestUtils.extract(coordinator); + List info = + MonitoringTestUtils.extract(coordinator).consumerCoordinatorInfos(); assertThat(info) .hasSize(1) .element(0) @@ -1093,7 +1094,7 @@ void shouldRemoveClientSubscriptionManagerFromPoolIfEmptyAfterMetadataUpdate() t Thread.sleep(delayPolicy.delay(0).toMillis() * 5); - info = MonitoringTestUtils.extract(coordinator); + info = MonitoringTestUtils.extract(coordinator).consumerCoordinatorInfos(); 
assertThat(info) .hasSize(1) .element(0) @@ -1116,7 +1117,7 @@ void shouldRemoveClientSubscriptionManagerFromPoolIfEmptyAfterMetadataUpdate() t verify(client, times(subscriptionCount + ConsumersCoordinator.MAX_SUBSCRIPTIONS_PER_CLIENT + 1)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); - info = MonitoringTestUtils.extract(coordinator); + info = MonitoringTestUtils.extract(coordinator).consumerCoordinatorInfos(); assertThat(info) .hasSize(1) .element(0) @@ -1346,7 +1347,7 @@ void shouldUseStoredOffsetOnRecovery(Consumer configur @Test @SuppressWarnings("unchecked") - void shouldRetryAssignmentOnManagerClientTimeout() throws Exception { + void shouldRetryAssignmentOnRecoveryTimeout() throws Exception { scheduledExecutorService = createScheduledExecutorService(2); when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); Duration retryDelay = Duration.ofMillis(100); @@ -1422,6 +1423,128 @@ void shouldRetryAssignmentOnManagerClientTimeout() throws Exception { consumerName, "stream-2"); // subscription call, times out 3 times, retry that succeeds } + @Test + @SuppressWarnings("unchecked") + void shouldRetryAssignmentOnRecoveryStreamNotAvailableFailure() throws Exception { + scheduledExecutorService = createScheduledExecutorService(2); + when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); + Duration retryDelay = Duration.ofMillis(100); + when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); + when(environment.topologyUpdateBackOffDelayPolicy()) + .thenReturn(BackOffDelayPolicy.fixed(retryDelay)); + when(consumer.isOpen()).thenReturn(true); + when(locator.metadata("stream")).thenReturn(metadata("stream", null, replicas())); + + when(clientFactory.client(any())).thenReturn(client); + + AtomicInteger subscriptionCount = new AtomicInteger(0); + when(client.subscribe( + subscriptionIdCaptor.capture(), + anyString(), + any(OffsetSpecification.class), + anyInt(), + anyMap())) + .thenAnswer( + invocation -> { + subscriptionCount.incrementAndGet(); + return responseOk(); + }) + .thenAnswer( + invocation -> { + subscriptionCount.incrementAndGet(); + return new Response(Constants.RESPONSE_CODE_STREAM_NOT_AVAILABLE); + }) + .thenAnswer( + invocation -> { + subscriptionCount.incrementAndGet(); + return responseOk(); + }); + + coordinator.subscribe( + consumer, + "stream", + null, + null, + NO_OP_SUBSCRIPTION_LISTENER, + NO_OP_TRACKING_CLOSING_CALLBACK, + (offset, message) -> {}, + Collections.emptyMap()); + verify(clientFactory, times(1)).client(any()); + verify(client, times(1)) + .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); + + this.shutdownListener.handle( + new Client.ShutdownContext(Client.ShutdownContext.ShutdownReason.UNKNOWN)); + + waitAtMost(() -> subscriptionCount.get() == 1 + 1 + 1); + + verify(locator, times(3)).metadata("stream"); + } + + @Test + @SuppressWarnings("unchecked") + void shouldRetryAssignmentOnRecoveryCandidateLookupFailure() throws Exception { + scheduledExecutorService = createScheduledExecutorService(); + when(environment.scheduledExecutorService()).thenReturn(scheduledExecutorService); + Duration retryDelay = Duration.ofMillis(100); + when(environment.recoveryBackOffDelayPolicy()).thenReturn(BackOffDelayPolicy.fixed(retryDelay)); + when(environment.topologyUpdateBackOffDelayPolicy()) + .thenReturn(BackOffDelayPolicy.fixed(retryDelay)); + when(consumer.isOpen()).thenReturn(true); + 
when(locator.metadata("stream")) + .thenReturn(metadata("stream", null, replicas())) + .thenReturn(metadata("stream", null, replicas())) + .thenReturn(metadata("stream", null, null)) + .thenReturn(metadata("stream", null, replicas())); + + when(clientFactory.client(any())).thenReturn(client); + + AtomicInteger subscriptionCount = new AtomicInteger(0); + when(client.subscribe( + subscriptionIdCaptor.capture(), + anyString(), + any(OffsetSpecification.class), + anyInt(), + anyMap())) + .thenAnswer( + invocation -> { + // first subscription + subscriptionCount.incrementAndGet(); + return responseOk(); + }) + .thenAnswer( + invocation -> { + // on recovery, subscription fails, to trigger candidate lookup + subscriptionCount.incrementAndGet(); + return new Response(Constants.RESPONSE_CODE_STREAM_NOT_AVAILABLE); + }) + .thenAnswer( + invocation -> { + subscriptionCount.incrementAndGet(); + return responseOk(); + }); + + coordinator.subscribe( + consumer, + "stream", + null, + null, + NO_OP_SUBSCRIPTION_LISTENER, + NO_OP_TRACKING_CLOSING_CALLBACK, + (offset, message) -> {}, + Collections.emptyMap()); + verify(clientFactory, times(1)).client(any()); + verify(client, times(1)) + .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); + + this.shutdownListener.handle( + new Client.ShutdownContext(Client.ShutdownContext.ShutdownReason.UNKNOWN)); + + waitAtMost(() -> subscriptionCount.get() == 1 + 1 + 1); + + verify(locator, times(4)).metadata("stream"); + } + @Test void subscribeUnsubscribeInDifferentThreadsShouldNotDeadlock() { when(locator.metadata("stream")).thenReturn(metadata(null, replicas())); diff --git a/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java b/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java index f3a6d04cca..642c2596c3 100644 --- a/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java +++ b/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java @@ -40,9 +40,8 @@ static List extract(ProducersCoordinator coordinator) { return GSON.fromJson(coordinator.toString(), type); } - static List extract(ConsumersCoordinator coordinator) { - Type type = new TypeToken>() {}.getType(); - return GSON.fromJson(coordinator.toString(), type); + static ConsumerCoordinatorInfo extract(ConsumersCoordinator coordinator) { + return GSON.fromJson(coordinator.toString(), ConsumerCoordinatorInfo.class); } static EnvironmentInfo extract(Environment environment) { @@ -65,10 +64,10 @@ public static class EnvironmentInfo { private final String[] locators; private final ProducersPoolInfo[] producers; - private final ConsumersPoolInfo[] consumers; + private final ConsumerCoordinatorInfo consumers; public EnvironmentInfo( - String[] locators, ProducersPoolInfo[] producers, ConsumersPoolInfo[] consumers) { + String[] locators, ProducersPoolInfo[] producers, ConsumerCoordinatorInfo consumers) { this.locators = locators; this.producers = producers; this.consumers = consumers; @@ -79,7 +78,7 @@ public String[] getLocators() { } public List getConsumers() { - return arrayToList(this.consumers); + return this.consumers.consumerCoordinatorInfos(); } public List getProducers() { @@ -95,11 +94,33 @@ public String toString() { + ", producers=" + Arrays.toString(producers) + ", consumers=" - + Arrays.toString(consumers) + + consumers + '}'; } } + public static class ConsumerCoordinatorInfo { + + private final int subscription_count; + private final int pool_count; + private final ConsumersPoolInfo[] pools; + + public ConsumerCoordinatorInfo( + 
int subscription_count, int pool_count, ConsumersPoolInfo[] pools) { + this.subscription_count = subscription_count; + this.pool_count = pool_count; + this.pools = pools; + } + + boolean isEmpty() { + return this.pool_count == 0; + } + + List consumerCoordinatorInfos() { + return Arrays.asList(this.pools); + } + } + public static class ConsumersPoolInfo { private final String broker; From 8841de158f7fad3e5d3dc00a0ea8d4835b2edba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Thu, 1 Dec 2022 17:51:11 +0100 Subject: [PATCH 05/10] Remove per-node data structure in consumer coordinator It would contain the client connections for a given node. It was difficult to maintain the consistency between those structures, so we're better off not using this layer. Now client connections are looked up with a linear scan, which is good enough, as the number of connections should remain under the thousand. --- .../java/com/rabbitmq/stream/impl/Client.java | 71 +-- .../impl/ConnectionStreamException.java | 27 ++ .../stream/impl/ConsumersCoordinator.java | 456 +++++++++--------- .../stream/impl/StreamEnvironment.java | 1 + .../stream/impl/TimeoutStreamException.java | 4 +- .../stream/impl/ConsumersCoordinatorTest.java | 75 ++- .../stream/impl/MonitoringTestUtils.java | 75 ++- .../stream/impl/StreamEnvironmentTest.java | 26 +- 8 files changed, 372 insertions(+), 363 deletions(-) create mode 100644 src/main/java/com/rabbitmq/stream/impl/ConnectionStreamException.java diff --git a/src/main/java/com/rabbitmq/stream/impl/Client.java b/src/main/java/com/rabbitmq/stream/impl/Client.java index b070937731..6db0b1b53f 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Client.java +++ b/src/main/java/com/rabbitmq/stream/impl/Client.java @@ -45,6 +45,7 @@ import static com.rabbitmq.stream.impl.Utils.encodeResponseCode; import static com.rabbitmq.stream.impl.Utils.extractResponseCode; import static com.rabbitmq.stream.impl.Utils.formatConstant; +import static java.lang.String.format; import static java.util.concurrent.TimeUnit.SECONDS; import com.rabbitmq.stream.AuthenticationFailureException; @@ -102,6 +103,7 @@ import java.io.IOException; import java.io.OutputStream; import java.lang.reflect.Field; +import java.net.ConnectException; import java.net.InetSocketAddress; import java.net.SocketAddress; import java.nio.charset.StandardCharsets; @@ -325,14 +327,16 @@ public void write( this.host = parameters.host; this.port = parameters.port; } catch (Exception e) { + String message = + format( + "Error while creating stream connection to %s:%d", parameters.host, parameters.port); if (e instanceof ConnectTimeoutException) { - throw new TimeoutStreamException( - String.format( - "Error while creating stream connection to %s:%d", - parameters.host, parameters.port), - e); + throw new TimeoutStreamException(message, e); + } else if (e instanceof ConnectException) { + throw new ConnectionStreamException(message, e); + } else { + throw new StreamException(message, e); } - throw new StreamException(e); } this.channel = f.channel(); @@ -407,7 +411,7 @@ private Map peerProperties() { .writeShort(entry.getValue().length()) .writeBytes(entry.getValue().getBytes(StandardCharsets.UTF_8)); } - OutstandingRequest> request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest> request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -475,8 +479,7 @@ private SaslAuthenticateResponse sendSaslAuthenticate( } else { 
bb.writeInt(challengeResponse.length).writeBytes(challengeResponse); } - OutstandingRequest request = - new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -498,7 +501,7 @@ private Map open(String virtualHost) { bb.writeInt(correlationId); bb.writeShort(virtualHost.length()); bb.writeBytes(virtualHost.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -540,7 +543,7 @@ private void sendClose(short code, String reason) { bb.writeShort(code); bb.writeShort(reason.length()); bb.writeBytes(reason.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -567,7 +570,7 @@ private List getSaslMechanisms() { bb.writeShort(encodeRequestCode(COMMAND_SASL_HANDSHAKE)); bb.writeShort(VERSION_1); bb.writeInt(correlationId); - OutstandingRequest> request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest> request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -603,7 +606,7 @@ public Response create(String stream, Map arguments) { bb.writeShort(argument.getValue().length()); bb.writeBytes(argument.getValue().getBytes(StandardCharsets.UTF_8)); } - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -648,7 +651,7 @@ public Response delete(String stream) { bb.writeInt(correlationId); bb.writeShort(stream.length()); bb.writeBytes(stream.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -680,8 +683,7 @@ public Map metadata(String... 
streams) { bb.writeShort(stream.length()); bb.writeBytes(stream.getBytes(StandardCharsets.UTF_8)); } - OutstandingRequest> request = - new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest> request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -716,7 +718,7 @@ public Response declarePublisher(byte publisherId, String publisherReference, St } bb.writeShort(stream.length()); bb.writeBytes(stream.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -737,7 +739,7 @@ public Response deletePublisher(byte publisherId) { bb.writeShort(VERSION_1); bb.writeInt(correlationId); bb.writeByte(publisherId); - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -1068,7 +1070,7 @@ public Response subscribe( .writeBytes(entry.getValue().getBytes(StandardCharsets.UTF_8)); } } - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); if (offsetSpecification.isOffset()) { subscriptionOffsets.add( @@ -1077,6 +1079,9 @@ public Response subscribe( channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); throw new StreamException(e); @@ -1125,7 +1130,7 @@ public QueryOffsetResponse queryOffset(String reference, String stream) { bb.writeBytes(reference.getBytes(StandardCharsets.UTF_8)); bb.writeShort(stream.length()); bb.writeBytes(stream.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -1163,8 +1168,7 @@ public long queryPublisherSequence(String publisherReference, String stream) { bb.writeBytes(publisherReference.getBytes(StandardCharsets.UTF_8)); bb.writeShort(stream.length()); bb.writeBytes(stream.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest request = - new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -1191,7 +1195,7 @@ public Response unsubscribe(byte subscriptionId) { bb.writeShort(VERSION_1); bb.writeInt(correlationId); bb.writeByte(subscriptionId); - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -1315,7 +1319,7 @@ public List route(String routingKey, String superStream) { bb.writeBytes(routingKey.getBytes(StandardCharsets.UTF_8)); bb.writeShort(superStream.length()); bb.writeBytes(superStream.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest> request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest> request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); 
request.block(); @@ -1341,7 +1345,7 @@ public List partitions(String superStream) { bb.writeInt(correlationId); bb.writeShort(superStream.length()); bb.writeBytes(superStream.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest> request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest> request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -1369,8 +1373,7 @@ List exchangeCommandVersions() { bb.writeShort(commandVersion.getMinVersion()); bb.writeShort(commandVersion.getMaxVersion()); } - OutstandingRequest> request = - new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest> request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -1395,7 +1398,7 @@ StreamStatsResponse streamStats(String stream) { bb.writeInt(correlationId); bb.writeShort(stream.length()); bb.writeBytes(stream.getBytes(StandardCharsets.UTF_8)); - OutstandingRequest request = new OutstandingRequest<>(this.rpcTimeout); + OutstandingRequest request = outstandingRequest(); outstandingRequests.put(correlationId, request); channel.writeAndFlush(bb); request.block(); @@ -2288,12 +2291,15 @@ static class OutstandingRequest { private final Duration timeout; + private final String node; + private final AtomicReference response = new AtomicReference<>(); private final AtomicReference error = new AtomicReference<>(); - private OutstandingRequest(Duration timeout) { + private OutstandingRequest(Duration timeout, String node) { this.timeout = timeout; + this.node = node; } void block() { @@ -2305,7 +2311,8 @@ void block() { throw new StreamException("Interrupted while waiting for response"); } if (!completed) { - throw new TimeoutStreamException("Could not get response in " + timeout.toMillis() + " ms"); + throw new TimeoutStreamException( + format("Could not get response in %d ms from node %s", timeout.toMillis(), node)); } } @@ -2512,4 +2519,8 @@ public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { ctx.close(); } } + + private OutstandingRequest outstandingRequest() { + return new OutstandingRequest<>(this.rpcTimeout, this.host + ":" + this.port); + } } diff --git a/src/main/java/com/rabbitmq/stream/impl/ConnectionStreamException.java b/src/main/java/com/rabbitmq/stream/impl/ConnectionStreamException.java new file mode 100644 index 0000000000..68e3253bd0 --- /dev/null +++ b/src/main/java/com/rabbitmq/stream/impl/ConnectionStreamException.java @@ -0,0 +1,27 @@ +// Copyright (c) 2022 VMware, Inc. or its affiliates. All rights reserved. +// +// This software, the RabbitMQ Stream Java client library, is dual-licensed under the +// Mozilla Public License 2.0 ("MPL"), and the Apache License version 2 ("ASL"). +// For the MPL, please see LICENSE-MPL-RabbitMQ. For the ASL, +// please see LICENSE-APACHE2. +// +// This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, +// either express or implied. See the LICENSE file for specific language governing +// rights and limitations of this software. +// +// If you have any questions regarding licensing, please contact us at +// info@rabbitmq.com. 
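// A minimal sketch of the connection-failure mapping added to the Client constructor above;
// the helper name mapConnectError is hypothetical, but the mapping follows the patch: a Netty
// ConnectTimeoutException becomes a TimeoutStreamException, a plain ConnectException becomes
// the new ConnectionStreamException, and anything else stays a generic StreamException, all
// carrying the target host and port in the message so retry logic can tell nodes apart.
static StreamException mapConnectError(String host, int port, Exception cause) {
  String message =
      String.format("Error while creating stream connection to %s:%d", host, port);
  if (cause instanceof io.netty.channel.ConnectTimeoutException) {
    return new TimeoutStreamException(message, cause); // connection attempt timed out
  } else if (cause instanceof java.net.ConnectException) {
    return new ConnectionStreamException(message, cause); // node refused or unreachable
  } else {
    return new StreamException(message, cause); // unexpected failure, not retried
  }
}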
+package com.rabbitmq.stream.impl; + +import com.rabbitmq.stream.StreamException; + +class ConnectionStreamException extends StreamException { + + public ConnectionStreamException(String message) { + super(message); + } + + public ConnectionStreamException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index 4137bd07cd..a97bc103b1 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -33,6 +33,7 @@ import com.rabbitmq.stream.SubscriptionListener.SubscriptionContext; import com.rabbitmq.stream.impl.Client.Broker; import com.rabbitmq.stream.impl.Client.ChunkListener; +import com.rabbitmq.stream.impl.Client.ClientParameters; import com.rabbitmq.stream.impl.Client.ConsumerUpdateListener; import com.rabbitmq.stream.impl.Client.CreditNotification; import com.rabbitmq.stream.impl.Client.MessageListener; @@ -46,15 +47,19 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.NavigableSet; import java.util.Objects; import java.util.Random; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Predicate; @@ -73,11 +78,14 @@ class ConsumersCoordinator { private static final Logger LOGGER = LoggerFactory.getLogger(ConsumersCoordinator.class); private final Random random = new Random(); private final StreamEnvironment environment; - private final Map pools = new ConcurrentHashMap<>(); private final ClientFactory clientFactory; private final int maxConsumersByConnection; private final Function connectionNamingStrategy; + // TODO remove the list of trackers (it's here just for debugging) private final List trackers = new CopyOnWriteArrayList<>(); + private final AtomicLong managerIdSequence = new AtomicLong(0); + private final NavigableSet managers = new ConcurrentSkipListSet<>(); + private final AtomicLong trackerIdSequence = new AtomicLong(0); ConsumersCoordinator( StreamEnvironment environment, @@ -121,6 +129,7 @@ Runnable subscribe( // we keep this instance when we move the subscription from a client to another one SubscriptionTracker subscriptionTracker = new SubscriptionTracker( + this.trackerIdSequence.getAndIncrement(), consumer, stream, offsetSpecification, @@ -130,27 +139,89 @@ Runnable subscribe( messageHandler, subscriptionProperties); - String key = keyForClientSubscription(newNode); - - ManagerPool managerPool = - pools.computeIfAbsent( - key, - s -> - new ManagerPool( - key, - environment - .clientParametersCopy() - .host(newNode.getHost()) - .port(newNode.getPort()))); - try { - managerPool.add(subscriptionTracker, offsetSpecification, true); - } catch (RuntimeException e) { - managerPool.clean(); - throw e; + addToManager(newNode, subscriptionTracker, offsetSpecification, true); + } catch (ConnectionStreamException e) { + // these exceptions are not public + throw new StreamException(e.getMessage()); } + this.trackers.add(subscriptionTracker); - return 
subscriptionTracker::cancel; + return () -> { + try { + this.trackers.remove(subscriptionTracker); + } catch (Exception e) { + LOGGER.debug("Error while removing subscription tracker from list"); + } + subscriptionTracker.cancel(); + }; + } + + private void addToManager( + Broker node, + SubscriptionTracker tracker, + OffsetSpecification offsetSpecification, + boolean isInitialSubscription) { + ClientParameters clientParameters = + environment.clientParametersCopy().host(node.getHost()).port(node.getPort()); + ClientSubscriptionsManager pickedManager = null; + while (pickedManager == null) { + Iterator iterator = this.managers.iterator(); + while (iterator.hasNext()) { + pickedManager = iterator.next(); + if (pickedManager.isClosed()) { + iterator.remove(); + } + if (node.equals(pickedManager.node) && !pickedManager.isFull()) { + break; + } else { + pickedManager = null; + } + } + if (pickedManager == null) { + String name = keyForClientSubscription(node); + LOGGER.debug("Creating subscription manager on {}", name); + pickedManager = new ClientSubscriptionsManager(node, clientParameters); + LOGGER.debug("Created subscription manager on {}, id {}", name, pickedManager.id); + } + try { + pickedManager.add(tracker, offsetSpecification, isInitialSubscription); + LOGGER.debug( + "Assigned tracker {} (stream '{}') to manager {} (node {}), subscription ID {}", + tracker.id, + tracker.stream, + pickedManager.id, + pickedManager.name, + tracker.subscriptionIdInClient); + this.managers.add(pickedManager); + } catch (IllegalStateException e) { + pickedManager = null; + } catch (ConnectionStreamException | ClientClosedException | StreamNotAvailableException e) { + // manager connection is dead or stream not available + // scheduling manager closing if necessary in another thread to avoid blocking this one + if (pickedManager.isEmpty()) { + ClientSubscriptionsManager manager = pickedManager; + ConsumersCoordinator.this.environment.execute( + () -> { + manager.closeIfEmpty(); + }, + "Consumer manager closing after timeout, consumer %d on stream '%s'", + tracker.consumer.id(), + tracker.stream); + } + throw e; + } catch (RuntimeException e) { + if (pickedManager != null) { + pickedManager.closeIfEmpty(); + } + throw e; + } + } + } + + // for testing + int managerCount() { + return this.managers.size(); } // package protected for testing @@ -205,15 +276,22 @@ private Client.Broker pickBroker(List brokers) { } public void close() { - for (ManagerPool subscriptionPool : this.pools.values()) { - subscriptionPool.close(); + Iterator iterator = this.managers.iterator(); + while (iterator.hasNext()) { + ClientSubscriptionsManager manager = iterator.next(); + try { + iterator.remove(); + manager.close(); + } catch (Exception e) { + LOGGER.info( + "Error while closing manager {} connected to node {}: {}", + manager.id, + manager.name, + e.getMessage()); + } } } - int poolSize() { - return pools.size(); - } - private static String quote(String value) { if (value == null) { return "null"; @@ -222,9 +300,33 @@ private static String quote(String value) { } } + private static String jsonField(String name, Number value) { + return quote(name) + " : " + value.longValue(); + } + + private static String jsonField(String name, String value) { + return quote(name) + " : " + quote(value); + } + @Override public String toString() { StringBuilder builder = new StringBuilder("{"); + builder.append(jsonField("client_count", this.managers.size())).append(", "); + builder.append(quote("clients")).append(" : ["); + builder.append( 
+ this.managers.stream() + .map( + m -> { + StringBuilder b = new StringBuilder("{"); + b.append(jsonField("id", m.id)) + .append(",") + .append(jsonField("node", m.name)) + .append(",") + .append(jsonField("consumer_count", m.trackerCount)); + return b.append("}").toString(); + }) + .collect(Collectors.joining(","))); + builder.append("],"); builder.append("\"subscription_count\" : ").append(this.trackers.size()).append(","); builder.append("\"subscriptions\" : ["); builder.append( @@ -244,45 +346,10 @@ public String toString() { } else { b.append(quote(client.getHost() + ":" + client.getPort())); } - b.append(", ").append(quote("pool")).append(" : "); - ManagerPool pool = null; - if (manager != null) { - pool = manager.owner; - } - if (pool == null) { - b.append("null"); - } else { - b.append(quote(pool.name)); - } - b.append("}"); - return b.toString(); + return b.append("}").toString(); }) .collect(Collectors.joining(","))); - builder.append("],"); - builder.append("\"pool_count\" : ").append(this.pools.size()).append(","); - builder.append("\"pools\" : "); - builder.append( - "[ \n" - + pools.entrySet().stream() - .map( - poolEntry -> - " { \"broker\" : \"" - + poolEntry.getKey() - + "\", \"client_count\" : " - + poolEntry.getValue().managers.size() - + ", \"clients\" : [ " - + poolEntry.getValue().managers.stream() - .map( - manager -> - "{ \"consumer_count\" : " - + manager.subscriptionTrackers.stream() - .filter(Objects::nonNull) - .count() - + " }") - .collect(Collectors.joining(", ")) - + " ] }") - .collect(Collectors.joining(", \n")) - + "\n]}"); + builder.append("]}"); return builder.toString(); } @@ -294,6 +361,7 @@ public String toString() { */ private static class SubscriptionTracker { + private final long id; private final String stream; private final OffsetSpecification initialOffsetSpecification; private final String offsetTrackingReference; @@ -310,6 +378,7 @@ private static class SubscriptionTracker { new AtomicReference<>(SubscriptionState.OPENING); private SubscriptionTracker( + long id, StreamConsumer consumer, String stream, OffsetSpecification initialOffsetSpecification, @@ -318,6 +387,7 @@ private SubscriptionTracker( Runnable trackingClosingCallback, MessageHandler messageHandler, Map subscriptionProperties) { + this.id = id; this.consumer = consumer; this.stream = stream; this.initialOffsetSpecification = initialOffsetSpecification; @@ -433,115 +503,18 @@ public Consumer consumer() { } } - /** - * Maintains {@link ClientSubscriptionsManager} instances for a given host. - * - *
<p>
Creates new {@link ClientSubscriptionsManager} instances (and so {@link Client}s, i.e. - * connections) when needed and disposes them when appropriate. - */ - private class ManagerPool { - - private final List managers = new CopyOnWriteArrayList<>(); - private final String name; - private final Client.ClientParameters clientParameters; - - private ManagerPool(String name, Client.ClientParameters clientParameters) { - this.name = name; - this.clientParameters = clientParameters; - LOGGER.debug("Creating client subscription pool on {}", name); - managers.add(new ClientSubscriptionsManager(this, clientParameters)); - } - - private void add( - SubscriptionTracker subscriptionTracker, - OffsetSpecification offsetSpecification, - boolean isInitialSubscription) { - - ClientSubscriptionsManager pickedManager = null; - while (pickedManager == null) { - // FIXME deal with manager unavailability (manager may be closing because of connection - // closing) - synchronized (this) { - for (ClientSubscriptionsManager manager : managers) { - if (!manager.isFull()) { - pickedManager = manager; - break; - } - } - if (pickedManager == null) { - LOGGER.debug( - "Creating subscription manager on {}, this is subscription manager #{}", - name, - managers.size() + 1); - pickedManager = new ClientSubscriptionsManager(this, clientParameters); - managers.add(pickedManager); - } - } - try { - pickedManager.add(subscriptionTracker, offsetSpecification, isInitialSubscription); - } catch (IllegalStateException e) { - pickedManager = null; - } catch (TimeoutStreamException | ClientClosedException | StreamNotAvailableException e) { - // manager connection is dead or stream not available - // scheduling manager closing if necessary in another thread to avoid blocking this one - if (pickedManager.isEmpty()) { - ClientSubscriptionsManager manager = pickedManager; - ConsumersCoordinator.this.environment.execute( - () -> { - this.remove(manager); - manager.close(); - }, - "Consumer manager closing after timeout, consumer %d on stream '%s'", - subscriptionTracker.consumer.id(), - subscriptionTracker.stream); - } - throw e; - } catch (RuntimeException e) { - this.maybeDisposeManager(pickedManager); - throw e; - } - } - } - - private synchronized void clean() { - for (ClientSubscriptionsManager manager : managers) { - maybeDisposeManager(manager); - } - } - - private synchronized void maybeDisposeManager( - ClientSubscriptionsManager clientSubscriptionsManager) { - if (clientSubscriptionsManager.isEmpty()) { - clientSubscriptionsManager.close(); - this.remove(clientSubscriptionsManager); - } - } - - private synchronized void remove(ClientSubscriptionsManager clientSubscriptionsManager) { - managers.remove(clientSubscriptionsManager); - if (managers.isEmpty()) { - pools.remove(name); - LOGGER.debug("Disposed client subscription pool on {} because it was empty", name); - } - } - - synchronized void close() { - for (ClientSubscriptionsManager manager : managers) { - manager.close(); - } - managers.clear(); - } - } - /** * Maintains a set of {@link SubscriptionTracker} instances on a {@link Client}. * *
<p>
It dispatches inbound messages to the appropriate {@link SubscriptionTracker} and * re-allocates {@link SubscriptionTracker}s in case of stream unavailability or disconnection. */ - private class ClientSubscriptionsManager { + private class ClientSubscriptionsManager implements Comparable { + private final long id; + private final Broker node; private final Client client; + private final String name; // the 2 data structures track the subscriptions, they must remain consistent private final Map> streamToStreamSubscriptions = new ConcurrentHashMap<>(); @@ -549,12 +522,12 @@ private class ClientSubscriptionsManager { private volatile List subscriptionTrackers = new ArrayList<>(maxConsumersByConnection); private volatile int trackerCount = 0; - private final ManagerPool owner; + private final AtomicBoolean closed = new AtomicBoolean(false); - private ClientSubscriptionsManager( - ManagerPool owner, Client.ClientParameters clientParameters) { - this.owner = owner; - String name = owner.name; + private ClientSubscriptionsManager(Broker node, Client.ClientParameters clientParameters) { + this.id = managerIdSequence.getAndIncrement(); + this.node = node; + this.name = keyForClientSubscription(node); LOGGER.debug("creating subscription manager on {}", name); IntStream.range(0, maxConsumersByConnection).forEach(i -> subscriptionTrackers.add(null)); this.trackerCount = 0; @@ -582,7 +555,6 @@ private ClientSubscriptionsManager( (subscriptionId, offset, chunkTimestamp, committedOffset, message) -> { SubscriptionTracker subscriptionTracker = subscriptionTrackers.get(subscriptionId & 0xFF); - if (subscriptionTracker != null) { subscriptionTracker.offset = offset; subscriptionTracker.hasReceivedSomething = true; @@ -592,19 +564,17 @@ private ClientSubscriptionsManager( message); // FIXME set offset here as well, best effort to avoid duplicates? } else { - LOGGER.debug("Could not find stream subscription {}", subscriptionId); + LOGGER.debug( + "Could not find stream subscription {} in manager {}, node {}", + subscriptionId, + this.id, + this.name); } }; ShutdownListener shutdownListener = shutdownContext -> { - // FIXME should the pool check if it's empty and so remove itself from the - // pools data structure? 
- - // we may be closing the client because it's not the right node, so the manager - // should not be removed from its pool, because it's not really in it already - if (clientInitializedInManager.get()) { - this.owner.remove(this); - } + this.closed.set(true); + managers.remove(this); if (shutdownContext.isShutdownUnexpected()) { LOGGER.debug( "Unexpected shutdown notification on subscription connection {}, scheduling consumers re-assignment", @@ -659,7 +629,6 @@ private ClientSubscriptionsManager( LOGGER.debug( "Received metadata notification for '{}', stream is likely to have become unavailable", stream); - Set affectedSubscriptions; synchronized (this) { Set subscriptions = streamToStreamSubscriptions.remove(stream); @@ -682,9 +651,7 @@ private ClientSubscriptionsManager( } affectedSubscriptions = subscriptions; } - if (isEmpty()) { - this.owner.remove(this); - } + if (affectedSubscriptions != null && !affectedSubscriptions.isEmpty()) { environment .scheduledExecutorService() @@ -695,14 +662,14 @@ private ClientSubscriptionsManager( return; } LOGGER.debug( - "Trying to move {} subscription(s) (stream {})", + "Trying to move {} subscription(s) (stream '{}')", affectedSubscriptions.size(), stream); assignConsumersToStream( affectedSubscriptions, stream, metadataUpdateBackOffDelayPolicy(), - isEmpty()); + true); }, "Consumers re-assignment after metadata update on stream '%s'", stream)); @@ -738,7 +705,7 @@ private ClientSubscriptionsManager( .shutdownListener(shutdownListener) .metadataListener(metadataListener) .consumerUpdateListener(consumerUpdateListener)) - .key(owner.name); + .key(name); this.client = clientFactory.client(clientFactoryContext); LOGGER.debug("Created consumer connection '{}'", connectionName); maybeExchangeCommandVersions(client); @@ -749,7 +716,7 @@ private void assignConsumersToStream( Collection subscriptions, String stream, BackOffDelayPolicy delayPolicy, - boolean closeClient) { + boolean maybeCloseClient) { Runnable consumersClosingCallback = () -> { for (SubscriptionTracker affectedSubscription : subscriptions) { @@ -787,8 +754,8 @@ private void assignConsumersToStream( SubscriptionState.ACTIVE); } } - if (closeClient) { - this.close(); + if (maybeCloseClient) { + this.closeIfEmpty(); } } }) @@ -800,33 +767,21 @@ private void assignConsumersToStream( stream, ex); consumersClosingCallback.run(); + if (maybeCloseClient) { + this.closeIfEmpty(); + } return null; }); } private void recoverSubscription( String stream, List candidates, SubscriptionTracker affectedSubscription) { - ManagerPool subscriptionPool = null; boolean reassignmentCompleted = false; while (!reassignmentCompleted) { try { if (affectedSubscription.consumer.isOpen()) { Broker broker = pickBroker(candidates); LOGGER.debug("Using {} to resume consuming from {}", broker, stream); - String key = keyForClientSubscription(broker); - subscriptionPool = - pools.computeIfAbsent( - key, - s -> { - LOGGER.debug( - "Creating new connection pool {} to consume from {}", key, stream); - return new ManagerPool( - key, - environment - .clientParametersCopy() - .host(broker.getHost()) - .port(broker.getPort())); - }); synchronized (affectedSubscription.consumer) { if (affectedSubscription.consumer.isOpen()) { OffsetSpecification offsetSpecification; @@ -835,7 +790,7 @@ private void recoverSubscription( } else { offsetSpecification = affectedSubscription.initialOffsetSpecification; } - subscriptionPool.add(affectedSubscription, offsetSpecification, false); + addToManager(broker, affectedSubscription, 
offsetSpecification, false); reassignmentCompleted = true; } else { reassignmentCompleted = true; @@ -845,15 +800,14 @@ private void recoverSubscription( LOGGER.debug("Not re-assigning consumer because it has been closed"); reassignmentCompleted = true; } - } catch (TimeoutStreamException | ClientClosedException | StreamNotAvailableException e) { + } catch (ConnectionStreamException + | ClientClosedException + | StreamNotAvailableException e) { LOGGER.debug( "Consumer {} re-assignment on stream {} timed out or connection closed or stream not available, " + "refreshing candidates and retrying", affectedSubscription.consumer.id(), affectedSubscription.stream); - if (subscriptionPool != null) { - subscriptionPool.clean(); - } // maybe not a good candidate, let's refresh and retry for this one candidates = callAndMaybeRetry( @@ -883,10 +837,12 @@ synchronized void add( if (this.isFull()) { throw new IllegalStateException("Cannot add subscription tracker, the manager is full"); } + if (this.isClosed()) { + throw new IllegalStateException("Cannot add subscription tracker, the manager is closed"); + } checkNotClosed(); - // FIXME check manager is still open (not closed because of connection failure) byte subscriptionId = 0; for (int i = 0; i < MAX_SUBSCRIPTIONS_PER_CLIENT; i++) { if (subscriptionTrackers.get(i) == null) { @@ -982,7 +938,6 @@ synchronized void add( LOGGER.debug(message); throw convertCodeToException( subscribeResponse.getResponseCode(), subscriptionTracker.stream, () -> message); - // throw new StreamException(message); } } catch (RuntimeException e) { subscriptionTracker.assign((byte) -1, null); @@ -994,7 +949,7 @@ synchronized void add( throw e; } subscriptionTracker.state(SubscriptionState.ACTIVE); - LOGGER.debug("Subscribed to {}", subscriptionTracker.stream); + LOGGER.debug("Subscribed to '{}'", subscriptionTracker.stream); } private void maybeCleanStreamToStreamSubscriptions(String stream) { @@ -1010,7 +965,6 @@ private void maybeCleanStreamToStreamSubscriptions(String stream) { } synchronized void remove(SubscriptionTracker subscriptionTracker) { - // FIXME check manager is still open (not closed because of connection failure) byte subscriptionIdInClient = subscriptionTracker.subscriptionIdInClient; try { Client.Response unsubscribeResponse = @@ -1046,7 +1000,8 @@ synchronized void remove(SubscriptionTracker subscriptionTracker) { return subscriptionsForThisStream.isEmpty() ? 
null : subscriptionsForThisStream; } }); - this.owner.maybeDisposeManager(this); + closeIfEmpty(); + // this.owner.maybeDisposeManager(this); } private List update( @@ -1072,33 +1027,72 @@ boolean isEmpty() { return this.trackerCount == 0; } + boolean isClosed() { + if (!this.client.isOpen()) { + this.close(); + } + return this.closed.get(); + } + + synchronized void closeIfEmpty() { + if (this.isEmpty()) { + this.close(); + } + } + synchronized void close() { - if (this.client != null && this.client.isOpen()) { - for (int i = 0; i < this.subscriptionTrackers.size(); i++) { - SubscriptionTracker tracker = this.subscriptionTrackers.get(i); - if (tracker != null) { - try { - if (this.client != null && this.client.isOpen() && tracker.consumer.isOpen()) { - this.client.unsubscribe(tracker.subscriptionIdInClient); + if (this.closed.compareAndSet(false, true)) { + managers.remove(this); + LOGGER.debug("Closing consumer subscription manager on {}, id {}", this.name, this.id); + if (this.client != null && this.client.isOpen()) { + for (int i = 0; i < this.subscriptionTrackers.size(); i++) { + SubscriptionTracker tracker = this.subscriptionTrackers.get(i); + if (tracker != null) { + try { + if (this.client != null && this.client.isOpen() && tracker.consumer.isOpen()) { + this.client.unsubscribe(tracker.subscriptionIdInClient); + } + } catch (Exception e) { + // OK, moving on + LOGGER.debug( + "Error while unsubscribing from {}, registration {}", + tracker.stream, + tracker.subscriptionIdInClient); } - } catch (Exception e) { - // OK, moving on - LOGGER.debug( - "Error while unsubscribing from {}, registration {}", - tracker.stream, - tracker.subscriptionIdInClient); + this.subscriptionTrackers.set(i, null); } - this.subscriptionTrackers.set(i, null); } - } - streamToStreamSubscriptions.clear(); + streamToStreamSubscriptions.clear(); - if (this.client != null && this.client.isOpen()) { - this.client.close(); + if (this.client != null && this.client.isOpen()) { + this.client.close(); + } } } } + + @Override + public int compareTo(ClientSubscriptionsManager o) { + return Long.compare(this.id, o.id); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ClientSubscriptionsManager that = (ClientSubscriptionsManager) o; + return id == that.id; + } + + @Override + public int hashCode() { + return Objects.hash(id); + } } private static final class DefaultSubscriptionContext implements SubscriptionContext { diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java index 445cc39550..f7338e321d 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java @@ -211,6 +211,7 @@ class StreamEnvironment implements Environment { executorService = scheduledExecutorService; this.privateScheduleExecutorService = false; } + // TODO remove executor wrapper (it's here just for debugging) this.scheduledExecutorService = new ScheduledExecutorServiceWrapper(executorService); this.producersCoordinator = diff --git a/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java b/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java index f23ac86de6..7c0c787012 100644 --- a/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java +++ b/src/main/java/com/rabbitmq/stream/impl/TimeoutStreamException.java @@ -13,9 +13,7 @@ // 
info@rabbitmq.com. package com.rabbitmq.stream.impl; -import com.rabbitmq.stream.StreamException; - -class TimeoutStreamException extends StreamException { +class TimeoutStreamException extends ConnectionStreamException { public TimeoutStreamException(String message) { super(message); diff --git a/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java b/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java index 11bedbd0d4..38076b247a 100644 --- a/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/ConsumersCoordinatorTest.java @@ -43,7 +43,7 @@ import com.rabbitmq.stream.impl.Client.MessageListener; import com.rabbitmq.stream.impl.Client.QueryOffsetResponse; import com.rabbitmq.stream.impl.Client.Response; -import com.rabbitmq.stream.impl.MonitoringTestUtils.ConsumersPoolInfo; +import com.rabbitmq.stream.impl.MonitoringTestUtils.ConsumerCoordinatorInfo; import com.rabbitmq.stream.impl.Utils.ClientFactory; import java.time.Duration; import java.util.ArrayList; @@ -728,17 +728,18 @@ void shouldRedistributeConsumerOnMetadataUpdate() throws Exception { .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); assertThat(messageHandlerCalls.get()).isEqualTo(1); - lastMessageListener() - .handle( - subscriptionIdCaptor.getAllValues().get(0), - 0, - 0, - 0, - new WrapperMessageBuilder().build()); + // listener is per manager (connection), so it can have been disposed of, + // depending on which replica is chosen + // we dispatch to all of them, we should have only one subscription active + // we use the latest subscription ID though + this.messageListeners.forEach( + l -> { + l.handle(subscriptionIdCaptor.getValue(), 0, 0, 0, new WrapperMessageBuilder().build()); + }); + assertThat(messageHandlerCalls.get()).isEqualTo(2); - when(client.unsubscribe(subscriptionIdCaptor.getValue())) - .thenReturn(new Client.Response(Constants.RESPONSE_CODE_OK)); + when(client.unsubscribe(subscriptionIdCaptor.getValue())).thenReturn(responseOk()); closingRunnable.run(); verify(client, times(1)).unsubscribe(subscriptionIdCaptor.getValue()); @@ -747,7 +748,7 @@ void shouldRedistributeConsumerOnMetadataUpdate() throws Exception { .handle(subscriptionIdCaptor.getValue(), 0, 0, 0, new WrapperMessageBuilder().build()); assertThat(messageHandlerCalls.get()).isEqualTo(2); - assertThat(coordinator.poolSize()).isZero(); + assertThat(coordinator.managerCount()).isZero(); } @Test @@ -764,13 +765,18 @@ void shouldRetryRedistributionIfMetadataIsNotUpdatedImmediately() throws Excepti .thenReturn(metadata(null, replicas())); when(clientFactory.client(any())).thenReturn(client); + AtomicInteger subscriptionCount = new AtomicInteger(0); when(client.subscribe( subscriptionIdCaptor.capture(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap())) - .thenReturn(new Client.Response(Constants.RESPONSE_CODE_OK)); + .then( + invocation -> { + subscriptionCount.incrementAndGet(); + return responseOk(); + }); AtomicInteger messageHandlerCalls = new AtomicInteger(); Runnable closingRunnable = @@ -794,7 +800,7 @@ void shouldRetryRedistributionIfMetadataIsNotUpdatedImmediately() throws Excepti metadataListener.handle("stream", Constants.RESPONSE_CODE_STREAM_NOT_AVAILABLE); - Thread.sleep(delayPolicy.delay(0).toMillis() + delayPolicy.delay(1).toMillis() * 5); + waitAtMost(() -> subscriptionCount.get() == 2); verify(client, times(2)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), 
anyMap()); @@ -814,7 +820,7 @@ void shouldRetryRedistributionIfMetadataIsNotUpdatedImmediately() throws Excepti subscriptionIdCaptor.getValue(), 0, 0, 0, new WrapperMessageBuilder().build()); assertThat(messageHandlerCalls.get()).isEqualTo(2); - assertThat(coordinator.poolSize()).isZero(); + assertThat(coordinator.managerCount()).isZero(); } @Test @@ -865,7 +871,7 @@ void metadataUpdate_shouldCloseConsumerIfStreamIsDeleted() throws Exception { .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); verify(client, times(0)).unsubscribe(anyByte()); - assertThat(coordinator.poolSize()).isZero(); + assertThat(coordinator.managerCount()).isZero(); } @Test @@ -917,7 +923,7 @@ void metadataUpdate_shouldCloseConsumerIfRetryTimeoutIsReached() throws Exceptio .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); verify(client, times(0)).unsubscribe(anyByte()); - assertThat(coordinator.poolSize()).isZero(); + assertThat(coordinator.managerCount()).isZero(); } @Test @@ -1081,28 +1087,21 @@ void shouldRemoveClientSubscriptionManagerFromPoolIfEmptyAfterMetadataUpdate() t verify(client, times(subscriptionCount)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); - List info = - MonitoringTestUtils.extract(coordinator).consumerCoordinatorInfos(); - assertThat(info) - .hasSize(1) - .element(0) - .extracting(pool -> pool.consumerCount()) - .isEqualTo(subscriptionCount); + ConsumerCoordinatorInfo info = MonitoringTestUtils.extract(coordinator); + assertThat(info.nodesConnected()); + assertThat(info.consumerCount()).isEqualTo(subscriptionCount); - // let's kill the first client connection + // let's make the stream unavailable on the first manager metadataListeners.get(0).handle("stream", Constants.RESPONSE_CODE_STREAM_NOT_AVAILABLE); Thread.sleep(delayPolicy.delay(0).toMillis() * 5); - info = MonitoringTestUtils.extract(coordinator).consumerCoordinatorInfos(); - assertThat(info) - .hasSize(1) - .element(0) - .extracting(pool -> pool.consumerCount()) - .isEqualTo(subscriptionCount); + info = MonitoringTestUtils.extract(coordinator); + assertThat(info.nodesConnected()).hasSize(1); + assertThat(info.consumerCount()).isEqualTo(subscriptionCount); - // the MAX consumers must have been re-allocated to the existing client and a new one - // let's add a new subscription to make sure we are still using the same pool + // the MAX consumers must have been re-allocated to the initial client because it's not closed + // let's add a new subscription to make sure we are still using the second client coordinator.subscribe( consumer, "stream", @@ -1113,16 +1112,14 @@ void shouldRemoveClientSubscriptionManagerFromPoolIfEmptyAfterMetadataUpdate() t (offset, message) -> {}, Collections.emptyMap()); - verify(clientFactory, times(2 + 1)).client(any()); + // no more client creation + verify(clientFactory, times(2)).client(any()); verify(client, times(subscriptionCount + ConsumersCoordinator.MAX_SUBSCRIPTIONS_PER_CLIENT + 1)) .subscribe(anyByte(), anyString(), any(OffsetSpecification.class), anyInt(), anyMap()); - info = MonitoringTestUtils.extract(coordinator).consumerCoordinatorInfos(); - assertThat(info) - .hasSize(1) - .element(0) - .extracting(pool -> pool.consumerCount()) - .isEqualTo(subscriptionCount + 1); + info = MonitoringTestUtils.extract(coordinator); + assertThat(info.nodesConnected()).hasSize(1); + assertThat(info.consumerCount()).isEqualTo(subscriptionCount + 1); } @ParameterizedTest diff --git 
a/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java b/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java index 642c2596c3..c1709683f4 100644 --- a/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java +++ b/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java @@ -22,6 +22,8 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; class MonitoringTestUtils { @@ -77,8 +79,8 @@ public String[] getLocators() { return locators; } - public List getConsumers() { - return this.consumers.consumerCoordinatorInfos(); + public ConsumerCoordinatorInfo getConsumers() { + return this.consumers; } public List getProducers() { @@ -102,67 +104,40 @@ public String toString() { public static class ConsumerCoordinatorInfo { private final int subscription_count; - private final int pool_count; - private final ConsumersPoolInfo[] pools; - - public ConsumerCoordinatorInfo( - int subscription_count, int pool_count, ConsumersPoolInfo[] pools) { - this.subscription_count = subscription_count; - this.pool_count = pool_count; - this.pools = pools; - } - - boolean isEmpty() { - return this.pool_count == 0; - } - - List consumerCoordinatorInfos() { - return Arrays.asList(this.pools); - } - } - - public static class ConsumersPoolInfo { - - private final String broker; private final ConsumerManager[] clients; - public ConsumersPoolInfo(String broker, ConsumerManager[] clients) { - this.broker = broker; + public ConsumerCoordinatorInfo(int subscription_count, ConsumerManager[] clients) { + this.subscription_count = subscription_count; this.clients = clients; } - public String getBroker() { - return broker; + boolean isEmpty() { + return this.clients.length == 0; } - public List getClients() { - return arrayToList(this.clients); + Set nodesConnected() { + return Arrays.stream(this.clients).map(m -> m.node).collect(Collectors.toSet()); } - public int consumerCount() { - return getClients().stream() - .map(manager -> manager.getConsumerCount()) - .reduce(0, (acc, count) -> acc + count); + List clients() { + return Arrays.asList(this.clients); } - @Override - public String toString() { - return "ConsumerPoolInfo{" - + "broker='" - + broker - + '\'' - + ", clients=" - + Arrays.toString(clients) - + '}'; + int consumerCount() { + return Arrays.stream(this.clients).mapToInt(ConsumerManager::getConsumerCount).sum(); } } public static class ConsumerManager { + private final long id; + private final String node; private final int consumer_count; - public ConsumerManager(int consumerCount) { - this.consumer_count = consumerCount; + public ConsumerManager(long id, String node, int consumer_count) { + this.id = id; + this.node = node; + this.consumer_count = consumer_count; } public int getConsumerCount() { @@ -171,7 +146,15 @@ public int getConsumerCount() { @Override public String toString() { - return "ConsumerManager{" + "consumerCount=" + consumer_count + '}'; + return "ConsumerManager{" + + "id=" + + id + + ", node='" + + node + + '\'' + + ", consumer_count=" + + consumer_count + + '}'; } } diff --git a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java index 3ada84c483..93f75f0c4d 100644 --- a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java @@ -47,6 +47,7 @@ import com.rabbitmq.stream.StreamException; import 
com.rabbitmq.stream.StreamStats; import com.rabbitmq.stream.impl.Client.StreamMetadata; +import com.rabbitmq.stream.impl.MonitoringTestUtils.ConsumerCoordinatorInfo; import com.rabbitmq.stream.impl.MonitoringTestUtils.EnvironmentInfo; import com.rabbitmq.stream.impl.TestUtils.BrokerVersion; import com.rabbitmq.stream.impl.TestUtils.BrokerVersionAtLeast; @@ -211,14 +212,10 @@ void producersAndConsumersShouldBeClosedWhenEnvironmentIsClosed(boolean lazyInit .isEqualTo(2); assertThat(environmentInfo.getProducers().get(0).getClients().get(0).getTrackingConsumerCount()) .isEqualTo(2); - assertThat(environmentInfo.getConsumers()) - .hasSize(1) - .element(0) - .extracting(pool -> pool.getClients()) - .asList() - .hasSize(1); - assertThat(environmentInfo.getConsumers().get(0).getClients().get(0).getConsumerCount()) - .isEqualTo(2); + ConsumerCoordinatorInfo consumerInfo = environmentInfo.getConsumers(); + assertThat(consumerInfo.nodesConnected()).hasSize(1); + assertThat(consumerInfo.clients()).hasSize(1); + assertThat(consumerInfo.consumerCount()).isEqualTo(2); environment.close(); @@ -228,7 +225,7 @@ void producersAndConsumersShouldBeClosedWhenEnvironmentIsClosed(boolean lazyInit environmentInfo = MonitoringTestUtils.extract(environment); assertThat(environmentInfo.getLocators()).isNotEmpty(); assertThat(environmentInfo.getProducers()).isEmpty(); - assertThat(environmentInfo.getConsumers()).isEmpty(); + assertThat(environmentInfo.getConsumers().clients()).isEmpty(); } @Test @@ -296,8 +293,9 @@ void growShrinkResourcesWhenProducersConsumersAreOpenedAndClosed(TestInfo info) assertThat(environmentInfo.getProducers()).hasSize(1); int producerManagerCount = environmentInfo.getProducers().get(0).getClients().size(); assertThat(producerManagerCount).isPositive(); - assertThat(environmentInfo.getConsumers()).hasSize(1); - int consumerManagerCount = environmentInfo.getConsumers().get(0).getClients().size(); + ConsumerCoordinatorInfo consumerInfo = environmentInfo.getConsumers(); + assertThat(consumerInfo.nodesConnected()).hasSize(1); + int consumerManagerCount = consumerInfo.clients().size(); assertThat(consumerManagerCount).isPositive(); java.util.function.Consumer closing = @@ -325,9 +323,9 @@ void growShrinkResourcesWhenProducersConsumersAreOpenedAndClosed(TestInfo info) assertThat(environmentInfo.getProducers()).hasSize(1); assertThat(environmentInfo.getProducers().get(0).getClients()) .hasSizeLessThan(producerManagerCount); - assertThat(environmentInfo.getConsumers()).hasSize(1); - assertThat(environmentInfo.getConsumers().get(0).getClients()) - .hasSizeLessThan(consumerManagerCount); + consumerInfo = environmentInfo.getConsumers(); + assertThat(consumerInfo.nodesConnected()).hasSize(1); + assertThat(consumerInfo.clients()).hasSizeLessThan(consumerManagerCount); producers.forEach(closing); consumers.forEach(closing); From a8b01e002c7cf91573bc76faf919c8a10aebdf3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Fri, 2 Dec 2022 19:00:16 +0100 Subject: [PATCH 06/10] Remove per-node data structure in producer coordinator It would contain the client connections for a given node. It was difficult to maintain the consistency between those structures, so we're better off not using this layer. Now client connections are looked up with a linear scan, which is good enough, as the number of connections should remain under the thousand. 
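For illustration only (not part of this patch), a minimal, self-contained sketch of
the linear-scan lookup described above. The names are simplified stand-ins:
LinearScanLookupSketch, Manager, pickOrCreate and MAX_TRACKERS are hypothetical;
the actual code uses ClientProducersManager, Broker and the maxProducersByClient
limit.

    import java.util.Iterator;
    import java.util.NavigableSet;
    import java.util.concurrent.ConcurrentSkipListSet;
    import java.util.concurrent.atomic.AtomicLong;

    // Sketch only: Manager stands in for ClientProducersManager,
    // a node is reduced to its "host:port" label.
    final class LinearScanLookupSketch {

      static final class Manager implements Comparable<Manager> {

        static final int MAX_TRACKERS = 256; // stand-in for maxProducersByClient

        final long id;
        final String node;
        volatile boolean closed;
        volatile int trackerCount;

        Manager(long id, String node) {
          this.id = id;
          this.node = node;
        }

        boolean isFull() {
          return this.trackerCount >= MAX_TRACKERS;
        }

        @Override
        public int compareTo(Manager other) {
          return Long.compare(this.id, other.id);
        }
      }

      private final AtomicLong managerIdSequence = new AtomicLong(0);
      // one flat, sorted collection instead of a per-node map of pools
      private final NavigableSet<Manager> managers = new ConcurrentSkipListSet<>();

      Manager pickOrCreate(String node) {
        Iterator<Manager> iterator = managers.iterator();
        while (iterator.hasNext()) {
          Manager candidate = iterator.next();
          if (candidate.closed) {
            iterator.remove(); // evict dead connections lazily during the scan
          } else if (node.equals(candidate.node) && !candidate.isFull()) {
            return candidate; // first manager on the right node with free capacity
          }
        }
        Manager created = new Manager(managerIdSequence.getAndIncrement(), node);
        managers.add(created);
        return created;
      }

      public static void main(String[] args) {
        LinearScanLookupSketch coordinator = new LinearScanLookupSketch();
        Manager first = coordinator.pickOrCreate("rabbit-1:5552");
        Manager second = coordinator.pickOrCreate("rabbit-1:5552");
        System.out.println(first == second); // true: the existing manager is reused
      }
    }

A sorted concurrent set keeps the managers ordered by creation id, so existing
connections fill up before a new one is opened for the same node.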
--- .../java/com/rabbitmq/stream/impl/Client.java | 10 + .../stream/impl/ConsumersCoordinator.java | 107 +--- .../stream/impl/ProducersCoordinator.java | 536 ++++++++++++------ .../impl/ScheduledExecutorServiceWrapper.java | 2 +- .../java/com/rabbitmq/stream/impl/Utils.java | 51 ++ .../stream/impl/MonitoringTestUtils.java | 71 ++- .../stream/impl/ProducersCoordinatorTest.java | 27 +- .../stream/impl/StreamEnvironmentTest.java | 26 +- .../stream/impl/StreamProducerTest.java | 2 +- 9 files changed, 507 insertions(+), 325 deletions(-) diff --git a/src/main/java/com/rabbitmq/stream/impl/Client.java b/src/main/java/com/rabbitmq/stream/impl/Client.java index 6db0b1b53f..095bd433e5 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Client.java +++ b/src/main/java/com/rabbitmq/stream/impl/Client.java @@ -723,6 +723,9 @@ public Response declarePublisher(byte publisherId, String publisherReference, St channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); throw new StreamException(e); @@ -744,6 +747,9 @@ public Response deletePublisher(byte publisherId) { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); throw new StreamException(e); @@ -2048,6 +2054,10 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(host, port); } + + String label() { + return this.host + ":" + this.port; + } } public static class ClientParameters { diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index a97bc103b1..c804a8e5ca 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -18,7 +18,6 @@ import static com.rabbitmq.stream.impl.Utils.isSac; import static com.rabbitmq.stream.impl.Utils.namedFunction; import static com.rabbitmq.stream.impl.Utils.namedRunnable; -import static java.lang.String.format; import com.rabbitmq.stream.BackOffDelayPolicy; import com.rabbitmq.stream.Constants; @@ -43,7 +42,6 @@ import com.rabbitmq.stream.impl.Utils.ClientConnectionType; import com.rabbitmq.stream.impl.Utils.ClientFactory; import com.rabbitmq.stream.impl.Utils.ClientFactoryContext; -import java.time.Duration; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -63,7 +61,6 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Predicate; -import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.slf4j.Logger; @@ -171,11 +168,14 @@ private void addToManager( pickedManager = iterator.next(); if (pickedManager.isClosed()) { iterator.remove(); - } - if (node.equals(pickedManager.node) && !pickedManager.isFull()) { - break; - } else { pickedManager = null; + } else { + if (node.equals(pickedManager.node) && !pickedManager.isFull()) { + // let's try this one + break; + } else { + pickedManager = null; + } } } if (pickedManager == null) { @@ -744,7 +744,7 @@ private void assignConsumersToStream( for (SubscriptionTracker affectedSubscription : subscriptions) { if (affectedSubscription.compareAndSet( 
SubscriptionState.ACTIVE, SubscriptionState.RECOVERING)) { - recoverSubscription(stream, candidates, affectedSubscription); + recoverSubscription(candidates, affectedSubscription); } else { LOGGER.debug( "Not recovering consumer {} from stream {}, state is {}, expected is {}", @@ -774,51 +774,46 @@ private void assignConsumersToStream( }); } - private void recoverSubscription( - String stream, List candidates, SubscriptionTracker affectedSubscription) { + private void recoverSubscription(List candidates, SubscriptionTracker tracker) { boolean reassignmentCompleted = false; while (!reassignmentCompleted) { try { - if (affectedSubscription.consumer.isOpen()) { + if (tracker.consumer.isOpen()) { Broker broker = pickBroker(candidates); - LOGGER.debug("Using {} to resume consuming from {}", broker, stream); - synchronized (affectedSubscription.consumer) { - if (affectedSubscription.consumer.isOpen()) { + LOGGER.debug("Using {} to resume consuming from {}", broker, tracker.stream); + synchronized (tracker.consumer) { + if (tracker.consumer.isOpen()) { OffsetSpecification offsetSpecification; - if (affectedSubscription.hasReceivedSomething) { - offsetSpecification = OffsetSpecification.offset(affectedSubscription.offset); + if (tracker.hasReceivedSomething) { + offsetSpecification = OffsetSpecification.offset(tracker.offset); } else { - offsetSpecification = affectedSubscription.initialOffsetSpecification; + offsetSpecification = tracker.initialOffsetSpecification; } - addToManager(broker, affectedSubscription, offsetSpecification, false); - reassignmentCompleted = true; - } else { - reassignmentCompleted = true; + addToManager(broker, tracker, offsetSpecification, false); } } } else { LOGGER.debug("Not re-assigning consumer because it has been closed"); - reassignmentCompleted = true; } + reassignmentCompleted = true; } catch (ConnectionStreamException | ClientClosedException | StreamNotAvailableException e) { LOGGER.debug( "Consumer {} re-assignment on stream {} timed out or connection closed or stream not available, " + "refreshing candidates and retrying", - affectedSubscription.consumer.id(), - affectedSubscription.stream); + tracker.consumer.id(), + tracker.stream); // maybe not a good candidate, let's refresh and retry for this one candidates = - callAndMaybeRetry( - () -> findBrokersForStream(stream), + Utils.callAndMaybeRetry( + () -> findBrokersForStream(tracker.stream), ex -> !(ex instanceof StreamDoesNotExistException), environment.recoveryBackOffDelayPolicy(), "Candidate lookup to consume from '%s'", - stream); - + tracker.stream); } catch (Exception e) { - LOGGER.warn("Error while re-assigning subscription from stream {}", stream, e); + LOGGER.warn("Error while re-assigning subscription from stream {}", tracker.stream, e); reassignmentCompleted = true; } } @@ -873,7 +868,7 @@ synchronized void add( if (offsetTrackingReference != null) { checkNotClosed(); QueryOffsetResponse queryOffsetResponse = - callAndMaybeRetry( + Utils.callAndMaybeRetry( () -> client.queryOffset(offsetTrackingReference, subscriptionTracker.stream), RETRY_ON_TIMEOUT, "Offset query for consumer %s on stream '%s' (reference %s)", @@ -917,7 +912,7 @@ synchronized void add( // FIXME consider using fewer initial credits byte subId = subscriptionId; Client.Response subscribeResponse = - callAndMaybeRetry( + Utils.callAndMaybeRetry( () -> client.subscribe( subId, @@ -968,7 +963,7 @@ synchronized void remove(SubscriptionTracker subscriptionTracker) { byte subscriptionIdInClient = 
subscriptionTracker.subscriptionIdInClient; try { Client.Response unsubscribeResponse = - callAndMaybeRetry( + Utils.callAndMaybeRetry( () -> client.unsubscribe(subscriptionIdInClient), RETRY_ON_TIMEOUT, "Unsubscribe request for consumer %d on stream '%s'", @@ -1129,54 +1124,6 @@ private static void maybeExchangeCommandVersions(Client client) { } } - static T callAndMaybeRetry( - Supplier operation, Predicate retryCondition, String format, Object... args) { - return callAndMaybeRetry(operation, retryCondition, i -> Duration.ZERO, format, args); - } - - static T callAndMaybeRetry( - Supplier operation, - Predicate retryCondition, - BackOffDelayPolicy delayPolicy, - String format, - Object... args) { - String description = format(format, args); - int attempt = 0; - Exception lastException = null; - while (attempt++ < 3) { - try { - return operation.get(); - } catch (Exception e) { - lastException = e; - if (retryCondition.test(e)) { - LOGGER.debug("Operation '{}' failed, retrying...", description); - Duration delay = delayPolicy.delay(attempt - 1); - if (!delay.isZero()) { - try { - Thread.sleep(delay.toMillis()); - } catch (InterruptedException ex) { - Thread.interrupted(); - lastException = ex; - break; - } - } - } else { - break; - } - } - } - String message = - format("Could not complete task '%s' after %d attempt(s)", description, --attempt); - LOGGER.debug(message); - if (lastException == null) { - throw new StreamException(message); - } else if (lastException instanceof RuntimeException) { - throw (RuntimeException) lastException; - } else { - throw new StreamException(message, lastException); - } - } - private static final Predicate RETRY_ON_TIMEOUT = e -> e instanceof TimeoutStreamException; diff --git a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java index f72360721e..71410c61b0 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022 VMware, Inc. or its affiliates. All rights reserved. +// Copyright (c) 2022 VMware, Inc. or its affiliates. All rights reserved. // // This software, the RabbitMQ Stream Java client library, is dual-licensed under the // Mozilla Public License 2.0 ("MPL"), and the Apache License version 2 ("ASL"). @@ -13,14 +13,19 @@ // info@rabbitmq.com. 
package com.rabbitmq.stream.impl; +import static com.rabbitmq.stream.impl.Utils.callAndMaybeRetry; import static com.rabbitmq.stream.impl.Utils.formatConstant; import static com.rabbitmq.stream.impl.Utils.namedFunction; import static com.rabbitmq.stream.impl.Utils.namedRunnable; +import static java.util.stream.Collectors.toSet; import com.rabbitmq.stream.BackOffDelayPolicy; import com.rabbitmq.stream.Constants; import com.rabbitmq.stream.StreamDoesNotExistException; import com.rabbitmq.stream.StreamException; +import com.rabbitmq.stream.StreamNotAvailableException; +import com.rabbitmq.stream.impl.Client.Broker; +import com.rabbitmq.stream.impl.Client.ClientParameters; import com.rabbitmq.stream.impl.Client.MetadataListener; import com.rabbitmq.stream.impl.Client.PublishConfirmListener; import com.rabbitmq.stream.impl.Client.PublishErrorListener; @@ -30,15 +35,19 @@ import com.rabbitmq.stream.impl.Utils.ClientFactory; import com.rabbitmq.stream.impl.Utils.ClientFactoryContext; import java.util.Collection; -import java.util.List; +import java.util.Iterator; import java.util.Map; +import java.util.NavigableSet; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; +import java.util.function.Predicate; import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,9 +59,11 @@ class ProducersCoordinator { private static final Logger LOGGER = LoggerFactory.getLogger(ProducersCoordinator.class); private final StreamEnvironment environment; private final ClientFactory clientFactory; - private final Map pools = new ConcurrentHashMap<>(); private final int maxProducersByClient, maxTrackingConsumersByClient; private final Function connectionNamingStrategy; + private final AtomicLong managerIdSequence = new AtomicLong(0); + private final NavigableSet managers = new ConcurrentSkipListSet<>(); + private final AtomicLong trackerIdSequence = new AtomicLong(0); ProducersCoordinator( StreamEnvironment environment, @@ -67,40 +78,92 @@ class ProducersCoordinator { this.connectionNamingStrategy = connectionNamingStrategy; } - private static String keyForManagerPool(Client.Broker broker) { - // FIXME make sure this is a reasonable key for brokers + private static String keyForNode(Client.Broker broker) { return broker.getHost() + ":" + broker.getPort(); } Runnable registerProducer(StreamProducer producer, String reference, String stream) { - return registerAgentTracker(new ProducerTracker(reference, stream, producer), stream); + return registerAgentTracker( + new ProducerTracker(trackerIdSequence.getAndIncrement(), reference, stream, producer), + stream); } Runnable registerTrackingConsumer(StreamConsumer consumer) { return registerAgentTracker( - new TrackingConsumerTracker(consumer.stream(), consumer), consumer.stream()); + new TrackingConsumerTracker( + trackerIdSequence.getAndIncrement(), consumer.stream(), consumer), + consumer.stream()); } private Runnable registerAgentTracker(AgentTracker tracker, String stream) { - Client.Broker brokerForProducer = getBrokerForProducer(stream); - - String key = keyForManagerPool(brokerForProducer); - ManagerPool pool = - pools.computeIfAbsent( - key, - s -> - new 
ManagerPool( - key, - environment - .clientParametersCopy() - .host(brokerForProducer.getHost()) - .port(brokerForProducer.getPort()))); - - pool.add(tracker); + Client.Broker broker = getBrokerForProducer(stream); + + addToManager(broker, tracker); return tracker::cancel; } + private void addToManager(Broker node, AgentTracker tracker) { + ClientParameters clientParameters = + environment.clientParametersCopy().host(node.getHost()).port(node.getPort()); + ClientProducersManager pickedManager = null; + while (pickedManager == null) { + Iterator iterator = this.managers.iterator(); + while (iterator.hasNext()) { + pickedManager = iterator.next(); + if (pickedManager.isClosed()) { + iterator.remove(); + pickedManager = null; + } else { + if (node.equals(pickedManager.node) && !pickedManager.isFullFor(tracker)) { + // let's try this one + break; + } else { + pickedManager = null; + } + } + } + if (pickedManager == null) { + String name = keyForNode(node); + LOGGER.debug("Creating producer manager on {}", name); + pickedManager = new ClientProducersManager(node, this.clientFactory, clientParameters); + LOGGER.debug("Created producer manager on {}, id {}", name, pickedManager.id); + } + try { + pickedManager.register(tracker); + LOGGER.debug( + "Assigned tracker {} (stream '{}') to manager {} (node {}), subscription ID {}", + tracker.uniqueId(), + tracker.stream(), + pickedManager.id, + pickedManager.name, + tracker.identifiable() ? tracker.id() : "N/A"); + this.managers.add(pickedManager); + } catch (IllegalStateException e) { + pickedManager = null; + } catch (ConnectionStreamException | ClientClosedException | StreamNotAvailableException e) { + // manager connection is dead or stream not available + // scheduling manager closing if necessary in another thread to avoid blocking this one + if (pickedManager.isEmpty()) { + ClientProducersManager manager = pickedManager; + this.environment.execute( + () -> { + manager.closeIfEmpty(); + }, + "Producer manager closing after timeout, producer %d on stream '%s'", + tracker.uniqueId(), + tracker.stream()); + } + throw e; + } catch (RuntimeException e) { + if (pickedManager != null) { + pickedManager.closeIfEmpty(); + } + throw e; + } + } + } + private Client.Broker getBrokerForProducer(String stream) { Map metadata = this.environment.locatorOperation( @@ -130,44 +193,80 @@ private Client.Broker getBrokerForProducer(String stream) { } void close() { - for (ManagerPool pool : pools.values()) { - pool.close(); + Iterator iterator = this.managers.iterator(); + while (iterator.hasNext()) { + ClientProducersManager manager = iterator.next(); + try { + iterator.remove(); + manager.close(); + } catch (Exception e) { + LOGGER.info( + "Error while closing manager {} connected to node {}: {}", + manager.id, + manager.name, + e.getMessage()); + } } - pools.clear(); } - int poolSize() { - return pools.size(); + int clientCount() { + return this.managers.size(); } - int clientCount() { - return pools.values().stream().map(pool -> pool.managers.size()).reduce(0, Integer::sum); + int nodesConnected() { + return this.managers.stream().map(m -> m.name).collect(toSet()).size(); + } + + private static String quote(String value) { + if (value == null) { + return "null"; + } else { + return "\"" + value + "\""; + } + } + + private static String jsonField(String name, Number value) { + return quote(name) + " : " + value.longValue(); + } + + private static String jsonField(String name, String value) { + return quote(name) + " : " + quote(value); } @Override public String 
toString() { - return ("[ \n" - + pools.entrySet().stream() + StringBuilder builder = new StringBuilder("{"); + builder.append(jsonField("client_count", this.managers.size())).append(","); + builder + .append( + jsonField( + "producer_count", this.managers.stream().mapToInt(m -> m.producers.size()).sum())) + .append(","); + builder + .append( + jsonField( + "tracking_consumer_count", + this.managers.stream().mapToInt(m -> m.trackingConsumerTrackers.size()).sum())) + .append(","); + builder.append(quote("clients")).append(" : ["); + builder.append( + this.managers.stream() .map( - poolEntry -> - " { \"broker\" : \"" - + poolEntry.getKey() - + "\", \"client_count\" : " - + poolEntry.getValue().managers.size() - + ", \"clients\" : [ " - + poolEntry.getValue().managers.stream() - .map( - manager -> - "{ \"producer_count\" : " - + manager.producers.size() - + ", " - + " \"tracking_consumer_count\" : " - + manager.trackingConsumerTrackers.size() - + " }") - .collect(Collectors.joining(", ")) - + " ] }") - .collect(Collectors.joining(", \n")) - + "\n]"); + m -> { + StringBuilder b = new StringBuilder("{"); + b.append(jsonField("id", m.id)) + .append(",") + .append(jsonField("node", m.name)) + .append(",") + .append(jsonField("producer_count", m.producers.size())) + .append(",") + .append( + jsonField("tracking_consumer_count", m.trackingConsumerTrackers.size())); + return b.append("}").toString(); + }) + .collect(Collectors.joining(","))); + builder.append("]"); + return builder.append("}").toString(); } private interface AgentTracker { @@ -191,17 +290,27 @@ private interface AgentTracker { String reference(); boolean isOpen(); + + long uniqueId(); + + String type(); + + boolean markRecoveryInProgress(); } private static class ProducerTracker implements AgentTracker { + private final long uniqueId; private final String reference; private final String stream; private final StreamProducer producer; private volatile byte publisherId; private volatile ClientProducersManager clientProducersManager; + private final AtomicBoolean recovering = new AtomicBoolean(false); - private ProducerTracker(String reference, String stream, StreamProducer producer) { + private ProducerTracker( + long uniqueId, String reference, String stream, StreamProducer producer) { + this.uniqueId = uniqueId; this.reference = reference; this.stream = stream; this.producer = producer; @@ -248,6 +357,7 @@ public void unavailable() { @Override public void running() { this.producer.running(); + this.recovering.set(false); } @Override @@ -266,15 +376,33 @@ public void closeAfterStreamDeletion(short code) { public boolean isOpen() { return producer.isOpen(); } + + @Override + public long uniqueId() { + return this.uniqueId; + } + + @Override + public String type() { + return "producer"; + } + + @Override + public boolean markRecoveryInProgress() { + return this.recovering.compareAndSet(false, true); + } } private static class TrackingConsumerTracker implements AgentTracker { + private final long uniqueId; private final String stream; private final StreamConsumer consumer; private volatile ClientProducersManager clientProducersManager; + private final AtomicBoolean recovering = new AtomicBoolean(false); - private TrackingConsumerTracker(String stream, StreamConsumer consumer) { + private TrackingConsumerTracker(long uniqueId, String stream, StreamConsumer consumer) { + this.uniqueId = uniqueId; this.stream = stream; this.consumer = consumer; } @@ -318,6 +446,7 @@ public void unavailable() { @Override public void running() { 
this.consumer.running(); + this.recovering.set(false); } @Override @@ -337,78 +466,41 @@ public void closeAfterStreamDeletion(short code) { public boolean isOpen() { return this.consumer.isOpen(); } - } - - private class ManagerPool { - private final List managers = new CopyOnWriteArrayList<>(); - private final String name; - private final Client.ClientParameters clientParameters; - - private ManagerPool(String name, Client.ClientParameters clientParameters) { - this.name = name; - this.clientParameters = clientParameters; - this.managers.add(new ClientProducersManager(this, clientFactory, clientParameters)); - } - - private synchronized void add(AgentTracker producerTracker) { - boolean added = false; - // FIXME deal with state unavailability (state may be closing because of connection closing) - // try all of them until it succeeds, throw exception if failure - for (ClientProducersManager manager : this.managers) { - if (!manager.isFullFor(producerTracker)) { - manager.register(producerTracker); - added = true; - break; - } - } - if (!added) { - LOGGER.debug( - "Creating producers tracker on {}, this is subscription state #{}", - name, - managers.size() + 1); - ClientProducersManager manager = - new ClientProducersManager(this, clientFactory, clientParameters); - this.managers.add(manager); - manager.register(producerTracker); - } - } - - synchronized void maybeDisposeManager(ClientProducersManager manager) { - if (manager.isEmpty()) { - manager.close(); - this.remove(manager); - } + @Override + public long uniqueId() { + return this.uniqueId; } - private synchronized void remove(ClientProducersManager manager) { - this.managers.remove(manager); - if (this.managers.isEmpty()) { - pools.remove(this.name); - } + @Override + public String type() { + return "tracking consumer"; } - synchronized void close() { - for (ClientProducersManager manager : managers) { - manager.close(); - } - managers.clear(); + @Override + public boolean markRecoveryInProgress() { + return this.recovering.compareAndSet(false, true); } } - private class ClientProducersManager { + private class ClientProducersManager implements Comparable { + private final long id; + private final String name; + private final Broker node; private final ConcurrentMap producers = new ConcurrentHashMap<>(maxProducersByClient); private final Set trackingConsumerTrackers = ConcurrentHashMap.newKeySet(maxTrackingConsumersByClient); private final Map> streamToTrackers = new ConcurrentHashMap<>(); private final Client client; - private final ManagerPool owner; + private final AtomicBoolean closed = new AtomicBoolean(false); private ClientProducersManager( - ManagerPool owner, ClientFactory cf, Client.ClientParameters clientParameters) { - this.owner = owner; + Broker node, ClientFactory cf, Client.ClientParameters clientParameters) { + this.id = managerIdSequence.getAndIncrement(); + this.name = keyForNode(node); + this.node = node; AtomicReference ref = new AtomicReference<>(); AtomicBoolean clientInitializedInManager = new AtomicBoolean(false); PublishConfirmListener publishConfirmListener = @@ -434,11 +526,7 @@ private ClientProducersManager( }; ShutdownListener shutdownListener = shutdownContext -> { - // we may be closing the client because it's not the right node, so the manager - // should not be removed from its pool, because it's not really in it already - if (clientInitializedInManager.get()) { - owner.remove(this); - } + managers.remove(this); if (shutdownContext.isShutdownUnexpected()) { LOGGER.debug( "Recovering {} producer(s) 
after unexpected connection termination", @@ -463,13 +551,20 @@ private ClientProducersManager( }); }, "Producer recovery after disconnection from %s", - owner.name)); + name)); } }; MetadataListener metadataListener = (stream, code) -> { + LOGGER.debug( + "Received metadata notification for '{}', stream is likely to have become unavailable", + stream); + Set affectedTrackers; synchronized (ClientProducersManager.this) { - Set affectedTrackers = streamToTrackers.remove(stream); + affectedTrackers = streamToTrackers.remove(stream); + LOGGER.debug( + "Affected publishers and consumer trackers after metadata update: {}", + affectedTrackers.size()); if (affectedTrackers != null && !affectedTrackers.isEmpty()) { affectedTrackers.forEach( tracker -> { @@ -480,26 +575,28 @@ private ClientProducersManager( trackingConsumerTrackers.remove(tracker); } }); - environment - .scheduledExecutorService() - .execute( - namedRunnable( - () -> { - if (Thread.currentThread().isInterrupted()) { - return; - } - // close manager if no more trackers for it - // needs to be done in another thread than the IO thread - this.owner.maybeDisposeManager(this); - assignProducersToNewManagers( - affectedTrackers, - stream, - environment.topologyUpdateBackOffDelayPolicy()); - }, - "Producer re-assignment after metadata update on stream '%s'", - stream)); } } + if (affectedTrackers != null && !affectedTrackers.isEmpty()) { + environment + .scheduledExecutorService() + .execute( + namedRunnable( + () -> { + if (Thread.currentThread().isInterrupted()) { + return; + } + // close manager if no more trackers for it + // needs to be done in another thread than the IO thread + closeIfEmpty(); + assignProducersToNewManagers( + affectedTrackers, + stream, + environment.topologyUpdateBackOffDelayPolicy()); + }, + "Producer re-assignment after metadata update on stream '%s'", + stream)); + } }; String connectionName = connectionNamingStrategy.apply(ClientConnectionType.PRODUCER); ClientFactoryContext connectionFactoryContext = @@ -510,7 +607,7 @@ private ClientProducersManager( .shutdownListener(shutdownListener) .metadataListener(metadataListener) .clientProperty("connection_name", connectionName)) - .key(owner.name); + .key(name); this.client = cf.client(connectionFactoryContext); LOGGER.debug("Created producer connection '{}'", connectionName); clientInitializedInManager.set(true); @@ -527,48 +624,21 @@ private void assignProducersToNewManagers( .build() .thenAccept( broker -> { - String key = keyForManagerPool(broker); + String key = keyForNode(broker); LOGGER.debug("Assigning {} producer(s) to {}", trackers.size(), key); - trackers.forEach( tracker -> { - try { - if (tracker.isOpen()) { - // we create the pool only if necessary - ManagerPool pool = - pools.computeIfAbsent( - key, - s -> - new ManagerPool( - key, - environment - .clientParametersCopy() - .host(broker.getHost()) - .port(broker.getPort()))); - pool.add(tracker); - tracker.running(); - } else { - LOGGER.debug("Not re-assigning producer because it has been closed"); - } - } catch (Exception e) { - LOGGER.info( - "Error while re-assigning producer {} to {}: {}. Moving on.", - tracker.identifiable() ? 
tracker.id() : "(tracking consumer)", - key, - e.getMessage()); + if (tracker.markRecoveryInProgress()) { + recoverAgent(broker, tracker); } }); }) .exceptionally( ex -> { - LOGGER.info("Error while re-assigning producers: {}", ex.getMessage()); + LOGGER.info( + "Error while re-assigning producers and consumer trackers, closing them: {}", + ex.getMessage()); for (AgentTracker tracker : trackers) { - // FIXME what to do with tracking consumers after a timeout? - // here they are left as "unavailable" and not, meaning they will not be - // able to store. Yet recovery mechanism could try to reconnect them, but - // that seems far-fetched (the first recovery already failed). They could - // be put in a state whereby they refuse all new store commands and inform - // with an exception they should be restarted. try { short code; if (ex instanceof StreamDoesNotExistException @@ -586,15 +656,70 @@ private void assignProducersToNewManagers( }); } + private void recoverAgent(Broker node, AgentTracker tracker) { + boolean reassignmentCompleted = false; + while (!reassignmentCompleted) { + try { + if (tracker.isOpen()) { + LOGGER.debug( + "Using {} to resume {} to {}", node.label(), tracker.type(), tracker.stream()); + addToManager(node, tracker); + tracker.running(); + } else { + LOGGER.debug("Not recovering {} because it has been closed", tracker.type()); + } + reassignmentCompleted = true; + } catch (ConnectionStreamException + | ClientClosedException + | StreamNotAvailableException e) { + LOGGER.debug( + "{} re-assignment on stream {} timed out or connection closed or stream not available, " + + "refreshing candidate leader and retrying", + tracker.type(), + tracker.id(), + tracker.stream()); + // maybe not a good candidate, let's refresh and retry for this one + node = + Utils.callAndMaybeRetry( + () -> getBrokerForProducer(tracker.stream()), + ex -> !(ex instanceof StreamDoesNotExistException), + environment.recoveryBackOffDelayPolicy(), + "Candidate lookup for %s on stream '%s'", + tracker.type(), + tracker.stream()); + } catch (Exception e) { + LOGGER.warn( + "Error while re-assigning %s (stream '{}')", tracker.type(), tracker.stream(), e); + reassignmentCompleted = true; + } + } + } + private synchronized void register(AgentTracker tracker) { + if (this.isFullFor(tracker)) { + throw new IllegalStateException("Cannot add subscription tracker, the manager is full"); + } + if (this.isClosed()) { + throw new IllegalStateException("Cannot add subscription tracker, the manager is closed"); + } + checkNotClosed(); if (tracker.identifiable()) { ProducerTracker producerTracker = (ProducerTracker) tracker; // using the next available slot for (int i = 0; i < maxProducersByClient; i++) { ProducerTracker previousValue = producers.putIfAbsent((byte) i, producerTracker); if (previousValue == null) { + this.checkNotClosed(); + int index = i; Response response = - this.client.declarePublisher((byte) i, tracker.reference(), tracker.stream()); + callAndMaybeRetry( + () -> + this.client.declarePublisher( + (byte) index, tracker.reference(), tracker.stream()), + RETRY_ON_TIMEOUT, + "Declare publisher request for publisher %d on stream '%s'", + producerTracker.uniqueId(), + producerTracker.stream()); if (response.isOk()) { tracker.assign((byte) i, this.client, this); } else { @@ -613,13 +738,14 @@ private synchronized void register(AgentTracker tracker) { tracker.assign((byte) 0, this.client, this); trackingConsumerTrackers.add(tracker); } - streamToTrackers .computeIfAbsent(tracker.stream(), s -> 
ConcurrentHashMap.newKeySet()) .add(tracker); } private synchronized void unregister(AgentTracker tracker) { + LOGGER.debug( + "Unregistering {} {} from manager on {}", tracker.type(), tracker.uniqueId(), this.name); if (tracker.identifiable()) { producers.remove(tracker.id()); } else { @@ -636,7 +762,7 @@ private synchronized void unregister(AgentTracker tracker) { return trackersForThisStream.isEmpty() ? null : trackersForThisStream; } }); - this.owner.maybeDisposeManager(this); + closeIfEmpty(); } synchronized boolean isFullFor(AgentTracker tracker) { @@ -651,14 +777,74 @@ synchronized boolean isEmpty() { return producers.isEmpty() && trackingConsumerTrackers.isEmpty(); } + private void checkNotClosed() { + if (!this.client.isOpen()) { + throw new ClientClosedException(); + } + } + + boolean isClosed() { + if (!this.client.isOpen()) { + this.close(); + } + return this.closed.get(); + } + + private void closeIfEmpty() { + if (!closed.get()) { + synchronized (this) { + if (this.isEmpty()) { + this.close(); + } else { + LOGGER.debug("Not closing producer manager {} because it is not empty", this.id); + } + } + } + } + private void close() { - try { - if (this.client.isOpen()) { - this.client.close(); + if (closed.compareAndSet(false, true)) { + managers.remove(this); + try { + if (this.client.isOpen()) { + this.client.close(); + } + } catch (Exception e) { + LOGGER.debug("Error while closing client producer connection: ", e.getMessage()); } - } catch (Exception e) { - // ok } } + + @Override + public int compareTo(ClientProducersManager o) { + return Long.compare(this.id, o.id); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ClientProducersManager that = (ClientProducersManager) o; + return id == that.id; + } + + @Override + public int hashCode() { + return Objects.hash(id); + } + } + + private static final Predicate RETRY_ON_TIMEOUT = + e -> e instanceof TimeoutStreamException; + + private static class ClientClosedException extends StreamException { + + public ClientClosedException() { + super("Client already closed"); + } } } diff --git a/src/main/java/com/rabbitmq/stream/impl/ScheduledExecutorServiceWrapper.java b/src/main/java/com/rabbitmq/stream/impl/ScheduledExecutorServiceWrapper.java index c33d429621..87463c9aed 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ScheduledExecutorServiceWrapper.java +++ b/src/main/java/com/rabbitmq/stream/impl/ScheduledExecutorServiceWrapper.java @@ -106,7 +106,7 @@ public ScheduledFuture schedule(Callable callable, long delay, TimeUni public ScheduledFuture scheduleAtFixedRate( Runnable command, long initialDelay, long period, TimeUnit unit) { // we don't track these, because they are expected to run for a long time - LOGGER.debug("Registering scheduled at fixed rate task '%s'", command.toString()); + LOGGER.debug("Registering scheduled at fixed rate task '{}'", command.toString()); return this.delegate.scheduleAtFixedRate(command, initialDelay, period, unit); } diff --git a/src/main/java/com/rabbitmq/stream/impl/Utils.java b/src/main/java/com/rabbitmq/stream/impl/Utils.java index dff11d8131..e89d58b832 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Utils.java +++ b/src/main/java/com/rabbitmq/stream/impl/Utils.java @@ -13,7 +13,10 @@ // info@rabbitmq.com. 
package com.rabbitmq.stream.impl; +import static java.lang.String.format; + import com.rabbitmq.stream.Address; +import com.rabbitmq.stream.BackOffDelayPolicy; import com.rabbitmq.stream.Constants; import com.rabbitmq.stream.ConsumerUpdateListener; import com.rabbitmq.stream.OffsetSpecification; @@ -160,6 +163,54 @@ static Function namedFunction(Function task, String format, O return new NamedFunction<>(String.format(format, args), task); } + static T callAndMaybeRetry( + Supplier operation, Predicate retryCondition, String format, Object... args) { + return callAndMaybeRetry(operation, retryCondition, i -> Duration.ZERO, format, args); + } + + static T callAndMaybeRetry( + Supplier operation, + Predicate retryCondition, + BackOffDelayPolicy delayPolicy, + String format, + Object... args) { + String description = format(format, args); + int attempt = 0; + Exception lastException = null; + while (attempt++ < 3) { + try { + return operation.get(); + } catch (Exception e) { + lastException = e; + if (retryCondition.test(e)) { + LOGGER.debug("Operation '{}' failed, retrying...", description); + Duration delay = delayPolicy.delay(attempt - 1); + if (!delay.isZero()) { + try { + Thread.sleep(delay.toMillis()); + } catch (InterruptedException ex) { + Thread.interrupted(); + lastException = ex; + break; + } + } + } else { + break; + } + } + } + String message = + format("Could not complete task '%s' after %d attempt(s)", description, --attempt); + LOGGER.debug(message); + if (lastException == null) { + throw new StreamException(message); + } else if (lastException instanceof RuntimeException) { + throw (RuntimeException) lastException; + } else { + throw new StreamException(message, lastException); + } + } + interface ClientFactory { Client client(ClientFactoryContext context); diff --git a/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java b/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java index c1709683f4..e09178c978 100644 --- a/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java +++ b/src/test/java/com/rabbitmq/stream/impl/MonitoringTestUtils.java @@ -13,14 +13,11 @@ // info@rabbitmq.com. 
package com.rabbitmq.stream.impl; -import com.google.common.reflect.TypeToken; import com.google.gson.Gson; import com.rabbitmq.stream.Consumer; import com.rabbitmq.stream.Environment; import com.rabbitmq.stream.Producer; -import java.lang.reflect.Type; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -29,17 +26,8 @@ class MonitoringTestUtils { private static final Gson GSON = new Gson(); - private static List arrayToList(T[] array) { - if (array == null || array.length == 0) { - return Collections.emptyList(); - } else { - return Arrays.asList(array); - } - } - - static List extract(ProducersCoordinator coordinator) { - Type type = new TypeToken>() {}.getType(); - return GSON.fromJson(coordinator.toString(), type); + static ProducersCoordinatorInfo extract(ProducersCoordinator coordinator) { + return GSON.fromJson(coordinator.toString(), ProducersCoordinatorInfo.class); } static ConsumerCoordinatorInfo extract(ConsumersCoordinator coordinator) { @@ -65,11 +53,11 @@ static ConsumerInfo extract(Consumer consumer) { public static class EnvironmentInfo { private final String[] locators; - private final ProducersPoolInfo[] producers; + private final ProducersCoordinatorInfo producers; private final ConsumerCoordinatorInfo consumers; public EnvironmentInfo( - String[] locators, ProducersPoolInfo[] producers, ConsumerCoordinatorInfo consumers) { + String[] locators, ProducersCoordinatorInfo producers, ConsumerCoordinatorInfo consumers) { this.locators = locators; this.producers = producers; this.consumers = consumers; @@ -83,8 +71,8 @@ public ConsumerCoordinatorInfo getConsumers() { return this.consumers; } - public List getProducers() { - return arrayToList(this.producers); + public ProducersCoordinatorInfo getProducers() { + return this.producers; } @Override @@ -94,7 +82,7 @@ public String toString() { + Arrays.toString(locators) + '\'' + ", producers=" - + Arrays.toString(producers) + + producers + ", consumers=" + consumers + '}'; @@ -158,42 +146,51 @@ public String toString() { } } - public static class ProducersPoolInfo { + public static class ProducersCoordinatorInfo { - private final String broker; + private final int client_count; + private final int producer_count; + private final int tracking_consumer_count; private final ProducerManager[] clients; - public ProducersPoolInfo(String broker, ProducerManager[] clients) { - this.broker = broker; + public ProducersCoordinatorInfo( + int client_count, + int producer_count, + int tracking_consumer_count, + ProducerManager[] clients) { + this.client_count = client_count; + this.producer_count = producer_count; + this.tracking_consumer_count = tracking_consumer_count; this.clients = clients; } - public String getBroker() { - return broker; + int clientCount() { + return this.client_count; } - public List getClients() { - return arrayToList(this.clients); + int producerCount() { + return this.producer_count; } - @Override - public String toString() { - return "ProducersPoolInfo{" - + "broker='" - + broker - + '\'' - + ", clients=" - + Arrays.toString(clients) - + '}'; + int trackingConsumerCount() { + return this.tracking_consumer_count; + } + + Set nodesConnected() { + return Arrays.stream(this.clients).map(m -> m.node).collect(Collectors.toSet()); } } public static class ProducerManager { + private final long id; + private final String node; private final int producer_count; private final int tracking_consumer_count; - public ProducerManager(int 
producerCount, int tracking_consumer_count) { + public ProducerManager(long id, String node, int producerCount, int tracking_consumer_count) { + this.id = id; + this.node = node; this.producer_count = producerCount; this.tracking_consumer_count = tracking_consumer_count; } diff --git a/src/test/java/com/rabbitmq/stream/impl/ProducersCoordinatorTest.java b/src/test/java/com/rabbitmq/stream/impl/ProducersCoordinatorTest.java index 13ab7d2b41..74e3a0c562 100644 --- a/src/test/java/com/rabbitmq/stream/impl/ProducersCoordinatorTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/ProducersCoordinatorTest.java @@ -118,6 +118,8 @@ public Client.ClientParameters metadataListener( ProducersCoordinator.MAX_TRACKING_CONSUMERS_PER_CLIENT, type -> "producer-connection", clientFactory); + when(client.isOpen()).thenReturn(true); + when(client.deletePublisher(anyByte())).thenReturn(new Response(Constants.RESPONSE_CODE_OK)); } @AfterEach @@ -272,7 +274,7 @@ void shouldRedistributeProducerAndTrackingConsumerIfConnectionIsLost() throws Ex verify(producer, times(1)).setClient(client); verify(trackingConsumer, times(1)).setTrackingClient(client); verify(producerClosedAfterDisconnection, times(1)).setClient(client); - assertThat(coordinator.poolSize()).isEqualTo(1); + assertThat(coordinator.nodesConnected()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(1); shutdownListener.handle( @@ -289,7 +291,7 @@ void shouldRedistributeProducerAndTrackingConsumerIfConnectionIsLost() throws Ex verify(producerClosedAfterDisconnection, times(1)).unavailable(); verify(producerClosedAfterDisconnection, times(1)).setClient(client); verify(producerClosedAfterDisconnection, never()).running(); - assertThat(coordinator.poolSize()).isEqualTo(1); + assertThat(coordinator.nodesConnected()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(1); } @@ -316,7 +318,7 @@ void shouldDisposeProducerAndNotTrackingConsumerIfRecoveryTimesOut() throws Exce verify(producer, times(1)).setClient(client); verify(trackingConsumer, times(1)).setTrackingClient(client); - assertThat(coordinator.poolSize()).isEqualTo(1); + assertThat(coordinator.nodesConnected()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(1); shutdownListener.handle( @@ -330,7 +332,7 @@ void shouldDisposeProducerAndNotTrackingConsumerIfRecoveryTimesOut() throws Exce verify(trackingConsumer, times(1)).setTrackingClient(client); verify(trackingConsumer, never()).running(); verify(trackingConsumer, never()).closeAfterStreamDeletion(); - assertThat(coordinator.poolSize()).isEqualTo(0); + assertThat(coordinator.nodesConnected()).isEqualTo(0); assertThat(coordinator.clientCount()).isEqualTo(0); } @@ -396,7 +398,6 @@ void shouldRedistributeProducersAndTrackingConsumersOnMetadataUpdate() throws Ex verify(producerClosedAfterDisconnection, times(1)).setClient(client); verify(movingTrackingConsumer, times(1)).setTrackingClient(client); verify(fixedTrackingConsumer, times(1)).setTrackingClient(client); - assertThat(coordinator.poolSize()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(1); metadataListener.handle(movingStream, Constants.RESPONSE_CODE_STREAM_NOT_AVAILABLE); @@ -420,7 +421,7 @@ void shouldRedistributeProducersAndTrackingConsumersOnMetadataUpdate() throws Ex verify(fixedTrackingConsumer, never()).unavailable(); verify(fixedTrackingConsumer, times(1)).setTrackingClient(client); verify(fixedTrackingConsumer, never()).running(); - assertThat(coordinator.poolSize()).isEqualTo(2); + 
assertThat(coordinator.nodesConnected()).isEqualTo(2); assertThat(coordinator.clientCount()).isEqualTo(2); } @@ -444,7 +445,6 @@ void shouldDisposeProducerIfStreamIsDeleted() throws Exception { coordinator.registerProducer(producer, null, "stream"); verify(producer, times(1)).setClient(client); - assertThat(coordinator.poolSize()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(1); metadataListener.handle("stream", Constants.RESPONSE_CODE_STREAM_NOT_AVAILABLE); @@ -454,7 +454,6 @@ void shouldDisposeProducerIfStreamIsDeleted() throws Exception { verify(producer, times(1)).setClient(client); verify(producer, never()).running(); - assertThat(coordinator.poolSize()).isEqualTo(0); assertThat(coordinator.clientCount()).isEqualTo(0); } @@ -481,7 +480,7 @@ void shouldDisposeProducerAndNotTrackingConsumerIfMetadataUpdateTimesOut() throw verify(producer, times(1)).setClient(client); verify(trackingConsumer, times(1)).setTrackingClient(client); - assertThat(coordinator.poolSize()).isEqualTo(1); + assertThat(coordinator.nodesConnected()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(1); metadataListener.handle("stream", Constants.RESPONSE_CODE_STREAM_NOT_AVAILABLE); @@ -494,7 +493,7 @@ void shouldDisposeProducerAndNotTrackingConsumerIfMetadataUpdateTimesOut() throw verify(trackingConsumer, times(1)).setTrackingClient(client); verify(trackingConsumer, never()).running(); verify(trackingConsumer, never()).closeAfterStreamDeletion(); - assertThat(coordinator.poolSize()).isEqualTo(0); + assertThat(coordinator.nodesConnected()).isEqualTo(0); assertThat(coordinator.clientCount()).isEqualTo(0); } @@ -529,7 +528,7 @@ class ProducerInfo { producerInfos.add(info); }); - assertThat(coordinator.poolSize()).isEqualTo(1); + assertThat(coordinator.nodesConnected()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(2); // let's add some tracking consumers @@ -554,7 +553,7 @@ class TrackingConsumerInfo { trackingConsumerInfos.add(info); }); - assertThat(coordinator.poolSize()).isEqualTo(1); + assertThat(coordinator.nodesConnected()).isEqualTo(1); assertThat(coordinator.clientCount()) .as("new tracking consumers needs yet another client") .isEqualTo(3); @@ -588,7 +587,7 @@ class TrackingConsumerInfo { verify(p, times(1)).setClient(client); assertThat(publishingIdForNewProducer.get()).isEqualTo(info.publishingId); - assertThat(coordinator.poolSize()).isEqualTo(1); + assertThat(coordinator.nodesConnected()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(2); // close some of the last producers, this should free a whole producer manager and a bit of the @@ -598,7 +597,7 @@ class TrackingConsumerInfo { producerInfo.cleaningCallback.run(); } - assertThat(coordinator.poolSize()).isEqualTo(1); + assertThat(coordinator.nodesConnected()).isEqualTo(1); assertThat(coordinator.clientCount()).isEqualTo(1); } diff --git a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java index 93f75f0c4d..2a3b4f15da 100644 --- a/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/StreamEnvironmentTest.java @@ -202,16 +202,9 @@ void producersAndConsumersShouldBeClosedWhenEnvironmentIsClosed(boolean lazyInit EnvironmentInfo environmentInfo = MonitoringTestUtils.extract(environment); assertThat(environmentInfo.getLocators()).isNotEmpty(); - assertThat(environmentInfo.getProducers()) - .hasSize(1) - .element(0) - .extracting(pool -> pool.getClients()) - 
.asList() - .hasSize(1); - assertThat(environmentInfo.getProducers().get(0).getClients().get(0).getProducerCount()) - .isEqualTo(2); - assertThat(environmentInfo.getProducers().get(0).getClients().get(0).getTrackingConsumerCount()) - .isEqualTo(2); + assertThat(environmentInfo.getProducers().clientCount()).isEqualTo(1); + assertThat(environmentInfo.getProducers().producerCount()).isEqualTo(2); + assertThat(environmentInfo.getProducers().trackingConsumerCount()).isEqualTo(2); ConsumerCoordinatorInfo consumerInfo = environmentInfo.getConsumers(); assertThat(consumerInfo.nodesConnected()).hasSize(1); assertThat(consumerInfo.clients()).hasSize(1); @@ -224,7 +217,7 @@ void producersAndConsumersShouldBeClosedWhenEnvironmentIsClosed(boolean lazyInit environmentInfo = MonitoringTestUtils.extract(environment); assertThat(environmentInfo.getLocators()).isNotEmpty(); - assertThat(environmentInfo.getProducers()).isEmpty(); + assertThat(environmentInfo.getProducers().clientCount()).isZero(); assertThat(environmentInfo.getConsumers().clients()).isEmpty(); } @@ -290,8 +283,8 @@ void growShrinkResourcesWhenProducersConsumersAreOpenedAndClosed(TestInfo info) latchAssert(consumeLatch).completes(); EnvironmentInfo environmentInfo = MonitoringTestUtils.extract(environment); - assertThat(environmentInfo.getProducers()).hasSize(1); - int producerManagerCount = environmentInfo.getProducers().get(0).getClients().size(); + assertThat(environmentInfo.getProducers().nodesConnected()).hasSize(1); + int producerManagerCount = environmentInfo.getProducers().clientCount(); assertThat(producerManagerCount).isPositive(); ConsumerCoordinatorInfo consumerInfo = environmentInfo.getConsumers(); assertThat(consumerInfo.nodesConnected()).hasSize(1); @@ -320,9 +313,8 @@ void growShrinkResourcesWhenProducersConsumersAreOpenedAndClosed(TestInfo info) consumers.removeAll(subConsumers); environmentInfo = MonitoringTestUtils.extract(environment); - assertThat(environmentInfo.getProducers()).hasSize(1); - assertThat(environmentInfo.getProducers().get(0).getClients()) - .hasSizeLessThan(producerManagerCount); + assertThat(environmentInfo.getProducers().nodesConnected()).hasSize(1); + assertThat(environmentInfo.getProducers().clientCount()).isLessThan(producerManagerCount); consumerInfo = environmentInfo.getConsumers(); assertThat(consumerInfo.nodesConnected()).hasSize(1); assertThat(consumerInfo.clients()).hasSizeLessThan(consumerManagerCount); @@ -380,7 +372,7 @@ void environmentPublishersConsumersShouldCloseSuccessfullyWhenBrokerIsDown() thr @Test @TestUtils.DisabledIfRabbitMqCtlNotSet - void locatorShouldReconnectIfConnectionIsLost(TestInfo info) throws Exception { + void locatorShouldReconnectIfConnectionIsLost(TestInfo info) { try (Environment environment = environmentBuilder .recoveryBackOffDelayPolicy(BackOffDelayPolicy.fixed(Duration.ofSeconds(1))) diff --git a/src/test/java/com/rabbitmq/stream/impl/StreamProducerTest.java b/src/test/java/com/rabbitmq/stream/impl/StreamProducerTest.java index 001279b830..fcb75b3b8c 100644 --- a/src/test/java/com/rabbitmq/stream/impl/StreamProducerTest.java +++ b/src/test/java/com/rabbitmq/stream/impl/StreamProducerTest.java @@ -637,7 +637,7 @@ void subEntryBatchesSentCompressedShouldBeConsumedProperly() { } @Test - void methodsShouldThrowExceptionWhenProducerIsClosed() { + void methodsShouldThrowExceptionWhenProducerIsClosed() throws InterruptedException { Producer producer = environment.producerBuilder().stream(stream).build(); producer.close(); assertThatThrownBy(() -> 
producer.getLastPublishingId()) From 39fe70283f697d7c5736a9c8e0a4d07b43aeaf18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Mon, 12 Dec 2022 17:40:57 +0100 Subject: [PATCH 07/10] Release permits only if possible --- .../stream/impl/ConsumersCoordinator.java | 27 ++++-- .../stream/impl/ProducersCoordinator.java | 83 +++++++++++++++++-- .../rabbitmq/stream/impl/StreamProducer.java | 7 +- 3 files changed, 101 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index c804a8e5ca..cfa615d3ae 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -305,7 +305,7 @@ private static String jsonField(String name, Number value) { } private static String jsonField(String name, String value) { - return quote(name) + " : " + quote(value); + return quote(name) + " : " + (value == null ? "null" : quote(value)); } @Override @@ -317,13 +317,30 @@ public String toString() { this.managers.stream() .map( m -> { - StringBuilder b = new StringBuilder("{"); - b.append(jsonField("id", m.id)) + StringBuilder managerBuilder = new StringBuilder("{"); + managerBuilder + .append(jsonField("id", m.id)) .append(",") .append(jsonField("node", m.name)) .append(",") - .append(jsonField("consumer_count", m.trackerCount)); - return b.append("}").toString(); + .append(jsonField("consumer_count", m.trackerCount)) + .append(","); + managerBuilder.append("\"subscriptions\" : ["); + List trackers = m.subscriptionTrackers; + managerBuilder.append( + trackers.stream() + .filter(Objects::nonNull) + .map( + t -> { + StringBuilder trackerBuilder = new StringBuilder("{"); + trackerBuilder.append(jsonField("stream", t.stream)).append(","); + trackerBuilder.append( + jsonField("subscription_id", t.subscriptionIdInClient)); + return trackerBuilder.append("}").toString(); + }) + .collect(Collectors.joining(","))); + managerBuilder.append("]"); + return managerBuilder.append("}").toString(); }) .collect(Collectors.joining(","))); builder.append("],"); diff --git a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java index 71410c61b0..f1ea405b74 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java @@ -36,6 +36,7 @@ import com.rabbitmq.stream.impl.Utils.ClientFactoryContext; import java.util.Collection; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.NavigableSet; import java.util.Objects; @@ -43,6 +44,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -64,6 +66,8 @@ class ProducersCoordinator { private final AtomicLong managerIdSequence = new AtomicLong(0); private final NavigableSet managers = new ConcurrentSkipListSet<>(); private final AtomicLong trackerIdSequence = new AtomicLong(0); + // TODO remove the list of trackers (it's here just for debugging) + private final List producerTrackers = new CopyOnWriteArrayList<>(); ProducersCoordinator( StreamEnvironment environment, @@ -83,9 +87,10 @@ 
private static String keyForNode(Client.Broker broker) { } Runnable registerProducer(StreamProducer producer, String reference, String stream) { - return registerAgentTracker( - new ProducerTracker(trackerIdSequence.getAndIncrement(), reference, stream, producer), - stream); + ProducerTracker tracker = + new ProducerTracker(trackerIdSequence.getAndIncrement(), reference, stream, producer); + this.producerTrackers.add(tracker); + return registerAgentTracker(tracker, stream); } Runnable registerTrackingConsumer(StreamConsumer consumer) { @@ -100,7 +105,16 @@ private Runnable registerAgentTracker(AgentTracker tracker, String stream) { addToManager(broker, tracker); - return tracker::cancel; + return () -> { + if (tracker instanceof ProducerTracker) { + try { + this.producerTrackers.remove(tracker); + } catch (Exception e) { + LOGGER.debug("Error while removing producer tracker from list"); + } + } + tracker.cancel(); + }; } private void addToManager(Broker node, AgentTracker tracker) { @@ -248,20 +262,68 @@ public String toString() { "tracking_consumer_count", this.managers.stream().mapToInt(m -> m.trackingConsumerTrackers.size()).sum())) .append(","); + builder.append(jsonField("producer_tracker_count", this.producerTrackers.size())).append(","); builder.append(quote("clients")).append(" : ["); builder.append( this.managers.stream() .map( m -> { - StringBuilder b = new StringBuilder("{"); - b.append(jsonField("id", m.id)) + StringBuilder managerBuilder = new StringBuilder("{"); + managerBuilder + .append(jsonField("id", m.id)) .append(",") .append(jsonField("node", m.name)) .append(",") .append(jsonField("producer_count", m.producers.size())) .append(",") .append( - jsonField("tracking_consumer_count", m.trackingConsumerTrackers.size())); + jsonField("tracking_consumer_count", m.trackingConsumerTrackers.size())) + .append(","); + managerBuilder.append("\"producers\" : ["); + managerBuilder.append( + m.producers.values().stream() + .map( + p -> { + StringBuilder producerBuilder = new StringBuilder("{"); + producerBuilder.append(jsonField("stream", p.stream())).append(","); + producerBuilder.append(jsonField("producer_id", p.publisherId)); + return producerBuilder.append("}").toString(); + }) + .collect(Collectors.joining(","))); + managerBuilder.append("],"); + managerBuilder.append("\"tracking_consumers\" : ["); + managerBuilder.append( + m.trackingConsumerTrackers.stream() + .map( + t -> { + StringBuilder trackerBuilder = new StringBuilder("{"); + trackerBuilder.append(jsonField("stream", t.stream())); + return trackerBuilder.append("}").toString(); + }) + .collect(Collectors.joining(","))); + managerBuilder.append("]"); + return managerBuilder.append("}").toString(); + }) + .collect(Collectors.joining(","))); + builder.append("],"); + builder.append("\"producer_trackers\" : ["); + builder.append( + this.producerTrackers.stream() + .map( + t -> { + StringBuilder b = new StringBuilder("{"); + b.append(quote("stream")).append(":").append(quote(t.stream)).append(","); + b.append(quote("node")).append(":"); + Client client = null; + ClientProducersManager manager = t.clientProducersManager; + if (manager != null) { + client = manager.client; + } + if (client == null) { + b.append("null"); + } else { + b.append(quote(client.getHost() + ":" + client.getPort())); + } return b.append("}").toString(); }) .collect(Collectors.joining(","))); @@ -630,6 +692,11 @@ private void assignProducersToNewManagers( tracker -> { if (tracker.markRecoveryInProgress()) { recoverAgent(broker, tracker); + } else { + 
LOGGER.debug( + "Not recovering {} (stream {}), recovery is already is progress", + tracker.type(), + tracker.stream()); } }); }) @@ -689,7 +756,7 @@ private void recoverAgent(Broker node, AgentTracker tracker) { tracker.stream()); } catch (Exception e) { LOGGER.warn( - "Error while re-assigning %s (stream '{}')", tracker.type(), tracker.stream(), e); + "Error while re-assigning {} (stream '{}')", tracker.type(), tracker.stream(), e); reassignmentCompleted = true; } } diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamProducer.java b/src/main/java/com/rabbitmq/stream/impl/StreamProducer.java index 00d363ea3e..49919164eb 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamProducer.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamProducer.java @@ -484,9 +484,10 @@ void running() { } } publishBatch(false); - if (unconfirmedMessagesSemaphore.availablePermits() != maxUnconfirmedMessages) { - unconfirmedMessagesSemaphore.release( - maxUnconfirmedMessages - unconfirmedMessagesSemaphore.availablePermits()); + + int toRelease = maxUnconfirmedMessages - unconfirmedMessagesSemaphore.availablePermits(); + if (toRelease > 0) { + unconfirmedMessagesSemaphore.release(toRelease); if (!unconfirmedMessagesSemaphore.tryAcquire(this.unconfirmedMessages.size())) { LOGGER.debug( "Could not acquire {} permit(s) for message republishing", From 1b71be8243a374af6ee856d9fa0f312570b57ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Wed, 14 Dec 2022 11:01:13 +0100 Subject: [PATCH 08/10] Rethrow StreamExceptions in Client Instead of wrapping them in StreamExceptions. Easier to deal with timeout exception. --- .../java/com/rabbitmq/stream/Environment.java | 2 +- .../java/com/rabbitmq/stream/impl/Client.java | 92 +++++++++++++++---- .../java/com/rabbitmq/stream/impl/Codecs.java | 2 +- .../stream/impl/CompressionCodecs.java | 2 +- .../stream/impl/ConsumersCoordinator.java | 84 +++++++++-------- .../stream/impl/ProducersCoordinator.java | 91 ++++++++++-------- .../stream/impl/StreamEnvironment.java | 3 +- .../java/com/rabbitmq/stream/impl/Utils.java | 14 ++- 8 files changed, 192 insertions(+), 98 deletions(-) diff --git a/src/main/java/com/rabbitmq/stream/Environment.java b/src/main/java/com/rabbitmq/stream/Environment.java index a46b413ef4..7b97dc4493 100644 --- a/src/main/java/com/rabbitmq/stream/Environment.java +++ b/src/main/java/com/rabbitmq/stream/Environment.java @@ -42,7 +42,7 @@ static EnvironmentBuilder builder() { .getConstructor() .newInstance(); } catch (Exception e) { - throw new StreamException(e); + throw new StreamException("Error while creating stream environment builder", e); } } diff --git a/src/main/java/com/rabbitmq/stream/impl/Client.java b/src/main/java/com/rabbitmq/stream/impl/Client.java index 095bd433e5..fecfcb9d17 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Client.java +++ b/src/main/java/com/rabbitmq/stream/impl/Client.java @@ -46,6 +46,7 @@ import static com.rabbitmq.stream.impl.Utils.extractResponseCode; import static com.rabbitmq.stream.impl.Utils.formatConstant; import static java.lang.String.format; +import static java.lang.String.join; import static java.util.concurrent.TimeUnit.SECONDS; import com.rabbitmq.stream.AuthenticationFailureException; @@ -421,10 +422,11 @@ private Map peerProperties() { throw new StreamException("Error when establishing stream connection", request.error()); } } catch (StreamException e) { + outstandingRequests.remove(correlationId); throw e; } catch (RuntimeException e) { 
outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException("Error while trying to exchange peer properties", e); } } @@ -484,9 +486,12 @@ private SaslAuthenticateResponse sendSaslAuthenticate( channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException("Error while trying to authenticate", e); } } @@ -516,7 +521,7 @@ private Map open(String virtualHost) { throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException("Error during open command", e); } } @@ -527,7 +532,7 @@ void send(byte[] content) { try { channel.writeAndFlush(bb).sync(); } catch (InterruptedException e) { - throw new StreamException(e); + throw new StreamException("Error while sending bytes", e); } } @@ -555,9 +560,12 @@ private void sendClose(short code, String reason) { "Unexpected response code when closing: " + formatConstant(request.response.get().getResponseCode())); } + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException("Error while closing connection", e); } } @@ -575,9 +583,12 @@ private List getSaslMechanisms() { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException("Error while exchanging SASL mechanisms", e); } } @@ -611,9 +622,12 @@ public Response create(String stream, Map arguments) { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException(format("Error while creating stream '%s'", stream), e); } } @@ -656,9 +670,12 @@ public Response delete(String stream) { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException(format("Error while deleting stream '%s'", stream), e); } } @@ -688,9 +705,13 @@ public Map metadata(String... 
streams) { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException( + format("Error while getting metadata for stream(s) '%s'", join(",", streams)), e); } } @@ -728,7 +749,8 @@ public Response declarePublisher(byte publisherId, String publisherReference, St throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException( + format("Error while declaring publisher for stream '%s'", stream), e); } } @@ -752,7 +774,7 @@ public Response deletePublisher(byte publisherId) { throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException("Error while deleting publisher", e); } } @@ -1090,7 +1112,8 @@ public Response subscribe( throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException( + format("Error while trying to subscribe to stream '%s'", stream), e); } } @@ -1147,7 +1170,10 @@ public QueryOffsetResponse queryOffset(String reference, String stream) { throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException( + format( + "Error while querying offset for reference '%s' on stream '%s'", reference, stream), + e); } } @@ -1185,9 +1211,16 @@ public long queryPublisherSequence(String publisherReference, String stream) { formatConstant(response.getResponseCode())); } return response.getSequence(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException( + format( + "Error while querying publisher sequence for '%s' on stream '%s'", + publisherReference, stream), + e); } } @@ -1206,9 +1239,12 @@ public Response unsubscribe(byte subscriptionId) { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException("Error while unsubscribing", e); } } @@ -1330,9 +1366,16 @@ public List route(String routingKey, String superStream) { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException( + format( + "Error while querying route for routing key '%s' on super stream '%s'", + routingKey, superStream), + e); } } @@ -1356,9 +1399,13 @@ public List partitions(String superStream) { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException( + format("Error while querying partitions for super stream '%s'", superStream), e); } } @@ -1384,9 +1431,12 @@ List exchangeCommandVersions() { 
channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException("Error while exchanging command version", e); } } @@ -1409,9 +1459,13 @@ StreamStatsResponse streamStats(String stream) { channel.writeAndFlush(bb); request.block(); return request.response.get(); + } catch (StreamException e) { + outstandingRequests.remove(correlationId); + throw e; } catch (RuntimeException e) { outstandingRequests.remove(correlationId); - throw new StreamException(e); + throw new StreamException( + format("Error while querying statistics for stream '%s'", stream), e); } } diff --git a/src/main/java/com/rabbitmq/stream/impl/Codecs.java b/src/main/java/com/rabbitmq/stream/impl/Codecs.java index 34deb4444a..2634ea29ef 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Codecs.java +++ b/src/main/java/com/rabbitmq/stream/impl/Codecs.java @@ -31,7 +31,7 @@ private static Codec instanciateDefault() { return (Codec) Class.forName("com.rabbitmq.stream.codec.QpidProtonCodec").getConstructor().newInstance(); } catch (Exception e) { - throw new StreamException(e); + throw new StreamException("Error while creating QPid Proton codec", e); } } } diff --git a/src/main/java/com/rabbitmq/stream/impl/CompressionCodecs.java b/src/main/java/com/rabbitmq/stream/impl/CompressionCodecs.java index 02ac3a2a0c..0bfc91e72a 100644 --- a/src/main/java/com/rabbitmq/stream/impl/CompressionCodecs.java +++ b/src/main/java/com/rabbitmq/stream/impl/CompressionCodecs.java @@ -33,7 +33,7 @@ private static CompressionCodecFactory instanciateDefault() { .getConstructor() .newInstance(); } catch (Exception e) { - throw new StreamException(e); + throw new StreamException("Error while creating compression codec factory", e); } } } diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index cfa615d3ae..449e8875fe 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -78,12 +78,13 @@ class ConsumersCoordinator { private final ClientFactory clientFactory; private final int maxConsumersByConnection; private final Function connectionNamingStrategy; - // TODO remove the list of trackers (it's here just for debugging) - private final List trackers = new CopyOnWriteArrayList<>(); private final AtomicLong managerIdSequence = new AtomicLong(0); private final NavigableSet managers = new ConcurrentSkipListSet<>(); private final AtomicLong trackerIdSequence = new AtomicLong(0); + private final boolean debug = true; + private final List trackers = new CopyOnWriteArrayList<>(); + ConsumersCoordinator( StreamEnvironment environment, int maxConsumersByConnection, @@ -143,15 +144,19 @@ Runnable subscribe( throw new StreamException(e.getMessage()); } - this.trackers.add(subscriptionTracker); - return () -> { - try { - this.trackers.remove(subscriptionTracker); - } catch (Exception e) { - LOGGER.debug("Error while removing subscription tracker from list"); - } - subscriptionTracker.cancel(); - }; + if (debug) { + this.trackers.add(subscriptionTracker); + return () -> { + try { + this.trackers.remove(subscriptionTracker); + } catch (Exception e) { + LOGGER.debug("Error while removing subscription tracker from list"); + } + subscriptionTracker.cancel(); + 
}; + } else { + return subscriptionTracker::cancel; + } } private void addToManager( @@ -343,30 +348,34 @@ public String toString() { return managerBuilder.append("}").toString(); }) .collect(Collectors.joining(","))); - builder.append("],"); - builder.append("\"subscription_count\" : ").append(this.trackers.size()).append(","); - builder.append("\"subscriptions\" : ["); - builder.append( - this.trackers.stream() - .map( - t -> { - StringBuilder b = new StringBuilder("{"); - b.append(quote("stream")).append(":").append(quote(t.stream)).append(","); - b.append(quote("node")).append(":"); - Client client = null; - ClientSubscriptionsManager manager = t.manager; - if (manager != null) { - client = manager.client; - } - if (client == null) { - b.append("null"); - } else { - b.append(quote(client.getHost() + ":" + client.getPort())); - } - return b.append("}").toString(); - }) - .collect(Collectors.joining(","))); - builder.append("]}"); + builder.append("]"); + if (debug) { + builder.append(","); + builder.append("\"subscription_count\" : ").append(this.trackers.size()).append(","); + builder.append("\"subscriptions\" : ["); + builder.append( + this.trackers.stream() + .map( + t -> { + StringBuilder b = new StringBuilder("{"); + b.append(quote("stream")).append(":").append(quote(t.stream)).append(","); + b.append(quote("node")).append(":"); + Client client = null; + ClientSubscriptionsManager manager = t.manager; + if (manager != null) { + client = manager.client; + } + if (client == null) { + b.append("null"); + } else { + b.append(quote(client.getHost() + ":" + client.getPort())); + } + return b.append("}").toString(); + }) + .collect(Collectors.joining(","))); + builder.append("]"); + } + builder.append("}"); return builder.toString(); } @@ -810,7 +819,10 @@ private void recoverSubscription(List candidates, SubscriptionTracker tr } } } else { - LOGGER.debug("Not re-assigning consumer because it has been closed"); + LOGGER.debug( + "Not re-assigning consumer {} (stream '{}') because it has been closed", + tracker.consumer.id(), + tracker.stream); } reassignmentCompleted = true; } catch (ConnectionStreamException diff --git a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java index f1ea405b74..29e4af5b71 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java @@ -66,7 +66,7 @@ class ProducersCoordinator { private final AtomicLong managerIdSequence = new AtomicLong(0); private final NavigableSet managers = new ConcurrentSkipListSet<>(); private final AtomicLong trackerIdSequence = new AtomicLong(0); - // TODO remove the list of trackers (it's here just for debugging) + private final boolean debug = true; private final List producerTrackers = new CopyOnWriteArrayList<>(); ProducersCoordinator( @@ -89,7 +89,9 @@ private static String keyForNode(Client.Broker broker) { Runnable registerProducer(StreamProducer producer, String reference, String stream) { ProducerTracker tracker = new ProducerTracker(trackerIdSequence.getAndIncrement(), reference, stream, producer); - this.producerTrackers.add(tracker); + if (debug) { + this.producerTrackers.add(tracker); + } return registerAgentTracker(tracker, stream); } @@ -105,16 +107,20 @@ private Runnable registerAgentTracker(AgentTracker tracker, String stream) { addToManager(broker, tracker); - return () -> { - if (tracker instanceof ProducerTracker) { - try { - 
this.producerTrackers.remove(tracker); - } catch (Exception e) { - LOGGER.debug("Error while removing producer tracker from list"); + if (debug) { + return () -> { + if (tracker instanceof ProducerTracker) { + try { + this.producerTrackers.remove(tracker); + } catch (Exception e) { + LOGGER.debug("Error while removing producer tracker from list"); + } } - } - tracker.cancel(); - }; + tracker.cancel(); + }; + } else { + return tracker::cancel; + } } private void addToManager(Broker node, AgentTracker tracker) { @@ -146,7 +152,8 @@ private void addToManager(Broker node, AgentTracker tracker) { try { pickedManager.register(tracker); LOGGER.debug( - "Assigned tracker {} (stream '{}') to manager {} (node {}), subscription ID {}", + "Assigned {} tracker {} (stream '{}') to manager {} (node {}), subscription ID {}", + tracker.type(), tracker.uniqueId(), tracker.stream(), pickedManager.id, @@ -262,7 +269,9 @@ public String toString() { "tracking_consumer_count", this.managers.stream().mapToInt(m -> m.trackingConsumerTrackers.size()).sum())) .append(","); - builder.append(jsonField("producer_tracker_count", this.producerTrackers.size())).append(","); + if (debug) { + builder.append(jsonField("producer_tracker_count", this.producerTrackers.size())).append(","); + } builder.append(quote("clients")).append(" : ["); builder.append( this.managers.stream() @@ -305,29 +314,32 @@ public String toString() { return managerBuilder.append("}").toString(); }) .collect(Collectors.joining(","))); - builder.append("],"); - builder.append("\"producer_trackers\" : ["); - builder.append( - this.producerTrackers.stream() - .map( - t -> { - StringBuilder b = new StringBuilder("{"); - b.append(quote("stream")).append(":").append(quote(t.stream)).append(","); - b.append(quote("node")).append(":"); - Client client = null; - ClientProducersManager manager = t.clientProducersManager; - if (manager != null) { - client = manager.client; - } - if (client == null) { - b.append("null"); - } else { - b.append(quote(client.getHost() + ":" + client.getPort())); - } - return b.append("}").toString(); - }) - .collect(Collectors.joining(","))); builder.append("]"); + if (debug) { + builder.append(","); + builder.append("\"producer_trackers\" : ["); + builder.append( + this.producerTrackers.stream() + .map( + t -> { + StringBuilder b = new StringBuilder("{"); + b.append(quote("stream")).append(":").append(quote(t.stream)).append(","); + b.append(quote("node")).append(":"); + Client client = null; + ClientProducersManager manager = t.clientProducersManager; + if (manager != null) { + client = manager.client; + } + if (client == null) { + b.append("null"); + } else { + b.append(quote(client.getHost() + ":" + client.getPort())); + } + return b.append("}").toString(); + }) + .collect(Collectors.joining(","))); + builder.append("]"); + } return builder.append("}").toString(); } @@ -626,7 +638,7 @@ private ClientProducersManager( affectedTrackers = streamToTrackers.remove(stream); LOGGER.debug( "Affected publishers and consumer trackers after metadata update: {}", - affectedTrackers.size()); + affectedTrackers == null ? 
0 : affectedTrackers.size()); if (affectedTrackers != null && !affectedTrackers.isEmpty()) { affectedTrackers.forEach( tracker -> { @@ -694,7 +706,7 @@ private void assignProducersToNewManagers( recoverAgent(broker, tracker); } else { LOGGER.debug( - "Not recovering {} (stream {}), recovery is already is progress", + "Not recovering {} (stream '{}'), recovery is already is progress", tracker.type(), tracker.stream()); } @@ -733,7 +745,10 @@ private void recoverAgent(Broker node, AgentTracker tracker) { addToManager(node, tracker); tracker.running(); } else { - LOGGER.debug("Not recovering {} because it has been closed", tracker.type()); + LOGGER.debug( + "Not recovering {} (stream '{}') because it has been closed", + tracker.type(), + tracker.stream()); } reassignmentCompleted = true; } catch (ConnectionStreamException diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java index f7338e321d..d9e90ba244 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java @@ -14,6 +14,7 @@ package com.rabbitmq.stream.impl; import static com.rabbitmq.stream.impl.Utils.convertCodeToException; +import static com.rabbitmq.stream.impl.Utils.exceptionMessage; import static com.rabbitmq.stream.impl.Utils.formatConstant; import static com.rabbitmq.stream.impl.Utils.namedRunnable; import static java.util.concurrent.TimeUnit.SECONDS; @@ -725,7 +726,7 @@ static T locatorOperation( break; } } catch (Exception e) { - LOGGER.debug("Exception during locator operation '{}': {}", operation, e.getMessage()); + LOGGER.debug("Exception during locator operation '{}': {}", operation, exceptionMessage(e)); lastException = e; break; } diff --git a/src/main/java/com/rabbitmq/stream/impl/Utils.java b/src/main/java/com/rabbitmq/stream/impl/Utils.java index e89d58b832..a05f5cb9f4 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Utils.java +++ b/src/main/java/com/rabbitmq/stream/impl/Utils.java @@ -200,7 +200,9 @@ static T callAndMaybeRetry( } } String message = - format("Could not complete task '%s' after %d attempt(s)", description, --attempt); + format( + "Could not complete task '%s' after %d attempt(s) (reason: {})", + description, --attempt, exceptionMessage(lastException)); LOGGER.debug(message); if (lastException == null) { throw new StreamException(message); @@ -211,6 +213,16 @@ static T callAndMaybeRetry( } } + static String exceptionMessage(Exception e) { + if (e == null) { + return "unknown"; + } else if (e.getMessage() == null) { + return e.getClass().getSimpleName(); + } else { + return e.getMessage() + " [" + e.getClass().getSimpleName() + "]"; + } + } + interface ClientFactory { Client client(ClientFactoryContext context); From e62b82e5e97a19758ba555b46fe2bde77be9907b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Wed, 14 Dec 2022 16:37:44 +0100 Subject: [PATCH 09/10] Base retry completion on backoff delay policy --- .../stream/impl/ConsumersCoordinator.java | 52 +++++++++---------- .../stream/impl/ProducersCoordinator.java | 50 ++++++++---------- .../stream/impl/StreamEnvironment.java | 4 ++ .../java/com/rabbitmq/stream/impl/Utils.java | 45 ++++++++++++---- 4 files changed, 87 insertions(+), 64 deletions(-) diff --git a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java index 449e8875fe..5501330082 100644 --- 
a/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java @@ -16,8 +16,10 @@ import static com.rabbitmq.stream.impl.Utils.convertCodeToException; import static com.rabbitmq.stream.impl.Utils.formatConstant; import static com.rabbitmq.stream.impl.Utils.isSac; +import static com.rabbitmq.stream.impl.Utils.jsonField; import static com.rabbitmq.stream.impl.Utils.namedFunction; import static com.rabbitmq.stream.impl.Utils.namedRunnable; +import static com.rabbitmq.stream.impl.Utils.quote; import com.rabbitmq.stream.BackOffDelayPolicy; import com.rabbitmq.stream.Constants; @@ -224,7 +226,6 @@ private void addToManager( } } - // for testing int managerCount() { return this.managers.size(); } @@ -297,22 +298,6 @@ public void close() { } } - private static String quote(String value) { - if (value == null) { - return "null"; - } else { - return "\"" + value + "\""; - } - } - - private static String jsonField(String name, Number value) { - return quote(name) + " : " + value.longValue(); - } - - private static String jsonField(String name, String value) { - return quote(name) + " : " + (value == null ? "null" : quote(value)); - } - @Override public String toString() { StringBuilder builder = new StringBuilder("{"); @@ -768,17 +753,7 @@ private void assignConsumersToStream( consumersClosingCallback.run(); } else { for (SubscriptionTracker affectedSubscription : subscriptions) { - if (affectedSubscription.compareAndSet( - SubscriptionState.ACTIVE, SubscriptionState.RECOVERING)) { - recoverSubscription(candidates, affectedSubscription); - } else { - LOGGER.debug( - "Not recovering consumer {} from stream {}, state is {}, expected is {}", - affectedSubscription.consumer.id(), - affectedSubscription.stream, - affectedSubscription.state(), - SubscriptionState.ACTIVE); - } + maybeRecoverSubscription(candidates, affectedSubscription); } if (maybeCloseClient) { this.closeIfEmpty(); @@ -800,6 +775,27 @@ private void assignConsumersToStream( }); } + private void maybeRecoverSubscription(List candidates, SubscriptionTracker tracker) { + if (tracker.compareAndSet(SubscriptionState.ACTIVE, SubscriptionState.RECOVERING)) { + try { + recoverSubscription(candidates, tracker); + } catch (Exception e) { + LOGGER.warn( + "Error while recovering consumer {} from stream '{}'. 
Reason: {}", + tracker.consumer.id(), + tracker.stream, + Utils.exceptionMessage(e)); + } + } else { + LOGGER.debug( + "Not recovering consumer {} from stream {}, state is {}, expected is {}", + tracker.consumer.id(), + tracker.stream, + tracker.state(), + SubscriptionState.ACTIVE); + } + } + private void recoverSubscription(List candidates, SubscriptionTracker tracker) { boolean reassignmentCompleted = false; while (!reassignmentCompleted) { diff --git a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java index 29e4af5b71..9eda0b91d0 100644 --- a/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java +++ b/src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java @@ -15,8 +15,10 @@ import static com.rabbitmq.stream.impl.Utils.callAndMaybeRetry; import static com.rabbitmq.stream.impl.Utils.formatConstant; +import static com.rabbitmq.stream.impl.Utils.jsonField; import static com.rabbitmq.stream.impl.Utils.namedFunction; import static com.rabbitmq.stream.impl.Utils.namedRunnable; +import static com.rabbitmq.stream.impl.Utils.quote; import static java.util.stream.Collectors.toSet; import com.rabbitmq.stream.BackOffDelayPolicy; @@ -238,22 +240,6 @@ int nodesConnected() { return this.managers.stream().map(m -> m.name).collect(toSet()).size(); } - private static String quote(String value) { - if (value == null) { - return "null"; - } else { - return "\"" + value + "\""; - } - } - - private static String jsonField(String name, Number value) { - return quote(name) + " : " + value.longValue(); - } - - private static String jsonField(String name, String value) { - return quote(name) + " : " + quote(value); - } - @Override public String toString() { StringBuilder builder = new StringBuilder("{"); @@ -700,17 +686,7 @@ private void assignProducersToNewManagers( broker -> { String key = keyForNode(broker); LOGGER.debug("Assigning {} producer(s) to {}", trackers.size(), key); - trackers.forEach( - tracker -> { - if (tracker.markRecoveryInProgress()) { - recoverAgent(broker, tracker); - } else { - LOGGER.debug( - "Not recovering {} (stream '{}'), recovery is already is progress", - tracker.type(), - tracker.stream()); - } - }); + trackers.forEach(tracker -> maybeRecoverAgent(broker, tracker)); }) .exceptionally( ex -> { @@ -735,6 +711,26 @@ private void assignProducersToNewManagers( }); } + private void maybeRecoverAgent(Broker broker, AgentTracker tracker) { + if (tracker.markRecoveryInProgress()) { + try { + recoverAgent(broker, tracker); + } catch (Exception e) { + LOGGER.warn( + "Error while recovering {} tracker {} (stream '{}'). 
Reason: {}", + tracker.type(), + tracker.uniqueId(), + tracker.stream(), + Utils.exceptionMessage(e)); + } + } else { + LOGGER.debug( + "Not recovering {} (stream '{}'), recovery is already is progress", + tracker.type(), + tracker.stream()); + } + } + private void recoverAgent(Broker node, AgentTracker tracker) { boolean reassignmentCompleted = false; while (!reassignmentCompleted) { diff --git a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java index d9e90ba244..38685c1b3e 100644 --- a/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java +++ b/src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java @@ -812,6 +812,10 @@ public String toString() { }) .collect(Collectors.joining(",")) + "], " + + Utils.jsonField("producer_client_count", this.producersCoordinator.clientCount()) + + "," + + Utils.jsonField("consumer_client_count", this.consumersCoordinator.managerCount()) + + "," + "\"producers\" : " + this.producersCoordinator + ", \"consumers\" : " diff --git a/src/main/java/com/rabbitmq/stream/impl/Utils.java b/src/main/java/com/rabbitmq/stream/impl/Utils.java index a05f5cb9f4..141c42b432 100644 --- a/src/main/java/com/rabbitmq/stream/impl/Utils.java +++ b/src/main/java/com/rabbitmq/stream/impl/Utils.java @@ -165,7 +165,12 @@ static Function namedFunction(Function task, String format, O static T callAndMaybeRetry( Supplier operation, Predicate retryCondition, String format, Object... args) { - return callAndMaybeRetry(operation, retryCondition, i -> Duration.ZERO, format, args); + return callAndMaybeRetry( + operation, + retryCondition, + i -> i >= 3 ? BackOffDelayPolicy.TIMEOUT : Duration.ZERO, + format, + args); } static T callAndMaybeRetry( @@ -177,32 +182,38 @@ static T callAndMaybeRetry( String description = format(format, args); int attempt = 0; Exception lastException = null; - while (attempt++ < 3) { + boolean keepTrying = true; + while (keepTrying) { try { - return operation.get(); + attempt++; + T result = operation.get(); + LOGGER.debug("Operation '{}' completed after {} attempt(s)", description, attempt); + return result; } catch (Exception e) { lastException = e; if (retryCondition.test(e)) { LOGGER.debug("Operation '{}' failed, retrying...", description); - Duration delay = delayPolicy.delay(attempt - 1); - if (!delay.isZero()) { + Duration delay = delayPolicy.delay(attempt); + if (BackOffDelayPolicy.TIMEOUT.equals(delay)) { + keepTrying = false; + } else if (!delay.isZero()) { try { Thread.sleep(delay.toMillis()); } catch (InterruptedException ex) { Thread.interrupted(); lastException = ex; - break; + keepTrying = false; } } } else { - break; + keepTrying = false; } } } String message = format( - "Could not complete task '%s' after %d attempt(s) (reason: {})", - description, --attempt, exceptionMessage(lastException)); + "Could not complete task '%s' after %d attempt(s) (reason: %s)", + description, attempt, exceptionMessage(lastException)); LOGGER.debug(message); if (lastException == null) { throw new StreamException(message); @@ -455,4 +466,20 @@ public String toString() { return this.name; } } + + static String quote(String value) { + if (value == null) { + return "null"; + } else { + return "\"" + value + "\""; + } + } + + static String jsonField(String name, Number value) { + return quote(name) + " : " + value.longValue(); + } + + static String jsonField(String name, String value) { + return quote(name) + " : " + quote(value); + } } From f168ca8584005b6dd57d915c348cba20e89d40d3 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20Cogolu=C3=A8gnes?= Date: Wed, 14 Dec 2022 16:40:35 +0100 Subject: [PATCH 10/10] Bump SLF4J and logback for performance tool References #200 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 88a07bc6e9..e14b1ffe0e 100644 --- a/pom.xml +++ b/pom.xml @@ -51,8 +51,8 @@ true 1.7.36 1.2.11 - 2.0.3 - 1.3.3 + 2.0.6 + 1.3.5 4.1.85.Final 0.34.0 4.2.13
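
The retry changes in PATCH 07-09 converge on one pattern: an operation is retried while its exception is considered retryable and the backoff delay policy still returns a finite delay; as of PATCH 09 the retry loop stops when the policy answers BackOffDelayPolicy.TIMEOUT for the current attempt instead of using a hard-coded attempt count. The snippet below is a minimal, self-contained sketch of that pattern using only the JDK. It mirrors the shape of Utils.callAndMaybeRetry but is not the client's implementation: the DelayPolicy interface, its fixedWithTimeout factory, and the IllegalStateException used on failure are illustrative stand-ins for the library's BackOffDelayPolicy and StreamException.

import java.time.Duration;
import java.util.function.Predicate;
import java.util.function.Supplier;

public class RetryUntilPolicyTimeout {

  // Illustrative stand-in for a backoff delay policy: fixed delay, gives up after maxAttempts.
  interface DelayPolicy {
    Duration TIMEOUT = Duration.ofMillis(Long.MAX_VALUE);

    Duration delay(int attempt);

    static DelayPolicy fixedWithTimeout(Duration delay, int maxAttempts) {
      return attempt -> attempt >= maxAttempts ? TIMEOUT : delay;
    }
  }

  // Retries the operation while retryCondition accepts the exception and the policy
  // still returns a finite delay; otherwise fails with the last exception as the cause.
  static <T> T callAndMaybeRetry(
      Supplier<T> operation, Predicate<Exception> retryCondition, DelayPolicy policy) {
    int attempt = 0;
    Exception last = null;
    while (true) {
      attempt++;
      try {
        return operation.get();
      } catch (Exception e) {
        last = e;
        if (!retryCondition.test(e)) {
          break; // non-retryable, fail immediately
        }
        Duration delay = policy.delay(attempt);
        if (DelayPolicy.TIMEOUT.equals(delay)) {
          break; // the policy says we are done trying
        }
        if (!delay.isZero()) {
          try {
            Thread.sleep(delay.toMillis());
          } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            break;
          }
        }
      }
    }
    throw new IllegalStateException(
        "Operation failed after " + attempt + " attempt(s)", last);
  }

  public static void main(String[] args) {
    // Usage example: retry a flaky call with a 100 ms pause, up to 3 attempts.
    int result =
        callAndMaybeRetry(
            () -> {
              if (Math.random() < 0.5) {
                throw new RuntimeException("transient failure");
              }
              return 42;
            },
            e -> e instanceof RuntimeException,
            DelayPolicy.fixedWithTimeout(Duration.ofMillis(100), 3));
    System.out.println(result);
  }
}

Driving loop termination from the delay policy rather than a fixed attempt counter is what lets the coordinators in this series retry re-assignment for as long as the environment's recovery backoff policy allows, and no longer.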