Skip to content

Commit c2c9699

Browse files
committed
Allow producers to connect to a replica
This can speed up producer creation/recovery when a load balancer is used.
1 parent 0e3329e commit c2c9699

10 files changed

+312
-57
lines changed

src/docs/asciidoc/api.adoc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,11 @@ The client retries 5 times before falling back to the stream leader node.
220220
Set to `true` only for clustered environments, not for 1-node environments, where only the stream leader is available.
221221
|`false`
222222

223+
|`forceLeaderForProducers`
224+
|Force connecting to a stream leader for producers.
225+
Set to `false` if it is acceptable to stay connected to a stream replica when a load balancer is in use.
226+
|`true`
227+
223228
|`id`
224229
|Informational ID for the environment instance.
225230
Used as a prefix for connection names.

src/main/java/com/rabbitmq/stream/EnvironmentBuilder.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2020-2023 Broadcom. All Rights Reserved.
1+
// Copyright (c) 2020-2024 Broadcom. All Rights Reserved.
22
// The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
33
//
44
// This software, the RabbitMQ Stream Java client library, is dual-licensed under the
@@ -354,7 +354,7 @@ EnvironmentBuilder topologyUpdateBackOffDelayPolicy(
354354
* <p><b>Do not set this flag to <code>true</code> when streams have only 1 member (the leader),
355355
* e.g. for local development.</b>
356356
*
357-
* <p>Default is false.
357+
* <p>Default is <code>false</code>.
358358
*
359359
* @param forceReplica whether to force the connection to a replica or not
360360
* @return this builder instance
@@ -364,6 +364,37 @@ EnvironmentBuilder topologyUpdateBackOffDelayPolicy(
364364
*/
365365
EnvironmentBuilder forceReplicaForConsumers(boolean forceReplica);
366366

367+
/**
368+
* Flag to force the connection to the stream leader for producers.
369+
*
370+
* <p>The library prefers to connect to a node that hosts a stream leader for producers (default
371+
* behavior).
372+
*
373+
* <p>When using a load balancer, the library does not know in advance the node it connects to. It
374+
* may have to retry to connect to the appropriate node.
375+
*
376+
* <p>It will retry until it connects to the appropriate node (flag set to <code>true</code>, the
377+
* default). This provides the best data locality, but may require several attempts, delaying the
378+
* creation or the recovery of producers. This usually suits high-throughput use cases.
379+
*
380+
* <p>The library will accept the connection to a stream replica if the flag is set to <code>false
381+
* </code>. This will speed up the creation/recovery of producers, but at the cost of network hops
382+
* between cluster nodes when publishing messages because only a stream leader accepts writes.
383+
* This is usually acceptable for low-throughput use cases.
384+
*
385+
* <p>Changing the default value should only benefit systems where a load balancer sits between
386+
* the client applications and the cluster nodes.
387+
*
388+
* <p>Default is <code>true</code>.
389+
*
390+
* @param forceLeader whether to force the connection to the leader or not
391+
* @return this builder instance
392+
* @see #recoveryBackOffDelayPolicy(BackOffDelayPolicy)
393+
* @see #topologyUpdateBackOffDelayPolicy(BackOffDelayPolicy)
394+
* @since 0.21.0
395+
*/
396+
EnvironmentBuilder forceLeaderForProducers(boolean forceLeader);
397+
367398
/**
368399
* Create the {@link Environment} instance.
369400
*

src/main/java/com/rabbitmq/stream/impl/Client.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2245,7 +2245,10 @@ public StreamMetadata(String stream, short responseCode, Broker leader, List<Bro
22452245
this.stream = stream;
22462246
this.responseCode = responseCode;
22472247
this.leader = leader;
2248-
this.replicas = replicas == null ? null : Collections.unmodifiableList(replicas);
2248+
this.replicas =
2249+
(replicas == null || replicas.isEmpty())
2250+
? Collections.emptyList()
2251+
: Collections.unmodifiableList(replicas);
22492252
}
22502253

22512254
public short getResponseCode() {

src/main/java/com/rabbitmq/stream/impl/ProducersCoordinator.java

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package com.rabbitmq.stream.impl;
1616

1717
import static com.rabbitmq.stream.impl.Utils.*;
18+
import static java.util.stream.Collectors.toList;
1819
import static java.util.stream.Collectors.toSet;
1920

2021
import com.rabbitmq.stream.BackOffDelayPolicy;
@@ -49,7 +50,7 @@
4950
import org.slf4j.Logger;
5051
import org.slf4j.LoggerFactory;
5152

52-
class ProducersCoordinator {
53+
final class ProducersCoordinator implements AutoCloseable {
5354

5455
static final int MAX_PRODUCERS_PER_CLIENT = 256;
5556
static final int MAX_TRACKING_CONSUMERS_PER_CLIENT = 50;
@@ -67,18 +68,21 @@ class ProducersCoordinator {
6768
new DefaultExecutorServiceFactory(
6869
Runtime.getRuntime().availableProcessors(), 10, "rabbitmq-stream-producer-connection-");
6970
private final Lock coordinatorLock = new ReentrantLock();
71+
private final boolean forceLeader;
7072

7173
ProducersCoordinator(
7274
StreamEnvironment environment,
7375
int maxProducersByClient,
7476
int maxTrackingConsumersByClient,
7577
Function<ClientConnectionType, String> connectionNamingStrategy,
76-
ClientFactory clientFactory) {
78+
ClientFactory clientFactory,
79+
boolean forceLeader) {
7780
this.environment = environment;
7881
this.clientFactory = clientFactory;
7982
this.maxProducersByClient = maxProducersByClient;
8083
this.maxTrackingConsumersByClient = maxTrackingConsumersByClient;
8184
this.connectionNamingStrategy = connectionNamingStrategy;
85+
this.forceLeader = forceLeader;
8286
}
8387

8488
Runnable registerProducer(StreamProducer producer, String reference, String stream) {
@@ -105,9 +109,10 @@ Runnable registerTrackingConsumer(StreamConsumer consumer) {
105109
}
106110

107111
private Runnable registerAgentTracker(AgentTracker tracker, String stream) {
108-
Client.Broker broker = getBrokerForProducer(stream);
112+
List<BrokerWrapper> candidates = findCandidateNodes(stream, this.forceLeader);
113+
Broker broker = pickBroker(candidates);
109114

110-
addToManager(broker, tracker);
115+
addToManager(broker, candidates, tracker);
111116

112117
if (DEBUG) {
113118
return () -> {
@@ -125,7 +130,7 @@ private Runnable registerAgentTracker(AgentTracker tracker, String stream) {
125130
}
126131
}
127132

128-
private void addToManager(Broker node, AgentTracker tracker) {
133+
private void addToManager(Broker node, List<BrokerWrapper> candidates, AgentTracker tracker) {
129134
ClientParameters clientParameters =
130135
environment
131136
.clientParametersCopy()
@@ -153,7 +158,8 @@ private void addToManager(Broker node, AgentTracker tracker) {
153158
if (pickedManager == null) {
154159
String name = keyForNode(node);
155160
LOGGER.debug("Trying to create producer manager on {}", name);
156-
pickedManager = new ClientProducersManager(node, this.clientFactory, clientParameters);
161+
pickedManager =
162+
new ClientProducersManager(node, candidates, this.clientFactory, clientParameters);
157163
LOGGER.debug("Created producer manager on {}, id {}", name, pickedManager.id);
158164
}
159165
try {
@@ -192,11 +198,12 @@ private void addToManager(Broker node, AgentTracker tracker) {
192198
}
193199
}
194200

195-
private Client.Broker getBrokerForProducer(String stream) {
201+
// package-private for testing
202+
List<BrokerWrapper> findCandidateNodes(String stream, boolean forceLeader) {
196203
Map<String, Client.StreamMetadata> metadata =
197204
this.environment.locatorOperation(
198205
namedFunction(c -> c.metadata(stream), "Candidate lookup to publish to '%s'", stream));
199-
if (metadata.size() == 0 || metadata.get(stream) == null) {
206+
if (metadata.isEmpty() || metadata.get(stream) == null) {
200207
throw new StreamDoesNotExistException(stream);
201208
}
202209

@@ -210,17 +217,34 @@ private Client.Broker getBrokerForProducer(String stream) {
210217
}
211218
}
212219

220+
List<BrokerWrapper> candidates = new ArrayList<>();
213221
Client.Broker leader = streamMetadata.getLeader();
214-
if (leader == null) {
222+
if (leader == null && forceLeader) {
215223
throw new IllegalStateException("Not leader available for stream " + stream);
216224
}
217-
LOGGER.debug(
218-
"Using client on {}:{} to publish to {}", leader.getHost(), leader.getPort(), stream);
225+
candidates.add(new BrokerWrapper(leader, true));
219226

220-
return leader;
227+
if (!forceLeader && !streamMetadata.getReplicas().isEmpty()) {
228+
candidates.addAll(
229+
streamMetadata.getReplicas().stream()
230+
.map(b -> new BrokerWrapper(b, false))
231+
.collect(toList()));
232+
}
233+
234+
LOGGER.debug("Candidates to publish to {}: {}", stream, candidates);
235+
236+
return Collections.unmodifiableList(candidates);
237+
}
238+
239+
static Broker pickBroker(List<BrokerWrapper> candidates) {
240+
return candidates.stream()
241+
.filter(BrokerWrapper::isLeader)
242+
.findFirst()
243+
.map(BrokerWrapper::broker)
244+
.orElseThrow(() -> new IllegalStateException("Not leader available"));
221245
}
222246

223-
void close() {
247+
public void close() {
224248
Iterator<ClientProducersManager> iterator = this.managers.iterator();
225249
while (iterator.hasNext()) {
226250
ClientProducersManager manager = iterator.next();
@@ -568,7 +592,10 @@ private class ClientProducersManager implements Comparable<ClientProducersManage
568592
private final AtomicBoolean closed = new AtomicBoolean(false);
569593

570594
private ClientProducersManager(
571-
Broker targetNode, ClientFactory cf, Client.ClientParameters clientParameters) {
595+
Broker targetNode,
596+
List<BrokerWrapper> candidates,
597+
ClientFactory cf,
598+
Client.ClientParameters clientParameters) {
572599
this.id = managerIdSequence.getAndIncrement();
573600
AtomicReference<String> nameReference = new AtomicReference<>();
574601
AtomicReference<Client> ref = new AtomicReference<>();
@@ -682,7 +709,7 @@ private ClientProducersManager(
682709
.metadataListener(metadataListener)
683710
.clientProperty("connection_name", connectionName),
684711
keyForNode(targetNode),
685-
Collections.emptyList());
712+
candidates.stream().map(BrokerWrapper::broker).collect(toList()));
686713
this.client = cf.client(connectionFactoryContext);
687714
this.node = Utils.brokerFromClient(this.client);
688715
this.name = keyForNode(this.node);
@@ -694,18 +721,19 @@ private ClientProducersManager(
694721

695722
private void assignProducersToNewManagers(
696723
Collection<AgentTracker> trackers, String stream, BackOffDelayPolicy delayPolicy) {
697-
AsyncRetry.asyncRetry(() -> getBrokerForProducer(stream))
724+
AsyncRetry.asyncRetry(() -> findCandidateNodes(stream, forceLeader))
698725
.description("Candidate lookup to publish to " + stream)
699726
.scheduler(environment.scheduledExecutorService())
700727
.retry(ex -> !(ex instanceof StreamDoesNotExistException))
701728
.delayPolicy(delayPolicy)
702729
.build()
703730
.thenAccept(
704-
broker -> {
731+
candidates -> {
732+
Broker broker = pickBroker(candidates);
705733
String key = keyForNode(broker);
706734
LOGGER.debug(
707735
"Assigning {} producer(s) and consumer tracker(s) to {}", trackers.size(), key);
708-
trackers.forEach(tracker -> maybeRecoverAgent(broker, tracker));
736+
trackers.forEach(tracker -> maybeRecoverAgent(broker, candidates, tracker));
709737
})
710738
.exceptionally(
711739
ex -> {
@@ -730,10 +758,11 @@ private void assignProducersToNewManagers(
730758
});
731759
}
732760

733-
private void maybeRecoverAgent(Broker broker, AgentTracker tracker) {
761+
private void maybeRecoverAgent(
762+
Broker broker, List<BrokerWrapper> candidates, AgentTracker tracker) {
734763
if (tracker.markRecoveryInProgress()) {
735764
try {
736-
recoverAgent(broker, tracker);
765+
recoverAgent(broker, candidates, tracker);
737766
} catch (Exception e) {
738767
LOGGER.warn(
739768
"Error while recovering {} tracker {} (stream '{}'). Reason: {}",
@@ -750,14 +779,14 @@ private void maybeRecoverAgent(Broker broker, AgentTracker tracker) {
750779
}
751780
}
752781

753-
private void recoverAgent(Broker node, AgentTracker tracker) {
782+
private void recoverAgent(Broker node, List<BrokerWrapper> candidates, AgentTracker tracker) {
754783
boolean reassignmentCompleted = false;
755784
while (!reassignmentCompleted) {
756785
try {
757786
if (tracker.isOpen()) {
758787
LOGGER.debug(
759788
"Using {} to resume {} to {}", node.label(), tracker.type(), tracker.stream());
760-
addToManager(node, tracker);
789+
addToManager(node, candidates, tracker);
761790
tracker.running();
762791
} else {
763792
LOGGER.debug(
@@ -776,14 +805,15 @@ private void recoverAgent(Broker node, AgentTracker tracker) {
776805
tracker.identifiable() ? tracker.id() : "N/A",
777806
tracker.stream());
778807
// maybe not a good candidate, let's refresh and retry for this one
779-
node =
808+
candidates =
780809
Utils.callAndMaybeRetry(
781-
() -> getBrokerForProducer(tracker.stream()),
810+
() -> findCandidateNodes(tracker.stream(), forceLeader),
782811
ex -> !(ex instanceof StreamDoesNotExistException),
783812
environment.recoveryBackOffDelayPolicy(),
784813
"Candidate lookup for %s on stream '%s'",
785814
tracker.type(),
786815
tracker.stream());
816+
node = pickBroker(candidates);
787817
} catch (Exception e) {
788818
LOGGER.warn(
789819
"Error while re-assigning {} (stream '{}')", tracker.type(), tracker.stream(), e);

src/main/java/com/rabbitmq/stream/impl/StreamEnvironment.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,10 @@ class StreamEnvironment implements Environment {
102102
Function<ClientConnectionType, String> connectionNamingStrategy,
103103
Function<Client.ClientParameters, Client> clientFactory,
104104
ObservationCollector<?> observationCollector,
105-
boolean forceReplicaForConsumers) {
105+
boolean forceReplicaForConsumers,
106+
boolean forceLeaderForProducers,
107+
Duration producerNodeRetryDelay,
108+
Duration consumerNodeRetryDelay) {
106109
this.recoveryBackOffDelayPolicy = recoveryBackOffDelayPolicy;
107110
this.topologyUpdateBackOffDelayPolicy = topologyBackOffDelayPolicy;
108111
this.byteBufAllocator = byteBufAllocator;
@@ -212,13 +215,14 @@ class StreamEnvironment implements Environment {
212215
maxProducersByConnection,
213216
maxTrackingConsumersByConnection,
214217
connectionNamingStrategy,
215-
Utils.coordinatorClientFactory(this));
218+
Utils.coordinatorClientFactory(this, producerNodeRetryDelay),
219+
forceLeaderForProducers);
216220
this.consumersCoordinator =
217221
new ConsumersCoordinator(
218222
this,
219223
maxConsumersByConnection,
220224
connectionNamingStrategy,
221-
Utils.coordinatorClientFactory(this),
225+
Utils.coordinatorClientFactory(this, consumerNodeRetryDelay),
222226
forceReplicaForConsumers,
223227
Utils.brokerPicker());
224228
this.offsetTrackingCoordinator = new OffsetTrackingCoordinator(this);

src/main/java/com/rabbitmq/stream/impl/StreamEnvironmentBuilder.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2020-2023 Broadcom. All Rights Reserved.
1+
// Copyright (c) 2020-2024 Broadcom. All Rights Reserved.
22
// The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
33
//
44
// This software, the RabbitMQ Stream Java client library, is dual-licensed under the
@@ -65,8 +65,11 @@ public class StreamEnvironmentBuilder implements EnvironmentBuilder {
6565
private CompressionCodecFactory compressionCodecFactory;
6666
private boolean lazyInit = false;
6767
private boolean forceReplicaForConsumers = false;
68+
private boolean forceLeaderForProducers = true;
6869
private Function<Client.ClientParameters, Client> clientFactory = Client::new;
6970
private ObservationCollector<?> observationCollector = ObservationCollector.NO_OP;
71+
private Duration producerNodeRetryDelay = Duration.ofMillis(500);
72+
private Duration consumerNodeRetryDelay = Duration.ofMillis(1000);
7073

7174
public StreamEnvironmentBuilder() {}
7275

@@ -274,6 +277,12 @@ public EnvironmentBuilder forceReplicaForConsumers(boolean forceReplica) {
274277
return this;
275278
}
276279

280+
@Override
281+
public EnvironmentBuilder forceLeaderForProducers(boolean forceLeader) {
282+
this.forceLeaderForProducers = forceLeader;
283+
return this;
284+
}
285+
277286
@Override
278287
public TlsConfiguration tls() {
279288
this.tls.enable();
@@ -296,6 +305,16 @@ public EnvironmentBuilder observationCollector(ObservationCollector<?> observati
296305
return this;
297306
}
298307

308+
StreamEnvironmentBuilder producerNodeRetryDelay(Duration producerNodeRetryDelay) {
309+
this.producerNodeRetryDelay = producerNodeRetryDelay;
310+
return this;
311+
}
312+
313+
StreamEnvironmentBuilder consumerNodeRetryDelay(Duration consumerNodeRetryDelay) {
314+
this.consumerNodeRetryDelay = consumerNodeRetryDelay;
315+
return this;
316+
}
317+
299318
@Override
300319
public Environment build() {
301320
if (this.compressionCodecFactory == null) {
@@ -327,7 +346,10 @@ public Environment build() {
327346
connectionNamingStrategy,
328347
this.clientFactory,
329348
this.observationCollector,
330-
this.forceReplicaForConsumers);
349+
this.forceReplicaForConsumers,
350+
this.forceLeaderForProducers,
351+
this.producerNodeRetryDelay,
352+
this.consumerNodeRetryDelay);
331353
}
332354

333355
static final class DefaultTlsConfiguration implements TlsConfiguration {

src/main/java/com/rabbitmq/stream/impl/Utils.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,6 @@ static short encodeResponseCode(Short code) {
135135
return (short) (code | 0B1000_0000_0000_0000);
136136
}
137137

138-
static ClientFactory coordinatorClientFactory(StreamEnvironment environment) {
139-
return coordinatorClientFactory(environment, ConditionalClientFactory.RETRY_INTERVAL);
140-
}
141-
142138
static ClientFactory coordinatorClientFactory(
143139
StreamEnvironment environment, Duration retryInterval) {
144140
String messageFormat =

0 commit comments

Comments
 (0)