Skip to content

Commit 9db6076

Browse files
committed
Remove server when failing on session acquisition
We were only checking for connection failures on running and consuming the results, however there might also be a connection failure on session acquisition. By not properly removing the server on session-acquisition failures we end up in a scenario where we constantly retry to connect to that endpoint.
1 parent 68ff674 commit 9db6076

File tree

2 files changed

+69
-11
lines changed

2 files changed

+69
-11
lines changed

driver/src/main/java/org/neo4j/driver/internal/RoutingDriver.java

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ public int compare( BoltServerAddress o1, BoltServerAddress o2 )
6666
}
6767
};
6868
private static final int MIN_SERVERS = 1;
69+
private static final int CONNECTION_RETRIES = 3;
6970
private final ConnectionPool connections;
7071
private final BiFunction<Connection,Logger,Session> sessionProvider;
7172
private final Clock clock;
@@ -110,7 +111,6 @@ private Set<BoltServerAddress> forgetAllServers()
110111
seen.addAll( routingServers );
111112
seen.addAll( readServers );
112113
seen.addAll( writeServers );
113-
routingServers.clear();
114114
readServers.clear();
115115
writeServers.clear();
116116
return seen;
@@ -138,11 +138,11 @@ private void getServers()
138138
{
139139
boolean success = false;
140140

141-
ConcurrentRoundRobinSet<BoltServerAddress> routers = new ConcurrentRoundRobinSet<>( routingServers );
141+
final Set<BoltServerAddress> newRouters = new HashSet<>( );
142142
final Set<BoltServerAddress> seen = forgetAllServers();
143-
while ( !routers.isEmpty() && !success )
143+
while ( !routingServers.isEmpty() && !success )
144144
{
145-
address = routers.hop();
145+
address = routingServers.hop();
146146
success = call( address, GET_SERVERS, new Consumer<Record>()
147147
{
148148
@Override
@@ -162,12 +162,19 @@ public void accept( Record record )
162162
writeServers.addAll( server.addresses() );
163163
break;
164164
case "ROUTE":
165-
routingServers.addAll( server.addresses() );
165+
newRouters.addAll( server.addresses() );
166166
break;
167167
}
168168
}
169169
}
170170
} );
171+
//We got trough but server gave us an empty list of routers
172+
if (success && newRouters.isEmpty()) {
173+
success = false;
174+
} else if (success) {
175+
routingServers.clear();
176+
routingServers.addAll( newRouters );
177+
}
171178
}
172179
if ( !success )
173180
{
@@ -249,7 +256,7 @@ private boolean call( BoltServerAddress address, String procedureName, Consumer<
249256
recorder.accept( records.next() );
250257
}
251258
}
252-
catch ( ConnectionFailureException e )
259+
catch ( Throwable e )
253260
{
254261
forget( address );
255262
return false;
@@ -306,18 +313,36 @@ public void onWriteFailure( BoltServerAddress address )
306313

307314
private Connection acquireConnection( AccessMode role )
308315
{
309-
//Potentially rediscover servers if we are not happy with our current knowledge
310-
checkServers();
311-
316+
ConcurrentRoundRobinSet<BoltServerAddress> servers;
312317
switch ( role )
313318
{
314319
case READ:
315-
return connections.acquire( readServers.hop() );
320+
servers = readServers;
321+
break;
316322
case WRITE:
317-
return connections.acquire( writeServers.hop() );
323+
servers = writeServers;
324+
break;
318325
default:
319326
throw new ClientException( role + " is not supported for creating new sessions" );
320327
}
328+
329+
//Potentially rediscover servers if we are not happy with our current knowledge
330+
checkServers();
331+
int numberOfServers = servers.size();
332+
for ( int i = 0; i < numberOfServers; i++ )
333+
{
334+
BoltServerAddress address = servers.hop();
335+
try
336+
{
337+
return connections.acquire( address );
338+
}
339+
catch ( ConnectionFailureException e )
340+
{
341+
forget( address );
342+
}
343+
}
344+
345+
throw new ConnectionFailureException( "Failed to connect to any servers" );
321346
}
322347

323348
@Override

driver/src/test/java/org/neo4j/driver/internal/RoutingDriverStubTest.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,15 @@
4242
import org.neo4j.driver.v1.GraphDatabase;
4343
import org.neo4j.driver.v1.Record;
4444
import org.neo4j.driver.v1.Session;
45+
import org.neo4j.driver.v1.exceptions.ConnectionFailureException;
4546
import org.neo4j.driver.v1.exceptions.ServiceUnavailableException;
4647
import org.neo4j.driver.v1.exceptions.SessionExpiredException;
4748
import org.neo4j.driver.v1.util.Function;
4849
import org.neo4j.driver.v1.util.StubServer;
4950

5051
import static org.hamcrest.Matchers.contains;
5152
import static org.hamcrest.Matchers.containsInAnyOrder;
53+
import static org.hamcrest.Matchers.empty;
5254
import static org.hamcrest.Matchers.hasItem;
5355
import static org.hamcrest.Matchers.hasSize;
5456
import static org.hamcrest.core.IsEqual.equalTo;
@@ -331,6 +333,37 @@ public void shouldForgetEndpointsOnFailure() throws IOException, InterruptedExce
331333
assertThat( server.exitStatus(), equalTo( 0 ) );
332334
}
333335

336+
@Test
337+
public void shouldForgetEndpointsOnFailedSessionAcquisition() throws IOException, InterruptedException, StubServer.ForceKilled
338+
{
339+
// Given
340+
StubServer server = StubServer.start( resource( "acquire_endpoints.script" ), 9001 );
341+
342+
//no read servers
343+
344+
URI uri = URI.create( "bolt+routing://127.0.0.1:9001" );
345+
RoutingDriver driver = (RoutingDriver) GraphDatabase.driver( uri, config );
346+
try
347+
{
348+
driver.session( AccessMode.READ );
349+
fail();
350+
}
351+
catch ( ConnectionFailureException e )
352+
{
353+
//ignore
354+
}
355+
356+
assertThat( driver.readServers(), empty() );
357+
assertThat( driver.writeServers(), hasSize( 2 ) );
358+
assertFalse( driver.connectionPool().hasAddress( address( 9005 ) ) );
359+
assertFalse( driver.connectionPool().hasAddress( address( 9006 ) ) );
360+
driver.close();
361+
362+
// Finally
363+
assertThat( server.exitStatus(), equalTo( 0 ) );
364+
}
365+
366+
334367
@Test
335368
public void shouldRediscoverIfNecessaryOnSessionAcquisition()
336369
throws IOException, InterruptedException, StubServer.ForceKilled

0 commit comments

Comments
 (0)