Skip to content

Commit bf253bd

Browse files
committed
Improve waiting for leader to step down in CC tests
Use direct drivers to fetch cluster overview from cores, instead of routing driver and write session. Wait for at most 60 seconds.
1 parent 87a46c8 commit bf253bd

File tree

3 files changed

+101
-35
lines changed

3 files changed

+101
-35
lines changed

driver/src/test/java/org/neo4j/driver/v1/integration/CausalClusteringIT.java

Lines changed: 91 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.ArrayList;
2929
import java.util.Arrays;
3030
import java.util.List;
31+
import java.util.Set;
3132
import java.util.concurrent.Callable;
3233
import java.util.concurrent.CompletionStage;
3334
import java.util.concurrent.CountDownLatch;
@@ -58,6 +59,7 @@
5859
import org.neo4j.driver.v1.TransactionWork;
5960
import org.neo4j.driver.v1.Values;
6061
import org.neo4j.driver.v1.exceptions.ClientException;
62+
import org.neo4j.driver.v1.exceptions.Neo4jException;
6163
import org.neo4j.driver.v1.exceptions.ServiceUnavailableException;
6264
import org.neo4j.driver.v1.exceptions.SessionExpiredException;
6365
import org.neo4j.driver.v1.exceptions.TransientException;
@@ -413,11 +415,12 @@ public void shouldNotServeWritesWhenMajorityOfCoresAreDead() throws Exception
413415

414416
try ( Driver driver = createDriver( leader.getRoutingUri() ) )
415417
{
418+
Set<ClusterMember> cores = cluster.cores();
416419
for ( ClusterMember follower : cluster.followers() )
417420
{
418421
cluster.kill( follower );
419422
}
420-
awaitLeaderToStepDown( driver );
423+
awaitLeaderToStepDown( cores );
421424

422425
// now we should be unable to write because majority of cores is down
423426
for ( int i = 0; i < 10; i++ )
@@ -462,11 +465,12 @@ public Integer execute( Transaction tx )
462465

463466
ensureNodeVisible( cluster, "Star Lord", bookmark );
464467

468+
Set<ClusterMember> cores = cluster.cores();
465469
for ( ClusterMember follower : cluster.followers() )
466470
{
467471
cluster.kill( follower );
468472
}
469-
awaitLeaderToStepDown( driver );
473+
awaitLeaderToStepDown( cores );
470474

471475
// now we should be unable to write because majority of cores is down
472476
try ( Session session = driver.session( AccessMode.WRITE ) )
@@ -913,44 +917,27 @@ public Integer execute( Transaction tx )
913917
}
914918
}
915919

916-
private void awaitLeaderToStepDown( Driver driver )
920+
private void awaitLeaderToStepDown( Set<ClusterMember> cores )
917921
{
918-
int leadersCount;
919-
int followersCount;
920-
int readReplicasCount;
922+
long deadline = System.currentTimeMillis() + DEFAULT_TIMEOUT_MS;
923+
ClusterOverview overview = null;
921924
do
922925
{
923-
try ( Session session = driver.session() )
926+
for ( ClusterMember core : cores )
924927
{
925-
int newLeadersCount = 0;
926-
int newFollowersCount = 0;
927-
int newReadReplicasCount = 0;
928-
for ( Record record : session.run( "CALL dbms.cluster.overview()" ).list() )
928+
overview = fetchClusterOverview( core );
929+
if ( overview != null )
929930
{
930-
ClusterMemberRole role = ClusterMemberRole.valueOf( record.get( "role" ).asString() );
931-
if ( role == ClusterMemberRole.LEADER )
932-
{
933-
newLeadersCount++;
934-
}
935-
else if ( role == ClusterMemberRole.FOLLOWER )
936-
{
937-
newFollowersCount++;
938-
}
939-
else if ( role == ClusterMemberRole.READ_REPLICA )
940-
{
941-
newReadReplicasCount++;
942-
}
943-
else
944-
{
945-
throw new AssertionError( "Unknown role: " + role );
946-
}
931+
break;
947932
}
948-
leadersCount = newLeadersCount;
949-
followersCount = newFollowersCount;
950-
readReplicasCount = newReadReplicasCount;
951933
}
952934
}
953-
while ( !(leadersCount == 0 && followersCount == 1 && readReplicasCount == 2) );
935+
while ( !isSingleFollowerWithReadReplicas( overview ) && System.currentTimeMillis() <= deadline );
936+
937+
if ( System.currentTimeMillis() > deadline )
938+
{
939+
throw new IllegalStateException( "Leader did not step down in " + DEFAULT_TIMEOUT_MS + "ms. Last seen cluster overview: " + overview );
940+
}
954941
}
955942

956943
private Driver createDriver( URI boltUri )
@@ -968,6 +955,43 @@ private Driver discoverDriver( List<URI> routingUris )
968955
return GraphDatabase.routingDriver( routingUris, clusterRule.getDefaultAuthToken(), configWithoutLogging() );
969956
}
970957

958+
private ClusterOverview fetchClusterOverview( ClusterMember member )
959+
{
960+
int leaderCount = 0;
961+
int followerCount = 0;
962+
int readReplicaCount = 0;
963+
964+
Driver driver = clusterRule.getCluster().getDirectDriver( member );
965+
try ( Session session = driver.session() )
966+
{
967+
for ( Record record : session.run( "CALL dbms.cluster.overview()" ).list() )
968+
{
969+
ClusterMemberRole role = ClusterMemberRole.valueOf( record.get( "role" ).asString() );
970+
if ( role == ClusterMemberRole.LEADER )
971+
{
972+
leaderCount++;
973+
}
974+
else if ( role == ClusterMemberRole.FOLLOWER )
975+
{
976+
followerCount++;
977+
}
978+
else if ( role == ClusterMemberRole.READ_REPLICA )
979+
{
980+
readReplicaCount++;
981+
}
982+
else
983+
{
984+
throw new AssertionError( "Unknown role: " + role );
985+
}
986+
}
987+
return new ClusterOverview( leaderCount, followerCount, readReplicaCount );
988+
}
989+
catch ( Neo4jException ignore )
990+
{
991+
return null;
992+
}
993+
}
994+
971995
private static void createNodesInDifferentThreads( int count, final Driver driver ) throws Exception
972996
{
973997
final CountDownLatch beforeRunLatch = new CountDownLatch( count );
@@ -1133,6 +1157,17 @@ private static ExecutorService newExecutor()
11331157
return Executors.newCachedThreadPool( daemon( CausalClusteringIT.class.getSimpleName() + "-thread-" ) );
11341158
}
11351159

1160+
private static boolean isSingleFollowerWithReadReplicas( ClusterOverview overview )
1161+
{
1162+
if ( overview == null )
1163+
{
1164+
return false;
1165+
}
1166+
return overview.leaderCount == 0 &&
1167+
overview.followerCount == 1 &&
1168+
overview.readReplicaCount == ClusterRule.READ_REPLICA_COUNT;
1169+
}
1170+
11361171
private static class RecordAndSummary
11371172
{
11381173
final Record record;
@@ -1144,4 +1179,28 @@ private static class RecordAndSummary
11441179
this.summary = summary;
11451180
}
11461181
}
1182+
1183+
private static class ClusterOverview
1184+
{
1185+
final int leaderCount;
1186+
final int followerCount;
1187+
final int readReplicaCount;
1188+
1189+
ClusterOverview( int leaderCount, int followerCount, int readReplicaCount )
1190+
{
1191+
this.leaderCount = leaderCount;
1192+
this.followerCount = followerCount;
1193+
this.readReplicaCount = readReplicaCount;
1194+
}
1195+
1196+
@Override
1197+
public String toString()
1198+
{
1199+
return "ClusterOverview{" +
1200+
"leaderCount=" + leaderCount +
1201+
", followerCount=" + followerCount +
1202+
", readReplicaCount=" + readReplicaCount +
1203+
'}';
1204+
}
1205+
}
11471206
}

driver/src/test/java/org/neo4j/driver/v1/util/cc/Cluster.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,14 @@ public ClusterMember anyReadReplica()
123123
return randomOf( readReplicas() );
124124
}
125125

126+
public Set<ClusterMember> cores()
127+
{
128+
Set<ClusterMember> readReplicas = membersWithRole( ClusterMemberRole.READ_REPLICA );
129+
Set<ClusterMember> cores = new HashSet<>( members );
130+
cores.removeAll( readReplicas );
131+
return cores;
132+
}
133+
126134
public Set<ClusterMember> readReplicas()
127135
{
128136
return membersWithRole( ClusterMemberRole.READ_REPLICA );

driver/src/test/java/org/neo4j/driver/v1/util/cc/ClusterRule.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,8 @@ public class ClusterRule extends ExternalResource
3939
private static final String PASSWORD = "test";
4040
private static final int INITIAL_PORT = 20_000;
4141

42-
// todo: should be possible to configure (dynamically add/remove) cores and read replicas
43-
private static final int CORE_COUNT = 3;
44-
private static final int READ_REPLICA_COUNT = 2;
42+
public static final int CORE_COUNT = 3;
43+
public static final int READ_REPLICA_COUNT = 2;
4544

4645
public Cluster getCluster()
4746
{

0 commit comments

Comments
 (0)