Skip to content

Commit 2023a3c

Browse files
authored
Merge pull request #8487 from rabbitmq/mergify/bp/v3.11.x/pr-8479
rabbit_feature_flags: Use cluster members hint for cluster sync (backport #8477) (backport #8479)
2 parents 7c6f714 + fd9dec5 commit 2023a3c

File tree

3 files changed

+35
-12
lines changed

3 files changed

+35
-12
lines changed

deps/rabbit/src/rabbit_feature_flags.erl

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,10 +1822,19 @@ sync_feature_flags_with_cluster(Nodes, NodeIsVirgin) ->
18221822
sync_feature_flags_with_cluster(Nodes, NodeIsVirgin, Timeout) ->
18231823
Clustered = Nodes =/= [],
18241824
case is_enabled(feature_flags_v2) of
1825-
true when Clustered -> rabbit_ff_controller:sync_cluster();
1826-
true when NodeIsVirgin -> rabbit_ff_controller:enable_default();
1827-
true -> ok;
1828-
false -> sync_cluster_v1(Nodes, NodeIsVirgin, Timeout)
1825+
true when Clustered ->
1826+
%% We don't use `rabbit_nodes:filter_running()' here because the
1827+
%% given `Nodes' list may contain nodes which are not members yet
1828+
%% (the cluster could be being created or expanded).
1829+
Nodes1 = [N || N <- Nodes, rabbit:is_running(N)],
1830+
Nodes2 = lists:usort([node() | Nodes1]),
1831+
rabbit_ff_controller:sync_cluster(Nodes2);
1832+
true when NodeIsVirgin ->
1833+
rabbit_ff_controller:enable_default();
1834+
true ->
1835+
ok;
1836+
false ->
1837+
sync_cluster_v1(Nodes, NodeIsVirgin, Timeout)
18291838
end.
18301839

18311840
sync_cluster_v1([], NodeIsVirgin, _) ->
@@ -1930,7 +1939,12 @@ sync_cluster_v1(Nodes, _NodeIsVirgin, Timeout) ->
19301939
"switching to controller's sync",
19311940
[],
19321941
#{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}),
1933-
rabbit_ff_controller:sync_cluster();
1942+
%% We don't use `rabbit_nodes:filter_running()' here because the
1943+
%% given `Nodes' list may contain nodes which are not members yet
1944+
%% (the cluster could be being created or expanded).
1945+
Nodes1 = [N || N <- Nodes, rabbit:is_running(N)],
1946+
Nodes2 = lists:usort([node() | Nodes1]),
1947+
rabbit_ff_controller:sync_cluster(Nodes2);
19341948
false ->
19351949
_ = verify_which_feature_flags_are_actually_enabled(),
19361950
RemoteNodes = Nodes -- [node()],

deps/rabbit/src/rabbit_ff_controller.erl

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
enable/1,
3636
enable_default/0,
3737
check_node_compatibility/1,
38-
sync_cluster/0,
38+
sync_cluster/1,
3939
refresh_after_app_load/0,
4040
get_forced_feature_flag_names/0]).
4141

@@ -134,21 +134,21 @@ check_node_compatibility(RemoteNode) ->
134134
%% feature flags.
135135
check_node_compatibility_task(ThisNode, RemoteNode).
136136

137-
sync_cluster() ->
137+
sync_cluster(Nodes) ->
138138
?LOG_DEBUG(
139139
"Feature flags: SYNCING FEATURE FLAGS in cluster...",
140140
[],
141141
#{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}),
142142
case erlang:whereis(?LOCAL_NAME) of
143143
Pid when is_pid(Pid) ->
144144
%% The function is called while `rabbit' is running.
145-
gen_statem:call(?LOCAL_NAME, sync_cluster);
145+
gen_statem:call(?LOCAL_NAME, {sync_cluster, Nodes});
146146
undefined ->
147147
%% The function is called while `rabbit' is stopped. We need to
148148
%% start a one-off controller, again to make sure concurrent
149149
%% changes are blocked.
150150
{ok, Pid} = start_link(),
151-
Ret = gen_statem:call(Pid, sync_cluster),
151+
Ret = gen_statem:call(Pid, {sync_cluster, Nodes}),
152152
gen_statem:stop(Pid),
153153
Ret
154154
end.
@@ -273,8 +273,8 @@ proceed_with_task({enable, FeatureNames}) ->
273273
enable_task(FeatureNames);
274274
proceed_with_task(enable_default) ->
275275
enable_default_task();
276-
proceed_with_task(sync_cluster) ->
277-
sync_cluster_task();
276+
proceed_with_task({sync_cluster, Nodes}) ->
277+
sync_cluster_task(Nodes);
278278
proceed_with_task(refresh_after_app_load) ->
279279
refresh_after_app_load_task().
280280

@@ -645,6 +645,15 @@ get_forced_feature_flag_names_from_config() ->
645645
Reason :: term().
646646

647647
sync_cluster_task() ->
648+
Nodes = running_nodes(),
649+
sync_cluster_task(Nodes).
650+
651+
-spec sync_cluster_task(Nodes) -> Ret when
652+
Nodes :: [node()],
653+
Ret :: ok | {error, Reason},
654+
Reason :: term().
655+
656+
sync_cluster_task(Nodes) ->
648657
%% We assume that a feature flag can only be enabled, not disabled.
649658
%% Therefore this synchronization searches for feature flags enabled on
650659
%% some nodes but not all, and make sure they are enabled everywhere.
@@ -657,7 +666,6 @@ sync_cluster_task() ->
657666
%% would make sure a feature flag isn't enabled while there is a network
658667
%% partition. On the other hand, this would require that all nodes are
659668
%% running before we can expand the cluster...
660-
Nodes = running_nodes(),
661669
?LOG_DEBUG(
662670
"Feature flags: synchronizing feature flags on nodes: ~p",
663671
[Nodes],

deps/rabbit/test/feature_flags_v2_SUITE.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ setup_slave_node(Config, Testcase) ->
202202
_ ->
203203
ok = maybe_enable_feature_flags_v2(Config)
204204
end,
205+
_ = catch rabbit_boot_state:set(ready),
205206
ok.
206207

207208
setup_logger() ->

0 commit comments

Comments
 (0)