@@ -82,7 +82,10 @@ groups() ->
82
82
classic_queue_v2
83
83
]},
84
84
{classic_queue_regressions , [], [
85
- reg_v1_full_recover_only_journal
85
+ reg_v1_full_recover_only_journal ,
86
+ reg_v1_no_del_jif ,
87
+ reg_v1_no_del_idx ,
88
+ reg_v1_no_del_idx_unclean
86
89
]}
87
90
].
88
91
@@ -1122,6 +1125,244 @@ do_reg_v1_full_recover_only_journal(Config) ->
1122
1125
1123
1126
Res15 = cmd_restart_vhost_clean (St14 ),
1124
1127
true = postcondition (St14 , {call , undefined , cmd_restart_vhost_clean , [St14 ]}, Res15 ),
1125
- _ = next_state (St14 , Res15 , {call , undefined , cmd_restart_vhost_clean , [St14 ]}),
1128
+ St15 = next_state (St14 , Res15 , {call , undefined , cmd_restart_vhost_clean , [St14 ]}),
1129
+
1130
+ cmd_teardown_queue (St15 ),
1126
1131
1127
1132
true .
1133
+
1134
+ % % The following reg_v1_no_del_* cases test when a classic queue has a
1135
+ % % published message before an upgrade to 3.10. In that case there is
1136
+ % % no delivery marker in the v1 queue index.
1137
+
1138
+ % % After upgrade to 3.10 there is a published message in the journal file.
1139
+ % % Consuming and acknowledging the message should work fine.
1140
%% Common Test entry point for the "publish only in the journal" case.
%% Invokes do_reg_v1_no_del_jif/1 through rabbit_ct_broker_helpers:rpc/5
%% (node argument 0) and asserts that it returns `true'. An exit of the
%% form `{exception, Why}' coming out of the RPC is re-raised as exit(Why).
reg_v1_no_del_jif(Config) ->
    RpcArgs = [Config],
    try
        true = rabbit_ct_broker_helpers:rpc(
                 Config, 0, ?MODULE, do_reg_v1_no_del_jif, RpcArgs)
    catch
        exit:{exception, Why} ->
            exit(Why)
    end.
1147
+
1148
%% Regression scenario: a publish sits in the v1 index journal without a
%% delivery marker, as left behind by a pre-3.10.0 broker.
%%
%% Steps, all driven through cmd/3 against a version 1 classic queue:
%%   1. publish while rabbit_queue_index:deliver/2 is mocked to a no-op,
%%   2. force a journal sync, restore deliver/2 and do a clean vhost
%%      restart (the simulated upgrade),
%%   3. consume and acknowledge the message, then force another sync.
%%
%% The queue index used to crash in `action_to_entry/3' with a case_clause
%% when it found pub+ack but no delivery in the journal.
%% Returns the result of validate_and_teaddown/1.
do_reg_v1_no_del_jif(Config) ->
    InitSt = #cq{name = prop_classic_queue_v1,
                 version = 1,
                 config = minimal_config(Config)},
    QueueSt = InitSt#cq{amq = cmd_setup_queue(InitSt)},

    {OpenSt, Ch} = cmd(cmd_channel_open, QueueSt, []),

    %% Pre-3.10.0 simulation: deliver/2 hands the index state back untouched.
    ok = meck:new(rabbit_queue_index, [passthrough]),
    NoopDeliver = fun(_, IndexState) -> IndexState end,
    ok = meck:expect(rabbit_queue_index, deliver, NoopDeliver),

    PublishArgs = [Ch, 4, _Persistent = 2, _NotMandatory = false,
                   _NoExpiration = undefined],
    {PublishedSt, _PublishRes} = cmd(cmd_channel_publish, OpenSt, PublishArgs),

    %% Send the queue process a `timeout' to force a journal sync. Not
    %% strictly required (the vhost restart syncs as well). Afterwards the
    %% journal should hold a publish entry and there should be no segment
    %% files.
    rabbit_amqqueue:pid_of(PublishedSt#cq.amq) ! timeout,
    {SyncMicros1, ok} =
        timer:tc(meck, wait, [rabbit_queue_index, sync, '_', 1000]),
    ct:pal(" wait for sync took ~p ms", [SyncMicros1 div 1000]),

    %% Simulated upgrade: put the real deliver/2 back, then restart the
    %% vhost cleanly.
    ok = meck:delete(rabbit_queue_index, deliver, 2),
    {RestartedSt, _} = cmd(cmd_restart_vhost_clean, PublishedSt, []),

    meck:reset(rabbit_queue_index),

    %% Consume and acknowledge the message.
    {ConsumingSt, _Tag} = cmd(cmd_channel_consume, RestartedSt, [Ch]),
    %% Block until some message is in the mailbox, then put it back so
    %% that cmd_channel_receive_and_ack can pick it up.
    receive
        Peeked -> self() ! Peeked
    after 5000 ->
        ct:fail(no_message_consumed)
    end,
    {AckedSt, _Msg = #amqp_msg{}} =
        cmd(cmd_channel_receive_and_ack, ConsumingSt, [Ch]),

    %% Force the journal to disk once more.
    rabbit_amqqueue:pid_of(AckedSt#cq.amq) ! timeout,
    {SyncMicros2, ok} =
        timer:tc(meck, wait, [rabbit_queue_index, sync, '_', 1000]),
    ct:pal(" wait for sync took ~p ms", [SyncMicros2 div 1000]),

    validate_and_teaddown(AckedSt).
1194
+
1195
+ % % After upgrade to 3.10 there is a published message in a segment file.
1196
+ % % Consuming and acknowledging the message inserts an ack entry in the journal file.
1197
+ % % A subsequent restart (of the queue/vhost/node) should work fine.
1198
%% Common Test entry point for the "publish in a segment file, ack in the
%% journal, clean restart" case. Invokes do_reg_v1_no_del_idx/1 through
%% rabbit_ct_broker_helpers:rpc/5 (node argument 0) and asserts that it
%% returns `true'. An exit of the form `{exception, Why}' coming out of the
%% RPC is re-raised as exit(Why).
reg_v1_no_del_idx(Config) ->
    RpcArgs = [Config],
    try
        true = rabbit_ct_broker_helpers:rpc(
                 Config, 0, ?MODULE, do_reg_v1_no_del_idx, RpcArgs)
    catch
        exit:{exception, Why} ->
            exit(Why)
    end.
1205
+
1206
%% Regression scenario: a publish sits in a v1 index segment file without a
%% delivery marker (as left behind by a pre-3.10.0 broker), the ack lands in
%% the journal, and the vhost is then restarted cleanly.
%%
%% Steps, all driven through cmd/3 against a version 1 classic queue:
%%   1. publish while rabbit_queue_index:deliver/2 is mocked to a no-op,
%%   2. wait for rabbit_variable_queue:handle_pre_hibernate to be called
%%      (it syncs and flushes the journal),
%%   3. restore deliver/2 and do a clean vhost restart (simulated upgrade),
%%   4. consume and acknowledge the message and force a journal sync,
%%   5. do another clean vhost restart and wait for the index to be read.
%%
%% The queue index used to crash in `segment_plus_journal1/2' with a
%% function_clause in step 5. A failing restart is tolerated here on
%% purpose: the crash is reported by validate_and_teaddown/1 through
%% meck:validate/1.
%% Returns the result of validate_and_teaddown/1.
do_reg_v1_no_del_idx(Config) ->
    St0 = #cq{name = prop_classic_queue_v1, version = 1,
              config = minimal_config(Config)},

    Res1 = cmd_setup_queue(St0),
    St3 = St0#cq{amq = Res1},

    {St4, Ch} = cmd(cmd_channel_open, St3, []),

    %% Simulate pre-3.10.0 behaviour by making deliver a noop
    ok = meck:new(rabbit_queue_index, [passthrough]),
    ok = meck:expect(rabbit_queue_index, deliver, fun(_, State) -> State end),

    ok = meck:new(rabbit_variable_queue, [passthrough]),

    {St5, _Res5} = cmd(cmd_channel_publish, St4,
                       [Ch, 4, _Persistent = 2, _NotMandatory = false,
                        _NoExpiration = undefined]),

    %% Wait for the queue process to get hibernated.
    %% handle_pre_hibernate syncs and flushes the journal, so at this point
    %% there should be a publish entry in the segment file and an empty
    %% journal.
    {Time, ok} = timer:tc(fun() ->
                                  meck:wait(rabbit_variable_queue,
                                            handle_pre_hibernate, '_', 10000)
                          end),
    ct:pal(" wait for hibernate took ~p ms", [Time div 1000]),
    ok = meck:unload(rabbit_variable_queue),

    %% Simulate RabbitMQ version upgrade by a clean vhost restart
    %% (also reset delivery to normal operation)
    ok = meck:delete(rabbit_queue_index, deliver, 2),
    {St10, _} = cmd(cmd_restart_vhost_clean, St5, []),

    %% Consume the message and acknowledge it
    {St6, _Tag} = cmd(cmd_channel_consume, St10, [Ch]),
    %% Block until some message is in the mailbox, then put it back so that
    %% cmd_channel_receive_and_ack can pick it up.
    receive SomeMsg -> self() ! SomeMsg
    after 5000 -> ct:fail(no_message_consumed)
    end,
    {St7, _Msg = #amqp_msg{}} = cmd(cmd_channel_receive_and_ack, St6, [Ch]),

    meck:reset(rabbit_queue_index),

    %% Enforce syncing journal to disk.
    %% At this point there should be a publish entry in the segment file and
    %% an ack in the journal.
    rabbit_amqqueue:pid_of(St7#cq.amq) ! timeout,
    {SyncTime, ok} = timer:tc(fun() ->
                                      meck:wait(rabbit_queue_index, sync, '_', 1000)
                              end),
    ct:pal(" wait for sync took ~p ms", [SyncTime div 1000]),

    meck:reset(rabbit_queue_index),

    %% Another clean vhost restart.
    %% The queue index should not crash when finding a pub in a segment, an
    %% ack in the journal, but no_del.
    %% A failure of the restart command is logged instead of being silently
    %% discarded; the pre-restart state is then used for teardown. When the
    %% restart succeeds, teardown uses the post-restart state, like the
    %% other regression cases do.
    FinalSt =
        try cmd(cmd_restart_vhost_clean, St7, []) of
            {St8, _} ->
                St8
        catch
            Class:Reason ->
                ct:pal("clean vhost restart failed with ~p:~p", [Class, Reason]),
                St7
        end,

    {ReadTime, ok} = timer:tc(fun() ->
                                      meck:wait(rabbit_queue_index, read, '_', 1000)
                              end),
    ct:pal(" wait for queue read took ~p ms", [ReadTime div 1000]),

    validate_and_teaddown(FinalSt).
1262
+
1263
+ % % After upgrade to 3.10 there is a published message in a segment file.
1264
+ % % Consuming and acknowledging the message inserts an ack entry in the journal file.
1265
+ % % The recovery after a subsequent unclean shutdown (of the queue/vhost/node) should work fine.
1266
%% Common Test entry point for the "publish in a segment file, ack in the
%% journal, unclean shutdown" case. Invokes do_reg_v1_no_del_idx_unclean/1
%% through rabbit_ct_broker_helpers:rpc/5 (node argument 0) and asserts that
%% it returns `true'. An exit of the form `{exception, Why}' coming out of
%% the RPC is re-raised as exit(Why).
reg_v1_no_del_idx_unclean(Config) ->
    RpcArgs = [Config],
    try
        true = rabbit_ct_broker_helpers:rpc(
                 Config, 0, ?MODULE, do_reg_v1_no_del_idx_unclean, RpcArgs)
    catch
        exit:{exception, Why} ->
            exit(Why)
    end.
1273
+
1274
%% Regression scenario: a publish sits in a v1 index segment file without a
%% delivery marker (as left behind by a pre-3.10.0 broker), the ack lands in
%% the journal, and the queue then goes through a dirty restart.
%%
%% Steps, all driven through cmd/3 against a version 1 classic queue:
%%   1. publish while rabbit_queue_index:deliver/2 is mocked to a no-op,
%%   2. wait for rabbit_variable_queue:handle_pre_hibernate to be called
%%      (it syncs and flushes the journal),
%%   3. restore deliver/2 and do a clean vhost restart (simulated upgrade),
%%   4. consume and acknowledge the message, wait for the index to see the
%%      ack, then force a journal sync,
%%   5. restart the queue dirty and wait for the index recovery call.
%%
%% The queue index used to crash in `journal_minus_segment1/2' with a
%% function_clause during step 5.
%% Returns the result of validate_and_teaddown/1.
do_reg_v1_no_del_idx_unclean(Config) ->
    InitSt = #cq{name = prop_classic_queue_v1,
                 version = 1,
                 config = minimal_config(Config)},
    QueueSt = InitSt#cq{amq = cmd_setup_queue(InitSt)},

    {OpenSt, Ch} = cmd(cmd_channel_open, QueueSt, []),

    %% Pre-3.10.0 simulation: deliver/2 hands the index state back untouched.
    ok = meck:new(rabbit_queue_index, [passthrough]),
    NoopDeliver = fun(_, IndexState) -> IndexState end,
    ok = meck:expect(rabbit_queue_index, deliver, NoopDeliver),

    %% Mocked only so that the handle_pre_hibernate call can be awaited.
    ok = meck:new(rabbit_variable_queue, [passthrough]),

    PublishArgs = [Ch, 4, _Persistent = 2, _NotMandatory = false,
                   _NoExpiration = undefined],
    {PublishedSt, _PublishRes} = cmd(cmd_channel_publish, OpenSt, PublishArgs),

    %% Wait for the queue process to hibernate; handle_pre_hibernate syncs
    %% and flushes the journal. Afterwards there should be a publish entry
    %% in the segment file and an empty journal.
    {HibernateMicros, ok} =
        timer:tc(meck, wait,
                 [rabbit_variable_queue, handle_pre_hibernate, '_', 10000]),
    ct:pal(" wait for hibernate took ~p ms", [HibernateMicros div 1000]),
    ok = meck:unload(rabbit_variable_queue),

    %% Simulated upgrade: put the real deliver/2 back, then restart the
    %% vhost cleanly.
    ok = meck:delete(rabbit_queue_index, deliver, 2),
    {UpgradedSt, _} = cmd(cmd_restart_vhost_clean, PublishedSt, []),

    %% Consume and acknowledge the message.
    {ConsumingSt, _Tag} = cmd(cmd_channel_consume, UpgradedSt, [Ch]),
    %% Block until some message is in the mailbox, then put it back so
    %% that cmd_channel_receive_and_ack can pick it up.
    receive
        Peeked -> self() ! Peeked
    after 5000 ->
        ct:fail(no_message_consumed)
    end,
    meck:reset(rabbit_queue_index),
    {AckedSt, _Msg = #amqp_msg{}} =
        cmd(cmd_channel_receive_and_ack, ConsumingSt, [Ch]),

    %% The queue must have processed the ack before the sync is triggered.
    {AckMicros, ok} =
        timer:tc(meck, wait, [rabbit_queue_index, ack, '_', 1000]),
    ct:pal(" wait for ack took ~p ms", [AckMicros div 1000]),

    %% Force the journal to disk. Afterwards there should be a publish
    %% entry in the segment file and an ack in the journal.
    rabbit_amqqueue:pid_of(AckedSt#cq.amq) ! timeout,
    {SyncMicros, ok} =
        timer:tc(meck, wait, [rabbit_queue_index, sync, '_', 1000]),
    ct:pal(" wait for sync took ~p ms", [SyncMicros div 1000]),

    meck:reset(rabbit_queue_index),

    %% Recovery after an unclean queue shutdown: the index must cope with a
    %% pub in a segment, an ack in the journal, and no delivery.
    {RecoveredSt, _} = cmd(cmd_restart_queue_dirty, AckedSt, []),

    {RecoverMicros, ok} =
        timer:tc(meck, wait, [rabbit_queue_index, recover, '_', 1000]),
    ct:pal(" wait for queue recover took ~p ms", [RecoverMicros div 1000]),

    validate_and_teaddown(RecoveredSt).
1333
+
1334
%% Runs a single model command outside of the property-test driver.
%%
%% Applies ?MODULE:CmdName to [StIn | ExtraArgs], asserts that
%% postcondition/3 holds for the result, and advances the model with
%% next_state/3. The symbolic call handed to both is
%% {call, undefined, CmdName, [StIn | ExtraArgs]}.
%%
%% Returns {StOut, Result}: the state produced by next_state/3 and the raw
%% result of the command. Fails with badmatch if the postcondition is not
%% `true'.
cmd(CmdName, StIn, ExtraArgs) ->
    Args = [StIn | ExtraArgs],
    SymbolicCall = {call, undefined, CmdName, Args},
    Result = apply(?MODULE, CmdName, Args),
    true = postcondition(StIn, SymbolicCall, Result),
    {next_state(StIn, Result, SymbolicCall), Result}.
1339
+
1340
%% Final step of the reg_v1_no_del_* cases.
%%
%% Returns `true' when meck:validate(rabbit_queue_index) is true. Otherwise
%% logs the exception entries found in the mock's call history and fails
%% the test case with {queue_index_crashed, F}, where F is the function
%% name in the top stack frame of the first recorded exception.
%%
%% F is the atom `unknown' when no such frame is available: the history can
%% hold no exception entry (callers invoke meck:reset/1 before getting
%% here), or the top stack frame may not have the expected shape. The
%% intended queue_index_crashed failure is then still reported instead of
%% being masked by a badarg/badmatch from this function.
%%
%% In every case the rabbit_queue_index mock is unloaded and the queue is
%% torn down through safe_teardown_queue/1.
validate_and_teaddown(St) ->
    try
        case meck:validate(rabbit_queue_index) of
            true ->
                true;
            false ->
                %% Exception entries are the 5-tuples in the history.
                FailedCalls =
                    [Hist || Hist = {_CallerPid, _MFA, _Class, _Reason, _ST}
                                 <- meck:history(rabbit_queue_index)],
                ct:pal(" Failed call(s) to rabbit_queue_index:~n~p ", [FailedCalls]),

                CrashedFun =
                    case FailedCalls of
                        [{_, _, _, _, [{_M, F, _A, _Loc} | _]} | _] -> F;
                        _ -> unknown
                    end,
                ct:fail({queue_index_crashed, CrashedFun})
        end
    after
        %% Tear the queue down even if unloading the mock fails.
        try
            ok = meck:unload(rabbit_queue_index)
        after
            safe_teardown_queue(St)
        end
    end.
1358
+
1359
%% Tears the queue down with cmd_teardown_queue/1, with one retry.
%%
%% If the first attempt raises anything (asking a queue process that is
%% crashing in a cycle to stop can fail), every file matched by the
%% wildcard below under the vhost's message store directory is deleted and
%% the teardown is attempted a second time. A failing file:delete/1 or a
%% failing second attempt is not caught.
safe_teardown_queue(St) ->
    try
        cmd_teardown_queue(St)
    catch
        _:_ ->
            VHostDir = rabbit_vhost:msg_store_dir_path(<<" /">>),
            Pattern = filename:join(VHostDir, " queues/*/*"),
            lists:foreach(fun(QIFile) -> ok = file:delete(QIFile) end,
                          filelib:wildcard(Pattern)),
            cmd_teardown_queue(St)
    end.
0 commit comments