Skip to content

Commit 33254d9

Browse files
committed
vinyl: remove vylog newer than snap in casual recovery
As a follow-up to the previous patch, let's also check whether the vylog being removed is empty. During vylog rotation all entries are squashed (e.g. "delete range" annihilates "insert range") and written to the new vylog, and a SNAPSHOT marker is placed at the end of the new vylog. If the last entry in the vylog is SNAPSHOT, the vylog holds nothing beyond the squashed state and can be removed without hesitation, so it is OK to remove it even during the casual recovery process. However, if it contains rows after the SNAPSHOT marker, removing the vylog may cause data loss. In this case we can still remove it, but only in force_recovery mode. Follow-up tarantool#5823
1 parent 149ccce commit 33254d9

File tree

4 files changed

+54
-47
lines changed

4 files changed

+54
-47
lines changed

src/box/vy_log.c

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,51 @@ vy_log_bootstrap(void)
10261026
return 0;
10271027
}
10281028

1029+
/**
1030+
* Return true if the last vylog is new and contains no user data
1031+
* (i.e. last entry is VY_LOG_SNAPSHOT): a VY_LOG_SNAPSHOT record is found and the following xlog_cursor_next() call returns a positive value (presumably end of file -- confirm against xlog_cursor_next()). The file checked is the one named by vy_log_filename(vclock_sum(&vy_log.last_checkpoint)).
1032+
* In case of any errors (file not accessible, cursor open failure, row read or record decode failure) log them and return false. False is also returned, without logging, when no SNAPSHOT record is seen or when another row follows it.
1033+
*/
1034+
static bool
1035+
vy_log_last_entry_is_snapshot(void)
1036+
{
1037+
const char *path =
1038+
vy_log_filename(vclock_sum(&vy_log.last_checkpoint));
1039+
if (access(path, F_OK) < 0) { /* F_OK: existence check only */
1040+
say_error("Failed to access last vylog");
1041+
return false;
1042+
}
1043+
struct xlog_cursor cursor;
1044+
if (xdir_open_cursor(&vy_log.dir,
1045+
vclock_sum(&vy_log.last_checkpoint),
1046+
&cursor) < 0) {
1047+
diag_log(); /* report why the cursor could not be opened ... */
1048+
diag_clear(diag_get()); /* ... and drop the error: the caller only gets a bool */
1049+
return false;
1050+
}
1051+
int rc;
1052+
struct xrow_header row;
1053+
while ((rc = xlog_cursor_next(&cursor, &row, false)) == 0) { /* rc == 0: a row was read */
1054+
struct vy_log_record record;
1055+
rc = vy_log_record_decode(&record, &row);
1056+
if (rc < 0) /* decode failure: handled as an error after the loop */
1057+
break;
1058+
if (record.type == VY_LOG_SNAPSHOT) {
1059+
rc = xlog_cursor_next(&cursor, &row, false); /* peek at what follows the SNAPSHOT marker */
1060+
if (rc <= 0) /* rc == 0: another row follows SNAPSHOT; rc < 0: read error */
1061+
break;
1062+
xlog_cursor_close(&cursor, false);
1063+
return true; /* nothing could be read after SNAPSHOT */
1064+
}
1065+
}
1066+
xlog_cursor_close(&cursor, false);
1067+
if (rc < 0) { /* read or decode error: log it, then clear the diag */
1068+
diag_log();
1069+
diag_clear(diag_get());
1070+
}
1071+
return false;
1072+
}
1073+
10291074
struct vy_recovery *
10301075
vy_log_begin_recovery(const struct vclock *vclock, bool force_recovery)
10311076
{
@@ -1065,7 +1110,12 @@ vy_log_begin_recovery(const struct vclock *vclock, bool force_recovery)
10651110
* So in case we are anyway in force recovery mode, let's
10661111
* try to delete last .vylog file and continue recovery process.
10671112
*/
1068-
if (!force_recovery) {
1113+
bool is_vylog_empty = vy_log_last_entry_is_snapshot();
1114+
if (!is_vylog_empty) {
1115+
say_info("Last vylog is not empty. Its removal "
1116+
"may cause data loss!");
1117+
}
1118+
if (!force_recovery && !is_vylog_empty) {
10691119
diag_set(ClientError, ER_MISSING_SNAPSHOT);
10701120
say_info("To bootstrap instance try to remove last "
10711121
".vylog file or run in force_recovery mode");

test/vinyl/gh-5823-crash_snapshot.lua

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
#!/usr/bin/env tarantool
22

33
--
4-
-- mode == 0: casual bootstrap;
5-
-- mode == 1: force recovery bootstrap;
4+
-- mode == 1: casual bootstrap;
65
-- mode == 2: casual bootstrap and fill in data.
76
--
87
local mode = tonumber(arg[1])
98
box.cfg ({
10-
force_recovery = (mode == 1),
119
})
1210

1311
if mode == 2 then

test/vinyl/gh-5823-skip-newer-than-snap-vylog.result

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@ test_run = require('test_run').new()
77
-- 1. There's both memtx and vinyl data;
88
-- 2. User starts checkpoint process;
99
-- 3. In the most unsuitable moment instance crashes;
10-
-- 4. Recovering in the casual mode does not help;
11-
-- 5. Recovering in the force recovery mode solves the problem (deletes
12-
-- redundant vylog file).
10+
-- 4. Recovering in the casual mode successes.
1311
--
1412
test_run:cmd("create server test with script='vinyl/gh-5823-crash_snapshot.lua'")
1513
| ---
@@ -19,38 +17,6 @@ test_run:cmd("start server test with args='2' with crash_expected=True")
1917
| ---
2018
| - false
2119
| ...
22-
-- Can't bootstrap instance without force_recovery.
23-
--
24-
test_run:cmd("start server test with args='0' with crash_expected=True")
25-
| ---
26-
| - false
27-
| ...
28-
29-
fio = require('fio')
30-
| ---
31-
| ...
32-
fh = fio.open(fio.pathjoin(fio.cwd(), 'gh-5823-crash_snapshot.log'), {'O_RDONLY'})
33-
| ---
34-
| ...
35-
size = fh:seek(0, 'SEEK_END')
36-
| ---
37-
| ...
38-
fh:seek(-256, 'SEEK_END') ~= nil
39-
| ---
40-
| - true
41-
| ...
42-
line = fh:read(256)
43-
| ---
44-
| ...
45-
fh:close()
46-
| ---
47-
| - true
48-
| ...
49-
string.match(line, "Can\'t find snapshot") ~= nil
50-
| ---
51-
| - true
52-
| ...
53-
5420
test_run:cmd("start server test with args='1'")
5521
| ---
5622
| - true

test/vinyl/gh-5823-skip-newer-than-snap-vylog.test.lua

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,10 @@ test_run = require('test_run').new()
44
-- 1. There's both memtx and vinyl data;
55
-- 2. User starts checkpoint process;
66
-- 3. In the most unsuitable moment instance crashes;
7-
-- 4. Recovering in the casual mode does not help;
8-
-- 5. Recovering in the force recovery mode solves the problem (deletes
9-
-- redundant vylog file).
7+
-- 4. Recovering in the casual mode successes.
108
--
119
test_run:cmd("create server test with script='vinyl/gh-5823-crash_snapshot.lua'")
1210
test_run:cmd("start server test with args='2' with crash_expected=True")
13-
-- Can't bootstrap instance without force_recovery.
14-
--
15-
test_run:cmd("start server test with args='0' with crash_expected=True")
16-
test_run:grep_log('test', "Can\'t find snapshot", nil, {filename='gh-5823-crash_snapshot.log'}) ~= nil
17-
1811
test_run:cmd("start server test with args='1'")
1912
test_run:cmd("switch test")
2013
box.space.test_v:select({5})

0 commit comments

Comments
 (0)